\documentclass[11pt,titlepage]{article}
\usepackage{amsmath}
\usepackage{graphicx}
\allowdisplaybreaks

% ---- Heading fonts (plain-TeX font loads) --------------------------------
% \heada and \headb set the two title lines in the body; \headc is loaded
% here but not used in the visible part of the file.
\font\heada=cmbx10 scaled\magstep3
\font\headb=cmsl10 scaled\magstep1
\font\headc=cmr8
\pretolerance=10000
% (\pretolerance=10000 makes TeX accept its first, hyphenation-free
% line-breaking pass for every paragraph.)
% ---- Page geometry: 6.5in x 9.5in text block, page number in the footer ---
\pagestyle{plain}
\setlength{\topmargin}{-0.5in}
\setlength{\textheight}{9.5in}
\setlength{\oddsidemargin}{0in}
\setlength{\evensidemargin}{0in}
\setlength{\textwidth}{6.5in}
% ---- Paragraph and display spacing ----------------------------------------
\setlength{\parindent}{2em}
\setlength{\jot}{.2in}
%\input macros
% Reference widths for hand-aligning numeric table entries.  Each width is
% obtained by typesetting the glyph into a scratch box and reading \wd.
% NOTE(review): the boxes are set in the font current at this point of the
% preamble, whereas the document body switches to \small -- confirm that the
% resulting widths are close enough for the intended use.
%
% Width of the text digit 0.
\newdimen\digitwidth
\setbox0=\hbox{\rm 0}
\digitwidth=\wd0
%
% Width of a math-mode minus sign.
\newdimen\minuswidth
\setbox1=\hbox{$-$}
\minuswidth=\wd1
%
% Width of a superscript asterisk.
\newdimen\starr
\setbox2=\hbox{${}^*$}
\starr=\wd2


% Alignment placeholders: while active, `?', `@' and `!' expand to kerns of
% \digitwidth, \minuswidth and \starr respectively, i.e. invisible space as
% wide as a digit, a minus sign and a superscript star.
% NOTE(review): everything below sits inside one brace group and uses plain
% (local) \def, so the catcode changes and the three definitions all end at
% the closing brace on the last line.  The jellyfish table in the body sets
% up `?' and `@' again inside its own group; `!' is not used in the visible
% source.
{\catcode`?=\active
\def?{\kern\digitwidth}
\catcode`@=\active
\def@{\kern\minuswidth}
\catcode`!=\active
\def!{\kern\starr}}

% Data in question 2 is from Roberts Fall 2004 project 1

\begin{document}
\begin{center}
{\heada PROJECT 1 - DATA}\\
{\headb Statistics 401: Spring 2007}\\
{\it Due Monday, January 29}
\end{center}
\small

\noindent {\small Read the Syllabus and Chapter 4 in
\emph{Course Notes: Statistics for Researchers STAT401 FALL
2006} and write up your report for this assignment accordingly.  The
write-up (not including the Appendix, which contains your R-code and
R-output) should not exceed two pages.  Your grade will be
determined by how well you answer the questions and by the
professionalism and clarity of your write-up.}

\begin{enumerate}
\item Read the article {\em Search-and-rescue dogs not sickened by post-9/11 work, scientists
say} available through the STAT401 web site.
\begin{enumerate}
\item Give the five steps of the scientific method.

\item For each of the five steps of the scientific method, indicate
how that step was carried out by the scientists described in the
article.

\item Who or what are the individuals of interest?

\item The article describes an explanatory variable and a response variable.
What is  the explanatory variable?   What is the response variable?

\item Classify each variable from 1d as either categorical or numerical. For each
categorical variable (if there are any), give the number of
categories. For each numerical variable (if there are any), classify
it as either discrete or continuous.  Summarize your results in a
Table, with three columns: Variable, Type and Properties. Label this
Table and reference it from the body of your report.

\end{enumerate}

\item \label{prob} Table 1 displays the dimensions (in mm) of
jellyfish from two samples taken from the Hawkesbury River in New
South Wales, Australia.  One of the samples is from Dangar Island
and the other is from Salamander Bay.  The investigator wants to
know to what degree one can determine the location of the
jellyfish given its dimensions.


\begin{center}\textbf{Table 1: Dimensions of Jellyfish}\\
% Inside the group below, `?' and `@' are active characters standing for an
% invisible digit and an invisible minus sign, so that one-digit entries are
% padded to the width of two-digit ones in the centred columns.
% \phantom measures the glyph in the font the table is actually set in,
% rather than relying on a width measured earlier at a different font size.
% `@' is not used by the rows below; it is kept for entries needing a sign.
{\catcode`?=\active
\def?{\phantom{0}}
\catcode`@=\active
\def@{\phantom{-}}
\begin{tabular}{||cc|cc||}\hline
\multicolumn{2}{||c}{Dangar Island} &
\multicolumn{2}{|c||}{Salamander Bay} \\
Breadth & Length & Breadth & Length\\ \hline
$?6.5$ & $?8.0$ & $12.0$ & $14.0$\\
$?6.0$ & $?9.0$ & $15.0$ & $16.0$\\
$?6.5$ & $?9.0$ & $14.0$ & $16.5$\\
$?7.0$ & $?9.0$ & $13.0$ & $17.0$\\
$?8.0$ & $?9.5$ & $15.0$ & $17.0$\\
$?7.0$ & $10.0$ & $15.0$ & $18.0$\\
$?8.0$ & $10.0$ & $15.0$ & $18.0$\\
$?8.0$ & $10.0$ & $16.0$ & $18.0$\\
$?7.0$ & $11.0$ & $14.0$ & $19.0$\\
$?8.0$ & $11.0$ & $15.0$ & $19.0$\\
$?9.0$ & $11.0$ & $16.0$ & $19.0$\\
$10.0$ & $13.0$ & $16.5$ & $19.0$\\
$11.0$ & $13.0$ & $18.0$ & $19.0$\\
$12.0$ & $13.0$ & $18.0$ & $19.0$\\
$11.0$ & $14.0$ & $16.0$ & $20.0$\\
$11.0$ & $14.0$ & $16.0$ & $20.0$\\
$13.0$ & $14.0$ & $17.0$ & $20.0$\\
$14.0$ & $16.0$ & $18.0$ & $20.0$\\
$15.0$ & $16.0$ & $19.0$ & $20.0$\\
$15.0$ & $16.0$ & $15.0$ & $21.0$\\
$15.0$ & $19.0$ & $16.0$ & $21.0$\\
$16.0$ & $16.0$ & $21.0$ & $21.0$\\
&&$19.0$ & $22.0$\\
&&$20.0$ & $22.0$\\ \hline
\end{tabular}}
\end{center}


\begin{enumerate}
\item Create a table, with three columns: Variable, Type and Properties.  In this table, list the
variables in this study. Classify each variable as either
categorical or numerical. Give the number of categories and list the
categories of each categorical variable (if there are any). Classify
each numerical variable (if there are any) as either discrete or
continuous. If a variable is continuous but rounded, then classify it
as continuous.  Label the Table and reference it from the body of
your report.

\item Of the variables listed in
2a, which are response variables and which are explanatory
variables?

\item A data file which contains the data from Table 1 is at the
STAT401 web site.  Read the data file into a data frame in R. In an
Appendix in your report, include the commands that (a) make it
possible to refer to the variables by their names alone, rather than
as components of the data frame, and (b) display the contents of the
data frame.  Include this R-output in the Appendix of your report as
well.  Remember that you will need to use the menu item {\bf File $\to$ (Change
dir \ldots)} if the data file does not reside in the working directory.


\end{enumerate}

\item Use R to find the mean and standard deviation of the Length
of the jellyfish from Dangar Island and from Salamander Bay.  Do
these values suggest that there may be a true difference in the mean
length of jellyfish from these two locations? In an Appendix,
include the R-code and R-output you used to answer this question.

\item Find a data set in your research area. Write a
self-contained description of the data. Assume that you are
writing for an educated reader, but one who is not familiar with
your research area. Make sure that I will be able to understand the
meaning of your variables.

\item Answer the questions in \#\ref{prob} for your chosen data
set.  Include all R-code and R-output in an Appendix of your report.



\end{enumerate}


\end{document}

