\documentclass[11pt,titlepage]{article}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{verbatim}

\allowdisplaybreaks
\def\ds{\displaystyle}

\jot=.2in \pagestyle{empty} \setlength{\topmargin}{-0.5in}
\setlength{\textheight}{9.5in} \setlength{\oddsidemargin}{-0.35in}
\setlength{\evensidemargin}{-0.35in} \setlength{\textwidth}{7in}
\font\heada=cmbx10 scaled\magstep3 \font\headb=cmsl10
scaled\magstep1 \font\headc=cmr8 \pretolerance=10000
\setlength{\parindent}{2 em}

\begin{document}

\noindent {\heada Exam 2 Solutions}\\
\noindent {\bf Statistics 401} \hfill Name \underline{\hspace{3in}} \\
\noindent {\headb April 4, 2007} \\
\bigskip

{\small \noindent {\fbox{\bf TRUE/FALSE: (3 pts each) For each of
the following, circle T or F. }}}

\begin{enumerate}

\item {\bf FALSE} \ \ When choosing between an unbiased and a
biased statistic, always choose the unbiased one.

\item {\bf FALSE} \ \ If a 95\% confidence interval for $\mu$, the mean
mpg for a new SUV, is (17.21, 19.75), then $\mu$ is larger than
19.75 mpg  about $2.5\%$ of the time.

\item {\bf TRUE} \ \ As sample size increases, the $t$
distribution $t(n-1)$ converges to a normal distribution.


\item {\bf FALSE} \ \ The larger the $p$-value, the stronger the evidence
that ${\rm H}_0$ is true.

\item {\bf FALSE} \ \ A farmer owns a population
of 50 cows.  He randomly chooses 5 cows without replacement from the
herd and carefully monitors $X$, gestation length, for each of these
five.  If $\mu_x=281$ days and $\sigma_x=13$ days, then
$\sigma_{\bar x} = \frac{13}{\sqrt{5}}$.

\item {\bf FALSE} \ \ The population standard deviation $\sigma$ decreases
as the sample size $n$ increases.





\bigskip
\bigskip


{\small \noindent { \hskip -.4in {\fbox{\bf SHORT ANSWER AND COMPUTATION: Show all work to receive
full credit! Write legibly.}}}} \smallskip

\medskip

\item (6 pts) State the Central Limit Theorem.


\bigskip

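For a large SRS $X_1, \dots, X_n$ from a population with mean $\mu_x$
and standard deviation $\sigma_x$, the sample mean is approximately
normal: $\overline X ~\sim~ N\left(\mu_x,
\frac{\sigma_x}{\sqrt{n}}\right)$, regardless of the shape of the
population distribution.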

\bigskip

\item Joey and Henrietta, an intrepid young couple, opened a Netflix account in September,
and now enjoy watching movies which get sent to them in the mail.
Since it costs \$15 each month, Joey says that Netflix isn't worth
it if they don't watch more than 4 movies on average each month.
From October through March, Joey looks up how many movies each month
they have watched:

  \hspace{1in}4, 5, 6, 5, 4, 4


\begin{enumerate}
\item (6 pts) State the null and alternative hypotheses corresponding to what Joey wants to test: are they
watching more than four movies on average each month?

\hspace{1in}${\rm H}_0: \mu = 4$

\hspace{1in}${\rm H}_a: \mu > 4$

\bigskip




\item \label{normass} (4 pts) To perform the test, what, besides being a SRS, must be assumed about the
data?  Explain.

Since $n<15$, we must assume that the data is normally distributed.


\newpage
\item \label{normcheck} (6 pts) Does the data appear to satisfy the assumption in
\eqref{normass}?  Indicate which of the R-output below you are using
to answer this question.  Be sure to indicate how you are using the
correlation value.

The evidence is conflicted.  From the histogram and boxplot, there
is skewness in the data.   However, the normal probability plot does
not look too bad.  Indeed, the correlation between the percentiles
of the data and the theoretical normal quantiles is $r = .911$,
which is larger than $r_{\rm critical}=.88$, which indicates that
there is not conclusive evidence that the data is non-normal.




\verbatiminput{exam2movies.txt}

\begin{center}
{\bf Table 7.1 - Critical correlation coefficients}

\smallskip
\begin{tabular}{lrrrrrrrrrr} \hline
$n$ & 5 & 10 & 15 & 20 & 25 & 30 & 40 & 50 & 60 & 75 \\
Critical $r$ & 0.832 & 0.880 & 0.911 & 0.929 & 0.941 & 0.949 & 0.960
& 0.966 & 0.971 & 0.976 \\ \hline
\end{tabular}
\end{center}

\begin{tabular}{cc}
\includegraphics[width=3in]{exam2norm.ps} &
\includegraphics[width=3in]{exam2boxcox.ps}
\end{tabular}

\item Regardless of your answer to \eqref{normcheck}, assume that all of the assumptions are satisfied.
Conduct the hypothesis test on the original data set $\{4, 5, 6, 5, 4,
4\}$.  The statistics are $\bar x=4.6667$ and $s_x=.8165$.


\begin{enumerate}
\item (4 pts) Calculate the value of the test statistic.

$$t=\frac{4.\bar6 - 4}{\frac{.8165}{\sqrt{6}}} \approx 2$$
\bigskip


\item (3 pts) Give the distribution of the test statistic assuming that the
null hypothesis is true.

$$t ~\sim~t(5)$$

\bigskip


\item (3 pts) Assuming that the $p$-value is .045, make a decision regarding ${\rm H}_0$ at a significance level of $\alpha=.05$.

\bigskip
Since the $p$-value $= .045 < \alpha = .05$, REJECT ${\rm H}_0$.

\bigskip

\item (4 pts) Give a conclusion {\bf in terms of the problem}.

\bigskip

The evidence suggests that the couple watch more than four movies on
average each month.

\end{enumerate}


\end{enumerate}



\newpage
\item According to the Centers for Disease Control and Prevention (CDC), 76 million
people in the US get diarrhea and upset stomachs each year.  Most of
these infections can be prevented by regularly washing one's hands.
A microbiologist believes that a majority (i.e., more than 50\%) of
women wash their hands after using the bathroom.   She collects a
sample of 40 women, calculates $p$, and performs a hypothesis test
at $\alpha=.05$.

% In fact, CDC reports that 85% of women wash their hands after using the bathroom, and 25% of males.

\begin{enumerate}

\item \label{prophyp} (6 pts) Give the hypotheses which the microbiologist wants to test.


\hspace{1in}${\rm H}_0:\pi=.5$

\hspace{1in}${\rm H}_a:\pi>.5$

\bigskip


\item (4 pts) Describe a Type II error {\bf in terms of this problem}.

\bigskip

A Type II Error is failing to find that the majority of women wash
their hands after using the bathroom when in fact a majority of
women DO wash their hands.

\bigskip

\item \label{Hosamp} (4 pts) If the ${\rm H}_0$ from \eqref{prophyp} is true, sketch the sampling distribution for the sample proportion $p$,
and shade and clearly indicate the region corresponding to $\alpha$.

\bigskip

The sampling distribution for $p$ when ${\rm H}_0$ is true is
$N\left(.5,\sqrt{\frac{.5(1-.5)}{40}}\right)$.  The area
corresponding to $\alpha=.05$ is the upper tail with probability
$.05$ in it.


\item (4 pts) Now suppose that the alternative hypothesis is true, such that the true
proportion of women who wash their hands is $\pi=0.70$.  Add to the
graph you made in \eqref{Hosamp} a sketch of the sampling
distribution for the sample proportion $p$, and shade and clearly indicate the
region corresponding to $\beta$, the probability of a Type II error.

The sampling distribution for $p$ when ${\rm H}_a$ is true with $\pi
= .7$ is $N\left(.7,\sqrt{\frac{.7(1-.7)}{40}}\right)$.  The area
corresponding to $\beta$ is the lower tail which has boundary at the
boundary of the $\alpha$ rejection region.



\item  (8 pts) Suppose that the true proportion of women who wash their hands
is $\pi = 0.70$ and that ${\rm H}_0$ will be rejected if the test
statistic $z
> z_{.95}=1.645$ (which corresponds to a test with $\alpha=.05$).
For the hypotheses in problem \eqref{prophyp}, compute $\beta$, the
probability of a Type II error.

First, find the value of the sample proportion which is on the
boundary of the rejection region of size $\alpha$ by solving for
$p$:
    $$1.645 = \frac{p-.5}{\sqrt{\frac{.5(1-.5)}{40}}} \quad\Longrightarrow\quad p = .5 + 1.645\sqrt{\frac{.5(1-.5)}{40}} \approx 0.63.$$
Thus
    $$\beta = P(p < .63) = P\left(z < \frac{.63 -
    .7}{\sqrt{\frac{.7(1-.7)}{40}}}\approx -.97\right)\approx.1660.$$

\newpage
\item (3 pts) What is the power of the test?

\bigskip

Power = $1 - \beta = 1 - .1660 = .8340.$

\bigskip

\item (4 pts) The microbiologist submits a paper presenting the hand-washing study for publication in a peer-reviewed journal.
A reviewer complains that the power of the test is not large enough.
What TWO things can be changed so that the power of the test is
increased?

\bigskip

Increase sample size and/or increase $\alpha$.

\bigskip

\end{enumerate}




\item Suppose that a population of $N=4$ female wolves have each had a litter of pups of size $X$=
    $$2,~ 3,~ 4,~ 6$$
  pups respectively.  An investigator plans on taking a
  random sample of $n=3$ wolves (without replacement) from these four and then computing
  $\tilde x$, the sample median.

\begin{enumerate}
\item (5 pts) Construct the sampling distribution of $\tilde x$, sampling without
  replacement.  There are 4 different samples of size 3!

The sampling distribution of the sample median $\tilde X$ is:

\begin{tabular}{|c|c|c|}
  \hline
  % after \\: \hline or \cline{col1-col2} \cline{col3-col4} ...
  sample & $\tilde x$ & $P(\tilde x)$ \\\hline
  2, 3, 4 & 3 & .25 \\
  2, 3, 6 & 3 & .25 \\
  3, 4, 6 & 4 & .25 \\
  2, 4, 6 & 4 & .25 \\
  \hline
\end{tabular}

\bigskip


\item (4 pts) \label{sampmedian} Calculate $\mu_{\tilde x}$, the mean of $\tilde x$.

\bigskip
$$\mu_{\tilde x}=\frac{3 + 4}{2}=3.5$$
\bigskip


\item (4 pts) Is $\tilde x$ unbiased for the true median of the population?
Use your answer from \eqref{sampmedian} to justify your answer.

\bigskip

The true median of the population is $\tilde \mu = 3.5$.   Since
$\mu_{\tilde x}=\tilde \mu$, then $\tilde X$ is an unbiased
statistic for $\tilde \mu$.

\end{enumerate}

\newpage



















\end{enumerate}
\end{document}



\item Five temperature measurements, $X$, (in Fahrenheit) are taken from an underground
lake in Antarctica.  In order to satisfy the assumptions to
construct a valid CI, the investigator decides to take the
reciprocal transform of the data:

    $$Y=X^{-1}.$$

She then computes a 95\% CI for $\mu_Y$ and gets (.026, .032).

\begin{enumerate}

\item \label{CItrans} Give a 95\% CI for $\mu_X$.

\vskip 1in

\item \label{CIcheck} The sample statistics are $\bar x=35.2$, $s_x=3.1$, $\bar
y=.029$ and $s_y=.0034$.  Is the CI in \eqref{CItrans} valid?  Why
or why not?

\vskip 1in


\item Regardless of your answer to \eqref{CIcheck}, interpret the CI
in \eqref{CItrans} {\bf in terms of the problem}.

\end{enumerate}


\item We wish to calculate a 95\% confidence interval for the
proportion of males who wash their hands after using the bathroom.
To assure that the margin of error is 3\%, how large must the sample
be?

 \vspace{1.5in}

