\documentclass[11pt,titlepage]{article}
\usepackage{amsmath}
\usepackage{graphicx}
\allowdisplaybreaks

\jot=.2in \pagestyle{plain} \setlength{\topmargin}{-.4in}
% \setlength{\footheight}{0 in}
\setlength{\textheight}{9. in}
\setlength{\oddsidemargin}{-0.2in}
\setlength{\evensidemargin}{-0.1in}
\setlength{\textwidth}{6.5in}
\font\heada=cmbx10 scaled\magstep3
\font\headb=cmsl10 scaled\magstep1
\font\headc=cmr8
\pretolerance=10000
\setlength{\parindent}{2 em}
%\input macros
\newdimen\digitwidth
\newdimen\minuswidth
\setbox0=\hbox{\rm0}
\digitwidth=\wd0
\setbox1=\hbox{$-$}
\minuswidth=\wd1
\newdimen\starr
\setbox2=\hbox{${}^*$}
\starr=\wd2

{\catcode`?=\active
\def?{\kern\digitwidth}
\catcode`@=\active
\def@{\kern\minuswidth}
\catcode`|=\active
\def|{\kern\starr}}



\begin{document}
\noindent {\heada Project 5 Solutions}\\
\noindent {\headb Statistics 401: Fall 2006}\\
{\it Due: Monday, October 16}
\bigskip


\begin{enumerate}

\item (12 pts, 2 pts for (d) and (j), 1 pt otherwise)

\begin{enumerate}
\item Figure 1 contains a density plot, a boxplot, and a normal
probability plot of the service calls data.

\begin{center}
{\bf Figure 1: Checking for Normality}
\includegraphics[angle=0,width=5in]{project5Plot1.ps}
\end{center}

\item Table 1 gives the sample mean, sample standard deviation,
and the five number summary for the service calls data.


\begin{center}
{\bf Table 1: Statistics for Service Calls data}

\begin{tabular}{||c|c|c|c|c|c|c||}\hline
$\bar x$ & $s$ & $\min$ & $Q_1$ & $\tilde X$ & $Q_3$ & $\max$ \\
\hline
200.79 & 313.03 & 1.00 &  56.75 & 117.00 & 234.00 & 2631.00\\
\hline
\end{tabular}
\end{center}


\item The correlation between the normal scores and the calls data
shown in Figure 1 is $0.712$ (see the Appendix for details). Since
$.712 <r_{\rm critical}=.98$, the evidence suggests that there
is a deviation from normality.

\item \label{normal} The severe right skew in the histogram and
boxplot, the severe deviation from linearity in the normal
probability plot, as well as the test of correlation all indicate
that the service calls data are not normal.

\item Figure 2 shows the confidence interval for the appropriate
$\lambda$ for the Box-Cox transform.

\begin{center}
{\bf Figure 2: Confidence interval for $\lambda$ for the Box-Cox
transform}

\vspace{-.3in}
\includegraphics[angle=0,width=5in]{project5BoxCox.ps}
\end{center}

\item From the confidence interval, it appears that the transform
$Y=X^{\frac{1}{4}}$ is appropriate for the service calls data.


\item Figure 3 contains a density plot, a boxplot, and a normal
probability plot of the transformed calls data.

\newpage
\begin{center}
{\bf Figure 3: Checking for normality of the transformed data}
\includegraphics[angle=0,width=5in]{project5Plot2.ps}

\end{center}


\item Table 2 gives the sample mean, sample standard deviation,
and the five number summary for the transformed service calls data.


\begin{center}
{\bf Table 2: Statistics for $({\rm Service~Calls})^{\frac{1}{4}}$}

\begin{tabular}{||c|c|c|c|c|c|c||}\hline
$\bar x$ & $s$ & $\min$ & $Q_1$ & $\tilde X$ & $Q_3$ & $\max$ \\
\hline
3.3016 & 1.0573 & 1.000 &  2.745 &  3.289 &  3.911  & 7.162\\
\hline
\end{tabular}
\end{center}


\item The correlation between the normal scores and the
transformed calls is 0.9865378 (see the Appendix for details).
 Since this value is larger than the critical $r$ value of .98
 then the evidence fails to suggest that the transformed data
 deviates from normality.

\item The histogram of the transformed data is less skewed than
the original service calls data.   The boxplot looks more symmetric
and the normal probability plot looks better.  Also, considering the
test of correlation, we conclude that the evidence fails to suggest
that the transformed data is not normal.  It appears that our
transformation to normality was effective.


\end{enumerate}



\item (2 pts) Let $X$ be the number of SPAM
emails received per day per employee at a large software engineering
company.   Suppose that the distribution  of $X$ is:

\begin{center}
\begin{tabular}{||c|ccc|}\hline
$X$ & $0$ & $1$ & $2$\\ \hline $P(X=x)$ & $0.60$ & $0.30$ &
$0.10$\\ \hline
\end{tabular}
\end{center}


By definition, $\mu_X=\sum_x xP(x)=0(.6) + 1(.3) + 2(.1)=.5$ and $\sigma^2_X=\sum_x (x-\mu)^2P(x)=(0-.5)^2(.6) + (1-.5)^2(.3) + (2-.5)^2(.1)=.45$ so $\sigma_X= \sqrt{.45}=0.6708$.

\item (6 pts) Consider the population of four textbooks from problem
8.10(a) on page 340.
\begin{enumerate}

\item The population mean is $\mu=\frac{212 + 379 + 350 + 575}{4}=379$ pages.  The population variance is $\sigma^2=\frac{(212-379)^2 + (379-379)^2 + (350-379)^2 + (575-379)^2}{4}=16786.5$, so $\sigma=\sqrt{16786.5}\approx 129.56$.

\item and (c) Table 1 shows the 6 possible samples that can be drawn from the population and the corresponding values of $\overline X$.
Table 2 shows the sampling distribution for $\overline X$, where
$\mu_{\overline X}=\frac{281 + 295.5 + 364.5 + 393.5 + 462.5 +
477}{6}$ and $\sigma^2_{\overline X}=\frac{(281-379)^2 +
(295.5-379)^2 + (364.5-379)^2 + (393.5-379)^2 + (462.5-379)^2 +
(477-379)^2}{6}$.

\bigskip
\noindent For any type of sample (it doesn't have to be random and the
sample size doesn't have to be less than 5\% of the size of the
population!), it is true that $\mu_{\bar x}=\mu_x$. However, since the size of
each sample of $n=2$ is $50\%$ of the population of size $N=4$, we have
$\sigma_{\bar x}\ne \frac{\sigma}{\sqrt{n}}$.
\end{enumerate}



\begin{center}
{\catcode`?=\active
\def?{\kern\digitwidth}

{\bf Table 1: Samples from the population of 4 books}\\
\vspace{.1in}

\begin{tabular}{||l|c||}\hline
\multicolumn{1}{||c}{Sample} & $\bar x$ \\ \hline
$?1$. 212, 379 & 295.5 \\[.05in]
$?2$. 212, 350& 281  \\[.05in]
$?3$. 212, 575 & 393.5 \\[.05in]
$?4$. 379, 350& 364.5  \\[.05in]
$?5$. 379, 575 & 477 \\[.05in]
$?6$. 350, 575& 462.5  \\ \hline
\hline
\end{tabular}

\vspace{0.5in}

\newpage
{\bf Table 2: Sampling Distribution of $\overline X$}\\
\begin{tabular}{|c|c|}
\multicolumn{2}{c}{~} \\[.1in] \hline
Value of $\bar x$ & $P(\bar x)$ \\ \hline
281 & $\frac{1}{6}$ \\[.05in]
295.5 & $\frac{1}{6}$ \\[.05in]
364.5 & $\frac{1}{6}$ \\[.05in]
393.5 & $\frac{1}{6}$ \\[.05in]
462.5 & $\frac{1}{6}$ \\[.05in]
477 & $\frac{1}{6}$ \\[.05in]
\hline
$\mu_{\bar X}$: & 379 \\[.05in]
$\sigma^2_{\bar X}$: & 5595.5 \\[.05in]
$\sigma_{\bar X}$: & 74.803 \\\hline \end{tabular}

\vspace{0.5in}


}

\end{center}




\item (Problem 8.16 on page 350, 2 pts) The population mean $\mu$ is equal to the mean of the sampling
distribution of $\overline X$, $\mu_{\overline x}$.  On the other
hand, the sampling variability of $\overline X$ gets smaller as the
sample size gets larger, $\sigma_{\overline
x}=\frac{\sigma}{\sqrt{n}}$.

\item (4 pts) Regarding wait times for an elevator in Problem 8.18 on page
350:


\begin{enumerate}
\item The mean is $\mu_{\overline x}=\mu=.5$ minutes.  The standard deviation is $\sigma_{\overline
x}=\frac{\sigma}{\sqrt{n}}=\frac{.289}{\sqrt{16}}\approx.07225$
minutes.

\item The mean is $\mu_{\overline x}=\mu=.5$ minutes.  The standard deviation is $\sigma_{\overline
x}=\frac{\sigma}{\sqrt{n}}=\frac{.289}{\sqrt{50}}\approx.0409$
minutes.

\item The approximate distribution of $\overline X$ when the sample size is
$n=50$ is $N(.5,.0409)$ because the Central Limit Theorem applies
when $n=50>30$.

\item The probability is $P(\overline X > \frac{25}{60})=P(z>\frac{\frac{25}{60}-.5}{.0409}\approx-2.04)\approx
0.9792$.  Thus, for 50 individuals, the average wait time is more
than 25 seconds 98\% of the time.
\end{enumerate}

\item (4 pts) Regarding the ``Should Women Move'' in Problem 8.32 on
page 357:
\begin{enumerate}
\item The Central Limit Theorem applies when $n\pi\ge 10$ and
$n(1-\pi)\ge 10$.   In this case, $n\pi=10(.3)=3$ (and
$n(1-\pi)=7$), so the CLT does not apply.  Thus, we cannot be certain
that $p$ has an approximate normal distribution.

\item The mean is $\mu_p=\pi=.3$ and the standard deviation
is
$\sigma_p=\sqrt{\frac{\pi(1-\pi)}{n}}=\sqrt{\frac{.3(.7)}{400}}\approx
.0229$.

\item For a sample size of $n=400$, $n\pi=400(.3)=120>10$ and
$n(1-\pi)=400(.7)=280>10$, so the Central Limit Theorem applies and
shows that $p~\dot\sim~ N(.3,.0229)$.   Thus, $P(.25\le p \le
.35)=P(\frac{.25-.3}{.0229}\le z \le \frac{.35-.3}{.0229})\approx
P(-2.18\le z\le 2.18)\approx .9707$.  Thus, when the sample size is
400, the sample proportion will be between .25 and .35 97\% of the
time.

\item When the sample size increases, the sampling variability of
$p$ decreases, and so the probability in the tails decreases. Thus,
the probability that $p$ is between .25 and .35 will increase.
\end{enumerate}




\end{enumerate}



\end{document}


%\item \#8.20, page 350.
%\item \#8.22, page 350.

