\documentclass[11pt]{article}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{verbatim}
\allowdisplaybreaks

\pagestyle{empty} \setlength{\topmargin}{-0.75in}
\setlength{\textheight}{9.5in} \setlength{\oddsidemargin}{-0.1in}
\setlength{\evensidemargin}{-.2in} \setlength{\textwidth}{6.7in}
\font\heada=cmbx10 scaled\magstep3 \font\headb=cmsl10
scaled\magstep1 \font\headc=cmr8 \pretolerance=10000
\def\ds{\displaystyle}

\begin{document}
\section*{Chapter 9 - Estimation}

\bigskip

\subsection*{Point Estimation}

\begin{enumerate}
\item {\bf Point Estimator} - A formula applied to a data set which results in a single value or point.  In other words, a
statistic.  Usually, estimators are used to give plausible values of
some population parameter.
\begin{itemize}
\item $\overline X$, $\tilde X$, $S^2$, and $p$ are point estimators of the parameters $\mu$, $\tilde \mu$, $\sigma^2$ and $\pi$ respectively.
\end{itemize}
\vspace{0.05in}

\item {\bf Point Estimate} - The resulting value of a point estimator, when applied to a data
set.
\begin{itemize}
\item $\overline x$ = 27.6, $\tilde x$ = 85, $s^2 = 2.45$, and $p$ = 0.30
are point estimates of the values of $\mu$, $\tilde \mu$, $\sigma^2$
and $\pi$ respectively.
\end{itemize}
\end{enumerate}
\vspace{0.15in}



\noindent {\bf \underline{Desirable Properties of a Point
Estimator}:}
\begin{enumerate}
\item {\bf Unbiased} - A point estimator is \underline{unbiased} for a parameter if the mean
of the estimator's sampling distribution equals the value of the
parameter. Otherwise, the estimator is \underline{biased}.
\begin{itemize}
\item $\overline X$ is an unbiased estimator of $\mu$ because $\mu_{_{\overline X}} = \mu$.

\item $S^2$ is an unbiased estimator of $\sigma^2$ because
$\mu_{S^2}=\sigma^2$.  $S$ is NOT an unbiased estimator of $\sigma$!



\item $p$ is an unbiased estimator of $\pi$ because $\mu_p = \pi$.
\end{itemize}
\vspace{0.05in}

\item {\bf Smallest Variability} - When choosing among unbiased estimators, the
one with the smallest sampling variability (i.e. smallest standard
deviation) is the best, because the point estimates will be most
closely concentrated around the parameter value.
\end{enumerate}

\begin{itemize}
\item A Point Estimator that has properties 1 and 2 above is called a {\bf
Minimum Variance Unbiased Estimator (MVUE)}.  When the data is
normal, then {$\overline X$ is an MVUE for $\mu$}.  That is,
$\sigma^2_{\overline X}=\frac{\sigma^2}{n}$ is no larger than the
variance of any other unbiased point estimator of $\mu$.

\item When the data is not normal, then $\overline X$ need not be an MVUE
for $\mu$ (see page 365 of your textbook). \vspace{0.25in}

\end{itemize}


\subsection*{Interval Estimation}

\begin{enumerate}
\item {\bf Interval Estimator}: A formula applied to a data set
which results in an interval of plausible values for some parameter.
It has the form
\begin{center}
{\bf point estimator $\pm$ margin of error}.
\end{center}

\item {\bf Confidence Interval (C.I.)}: An interval estimator which has a {\it level of confidence} attached to it.

\item {\bf Confidence Level:} A quantity (typically stated
as a percentage) describing how often a confidence interval (over
all samples of a given size) captures the parameter value.
\begin{itemize}
\item Commonly-used confidence levels are 90\%, 95\%, and 99\%.
\item A 95\% confidence level means that:

\begin{itemize}
\item With probability .95, the formula for the confidence interval captures the value of
the parameter.

\item If confidence intervals were calculated from all possible
samples, then 95\% of the intervals would contain the parameter
value.
\end{itemize}
\end{itemize}

\end{enumerate}
\newpage


\noindent {\bf Confidence Interval for $\mu$ (when $\sigma$ is
known)}\ \ \ $\longrightarrow$ \ \ \ \fbox{$\overline
X~\pm~z_{_{1-\frac{\alpha}{2}}}\left(\ds
\frac{\sigma}{\sqrt{n}}\right)$}

\begin{verse}
\noindent {\bf Assumptions:} \vspace{-0.05in}
\begin{itemize}
\item The data must be a SRS (so if you sample a finite population without replacement, then $.05N\ge n$).
\item The sampling distribution of $\overline X$ must be at least approximately normal (so the data is normal OR $n>15$ for symmetric non-normal data
OR $n>30$ for skewed data).

\end{itemize}
\end{verse}
\vspace{0.15in}

\noindent {\bf Notation:}

\begin{itemize}
\item the margin of error is $m =
z_{_{1-\frac{\alpha}{2}}}\left(\ds \frac{\sigma}{\sqrt{n}}\right)$.

\item $z_{_{1-\frac{\alpha}{2}}}$ is the {\em critical value}, the
$100\left(1-\frac{\alpha}{2}\right)$th percentile of the $Z$
distribution, $Z \sim N(0,~1)$.

\item $\alpha$ is a {\em significance level}

\item $C=1-\alpha$ is the confidence level

\end{itemize}

\begin{tabular}{||c|c|c|c||} \hline
$C$ & $\alpha = 1 - C$ & $1 - \frac{\alpha}{2}$ & $z_{_{1-\frac{\alpha}{2}}}$\\ \hline
0.90 & 0.10 & 0.95 & \\
0.95 & 0.05 & 0.975 & \\
0.99 & 0.01 & 0.995 & \\ \hline
\end{tabular}
\vspace{0.25in}

\noindent Find $z_{_{1-\frac{\alpha}{2}}}$ for 90\%, 95\% and 99\%
confidence intervals using the normal table.

\vspace{2.5in}



\noindent \underline{EXAMPLE \#1}:  (Problem 9.37 on page 392)
Seventy seven students at the University of Virginia were asked to
keep a diary of conversations with their mothers, recording any lies
they told during the conversations.  Suppose that $\overline x=.5$
and $\sigma=.4$.

\begin{itemize}
\item Is the sample size large enough to calculate a C.I.?

\item Construct a 90\% C.I. for $\mu$, the true mean number of lies told
to mothers per conversation.


\end{itemize}





\newpage
\noindent {\bf \underline{Confidence Interval Behavior}:} \\ \vspace{0.1in}

\noindent The margin of error $m$, and hence the width of the
interval, depends on:
\begin{enumerate}
\item The confidence level $C$: Increasing $C$ increases $m$ and the
interval width.
\item The variability of the response $\sigma$: Increasing $\sigma$
increases $m$ and the interval width.
\item The sample size $n$: Increasing $n$ decreases $m$ and the interval
width.
\end{enumerate}


\vspace{3in}

\begin{itemize}
\item The C.I.'s with $\overline x$'s that are less than one margin of error away from $\mu$
will be the intervals that \underline{capture $\mu$}.  This happens
$100C\%$ of the time.
\item The C.I.'s with $\overline x$'s that are more than one margin of error away from $\mu$
will be the intervals that \underline{do not capture $\mu$}.   This
happens $100\alpha\%$ of the time.
\item Typically, only one sample is selected from the population and therefore only one
confidence interval will be calculated. The {\bf CORRECT INTERPRETATION} of this
{\it one} confidence interval is: \vspace{-0.15in}

\begin{center}
\noindent We are \underline{\hspace{0.4in}}\% confident that $\mu$
lies between \underline{\hspace{0.6in}} and
\underline{\hspace{0.6in}}.
\end{center}
Usually, one interprets $\mu$ in terms of the problem.

\item An {\bf INCORRECT INTERPRETATION} of this {\it one} confidence interval is:
\vspace{-0.1in}

\begin{center}
\noindent There is a \underline{\hspace{0.4in}}\% chance (i.e. probability) that $\mu$ lies
between \underline{\hspace{0.6in}} and \underline{\hspace{0.6in}}.
\end{center}
Probability statements can only be made about confidence intervals
BEFORE you calculate the specific confidence interval estimate for
the data. A given interval estimate captures the parameter with
probability {\bf 0} or {\bf 1}; we simply do not know which it is.
\end{itemize}
\vspace{0.05in}

\noindent \underline{EXAMPLE \#1} revisited: From our previous
example, we'd interpret the C.I. like this:
\begin{center}
We are \underline{\hspace{0.4in}}\% confident that, on average,
University of Virginia students lie to their mothers between
\underline{\hspace{0.6in}} and \underline{\hspace{0.6in}} times per
conversation.
\end{center}

\noindent {\bf \underline{QUESTION}: True or False?} \\ \vspace{0.05in}

\noindent Suppose $\mu$ is the average weight (in pounds) of dogs in
Bozeman. A 95\% C.I. for $\mu$ is (42,~48).

\begin{itemize}
\item[1.] T \ / \ F: \ Ninety-five percent of the weights in the population are between 42 and
48 pounds. \vspace{-0.05in}
\item[2.] T \ / \ F: \ Ninety-five percent of the weights in the sample are between 42 and 48
pounds. \vspace{-0.05in}
\item[3.] T \ / \ F: \ The probability that the confidence interval (42,~48) includes $\mu$
is 0.95. \vspace{-0.05in}
\item[4.] T \ / \ F: \ The sample mean $\overline{x}$ is in the confidence interval with
probability 0.95. \vspace{-0.05in}
\item[5.] T \ / \ F: \ If 100 confidence intervals were generated using this same process,
approximately 5 of the confidence intervals would not include $\mu$.
\end{itemize}



\noindent \underline{EXAMPLE \#2}: \\
\noindent Gas mileages (in MPG) of a SRS of 20 cars of a certain
make and model are recorded and the sample mean is found to be 18.48
MPG. Suppose that the data is not normal but is fairly symmetric.
Also suppose that the population standard deviation is known to be
$\sigma$ = 2.9 MPG.

\begin{itemize}
\item Do we have a large enough sample size?

\vspace{1in}

\item Find and interpret a 95\% confidence interval for $\mu$, the mean
MPG for this vehicle.

\end{itemize}

\vspace{2.25in}


\newpage
\noindent {\bf Confidence Interval for $\mu$ ($\sigma$ unknown)} \ \
\ $\longrightarrow$ \ \ \ \fbox{$\overline
X~\pm~t_{_{1-\frac{\alpha}{2},~n-1}}\left(\ds
\frac{s}{\sqrt{n}}\right)$}

\begin{verse}
\noindent {\bf Assumptions:} \vspace{-0.05in}
\begin{itemize}
\item The data must be a SRS (so if you sample a finite population without replacement, then $.05N\ge n$).
\item The sampling distribution of $\overline X$ must be at least approximately normal (so the data is normal OR $n>15$ for symmetric non-normal data
OR $n>30$ for skewed data).
\end{itemize}
\end{verse}
\vspace{0.15in}

\noindent {\bf \underline{t Distribution}} \ \ $\longrightarrow$ \ \
$T \sim t(df)$ \vspace{0.1in}


\begin{itemize}
\item The t distribution is symmetric, unimodal, bell-shaped, and centered at zero. \vspace{-0.05in}
\item The t distribution has heavier tails than the Z distribution because $s$ (an
estimate of $\sigma$) is used instead of $\sigma$. \vspace{-0.05in}
\item As the degrees of freedom (df) increases, the t distribution approaches
the Z distribution.
\end{itemize}
\vspace{1.75in}

\noindent {\bf Notation:}

\begin{itemize}
\item the margin of error is $m =
t_{_{1-\frac{\alpha}{2},~n-1}}\left(\ds \frac{s}{\sqrt{n}}\right)$.

\item Use the t-table or R to calculate $t_{_{1-\frac{\alpha}{2},~n-1}}$, the t {\em critical value}, the $100\left(1-\frac{\alpha}{2}\right)$th
percentile of the t distribution with $n-1$ degrees of freedom,
$t(n-1)$. Since $t(n-1)$ has thicker tails than $N(0,1)$,
$t_{_{1-\frac{\alpha}{2},~n-1}}>z_{_{1-\frac{\alpha}{2}}}$.

\item $\frac{s}{\sqrt{n}}$ is the {\em standard error} of $\overline X$

\end{itemize}
\vspace{.05in}

\noindent \underline{EXAMPLE \#3}: From a SRS of 8 shipments of corn
soy blend, a highly nutritious food sent for emergency relief, the
mean vitamin C content (in mg/100g) is $\overline x = 22.5$ and the
sample standard deviation is $s = 7.19$.

\begin{itemize}
\item Do we have a large enough sample size?

\item  Calculate and interpret a 99\% confidence
interval for $\mu$, the true mean vitamin C content of corn soy
blend.

\end{itemize}

\vspace{2.25in}








\newpage
\noindent {\bf Confidence Interval for $\pi$} \ \ \
$\longrightarrow$ \ \ \ \fbox{$p~\pm~z_{1-\frac{\alpha}{2}}\left(\ds
\sqrt{\frac{p(1-p)}{n}}\right)$}

\begin{verse}
\noindent {\bf Assumptions:} \vspace{-0.05in}
\begin{itemize}
\item The data must be a SRS (so if you sample a finite population without replacement, then $.05N\ge n$).
\item The sampling distribution for $p$ must be approximately normal (so $np\ge 10$ and \ $n(1-p) \ge
10$)
\end{itemize}
\end{verse}


\noindent \underline{EXAMPLE \#4}: \\
\noindent In a study of heavy drinking on college campuses, 17096
students were interviewed.  Of these, 3314 admitted to consuming
more than 5 drinks at a time, three times a week.

\begin{itemize}
\item Give a point estimate of the proportion of college students who are ``heavy''
drinkers.

\vspace{1in}

\item Is the sample large enough to assume that the sampling distribution of $p$ is approximately
normal?

\vspace{1in}

\item Give a $99\%$ C.I. for the proportion of all college students
who are heavy drinkers.

\end{itemize}
\vspace{2.25in}





\newpage

\subsection*{Sample Size Calculations}

Before any study, a researcher often already knows the confidence
level and the {\em precision} (or margin of error) of a desired
confidence interval.  For a fixed confidence level and margin of
error, the only other factor under the researcher's control is the
sample size.  So a researcher must collect enough data to be able to
construct the desired C.I.'s.

\noindent {\bf Sample Size Calculation for Estimating $\mu$}: \ \ \
$\longrightarrow$ \ \ \ \fbox{$n = \left(\ds \frac{z_{_{1 -
\frac{\alpha}{2}}}~\sigma}{m}\right)^2$}

\begin{itemize}
\item $z_{1 - \frac{\alpha}{2}}$ is the critical value for the
desired confidence level $C=1-\alpha$.

\item $m$ is the desired
margin of error

\item $\sigma$ is the standard deviation of the population
\end{itemize}


\noindent \underline{If $\sigma$ is unknown, two options for
estimating $\sigma$ are}:
\begin{enumerate}
\item Use a sample standard deviation, $s$, from a previous study.
\item Use the anticipated range divided by 4.
\end{enumerate}


\noindent \underline{EXAMPLE}: \\
\noindent Find the sample size necessary to estimate the mean level
of phosphate in the blood of dialysis patients to within 0.05 with
90\% confidence. A previous study calculated a sample standard
deviation of $s=1.6$. \vspace{1.75in}



\noindent {\bf Sample Size Calculation for Estimating $\pi$}: \ \ \
$\longrightarrow$ \ \ \ \fbox{$n = \pi(1-\pi)\left(\ds \frac{z_{_{1
- \frac{\alpha}{2}}}}{m}\right)^2$} \vspace{0.1in}


\noindent \underline{Two options for the value of $\pi$}:
\begin{enumerate}
\item Use an estimate, $p$, from a previous study.
\item Use $\pi = \frac{1}{2}$.  This is the more conservative choice because using it
will result in a sample size $n$ even larger than needed.
\end{enumerate}


\noindent \underline{EXAMPLE}: \\
\noindent Your company would like to carry out a survey of customers
to determine the degree of satisfaction with your customer service.
You want to estimate the proportion of customers who are satisfied.
What sample size is needed to attain 95\% confidence and a margin of
error less than or equal to 3\%, or 0.03?

\vspace{2.25in}


\newpage
\subsection*{R commands}

\verbatiminput{Chapter9Rcode.txt}


\newpage
\subsection*{Estimating $\mu$ after transforming data}
For large sample sizes ($n>30$), we do not need to assume that the
data $\{x_i\}$ is normal in order to find a C.I. for $\mu_X$.   But
when we have a small sample ($n<15$) from a population which is
clearly non-normal, then a ``transform to normality'' $Y=f(X)$ may be
appropriate (such as the Box-Cox family of transforms in Chapter 7
notes). In many cases, one cannot directly interpret the point
estimate $\overline y$ or a C.I. for $\mu_Y$.

\begin{enumerate}

\item \textbf{The General Case $Y=f(X)$}
\begin{itemize}
\item $f^{-1}(\text{point estimator for } \mu_Y)$ is a point estimator for
$f^{-1}(\mu_Y)$ (invariance of MLE's).

\item $f^{-1}(\text{point estimator for } \mu_Y)$ is a $1^{st}$ order Taylor
series point estimator for $\mu_X$ (a better $2^{nd}$ order estimator is given below).

\item When $f$ is monotone, the limits of the C.I. for $\mu_Y$ can ALWAYS be back-transformed to get a C.I. for
$f^{-1}(\mu_Y)$.  SOMETIMES, the limits of the C.I. for $\mu_Y$ can
be back-transformed to get a C.I. for $\mu_X$.




\item \textbf{Delta Method:}  Point
estimators of $\mu_Y$ and $\sigma^2_Y$ (in addition to  $\overline
Y$ and $S^2_Y$) can be derived by considering the Taylor series
expansion
    $$Y=f(X)\approx f(\mu_X) + f'(\mu_X)(X-\mu_X) + \frac{1}{2}f''(\mu_X)(X-\mu_X)^2$$
and taking the mean (to $2^{nd}$ order) and variance (to $1^{st}$
order) of both sides
    \begin{eqnarray*}
    \mu_Y\approx f(\mu_X) + \frac{1}{2}f''(\mu_X)\sigma^2_X ~~~~~~~~~~~~  \sigma^2_Y \approx \left(f'(\mu_X)\right)^2\sigma^2_X.
    \end{eqnarray*}

\item $X$ and $Y$ are random variables which can be viewed as either data or
statistics.  In the second case, if $\{X_i\}$ is a sequence of
statistics such that $X_i~\dot\sim~ N(\mu_X,\sigma^2_X)$ (and the
approximation gets better as $i$ increases, with $\sigma_X\to 0$, e.g.\ when $X_i$ is a sample mean), then
    $$Y_i=f(X_i)~\dot\sim~ N\left(f(\mu_X),\left(f'(\mu_X)\right)^2\sigma_X^2\right)$$
(as long as $f'$ is continuous).  Thus
    $$\sigma^2_{Y_i}\to \left(f'(\mu_X)\right)^2\sigma_X^2$$
where convergence is in probability.
The technical formulation is that if $\sqrt{n}(X_n-\mu_X) \to N(0,\sigma^2)$ in distribution as $n\to\infty$, then
    $$\sqrt{n}\left(Y_n-f(\mu_X)\right)=\sqrt{n}\left(f(X_n)-f(\mu_X)\right)~\to~ N\left(0,\left(f'(\mu_X)\right)^2\sigma^2\right)$$
in distribution (as long as $f'$ is continuous), with $\sigma^2_X=\frac{\sigma^2}{n}$.

\item The case where either $X$ or $Y$ is multivariate is easily
dealt with using a multivariate Taylor series, so that
the covariance matrix $\Sigma_Y\approx J^T\Sigma_X J$ where $J$ is a Jacobian evaluated at $\mu_X$, $J_{ij}=\frac{\partial f_j}{\partial X_i}(\mu_X)$. When the statistics $X_i$ converge to a multivariate normal distribution, then
    $$Y_i=f(X_i)~\dot\sim~ N\left(f(\mu_X),J^T\Sigma_X J\right)$$
where the approximation gets better as $i$ increases (see Casella and Berger p.329).

\end{itemize}


\item \textbf{Box-Cox transform $Y=X^\lambda$ with $\lambda \ne 0$}

\begin{itemize}

\item By the Delta Method, $\mu_Y^{\frac{1}{\lambda}}\approx
    \mu_X\left(1+\frac{\lambda(\lambda-1)}{2}\frac{\sigma_X^2}{\mu_X^2}\right)^{\frac{1}{\lambda}}.$

\item \fbox{$\overline y^{\frac{1}{\lambda}}\approx
\mu_X$} if $\frac{s^2}{\overline
x^2}\approx\frac{\sigma_X^2}{\mu_X^2}$ is ``small''


\item If $Y=X^\lambda$ is monotone (you need to check this), then the back-transformed interval

     \hspace{1in}\fbox{$\left(\left(\overline
y~-~t_{_{1-\frac{\alpha}{2},~n-1}}\ds
\frac{s_y}{\sqrt{n}}\right)^{\frac{1}{\lambda}}, \left(\overline
y~+~t_{_{1-\frac{\alpha}{2},~n-1}}\ds
\frac{s_y}{\sqrt{n}}\right)^{\frac{1}{\lambda}}\right)$}

\begin{itemize}

\item is a $100(1-\alpha)\%$ C.I. for
$\left(\mu_Y\right)^{\frac{1}{\lambda}}$.

\item is an approximate $100(1-\alpha)\%$ C.I. for $\mu_X$
when $\frac{s^2}{\overline x^2}\approx\frac{\sigma_X^2}{\mu_X^2}$ is
``small''





\end{itemize}


\item When $\lambda<0$, you'll need to swap the endpoints of the C.I.


\end{itemize}

\item \textbf{The Box-Cox transform $Y = \ln(X)$}


\begin{itemize}

\item By the Delta Method, $\exp(\mu_Y)\approx
    \mu_X\left(\exp\left(-\frac{\sigma_X^2}{2\mu_X^2}\right)\right).$

\item \fbox{$\exp(\overline y)\approx \mu_X$} if $\frac{s^2}{\overline
x^2}\approx\frac{\sigma_X^2}{\mu_X^2}$ is ``small''

\item \fbox{$\exp(\overline y)=\left(\prod_i x_i\right)^{\frac{1}{n}}$} is the geometric mean  of the untransformed data %$\{x_i\}$ $\mu_Y\approx 1/n \sum_i \ln X_i=\ln\left(\Pi_iX_i\right)^{\frac{1}{n}}$



\item Since the log transform is strictly monotone, then the back-transformed interval


\hspace{1in}\fbox{$\left(\exp\left(\overline
y~-~t_{_{1-\frac{\alpha}{2},~n-1}}\ds
\frac{s_y}{\sqrt{n}}\right), \exp\left(\overline
y~+~t_{_{1-\frac{\alpha}{2},~n-1}}\ds
\frac{s_y}{\sqrt{n}}\right)\right)$}


\begin{itemize}

\item is a $100(1-\alpha)\%$ C.I. for $\exp\left(\mu_Y\right)$, the population
geometric mean of $X$.

\item is an approximate $100(1-\alpha)\%$ C.I. for $\mu_X$
when $\frac{s^2}{\overline x^2}\approx\frac{\sigma_X^2}{\mu_X^2}$ is
``small''





\end{itemize}


\end{itemize}


\end{enumerate}


\subsection*{Exercises}

\noindent 9.1 on p366: 1--7 odd

\noindent 9.2 on p379 (C.I. for $\pi$): 11--17 odd, 21--25 odd, 27
(sample size calculation).

\noindent 9.3 on p391 (C.I. for $\mu$): 29--35 odd, 39--45 odd, 47
(sample size calculation).


\subsection*{Reading} Sections 9.1--9.3
\end{document}

