\documentclass[12pt,titlepage]{article}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{verbatim}
\allowdisplaybreaks

\jot=.2in \pagestyle{empty} \setlength{\topmargin}{-0.5in}
\setlength{\textheight}{9.5in} \setlength{\oddsidemargin}{-0.1in}
\setlength{\evensidemargin}{-0.1in} \setlength{\textwidth}{6.7in}
\font\heada=cmbx10 scaled\magstep3 \font\headb=cmsl10
scaled\magstep1 \font\headc=cmr8 \pretolerance=10000
\setlength{\parindent}{2 em}

\begin{document}
\noindent {\heada Chapter 5 - Bivariate Numerical Data}

\subsection*{5.1 Correlation}

\begin{itemize}
\item \underline{Bivariate} means that there are two variables to be studied.
One variable, called the {\em explanatory variable} $X$, is used to
describe the other variable, the {\em response} $Y$.  So a sample of
bivariate data consists of ordered pairs $(x_1,y_1)$, $(x_2,y_2)$,
\dots, $(x_n,y_n)$.

\item \underline{Multivariate} means that there are more than two variables to
be studied.  One or more explanatory variables can be used to
describe one or more response variables.

\item \underline{Numerical} data indicates that the sample space of
 each variable being measured is a subset of the real numbers.
\end{itemize}


\noindent \underline{\bf Scatterplot} - a graphical display of the
relationship between two numerical variables.   The explanatory
variable is along the horizontal axis.   The response variable is
along the vertical axis.

\vspace{0.2in}


\noindent\underline{EXAMPLE}:  From $n=5$ skeletons of a
particular dinosaur species, femur measurements and humerus measurements (in
inches) are taken.   Create a scatterplot of the data.

\bigskip

\begin{tabular}{c|c}
  % after \\: \hline or \cline{col1-col2} \cline{col3-col4} ...
$X$=femur & $Y$=humerus\\\hline
  38 & 41 \\
  50 & 63 \\
  59 & 70 \\
  64 & 72 \\
  74 & 84 \\
\end{tabular}

\bigskip

\bigskip


\noindent \underline{\bf How to Describe a Relationship}:
\begin{enumerate}
\item {\bf Form} - linear, non-linear (curved), clustered, etc.
\item {\bf Direction} - positive or negative.  A positive relationship indicates that increasing values of one variable are associated with
increasing values of the other variable.  A negative relationship
indicates that increasing values of one variable are associated with
decreasing values of the other variable.
\item {\bf Strength} - strong, moderate, or weak
\end{enumerate}
\vspace{0.1in}

\newpage
\noindent \underline{\bf Sample Correlation Coefficient} - A
statistic which estimates the {\it direction} and {\it strength} of
a linear relationship between two variables.



\[ r = \frac{1}{n-1}\sum_{i=1}^n \left(\frac{x_i - \bar x}{s_x}\right) \left(\frac{y_i - \bar y}{s_y}\right) \]

\begin{itemize}
\item $-1 \le r \le 1$

\vspace{-.1in}

\begin{itemize}
\item The closer $r$ is to $\pm 1$, the stronger the linear
relationship between $X$ and $Y$

\item The closer $r$ is to $0$, the weaker the linear relationship between
$X$ and $Y$.

\item If $r>0$, then the direction of the relationship between $X$
and $Y$ is positive.

\item If $r<0$, then the direction of the relationship between $X$
and $Y$ is negative.

\end{itemize}

\item $r$ is unitless and does not depend on the units of either numerical variable.
\item $r$ does not depend on which variable is considered $X$ and which is $Y$.

\item Strong correlation does not imply causation!



\end{itemize}

\vspace{0.1in}

\noindent \underline{\bf Population Correlation Coefficient}:
$\rho$, the Greek letter for ``r'' pronounced ``row,'' is the true
correlation between $X$ and $Y$.   $r$ is a point estimate of
$\rho$; it is only approximately unbiased, with the bias shrinking as $n$ grows.

\bigskip

\noindent \underline{EXAMPLE}: Calculate the sample correlation
coefficient for the dinosaur bone example.  Use the statistics $\bar
x=57$, $s_x=13.71$, $\bar y=66$ and $s_y=15.89$.


\vspace{2in}


\newpage
\section*{R code}
\begin{verbatim}
> # Input the data
> femur=c(38,50,59,64,74)
> humerus=c(41,63,70,72,84)


> # Create a scatterplot
> plot(x=femur,y=humerus)


> # Calculate the correlation
> cor(femur,humerus)
[1] 0.9776198


> # Calculate the least-squares regression line
> dino.lm=lm(humerus ~ femur)


> # Add the least-squares regression line to the scatterplot
> abline(dino.lm)


\end{verbatim}




\section*{Exercises}
5.1 on p.~194: 1--11 odd, 15



\section*{Reading}
Section 5.1


\end{document}

