> dat = read.table("http://www.math.montana.edu/shancock/courses/stat539/data/mental-impairment.txt",header=T) > head(dat) Y SES Life 1 1 1 1 2 1 1 9 3 1 1 4 4 1 1 3 5 1 0 2 6 1 1 0 > summary(dat) Y SES Life Min. :1.000 Min. :0.00 Min. :0.000 1st Qu.:1.000 1st Qu.:0.00 1st Qu.:2.000 Median :2.000 Median :1.00 Median :4.000 Mean :2.325 Mean :0.55 Mean :4.275 3rd Qu.:3.000 3rd Qu.:1.00 3rd Qu.:6.250 Max. :4.000 Max. :1.00 Max. :9.000 > table(dat$Y) 1 2 3 4 12 12 7 9 > barchart(Y ~ SES) Error: could not find function "barchart" > barplot(Y ~ SES) Error in barplot.default(Y ~ SES) : 'height' must be a vector or a matrix > library(ggplot2) Need help getting started? Try the cookbook for R: http://www.cookbook-r.com/Graphs/ > ggplot(dat, aes(y = Y, x = SES)) + geom_barchart(position="fill") Error: could not find function "geom_barchart" > ggplot(dat, aes(y = Y, x = SES)) + geom_barplot(position="fill") Error: could not find function "geom_barplot" > ggplot(dat, aes(y = Y, x = SES)) + geom_bar(position="fill") Error: stat_count() must not be used with a y aesthetic. > ggplot(dat, aes(x = Y, group = SES)) + geom_bar(position="fill") > ggplot(dat, aes(x = Y, group = SES, colour= Y)) + geom_bar(position="fill") > ggplot(dat, aes(x = Y, group = SES, fill= Y)) + geom_bar(position="fill") > ggplot(dat, aes(x = Y, group = SES, fill= Y)) + geom_bar() > ggplot(dat, aes(x = Y, group = SES, fill= SES)) + geom_bar() > table(Y, SES) Error in table(Y, SES) : object 'Y' not found > attach(dat) > table(Y, SES) SES Y 0 1 1 4 8 2 4 8 3 5 2 4 5 4 > prop.table(dat) Y SES Life 1 0.003496503 0.003496503 0.003496503 2 0.003496503 0.003496503 0.031468531 3 0.003496503 0.003496503 0.013986014 4 0.003496503 0.003496503 0.010489510 5 0.003496503 0.000000000 0.006993007 6 0.003496503 0.003496503 0.000000000 7 0.003496503 0.000000000 0.003496503 8 0.003496503 0.003496503 0.010489510 9 0.003496503 0.003496503 0.010489510 10 0.003496503 0.003496503 0.024475524 11 0.003496503 0.000000000 0.003496503 12 0.003496503 0.000000000 0.006993007 13 0.006993007 0.003496503 0.017482517 14 0.006993007 0.000000000 0.020979021 15 0.006993007 0.003496503 0.010489510 16 0.006993007 0.000000000 0.003496503 17 0.006993007 0.003496503 0.027972028 18 0.006993007 0.003496503 0.006993007 19 0.006993007 0.000000000 0.017482517 20 0.006993007 0.003496503 0.017482517 21 0.006993007 0.003496503 0.031468531 22 0.006993007 0.000000000 0.010489510 23 0.006993007 0.003496503 0.010489510 24 0.006993007 0.003496503 0.003496503 25 0.010489510 0.000000000 0.000000000 26 0.010489510 0.003496503 0.013986014 27 0.010489510 0.000000000 0.010489510 28 0.010489510 0.000000000 0.031468531 29 0.010489510 0.003496503 0.020979021 30 0.010489510 0.000000000 0.013986014 31 0.010489510 0.000000000 0.010489510 32 0.013986014 0.003496503 0.027972028 33 0.013986014 0.003496503 0.006993007 34 0.013986014 0.003496503 0.024475524 35 0.013986014 0.000000000 0.017482517 36 0.013986014 0.000000000 0.013986014 37 0.013986014 0.000000000 0.013986014 38 0.013986014 0.003496503 0.027972028 39 0.013986014 0.000000000 0.027972028 40 0.013986014 0.000000000 0.031468531 > prop.table(Y ~ SES) Error in sum(x) : invalid 'type' (language) of argument > prop.table(SES, Y) Error in margin.table(x, margin) : 'x' is not an array > prop.table(table(Y, SES)) SES Y 0 1 1 0.100 0.200 2 0.100 0.200 3 0.125 0.050 4 0.125 0.100 > table(Y, SES) SES Y 0 1 1 4 8 2 4 8 3 5 2 4 5 4 > 4/(8+10) [1] 0.2222222 > prop.table(table(SES, Y)) Y SES 1 2 3 4 0 0.100 0.100 0.125 0.125 1 0.200 0.200 0.050 0.100 > rowsums(table(SES,Y)) Error: could not find function "rowsums" > ??rowsums > colSums(table(Y,SES)) 0 1 18 22 > table(Y,SES)/colSums(table(Y,SES)) SES Y 0 1 1 0.2222222 0.4444444 2 0.1818182 0.3636364 3 0.2777778 0.1111111 4 0.2272727 0.1818182 > barplot(props) Error in barplot(props) : object 'props' not found > props <- table(Y,SES)/colSums(table(Y,SES)) > barplot(props) > ?barplot starting httpd help server ... done > barplot(t(props)) > props SES Y 0 1 1 0.2222222 0.4444444 2 0.1818182 0.3636364 3 0.2777778 0.1111111 4 0.2272727 0.1818182 > ?rowSums > props <- table(SES,Y)/rowSums(table(SES,Y)) > props Y SES 1 2 3 4 0 0.22222222 0.22222222 0.27777778 0.27777778 1 0.36363636 0.36363636 0.09090909 0.18181818 > props <- table(SES,Y)/colSums(table(SES,Y)) > props Y SES 1 2 3 4 0 0.3333333 0.5714286 0.4166667 0.7142857 1 0.6666667 0.8888889 0.1666667 0.4444444 > table(SES,Y) Y SES 1 2 3 4 0 4 4 5 5 1 8 8 2 4 > colSums(table(SES,Y)) 1 2 3 4 12 12 7 9 > props <- table(SES,Y)/rep(colSums(table(SES,Y)), each=2) > props Y SES 1 2 3 4 0 0.3333333 0.3333333 0.7142857 0.5555556 1 0.6666667 0.6666667 0.2857143 0.4444444 > barplot(t(props)) > barplot(props) > barplot(props, xlab="Mental Impairment", ylab="Proportion in SES") > mosaicplot(Y ~ SES) > mosaicplot(SES ~ Y) > table(Life) Life 0 1 2 3 4 5 6 7 8 9 2 5 4 8 5 4 2 2 4 4 > plot(Y ~ Life) > plot(jitter(Y) ~ Life) > plot(Y ~ jitter(Life)) > plot(Y ~ jitter(Life, factor=.5)) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score") > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=SES+1) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=SES+2) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=SES+3) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=SES+3, cex=1.3) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=c("tomato","purple")[SES], cex=1.3) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=c("tomato","purple")[SES==1], cex=1.3) > SES [1] 1 1 1 1 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 0 1 1 0 1 0 0 1 [30] 0 0 1 1 1 0 0 0 1 0 0 > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=c("tomato","purple")[SES+1], cex=1.3) > legend(locator(1), c("High","Low"), col=c("purple","tomato"), pch=c(2,1)) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=c("tomato","purple")[SES+1], cex=1.3) > legend(locator(1), c("High","Low"), col=c("purple","tomato"), pch=c(2,1)) > plot(Y ~ jitter(Life, factor=.5), xlab="Life Index", ylab="Mental Impairment Score", pch=SES+1, col=c("tomato","purple")[SES+1], cex=1.3) > legend("topleft", c("High","Low"), col=c("purple","tomato"), pch=c(2,1)) > prop.table(table(Y, Life)) Life Y 0 1 2 3 4 5 6 7 8 1 0.025 0.075 0.050 0.075 0.025 0.000 0.000 0.025 0.000 2 0.000 0.050 0.025 0.075 0.000 0.075 0.025 0.000 0.025 3 0.025 0.000 0.000 0.050 0.050 0.000 0.025 0.000 0.000 4 0.000 0.000 0.025 0.000 0.050 0.025 0.000 0.025 0.075 Life Y 9 1 0.025 2 0.025 3 0.025 4 0.025 > prop.table(table(Life, Y)) Y Life 1 2 3 4 0 0.025 0.000 0.025 0.000 1 0.075 0.050 0.000 0.000 2 0.050 0.025 0.000 0.025 3 0.075 0.075 0.050 0.000 4 0.025 0.000 0.050 0.050 5 0.000 0.075 0.000 0.025 6 0.000 0.025 0.025 0.000 7 0.025 0.000 0.000 0.025 8 0.000 0.025 0.000 0.075 9 0.025 0.025 0.025 0.025 > table(Life, Y)/rep(rowSums(Life,Y), each=4) # Not helpful Error in rowSums(Life, Y) : 'x' must be an array of at least two dimensions > rowSums(Life,Y) Error in rowSums(Life, Y) : 'x' must be an array of at least two dimensions > table(Life, Y)/rep(rowSums(table(Life,Y), each=4)) # Not helpful Error in rowSums(table(Life, Y), each = 4) : unused argument (each = 4) > table(Life, Y)/rep(rowSums(table(Life,Y)), each=4) # Not helpful Y Life 1 2 3 4 0 0.500 0.000 0.250 0.000 1 1.500 0.500 0.000 0.000 2 1.000 0.125 0.000 0.250 3 1.500 0.375 0.500 0.000 4 0.200 0.000 1.000 0.500 5 0.000 0.375 0.000 0.250 6 0.000 0.200 0.500 0.000 7 0.200 0.000 0.000 0.250 8 0.000 0.200 0.000 0.750 9 0.250 0.200 0.500 0.250 > tapply(Y, Life, median) 0 1 2 3 4 5 6 7 8 9 2.0 1.0 1.5 2.0 3.0 2.0 2.5 2.5 4.0 2.5 > dim(dat) [1] 40 3 > dat$Y = as.ordered(dat$Y) # Change class to ordered factor > dat$Y [1] 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 [30] 3 3 4 4 4 4 4 4 4 4 4 Levels: 1 < 2 < 3 < 4 > levels(dat$Y) = c("well", "mild", "mod", "imp") > levels(dat$SES) = c("low", "high") > head(dat) Y SES Life 1 well 1 1 2 well 1 9 3 well 1 4 4 well 1 3 5 well 0 2 6 well 1 0 > levels(dat$SES) = c("low", "high") > head(dat) Y SES Life 1 well 1 1 2 well 1 9 3 well 1 4 4 well 1 3 5 well 0 2 6 well 1 0 > dat$SES [1] 1 1 1 1 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 0 1 1 0 1 0 0 1 [30] 0 0 1 1 1 0 0 0 1 0 0 attr(,"levels") [1] "low" "high" > detach(dat) > dat = read.table("http://www.math.montana.edu/shancock/courses/stat539/data/mental-impairment.txt",header=T) > dat$Y = as.ordered(dat$Y) # Change class to ordered factor > levels(dat$Y) = c("well", "mild", "mod", "imp") > dat$SES = factor(dat$SES, levels=c(0,1), labels=c("low", "high")) > head(dat) Y SES Life 1 well high 1 2 well high 9 3 well high 4 4 well high 3 5 well low 2 6 well high 0 > ?polr No documentation for ‘polr’ in specified packages and libraries: you could try ‘??polr’ > library(VGAM) # vglm Loading required package: stats4 Loading required package: splines > library(MASS) # polr, housing > ?polr > fit1 <- polr(Y ~ Life, data = dat) > summary(fit1) Re-fitting to get Hessian Call: polr(formula = Y ~ Life, data = dat) Coefficients: Value Std. Error t value Life 0.2879 0.1175 2.451 Intercepts: Value Std. Error t value well|mild 0.2614 0.5639 0.4636 mild|mod 1.6563 0.6171 2.6838 mod|imp 2.5876 0.6938 3.7296 Residual Deviance: 102.5271 AIC: 110.5271 > exp(1.6563) [1] 5.239887 > exp(.2879) [1] 1.333624 > exp(-.2879) [1] 0.7498366 > # Beta estimate: > b=-fit1$coef > # Alpha estimates: > a=fit1$zeta > b=-fit1$coef > b Life -0.28793 > a=fit1$zeta > a well|mild mild|mod mod|imp 0.2614334 1.6562752 2.5876265 > cols = c("darkgreen","darkblue","purple","red") > curve(1/(1+exp(-(a[1]+b*x))),0,10,ylim=c(0,1),col=cols[1],lty=2, + main = "Cumulative Probabilities",xlab="Life Events",ylab="P(Y<=j)") > curve(1/(1+exp(-(a[2]+b*x))),add=T,col=cols[2],lty=3) > curve(1/(1+exp(-(a[3]+b*x))),add=T,col=cols[3],lty=4) > legend(7,.9,c("Well","Mild","Mod"),col=cols[1:3],lty=c(2:4)) > curve(1/(1+exp(-(a[1]+b*x))),0,10,ylim=c(0,1),col=cols[1],lty=2, + main="Category Probabilities",xlab="Life Events",ylab="P(Y=j)") > curve(1/(1+exp(-(a[2]+b*x)))-1/(1+exp(-(a[1]+b*x))),add=T,col=cols[2],lty=3) > curve(1/(1+exp(-(a[3]+b*x)))-1/(1+exp(-(a[2]+b*x))),add=T,col=cols[3],lty=4) > curve(1-1/(1+exp(-(a[3]+b*x))),add=T,col=cols[4],lty=5) > legend(7,.9,c("Well","Mild","Mod","Impaired"),col=cols,lty=c(2:5)) > fit2 = polr(Y ~ SES + Life, data = dat) > summary(fit2) Re-fitting to get Hessian Call: polr(formula = Y ~ SES + Life, data = dat) Coefficients: Value Std. Error t value SEShigh -1.1112 0.6109 -1.819 Life 0.3189 0.1210 2.635 Intercepts: Value Std. Error t value well|mild -0.2819 0.6423 -0.4389 mild|mod 1.2128 0.6607 1.8357 mod|imp 2.2094 0.7210 3.0644 Residual Deviance: 99.0979 AIC: 109.0979 > # Beta estimates: > b2=-fit2$coef > # Alpha estimates: > a2=fit2$zeta > > # Plot cumulative probabilities: > cols = c("darkgreen","darkblue","purple","red") > # SES = low > curve(1/(1+exp(-(a2[1]+b2[2]*x))),0,10,ylim=c(0,1),col=cols[1],lty=1, + main = "Cumulative Probabilities",xlab="Life Events",ylab="P(Y<=j)") > curve(1/(1+exp(-(a2[2]+b2[2]*x))),add=T,col=cols[2],lty=1) > curve(1/(1+exp(-(a2[3]+b2[2]*x))),add=T,col=cols[3],lty=1) > # SES = high > curve(1/(1+exp(-(a2[1]+b2[1]+b2[2]*x))),add=T,col=cols[1],lty=2) > curve(1/(1+exp(-(a2[2]+b2[1]+b2[2]*x))),add=T,col=cols[2],lty=2) > curve(1/(1+exp(-(a2[3]+b2[1]+b2[2]*x))),add=T,col=cols[3],lty=2) > > legend(8,.95,c("Well","Mild","Mod"),fill=cols[1:3]) > legend(0,.1,c("High SES","Low SES"),lty=c(2,1)) > anova(fit1, fit2) Likelihood ratio tests of ordinal regression models Response: Y Model Resid. df Resid. Dev Test Df LR stat. 1 Life 36 102.5271 2 SES + Life 35 99.0979 1 vs 2 1 3.42918 Pr(Chi) 1 2 0.06405392 > anova(fit2, update(fit2, .~.+SES:Life)) Likelihood ratio tests of ordinal regression models Response: Y Model Resid. df Resid. Dev Test Df 1 SES + Life 35 99.09790 2 SES + Life + SES:Life 34 98.50444 1 vs 2 1 LR stat. Pr(Chi) 1 2 0.5934586 0.4410848 >