## #### Lecture 9: Nodal involvement in prostate cancer ## ## Example in Gillen's slides on diagnostics. source("http://www.math.montana.edu/shancock/courses/stat539/r/GillenRFunctions.R") ## ##### Read in prostate cancer data ## prostate <- read.table( "http://www.math.montana.edu/shancock/courses/stat539/data/ProstateCancer.txt", header=TRUE ) prostate[1:10,] ## ##### Sort dataset according to acid (for plotting) ## prostate <- prostate[ order( prostate$acid ), ] fit <- glm( y ~ acid, data=prostate, family=binomial ) glmCI( fit ) muhat <- fitted( fit ) ## ##### Smooth observed versus fitted probabilities ## plot( prostate$acid, prostate$y, xlab="Serum acid phosphatase", ylab="Spread" ) sfit <- loess( y ~ acid, data=prostate ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) ## Nonparametric smoother lines( prostate$acid, muhat, col="blue", lwd=2 ) ## Model fit ## ##### Remove outlier ## plot( prostate$acid[ prostate$acid<1.5 ], prostate$y[ prostate$acid<1.5 ], xlab="Serum acid phosphatase", ylab="Spread" ) sfit <- loess( y ~ acid, data=prostate, subset=acid<1.5 ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) lines( prostate$acid, muhat, col="blue", lwd=2 ) ## ##### Try the log transform of acid ## prostate$logacid <- log( prostate$acid ) fit <- glm( y ~ logacid, data=prostate, family=binomial ) glmCI(fit) muhat <- fitted( fit ) plot( prostate$logacid[ prostate$acid<1.5 ], prostate$y[ prostate$acid<1.5 ], xlab="Serum acid phosphatase", ylab="Spread" ) sfit <- loess( y ~ logacid, data=prostate, subset=acid<1.5 ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) lines( prostate$logacid, muhat, col="blue", lwd=2 ) ## ##### Check functional form via partial residuals ## fit <- glm( y ~ x.ray + acid + size*grade, data=prostate, family=binomial ) summary( fit ) glmCI( fit ) muhat <- fitted( fit ) pracid <- (prostate$y - muhat) / (muhat*(1-muhat)) + fit$coef[3]*prostate$acid plot( prostate$acid, pracid, xlab="Acid", ylab="Partial Residual" ) sfit <- loess( pracid ~ prostate$acid ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) lines( prostate$acid, fitted( lm( pracid ~ prostate$acid ) ), col="blue", lwd=2 ) ## ##### Check functional form via partial residuals removing outliers ## plot( prostate$acid[ pracid < 20 ], pracid[ pracid < 20 ], xlab="Acid", ylab="Partial Residual", ylim=c(-1,5.5) ) sfit <- loess( pracid ~ prostate$acid, subset=pracid < 20 ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) lines( prostate$acid[ pracid < 20 ], fitted( lm( pracid ~ prostate$acid, subset=pracid < 20 ) ), col="blue", lwd=2 ) ## ##### Check functional form via partial residuals with log-transform of acid ## fit <- glm( y ~ x.ray + logacid + size*grade, data=prostate, family=binomial ) summary( fit ) muhat <- fitted( fit ) pracid <- (prostate$y - muhat) / (muhat*(1-muhat)) + fit$coef[3]*prostate$logacid plot( prostate$logacid, pracid, xlab="log(Acid)", ylab="Partial Residual" ) sfit <- loess( pracid ~ prostate$logacid ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) lines( prostate$logacid, fitted( lm( pracid ~ prostate$logacid ) ), col="blue", lwd=2 ) ## ##### Check functional form via partial residuals with log (removing outlier) ## plot( prostate$logacid[ pracid < 20 ], pracid[ pracid < 20 ], xlab="log(Acid)", ylab="Partial Residual", ylim=c(-4,2.5) ) sfit <- loess( pracid ~ prostate$logacid, subset=pracid < 20 ) lines( sfit$x, fitted( sfit ), col="red", lwd=2 ) lines( prostate$logacid[ pracid < 20 ], fitted( lm( pracid ~ prostate$logacid, subset=pracid < 20 ) ), col="blue", lwd=2 ) ## ##### Plot of deviance residuals (not much help here) ## dresid <- residuals( fit, type="deviance" ) plot( muhat, dresid, xlab="Fitted probability of spreading", ylab="Deviance residual" ) ## ##### Plot of leverage ## plot( prostate$patient, hatvalues(fit) ) abline( h=3*mean(hatvalues(fit) ), col="blue", lwd=2 ) text( prostate$patient-2, hatvalues(fit), prostate$patient ) summary( prostate ) prostate[ prostate$patient==24, ] par( mfrow=c(3,2) ) for( i in 1:6 ){ plot( prostate$patient, dfbeta(fit)[,i], xlab="Patient ID", ylab="Delta Beta", main=dimnames(dfbeta(fit))[[2]][i] ) text( prostate$patient-2, dfbeta(fit)[,i], prostate$patient ) } par( mfrow=c(1,1) ) i <- 3 plot( prostate$patient, dfbeta(fit)[,i], xlab="Patient ID", ylab="Delta Beta", main=dimnames(dfbeta(fit))[[2]][i] ) text( prostate$patient-2, dfbeta(fit)[,i], prostate$patient ) ## ##### Plot of delta betas simultaneously using my function ## plot.dfbeta( dfbeta(fit), labels=prostate$patient ) ## ##### Plot of Cook's distance vs. leverage ## par(mfrow=c(1,1)) plot( hatvalues(fit), cooks.distance(fit), xlab="Leverage", ylab="Cooks Distance", ylim=c(0,.3) ) abline( h=qf(.05,length(fit$coef), fit$df.residual), col="blue", lwd=2 ) abline( v=2*mean(hatvalues(fit) ), col="red", lwd=2 ) text( hatvalues(fit)-.02, cooks.distance(fit), prostate$patient ) prostate[ prostate$patient==26, ] ### ### H-L Goodness of Fit ### binary.gof(fit) ### ### Check Predictive Power ### # Fitted probabilities: head(muhat) # Proportion in sample with y=1 mean(prostate$y) # Classification table with pi0 = 0.4 preds <- as.numeric(muhat > 0.4) table(prostate$y, preds) # Sensitivity = P(y-hat = 1 | y = 1) 16/(16+4) # Specificity = P(y-hat = 0 | y = 0) 29/(29+4) # ROC Curve library(ROCR) # May need to install package pred.obj = prediction(muhat,prostate$y) # Plot ROC curve plot(performance(pred.obj,"tpr","fpr")) # Area under the ROC curve performance(pred.obj,"auc")