## ### Lecture 18: ### GLMMs and GEEs with Binary Data ## ##### Load Required Libraries ##### library(lattice) library(lme4) library(geepack) ## Data on attitudes about abortion from Agresti (2013) ## Subjects were asked whether they supported legalizing ## abortion in each of three situations: ## 1. If the family has a very low income and cannot afford any more children. ## 2. When the woman is not married and does not want to marry the man. ## 3. When the woman wants it for any reason. ## Gender was also measured. ## Note: Since model implies nonnegative association among responses, ## situations and scales should correspond accordingly. ## For example, with scale (yes, no), it would not make sense to ask ## "Should abortion be legal...?" and " Should abortion be illegal...?" ## as two different situations. ## Read in data: attitudes = read.table("http://www.math.montana.edu/shancock/courses/stat539/data/abortion.txt", header=TRUE) # Variables: # gender = 1 if female; 0 if male # response = 1 if support; 0 if do not support # question = situation (1, 2, or 3) # case = subject id # Code situation as factor: attitudes$question = factor(attitudes$question) # Make situation 3 the reference situation: attitudes$question = relevel(attitudes$question, ref="3") # Note that we can leave gender and response as numeric # since they are already coded as 0 or 1. # Plot of proportion of support in each covariate pattern: means = tapply(attitudes$response, list(attitudes$question,attitudes$gender), mean) plot(c(3,1,2),means[1:3,1],type="p",col="red",pch=16, xlab="Situation",ylab="Proportion Support", main="Proportion Support by Situation and Gender", cex=2,ylim=c(0.45,.52)) points(c(3,1,2),means[1:3,2],col="darkblue",cex=2) legend("bottomleft", c("Male","Female"), col=c("red","darkblue"), pch=c(16,1)) # OR barplot: barplot(t(means), beside=TRUE, col=c("Red","Blue"), ylim=c(0,0.6), legend = c("Male","Female"), xlab="Situation",ylab="Proportion Support", main="Proportion Support by Situation and Gender") # Contingency table summary of data by situation: my.tab = xtabs(~response+gender+question, data=attitudes) my.tab # Visualize three-way contingency table: library(vcd) pairs(my.tab, diag_panel = pairs_barplot, labeling=TRUE) # Binary data on three occasions --> # only 8 possible values of response vector. # Convert data to wide form: att.wide = reshape(attitudes, v.names="response", timevar="question", idvar="case", direction="wide") dim(att.wide) # 1850 subjects questioned # How many supported legalizing abortion in all three situations, # i.e., responded (1,1,1)? # Create sequence or responses variable: # (this code takes a few seconds... for loops are slow) resp.seq = NULL for(i in 1:1850){ x = att.wide[i,3:5] if(x[1]==1 & x[2]==1 & x[3]==1){resp.seq[i]="111"} else if(x[1]==1 & x[2]==1 & x[3]==0){resp.seq[i]="110"} else if(x[1]==1 & x[2]==0 & x[3]==1){resp.seq[i]="101"} else if(x[1]==0 & x[2]==1 & x[3]==1){resp.seq[i]="011"} else if(x[1]==1 & x[2]==0 & x[3]==0){resp.seq[i]="100"} else if(x[1]==0 & x[2]==1 & x[3]==0){resp.seq[i]="010"} else if(x[1]==0 & x[2]==0 & x[3]==1){resp.seq[i]="001"} else if(x[1]==0 & x[2]==0 & x[3]==0){resp.seq[i]="000"} } att.wide$resp.seq = resp.seq xtabs(~gender+resp.seq, data=att.wide) # Note that the majority of the sample was either 000 or 111. # --> high association between repeated measurements. # Sample correlations among repeated measurements: cor(att.wide[,3:5]) ### Fit GLMM by ML ### # No interaction between gender and question --> # gender effect assumed to be identical for each situation. mod1 = glmer(response ~ gender+question+(1|case), family=binomial, data=attitudes, nAGQ = 25) # Larger value of "nAGQ" = greater accuracy in approximation # --> longer computational time # Can only go up to 25. # Default is 1 (Laplace approximation) - gives very different results! summary(mod1) summary(update(mod1, nAGQ = 1)) mod2 = glmer(response ~ gender*question+(1|case), family=binomial, data=attitudes, nAGQ = 25) # Test for interaction effect between gender and question: anova(mod2) anova(mod1,mod2) # EBLUPS same for all individuals with same response sequence # and same gender, e.g., ranef(mod1)$case[resp.seq=="000",1] att.wide$gender[resp.seq=="000"] # Note different answers if we use a different R library: library(glmmML) # May need to install package # glmmML function only allows for a random intercept mod1b = glmmML(response ~ gender+question, cluster=case, family=binomial, method="ghq", n.points=70, start.sigma=9, data=attitudes) summary(mod1b) ### Fit Marginal Model by GEE ## # Remember to sort data by case and then by situation: att.sort = attitudes[order(attitudes$case,attitudes$question),] mod2 = geeglm(response ~ gender+question, family=binomial, corstr="exchangeable", id=case, data=attitudes) mod2.int = geeglm(response ~ gender*question, family=binomial, corstr="exchangeable", id=case, data=attitudes) anova(mod2,mod2.int) summary(mod2)