# bloodk.r # # BLOODK(MAXITER,L,K,S) # Function to examine the Coagulation of Blood example on p 284 (sect # 9.5) and p385 (sect 11.6) of Gelman # et # al., which has joint posterior density: # theta,mu,log(sigma),log(tao)|y ~ prod(j=1:4)N(thetaj|mu,tao^2)* # tao*prod(j=1:4)prod(i=1:n_j)N(y_ij|thetaj,sigma^2) # using # Gibb's Sampler with more than one Gibb's sequence # # INPUTS: # MAXITER - # of iterations to run (for each Kth sequence) - (I like 20-100) # L - Number of initial samples to make for importance resampling (IR) - # set to 1 if don't want IR (I like 1000-2000) (sect 10.5) # K - Number of Gibb's sampler sequences (I like 2-10) - if you want # to use K=1, use blood1.r # S - Make Inference from the Sth spot in the Gibb's sequence to make # inference about the parameters (I like .5 - .7). For example, S=0 # considers the whole seq, S=.75 considers the last 1/4 of the seq # and S=1 only considers the last iterate of the seq # bloodk<-function(maxiter,L,K,S) { # load needed function: getR source("getR.r") # FOR DEBUGGING FUNCTION - Define algorithm parameters #maxiter<-1 #L<-1 # Number of initial samples to make for importance resampling #K<-5 # Number of Gibb's sampler saquences - must be >1 #S<-.7 # Make inference from the S_th spot in the Gibb's sequence to the end # define the data, y_ij a<-c(62,60,63,59,NA,NA,NA,NA) b<-c(63,67,71,64,65,66,NA,NA) c<-c(68,66,71,67,68,68,NA,NA) d<-c(56,62,60,61,63,64,63,59) m<-8 n<-4 lenvec<-c(length(a[(!is.na(a))]),length(b[(!is.na(b))]),length(c[(!is.na(c))]),length(d[(!is.na(d))])) lenvecmat<-matrix(lenvec,n,K) N<-sum(lenvec) # total samples across n experiments lenmat<-matrix(lenvec,m,n*K,,byrow=TRUE) x<-matrix(c(a,b,c,d),m,n) xmat<-matrix(x,m,n*K) # block matrix with data X: XMAT=[X|X|X|...|X] ymj<-apply(x,2,mean,na.rm=T) ymjmat<-matrix(ymj,n,K) # Get first estimates for thetaj, mu, sigma and tao if (L==1) { # 1. start with K crude estimates from p. 285 thetaj<-ymjmat thetam<-matrix(thetaj,m,n*K,byrow=TRUE) # THETAM=[THETA1|...|THETAn|THETA1|...|THETAn] sigmaj2<-apply((xmat-thetam)^2/(lenmat-1),2,sum,na.rm=T) sigma2<-apply(matrix(sigmaj2,n,K),2,mean) sigma<-sqrt(sigma2) mu<-apply(thetaj,2,mean) tao2<-apply(thetaj,2,var) tao<-sqrt(tao2) } if (L>1) { # 2. start with IMPORTANCE RESAMPLING of size K from L # draws from a t_4 distribution # t_4 is approx to p(mu,log(sigma),log(tao)) - see p 335 and table # 9.4, results from run of conditional maximization and EM # start<-matrix(0,L,3) lenstartmat<-matrix(lenvec,L,n,byrow=TRUE) ymjstartmat<-matrix(ymj,L,n,byrow=TRUE) mustart<-rt(L,4)*diff(c(65.29,62.73))/diff(qt(c(.25,.75),4))+64.05 # mu starts mustartmat<-matrix(mustart,L,n) sigmastart<-abs(rt(L,4)*diff(c(2.12,2.64))/diff(qt(c(.25,.75),4))+2.37) # sigma starts sigma2startmat<-matrix(sigmastart,L,n)^2 taostart<-abs(rt(L,4)*diff(c(2.62,4.65))/diff(qt(c(.25,.75),4))+3.43) # tao starts tao2startmat<-matrix(taostart,L,n)^2 # An L*n matrix, each row is THETA thetastart<-(1/tao2startmat*mustartmat + lenstartmat/sigma2startmat*ymjstartmat)/(1/tao2startmat +lenstartmat/sigma2startmat) Vstart<-1/(1/tao2startmat + lenstartmat/sigma2startmat) tempstart<-matrix(0,L,n) for (i in 1:L) { tempstart[i,1:n]<-apply(dnorm(x,matrix(thetastart[i,1:n],m,n,byrow=TRUE),sigmastart[i]),2,prod,na.rm=TRUE) } # from (9.19), p(mu,logsigma,logtao|y) from p 288, an LX1 matrix weight<-taostart*apply((dnorm(thetastart,mustartmat,sqrt(tao2startmat))*tempstart*sqrt(Vstart)),1,prod) #IMPORTANCE RESAMPLING startindex<-sample(1:L,K,replace=FALSE,prob=weight) thetaj<-ymjmat mu<-mustart[startindex] sigma<-sigmastart[startindex] sigma2<-sigma^2 tao<-taostart[startindex] tao2<-tao^2 } thetahat<-matrix(0,n,(maxiter+1)*K) thetahat[1:n,1:K]<-thetaj muhat<-matrix(0,K,maxiter+1) muhat[1:K,1]<-mu sigmahat<-matrix(0,K,maxiter+1) sigmahat[1:K,1]<-sigma taohat<-matrix(0,K,maxiter+1) taohat[1:K,1]<-tao for (i in 2:(maxiter+1)) { # Get theta from N(Mtheta,Vtheta) (9.11) mumat<- matrix(mu,n,K,byrow=TRUE) sigma2mat<-matrix(sigma2,n,K,byrow=TRUE) tao2mat<-matrix(tao2,n,K,byrow=TRUE) Mtheta<-(1/tao2mat*mumat + lenvecmat/sigma2mat*ymjmat)/(1/tao2mat + lenvecmat/sigma2mat) Vtheta<-1/(1/tao2mat + lenvecmat/sigma2mat) thetahat[1:n,(i*K-K+1):(i*K)]<-rnorm(n*K,Mtheta,sqrt(Vtheta)) thetaj<-thetahat[1:n,(i*K-K+1):(i*K)] thetam<-matrix(thetaj,m,n*K,byrow=TRUE) # Get mu from N(Mmu,Vmu) (9.14) Mmu<-1/n*apply(thetaj,2,sum) Vmu<-tao2/n mu<-rnorm(K,Mmu,sqrt(Vmu)) muhat[1:K,i]<-mu # Get sigma from scaled Inv-ci^2(df=Dfsigma,scale=Ssigma) (9.16) Dfsigma<-N Ssigma2<-1/N*apply(matrix(apply((xmat-thetam)^2,2,sum,na.rm=T),n,K),2,sum) # scaled Inv-chi^2=Df*scale^2/chisq sigma<-sqrt(Dfsigma*Ssigma2/rchisq(K,Dfsigma)) sigmahat[1:K,i]<-sigma sigma2<-sigma^2 # Get tao from scaled Inv-chi^2(df=Dftao,scale=Stao) (9.18) Dftao<-n-1 Stao2<-1/(n-1)*apply((thetaj-mumat)^2,2,sum) # scaled Inv-chi^2=scale^2/chisq tao<-sqrt(Dftao*Stao2/rchisq(K,Dftao)) taohat[1:K,i]<-tao tao2<-tao^2 } print("Using Gibb's Sampler to simulate p(theta,mu,sigma,tao|y)") # Show numerical results of the method if (K==1) { tot<-matrix(0,n+4,maxiter+1) tot[1:n,1:(maxiter+1)]<-thetahat tot[n+1,1:(maxiter+1)]<-muhat tot[n+2,1:(maxiter+1)]<-sigmahat tot[n+3,1:(maxiter+1)]<-taohat tot[n+4,1:(maxiter+1)]<-log(jp) rownames(tot)<-c("theta1","theta2","theta3","theta4","mu","sigma","tao","log(jp)") #dimnames(tot)<-list(c("theta1","theta2","theta3","theta4","mu","sigma","tao","log(jp)"),c("crude","1st","2nd","3rd","4th")) print(tot) } print("Posterior Inference for individual parameters") probs<-c(.025,.25,.5,.75,.975) suminf<-matrix(0,n+4,6) Sindex<-ceiling((maxiter+1)*S) if (Sindex==0) Sindex<-1 seqlen<-maxiter+2-Sindex #stlentheta<-(maxiter+1-Sindex)*K+1 stlentheta<-(Sindex-1)*K+1 seqlentheta<-K*(seqlen) psitheta<-matrix(0,n,seqlentheta) for (i in 1:n) { psitheta[i,1:seqlentheta]<-thetahat[i,stlentheta:((maxiter+1)*K)] suminf[i,1:5]<-quantile(psitheta[i,1:seqlentheta],probs) # the arg passed to "getR" is thetai for each of the K Gibb's sequences suminf[i,6]<-getR(matrix(psitheta[i,1:seqlentheta],K,seqlen)) } psimu<-muhat[1:K,Sindex:(maxiter+1)] suminf[(n+1),1:5]<-quantile(psimu,probs) suminf[(n+1),6]<-getR(psimu) psisigma<-sigmahat[1:K,Sindex:(maxiter+1)] suminf[(n+2),1:5]<-quantile(psisigma,probs) suminf[(n+2),6]<-getR(psisigma) psitao<-taohat[1:K,Sindex:(maxiter+1)] suminf[(n+3),1:5]<-quantile(psitao,probs) suminf[(n+3),6]<-getR(psitao) jp<-seq(0,0,length=5) for (i in 1:5) { # Get log(p(theta,mu,log(sigma),log(tao)|y)) = # tao*prod(j=1:4)prod(i=1:n_j)N(y_ij|thetaj,sigma^2) (p 285) z<-suminf[1:(n+3),i] thetaj<-z[1:n] thetam<-matrix(thetaj,m,n,byrow=TRUE) mu<-z[5] sigma<-z[6] tao<-z[7] jp[i]<-tao*prod(dnorm(thetaj,mu,tao))*prod(apply(dnorm(x,thetam,sigma),2,prod,na.rm=T)) } suminf[(n+4),1:5]<-log(jp) suminf[(n+4),6]<-getR(log(jp)) dimnames(suminf)<-list(c("theta1","theta2","theta3","theta4","mu","sigma","tao","log(jp)"),c(2.5,25,50,75,97.5,"sqrt(R)")) print(suminf) }