################################################################ # # # Rat tumor example, based on Gelman et al. (1995) pp 121ff; # # Data from Tarone (1982). # # # # Each of the 71 data points gives y_i = number of rats with # # tumors, of n_i rats tested, in each of 71 experiments. # # Experiments 1..70 are "past" experiments with these tumors; # # Experiment #71 is a "current" experiment. # # # # Questions: # # # # 1. Summarize probabilities of tumors over the course of all # # "past" experiments. # # # # 2. Use "past" experiments to "calibrate" our analysis of the # # "current" experiment. # # # ################################################################ if(F) { # Nonhierarchical model -- Nothing fancy! # # Each y_i ~ Bin(n_i,th_i), th_i ~ Beta(alpha,beta) # so... each th_i ~ Beta(alpha + y_i, beta + n_i - y_i) # rats _ read.table("ratdata.txt",header=T) # # Naive summary of probabilities of tumor: # probs _ (rats$y/rats$n)[1:70] meanprob _ mean(probs) varprob _ var(probs) print(round(c(mean=meanprob,sd=sqrt(varprob)),3)) # mean sd # 0.136 0.103 # # But this probably isn't a good summary... # hist(probs,xlim=c(0,1),probability=T) # th-summary.ps # # Note that prob = y/n is the MLE for theta in each experiment. # If we think that th ~ Beta(a,b), we could get a crude estimate # of a,b from # E[th] = a/(a+b), Var(th) = ab/[(a+b)^2(a+b+1)] # # This leads to a = (a + b) E[th] , b = (a + b) (1-E[th]) and # (a+b) = E[th](1-E[th])/Var(th) - 1 # a.plus.b _ meanprob*(1-meanprob)/varprob - 1 a _ a.plus.b * meanprob b _ a.plus.b - a print(round(c(alpha=a,beta=b),3)) # alpha beta # 1.356 8.615 # # And really, the estimated binomial is not too bad a fit to the # histogram... # th _ seq(0,1,by=0.01) lines(th,dbeta(th,1.356,8.615)) # th-summary.ps # # Finally something Bayesian... We can use the estimated alpha and # beta in an informal "empirical Bayes" analysis of the 71'st # experiment... # # y_i ~ Bin(n_i,th_i), th_i ~ Beta(alpha,beta) # so... th_i ~ Beta(alpha + y_i, beta + n_i - y_i) # # Apply this to experiment 71 (y=4, n=14)... # plot(th,dbeta(th,1.356,8.615),type="l",xlab="theta",ylab="density") lines(th,dbeta(th,1.356 + 4, 8.615 + 14 - 4),lty=2) legend(locator(),c("Prior","Posterior"),lty=1:2) # post-plot.ps source("infomat.s") pos.log.lik _ function(th) { return(log(dbinom(4,14,th))) } MLE _ 4/14 SE.MLE _ 1/sqrt(info.mtx.fun(pos.log.post,MLE)) post _ function(th) { return(dbeta(th,1.356 + 4, 8.615 + 14 - 4)) } neg.log.post _ function(th) { return(-log(post(th))) } pos.log.post _ function(th) { return(log(post(th))) } postmode _ nlmin(neg.log.post,x=0.5)$x SE.PM _ 1/sqrt(info.mtx.fun(pos.log.post,postmode)) print(round(c(MLE=MLE,SE=SE.MLE),3)) print(round(c(post=postmode,SE=SE.PM),3)) # MLE SE # 0.286 0.107 # # post SE # 0.198 0.085 # # Is the "current" value of 0.286 unusual among the past probabilities # of tumor? # sum(probs>0.286)/70 # [1] 0.1 integrate(dbeta,0.286,1,shape1=1.356,shape2=8.615)$integral # [1] 0.09446745 } if(T) { ########################################################################### # # Now for the hierarchical analysis. We assume: # # 1. y_i ~ Bin(n_i, th_i) # 2. th_i ~ Beta(alpha,beta) # 3. alpha ~ Gamma(a1,b1) # beta ~ Gamma(a2,b2) # # Note: G ~ Gamma(a,b) ==> E(G) = a/b, Var(G) = a/b^2; # We will take # # a1=1 , b1=1 # a2=8 , b2=1 # # We produce MCMC sample based on the following complete conditionals: # # p(th_i | rest) = Beta (th_i | alpha + y_i, beta + n_i - y_i) # # p(alpha | rest) = const * [G(alpha+beta)/G(alpha)]^N # * prod_1^N th_i^alpha # * Gamma(alpha|a1,b1) # # p(beta | rest) = const * [G(alpha+beta)/G(beta)]^N # * prod_1^N (1-th_i)^beta # * Gamma(beta|a2,b2) # # The th_i's we can do with Gibbs steps. The alpha and beta must be # done with M-H steps, which we will implement using Normal random # walk proposals. drawtheta _ function(a,b,y,n,num=length(y)) { return(rbeta(num,a+y,b+n-y)) } log.cca _ function(a,b,th,a1,b1,N) { tmp _ N*(lgamma(a+b) - lgamma(a)) tmp _ tmp + a*sum(log(th)) tmp _ tmp + log(dgamma(a,a1,b1)) # tmp _ tmp - 2.5*log(a+b) return(tmp) } drawalpha _ function(a,b,th,a1,b1,sda,N) { anew _ rnorm(1,a,sda) if(anew<=0) { return(a) } else { log.acc _ min(log.cca(anew,b,th,a1,b1,N) - log.cca(a,b,th,a1,b1,N),0) if(log(runif(1))0) { for (i in 1:burnin) { a _ drawalpha(a,b,th,a1,b1,sda,N) b _ drawbeta (a,b,th,a2,b2,sdb,N) th _ drawtheta(a,b,y,n) if ((i%%100)==0) cat ("*") } cat("\n") } # alpha.rate _ 0 beta.rate _ 0 # for (i in 1:M) { alpha[i] _ anew _ drawalpha(a,b,th,a1,b1,sda,N) beta[i] _ bnew _ drawbeta (a,b,th,a2,b2,sdb,N) alpha.rate _ alpha.rate + ifelse(anew!=a,1,0) beta.rate _ beta.rate + ifelse(bnew!=b,1,0) a _ anew b _ bnew theta[,i] _ th _ drawtheta(a,b,y,n) if ((i%%100)==0) cat (".") } cat("\n") # return(list(alpha=alpha,beta=beta,theta=theta, alpha.rate=alpha.rate/M,beta.rate=beta.rate/M)) } } ##################################################################### if (F) { mcmc1000noburn _ drawsome(1000) mcmc _ drawsome(1000,burnin=500) par(mfrow=c(2,2)) plot(mcmc1000noburn$alpha,type="l") plot(mcmc1000noburn$beta,type="l") plot(mcmc$alpha,type="l") plot(mcmc$beta,type="l") par(mfrow=c(3,2)) # alpha-beta-draws.ps plot(mcmc$alpha,type="l") plot(mcmc$beta,type="l") acf(mcmc$alpha,lag.max=400) acf(mcmc$beta,lag.max=400) par(mfrow=c(2,2)) # alpha-beta-posteriors.ps hist(mcmc$alpha) hist(mcmc$beta) plot(density(mcmc$alpha),type="l") plot(density(mcmc$beta),type="l") par(mfrow=c(2,2)) # theta-draws.ps plot(mcmc$th[50,],type="l") plot(mcmc$th[71,],type="l") acf(mcmc$th[50,]) acf(mcmc$th[71,]) par(mfrow=c(2,2)) # theta-posteriors.ps hist(mcmc$th[50,]) hist(mcmc$th[71,]) plot(density(mcmc$th[50,]),type="l") plot(density(mcmc$th[71,]),type="l") }