###########################################################
#
# GLMM analysis of Table 12.5 (also Table 6.9) of Agresti
#
###########################################################

# Read the 8-center x treatment x response table of counts.  The data
# lines follow the read.table(stdin()) call, so this script is intended
# to be pasted into an interactive R session (stdin() then consumes the
# 32 data lines below).
multicenter <- read.table(stdin(), col.names = c("Center", "Tx", "Resp", "y"))
a 1 1 11
a 1 2 25
a 2 1 10
a 2 2 27
b 1 1 16
b 1 2 4
b 2 1 22
b 2 2 10
c 1 1 14
c 1 2 5
c 2 1 7
c 2 2 12
d 1 1 2
d 1 2 14
d 2 1 1
d 2 2 16
e 1 1 6
e 1 2 11
e 2 1 0
e 2 2 12
f 1 1 1
f 1 2 10
f 2 1 0
f 2 2 10
g 1 1 1
g 1 2 4
g 2 1 1
g 2 2 8
h 1 1 4
h 1 2 2
h 2 1 6
h 2 2 1

# Recode Tx and Resp from {1, 2} to {1, 0}: original level 2 becomes 0
# (control / failure), level 1 stays 1 (active drug / success).
multicenter$Tx <- ifelse(multicenter$Tx == 2, 0, 1)
multicenter$Resp <- ifelse(multicenter$Resp == 2, 0, 1)

multicenter[1:4, ]
#   Center Tx Resp  y
# 1      a  1    1 11
# 2      a  1    0 25
# 3      a  0    1 10
# 4      a  0    0 27

# Log-linear (Poisson) model for the counts, with all two-way
# interactions among Center, Tx and Resp.
summary(fit.2way <- glm(y ~ .^2, data = multicenter, family = poisson))
# Call:
# glm(formula = y ~ .^2, family = poisson, data = multicenter)
#
# Deviance Residuals:
#        Min          1Q      Median          3Q         Max
# -1.8121334  -0.4290585  -0.0002592   0.3679177   0.9294712
#
# Coefficients:
#              Estimate Std. Error z value Pr(>|z|)
# (Intercept)    3.3746     0.1774  19.025  < 2e-16 ***
# Centerb       -1.0345     0.3281  -3.153 0.001617 **
# Centerc       -1.0423     0.3268  -3.190 0.001423 **
# Centerd       -0.6039     0.3015  -2.003 0.045180 *
# Centere       -1.0368     0.3428  -3.025 0.002488 **
# Centerf       -1.1027     0.3633  -3.035 0.002405 **
# Centerg       -1.2907     0.3853  -3.350 0.000809 ***
# Centerh       -2.6598     0.6303  -4.220 2.44e-05 ***
# Tx            -0.2484     0.2524  -0.984 0.325105
# Resp          -1.3220     0.3165  -4.177 2.95e-05 ***
# Centerb:Tx    -0.8059     0.4041  -1.994 0.046120 *
# Centerc:Tx    -0.1820     0.4160  -0.438 0.661747
# Centerd:Tx     0.1189     0.4275   0.278 0.780883
# Centere:Tx     0.4476     0.4503   0.994 0.320164
# Centerf:Tx     0.3088     0.5038   0.613 0.539932
# Centerg:Tx    -0.4580     0.6123  -0.748 0.454385
# Centerh:Tx    -0.5111     0.6316  -0.809 0.418401
# Centerb:Resp   2.0554     0.4201   4.893 9.94e-07 ***
# Centerc:Resp   1.1529     0.4246   2.715 0.006618 **
# Centerd:Resp  -1.4185     0.6636  -2.138 0.032552 *
# Centere:Resp  -0.5199     0.5338  -0.974 0.330074
# Centerf:Resp  -2.1469     1.0614  -2.023 0.043100 *
# Centerg:Resp  -0.7977     0.8149  -0.979 0.327638
# Centerh:Resp   2.2079     0.7195   3.069 0.002150 **
# Tx:Resp        0.7769     0.3067   2.533 0.011301 *
# ---
# Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
#
# (Dispersion parameter for poisson family taken to be 1)
#
#     Null deviance: 197.0943 on 31 degrees of freedom
# Residual deviance:   9.7463 on  7 degrees of freedom
# AIC: 170.47
#
# Number of Fisher Scoring iterations: 5

# Compare the two-way model against the saturated model (adds the
# Center:Tx:Resp three-way interaction); the LR test is not significant,
# so the two-way model is adequate.
anova(fit.2way, fit.full <- update(fit.2way, . ~ . + Center*Resp*Tx),
      test = "Chisq")
# Analysis of Deviance Table
#
# Model 1: y ~ (Center + Tx + Resp)^2
# Model 2: y ~ Center + Tx + Resp + Center:Tx + Center:Resp + Tx:Resp +
#     Center:Tx:Resp
#   Resid. Df Resid. Dev Df Deviance P(>|Chi|)
# 1         7     9.7463
# 2         0  3.033e-10  7   9.7463    0.2034

# Backward elimination from the two-way model, using LR tests.
res.2way <- step(fit.2way, direction = "backward", test = "Chisq")
# [...]
#
# Step:  AIC= 164.92
# y ~ Center + Tx + Resp + Center:Resp + Tx:Resp
#
#                Df Deviance     AIC    LRT   Pr(Chi)
#                     18.190 164.919
# - Tx:Resp       1   20.784 165.513  2.594    0.1072
# - Center:Resp   7   95.329 228.058 77.139 5.269e-14 ***
# ---
# Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
#
# Suggests "Center indep Tx | Resp"...  The table is collapsible, so
# odds ratios calculated from the marginal "Tx by Resp" table are equal
# to the corresponding parameters in the conditional "Tx by Resp"
# tables in each clinic.
#
# Now, let's look at the same problem from the point of view of
# logistic GLMM's, with a random center effect (like patients within
# clinics discussed previously in class)...
#
# First we fit a model that allows for greater variability in success
# between centers, but keeps a common odds ratio across centers:
#
#   logit P[Y_{it}=1|u_i] = a + b(t-1/2) + u_i   (u_i is the random effect)
#
# for the i'th clinic, with t=1 for active drug and t=0 for control.
# The reason for rewriting the treatment indicator as +/- 0.5 will
# become clear in a moment.
# Here is the fit of this model:

# Center treatment as +/- 0.5 so the intercept sits midway between arms.
multicenter$Treat <- multicenter$Tx - 0.5

# Typically have to "unroll" the table (one row per subject) for more
# complex modeling procedures, unless they are clever about including
# weights!  Repeating row i of the table y[i] times is a vectorized,
# behavior-identical replacement for the old rbind-in-a-loop unroll
# (which grew the data frame one row at a time); rows with y == 0
# simply contribute no copies.  Column 4 (the count y) is dropped.
multicenter.big <- multicenter[rep(seq_len(nrow(multicenter)),
                                   multicenter$y), -4]
# Remaining columns are Center, Tx, Resp, Treat; rename Resp to y so
# that y is now the 0/1 subject-level response.
names(multicenter.big) <- c("Center", "Tx", "y", "Treat")
row.names(multicenter.big) <- paste(seq_len(nrow(multicenter.big)))

# Could try to fit it with glmmML... can only fit the random
# intercept model with glmmML...
library(glmmML)
glmmML.fit1 <- glmmML(y ~ Treat, cluster = multicenter.big$Center,
                      family = binomial, data = multicenter.big)
glmmML.fit1
# Call: glmmML(formula = y ~ Treat, data = multicenter.big,
#              cluster = multicenter.big$Center, family = binomial)
#
#
#                coef se(coef)      z Pr(>|z|)
# (Intercept) -0.4975   0.1777 -2.799  0.00512
# Treat        0.7614   0.3024  2.518  0.01180
#
# Standard deviation in mixing distribution:  1.448
# Std. Error:                                 0.2398
#
# Residual deviance: 301.3 on 270 degrees of freedom  AIC: 307.3

# Fixed-effects-only logistic fit for comparison (console prompts that
# had leaked into the script are removed so it still parses on source()).
glm.fit0 <- glm(y ~ Treat, family = binomial, data = multicenter.big)
summary(glm.fit0)
# Call:
# glm(formula = y ~ Treat, family = binomial, data = multicenter.big)
#
# Deviance Residuals:
#     Min       1Q   Median       3Q      Max
# -1.0489  -1.0489  -0.8927   1.3116   1.4918
#
# Coefficients:
#             Estimate Std. Error z value Pr(>|z|)
# (Intercept)  -0.5122     0.1257  -4.074 4.61e-05 ***
# Treat         0.4040     0.2514   1.607    0.108
# ---
# Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
#
# (Dispersion parameter for binomial family taken to be 1)
#
#     Null deviance: 360.83 on 272 degrees of freedom
# Residual deviance: 358.23 on 271 degrees of freedom
# AIC: 362.23
#
# Comparing these two models, we see:
#
#                              deviance  df   AIC
# Fixed effects:  glm.fit0        358.2 271  362.2
# Random int'pt:  glmmML.fit1     301.3 270  307.3
#
# There is a substantial decrease in deviance and AIC for the random
# intercepts model.  The difference in df suggests a LR test that is
# Chi^2 on one df.  However, it is well known (Self and Liang, 1987)
# that the LR test for the random intercept is a 1 df test (H0:
# sigma^2=0 for the random effect!), but under the null it is a 50-50
# mixture of a chi^2_1 and a point mass at 0 (reason: H0 is at the
# edge, rather than in the middle, of H1).  Therefore a test of
# significance should cut the chi^2 p-value in half (i.e. bias toward
# including the random intercept!).  In this case we are looking at 57
# on 1 df even for the uncorrected chi-squared, so there is very
# little doubt that it is a good idea to include the random intercept!

######################################################################

# Now we switch to glmmPQL, since it can deal with more general
# models.  In principle, the "same" model should have the "same" fit
# in glmmML or glmmPQL (the only difference is the set of
# approximations used in optimizing the function) but the designers
# of the two functions used different standardizations at various
# points, so the model fits are not as directly compatible as one
# would like!
# glmmPQL lives in MASS (and fits via nlme's lme machinery).
library(MASS)

# Random-intercept logistic model, fit by penalized quasi-likelihood.
glmmPQL.fit1 <- glmmPQL(y ~ Treat, random = ~ 1 | Center,
                        family = binomial, data = multicenter.big)
summary(glmmPQL.fit1)
# Linear mixed-effects model fit by maximum likelihood
#  Data: multicenter.big
#        AIC      BIC    logLik
#   1271.560 1285.998 -631.7799
#
# Random effects:
#  Formula: ~1 | Center
#         (Intercept)  Residual
# StdDev:    1.323061 0.9652609
#
# Variance function:
#  Structure: fixed weights
#  Formula: ~invwt
# Fixed effects: y ~ Treat
#                  Value Std.Error  DF   t-value p-value
# (Intercept) -0.7799349 0.5019781 264 -1.553723  0.1214
# Treat        0.7210237 0.2864029 264  2.517515  0.0124
#  Correlation:
#       (Intr)
# Treat -0.012
#
# Standardized Within-Group Residuals:
#        Min         Q1        Med         Q3        Max
# -2.0633991 -0.6235402 -0.3736916  0.7461471  3.5571301
#
# Number of Observations: 273
# Number of Groups: 8

# To model a random-effect interaction between Center and Treatment,
# we may put a random effect in the slope on treatment:
#
#   logit P[Y_{it}=1|u_i] = a + (b + v_i)(t-1/2) + u_i
#
#   (u_i is the random effect for the intercept)
#   (v_i is the random effect for the slope)
#
glmmPQL.fit2 <- glmmPQL(y ~ Treat, random = ~ Treat | Center,
                        family = binomial, data = multicenter.big)
summary(glmmPQL.fit2)
# Linear mixed-effects model fit by maximum likelihood
#  Data: multicenter.big
#        AIC      BIC    logLik
#   1278.671 1300.328 -633.3354
#
# Random effects:
#  Formula: ~Treat | Center
#  Structure: General positive-definite, Log-Cholesky parametrization
#             StdDev    Corr
# (Intercept) 1.3489277 (Intr)
# Treat       0.3083281 -0.867
# Residual    0.9627373
#
# Variance function:
#  Structure: fixed weights
#  Formula: ~invwt
# Fixed effects: y ~ Treat
#                  Value Std.Error  DF   t-value p-value
# (Intercept) -0.8130185 0.5116942 264 -1.588876  0.1133
# Treat        0.8084582 0.3120634 264  2.590686  0.0101
#  Correlation:
#       (Intr)
# Treat -0.332
#
# Standardized Within-Group Residuals:
#        Min         Q1        Med         Q3        Max
# -1.8942090 -0.6317714 -0.3874070  0.7163946  3.9731951
#
# Number of Observations: 273
# Number of Groups: 8

# A comparison of deviances doesn't yield sensible conclusions for
# these models (the logLik decreased for the more complex model); I
# suspect one or both have not converged to MLE's; this would require
# further work to figure out.
#
#                                       AIC      BIC    logLik
# Rand int'pt only: glmmPQL.fit1   1271.560 1285.998 -631.7799
# Rand slope too:   glmmPQL.fit2   1278.671 1300.328 -633.3354
#
# However, we may note that the fixed effect part of the slope on
# "Treat" is similar across all the models we've tried, running from
# about 0.77 to 0.80 and always statistically significant.  The
# corresponding odds ratio for positive response is exp(0.8)=2.22 or
# so.  These results for the common odds ratio are very similar to the
# ones we obtained earlier with Cochran-Mantel-Haenszel (see Agresti,
# pp. 230-236).