## ----echo=FALSE---------------------------------------------------------- library(knitr) opts_chunk\$set(size="small",background="white", highlight=FALSE, cache=TRUE, autdep=TRUE, tidy=TRUE) ## ------------------------------------------------------------------------ # Simulate a Gaussian-noise simple linear regression model # Inputs: x sequence; intercept; slope; noise variance; switch for whether to # return the simulated values, or run a regression and return the coefficients # Output: data frame or coefficient vector sim.gnslrm <- function(x, intercept, slope, sigma.sq, coefficients=TRUE) { n <- length(x) y <- intercept + slope*x + rnorm(n,mean=0,sd=sqrt(sigma.sq)) if (coefficients) { return(coefficients(lm(y~x))) } else { return(data.frame(x=x, y=y)) } } # Fix an arbitrary vector of x's x <- seq(from=-5, to=5, length.out=42) ## ----sampling-distribution-of-hat-beta-1, echo=FALSE--------------------- # Run the simulation 10,000 times and collect all the coefficients # What intercept, slope and noise variance does this impose? many.coefs <- replicate(1e4, sim.gnslrm(x=x, 5, -2, 0.1, coefficients=TRUE)) # Histogram of the slope estimates hist(many.coefs[2,], breaks=50, freq=FALSE, xlab=expression(hat(beta)[1]), main="") # Theoretical Gaussian sampling distribution theoretical.se <- sqrt(0.1/(length(x)*var(x))) curve(dnorm(x,mean=-2,sd=theoretical.se), add=TRUE, col="blue") ## ----eval=FALSE---------------------------------------------------------- ## # Run the simulation 10,000 times and collect all the coefficients ## # What intercept, slope and noise variance does this impose? ## many.coefs <- replicate(1e4, sim.gnslrm(x=x, 5, -2, 0.1, coefficients=TRUE)) ## # Histogram of the slope estimates ## hist(many.coefs[2,], breaks=50, freq=FALSE, xlab=expression(hat(beta)[1]), ## main="") ## # Theoretical Gaussian sampling distribution ## theoretical.se <- sqrt(0.1/(length(x)*var(x))) ## curve(dnorm(x,mean=-2,sd=theoretical.se), add=TRUE, ## col="blue") ## ----sampling-intervals, echo=FALSE-------------------------------------- lm.sim <- lm(y~x, data=sim.gnslrm(x=x, 5, -2, 0.1, coefficients=FALSE)) hat.sigma.sq <- mean(residuals(lm.sim)^2) se.hat.beta.1 <- sqrt(hat.sigma.sq/(var(x)*(length(x)-2))) alpha <- 0.02 k <- qt(1-alpha/2, df=length(x)-2) plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n", xlab=expression(beta[1]), ylab=expression(hat(beta)[1]), main="") abline(a=k*se.hat.beta.1,b=1) abline(a=-k*se.hat.beta.1,b=1) abline(a=0,b=1,lty="dashed") beta.1.star <- -1.73 segments(x0=beta.1.star,y0=k*se.hat.beta.1+beta.1.star, x1=beta.1.star,y1=-k*se.hat.beta.1+beta.1.star, col="blue") ## ----eval=FALSE---------------------------------------------------------- ## lm.sim <- lm(y~x, data=sim.gnslrm(x=x, 5, -2, 0.1, coefficients=FALSE)) ## hat.sigma.sq <- mean(residuals(lm.sim)^2) ## se.hat.beta.1 <- sqrt(hat.sigma.sq/(var(x)*(length(x)-2))) ## alpha <- 0.02 ## k <- qt(1-alpha/2, df=length(x)-2) ## plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n", ## xlab=expression(beta[1]), ## ylab=expression(hat(beta)[1]), main="") ## abline(a=k*se.hat.beta.1,b=1) ## abline(a=-k*se.hat.beta.1,b=1) ## abline(a=0,b=1,lty="dashed") ## beta.1.star <- -1.73 ## segments(x0=beta.1.star,y0=k*se.hat.beta.1+beta.1.star, ## x1=beta.1.star,y1=-k*se.hat.beta.1+beta.1.star, ## col="blue") ## ----sampling-intervals-plus-hat-beta-1, echo=FALSE---------------------- plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n", xlab=expression(beta[1]), ylab=expression(hat(beta)[1]), main="") abline(a=k*se.hat.beta.1,b=1) abline(a=-k*se.hat.beta.1,b=1) abline(a=0,b=1,lty="dashed") beta.1.hat <- coefficients(lm.sim)[2] abline(h=beta.1.hat,col="grey") ## ----eval=FALSE---------------------------------------------------------- ## plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n", ## xlab=expression(beta[1]), ## ylab=expression(hat(beta)[1]), main="") ## abline(a=k*se.hat.beta.1,b=1) ## abline(a=-k*se.hat.beta.1,b=1) ## abline(a=0,b=1,lty="dashed") ## beta.1.hat <- coefficients(lm.sim)[2] ## abline(h=beta.1.hat,col="grey") ## ----confidence-set, echo=FALSE------------------------------------------ plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n", xlab=expression(beta[1]), ylab=expression(hat(beta)[1]), main="") abline(a=k*se.hat.beta.1,b=1) abline(a=-k*se.hat.beta.1,b=1) abline(a=0,b=1,lty="dashed") beta.1.hat <- coefficients(lm.sim)[2] abline(h=beta.1.hat,col="grey") segments(x0=beta.1.hat-k*se.hat.beta.1, y0=beta.1.hat, x1=beta.1.hat+k*se.hat.beta.1, y1=beta.1.hat, col="red") ## ----eval=FALSE---------------------------------------------------------- ## plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n", ## xlab=expression(beta[1]), ## ylab=expression(hat(beta)[1]), main="") ## abline(a=k*se.hat.beta.1,b=1) ## abline(a=-k*se.hat.beta.1,b=1) ## abline(a=0,b=1,lty="dashed") ## beta.1.hat <- coefficients(lm.sim)[2] ## abline(h=beta.1.hat,col="grey") ## segments(x0=beta.1.hat-k*se.hat.beta.1, y0=beta.1.hat, ## x1=beta.1.hat+k*se.hat.beta.1, y1=beta.1.hat, ## col="red") ## ----t-dist-vs-Gaussian-dist, echo=FALSE--------------------------------- curve(qt(0.995,df=x-2),from=3,to=1e4,log="x", ylim=c(0,10), xlab="Sample size (n)", ylab=expression(k(n,alpha)),col="blue") abline(h=qnorm(0.995),lty="dashed",col="blue") curve(qt(0.975,df=x-2), add=TRUE) abline(h=qnorm(0.975),lty="dashed") curve(qt(0.75,df=x-2), add=TRUE, col="orange") abline(h=qnorm(0.75), lty="dashed", col="orange") legend("topright", legend=c(expression(alpha==0.01), expression(alpha==0.05), expression(alpha==0.5)), col=c("blue","black","orange"), lty="solid") ## ----eval=FALSE---------------------------------------------------------- ## curve(qt(0.995,df=x-2),from=3,to=1e4,log="x", ylim=c(0,10), ## xlab="Sample size (n)", ylab=expression(k(n,alpha)),col="blue") ## abline(h=qnorm(0.995),lty="dashed",col="blue") ## curve(qt(0.975,df=x-2), add=TRUE) ## abline(h=qnorm(0.975),lty="dashed") ## curve(qt(0.75,df=x-2), add=TRUE, col="orange") ## abline(h=qnorm(0.75), lty="dashed", col="orange") ## legend("topright", legend=c(expression(alpha==0.01), expression(alpha==0.05), ## expression(alpha==0.5)), ## col=c("blue","black","orange"), lty="solid") ## ----eval=FALSE---------------------------------------------------------- ## confint(object, level=0.95) ## ------------------------------------------------------------------------ library(gamair); data(chicago) death.temp.lm <- lm(death ~ tmpd, data=chicago) confint(death.temp.lm) confint(death.temp.lm, level=0.90) ## ------------------------------------------------------------------------ coefficients(summary(death.temp.lm)) ## ------------------------------------------------------------------------ coefficients(summary(death.temp.lm))[1,4] ## ------------------------------------------------------------------------ summary(death.temp.lm) ## ------------------------------------------------------------------------ print(summary(death.temp.lm), signif.stars=FALSE, digits=3) ## ----bunch-of-CIs, echo=FALSE-------------------------------------------- # Run 1000 simulations and get the confidence interval from each CIs <- replicate(1000, confint(lm(y~x,data=sim.gnslrm(x=x,5,-2,0.1,FALSE)))[2,]) # Plot the first 100 confidence intervals; start with the lower limits plot(1:100, CIs[1,1:100], ylim=c(min(CIs),max(CIs)), xlab="Simulation number", ylab="Confidence limits for slope") # Now the lower limits points(1:100, CIs[2,1:100]) # Draw line segments connecting them segments(x0=1:100, x1=1:100, y0=CIs[1,1:100], y1=CIs[2,1:100], lty="dashed") # Horizontal line at the true coefficient value abline(h=-2, col="grey") ## ----eval=FALSE---------------------------------------------------------- ## # Run 1000 simulations and get the confidence interval from each ## CIs <- replicate(1000, confint(lm(y~x,data=sim.gnslrm(x=x,5,-2,0.1,FALSE)))[2,]) ## # Plot the first 100 confidence intervals; start with the lower limits ## plot(1:100, CIs[1,1:100], ylim=c(min(CIs),max(CIs)), ## xlab="Simulation number", ylab="Confidence limits for slope") ## # Now the lower limits ## points(1:100, CIs[2,1:100]) ## # Draw line segments connecting them ## segments(x0=1:100, x1=1:100, y0=CIs[1,1:100], y1=CIs[2,1:100], lty="dashed") ## # Horizontal line at the true coefficient value ## abline(h=-2, col="grey") ## ----coverage-it-works, echo=FALSE--------------------------------------- # For each simulation, check whether the interval covered the truth covered <- (CIs[1,] <= -2) & (CIs[2,] >= -2) # Calculate the cumulative proportion of simulations where the interval # contained the truth, plot vs. number of simulations. plot(1:length(covered), cumsum(covered)/(1:length(covered)), xlab="Number of simulations", ylab="Sample coverage proportion", ylim=c(0,1)) abline(h=0.95, col="grey") ## ----eval=FALSE---------------------------------------------------------- ## # For each simulation, check whether the interval covered the truth ## covered <- (CIs[1,] <= -2) & (CIs[2,] >= -2) ## # Calculate the cumulative proportion of simulations where the interval ## # contained the truth, plot vs. number of simulations. ## plot(1:length(covered), cumsum(covered)/(1:length(covered)), ## xlab="Number of simulations", ## ylab="Sample coverage proportion", ylim=c(0,1)) ## abline(h=0.95, col="grey")