## ----echo=FALSE----------------------------------------------------------
library(knitr)
opts_chunk$set(size="small",background="white", highlight=FALSE, cache=TRUE, autdep=TRUE, tidy=TRUE)

## ------------------------------------------------------------------------
# Simulate a Gaussian-noise simple linear regression model
# Inputs: x sequence; intercept; slope; noise variance; switch for whether to
  # return the simulated values, or run a regression and return the coefficients
# Output: data frame or coefficient vector
sim.gnslrm <- function(x, intercept, slope, sigma.sq, coefficients=TRUE) {
  n <- length(x)
  y <- intercept + slope*x + rnorm(n,mean=0,sd=sqrt(sigma.sq))
  if (coefficients) {
    return(coefficients(lm(y~x)))
  } else {
    return(data.frame(x=x, y=y))
  }
}

# Fix an arbitrary vector of x's
x <- seq(from=-5, to=5, length.out=42)

## ----sampling-distribution-of-hat-beta-1, echo=FALSE---------------------
# Run the simulation 10,000 times and collect all the coefficients
  # What intercept, slope and noise variance does this impose?
many.coefs <- replicate(1e4, sim.gnslrm(x=x, 5, -2, 0.1, coefficients=TRUE))
# Histogram of the slope estimates
hist(many.coefs[2,], breaks=50, freq=FALSE, xlab=expression(hat(beta)[1]),
     main="")
# Theoretical Gaussian sampling distribution
theoretical.se <- sqrt(0.1/(length(x)*var(x)))
curve(dnorm(x,mean=-2,sd=theoretical.se), add=TRUE,
      col="blue")

## ----eval=FALSE----------------------------------------------------------
## # Run the simulation 10,000 times and collect all the coefficients
##   # What intercept, slope and noise variance does this impose?
## many.coefs <- replicate(1e4, sim.gnslrm(x=x, 5, -2, 0.1, coefficients=TRUE))
## # Histogram of the slope estimates
## hist(many.coefs[2,], breaks=50, freq=FALSE, xlab=expression(hat(beta)[1]),
##      main="")
## # Theoretical Gaussian sampling distribution
## theoretical.se <- sqrt(0.1/(length(x)*var(x)))
## curve(dnorm(x,mean=-2,sd=theoretical.se), add=TRUE,
##       col="blue")

## ----sampling-intervals, echo=FALSE--------------------------------------
lm.sim <- lm(y~x, data=sim.gnslrm(x=x, 5, -2, 0.1, coefficients=FALSE))
hat.sigma.sq <- mean(residuals(lm.sim)^2)
se.hat.beta.1 <- sqrt(hat.sigma.sq/(var(x)*(length(x)-2)))
alpha <- 0.02
k <- qt(1-alpha/2, df=length(x)-2)
plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n",
     xlab=expression(beta[1]),
     ylab=expression(hat(beta)[1]), main="")
abline(a=k*se.hat.beta.1,b=1)
abline(a=-k*se.hat.beta.1,b=1)
abline(a=0,b=1,lty="dashed")
beta.1.star <- -1.73
segments(x0=beta.1.star,y0=k*se.hat.beta.1+beta.1.star,
         x1=beta.1.star,y1=-k*se.hat.beta.1+beta.1.star,
         col="blue")

## ----eval=FALSE----------------------------------------------------------
## lm.sim <- lm(y~x, data=sim.gnslrm(x=x, 5, -2, 0.1, coefficients=FALSE))
## hat.sigma.sq <- mean(residuals(lm.sim)^2)
## se.hat.beta.1 <- sqrt(hat.sigma.sq/(var(x)*(length(x)-2)))
## alpha <- 0.02
## k <- qt(1-alpha/2, df=length(x)-2)
## plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n",
##      xlab=expression(beta[1]),
##      ylab=expression(hat(beta)[1]), main="")
## abline(a=k*se.hat.beta.1,b=1)
## abline(a=-k*se.hat.beta.1,b=1)
## abline(a=0,b=1,lty="dashed")
## beta.1.star <- -1.73
## segments(x0=beta.1.star,y0=k*se.hat.beta.1+beta.1.star,
##          x1=beta.1.star,y1=-k*se.hat.beta.1+beta.1.star,
##          col="blue")

## ----sampling-intervals-plus-hat-beta-1, echo=FALSE----------------------
plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n",
     xlab=expression(beta[1]),
     ylab=expression(hat(beta)[1]), main="")
abline(a=k*se.hat.beta.1,b=1)
abline(a=-k*se.hat.beta.1,b=1)
abline(a=0,b=1,lty="dashed")
beta.1.hat <- coefficients(lm.sim)[2]
abline(h=beta.1.hat,col="grey")

## ----eval=FALSE----------------------------------------------------------
## plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n",
##      xlab=expression(beta[1]),
##      ylab=expression(hat(beta)[1]), main="")
## abline(a=k*se.hat.beta.1,b=1)
## abline(a=-k*se.hat.beta.1,b=1)
## abline(a=0,b=1,lty="dashed")
## beta.1.hat <- coefficients(lm.sim)[2]
## abline(h=beta.1.hat,col="grey")

## ----confidence-set, echo=FALSE------------------------------------------
plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n",
     xlab=expression(beta[1]),
     ylab=expression(hat(beta)[1]), main="")
abline(a=k*se.hat.beta.1,b=1)
abline(a=-k*se.hat.beta.1,b=1)
abline(a=0,b=1,lty="dashed")
beta.1.hat <- coefficients(lm.sim)[2]
abline(h=beta.1.hat,col="grey")
segments(x0=beta.1.hat-k*se.hat.beta.1, y0=beta.1.hat,
         x1=beta.1.hat+k*se.hat.beta.1, y1=beta.1.hat,
         col="red")

## ----eval=FALSE----------------------------------------------------------
## plot(0, xlim=c(-3,-1),ylim=c(-3,-1),type="n",
##      xlab=expression(beta[1]),
##      ylab=expression(hat(beta)[1]), main="")
## abline(a=k*se.hat.beta.1,b=1)
## abline(a=-k*se.hat.beta.1,b=1)
## abline(a=0,b=1,lty="dashed")
## beta.1.hat <- coefficients(lm.sim)[2]
## abline(h=beta.1.hat,col="grey")
## segments(x0=beta.1.hat-k*se.hat.beta.1, y0=beta.1.hat,
##          x1=beta.1.hat+k*se.hat.beta.1, y1=beta.1.hat,
##          col="red")

## ----t-dist-vs-Gaussian-dist, echo=FALSE---------------------------------
curve(qt(0.995,df=x-2),from=3,to=1e4,log="x", ylim=c(0,10),
      xlab="Sample size (n)", ylab=expression(k(n,alpha)),col="blue")
abline(h=qnorm(0.995),lty="dashed",col="blue")
curve(qt(0.975,df=x-2), add=TRUE)
abline(h=qnorm(0.975),lty="dashed")
curve(qt(0.75,df=x-2), add=TRUE, col="orange")
abline(h=qnorm(0.75), lty="dashed", col="orange")
legend("topright", legend=c(expression(alpha==0.01), expression(alpha==0.05),
                         expression(alpha==0.5)),
       col=c("blue","black","orange"), lty="solid")

## ----eval=FALSE----------------------------------------------------------
## curve(qt(0.995,df=x-2),from=3,to=1e4,log="x", ylim=c(0,10),
##       xlab="Sample size (n)", ylab=expression(k(n,alpha)),col="blue")
## abline(h=qnorm(0.995),lty="dashed",col="blue")
## curve(qt(0.975,df=x-2), add=TRUE)
## abline(h=qnorm(0.975),lty="dashed")
## curve(qt(0.75,df=x-2), add=TRUE, col="orange")
## abline(h=qnorm(0.75), lty="dashed", col="orange")
## legend("topright", legend=c(expression(alpha==0.01), expression(alpha==0.05),
##                          expression(alpha==0.5)),
##        col=c("blue","black","orange"), lty="solid")

## ----eval=FALSE----------------------------------------------------------
## confint(object, level=0.95)

## ------------------------------------------------------------------------
library(gamair); data(chicago)
death.temp.lm <- lm(death ~ tmpd, data=chicago)
confint(death.temp.lm)
confint(death.temp.lm, level=0.90)

## ------------------------------------------------------------------------
coefficients(summary(death.temp.lm))

## ------------------------------------------------------------------------
coefficients(summary(death.temp.lm))[1,4]

## ------------------------------------------------------------------------
summary(death.temp.lm)

## ------------------------------------------------------------------------
print(summary(death.temp.lm), signif.stars=FALSE, digits=3)

## ----bunch-of-CIs, echo=FALSE--------------------------------------------
# Run 1000 simulations and get the confidence interval from each
CIs <- replicate(1000, confint(lm(y~x,data=sim.gnslrm(x=x,5,-2,0.1,FALSE)))[2,])
# Plot the first 100 confidence intervals; start with the lower limits
plot(1:100, CIs[1,1:100], ylim=c(min(CIs),max(CIs)),
     xlab="Simulation number", ylab="Confidence limits for slope")
# Now the lower limits
points(1:100, CIs[2,1:100])
# Draw line segments connecting them
segments(x0=1:100, x1=1:100, y0=CIs[1,1:100], y1=CIs[2,1:100], lty="dashed")
# Horizontal line at the true coefficient value
abline(h=-2, col="grey")

## ----eval=FALSE----------------------------------------------------------
## # Run 1000 simulations and get the confidence interval from each
## CIs <- replicate(1000, confint(lm(y~x,data=sim.gnslrm(x=x,5,-2,0.1,FALSE)))[2,])
## # Plot the first 100 confidence intervals; start with the lower limits
## plot(1:100, CIs[1,1:100], ylim=c(min(CIs),max(CIs)),
##      xlab="Simulation number", ylab="Confidence limits for slope")
## # Now the lower limits
## points(1:100, CIs[2,1:100])
## # Draw line segments connecting them
## segments(x0=1:100, x1=1:100, y0=CIs[1,1:100], y1=CIs[2,1:100], lty="dashed")
## # Horizontal line at the true coefficient value
## abline(h=-2, col="grey")

## ----coverage-it-works, echo=FALSE---------------------------------------
# For each simulation, check whether the interval covered the truth
covered <- (CIs[1,] <= -2) & (CIs[2,] >= -2)
# Calculate the cumulative proportion of simulations where the interval
# contained the truth, plot vs. number of simulations.
plot(1:length(covered), cumsum(covered)/(1:length(covered)),
     xlab="Number of simulations",
     ylab="Sample coverage proportion", ylim=c(0,1))
abline(h=0.95, col="grey")

## ----eval=FALSE----------------------------------------------------------
## # For each simulation, check whether the interval covered the truth
## covered <- (CIs[1,] <= -2) & (CIs[2,] >= -2)
## # Calculate the cumulative proportion of simulations where the interval
## # contained the truth, plot vs. number of simulations.
## plot(1:length(covered), cumsum(covered)/(1:length(covered)),
##      xlab="Number of simulations",
##      ylab="Sample coverage proportion", ylim=c(0,1))
## abline(h=0.95, col="grey")