## ----echo=FALSE----------------------------------------------------------
library(knitr)
opts_chunk$set(size="small",background="white", highlight=FALSE, cache=TRUE, autdep=TRUE, tidy=TRUE, options(show.signif.stars=FALSE))

## ------------------------------------------------------------------------
# Simulate from a previously fitted linear model with Gaussian noise
# Inputs: model; data frame
# Outputs: new data frame with response values replaced
# Presumes: all necessary variables are in data frame
sim.lm.gauss <- function(mdl, df) {
    # What's the response variable called?
      # Should be the first variable in the vector of all variables
    resp.var <- all.vars(formula(mdl))[1]
    # What value should we expect for the response?
    expect.resp <- predict(mdl, newdata=df)
    # How big is the noise?
    sigma2.mle <- mean(residuals(mdl)^2)
    # Add appropriately-sized Gaussian noise to the response
    response <- expect.resp + rnorm(nrow(df),0,sqrt(sigma2.mle))
    df[,resp.var] <- response # Won't change df outside this function!
    return(df)
}

## ------------------------------------------------------------------------
# Re-estimate a linear model on a new data set
# Inputs: old model; data frame
# Output: new lm object
# Presumes: data frame contains columns with appropriate names
re.lm <- function(mdl, df) {
    return(lm(formula(mdl), data=df))
}

## ------------------------------------------------------------------------
library(MASS); data(cats)
cats.lm <- lm(Hwt ~ Sex*Bwt, data=cats)

## ------------------------------------------------------------------------
beta.boots <- replicate(10000,
                         coefficients(re.lm(cats.lm,
                                            sim.lm.gauss(cats.lm, cats))))

## ------------------------------------------------------------------------
rowMeans(beta.boots) - coefficients(cats.lm)

## ------------------------------------------------------------------------
apply(beta.boots, 1, sd)

## ------------------------------------------------------------------------
apply(beta.boots, 1, quantile, prob=c(0.05/2, 1-0.05/2))

## ------------------------------------------------------------------------
coefficients(summary(cats.lm))[,"Std. Error"]

## ------------------------------------------------------------------------
confint(cats.lm, level=0.95)

## ------------------------------------------------------------------------
# Simulate from a previously fitted linear model with t-distributed noise
# Inputs: model; data frame
# Outputs: new data frame with response values replaced
# Presumes: all necessary variables are in data frame
sim.lm.t <- function(mdl, df) {
    # What's the response variable called?
    resp.var <- all.vars(formula(mdl))[1]
    # What value should we expect for the response?
    expect.resp <- predict(mdl, newdata=df)
    # Estimate the t parameters, using MASS::fitdistr
    stopifnot(require(MASS)) # Make sure the library's available
    # After the example in help(fitdistr)
    mydt <- function(x, s, df) { dt(x/s, df)/s }
    t.params <- fitdistr(residuals(cats.lm), mydt, start=list(s=1, df=50),
                         lower=c(0,1))$estimate
    # Add appropriately-sized t-noise to the response
    response <- expect.resp + t.params["s"]*rt(nrow(df),df=t.params["df"])
    df[,resp.var] <- response # Won't change df outside this function
    return(df)
}

## ------------------------------------------------------------------------
beta.boots2 <- replicate(10000,
                         coefficients(re.lm(cats.lm,
                                            sim.lm.t(cats.lm, cats))))

## ------------------------------------------------------------------------
apply(beta.boots2, 1, quantile, prob=c(0.05/2, 1-0.05/2))

## ------------------------------------------------------------------------
# Simulate from a previously fitted linear model, resampling residuals
# Inputs: model; data frame
# Outputs: new data frame with response values replaced
# Presumes: all necessary variables are in data frame
sim.lm.residuals <- function(mdl, df) {
    # What's the response variable called?
    resp.var <- all.vars(formula(mdl))[1]
    # What value should we expect for the response?
    expect.resp <- predict(mdl, newdata=df)
    # Resample the residuals
    new.noise <- sample(residuals(mdl), size=length(expect.resp), replace=TRUE)
    # Add new noise to the expected response
    response <- expect.resp + new.noise
    df[,resp.var] <- response # Won't change df outside this function
    return(df)
}

## ------------------------------------------------------------------------
resample <- function(x) { sample(x, size=length(x), replace=TRUE) }

## ------------------------------------------------------------------------
head(residuals(cats.lm))
resample(head(residuals(cats.lm)))
resample(head(residuals(cats.lm)))

## ------------------------------------------------------------------------
par(mfrow=c(1,3))
hist(residuals(cats.lm), main="", xlab="Residuals")
hist(resample(residuals(cats.lm)), main="", xlab="Residuals")
hist(resample(residuals(cats.lm)), main="", xlab="Residuals")

## ------------------------------------------------------------------------
beta.boots3 <- replicate(10000,
                         coefficients(re.lm(cats.lm,
                                            sim.lm.residuals(cats.lm, cats))))

## ------------------------------------------------------------------------
apply(beta.boots3, 1, quantile, prob=c(0.05/2, 1-0.05/2))

## ------------------------------------------------------------------------
# Re-sample the rows of a data frame
# Inputs: the data frame
# Output: a new data frame, contain a random sample, with replacement, of
  # rows from the input
resample.data.frame <- function(df) { df[resample(1:nrow(df)),] }

## ------------------------------------------------------------------------
beta.boots4 <- replicate(10000,
                         coefficients(re.lm(cats.lm,
                                            resample.data.frame(cats))))

## ------------------------------------------------------------------------
apply(beta.boots4, 1, quantile, prob=c(0.05/2, 1-0.05/2))