## ----echo=FALSE---------------------------------------------------------- library(knitr) opts_chunk$set(size="small",background="white", highlight=FALSE, cache=TRUE, autdep=TRUE, tidy=TRUE, options(show.signif.stars=FALSE)) ## ------------------------------------------------------------------------ # Simulate from a previously fitted linear model with Gaussian noise # Inputs: model; data frame # Outputs: new data frame with response values replaced # Presumes: all necessary variables are in data frame sim.lm.gauss <- function(mdl, df) { # What's the response variable called? # Should be the first variable in the vector of all variables resp.var <- all.vars(formula(mdl))[1] # What value should we expect for the response? expect.resp <- predict(mdl, newdata=df) # How big is the noise? sigma2.mle <- mean(residuals(mdl)^2) # Add appropriately-sized Gaussian noise to the response response <- expect.resp + rnorm(nrow(df),0,sqrt(sigma2.mle)) df[,resp.var] <- response # Won't change df outside this function! return(df) } ## ------------------------------------------------------------------------ # Re-estimate a linear model on a new data set # Inputs: old model; data frame # Output: new lm object # Presumes: data frame contains columns with appropriate names re.lm <- function(mdl, df) { return(lm(formula(mdl), data=df)) } ## ------------------------------------------------------------------------ library(MASS); data(cats) cats.lm <- lm(Hwt ~ Sex*Bwt, data=cats) ## ------------------------------------------------------------------------ beta.boots <- replicate(10000, coefficients(re.lm(cats.lm, sim.lm.gauss(cats.lm, cats)))) ## ------------------------------------------------------------------------ rowMeans(beta.boots) - coefficients(cats.lm) ## ------------------------------------------------------------------------ apply(beta.boots, 1, sd) ## ------------------------------------------------------------------------ apply(beta.boots, 1, quantile, prob=c(0.05/2, 1-0.05/2)) ## ------------------------------------------------------------------------ coefficients(summary(cats.lm))[,"Std. Error"] ## ------------------------------------------------------------------------ confint(cats.lm, level=0.95) ## ------------------------------------------------------------------------ # Simulate from a previously fitted linear model with t-distributed noise # Inputs: model; data frame # Outputs: new data frame with response values replaced # Presumes: all necessary variables are in data frame sim.lm.t <- function(mdl, df) { # What's the response variable called? resp.var <- all.vars(formula(mdl))[1] # What value should we expect for the response? expect.resp <- predict(mdl, newdata=df) # Estimate the t parameters, using MASS::fitdistr stopifnot(require(MASS)) # Make sure the library's available # After the example in help(fitdistr) mydt <- function(x, s, df) { dt(x/s, df)/s } t.params <- fitdistr(residuals(cats.lm), mydt, start=list(s=1, df=50), lower=c(0,1))$estimate # Add appropriately-sized t-noise to the response response <- expect.resp + t.params["s"]*rt(nrow(df),df=t.params["df"]) df[,resp.var] <- response # Won't change df outside this function return(df) } ## ------------------------------------------------------------------------ beta.boots2 <- replicate(10000, coefficients(re.lm(cats.lm, sim.lm.t(cats.lm, cats)))) ## ------------------------------------------------------------------------ apply(beta.boots2, 1, quantile, prob=c(0.05/2, 1-0.05/2)) ## ------------------------------------------------------------------------ # Simulate from a previously fitted linear model, resampling residuals # Inputs: model; data frame # Outputs: new data frame with response values replaced # Presumes: all necessary variables are in data frame sim.lm.residuals <- function(mdl, df) { # What's the response variable called? resp.var <- all.vars(formula(mdl))[1] # What value should we expect for the response? expect.resp <- predict(mdl, newdata=df) # Resample the residuals new.noise <- sample(residuals(mdl), size=length(expect.resp), replace=TRUE) # Add new noise to the expected response response <- expect.resp + new.noise df[,resp.var] <- response # Won't change df outside this function return(df) } ## ------------------------------------------------------------------------ resample <- function(x) { sample(x, size=length(x), replace=TRUE) } ## ------------------------------------------------------------------------ head(residuals(cats.lm)) resample(head(residuals(cats.lm))) resample(head(residuals(cats.lm))) ## ------------------------------------------------------------------------ par(mfrow=c(1,3)) hist(residuals(cats.lm), main="", xlab="Residuals") hist(resample(residuals(cats.lm)), main="", xlab="Residuals") hist(resample(residuals(cats.lm)), main="", xlab="Residuals") ## ------------------------------------------------------------------------ beta.boots3 <- replicate(10000, coefficients(re.lm(cats.lm, sim.lm.residuals(cats.lm, cats)))) ## ------------------------------------------------------------------------ apply(beta.boots3, 1, quantile, prob=c(0.05/2, 1-0.05/2)) ## ------------------------------------------------------------------------ # Re-sample the rows of a data frame # Inputs: the data frame # Output: a new data frame, contain a random sample, with replacement, of # rows from the input resample.data.frame <- function(df) { df[resample(1:nrow(df)),] } ## ------------------------------------------------------------------------ beta.boots4 <- replicate(10000, coefficients(re.lm(cats.lm, resample.data.frame(cats)))) ## ------------------------------------------------------------------------ apply(beta.boots4, 1, quantile, prob=c(0.05/2, 1-0.05/2))