# Code examples for Lecture 15, 36-350, Fall 2011 # See lecture slides for context # Omit a single specified case from a data set # Intended for use with jack-knifing # Input: data set (data), index of case to omit (i) # Output: data set of the same format as the input, but with one less case # Presumes: in table-shaped data, cases are rows # As written, works if data is a vector, list, matrix, data frame, or a # 1D or 2D array, but not for 3+D arrays. (Fixable.) omit.case <- function(data,i) { # How many dimensions does the data have? d <- dim(data) # d will be NULL for vectors and lists, or a vector itself otherwise # Handle vectors, lists, and one-dimensional arrays the same way if (is.null(d) || (length(d)==1)) { return(data[-i]) # Anything else has at least two dimensions } else { return(data[-i,]) } } # Calculate jackknife standard errors # Inputs: an estimator function (estimator) and a data set (data) # Outputs: vector of standard errors # Presumes: estimator() returns a numeric vector # each case of data is an element (if vector/list), or a row (if tabular) jackknife <- function(estimator,data) { # How many cases? # length() works on vectors and lists, which have no dim attribute if (is.null(dim(data))) { n <- length(data) } # otherwise, use nrow() else { n <- nrow(data) } # We don't know how many numbers estimator() will return, so we'll just # use cbind() to stitch the output together # Start with an empty structure jackknife.ests <- c() # for each case for (omit in 1:n) { # repeat the estimate, omitting that case reestimate <- estimator(omit.case(data,omit)) # add the re-estimate on as a new column jackknife.ests <- cbind(jackknife.ests,reestimate) } # Take variance across cases, i.e., variance of each row var.of.reestimates <- apply(jackknife.ests,1,var) # Re-scale jackknife.var <- ((n-1)^2/n)* var.of.reestimates # sqrt() to get a standard error jackknife.stderrs <- sqrt(jackknife.var) return(jackknife.stderrs) } # Examples # Standard error of the mean: jackknife(estimator=mean,data=rnorm(n=400,mean=7,sd=5)) # Should be about 1/4 --- why? # Our favorite data set library(MASS) data(cats) # Little function to run a particular regression and extract coefficients est.coefs <- function(data) { return(lm(Hwt~Bwt,data=data)$coefficients) } # EXERCISE: Describe the regression model jackknife(estimator=est.coefs,data=cats) # Compare to standard errors calculated with the usual regression formulas # How would you use this to replicate what you did in lab 4?