## ----echo=FALSE---------------------------------------------------------- library(knitr) opts_chunk\$set(size="small",background="white", highlight=FALSE, cache=TRUE, autdep=TRUE, tidy=TRUE) ## ----lin-mod-sim-code---------------------------------------------------- # Fix x values for all runs of the simulation; draw from an exponential n <- 20 # So we don't have magic #s floating around beta.0 <- 5 beta.1 <- -2 sigma.sq <- 3 fixed.x <- rexp(n=n) # Simulate from the model Y=\beta_0+\beta_1*x+N(0,\sigma^2) # Inputs: intercept; slope; variance; vector of x; return sample or estimated # linear model? # Outputs: data frame with columns x and y OR linear model fit to simulated y # regressed on x sim.lin.gauss <- function(intercept=beta.0, slope=beta.1, noise.variance=sigma.sq, x = fixed.x, model=FALSE) { # Make up y by adding Gaussian noise to the linear function y <- rnorm(length(x), intercept + slope*x, sd=sqrt(noise.variance)) # Do we want to fit a model to this simulation and return that # model? Or do we want to just return the simulated values? if (model) { return(lm(y~x)) } else { return(data.frame(x=x, y=y)) } } ## ----theory-vs-sim, echo=FALSE------------------------------------------- par(mfrow=c(2,1)) slope.sample <- replicate(1e4, coefficients(sim.lin.gauss(model=TRUE))["x"]) hist(slope.sample,freq=FALSE,breaks=50,xlab=expression(hat(beta)[1]),main="") curve(dnorm(x,-2,sd=sqrt(3/(n*var(fixed.x)))), add=TRUE, col="blue") pred.sample <- replicate(1e4, predict(sim.lin.gauss(model=TRUE), newdata=data.frame(x=-1))) hist(pred.sample, freq=FALSE, breaks=50, xlab=expression(hat(m)(-1)),main="") curve(dnorm(x, mean=beta.0+beta.1*(-1), sd=sqrt((sigma.sq/n)*(1+(-1-mean(fixed.x))^2/var(fixed.x)))), add=TRUE,col="blue") ## ----eval=FALSE---------------------------------------------------------- ## par(mfrow=c(2,1)) ## slope.sample <- replicate(1e4, coefficients(sim.lin.gauss(model=TRUE))["x"]) ## hist(slope.sample,freq=FALSE,breaks=50,xlab=expression(hat(beta)[1]),main="") ## curve(dnorm(x,-2,sd=sqrt(3/(n*var(fixed.x)))), add=TRUE, col="blue") ## pred.sample <- replicate(1e4, predict(sim.lin.gauss(model=TRUE), ## newdata=data.frame(x=-1))) ## hist(pred.sample, freq=FALSE, breaks=50, xlab=expression(hat(m)(-1)),main="") ## curve(dnorm(x, mean=beta.0+beta.1*(-1), ## sd=sqrt((sigma.sq/n)*(1+(-1-mean(fixed.x))^2/var(fixed.x)))), ## add=TRUE,col="blue")