## ----echo=FALSE----------------------------------------------------------
library(knitr)
opts_chunk$set(size="small",background="white", highlight=FALSE, cache=TRUE, autdep=TRUE, tidy=TRUE)

## ------------------------------------------------------------------------
# Simulate from a linear model with uniform X and t-distributed noise
# Inputs: number of points; intercept; slope; width of uniform X distribution
  # (symmetric around 0); degrees of freedom for t
# Output: data frame with columns for X and Y
sim.linmod <- function(n, beta.0, beta.1, width, df) {
    # draw n points from a uniform distribution centered on 0
    x <- runif(n, min=-width/2, max=width/2)
    # draw n points from a t distribution with the given number of degrees
    # of freedom
    epsilon <- rt(n, df=df)
    # make y from a linear model
    y <- beta.0 + beta.1*x + epsilon
    # return the data frame
    return(data.frame(x=x, y=y))
}

# Calculate in-sample MSE of a linear model
  # First define a function that works for just one slope/intercept pair at
  # time
  # Then "Vectorize" it to handle vectors of intercepts and slopes
# Inputs: slope; intercept; data frame with "x" and "y" columns
# Output: the in-sample MSE
# Presumes: "y" is the target variable and "x" is the predictor
mse.insample <- function(b.0, b.1, data) { mean((data$y-(b.0+b.1*data$x))^2) }
mse.insample <- Vectorize(mse.insample, vectorize.args=c("b.0","b.1"))

# Grids of possible intercepts and slopes
b.0.seq <- seq(from=-10,to=10, length.out=20)
b.1.seq <- seq(from=-10,to=10, length.out=20)

# 3d wire-mesh ("perspective") plot of a linear model's error surface
# Input: data set; maximum value for Z axis (for comparability across plots)
# Output: Transformation matrix for adding new points/lines to the plot,
  # invisibly --- see help(persp) under "Value".  (Ignored here)
# ATTN: hard-coded slope/intercept sequences less than ideal
in.sample.persp <- function(data, zmax=600) {
    # Calculate the in-sample MSE for every combination of
    z <- outer(b.0.seq, b.1.seq, mse.insample, data=data)
    persp(b.0.seq, b.1.seq, z, zlim=c(0,zmax), xlab="Intercept",
          ylab="Slope", zlab="MSE", ticktype="detailed")
}

## ----error-surfaces, echo=FALSE------------------------------------------
par(mfrow=c(3,2))
in.sample.persp(sim.linmod(n=10,beta.0=5,beta.1=-2,width=4,df=3))
in.sample.persp(sim.linmod(n=10,beta.0=5,beta.1=-2,width=4,df=3))
in.sample.persp(sim.linmod(n=100,beta.0=5,beta.1=-2,width=4,df=3))
in.sample.persp(sim.linmod(n=100,beta.0=5,beta.1=-2,width=4,df=3))
in.sample.persp(sim.linmod(n=1e5,beta.0=5,beta.1=-2,width=4,df=3))
in.sample.persp(sim.linmod(n=1e5,beta.0=5,beta.1=-2,width=4,df=3))
par(mfrow=c(1,1))

## ----eval=FALSE----------------------------------------------------------
## par(mfrow=c(3,2))
## in.sample.persp(sim.linmod(n=10,beta.0=5,beta.1=-2,width=4,df=3))
## in.sample.persp(sim.linmod(n=10,beta.0=5,beta.1=-2,width=4,df=3))
## in.sample.persp(sim.linmod(n=100,beta.0=5,beta.1=-2,width=4,df=3))
## in.sample.persp(sim.linmod(n=100,beta.0=5,beta.1=-2,width=4,df=3))
## in.sample.persp(sim.linmod(n=1e5,beta.0=5,beta.1=-2,width=4,df=3))
## in.sample.persp(sim.linmod(n=1e5,beta.0=5,beta.1=-2,width=4,df=3))
## par(mfrow=c(1,1))

## ----scatter-of-regression-lines, echo=FALSE-----------------------------
# Create an empty plot (type="n" for "do Nothing")
plot(0,type="n",xlim=c(-10,10),ylim=c(-10,10),xlab="x",ylab="y")
# Add the true regression line; exaggerate width so it stands out
abline(a=5, b=-2, lwd=5)
# Repeat 10 times: do a simulation, fit a line to the sim., add the fitted
# line to the plot
invisible(replicate(20, abline(lm(y ~ x, data=sim.linmod(n=10,beta.0=5,
                                             beta.1=-2,width=4,df=3)),
                               col="grey")))

## ----eval=FALSE----------------------------------------------------------
## # Create an empty plot (type="n" for "do Nothing")
## plot(0,type="n",xlim=c(-10,10),ylim=c(-10,10),xlab="x",ylab="y")
## # Add the true regression line; exaggerate width so it stands out
## abline(a=5, b=-2, lwd=5)
## # Repeat 10 times: do a simulation, fit a line to the sim., add the fitted
## # line to the plot
## invisible(replicate(20, abline(lm(y ~ x, data=sim.linmod(n=10,beta.0=5,
##                                              beta.1=-2,width=4,df=3)),
##                                col="grey")))

## ----eval=FALSE----------------------------------------------------------
## lm(y ~ x, data=df)

## ------------------------------------------------------------------------
# Make a very small simulated data set from our running examing
toy.data <- sim.linmod(n=10, beta.0=5, beta.1=-2, width=4, df=3)
# Fit the simple linear regression model to it by least squares
lm(y~x, data=toy.data)

## ------------------------------------------------------------------------
names(lm(y~x, data=toy.data))

## ------------------------------------------------------------------------
# Fit a linear model to the toy data, and store as toy.lm
  # The name of the estimated model needn't match that of the data, but it's
  # often a good idea
toy.lm <- lm(y~x, data=toy.data)

## ------------------------------------------------------------------------
coefficients(toy.lm)

## ------------------------------------------------------------------------
fitted(toy.lm)

## ------------------------------------------------------------------------
residuals(toy.lm)

## ----toy-model, echo=FALSE-----------------------------------------------
plot(y~x, data=toy.data, xlab="x",ylab="y", main="Simulated ('toy') data")
abline(toy.lm)

## ----eval=FALSE----------------------------------------------------------
## plot(y~x, data=toy.data, xlab="x",ylab="y", main="Simulated ('toy') data")
## abline(toy.lm)

## ----eval=FALSE----------------------------------------------------------
## predict(object, newdata)

## ------------------------------------------------------------------------
predict(toy.lm, newdata=data.frame(x=1:5))

## ------------------------------------------------------------------------
predict(toy.lm)
predict(toy.lm, data=data.frame(x=1:5))  # What's wrong here?

## ------------------------------------------------------------------------
predict(toy.lm, newdata=data.frame(zebra=1:5))

## ------------------------------------------------------------------------
predict(toy.lm, newdata=data.frame(x=1:5, zebra=6:10))

## ------------------------------------------------------------------------
predict(toy.lm, newdata=data.frame(x=1:5), se.fit=TRUE)