# R code for examples in the chapter "The Truth about Linear Regression"

# Figure 1
x1 = runif(100)
x2 = rnorm(100,0.5,0.1)
x3 = runif(100,2,3)
y1 = sqrt(x1) + rnorm(100,0,0.05)
y2 = sqrt(x2) + rnorm(100,0,0.05)
y3 = sqrt(x3) + rnorm(100,0,0.05)
plot(x1,y1,xlim=c(0,3),ylim=c(0,3))
rug(x1,side=1)
rug(y1,side=2)
points(x2,y2,pch=24,col="blue")
rug(x2,side=1,col="blue")
rug(y2,side=2,col="blue")
points(x3,y3,pch=22,col="red")
rug(x3,side=1,col="red")
rug(y3,side=2,col="red")
lm1 = lm(y1 ~ x1)
lm2 = lm(y2 ~ x2)
lm3 = lm(y3 ~ x3)
# abline takes intercept and slope as its first two arguments...
abline(lm1$coefficients)
abline(lm2$coefficients,col="blue")
abline(lm3$coefficients,col="red")
x.all=c(x1,x2,x3)
y.all=c(y1,y2,y3)
lm.all = lm(y.all~x.all)
abline(lm.all$coefficients,lty=2)
curve(sqrt(x),col="grey",add=TRUE)


# Figure 2: Make the 3D plot to show omitted variable bias
library(lattice)
library(MASS)

# Make correlated normal variables X and Z
x.z = mvrnorm(100,c(0,0),matrix(c(1,0.1,0.1,1),nrow=2))
# Y = X+Z + small noise
y = x.z[,1] + x.z[,2] + rnorm(100,0,0.1)
# 3D scatterplot
cloud(y~x.z[,1]*x.z[,2],xlab="X",ylab="Z",zlab="Y")


# Figure 3
# Change the correlation between X and Z to -0.1 instead of +0.1
new.x.z = mvrnorm(100,c(0,0),matrix(c(1,-0.1,-0.1,1),nrow=2))
new.y = new.x.z[,1] + new.x.z[,2] + rnorm(100,0,0.1)
cloud(new.y~new.x.z[,1]*new.x.z[,2],xlab="X",ylab="Z",zlab="Y")

# Figure 4
# Now omit Z and plot
plot(x.z[,1],y,xlab="x",xlim=range(c(x.z[,1],new.x.z[,1])),ylim=range(c(y,new.y)))
# Make sure the range encompasses both data sets!
rug(x.z[,1],side=1)
axis(y,side=2)
points(new.x.z[,1],new.y,col="blue")
rug(new.x.z[,1],side=1,col="blue")
rug(new.y,side=2,col="blue")
# ... and regress
old.lm = lm(y ~ x.z[,1])
new.lm = lm(new.y ~ new.x.z[,1])
abline(old.lm$coefficients)
abline(new.lm$coefficients,col="blue")

# Figure 5
x <- runif(100)
y <- rnorm(100,mean=log(x),sd=1)
plot(y~x)
curve(log(x),add=TRUE,col="grey")
abline(lm(y~x))