## ----slope-varies-with-location, echo=FALSE------------------------------
# Create three distributions for X
x1 = runif(100)
x2 = rnorm(100,0.5,0.1)
x3 = runif(100,2,3)
# Create matching Y variables from the same (nonlinear) model
y1 = sqrt(x1) + rnorm(length(x1),0,0.05)
y2 = sqrt(x2) + rnorm(length(x2),0,0.05)
y3 = sqrt(x3) + rnorm(length(x3),0,0.05)
# Plot the first set of (X,Y) points, making sure the plotting region is big
  # enough for all the later ones
plot(x1,y1,xlim=c(0,3),ylim=c(0,3), xlab="X", ylab="Y")
# Rugs for the those points, to indicate the marginal distribution
rug(x1,side=1)
rug(y1,side=2)
# Add the second set of points in a different color and plotting symbol
points(x2,y2,pch=24,col="blue")
rug(x2,side=1,col="blue")
rug(y2,side=2,col="blue")
# And the third
points(x3,y3,pch=22,col="red")
rug(x3,side=1,col="red")
rug(y3,side=2,col="red")
# Fit the regression lines and add them, in matching colors
lm1 = lm(y1 ~ x1)
lm2 = lm(y2 ~ x2)
lm3 = lm(y3 ~ x3)
abline(lm1$coefficients)
abline(lm2$coefficients,col="blue")
abline(lm3$coefficients,col="red")
# Combine the data, fit an over-all regression line
x.all=c(x1,x2,x3)
y.all=c(y1,y2,y3)
lm.all = lm(y.all~x.all)
abline(lm.all$coefficients,lty="dashed")
# Finally, the true regression curve.
curve(sqrt(x),col="grey",add=TRUE)

## ----scatterplot-for-omitted-variables, echo=FALSE-----------------------
# Make the 3D plot to show omitted variable bias
library(lattice)
library(MASS)  # for multivariate normal generator

# Make correlated normal variables X and Z
x.z = mvrnorm(100,c(0,0),matrix(c(1,0.1,0.1,1),nrow=2))
# Y = X+Z + small noise
y = x.z[,1] + x.z[,2] + rnorm(100,0,0.1)
# 3D scatterplot
cloud(y~x.z[,1]*x.z[,2],xlab="X",ylab="Z",zlab="Y")

## ----scatterplot-for-omitted-variables-post-shift, echo=FALSE------------
# Continuation of previous example
# Change the correlation between X and Z to -0.1 instead of +0.1
new.x.z = mvrnorm(100,c(0,0),matrix(c(1,-0.1,-0.1,1),nrow=2))
new.y = new.x.z[,1] + new.x.z[,2] + rnorm(100,0,0.1)
cloud(new.y~new.x.z[,1]*new.x.z[,2],xlab="X",ylab="Z",zlab="Y")

## ----y-on-x-with-z-shifted, echo=FALSE-----------------------------------
# Continuity of previous example
# Now omit Z and plot
plot(x.z[,1],y,xlab="x",xlim=range(c(x.z[,1],new.x.z[,1])),ylim=range(c(y,new.y)))
# Make sure the range encompasses both data sets!
rug(x.z[,1],side=1)
axis(y,side=2)
points(new.x.z[,1],new.y,col="blue")
rug(new.x.z[,1],side=1,col="blue")
rug(new.y,side=2,col="blue")
# ... and regress
old.lm = lm(y ~ x.z[,1])
new.lm = lm(new.y ~ new.x.z[,1])
abline(old.lm$coefficients)
abline(new.lm$coefficients,col="blue")

## ----log-regression-curve-and-scatter, echo=FALSE------------------------
x <- runif(100)
y <- rnorm(100,mean=log(x),sd=1)
plot(y~x)
curve(log(x),add=TRUE,col="grey")
abline(lm(y~x))

## ----eval=FALSE----------------------------------------------------------
## x <- runif(100)
## y <- rnorm(100,mean=log(x),sd=1)
## plot(y~x)
## curve(log(x),add=TRUE,col="grey")
## abline(lm(y~x))