# Examples of training data

# 20 standard-Gaussian X's
x <- rnorm(20)
# Independent Y's
y0 <- rnorm(20)
# Linear Y's
y1 <- -0.5 * x + rnorm(20)
# Quadratic Y's
y2 <- 7 * x^2 - 0.5 * x + rnorm(20)

# Plot the simplest case: independent response
plot(x, y0)
# Fit a constant (intercept-only model) and draw it as a horizontal line
y0.0 <- lm(y0 ~ 1)
abline(h = y0.0$coefficients[1])

# Evenly spaced points across the data range, for pretty plotting
d <- seq(-2, 2, length.out = 200)

# Fit polynomials of degree 1 through 9 to the pure-noise response
for (degree in 1:9) {
  fm <- lm(y0 ~ poly(x, degree))
  # Stash each fit under its own name ("y0.1", ..., "y0.9");
  # the assign/paste trick here is often useful
  assign(paste("y0", degree, sep = "."), fm)
  # Overlay each fitted curve, using a different line type per degree
  lines(d, predict(fm, data.frame(x = d)), lty = degree + 1)
}

# Calculate and plot in-sample mean-squared error for degrees 0-9
mse <- numeric(10)
for (degree in 0:9) {
  # The get() function is the inverse to assign()
  fm <- get(paste("y0", degree, sep = "."))
  mse[degree + 1] <- mean(summary(fm)$residuals^2)
}
plot(0:9, mse, type = "b", xlab = "polynomial degree",
     ylab = "mean squared error")

# Create testing data --- substantially bigger than the training set,
# so we can treat this as fairly representative of how well it would
# work. If 200 data points seems too small, you could easily use
# 2 million. Actually, in this case you could _calculate_ exactly what
# would happen, using the properties of the Gaussian distribution, but
# that's too much of a pain for a special case.
x.new <- rnorm(200)
y0.new <- rnorm(200)

# Calculate the out-of-sample (generalization) errors for degrees 0-9
gmse <- numeric(10)
for (degree in 0:9) {
  # Retrieve the stored fitted models (assigned as "y0.<degree>" above)
  fm <- get(paste("y0", degree, sep = "."))
  # Predict on new data
  predictions <- predict(fm, data.frame(x = x.new))
  # Out-of-sample residuals; named resids to avoid shadowing
  # the stats::residuals() function
  resids <- y0.new - predictions
  # Square and average
  gmse[degree + 1] <- mean(resids^2)
}
# Add the OOS curve to the in-sample MSE plot: lines and points
lines(0:9, gmse, col = "blue", lty = 2)
points(0:9, gmse, pch = 24, col = "blue")

# Calculate the OOS errors restricted to new points falling within the
# original training range (i.e., excluding extrapolation)
range.ok <- (x.new <= max(x)) & (x.new >= min(x))
gmse.restricted <- numeric(10)
for (degree in 0:9) {
  fm <- get(paste("y0", degree, sep = "."))
  predictions <- predict(fm, data.frame(x = x.new[range.ok]))
  resids <- y0.new[range.ok] - predictions
  gmse.restricted[degree + 1] <- mean(resids^2)
}
# FIX: the original drew this curve with the exact same col/lty/pch as
# the unrestricted OOS curve on the same plot, making the two
# indistinguishable; use a distinct style so both can be compared.
lines(0:9, gmse.restricted, col = "red", lty = 3)
points(0:9, gmse.restricted, pch = 25, col = "red")

# Quadratic example: the true regression function is 7x^2 - 0.5x
plot(x, y2)
curve(7 * x^2 - 0.5 * x, col = "grey", add = TRUE)
# Fit a constant, plot it
y2.0 <- lm(y2 ~ 1)
abline(h = y2.0$coefficients[1])
# Get evenly spaced points for pretty plotting
d <- seq(-2, 2, length.out = 200)
# Fit polynomials of order 1 to 9
for (degree in 1:9) {
  fm <- lm(y2 ~ poly(x, degree))
  # Store the results; the assign/paste trick here is often useful
  assign(paste("y2", degree, sep = "."), fm)
  # Plot them, with different line types
  lines(d, predict(fm, data.frame(x = d)), lty = degree + 1)
}

# Re-plot with testing data, widening the axes to the new data's range
y2.new <- 7 * x.new^2 - 0.5 * x.new + rnorm(200)
plot(x, y2, xlim = range(x.new), ylim = range(y2.new))
curve(7 * x^2 - 0.5 * x, col = "grey", add = TRUE)
y2.0 <- lm(y2 ~ 1)
abline(h = y2.0$coefficients[1])
# Wider grid so the curves extend over the testing data's range
# (deliberate extrapolation beyond the training x's)
d <- seq(-4, 4, length.out = 200)
for (degree in 1:9) {
  fm <- get(paste("y2", degree, sep = "."))
  lines(d, predict(fm, data.frame(x = d)), lty = degree + 1)
}
points(x.new, y2.new, col = "blue", pch = 24)

# Calculate and plot in- and out-of-sample errors for the quadratic case
mse.q <- numeric(10)
gmse.q <- numeric(10)
for (degree in 0:9) {
  # The get() function is the inverse to assign()
  fm <- get(paste("y2", degree, sep = "."))
  mse.q[degree + 1] <- mean(summary(fm)$residuals^2)
  predictions <- predict(fm, data.frame(x = x.new))
  resids <- y2.new - predictions
  gmse.q[degree + 1] <- mean(resids^2)
}
plot(0:9, mse.q, type = "b", xlab = "polynomial degree",
     ylab = "mean squared error", ylim = c(0, 50))
lines(0:9, gmse.q, lty = 2, col = "blue")
points(0:9, gmse.q, pch = 24, col = "blue")