# Code for HW 2, problem 2
# Evaluate five different bandwidths for a simple kernel regression by
# five-fold cross-validation, using mean-squared error as the loss
# function
# library() stops with an error if np is not installed; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(np)
data(oecdpanel)

# Compare predictive ability using five-fold CV
nfolds <- 5
# Assign each case a fold label in 1..nfolds, dividing the cases as evenly
# as possible
case.folds <- rep(seq_len(nfolds), length.out = nrow(oecdpanel))
# Randomly permute the labels so fold membership does not follow row order.
# NOTE: the permutation is random; call set.seed() beforehand if the results
# need to be reproducible from run to run.
case.folds <- sample(case.folds)
bandwidths <- (1:5) / 10 # Evenly spaced bandwidths from 0.1 to 0.5

# fold.mses[i, j] will hold the test-set MSE of fold i at bandwidths[j]
fold.mses <- matrix(0, nrow = nfolds, ncol = length(bandwidths))
# Name the columns after the bandwidths so the results are self-describing
colnames(fold.mses) <- as.character(bandwidths)

for (fold in seq_len(nfolds)) {
  # Cases in the current fold are held out for testing; the rest are used
  # for training
  train <- oecdpanel[case.folds != fold, ]
  test <- oecdpanel[case.folds == fold, ]
  # Loop over positions rather than values: the result column is then picked
  # by index, not by converting a floating-point bandwidth to a string and
  # hoping it matches the column name
  for (bw.index in seq_along(bandwidths)) {
    bw <- bandwidths[bw.index]
    # Fit to the training set
    current.npr <- npreg(growth ~ initgdp, data = train, bws = bw)
    # Predict on the test set
    predictions <- predict(current.npr, newdata = test)
    # Mean-squared prediction error on the held-out fold
    fold.mses[fold, bw.index] <- mean((test$growth - predictions)^2)
  }
}

# Average the MSEs over folds: one cross-validated MSE per bandwidth
bandwidths.cv.mses <- colMeans(fold.mses)