# Code for HW 2, problem 2
# Evaluate five different bandwidths for a simple kernel regression by
# five-fold cross-validation, using mean-squared error as the loss
# function.
#
# Result: bandwidths.cv.mses, a numeric vector with one element per
# bandwidth (named by the bandwidth), holding the MSE averaged over folds.

# library() stops immediately if np is not installed; require() would only
# return FALSE and let the script fail later with a less obvious error.
library(np)
data(oecdpanel)

# Compare predictive ability using five-fold CV
nfolds <- 5
# Divide the cases as evenly as possible
case.folds <- rep(seq_len(nfolds), length.out = nrow(oecdpanel))
# Randomly permute the order. No seed is set here, so the fold assignment
# (and hence the MSEs) will differ from run to run unless the caller sets one.
case.folds <- sample(case.folds)

# Evenly spaced bandwidths from 0.1 to 0.5
bandwidths <- (1:5) / 10

# One row per fold, one column per bandwidth
fold.mses <- matrix(0, nrow = nfolds, ncol = length(bandwidths))
# By naming the columns, we won't have to keep track of which bandwidth
# is in which position
colnames(fold.mses) <- as.character(bandwidths)

for (fold in seq_len(nfolds)) {
  # What are the training cases and what are the test cases?
  train <- oecdpanel[case.folds != fold, ]
  test <- oecdpanel[case.folds == fold, ]
  for (bw in bandwidths) {
    # Fit to the training set with the bandwidth held fixed at bw
    current.npr <- npreg(growth ~ initgdp, data = train, bws = bw)
    # Predict on the test set
    predictions <- predict(current.npr, newdata = test)
    # What's the mean-squared error?
    # Converting bw with as.character(), exactly as was done when naming the
    # columns, lets us access the column with the right name.
    fold.mses[fold, as.character(bw)] <- mean((test$growth - predictions)^2)
  }
}

# Average the MSEs over folds, giving one CV score per bandwidth
bandwidths.cv.mses <- colMeans(fold.mses)