--- title: "Spline examples" output: html_document date: 14 February 2017 author: 36-402, Spring 2017, Section A --- ```{r, include=FALSE} # General set up # Load packages we may use later library(knitr) # Set knitr options for knitting code into the report: # - Save results so that code blocks aren't re-run unless code changes (cache), # _or_ a relevant earlier code block changed (autodep), but don't re-run if the # only thing that changed was the comments (cache.comments) # - Don't clutter R output with messages or warnings (message, warning) # This _will_ leave error messages showing up in the knitted report opts_chunk$set(cache=TRUE, autodep=TRUE, cache.comments=FALSE, message=FALSE, warning=FALSE) ``` Simple demo of splines ===== First generate some pretend data (this will look familiar) ```{r} set.seed(3) true.curve = function(x) {sin(x)*cos(5*x)} ylim = c(-2,2) n = 100 x = sort(rnorm(n,1.5,.5)) y = true.curve(x)+rnorm(length(x),0,0.15) plot(x,y,xlab="x",ylab=expression(f(x)+epsilon),ylim=ylim) curve(true.curve(x),col="grey",add=TRUE) data = data.frame(x=x,y=y) ``` We can fit splines with `smooth.spline`. - We can manually specify lambda with `spar` (actually specifies a monotone function of it) - We can feed the data in as separate `x` and `y` arguments, or as a data frame with `x` and `y` columns We can predict new values with `predict`. - To see help file, do `?predict.smooth.spline` - Takes new `x` values as `x` argument - Returns data frame with `x` and `y` elements (sorted) Very low $\lambda$: ```{r} fit_rough = smooth.spline(data,spar = 0) eval.grid = seq(from=min(data$x),to=max(data$x),length.out=1000) plot(data, ylim=ylim) lines(predict(fit_rough, x=eval.grid)) ``` Very high $\lambda$: ```{r} fit_verysmooth = smooth.spline(data,spar=2) plot(data, ylim=ylim) lines(predict(fit_verysmooth, x=eval.grid)) ``` Slightly lower, but still very high $\lambda$: ```{r} fit_smooth = smooth.spline(data,spar=1) plot(data, ylim=ylim) lines(predict(fit_smooth, x=eval.grid)) ``` Cross validated $\lambda$: ```{r} fit_cv = smooth.spline(data) plot(data, ylim=ylim) lines(predict(fit_cv, x=eval.grid)) ``` Fit a spline to financial data ===== First, we load data from Yahoo finance - Note, I had problems with this. You can email me if you try to replicate this and also have issues. ```{r} require(pdfetch) sp <- pdfetch_YAHOO("SPY", fields="adjclose", from=as.Date("1993-02-09"), to=as.Date("2017-02-13")) ``` ```{r, eval=FALSE} load('09.Rdata') ``` ```{r} # Compute log returns sp <- diff(log(sp)) # need to drop the initial NA which makes difficulties later sp <- sp[-1] ``` ```{r,warning=FALSE} #All but the last entry sp.today <- head(sp,-1) #All but the first entry sp.tomorrow <- tail(sp,-1) #Plot tomorrow versus today plot(as.numeric(sp.today),as.numeric(sp.tomorrow),pch=20,col=rgb(0,0,0,.3)) #Add a nice horizontal line abline(h=0,lty=3) #Fit a linear model, see the coefficients sp.linear = lm(sp.tomorrow ~ sp.today) coefficients(sp.linear) abline(sp.linear,col='grey',lwd=2) ``` We can fit a spline instead of a line: ```{r} #Fit a spline sp.spline <- smooth.spline(x=sp.today,y=sp.tomorrow,cv=TRUE) sp.spline sp.spline$lambda #Plot the spline (and the other stuff) plot(as.numeric(sp.today),as.numeric(sp.tomorrow),pch=20,col=rgb(0,0,0,.3),xlab="Today's log-return", ylab="Tomorrow's log-return") abline(h=0,lty=3) abline(sp.linear,col='grey',lwd=2) lines(sp.spline,type='l',lwd=2,col='blue') # The spline ``` What happens if we play around with the $\lambda$ parameter: ```{r, warning=FALSE} plot(as.numeric(sp.today),as.numeric(sp.tomorrow),pch=20,col=rgb(0,0,0,.3),xlab="Today's log-return", ylab="Tomorrow's log-return") abline(sp.linear,col="grey") lines(sp.spline) lines(smooth.spline(sp.today,sp.tomorrow,spar=1.5),col="blue") lines(smooth.spline(sp.today,sp.tomorrow,spar=2),col="blue",lty=2) lines(smooth.spline(sp.today,sp.tomorrow,spar=1.1),col="red") lines(smooth.spline(sp.today,sp.tomorrow,spar=0.5),col="red",lty=2) ``` Generate error bounds ===== We notice that the spline is steeper on the left than on the right! Could this be sampling error? Let's find bootstrap error bounds on the fit! First, we build a resampling function to get bootstrap data sets: ```{r, warning=FALSE} # A data frame to hold our data. Makes it easier to sample rows sp.frame <- data.frame(today=sp.today,tomorrow=sp.tomorrow) # A function to sample rows with replacement sp.resampler <- function() { n <- nrow(sp.frame) resample.rows <- sample(1:n,size=n,replace=TRUE) return(sp.frame[resample.rows,]) } ``` We set up a function to evaluate predictions on an even grid ```{r, warning=FALSE} # Set up a grid of evenly-spaced points on which to evaluate the spline grid.300 <- seq(from=min(sp.today),to=max(sp.today),length.out=300) sp.spline.estimator <- function(data,eval.grid=grid.300) { # Fit spline to data, with cross-validation to pick lambda fit <- smooth.spline(x=data[,1],y=data[,2],cv=TRUE) # Do the prediction on the grid and return the predicted values return(predict(fit,x=eval.grid)$y) # We only want the predicted values } ``` A function to draw $B$ bootstrap samples and compute pointwise quantiles: ```{r, warning=FALSE} sp.spline.cis <- function(B,alpha,eval.grid=grid.300) { spline.main <- sp.spline.estimator(sp.frame, eval.grid=eval.grid) # Draw B boottrap samples, fit the spline to each # Result has length(eval.grid) rows and B columns spline.boots <- replicate(B, sp.spline.estimator(sp.resampler(),eval.grid=eval.grid)) cis.lower <- 2*spline.main - apply(spline.boots,1,quantile,probs=1-alpha/2) cis.upper <- 2*spline.main - apply(spline.boots,1,quantile,probs=alpha/2) return(list(main.curve=spline.main, lower.ci=cis.lower, upper.ci=cis.upper, x=eval.grid)) } ``` Now we run it all together! ```{r, warning=FALSE, cache=TRUE} #Get the confidence intervals sp.cis <- sp.spline.cis(B=1000,alpha=0.05) #Plot the confidence intervals plot(as.vector(sp.today),as.vector(sp.tomorrow),xlab="Today's log-return", ylab="Tomorrow's log-return",pch=16,cex=0.5,col="grey") abline(sp.linear,col="darkgrey") lines(x=sp.cis$x,y=sp.cis$main.curve,lwd=2) lines(x=sp.cis$x,y=sp.cis$lower.ci,col='red') lines(x=sp.cis$x,y=sp.cis$upper.ci) ```