# Code for examples in Lecture 10, 36-350, Fall 2011 # See lecture slides for context # Load the data for the West et al. model once again gmp <- read.table("http://www.stat.cmu.edu/~cshalizi/statcomp/labs/02/gmp.dat") pop <- gmp$gmp/gmp$pcgmp gmp <- data.frame(gmp,pop=pop) # Define the MSE function # Calculate mean squared error of West et al. power-law scaling model # Inputs: scale factor (y0), scaling exponent (a), response variable (Y), # predictor variable (N) # Output: the MSE mse <- function(y0,a,Y=gmp$pcgmp,N=gmp$pop) { mean((Y - y0*(N^a))^2) } # Interaction of curve() and mse() # Next line won't work --- why? curve(mse(a=x,y0=6611),from=0.10,to=0.15) # Using sapply() to "vectorize" mse: sapply(seq(from=0.10,to=0.15,by=0.01),mse,y0=6611) # Check that it's doing what it should: mse(6611,0.10) # Define a vectorized version using sapply # You should work out what the inputs and outputs are here! mse.plottable <- function(a,...){sapply(a,mse,...)} # Make some plots curve(mse.plottable(a=x),from=0.10,to=0.15) curve(mse.plottable(a=x,y0=5100),from=0.10,to=0.20) # You should try varying Y and N in mse.plottable # Numerical gradients # This approach to numerical gradients is "for educational purposes only" # For serious work, see the numDeriv package on CRAN (or texts on numerical # analysis) --- see slides for drawbacks of simple first differences # Clunky numerical gradient calculation, with a for loop # Take numerical gradient by first differences # Inputs: function (f), point at which to take derivative (x), increments for # first differences (deriv.steps), further arguments to f (...) # Output: vector of partial derivatives # Presumes: f takes a vector as argument and returns a numeric scalar # deriv.steps and x have the same number of components gradient <- function(f,x,deriv.steps,...) { p <- length(x) stopifnot(length(deriv.steps)==p) f.old <- f(x,...) gradient <- vector(length=p) for (coordinate in 1:p) { x.new <- x x.new[coordinate] <- x.new[coordinate]+deriv.steps[coordinate] f.new <- f(x.new,...) gradient[coordinate] <- (f.new - f.old)/deriv.steps[coordinate] } return(gradient) } # Better version, avoiding iteration # Take numerical gradient by first differences # Inputs: function (f), point at which to take derivative (x), increments for # first differences (deriv.steps), further arguments to f (...) # Output: vector of partial derivatives # Presumes: f takes a vector as argument and returns a numeric scalar # deriv.steps and x have the same number of components gradient <- function(f,x,deriv.steps,...) { p <- length(x) stopifnot(length(deriv.steps)==p) f.old <- f(x,...) x.new <- matrix(rep(x,times=p),nrow=p) + diag(deriv.steps,nrow=p) # columns of x.new are perturbations of x because of how matrix() fills in f.new <- apply(x.new,2,f,...) gradient <- (f.new - f.old)/deriv.steps # Recycling return(gradient) } # Minimize a function by gradient descent # Inputs: function (f), starting guess (initial.x), number of steps to # take (max.iterations), factor by which to multiply gradient (step.scale), # size of negible derivatives (stopping.deriv), further arguments to # gradien or to f (...) # Outputs: list with final guess at minimum, final value of gradient, number # of iterations taken # Presumes: f takes a numeric vector and returns a numeric scalar # need to pass gradient any extra arguments via ... gradient.descent <- function(f,initial.x,max.iterations,step.scale, stopping.deriv,...) { x <- initial.x for (iteration in 1:max.iterations) { grad <- gradient(f,x,...) x <- x - step.scale*grad if(all(abs(grad) < stopping.deriv)) { break() } } fit <- list(argmin=x,final.gradient=grad,iterations=iteration) return(fit) } # Adapting mse() to gradient() and gradient.descent() # Option 1: write a wrapper # Version of MSE function taking a vector argument # Inputs: Vector of parameters (param), optional extra arguments to mse (...) # Output: The mean squared error mse.for.optimization <- function(param,...) { mse(y0=param[1],a=param[2],...) } # Try the following call (may have to wait a little) gradient.descent(f=mse.for.optimization,initial.x=c(6611,0.15), max.iterations=1e5,step.scale=c(1e-3,1e-12),stopping.deriv=1e-2, deriv.step=c(1,1e-6)) # Option 2: use an anonymous function gradient.descent(function(param,...) {mse(y0=param[1],a=param[2],...)}, initial.x=c(6611,0.15),max.iterations=1e5,step.scale=c(1e-3,1e-12), stopping.deriv=1e-2,deriv.step=c(1,1e-6)) # Results should be identical to previous call