# Example functions from Lecture 4, 36-350, Fall 2013 # See the lecture notes for context # "Robust" loss function, for outlier-resistant regression # Inputs: vector of numbers (x) # Outputs: vector with x^2 for small entries, 2|x|-1 for large ones psi.1 <- function(x) { psi <- ifelse(x^2 > 1, 2*abs(x)-1, x^2) return(psi) } # "Robust" loss function, for outlier-resistant regression # Inputs: vector of numbers (x), scale for crossover (c) # Outputs: vector with x^2 for small entries, 2c|x|-c^2 for large ones psi.2 <- function(x,c=1) { psi <- ifelse(x^2 > c^2, 2*c*abs(x)-c^2, x^2) return(psi) } # "Robust" loss function, for outlier-resistant regression # Inputs: vector of numbers (x), scale for crossover (c) # Outputs: vector with x^2 for small entries, 2c|x|-c^2 for large ones psi.3 <- function(x,c=1) { # Scale should be a single positive number stopifnot(length(c) == 1,c>0) psi <- ifelse(x^2 > c^2, 2*c*abs(x)-c^2, x^2) return(psi) } # Code example for estimation: maximum.iterations <- 100 deriv.step <- 1/1000 step.scale <- 1e-12 stopping.deriv <- 1/100 iteration <- 0 deriv <- Inf a <- 0.15 while ((iteration < maximum.iterations) && (deriv > stopping.deriv)) { iteration <- iteration + 1 mse.1 <- mean((gmp\$pcgmp - 6611*gmp\$pop^a)^2) mse.2 <- mean((gmp\$pcgmp - 6611*gmp\$pop^(a+deriv.step))^2) deriv <- (mse.2 - mse.1)/deriv.step a <- a - step.scale*deriv } list(a=a,iterations=iteration,converged=(iteration < maximum.iterations)) # Re-write as a function, with the initial guess for a as argument # Also, fix the "logic bug", should compare abs(deriv) to # threshold, not deriv itself # Estimate the scaling exponent in the West&c. model, by nonlinear least squares # Inputs: Initial guess at the scaling exponent (a) # Output: List with final estimate (a), number of iterations taken (iterations), # whether it converged or not (converged) estimate.scaling.exponent.1 <- function(a) { maximum.iterations <- 100 deriv.step <- 1/1000 step.scale <- 1e-12 stopping.deriv <- 1/100 iteration <- 0 deriv <- Inf while ((iteration < maximum.iterations) && (abs(deriv) > stopping.deriv)) { iteration <- iteration + 1 mse.1 <- mean((gmp\$pcgmp - 6611*gmp\$pop^a)^2) mse.2 <- mean((gmp\$pcgmp - 6611*gmp\$pop^(a+deriv.step))^2) deriv <- (mse.2 - mse.1)/deriv.step a <- a - step.scale*deriv } fit <- list(a=a,iterations=iteration, converged=(iteration < maximum.iterations)) return(fit) } # Turn "magic constants" inside code into default values of # arguments # deriv.step is probably too small for accuracy here # Estimate the scaling exponent in the West&c. model, by nonlinear least squares # Inputs: Initial guess at the scaling exponent (a), scaling factor (y0), # number of steps to use (maximum.iterations), size of perturbation for # numerical derivative (deriv.step), scaling of derivative (step.scale), # threshold for treating derivative as effectively zero (stopping.deriv) # Output: List with final estimate (a), number of iterations taken (iterations), # whether it converged or not (converged) estimate.scaling.exponent.2 <- function(a, y0=6611, maximum.iterations=100, deriv.step = 1/100, step.scale = 1e-12, stopping.deriv = 1/100) { iteration <- 0 deriv <- Inf while ((iteration < maximum.iterations) && (abs(deriv) > stopping.deriv)) { iteration <- iteration + 1 mse.1 <- mean((gmp\$pcgmp - y0*gmp\$pop^a)^2) mse.2 <- mean((gmp\$pcgmp - y0*gmp\$pop^(a+deriv.step))^2) deriv <- (mse.2 - mse.1)/deriv.step a <- a - step.scale*deriv } fit <- list(a=a,iterations=iteration, converged=(iteration < maximum.iterations)) return(fit) } # Define an internal mse() function, so it's clearer when we're # doing parallel acalculations, we don't have to repeat code # Estimate the scaling exponent in the West&c. model, by nonlinear least squares # Inputs: Initial guess at the scaling exponent (a), scaling factor (y0), # number of steps to use (maximum.iterations), size of perturbation for # numerical derivative (deriv.step), scaling of derivative (step.scale), # threshold for treating derivative as effectively zero (stopping.deriv) # Output: List with final estimate (a), number of iterations taken (iterations), # whether it converged or not (converged) estimate.scaling.exponent.3 <- function(a, y0=6611, maximum.iterations=100, deriv.step = 1/100, step.scale = 1e-12, stopping.deriv = 1/100) { iteration <- 0 deriv <- Inf mse <- function(a) { mean((gmp\$pcgmp - y0*gmp\$pop^a)^2) } while ((iteration < maximum.iterations) && (abs(deriv) > stopping.deriv)) { iteration <- iteration + 1 deriv <- (mse(a+deriv.step) - mse(a))/deriv.step a <- a - step.scale*deriv } fit <- list(a=a,iterations=iteration, converged=(iteration < maximum.iterations)) return(fit) } # Allow us to use different data sets, but with the old one as default # Estimate the scaling exponent in the West&c. model, by nonlinear least squares # Inputs: Initial guess at the scaling exponent (a), scaling factor (y0), # vector of response values (response), vector of predictor values # (predictor), number of steps to use (maximum.iterations), size of # perturbation for numerical derivative (deriv.step), scaling of derivative # (step.scale), threshold for treating derivative as effectively zero # (stopping.deriv) # Output: List with final estimate (a), number of iterations taken (iterations), # whether it converged or not (converged) estimate.scaling.exponent.4 <- function(a, y0=6611, response=gmp\$pcgmp, predictor = gmp\$pop, maximum.iterations=100, deriv.step = 1/100, step.scale = 1e-12, stopping.deriv = 1/100) { iteration <- 0 deriv <- Inf mse <- function(a) { mean((response - y0*predictor^a)^2) } while ((iteration < maximum.iterations) && (abs(deriv) > stopping.deriv)) { iteration <- iteration + 1 deriv <- (mse(a+deriv.step) - mse(a))/deriv.step a <- a - step.scale*deriv } fit <- list(a=a,iterations=iteration, converged=(iteration < maximum.iterations)) return(fit) } # Turn the while() loop into a for() loop, shortening the code a bit # Change is invisible to everything else # Estimate the scaling exponent in the West&c. model, by nonlinear least squares # Inputs: Initial guess at the scaling exponent (a), scaling factor (y0), # vector of response values (response), vector of predictor values # (predictor), number of steps to use (maximum.iterations), size of # perturbation for numerical derivative (deriv.step), scaling of derivative # (step.scale), threshold for treating derivative as effectively zero # (stopping.deriv) # Output: List with final estimate (a), number of iterations taken (iterations), # whether it converged or not (converged) estimate.scaling.exponent.5 <- function(a, y0=6611, response=gmp\$pcgmp, predictor = gmp\$pop, maximum.iterations=100, deriv.step = 1/100, step.scale = 1e-12, stopping.deriv = 1/100) { mse <- function(a) { mean((response - y0*predictor^a)^2) } for (iteration in 1:maximum.iterations) { deriv <- (mse(a+deriv.step) - mse(a))/deriv.step a <- a - step.scale*deriv if (abs(deriv) <= stopping.deriv) { break() } } fit <- list(a=a,iterations=iteration, converged=(iteration < maximum.iterations)) return(fit) }