#
# lecture-12.r
#
# Created by Vincent Vu on 2011-10-09.
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 3.0 Unported License.
#
# Lecture slides show plots created with ggplot2, but the commands here
# use R's built-in plotting functions.  ggplot2 versions are commented out
# in some places, and also provided at the end.
#
# masters2011.RData contains a single object, 'masters2011', that is a list
# with 3 components: 'leaderboard', 'scorecard', 'course'
load('masters2011.RData')

# ==============================================
# = Look at an individual player's performance =
# ==============================================

# Extract Tiger Woods' score card for the 4th round and calculate his running
# over/under par score
df <- subset(masters2011$scorecard, player == 'Tiger Woods' & round == 4)
tiger <- as.numeric(df[, 1:18])
tiger <- cumsum(tiger - masters2011$course$par)
plot(tiger, type = 'l')

# Alternatively
# library(ggplot2)
# qplot(x=1:18,y=tiger, geom='line', xlab = 'hole', ylab = 'score',
#       main = 'Tiger Woods - Round 4')

# Extract Tiger Woods' score card and reorder the rows by the round (1 to 4)
df <- subset(masters2011$scorecard, player == 'Tiger Woods')
tiger <- as.matrix(df[order(df$round), 1:18])

# The matrix is 4 x 18 (one row per round).  Transposing before as.vector()
# reads it row-wise, giving a vector of length 18*4 in playing order.
tiger <- as.vector(t(tiger))

# Calculate Tiger's running score across the 4 rounds of the tournament
tiger <- cumsum(tiger - rep(masters2011$course$par, times = 4))
plot(tiger, type = 'l')

# qplot(x=1:(4*18), y=tiger, geom='line', xlab = 'hole', ylab = 'score',
#       main = 'Tiger Woods - Rounds 1-4')

# ==========================================================================
# = Abstract the analysis so that it can be easily repeated for any player =
# ==========================================================================

# Running over/under par score for one player.
#
# Arguments:
#   df   Data frame of score cards for a single player: one row per round,
#        hole scores in columns 1 to 18, and a 'round' column.
#   par  Vector of par values for the holes; defaults to the course par
#        stored in the global 'masters2011' object.
#
# Returns a numeric vector of length 18 * nrow(df) holding the cumulative
# score relative to par, in chronological order.
runningTotal <- function(df, par = masters2011$course$par) {
  # Reorder the rows of the data frame by the round number
  # (so that the scores are in chronological order)
  # and extract the scores as a matrix
  x <- as.matrix(df[order(df$round), 1:18])
  n <- nrow(x)

  # x is n x 18; transposing first makes as.vector() read it row-wise,
  # i.e. round 1 holes 1-18, then round 2 holes 1-18, ...
  x <- as.vector(t(x))

  # Calculate the running over/under score
  cumsum(x - rep(par, times = n))
}

# Check out Charl Schwartzel
charl <- runningTotal(subset(masters2011$scorecard,
                             player == 'Charl Schwartzel'))
plot(charl, xlab = 'hole', ylab = 'score', type = 'l')

# ==================================================================
# = Look at all players that made the cut (played in all 4 rounds) =
# ==================================================================

madecut <- subset(masters2011$leaderboard, position != 'CUT')$player
df <- subset(masters2011$scorecard, player %in% madecut)
# Drop the factor levels of players that missed the cut so that
# levels(df$player) and split() only see the remaining players
df <- droplevels(df)

# Two approaches to computing runningTotal for each player.
# The results are stored row-wise in a 2-dimensional array
# (one row per player, one column per hole played).

# =====================
# = For-loop approach =
# =====================
scores <- matrix(nrow = nlevels(df$player), ncol = 18 * 4)
for (i in seq_len(nlevels(df$player))) {
  x <- subset(df, player == levels(df$player)[i])
  scores[i, ] <- runningTotal(x)
}
rownames(scores) <- levels(df$player)

# =====================================
# = Split, apply, combine with base R =
# =====================================
x <- split(df, df$player)
x <- lapply(x, runningTotal)
scores <- do.call(rbind, x)

# ====================
# = Plot the results =
# ====================

# matplot() draws one line per column, so transpose to get one line per player
matplot(t(scores), xlab = 'hole', ylab = 'total to par', type = 'l')
# Add the median running total
lines(1:ncol(scores), apply(scores, 2, median), lwd = 2)

# ==================
# = Extra material =
# ==================

# Look at players in the top 5% after rounds 1 and 4.
# 'scores' has players in rows and holes in columns, so the standings after
# a round are a *column* of the matrix (lower is better).
i <- which(
  scores[, 18 * 4] <= quantile(scores[, 18 * 4], 0.05) |
  scores[, 18 * 1] <= quantile(scores[, 18 * 1], 0.05)
)
# drop = FALSE keeps a matrix even if only one player is selected
matplot(t(scores[i, , drop = FALSE]), xlab = 'hole', ylab = 'total to par',
        type = 'l')
# Median over all players that made the cut, for reference
lines(1:ncol(scores), apply(scores, 2, median), lwd = 2)

library(ggplot2)

# Long format (one row per player and hole) built with base R, so no
# reshaping package is required.  as.vector() reads 'scores' column by
# column: players cycle fastest, holes slowest.
scores_long <- data.frame(
  player = factor(rep(rownames(scores), times = ncol(scores)),
                  levels = rownames(scores)),
  hole = rep(seq_len(ncol(scores)), each = nrow(scores)),
  value = as.vector(scores)
)

ggplot(data = scores_long,
       aes(x = hole, y = value, group = player, color = player)) +
  labs(y = 'total to par') +
  geom_line(alpha = 1/2, size = 1) +
  annotate(geom = 'line', size = 1.5, alpha = 1/2,
           x = 1:ncol(scores), y = apply(scores, 2, median)) +
  theme(legend.position = 'none')