# Solutions to Lab 5, 36-350, Fall 2011 # See lab and solutions for context # First version of testing function # Check tukey.outlier on test cases # Inputs: none # Output: TRUE if all tests pass, else stops with an error test.tukey.outlier <- function() { x <- c(2.2, 7.8, -4.4, 0.0, -1.2, 3.9, 4.9, -5.7, -7.9, -4.9, 28.7, 4.9) x.pattern <- rep(FALSE,12); x.pattern[11] <- TRUE stopifnot(all(tukey.outlier(x) == x.pattern)) return(TRUE) } # Initial version of outlier function, for one vector # Detect outliers according to Tukey # Input: data vector (x) # Output: Boolean vector, TRUE for outliers, FALSE otherwise tukey.outlier <- function(x) { quartiles <- quartiles(x) lower.limit <- quartiles[1]-1.5*quartiles[3] upper.limit <- quartiles[2]+1.5*quartiles[3] outliers <- ((x < lower.limit) | (x > upper.limit)) return(outliers) } # Second version of testing function # Check tukey.outlier on test cases # Inputs: none # Output: TRUE if all tests pass, else stops with an error test.tukey.outlier <- function() { x <- c(2.2, 7.8, -4.4, 0.0, -1.2, 3.9, 4.9, -5.7, -7.9, -4.9, 28.7, 4.9) x.pattern <- rep(FALSE,12); x.pattern[11] <- TRUE stopifnot(all(tukey.outlier(x) == x.pattern)) stopifnot(all(tukey.outlier(-x) == tukey.outlier(x))) return(TRUE) } # Third version of testing function # Check tukey.outlier on test cases # Inputs: none # Output: TRUE if all tests pass, else stops with an error test.tukey.outlier <- function() { x <- c(2.2, 7.8, -4.4, 0.0, -1.2, 3.9, 4.9, -5.7, -7.9, -4.9, 28.7, 4.9) x.pattern <- rep(FALSE,12); x.pattern[11] <- TRUE stopifnot(all(tukey.outlier(x) == x.pattern)) stopifnot(all(tukey.outlier(-x) == tukey.outlier(x))) y <- c(11.0, 14.0, 3.5, 52.5, 21.5, 12.7, 16.7, 11.7, 10.8, -9.2, 12.3, 13.8) y.pattern <- rep(FALSE,12) y.pattern[3:5] <- TRUE; y.pattern[10] <- TRUE stopifnot(all(tukey.outlier(y) == y.pattern)) z <- cbind(x,y) z.pattern <- x.pattern | y.pattern stopifnot(all(tukey.outlier(z) == z.pattern)) return(TRUE) } # Version of tukey.outlier suitable for arrays (first approach) # Detect outliers according to Tukey # Inputs: data vector or data array (x) # Outputs: Boolean vector, TRUE for outliers, FALSE elsewhere # Presumes: When x is an array, each row is a separate case tukey.outlier <- function(x) { # Embed version we wrote for vectors as an internal function detect.outliers <- function(x) { quartiles <- quartiles(x) lower.limit <- quartiles[1]-1.5*quartiles[3] upper.limit <- quartiles[2]+1.5*quartiles[3] outliers <- ((x < lower.limit) | (x > upper.limit)) return(outliers) } # See if we are dealing with a multi-column array if(is.array(x) && ncol(x) > 1) { # If so, detect outliers in each column component.outliers <- apply(x,2,detect.outliers) # A row is an outlier if any component of it is outliers <- apply(component.outliers,1,any) } else { # Otherwise, apply the vector function directly outliers <- detect.outliers(x) } return(outliers) } # Version of tukey.outlier suitable for arrays (recursive approach) # Detect outliers according to Tukey # Inputs: data vector or data array (x) # Outputs: Boolean vector, TRUE for outliers, FALSE elsewhere # Presumes: When x is an array, each row is a separate case tukey.outlier <- function(x) { # Are we dealing with a multi-column array? if(is.array(x) && ncol(x) > 1) { # If so, recursive step: call function on each column component.outliers <- apply(x,2,tukey.outlier) # Combine across columns outliers <- apply(component.outliers,1,any) } else { # Base case, one column: do what we did for a vector above quartiles <- quartiles(x) lower.limit <- quartiles[1]-1.5*quartiles[3] upper.limit <- quartiles[2]+1.5*quartiles[3] outliers <- ((x < lower.limit) | (x > upper.limit)) } return(outliers) } # More elaborate and inforamtive testing function # Run a battery of tests on tukey.outlier # Inputs: none # Outputs: TRUE if all tests passed, FALSE if any failed # Side-effect: warning message for each failed test test.tukey.outlier <- function() { # Utility function for printing error messages # Inputs: name of function (fnct), input values (x), the comparison being # attempted (comparison), the value expected (value) # Output: a character string (not vector) with the message warning.message <- function(fnct="tukey.outlier",x,comparison="==",value) { # Turn x and value into things which look decent for printing text.x <- paste(x, collapse=" ") text.value <- paste(value,collapse=" ") message <- paste("failed test of",fnct,"(",text.x,")",comparison,text.value,collapse=" ") return(message) } # Define constants for test cases x <- c(2.2, 7.8, -4.4, 0.0, -1.2, 3.9, 4.9, -5.7, -7.9, -4.9, 28.7, 4.9) x.pattern <- rep(FALSE,12); x.pattern[11] <- TRUE y <- c(11.0, 14.0, 3.5, 52.5, 21.5, 12.7, 16.7, 11.7, 10.8, -9.2, 12.3, 13.8) y.pattern <- rep(FALSE,12) y.pattern[3:5] <- TRUE; y.pattern[10] <- TRUE z <- cbind(x,y) z.pattern <- x.pattern | y.pattern # Boolean vector for test success/failure; presume failure tests <- rep(FALSE,times=4) # Go over the tests # What follows is highly repetitious, and so we should really define a # function to automate this, but we're already doing far more work than the # lab called for tests[1] <- isTRUE(all.equal(tukey.outlier(x),x.pattern)) if (!tests[1]) { warning(warning.message(x=x,value=x.pattern)) } tests[2] <- isTRUE(all.equal(tukey.outlier(x),tukey.outlier(-x))) if (!tests[2]) { warning(warning.message(x=-x,value=x.pattern)) } tests[3] <- isTRUE(all.equal(tukey.outlier(y),y.pattern)) if (!tests[3]) { warning(warning.message(x=y,value=y.pattern)) } tests[4] <- isTRUE(all.equal(tukey.outlier(z),z.pattern)) if (!tests[4]) { warning(warning.message(x=z,value=z.pattern)) } # Return TRUE if all tests passed, else FALSE return(all(tests)) }