# Functions for Lab 07 - 2005. # Simply copied over from previous labs - no modifications. # Functions for lab5 # by Tom Minka # MINKA=~/R/minka/R # cat lab5e.r $MINKA/common.r $MINKA/aspect.r $MINKA/color.r $MINKA/regress.r > ../www/lab/lab5/lab5.r permute <- function(x) { for(i in 1:ncol(x)) { x[,i] = shuffle(x[,i]) } x } ################### # this is only for routines used by minka library rank.stable <- function(x) { # handles ties better than "rank" n <- length(x) r <- rep(0,n) r[order(x)] <- 1:n r } shuffle <- function(x) { if(length(dim(x)) == 2) { x[order(runif(nrow(x))),order(runif(ncol(x)))] } else { x[order(runif(length(x)))] } } named.list <- function(...,names.) { lst <- list(...) names(lst) = names. lst } named.numeric <- function(length,names.) { x <- numeric(length) names(x) = names. x } as.named.vector <- function(a) { if(is.matrix(a) || is.data.frame(a)) { x = a[,1] names(x) = rownames(a) x } else if(is.list(a)) { unlist(a) } else stop("don't know what to do") } # companions to xinch,yinch inchx = function(x,warn.log=T) { if (warn.log && par("xlog")) warning("x log scale: inchx() is non-sense") x / diff(par("usr")[1:2]) * par("pin")[1] } inchy = function (y, warn.log = TRUE) { if (warn.log && par("ylog")) warning("y log scale: inchy() is non-sense") y / diff(par("usr")[3:4]) * par("pin")[2] } inch.symbol <- function(pch=par("pch"),cex=par("cex")) { # returns the height in inches of the given plotting symbol # with current graphics settings. # this is for pch="H" inch = par("csi")*cex/1.5 if(pch == 1) inch = inch/2 inch } format.bytes = function(x) { # returns byte sizes in pretty form # example: format.bytes(c(2,1030,2e6)) # based on code by Fang Chen round.to.half = function(x) round(x*2)/2 s = as.character(x) names(s) = names(x) kilo = 2^10; mega = 2^20 i = (x >= mega) s[i] = paste(round.to.half(x[i]/mega),"M",sep="") i = (x >= kilo) & (x < mega) s[i] = paste(round(x[i]/kilo),"k",sep="") s } ls.sizes <- function(name=parent.frame(), pretty = T, top = 10, ...) { # lists the `top' biggest objects in the given environment # suggested by Fang Chen x <- sapply(ls(name,...),function(x) object.size(get(x))) if(top > length(x)) top = length(x); x = rev(rev(sort(x))[1:top]) if(pretty) x = format.bytes(x) x <- as.data.frame(x) colnames(x) <- "bytes" x } # smallest number such that 1+eps > 1 eps <- 1e-15 # returns the elements of x not indexed by i not <- function(x,i) { if(is.numeric(i)) return(x[-i]) if(is.character(i)) return(x[setdiff(names(x),i)]) if(is.logical(i)) return(x[!i]) } sort.levels <- function(f,x,fun=median) { if(length(x) == length(f)) x <- tapply(x,f,fun) if(length(x) != length(levels(f))) stop("wrong number of values to sort by") factor(f,levels=levels(f)[order(x)]) } # n is a vector of names # i is a vector of indices # merges names n[i] into a composite name # returns a shorter vector of names merge.names <- function(n,i=1:length(n)) { if(is.dim.ordered(n)) { first <- n[i[1]] last <- n[i[length(i)]] # what kind of range notation should we use? if(F) { split <- "\\.\\."; sep <- ".." } else { split <- "-"; sep <- "-" } s1 <- unlist(strsplit(first,split)) s2 <- unlist(strsplit(last,split)) # work around a bug in strsplit if(length(s2) == 1) s2 <- c("",s2) n[i] <- paste(s1[[1]], s2[[2]], sep=sep) } else { n[i] <- paste(n[i],collapse=",") } a <- attributes(n) n <- n[-i[-1]] attributes(n) <- a n } which2 <- function(x) { i <- which(x)-1 j <- floor(i/nrow(x)) i <- i - j*nrow(x) cbind(i+1,j+1) } which.max2 <- function(x) { i <- which.max(x)-1 j <- floor(i/nrow(x)) i <- i - j*nrow(x) c(i+1,j+1) } which.min2 <- function(x) { i <- which.min(x)-1 j <- floor(i/nrow(x)) i <- i - j*nrow(x) c(i+1,j+1) } zip <- function(a,b,fun="*") { fun <- match.fun(fun) r <- lapply(1:length(a), function(i) fun(a[[i]],b[[i]])) names(r) <- names(a) r } accumulate <- function(lst,fun="+") { fun <- match.fun(fun) r <- lst[[1]] for(i in 2:length(lst)) { #r <- r + lst[[i]] r <- fun(r,lst[[i]]) } r } # returns split(x,f) where f is a random factor # if p is a single number then f has two factors in the proportion (p,1-p) rsplit <- function(x,p) { if(is.data.frame(x)) n <- nrow(x) else n <- length(x) n1 <- ceiling(p*n) i <- c(rep(1,n1),rep(2,n-n1)) split(x,sample(i)) } is.dim.ordered <- function(d) { # dimensions are ordered by default a <- attr(d,"ordered") is.null(a) || a } dim.ordered <- function(x) { sapply(dimnames(x),is.dim.ordered) } set.dim.ordered <- function(x,d=seq(dim(x)),v=T) { for(i in d) { attr(dimnames(x)[[i]],"ordered") <- v } x } set.dim.unordered <- function(...) set.dim.ordered(...,v=F) # convert from interval to range notation interval2range <- function(x) { x <- unlist(strsplit(x,",")) x[1] <- substr(x[1],2,nchar(x[1])) x[1] <- format(as.numeric(x[1])) if(x[1] == "-Inf") x[1] <- "." x[2] <- substr(x[2],1,nchar(x[2])-1) x[2] <- format(as.numeric(x[2])) if(x[2] == "Inf") x[2] <- "." paste(x[1],x[2],sep="-") } # used by color.plot and predict.plot given rcut <- function(x,b) { f <- cut(x,b,include.lowest=T) levels(f) <- sapply(levels(f),interval2range) f } gradient <- function(x) { n <- length(x) g <- (diff(x[1:(n-1)])+diff(x[2:n]))/2 c(x[2]-x[1],g,x[n]-x[n-1]) } scale.range <- function(x) { (x-min(x,na.rm=T))/diff(range(x,na.rm=T)) } cut.quantile <- function(x,n=3,suffix="") { #if(missing(suffix)) suffix = deparse(substitute(x)) x[is.na(x)] <- 0 b <- quantile(unique(x),probs=seq(0,1,len=n+1),na.rm=T) if(length(suffix) == 1) { if(n == 5) labels <- c("low","med.low","med","med.high","high") else if(n == 4) labels <- c("low","med.low","med.high","high") else if(n == 3) labels <- c("low","med","high") else if(n == 2) labels <- c("low","high") else labels <- paste("cut",1:n,sep="") labels = paste(labels,suffix) } else labels = suffix cut(x,b,include=T,labels=labels) } indicators.factor <- function(y) { # convert a factor into a matrix of indicators # result is level by case r <- array(0,c(length(levels(y)),length(y)),list(levels(y),names(y))) for(lev in levels(y)) r[lev,y==lev] <- 1 r } reorder.rows <- function(y,i) { # indexes a matrix while preserving attributes if(is.na(nrow(y))) { y[i] } else { a <- attributes(y); y <- y[i,] if(!is.null(a$dimnames[[1]])) a$dimnames[[1]] <- a$dimnames[[1]][i] attributes(y) <- a y } } reorder.cols <- function(y,i) { # indexes a matrix while preserving attributes if(is.na(ncol(y))) { y[i] } else { a <- attributes(y); y <- y[,i] if(!is.null(a$dimnames[[2]])) a$dimnames[[2]] <- a$dimnames[[2]][i] attributes(y) <- a y } } ############################################################################## # Graphics tools auto.layout <- function(len) { layout <- c(1,len) din <- par("din") asp <- din[2]/din[1] if(asp > 1) { layout[2] <- ceiling(sqrt(len/asp)) layout[1] <- ceiling(len/layout[2]) } else { layout[1] = ceiling(sqrt(len*asp)) layout[2] = ceiling(len/layout[1]) } layout } auto.legend <- function(labels, col=1, lty, pch, text.col, ...) { # barplot could use an auto.legend usr <- par("usr") top <- usr[4] left <- usr[2]-max(strwidth(labels))-xinch(0.04) y <- cumsum(strheight(labels)+yinch(0.04)) if(missing(lty) && missing(pch)) { text.only <- T if(missing(text.col)) text.col <- col } for(i in 1:length(labels)) { text(left,top-y[i],labels[i],col=text.col[i],adj=0,...) } } auto.mar <- function(main="",xlab="x",ylab="y", xaxt=par("xaxt"),yaxt=par("yaxt"),las=par("las"),...) { mar = c(4.5,4,0,0.1) if(!is.null(main) && main != "") mar[3] = 1 if(is.null(xlab) || xlab == "") { if(xaxt == "n") mar[1] = 0.05 else mar[1] = 2.5 } if(is.null(ylab) || ylab == "") { if(yaxt == "n") mar[2] = 0 else mar[2] = 2 } if(las == 3 || las == 2) mar[1] = mar[1]*1.5 if(las == 1 || las == 2) mar[2] = mar[2]*1.5 mar } ############################################################################## # matlab compatibility rep.mat <- function(x,n,m) { array(rep(x,n*m),c(length(x)*n,m)) } rep.col <- function(x,n) { rep.mat(x,1,n) } rep.row <- function(x,n) { t(rep.col(x,n)) } eye <- function(n) { diag(rep(1,n)) } norm <- function(x) { sqrt(sum(x^2)) } # behaves like matlab "sum" dim.sum <- function(x,dim) { apply(x,setdiff(1:length(dim(x)),dim),sum) } scale.cols <- function(x,s) { x * rep.row(s,nrow(x)) } scale.rows <- function(x,s) { x * rep.col(s,ncol(x)) } ############################################################################## # data.frames hist.data.frame <- function(x,layout,breaks,...) { if(missing(layout)) layout <- auto.layout(length(x)) opar <- par(mfrow=layout) on.exit(par(opar)) for(i in names(x)) { if(missing(breaks)) breaks <- max(2,nclass.Sturges(x[[i]])) hist(x[[i]],xlab=i,main="",breaks=breaks,...) } } # sort the rows of a data frame (df) according to column f # f can also be a vector with length(f)==nrow(df) sort.data.frame <- function(df,f=ncol(df),...) { if(length(f) == 1) f <- df[[f]] else if(length(f) != nrow(df)) stop("length of sort vector doesn't match nrows") df[order(f,...),,drop=F] } sort.cells <- function(x) { sort.data.frame(as.data.frame(x)) } empty.data.frame <- function(col.names) { if(missing(col.names)) y <- NULL else { y <- sapply(col.names,function(x) NULL) } structure(y,class="data.frame") } rbind.extend <- function(df,df2) { # like rbind, but allows different numbers of columns (NAs inserted) x <- list() col.names <- unique(c(names(df),names(df2))) for(j in 1:length(col.names)) { k <- col.names[j] # must check in names first to avoid abbreviation match if(k %in% names(df)) v1 <- df[[k]] else v1 <- factor(rep(NA,nrow(df))) if(k %in% names(df2)) v2 <- df2[[k]] else v2 <- factor(rep(NA,nrow(df2))) # requires cfactor # force dispatch on second argument too x[[j]] <- cfactor(v1,v2) } names(x) <- col.names row.names <- make.unique(c(rownames(df),rownames(df2))) attr(x,"row.names") <- row.names class(x) <- "data.frame" x } rbind.extend <- function(df,df2) { if(nrow(df) == 0) return(df2) if(nrow(df2) == 0) return(df) not.1 <- setdiff(names(df2),names(df)) not.2 <- setdiff(names(df),names(df2)) if(length(not.1) > 0) df[not.1] <- rep(NA,nrow(df)) if(length(not.2) > 0) df2[not.2] <- rep(NA,nrow(df2)) col.names <- unique(c(names(df),names(df2))) x <- rbind(df[col.names],df2[col.names]) row.names <- make.unique(c(rownames(df),rownames(df2))) attr(x,"row.names") <- row.names x } cbind.extend <- function(df,df2) { # like cbind.data.frame, but pads with NA until rownames match if(is.null(df) || nrow(df) == 0) return(df2) if(is.null(df2) || nrow(df2) == 0) return(df) not.1 = setdiff(rownames(df2),rownames(df)) not.2 = setdiff(rownames(df),rownames(df2)) if(length(not.1) > 0) df[not.1,] = NA if(length(not.2) > 0) df2[not.2,] = NA r = cbind(df,df2[rownames(df),]) names(r) = c(names(df),names(df2)) r } # returns the name of the response variable # works for formulas, model frames, and fitted models response.var <- function(object) { if(inherits(object, "terms")) { a <- attributes(object) if(!a$response) return(character(0)) return(as.character(a$variables[2])) } if(is.null(attr(object,"terms"))) { if(is.data.frame(object)) { # shortcut to avoid make.names return(names(object)[length(object)]) } if(is.table(object)) return(response.var(terms(object))) if(is.list(object) || is.vector(object) || is.array(object)) return(NULL) } response.var(terms(object)) } # returns only the predictors which appear as bare terms predictor.vars <- function(object) { if(inherits(object, "terms")) { a <- attributes(object) pred = a$term.labels[a$order == 1] # drop I() terms pred = pred[substring(pred,1,2) != "I("] return(pred) } if(inherits(object,"data.frame") && is.null(attr(object,"terms"))) { # shortcut to avoid make.names return(names(object)[-length(object)]) } predictor.vars(terms(object)) } # returns all terms on the rhs, including higher-order terms predictor.terms <- function(object) { attr(terms(object),"term.labels") } as.data.frame.col <- function(x,n="V1") { frame = as.data.frame(as.matrix(x)) names(frame) = n frame } as.data.frame.row <- function(x,row.name="") { extra.args <- list(check.names=F,row.names=row.name) do.call("data.frame",append(as.list(x),extra.args)) } apply.df <- function(x,fun) { # must be done this way to prevent check.names (bug in data.frame) do.call("data.frame",append(lapply(x,fun),list(row.names=rownames(x),check.names=F))) } ############################################################################## # bug fixes aggregate.data.frame <- function(x, by, FUN, ...) { if(!is.data.frame(x)) x <- as.data.frame(x) if(!is.list(by)) stop("`by' must be a list") if(is.null(names(by))) names(by) <- paste("Group", seq(along = by), sep = ".") else { nam <- names(by) ind <- which(nchar(nam) == 0) names(by)[ind] <- paste("Group", ind, sep = ".") } y <- lapply(x, tapply, by, FUN, ..., simplify = FALSE) if(any(sapply(unlist(y, recursive = FALSE), length) > 1)) stop("`FUN' must always return a scalar") z <- y[[1]] d <- dim(z) w <- list() for (i in seq(along = d)) { j <- rep(rep(seq(1 : d[i]), prod(d[seq(length = i - 1)]) * rep(1, d[i])), prod(d[seq(from = i + 1, length = length(d) - i)])) # minka: preserve original levels f <- factor(dimnames(z)[[i]][j],levels=levels(by[[i]])) w[[names(by)[i]]] = f } #w <- w[which(!unlist(lapply(z, is.null))), ] y <- data.frame(w, lapply(y, unlist, use.names = FALSE)) #names(y) <- c(names(by), names(x)) y } # concatenate factors (should be built in) # unfortunately "sort" requires the original c(), which drops labels cfactor <- function(f1,f2=NULL) { if(length(f1) == 0) return(f2) else if(length(f2) == 0) return(f1) if(!is.factor(f1) || !is.factor(f2)) return(c(f1,f2)) all.levs <- unique(c(levels(f1),levels(f2))) factor(c(as.character(f1),as.character(f2)),levels=all.levs) } # desiderata for make.unique: # 1. if A is unique, make.unique(c(A,B)) preserves A # 2. make.unique(c(A,B)) = make.unique(c(make.unique(A),B)) # internal version # does not satisfy desideratum #2 # make.unique(c("a","a","a")) != make.unique(c(make.unique(c("a","a")),"a")) make.unique <- function(names) { # names is character vector if(mode(names) != "character") stop("names must be a character vector") while(any(dups <- duplicated(names))) { names[dups] <- paste(names[dups], seq(length = sum(dups)), sep = "") } names } # satifies both desiderata # make.unique(c("a","a","a.2","a")) == # make.unique(c(make.unique(c("a","a")),"a.2","a")) make.unique <- function(names,sep=".") { # names is character vector if(mode(names) != "character") stop("names must be a character vector") repeat { i <- which(duplicated(names)) if(length(i) == 0) break # loop duplicates for(j in i) { cnt <- 2 repeat { newnam <- paste(names[j],cnt,sep=sep) # compare to previous elements only if(!any(is.element(newnam,names[1:j]))) break cnt <- cnt + 1 } names[j] <- newnam } } names } make.names <- function(names, unique=F) { names <- .Internal(make.names(as.character(names))) # minka: change keywords i <- is.element(names, c("for","while","repeat","if","else","function")) if(any(i)) names[i] <- paste(names[i],".",sep="") if(unique) names <- make.unique(names) names } terms.data.frame <- function(x,env=parent.frame(),...) { fmla <- attr(x,"terms") if(is.null(fmla)) { # minka: assume the last column is the response nm <- make.names(names(x)) if(length(nm) > 1) { lhs <- nm[length(nm)] rhs <- nm[-length(nm)] } else { lhs <- NULL rhs <- nm } fmla <- terms(formula(paste(lhs,"~",paste(rhs,collapse="+")),env=env,...)) } fmla } formula.data.frame <- function(x,env=parent.frame(),...) { formula(terms(x,env=env),...) } terms.table <- function(x,...) { terms.data.frame(as.data.frame(x),...) } formula.default <- function (x,env=parent.frame(), ...) { if (!is.null(x$formula)) eval(x$formula) else if (!is.null(x$call$formula)) eval(x$call$formula) # minka: always return formula, not terms else if (!is.null(x$terms)) formula.terms(x$terms) else if (!is.null(attr(x, "formula"))) attr(x, "formula") else {form<-switch(mode(x), NULL = structure(NULL, class = "formula"), character = formula(eval(parse(text = x)[[1]])), call = eval(x), stop("invalid formula")) environment(form)<-env form } } update.default <- function (object, formula., ..., evaluate = TRUE) { call <- object$call if (is.null(call)) stop("need an object with call component") extras <- match.call(expand.dots = FALSE)$... if (!missing(formula.)) call$formula <- update.formula(formula(object), formula.) if(length(extras) > 0) { existing <- !is.na(match(names(extras), names(call))) ## do these individually to allow NULL to remove entries. for (a in names(extras)[existing]) call[[a]] <- extras[[a]] if(any(!existing)) { call <- c(as.list(call), extras[!existing]) call <- as.call(call) } } if(evaluate) { # minka: use environment of formula instead of parent.frame env<-environment(call$formula) if (is.null(env)) env<-parent.frame() eval(call,env) } else call } # should be built into R sum.data.frame <- function(x,...) sapply(x,function(y) sum(y,...)) mean.data.frame <- function(x,...) sapply(x,function(y) mean(y,...)) plot.default <- function(x, y=NULL, type="p", xlim=NULL, ylim=NULL, log="", main=NULL, sub=NULL, xlab=NULL, ylab=NULL, ann=par("ann"), axes=TRUE, frame.plot=axes, panel.first=NULL, panel.last=NULL, col=par("col"), bg=NA, pch=par("pch"), cex = 1, lty=par("lty"), lab=par("lab"), lwd=par("lwd"), asp=NA, ...) { xlabel <- if (!missing(x)) deparse(substitute(x)) ylabel <- if (!missing(y)) deparse(substitute(y)) # minka # treat character as factor if(mode(y) == "character") y <- factor(y,levels=unique(x)) if(mode(x) == "character") x <- factor(x,levels=unique(x)) # this works even if x and y are factors xy <- xy.coords(x, y, xlabel, ylabel, log) if(all(is.na(xy$x))) xy$x <- 1:length(x) xlab <- if (is.null(xlab)) xy$xlab else xlab ylab <- if (is.null(ylab)) xy$ylab else ylab xlim <- if (is.null(xlim)) range(xy$x[is.finite(xy$x)]) else xlim ylim <- if (is.null(ylim)) range(xy$y[is.finite(xy$y)]) else ylim plot.new() plot.window(xlim, ylim, log, asp, ...) panel.first plot.xy(xy, type, col=col, pch=pch, cex=cex, bg=bg, lty=lty, lwd=lwd, ...) panel.last if (axes) { # minka if(is.factor(x)) { lev <- levels(x) axis(1, at=1:length(lev), labels=lev, ...) } else axis(1, ...) # minka if(is.factor(y)) { lev <- levels(y) axis(2, at=1:length(lev), labels=lev, ...) } else axis(2, ...) } if (frame.plot) box(...) if (ann) title(main=main, sub=sub, xlab=xlab, ylab=ylab, ...) invisible() } model.frame.multinom <- function(object) { oc <- object$call oc[[1]] <- NULL do.call("model.frame",as.list(oc)) } model.frame.glm <- function (formula, data, na.action, ...) { if (is.null(formula$model)) { fcall <- formula$call fcall$method <- "model.frame" fcall[[1]] <- as.name("glm") # minka if (!is.null(formula$terms)) env <- environment(formula$terms) else env <- environment(fcall$formula) if (is.null(env)) env <- parent.frame() eval(fcall, env) } else formula$model } model.frame.loess <- model.frame.glm model.frame.nnet <- model.frame.glm # bugfix for ts library as.data.frame.ts <- function(x,optional) { nam <- deparse(substitute(x)) df <- as.data.frame.vector(as.vector(x)) names(df) <- nam cbind(data.frame(time=as.vector(time(x))),df) } barplot.default <- function(height, width = 1, space = NULL, names.arg = NULL, legend.text = NULL, beside = FALSE, horiz = FALSE, density = NULL, angle = 45, col = heat.colors(NR), border = par("fg"), main = NULL, sub = NULL, xlab = NULL, ylab = NULL, xlim = NULL, ylim = NULL, xpd = TRUE, axes = TRUE, axisnames = TRUE, cex.axis = par("cex.axis"), cex.names = par("cex.axis"), inside = TRUE, plot = TRUE, ...) { if (!missing(inside)) .NotYetUsed("inside", error = FALSE) if (!missing(border)) .NotYetUsed("border", error = FALSE) if (missing(space)) space <- if (is.matrix(height) && beside) c(0, 1) else 0.2 space <- space * mean(width) if (plot && axisnames && missing(names.arg)) names.arg <- if(is.matrix(height)) colnames(height) else names(height) if (is.vector(height)) { height <- cbind(height) beside <- TRUE } else if (is.array(height) && (length(dim(height)) == 1)) { height <- rbind(height) beside <- TRUE } else if (!is.matrix(height)) stop("`height' must be a vector or a matrix") if(is.logical(legend.text)) { if(legend.text && is.matrix(height)) legend.text <- rownames(height) else legend.text <- NULL } NR <- nrow(height) NC <- ncol(height) if (beside) { if (length(space) == 2) space <- rep(c(space[2], rep(space[1], NR - 1)), NC) width <- rep(width, length = NR * NC) } else { width <- rep(width, length = NC) height <- rbind(0, apply(height, 2, cumsum)) } delta <- width / 2 w.r <- cumsum(space + width) w.m <- w.r - delta w.l <- w.m - delta if (horiz) { if (missing(xlim)) xlim <- range(-0.01 * height, height, na.rm=TRUE) if (missing(ylim)) ylim <- c(min(w.l), max(w.r)) } else { if (missing(xlim)) xlim <- c(min(w.l), max(w.r)) if (missing(ylim)) ylim <- range(-0.01 * height, height, na.rm=TRUE) } if (beside) w.m <- matrix(w.m, nc = NC) if(plot) { ##-------- Plotting : # minka: removed xaxs="i",xpd nonsense plot.new() plot.window(xlim, ylim, ...) # Beware : angle and density are passed using R scoping rules xyrect <- function(x1,y1, x2,y2, horizontal = TRUE, ...) { if(horizontal) rect(x1,y1, x2,y2, angle = angle, density = density, ...) else rect(y1,x1, y2,x2, angle = angle, density = density, ...) } if (beside) { xyrect(0, w.l, c(height), w.r, horizontal=horiz, col = col) } else { for (i in 1:NC) { xyrect(height[1:NR, i], w.l[i], height[-1, i], w.r[i], horizontal=horiz, col = col) } } if (axisnames && !is.null(names.arg)) { # specified or from {col}names at.l <- if (length(names.arg) != length(w.m)) { if (length(names.arg) == NC) # i.e. beside (!) colMeans(w.m) else stop("incorrect number of names") } else w.m axis(if(horiz) 2 else 1, at = at.l, labels = names.arg, lty = 0, cex.axis = cex.names, ...) } if(!is.null(legend.text)) { legend.col <- rep(col, length = length(legend.text)) if((horiz & beside) || (!horiz & !beside)){ legend.text <- rev(legend.text) legend.col <- rev(legend.col) density <- rev(density) angle <- rev(angle) } xy <- par("usr") legend(xy[2] - xinch(0.1), xy[4] - yinch(0.1), legend = legend.text, angle = angle, density = density, fill = legend.col, xjust = 1, yjust = 1) } title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...) if(axes) axis(if(horiz) 1 else 2, cex.axis = cex.axis, ...) invisible(w.m) } else w.m } # Routines for controlling aspect ratio # taken from library(lattice) banking <- function (dx, dy) { if (is.na(dx)) NA else { if (is.list(dx)) { dy <- dx[[2]] dx <- dx[[1]] } if (length(dx) != length(dy)) stop("Non matching lengths") id <- dx != 0 & dy != 0 if (any(id)) { r <- abs(dx[id]/dy[id]) median(r) } else 1 } } # from Splus banking <- function(dx, dy, iter = 20, tolerance = 0.5) { dx <- abs(dx) dy <- abs(dy) if(sum(is.na(dx)) != 0) { warning("infinite in x, no banking") dx[is.na(dx)] <- 0 } if(sum(is.na(dy)) != 0) { warning("infinity in y, no banking") dy[is.na(dy)] <- 0 } if(sum(dx) == 0 || sum(dy) == 0) return(NA) good <- dx > 0 dx <- dx[good] dy <- dy[good] mad <- median(dy/dx) # initial guess ar <- if(mad > 0) mad else 1 radians.to.angle <- 180/pi for(i in 1:iter) { distances <- sqrt(dx^2 + (ar * dy)^2) orientations <- atan(ar * dy, dx) avg <- (radians.to.angle * sum(orientations * distances))/ sum(distances) if(abs(45 - avg) < tolerance) break ar <- ar * (45/avg) } ar } banking.fcn <- function(dx,dy,asp) { # want result to be 45 i = (dx != 0) & (dy != 0) dx = dx[i] dy = dy[i] len = sqrt(dx^2 + (asp*dy)^2) radians.to.angle <- 180/pi orient = abs(atan(asp*dy,dx)*radians.to.angle) #hist(orient) sum(orient*len)/sum(len) } # this function is broken auto.aspect <- function(x,y) { # returns an aspect ratio in user coordinates, suitable for asp= # x and y are vectors # NA specifies a break in the curve, just as in lines() if(is.list(x)) { y = x[[2]] x = x[[1]] } dx = diff(x); dy = diff(y) i <- !is.na(dx) & !is.na(dy) dx <- dx[i]; dy <- dy[i] asp = banking(dx,dy) #print(banking.fcn(dx,dy,asp)) asp } # this version doesn't preserve margins properly set.aspect <- function(aspect,margins=T) { aspect <- c(1, aspect) din <- par("din") pin <- aspect*min(din/aspect) if(margins) { # mai is (bottom,left,top,right) m <- din-pin mai <- c(sum(par("mai")[c(2,4)]), sum(par("mai")[c(1,3)])) pin <- din - pmax(m, mai) } par(pin=pin) } reset.aspect <- function() { mai <- c(sum(par("mai")[c(2,4)]), sum(par("mai")[c(1,3)])) par(pin=par("din")-mai) } # Sets graphics parameters so the next plot has aspect ratio asp. # It is okay to run set.aspect repeatedly without resetting the graphics # parameters. # # To reset: # reset.aspect() # or: # opar <- set.aspect(a); par(opar) # # In R, it is also possible to force a given aspect ratio using # layout(1,width=1,height=aspect,respect=T) # and reset with # layout(1) # However, the aspect ratio it gives is not quite correct. # # Test: # set.aspect(2); plot(runif(100)); reset.aspect() # layout(1,width=1,height=2,respect=T); plot(runif(100)); layout(1) # set.aspect <- function(aspect,margins=T) { fin <- par("fin") a <- aspect*fin[1]/fin[2] # plt is (left,right,bottom,top) if(a > 1) plt <- c(0.5-1/a/2,0.5+1/a/2,0,1) else plt <- c(0,1,0.5-a/2,0.5+a/2) if(margins) { # add room for margins # mai is (bottom,left,top,right) mplt <- par("mai")[c(2,4,1,3)]/c(fin[1],fin[1],fin[2],fin[2]) mplt[c(2,4)] <- 1-mplt[c(2,4)] # compose plt and mplt plt[1:2] <- mplt[1] + (mplt[2]-mplt[1])*plt[1:2] plt[3:4] <- mplt[3] + (mplt[4]-mplt[3])*plt[3:4] } par(plt=plt) } reset.aspect <- function() { if(F) { fin <- par("fin") mplt <- par("mai")[c(2,4,1,3)]/c(fin[1],fin[1],fin[2],fin[2]) mplt[c(2,4)] <- 1-mplt[c(2,4)] par(plt=mplt) } else { # simpler way par(mar=par("mar")) } } # Routines for manipulating colors and making color plots # Tom Minka # intended for white background gray.colors <- function(n) gray(rev(0:(n-1))/n) default.colors <- function(n) ((0:(n-1))%%6+1) # good for objects which are being outlined (e.g. pies) default.colors.w <- function(n) c(8,seq(len=n-1)) # these are from # http://www.personal.psu.edu/faculty/c/a/cab38/ColorBrewerBeta2.html OrRd.colors <- function(n) { if(n == 7) c("#fef0d9","#fdd49e","#fdbb84","#fc8d59","#ef6548","#d7301f","#99000d") else if(n == 6) c("#fef0d9","#fdd49e","#fdbb84","#fc8d59","#e34a33","#b2000d") else if(n == 5) c("#fef0d9","#fdcc8a","#fc8d59","#e34a33","#b2000d") else if(n == 4) c("#fef0d9","#fdcc8a","#fc8d59","#d7301f") else if(n == 3) c("#fee8c8","#fdbb84","#e34a33") else if(n == 2) c("#fdbb84","#e34a33") else stop("need a different number of levels") } YlGnBu.colors <- function(n) { if(n == 8) c("#ffffd9","#edf8b0","#c7e9b4","#7fcdbb","#41b6c3","#1d91c0","#225ea8","#0c2c83") else if(n == 7) c("#ffffcc","#c7e9b4","#7fcdbb","#41b6c3","#1d91c0","#386cb0","#0c2c83") else if(n == 6) c("#ffffcc","#c7e9b4","#7fcdbb","#41b6c3","#2c7fb8","#253494") else if(n == 5) c("#ffffcc","#a1dab4","#41b6c3","#2c7fb8","#253494") else if(n == 4) c("#ffffcc","#a1dab4","#41b6c3","#386cb0") else if(n == 3) c("#edf8b0","#7fcdbb","#2c7fb8") else if(n == 2) c("yellow","blue") else stop("need a different number of levels") } # diverging BrBg.colors <- function(n) { if(n == 7) c("#8c510a","#d8b365","#f6e8c3","#f5f5f5","#c7eae5","#5ab4ac","#1665e") else if(n == 6) c("#8c510a","#d8b365","#f6e8c3","#c7eae5","#5ab4ac","#1665e") else if(n == 5) c("#a6611a","#dfc27d","#f5f5f5","#80cdc1","#18571") else if(n == 4) c("#a6611a","#dfc27d","#80cdc1","#18571") else if(n == 3) c("#d8b365","#f5f5f5","#5ab4ac") else if(n == 2) c("#d8b365","#5ab4ac") else stop("need a different number of levels") } RYB.colors <- function(n) { if(n == 7) c("#d73d29","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4") else if(n == 6) c("#d73d29","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4") else if(n == 5) c("#ca373b","#fdae61","#ffffbf","#abd9e9","#2c7bb6") else if(n == 4) c("#ca373b","#fdae61","#abd9e9","#2c7bb6") else if(n == 3) c("#fc8d59","#ffffbf","#91bfdb") else if(n == 2) c("red","blue") else stop("need a different number of levels") } # sequential WYRK YR.colors <- function(k,lt=0.97,dk=0.03) { r <- c(lt, rep(lt,k), rep(lt,k), seq(lt,dk,len=k+1)[-1]) g <- c(lt, rep(lt,k), seq(lt,dk,len=k+1)[-1], rep(dk,k)) b <- c(lt, seq(lt,dk,len=k+1)[-1], rep(dk,k), rep(dk,k)) i <- seq(1,length(r),len=k) rgb(r[i],g[i],b[i]) } # diverging diverging.colors <- function(n,hmax=0,hmin=(0.5+hmax)%%1,v=1) { if(n %% 2 == 1) { # odd case k <- n%/%2 x <- seq(1,0,len=k+1) c(hsv(h = hmin, s = x, v), hsv(h = hmax, s = rev(x[1:k]), v)) } else { # even case k <- n%/%2 x <- seq(1,-1,len=n)[1:k] c(hsv(h = hmin, s = x, v), hsv(h = hmax, s = rev(x), v)) } } RC.colors <- diverging.colors GM.colors <- function(n) diverging.colors(n,2/6) # returns a new vector of darker colors darker <- function(col,scale=0.8) { crgb <- col2rgb(col)/255*scale if(T) { # handle black specially i <- which(apply(crgb,2,sum)==0) if(length(i)>0) { crgb[,i] <- rep.col(c(1,1,1)/10,length(i)) } } rgb(crgb[1,],crgb[2,],crgb[3,]) } # note: pch 21 looks like pch 1, but is actually a filled circle #plot(y,pch=21,bg=hsv(6/12,1,1)) color.key <- function(col,pch,names,breaks,digits=2,cex=0.75) { if(is.null(col)) col <- rep(1,length(pch)) rx <- range(par("usr")[1:2]) ry <- range(par("usr")[3:4]) if(T && !missing(pch) && !is.null(pch)) { # key is par("csi")*cex inches ry[1] <- ry[2] - diff(ry)/par("pin")[2]*par("csi")*cex } else { # key is 0.06 inches high ry[1] <- ry[2] - diff(ry)/par("pin")[2]*0.06 } if(!missing(names)) n <- length(names) else if(!missing(breaks)) n <- length(breaks)-1 else n <- length(col) i <- seq(0,1,len=n+1) x1 <- rx[1]+i[-length(i)]*diff(rx) x2 <- rx[1]+i[-1]*diff(rx) if(n == 0) stop("n == 0") col <- rep(col,ceiling(n/length(col)))[1:n] rect(x1, ry[1], x2, ry[2], col=col, border=NA) if(!missing(pch) && !is.null(pch)) points((x1+x2)/2, rep(mean(ry),n), col=8, pch=pch, cex=0.75) if(!missing(names)) mtext(names,at=(x1+x2)/2,cex=cex) if(!missing(breaks)) { x <- rx[1] + i*diff(rx) names <- prettyNum(formatC(breaks,format="fg",digits=digits),big.mark=",") #names = format(breaks,digits=digits,trim=T) mtext(names[1],at=x[1],cex=cex,adj=0) mtext(names[n+1],at=x[n+1],cex=cex,adj=1) mtext(names[2:n],at=x[2:n],cex=cex,adj=0.5) } } # extends r=c(min,max) by 4%, to mimic plotting limits extend.pct <- function(r,pct=0.04) { m <- diff(r)*pct if(m == 0) m <- 0.432*abs(r[1]) if(m == 0) m <- 1.08 c(r[1]-m,r[2]+m) } extend.inches <- function(r,upper=0,lower=0) { # extends the given range by a specified number of inches of each side. # upper and lower are actually inches/plot.size # constraints: # (r.new[2] - r[2])/diff(r.new) = upper # (r[1] - r.new[1])/diff(r.new) = lower r.new <- r a = lower/(1-upper) r.new[1] <- (r[1] + a*r[2])/(1-a) r.new[2] <- (r[2] - upper*r.new[1])/(1-upper) r.new } # returns the minimum user limits for a plotting region of size pin # which would enclose the given labels at the given positions # with the justification adj at rotation srt. range.text <- function(x,y,labels,cex=NULL,adj=NULL,srt=0, pin=par("pin")) { lab.w <- strwidth(labels,"inches",cex=cex) lab.h <- strheight(labels,"inches",cex=cex) # bounding box of text, columns are (x1,y1)(x2,y1)(x2,y2)(x1,y2) n <- length(labels) ix <- seq(1,8,by=2) iy <- seq(2,8,by=2) hull <- array(0,c(n,8)) hull[,c(1,7,2,4)] <- rep(0,n) hull[,c(3,5,6,8)] <- rep(1,n) # put adj point at origin and scale if(is.null(adj)) adj <- c(0.5,0.5) if(length(adj) == 1) adj <- c(adj,0.5) hull[,ix] <- (hull[,ix] - adj[1])*rep.col(lab.w,4) hull[,iy] <- (hull[,iy] - adj[2])*rep.col(lab.h,4) # rotate srt <- srt/180*pi cr <- cos(srt) sr <- sin(srt) R <- array(c(cr,-sr,sr,cr),c(2,2)) R <- diag(4) %x% R hull <- hull %*% R bbox <- list() bbox$left <- apply(hull[,ix],1,min)/pin[1] bbox$right <- apply(hull[,ix],1,max)/pin[1] bbox$bottom <- apply(hull[,iy],1,min)/pin[2] bbox$top <- apply(hull[,iy],1,max)/pin[2] # solve constraints xmid <- mean(range(x)) ymid <- mean(range(y)) xlim <- c() # these come from the constraints # (x-xmin)/(xmax-xmin) + left > 0 # (x-xmin)/(xmax-xmin) + right < 1 # where xmax is fixed at 2*xmid - xmin min1 <- min((x + 2*bbox$left*xmid)/(1+2*bbox$left)) min2 <- min((2*(1-bbox$right)*xmid - x)/(1-2*bbox$right)) xlim[1] <- min(min1,min2) xlim[2] <- 2*xmid - xlim[1] ylim <- c() min1 <- min((y + 2*bbox$bottom*ymid)/(1+2*bbox$bottom)) min2 <- min((2*(1-bbox$top)*ymid - y)/(1-2*bbox$top)) ylim[1] <- min(min1,min2) ylim[2] <- 2*ymid - ylim[1] c(xlim,ylim) } range.aspect <- function(asp,lim=par("usr"),pin=par("pin")) { # returns c(xlim,ylim), strictly bigger than lim, which satisfy asp # simulates the effect of plot.window(asp) xlim <- lim[1:2] ylim <- lim[3:4] dx <- diff(xlim)/pin[1] dy <- diff(ylim)/pin[2] if(dy > asp*dx) { dx <- dy/asp*pin[1] xlim <- mean(xlim) + c(-dx/2,dx/2) } else { dy <- asp*dx*pin[2] ylim <- mean(ylim) + c(-dy/2,dy/2) } c(xlim,ylim) } default.pch <- c(1,41,4,20,35,38,2,17) sequential.pch <- c(20,41,4,1,38,35,11,19) sequential.pch.paper <- c(45,41,4,1,38,35,11,19) color.plot <- function(object, ...) UseMethod("color.plot") color.plot.formula <- function(formula,data=parent.frame(),...) { x <- model.frame.default(formula,data,na.action=na.pass) color.plot.data.frame(x,...) } color.plot.data.frame <- function(x,z,zlab,xlab=NULL,ylab=NULL,labels=F,...) { if(missing(z)) { pred <- predictor.vars(x) resp <- response.var(x) z <- x[[resp]] if(missing(zlab)) zlab <- resp } else { pred <- colnames(x) if(missing(zlab)) zlab <- deparse(substitute(z)) } if(length(pred) < 2) stop("must have two predictors to plot") if(is.logical(labels)) { labels <- if(labels) rownames(x) else NULL } if(is.null(xlab)) xlab <- pred[1] if(is.null(ylab)) ylab <- pred[2] color.plot.default(x[[pred[1]]],x[[pred[2]]],z,labels=labels, xlab=xlab,ylab=ylab,zlab=zlab,...) } color.plot.default <- function(x,y,z,labels=NULL,data=parent.frame(), xlab,ylab,zlab, xlim=NULL,ylim=NULL, axes=T,key=T,add=F,nlevels=4, color.palette=default.colors,col, pch.palette=default.pch,pch, jitter=F,digits=2,mar,bg, cex=par("cex"),yaxt="s",...) { # continous responses are automatically quantized into nlevels levels, with # colors given by col. # jitter is only a suggestion. jittering will only be done if necessary. if(missing(xlab)) xlab <- deparse(substitute(x)) if(missing(ylab)) ylab <- deparse(substitute(y)) if(missing(zlab)) zlab <- deparse(substitute(z)) x <- eval(substitute(x),data) y <- eval(substitute(y),data) z <- eval(substitute(z),data) # treat character as factor if(mode(x) == "character") x <- factor(x,levels=unique(x)) if(mode(y) == "character") y <- factor(y,levels=unique(x)) if(is.numeric(z)) { b <- as.numeric(quantile(unique(z), seq(0,1,len=nlevels+1),na.rm=T)) f <- rcut(z,b) } else { f <- factor(z) } nlevels <- length(levels(f)) if(nlevels == 0) stop("0 levels in factor") # lev.col is the color of each level # lev.pch is the symbol of each level # if there are too few colors, recycle them and change pch lev <- 1:nlevels # col is the color of the individual points q <- as.numeric(f) if(missing(col)) { if(mode(color.palette) == "function") color.palette <- color.palette(nlevels) lev.col <- color.palette[((lev-1) %% length(color.palette))+1] col <- lev.col[q] } else { lev.col <- NULL } if(missing(pch)) { lev.pch <- pch.palette[(((lev-1) %/% length(color.palette)) %% length(pch.palette))+1] pch <- lev.pch[q] } else { lev.pch <- NULL } # setup plot window if(!add) { if(missing(mar)) { # mar is c(bottom, left, top, right) if(axes) { mar = auto.mar() if(yaxt=="n") mar[2] = 0 mar[3] = if(key) 2 else 1 } else { mar <- c(0.05,0,if(key) 1 else 0,0.05) } } # change mar permanently so we can add things on top par(mar=mar) if(!missing(bg)) { opar <- par(bg=bg) on.exit(par(opar)) } } # xlim,ylim needed for jittering and label placement if(add) { xlim <- par("usr")[1:2] ylim <- par("usr")[3:4] } else { if(is.null(labels)) { if(is.null(xlim)) xlim <- range(as.numeric(x),na.rm=T) if(is.null(ylim)) ylim <- range(as.numeric(y),na.rm=T) if(key) { # make room at the top of the plot for the key if(length(unique(lev.pch)) == 1) { a = 0.06/par("pin")[2] } else { a = par("csi")*cex/par("pin")[2] } ylim = extend.inches(ylim,a) } } else { plot.new() lab.lim <- range.text(x,y,labels,cex) # make room for labels if(is.null(xlim)) xlim <- lab.lim[1:2] if(is.null(ylim)) ylim <- lab.lim[3:4] } } if(jitter) { # this test needs to be less strict. need to consider actual figure size. if(length(levels(factor(x))) < length(x)) { cat("jittering",xlab,"\n") jitter1 <- (runif(length(x))-0.5)*diff(extend.pct(xlim))/100 x <- x+jitter1 } if(length(levels(factor(y))) < length(y)) { cat("jittering",ylab,"\n") jitter2 <- (runif(length(y))-0.5)*diff(extend.pct(ylim))/100 y <- y+jitter2 } } if(add) { if(!is.null(labels)) { return(text(x, y, labels=labels, col=col, cex=cex, ...)) } else { return(points(x, y, pch=pch, col=col,...)) } } # from here on, add=F if(!is.null(labels)) { plot.default(x, y, xlim=xlim, ylim=ylim, type="n", xlab=xlab,ylab=ylab,main="",axes=axes,yaxt=yaxt,...) text(x, y, labels=labels, col=col, cex=cex, ...) } else { plot.default(x, y, xlim=xlim, ylim=ylim, xlab=xlab,ylab=ylab,main="", pch=pch,col=col,axes=axes,yaxt=yaxt,cex=cex,...) } if(key) { # don't draw symbols in the key if there is only one type if(length(unique(lev.pch)) == 1) lev.pch <- NULL if(is.numeric(z)) color.key(lev.col,lev.pch,breaks=b,digits=digits) else color.key(lev.col,lev.pch,levels(f)) if(axes) title(zlab,line=1) } else if(axes) title(zlab) } # tests: # color.plot(runif(10),runif(10),c(rep("a",5),rep("b",5))) # color.plot(runif(10),runif(10),c(rep("a",5),rep("b",5)),rep("foo",10)) # this part same as color.plot text.plot <- function(object,...) UseMethod("text.plot") text.plot.formula <- function(formula,data=parent.frame(),...) { x <- model.frame.default(formula,data,na.action=na.pass) text.plot.data.frame(x,...) } text.plot.data.frame <- function(x,labels,xlab,ylab,...) { pred <- predictor.vars(x)[1] resp <- response.var(x) x1 <- x[[pred]] x2 <- x[[resp]] if(missing(labels)) labels <- rownames(x) if(missing(xlab)) xlab = pred if(missing(ylab)) ylab = resp text.plot.default(x1,x2,labels,xlab=xlab,ylab=ylab,...) } text.plot.default <- function(x,y,labels,xlab,ylab,xlim=NULL,ylim=NULL, main="",asp=NULL, cex=par("cex"),adj=NULL,srt=0,axes=T,...) { if(missing(xlab)) xlab <- deparse(substitute(x)) if(missing(ylab)) ylab <- deparse(substitute(y)) plot.new() lim <- range.text(x,y,labels,cex,adj,srt) # make room for labels if(is.null(xlim)) xlim <- lim[1:2] if(is.null(ylim)) ylim <- lim[3:4] if(F) { plot(x,y,xlab=xlab,ylab=ylab,xlim=xlim,ylim=ylim,axes=axes, main=main,...,type="n") } else { plot.window(xlim,ylim,asp=asp) if(axes) { axis(1); axis(2); box() } title(main,xlab=xlab,ylab=ylab) } text(x,y,labels=labels,cex=cex,adj=adj,srt=srt,...) } # test: text.plot(runif(10),runif(10),rep("a",10)) # Plot a regression surface. # # clip specifies a polygon in (x,y) over which the surface is to be defined. # possible values are F (no clipping), T (clip to the convex hull of the # data), a list(x,y), or a matrix with two columns. # # if fill=T, z values outside zlim will not be plotted, # i.e. the corresponding regions will be white. # the default zlim is the range of the data. # color.plot.loess <- function(object,x,res=50,add=F,fill=F,equal=F,drawlabels=F, levels=NULL,nlevels=if(is.null(levels)) 4 else length(levels), col, clip=T, zlim, bg=par("bg"), lwd=par("lwd"), color.palette=if(fill)topo.colors else default.colors,...) { if(missing(x)) x <- model.frame(object) else x <- model.frame(terms(object),x) pred <- predictor.vars(object) if(length(pred) == 1) return(plot.loess(object,x,add=add,lwd=lwd,...)) if(mode(color.palette) == "function") { color.palette <- color.palette(nlevels) } if(!add && !fill) color.plot.data.frame(x,levels=levels,nlevels=nlevels, color.palette=color.palette,bg=bg,...) # use the par options set by color.plot.data.frame # x is only used to get plotting range resp <- response.var(object) pred <- predictor.vars(object) # this is ugly, but otherwise recursively calling color.plot.data.frame is hard if(is.null(...$xlim)) { if(add || !fill) xlim <- range(par("usr")[1:2]) else xlim <- range(x[[pred[1]]]) } else xlim <- ...$xlim x1 <- seq(xlim[1],xlim[2],length=res) if(is.null(...$ylim)) { if(add || !fill) ylim <- range(par("usr")[3:4]) else ylim <- range(x[[pred[2]]]) } else ylim <- ...$ylim x2 <- seq(ylim[1],ylim[2],length=res) xt <- expand.grid(x1,x2) names(xt) <- pred[1:2] z <- predict(object,xt) if(clip != F && length(pred) >= 2) { if(clip == T) { i <- chull(x[pred]) clip <- x[pred][i,] } z[!in.polygon(clip,xt)] <- NA } if(missing(zlim)) { zlim <- range(x[[resp]]) z[z < zlim[1]] <- zlim[1] z[z > zlim[2]] <- zlim[2] } dim(z) <- c(length(x1),length(x2)) if(is.null(levels)) { if(equal) levels <- seq(zlim[1],zlim[2],len=nlevels+1) else { r <- unique(x[[resp]]) #if(!missing(zlim)) { r <- r[r >= zlim[1] & r <= zlim[2]] r <- c(r,zlim) #} levels <- as.numeric(quantile(r, seq(0,1,len=nlevels+1),na.rm=T)) } } if(fill) { filled.contour(x1,x2,z,levels=levels, col=color.palette,xlab=pred[1],ylab=pred[2],main=resp,...) if(!add) color.plot.data.frame(x,col=1,add=T,...) } else { color.palette <- c(color.palette,color.palette[length(color.palette)]) contour(x1,x2,z,add=T,col=color.palette,levels=levels,drawlabels=drawlabels,lwd=lwd,...) } } color.plot.glm <- function(object,data,jitter=T,add=F,se=F,col=3,lwd=2,...) { if(missing(data)) data = model.frame(object) if(!add) color.plot.data.frame(data) # use the par options set by color.plot.data.frame w <- coef(object) z <- log(0.75/0.25) if(length(w) == 3) { abline(-w[1]/w[3],-w[2]/w[3],col=col,lwd=lwd,...) if(se) { abline((-z-w[1])/w[3],-w[2]/w[3],col=col,lty=2,lwd=lwd,...) abline((z-w[1])/w[3],-w[2]/w[3],col=col,lty=2,lwd=lwd,...) } } else { stop("use color.plot.knn") # two predictors with an interaction term # boundary is an ellipse xlim <- par("usr")[1:2] x <- seq(xlim[1],xlim[2],length=50) y <- (0-w[1]-w[2]*x)/(w[3]+w[4]*x) i <- (w[3]+w[4]*x < 0) lines(x[i],y[i],col=col,...) lines(x[!i],y[!i],col=col,...) if(se) { for(q in c(-z,z)) { y <- (q-w[1]-w[2]*x)/(w[3]+w[4]*x) lines(x[i],y[i],col=col,lty=2,...) lines(x[!i],y[!i],col=col,lty=2,...) } } } } color.plot.multinom <- color.plot.glm # Plot classification boundaries. # If pclass is given, the probability of being in that class is plotted. # # levels and col are only used if pclass != NULL # color.plot.knn <- function(object,x,pclass=NULL,fill=F,res=50, levels=c(0.5),add=F,axes=T,lwd=2, color.palette=default.colors,col=3,drawlabels=F,...) { resp <- response.var(object) pred <- predictor.vars(object) if(missing(x)) x <- model.frame(object) else if(length(pred) == 1) { # take an unused variable from x pred[2] = setdiff(colnames(x),c(resp,pred))[1] fmla = formula(paste(resp,"~",paste(pred,collapse="+"))) x = model.frame(fmla,x) } else x <- model.frame(terms(object),x) if(!add && !fill) color.plot.data.frame(x,color.palette=color.palette,...) # use the par options set by color.plot.data.frame # x is only used to get plotting range # this is ugly, but otherwise recursively calling color.plot.data.frame is hard if(is.null(...$xlim)) { if(add || !fill) xlim <- range(par("usr")[1:2]) else xlim <- range(x[[pred[1]]]) } else xlim <- ...$xlim x1 <- seq(xlim[1],xlim[2],length=res) # allow > 2 for derived features if(length(pred) >= 2) { if(is.null(...$ylim)) { if(add || !fill) ylim <- range(par("usr")[3:4]) else ylim <- range(x[[pred[2]]]) } else ylim <- ...$ylim x2 <- seq(ylim[1],ylim[2],length=res) xt <- expand.grid(x1,x2) names(xt) <- pred[1:2] } else { xt <- data.frame(x1) } prob.plot <- !is.null(pclass) if(prob.plot) { # z is the probability of a class if(inherits(object,"glm") || inherits(object,"gam") || inherits(object,"nnet")) { if(inherits(object,"glm") || inherits(object,"gam")) { z <- predict(object,xt,type="response") } else { z = predict(object,xt) } y <- model.frame(object)[[resp]] if(is.character(pclass)) pclass <- pmatch(pclass, levels(y)) if(pclass == 1) { z <- 1-z lev <- levels(y)[1] main <- paste("Pr(",resp," = ",lev,")",sep="") } else { if(pclass > length(levels(y))) stop("pclass exceeds the number of classes") if(length(levels(y)) == 2) { lev <- levels(y)[2] main <- paste("Pr(",resp," = ",lev,")",sep="") } else { lev <- levels(y)[1] main <- paste("Pr(",resp," != ",lev,")",sep="") } } } else { z <- predict(object,xt,type="vector") if(is.character(pclass)) pclass <- pmatch(pclass, colnames(z)) lev <- colnames(z)[pclass] main <- paste("Pr(",resp," = ",lev,")",sep="") z <- z[,pclass] } } else { # z is the class if(inherits(object,"glm") || inherits(object,"gam")) { p <- predict(object,xt,type="response") y <- model.frame(object)[[resp]] z <- factor(levels(y)[as.numeric(p > 0.5)+1], levels=levels(y)) } else { z <- factor(predict(object,xt,type="class")) } } dim(z) <- c(length(x1),length(x2)) if(fill) { if(prob.plot) { filled.contour(x1,x2,z, plot.axes={ title(main=main,xlab=pred[1],ylab=pred[2]);axis(1);axis(2); if(!add) color.plot.data.frame(x,col=1,add=T,...) },...) } else { lev <- levels(z) if(is.function(color.palette)) col <- color.palette(length(lev)) else col <- color.palette actual.lev <- levels(factor(z)) icol <- col[lev %in% actual.lev] z <- as.numeric(z) dim(z) <- c(length(x1),length(x2)) colorbar <- (length(col) > 1) if(!add) { # change mar permanently so we can add things on top if(axes) { mar = auto.mar() mar[3] = if(colorbar) 2 else 1 par(mar=mar) } else { par(mar=c(0.05,0,0,0.1)) } image(x1,x2,z,col=icol,main="",xlab=pred[1],ylab=pred[2]) # assume a light color scheme (else should be lighter) dcol <- darker(col) color.plot.data.frame(x,add=T,col=dcol,...) } else image(x1,x2,z,col=icol,add=T) if(colorbar) { color.key(col,lev) if(!add) title(resp,line=1) } else if(!add) title(resp,line=0) } } else { # not filled if(prob.plot) { cat("Plotting contour for",main,"=",levels,"\n") contour(x1,x2,z,add=T, levels=levels,col=col,lwd=lwd, drawlabels=drawlabels,...) } else { nlevels <- length(levels(z)) z <- as.numeric(z) dim(z) <- c(length(x1),length(x2)) levels <- 0.5+(1:(nlevels-1)) contour(x1,x2,z,add=T,levels=levels,col=col,lwd=lwd, drawlabels=drawlabels,...) } } } ############################################################################## # bug fix # minka: added `key' and `main' arguments filled.contour <- function (x = seq(0, 1, len = nrow(z)), y = seq(0, 1, len = ncol(z)), z, xlim = range(x, finite=TRUE), ylim = range(y, finite=TRUE), zlim = range(z, finite=TRUE), levels = pretty(zlim, nlevels), nlevels = 20, color.palette = cm.colors, col = color.palette(length(levels) - 1), plot.title, plot.axes, key.title, key.axes, key=T, asp = NA, xaxs="i", yaxs="i", las = 1, axes = TRUE, main="", ...) { if (missing(z)) { if (!missing(x)) { if (is.list(x)) { z <- x$z y <- x$y x <- x$x } else { z <- x x <- seq(0, 1, len = nrow(z)) } } else stop("no `z' matrix specified") } else if (is.list(x)) { y <- x$y x <- x$x } if (any(diff(x) <= 0) || any(diff(y) <= 0)) stop("increasing x and y values expected") if(key) { mar.orig <- (par.orig <- par(c("mar","las","mfrow")))$mar on.exit(par(par.orig)) par(las = las) w <- (3 + mar.orig[2]) * par('csi') * 2.54 layout(matrix(c(2, 1), nc=2), widths=c(1, lcm(w))) ## Plot the `plot key' (scale): mar <- mar.orig mar[4] <- mar[2] mar[2] <- 1 par(mar = mar) plot.new() plot.window(xlim=c(0,1), ylim=range(levels), xaxs="i", yaxs="i") rect(0, levels[-length(levels)], 1, levels[-1], col = col) if (missing(key.axes)) { if (axes) axis(4) } else key.axes box() if (!missing(key.title)) key.title } ## Plot contour-image:: if(key) { mar <- mar.orig mar[4] <- 1 par(mar=mar) } plot.new() plot.window(xlim, ylim, "", xaxs=xaxs, yaxs=yaxs, asp=asp) if (!is.matrix(z) || nrow(z) <= 1 || ncol(z) <= 1) stop("no proper `z' matrix specified") if (!is.double(z)) storage.mode(z) <- "double" .Internal(filledcontour(as.double(x), as.double(y), z, as.double(levels), col = col)) if (axes) { if (missing(plot.axes)) { # minka: no title here axis(1) axis(2) } else plot.axes if (missing(plot.title)) title(main=main,...) else plot.title } box() invisible() } ############################################################################## rgb2name <- function(r,g,b) { if(missing(g)) { r <- as.matrix(r) if(ncol(r) != 3) { if(nrow(r) == 3) r <- t(r) else stop("matrix must have 3 rows or 3 columns") } } else { r <- array(c(r,g,b),c(length(r),3)) } if(max(r) > 1) r <- r/255 cc <- colors() # remove duplicates #cc <- setdiff(cc, c("gray100","grey100")) crgb <- col2rgb(cc)/255 # distance matrix d <- sqdist(r,t(crgb)) cc[apply(d, 1, which.min)] } col2hsv <- function(col) { rgb2hsv(col2rgb(col)) } rgb2hsv <- function(r,g,b) { if(missing(g)) { if(nrow(r) != 3) { if(ncol(r) == 3) r <- t(r) else stop("matrix must have 3 rows or 3 columns") } b <- r[3,] g <- r[2,] r <- r[1,] } n <- length(r) lim <- array(0,c(2,n)) lim[1,] <- pmin(r,g,b) lim[2,] <- pmax(r,g,b) rang <- lim[2,]-lim[1,] v <- lim[2,] s <- v i <- (lim[2,] > 0) s[!i] <- 0 s[i] <- rang[i]/lim[2,i] h <- v h[s == 0] <- 0 i <- (s > 0) & (r == lim[2,]) h[i] <- (g[i] - b[i])/rang[i]/6 i <- (s > 0) & (g == lim[2,]) h[i] <- (2 + (b[i]-r[i])/rang[i])/6 i <- (s > 0) & (b == lim[2,]) h[i] <- (4 + (r[i]-g[i])/rang[i])/6 i <- (h < 0) h[i] <- h[i] + 1 t(array(c(h,s,v),c(n,3),list(NULL,c("hue","saturation","value")))) } test.col2hsv <- function() { h <- col2hsv(palette()) print(palette()) print(sapply(1:ncol(h), function(i) hsv(h[1,i],h[2,i],h[3,i]))) } hsv2cone <- function(h,s,v) { if(missing(s)) { if(nrow(h) != 3) { if(ncol(h) == 3) h <- t(h) else stop("matrix must have 3 rows or 3 columns") } v <- h[3,] s <- h[2,] h <- h[1,] } x <- s*v*cos(h*2*pi) y <- s*v*sin(h*2*pi) z <- v n <- length(x) t(array(c(x,y,z),c(n,3),list(NULL,c("x","y","z")))) } cone2hsv <- function(x,y,z) { if(missing(y)) return(cone2hsv(x[1,],x[2,],x[3,])) v <- z h <- (atan2(y,x)/2/pi + 1) %% 1 s <- sqrt(x^2 + y^2)/v i <- which(s > 1 | is.na(s)) if(length(i) > 0) s[i] <- 0 r <- rbind(h,s,v) rownames(r) <- c("hue","saturation","value") r } # extra routines for linear regression # Tom Minka panel.hist <- function(x, ...) { # helper function for pairs(), from examples # usage: pairs(x,diag.panel=panel.hist) # put histograms on the diagonal usr <- par("usr"); on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, ...) } # default color for partial residuals formals(termplot)$col.res <- 1 # default lowess span #formals(panel.smooth)$span <- 4/5 #formals(lowess)$f <- 3/4 # returns a model frame where the responses have been replaced by their # residuals under the fit residual.frame <- function(object,data) { if(missing(data)) { x <- model.frame(object) resp <- response.var(x) if(inherits(object,"glm")) { p <- object$fitted.value x[[resp]] <- (object$y - p)/p/(1-p) } else x[[resp]] <- residuals(object) } else { x <- data resp <- response.var(object) # only for lm, not glm if(inherits(object,"glm")) stop("can't compute residuals for glm") # expand p to include NAs p <- predict(object,x)[rownames(x)] names(p) <- NULL x[[resp]] <- x[[resp]] - p #attr(x,"terms") <- terms(object) } x } partial.residuals <- function(object,omit,exact=T) { resp <- response.var(object) pred <- predictor.vars(object) if(exact) { x <- list() for(i in 1:length(omit)) { fit <- update(object,paste(".~. -",omit[i])) #cat(pred[i],coef(fit),"\n") x[[i]] <- residuals(fit) } names(x) <- omit } else { x <- data.frame(residuals(fit,type="partial")) r = residuals(fit) for(i in 1:length(x)) { names(x[[i]]) = names(r) } } x } partial.residual.frame <- function(object,omit) { x = model.frame(object) resp <- response.var(x) x[[resp]] = partial.residuals(object,omit)[[1]] pred = predictor.vars(formula(paste(resp,"~",omit))) x[c(pred,resp)] } ############################################################################## expand.frame <- function(data,v) { if(length(v) == 0) return(data) vs = strsplit(v,":") for(i in 1:length(v)) { data[[v[[i]]]] <- apply(data[vs[[i]]],1,prod) } data } predict.plot.data.frame <- function(x,given,given.lab,nlevels=2, layout,partial,type=c("prob","logit","probit"), highlight,key,se=T, identify.pred=F,scol="red",slwd=2, span=2/3,mar=NA,bg=par("bg"), xaxt=par("xaxt"),yaxt=par("yaxt"), color.palette=default.colors, asp=NULL, pch.palette=default.pch,main=NULL,xlab,ylab,...) { std.error = function(x) sd(x)/sqrt(length(x)) type <- match.arg(type) resp <- response.var(x) pred <- predictor.terms(x) x <- expand.frame(x,setdiff(pred,predictor.vars(x))) if(!missing(given)) { if(missing(key)) key = T if(!is.factor(given)) { # make given a factor b <- as.numeric(quantile(unique(given),seq(0,1,length=nlevels+1))) given <- rcut(given,b) } else nlevels = length(levels(given)) if(is.function(color.palette)) color.palette = color.palette(nlevels) lev = 1:nlevels level.color = color.palette[((lev-1) %% length(color.palette))+1] level.pch <- pch.palette[(((lev-1) %/% length(color.palette)) %% length(pch.palette))+1] } else key = F if(missing(layout)) layout <- auto.layout(length(pred) + key) opar <- par(bg=bg) if(any(layout != c(1,1))) { opar = c(opar,par(mfrow=layout)) } if(!is.null(mar)) { if(is.na(mar)) { mar = auto.mar(main=main,xlab=if(missing(xlab)) "y" else xlab, ylab=if(missing(ylab)) "y" else ylab,xaxt=xaxt,yaxt=yaxt) } opar <- c(opar,par(mar=mar)) } on.exit(par(opar)) if(!missing(partial)) { if(missing(highlight)) highlight <- predictor.terms(partial) if(!missing(given.lab)) { if(given.lab %in% predictor.vars(partial)) { partial = update(partial,paste(".~.-",given.lab)) } } pres <- partial.residuals(partial,pred,exact=T) } else if(missing(highlight)) highlight <- NULL for(i in pred) { if(!missing(partial)) { # sometimes pres is longer than x, because of NAs x[[resp]] <- pres[[i]][rownames(x)] if(is.null(x[[resp]])) stop(paste("partial of",i,"not found")) } if(is.factor(x[[i]]) && !is.ordered(x[[i]])) { x[[i]] <- sort.levels(x[[i]],as.numeric(x[[resp]])) } # plot points if(missing(given)) { # no given if(!is.na(scol)) { # smooth curve #k <- is.finite(x[,i]) & is.finite(x[[resp]]) # keep warnings about infinite points k <- !is.na(x[,i]) & !is.na(x[[resp]]) if(is.factor(x[[resp]])) { # factor response if(length(levels(x[[resp]]))==2 && !is.factor(x[[i]])) { if(type=="prob") { fit = loprob(x[k,i], x[k,resp]) if(F) { # piecewise constant approx library(tree) frame = data.frame(x=x[k,i],y=x[k,resp]) tr = tree(y~x,frame) partition.tree(tr,add=T,force=F) } } else { fit <- loprob(x[k,i], x[k,resp]) p <- fit$y-1 p <- switch(type,logit=log(p/(1-p)),probit=qnorm(p)) fit$y <- p+1.5 plot(x[[i]],2*as.numeric(x[[resp]])-3,xlab=i,ylab=type) } } } else { if(!is.factor(x[[i]])) { if(!all(is.finite(x[k,i]))) stop(paste(i,"has infinite values.\nCheck your transformation.")) if(!all(is.finite(x[k,resp]))) stop(paste(resp,"has infinite values.\nCheck your transformation.")) fit = lowess(x[k,i], x[k,resp], f=span) } } } real.asp = if(identical(asp,"auto")) auto.aspect(fit) else asp if(is.factor(x[[resp]])) { # factor response if(type=="prob") { if(is.factor(x[[i]])) { # should sort if the factor is not ordered mosaicplot(table(x[[i]], x[[resp]]), xlab=i, ylab=resp, ...) } else { plot(x[[i]], x[[resp]], xlab=i, ylab=resp, ...) } } } else if(is.factor(x[[i]])) { rt = rtable(formula(paste(resp,"~",i)),x) se = rtable(formula(paste(resp,"~",i)),x,fun=std.error) * 1.64 linechart(rt,se) } else { # numeric response plot(x[[i]], x[[resp]], xlab="", ylab=resp, main=main, asp=real.asp, ...) if(i %in% highlight) title(xlab=i,font.lab=2) else title(xlab=i) if(!missing(partial)) { a <- coef(partial) a0 = a["(Intercept)"] if(is.null(a0)) a0 = 0 #if(!is.na(a[i])) abline(a0,a[i],col=3,lty=2) } } lines(fit,col=scol,lwd=slwd) # identify if((identify.pred == T) || (i %in% identify.pred)) { identify(x[k,i],x[k,resp],labels=rownames(x)[k]) } } else if(is.factor(x[[i]])) { fmla = formula(paste(resp,"~",i,"+",given.lab)) x[[given.lab]] = given rt = rtable(fmla,x) if(se) { rt.se = rtable(fmla,x, fun=std.error) * 1.64 rt.se[is.na(rt.se)] = sd(x[[resp]]) linechart(t(rt),t(rt.se),xlab=" ", xaxt=xaxt,yaxt=yaxt,row.labels=NULL,stand=F) } else { linechart(t(rt),xlab=" ", xaxt=xaxt,yaxt=yaxt,row.labels=NULL,stand=F) } font.lab = if(i %in% highlight) 2 else 1 title(xlab=i,font.lab=font.lab) } else { # condition on the given variable plot.new() box() title(ylab=resp) if(i %in% highlight) title(xlab=i,font.lab=2) else title(xlab=i) if(is.factor(x[[i]])) { # setup window for factor xlim <- length(levels(x[[i]])) plot.window(xlim=c(0.5,xlim+0.5),ylim=range(x[[resp]])) if(xaxt != "n") axis(1,1:xlim,labels=levels(x[[i]])) if(yaxt != "n") axis(2) cat(paste("jittering",i,"\n")) } else { # setup window for numeric plot.window(xlim=range(x[[i]],na.rm=T),ylim=range(x[[resp]],na.rm=T)) if(xaxt != "n") axis(1) if(yaxt != "n") axis(2) } lev <- levels(given) for(g in 1:length(lev)) { color <- level.color[g] pch <- level.pch[g] val <- lev[g] k <- (given == val) if(is.factor(x[[i]])) { jitter <- (runif(length(x[k,i]))-0.5)/5 points(as.numeric(x[k,i])+jitter, x[k,resp], col=color, pch=pch, ...) } else { points(x[k,i], x[k,resp], col=color, pch=pch, ...) } if(is.factor(x[[resp]])) { fit = loprob(x[k,i], x[k,resp]) } else { fit = lowess(x[k,i], x[k,resp], f=span) } lines(fit,col=color,lwd=slwd) } } } # show legend for the given variable, as a separate panel # this is better than putting a color key on each plot if(key) { plot.new() if(!missing(given.lab)) { font.lab = if(given.lab %in% highlight) 2 else 1 title(xlab=given.lab,font.lab=font.lab) } lev = levels(given) y <- cumsum(strheight(lev)+0.02) for(i in 1:length(lev)) { color <- color.palette[i] val <- lev[i] text(0.5,0.5+y[i],val,col=color,adj=0.5) if(length(unique(level.pch))>1) points(0.6+strwidth(val)/2,0.5+y[i],pch=level.pch[i]) } } } predict.plot.lm <- function(object,data,partial=F,...) { if(!partial) { if(missing(data)) { res <- residual.frame(object) } else { res <- residual.frame(object,data) } cat("plotting residuals\n") predict.plot.data.frame(res,highlight=predictor.terms(object),...) } else { if(missing(data)) data <- model.frame(object) cat("plotting partial residuals\n") predict.plot.data.frame(data,partial=object,...) } } predict.plot.formula <- function(formula,data=parent.frame(),...) { # formula has givens? rhs <- formula[[3]] if(is.call(rhs) && (deparse(rhs[[1]]) == "|")) { # remove givens from formula given <- deparse(rhs[[3]]) formula[[3]] <- rhs[[2]] if(is.environment(data)) g <- get(given,env=data) else g <- data[[given]] if(is.null(g)) stop(paste("variable \"",given,"\" not found",sep="")) return(predict.plot.formula(formula,data, given=g,given.lab=given,...)) } if(F) { expr <- match.call(expand = F) expr$... <- NULL expr$na.action <- na.pass expr[[1]] <- as.name("model.frame.default") x <- eval(expr, parent.frame()) } else { # formula has its own environment x <- model.frame.default(formula,data,na.action=na.pass) } predict.plot.data.frame(x,...) } predict.plot <- function(object, ...) UseMethod("predict.plot") ############################################################################## expand.cross <- function(object) { resp <- response.var(object) pred <- predictor.vars(object) # quadratic terms only pred2 <- c() for(i in pred) { for(j in pred) { if(match(i,pred) < match(j,pred)) pred2 = c(pred2,paste(i,j,sep=":")) } } pred = c(pred,pred2) formula(paste(resp,"~",paste(pred,collapse="+"))) } interact.plot.data.frame <- function(x,ypred,partial,highlight,span=0.75, scol="red",slwd=2,type=c("*",":"), bg=par("bg"),...) { library(modreg) type = match.arg(type) resp <- response.var(x) pred = predictor.vars(x) if(missing(ypred)) ypred = pred n = length(pred) mar=c(0.05,0.05,0,0.05) if(all(ypred == pred)) layout = c(n,n) else layout = auto.layout(n*length(ypred)) opar = par(mfrow=layout,mar=mar,bg=bg) on.exit(par(opar)) if(!missing(partial)) { if(missing(highlight)) highlight <- predictor.terms(partial) } else if(missing(highlight)) highlight <- NULL for(py in ypred) { for(px in pred) { if(px == py) { plot.new() box() y = 0.5 if(is.factor(x[[py]])) { auto.legend(levels(x[[py]]),default.colors(nlevels(x[[py]]))) #y = 0.5 } text(0.5,y,py,font=if(py %in% highlight) 2 else 1) } else if(!is.factor(x[[py]]) && (match(py,pred) < match(px,pred))) { plot.new() } else { y = x[c(px,py,resp)] if(!missing(partial)) { predxy = paste(px,type,py,sep="") fit = update(partial,paste(".~.-",predxy)) y[[resp]] = residuals(fit) } mar = c(0.05,0.05,0,0.05) if(is.factor(y[[px]]) && is.factor(y[[py]])) { y = rtable(formula(y),y) xlab = if(px %in% ypred) "" else px linechart(t(y),xlab=xlab,ylab="",yaxt="n",row.labels=NULL) } else if(is.factor(y[[px]])) { plot.new() } else if(is.factor(y[[py]])) { fmla = formula(paste(resp,"~",px,"|",py)) predict.plot(fmla,y,xlab="",ylab="",xaxt="n",yaxt="n",key=F) } else if(is.factor(y[[resp]])) { color.plot(y, xlab="",ylab="",mar=mar,key=F,axes=F,...) } else { fmla = formula(paste(resp,"~",px,"+",py)) color.plot(loess(fmla,y,span=span), xlab="",ylab="",mar=mar,key=F,axes=F,...) } predxy = paste(px,":",py,sep="") predyx = paste(py,":",px,sep="") if(any(c(predxy,predyx) %in% highlight)) { box(col="red") } else { box() } } } } } interact.plot.lm <- function(object,data,partial=F,...) { if(!partial) { if(missing(data)) { res <- residual.frame(object) } else { res <- residual.frame(object,data) } cat("plotting residuals\n") interact.plot.data.frame(res,highlight=predictor.vars(object),...) } else { if(missing(data)) data <- model.frame(object) cat("plotting partial residuals\n") interact.plot.data.frame(data,partial=object,...) } } interact.plot <- function(object, ...) UseMethod("interact.plot") ############################################################################## # glm stuff smooth <- function(x,y) { library(modreg) if(length(unique(x)) >= 4) { fit = smooth.spline(x,y) predict(fit) } else { lowess(x,y) } } loprob <- function(x,y) { # simulates lowess for binary response # returns list(x,y) lev <- levels(y) if(length(lev) != 2) stop("y must have two levels") if(!is.factor(x)) { from <- min(x) to <- max(x) } if(T) { # smoothing spline library(modreg) y <- as.numeric(y)-1 r <- seq(from,to,length=50) if(length(unique(x)) >= 4) { fit = smooth.spline(x,y) p <- predict(fit,r)$y } else { fit <- loess(y~x,degree=0) p <- predict(fit,r) } list(x=r,y=p+1) } else if(F) { # density estimate - too bumpy densf <- function(x) { if(is.factor(x)) { list(x=levels(x),y=tabulate(x)/length(x)) } else { density(x,from=from,to=to,adjust=1.5) } } dens <- lapply(split(x,y),densf) p <- sum(y==levels(y)[2])/length(y) p <- p*dens[[2]]$y/((1-p)*dens[[1]]$y + p*dens[[2]]$y) list(x=dens[[1]]$x,y=p+1) } else if(F) { # hill density estimate - too noisy xr <- seq(from,to,length=50) densf <- function(x) hill.density(x,xr) dens <- lapply(split(x,y),densf) p <- dens[[2]]/(dens[[1]] + dens[[2]]) list(x=xr,y=p+1) } else { # gam smooth - too slow library(mgcv) y <- as.numeric(y)-1 fit <- gam(y~s(x),family=binomial) r <- seq(from,to,length=50) p <- predict(fit,data.frame(x=r),type="response") list(x=r,y=p+1) } } predict.frame <- function(object) { # returns a frame relating predicted values to actual values # for glms, the predicted value is the value before transformation resp = response.var(object) frame = data.frame(z=predict(object),response=model.frame(object)[[resp]]) names(frame)[2] = resp frame } model.plot <- function(object,data,partial=F,smooth=F,se=F, col="green",lwd=2,add=F,lty=1, bg=par("bg"),...) { if(missing(data)) data <- model.frame(object) resp = response.var(object) pred = predictor.terms(object) data <- expand.frame(data,setdiff(pred,predictor.vars(x))) p <- predict(object,data,type="response",se=se) if(inherits(object,"glm")) { if(!se) p = p+1 else p$fit = p$fit+1 } if(add) { # must have one predictor term if(length(pred) > 1) stop("model has more than one predictor") x <- data[[pred]] i <- order(x) x <- sort(x) if(!se) { lines(x,p[i],col=col,lwd=lwd,lty=lty,...) } else { lines(x,p$fit[i],col=col,lwd=lwd,lty=lty,...) lines(x,p$fit[i]+p$se[i],col=col,lty=2,lwd=lwd,...) lines(x,p$fit[i]-p$se[i],col=col,lty=2,lwd=lwd,...) } return(invisible(p)) } layout <- auto.layout(length(pred)) opar <- par(bg=bg) if(any(layout != c(1,1))) { opar = c(opar,par(mfrow=layout)) } on.exit(par(opar)) for(v in pred) { plot(data[[v]],data[[resp]],xlab=v,ylab=resp,...) x = data[[v]] if(length(pred) == 1) { i <- order(x) lines(x[i],p[i],col=col,lwd=lwd,lty=lty,...) } else { if(partial) { fit <- update(object,paste(".~. -",v)) p = predict(fit,data,type="response",se=se) if(inherits(object,"glm")) { if(!se) p = p+1 else p$fit = p$fit+1 } } lines(smooth(x,p),col=col,lwd=lwd,lty=lty,...) } if(smooth) { lines(loprob(x,data[[resp]]),col=2,lwd=lwd,lty=lty,...) } } } plot.loess <- function(object,xlim,col=2,...) { x <- model.frame(object) pred <- predictor.vars(object) resp <- response.var(object) plot(x[[pred]],x[[resp]],xlab=pred,ylab=resp,...) if(missing(xlim)) xlim <- range(x[[pred]]) xt <- seq(xlim[1],xlim[2],len=50) y <- predict(object,xt) lines(xt,y,col=col) } misclass.glm <- function(fit,data,rate=F) { # model.frame is needed to evaluate I() terms if(missing(data)) data <- model.frame(fit) else data = model.frame(formula(fit),data) resp <- response.var(fit) lev = levels(data[[resp]]) p <- factor.logical(predict(fit,data,type="response")>0.5,labels=lev) r = sum(as.numeric(p) != as.numeric(data[[resp]])) if(rate) r/nrow(data) else r } misclass.nnet = function(fit,data) { # model.frame is needed to evaluate I() terms if(missing(data)) data <- model.frame(fit) else data = model.frame(formula(fit),data) resp <- response.var(fit) p <- predict(fit,data,type="class") return(sum(p != as.character(data[[resp]]))) } misclass <- function(object, ...) UseMethod("misclass") formula.nnet = formula.lm deviance.glm <- function(object, newdata) { if(missing(newdata)) return(object$deviance) resp <- response.var(object) if(object$family$family == "binomial") { truth <- (as.numeric(newdata[[resp]]) == 2) p <- predict(object,newdata,type="response") p[p == 0] <- 1e-3 p[!truth] <- 1-p[!truth] -2*sum(log(p)) } else { stop("family not handled") } } confusion.tree <- function(tr,data,p=0.5) { resp <- response.var(tr) if(p == 0.5) { r <- predict(tr,data,type="class") } else { r = predict(tr,data,type="vector")[,2] r = factor.logical(r>p,labels=levels(data[[resp]])) } table(truth=data[[resp]],predicted=r) } confusion.knn = confusion.tree factor.logical <- function(x,labels=c("No","Yes")) { f <- factor(x,levels=c(F,T)) levels(f) <- labels f } confusion.glm <- function(fit,data,p=0.5) { # model.frame is needed to evaluate I() terms if(missing(data)) data <- model.frame(fit) else data = model.frame(formula(fit),data) resp <- response.var(fit) r = predict(fit,data,type="response") r <- factor.logical(r>p,labels=levels(data[[resp]])) table(truth=data[[resp]],predicted=r) } confusion <- function(object, ...) UseMethod("confusion") # expands a model formula to include all squares and cross-products of the # predictors expand.quadratic <- function(fmla,cross=T) { resp <- response.var(fmla) pred <- predictor.vars(fmla) pred2 <- sapply(pred,function(s) paste("I(",s,"^2)",sep="")) s <- paste(resp,"~",paste(pred,collapse="+"), "+",paste(pred2,collapse="+")) len <- length(pred) if(cross && len > 1) { pred3 <- c() for(i in 1:(len-1)) { for(j in (i+1):len) { pred3 <- c(pred3, paste(pred[i],":",pred[j],sep="")) } } s <- paste(s,"+",paste(pred3,collapse="+")) } formula(s) } logistic <- function(formula,data) { #data <- model.frame(formula,data) resp = response.var(formula) pred <- predictor.vars(formula) if(!all(sapply(data[pred],is.numeric))) stop("all predictors must be numeric") object <- list(terms=terms(data),data=data) class(object) <- "logistic" object$shift = mean(data[pred]) object$scale <- sd(data[pred]) object$scale[object$scale == 0] = 1 data[pred] = scale(data[pred]) w = rep(0,length(predictor.terms(formula))+3) object$nnet = nnet(formula,data,Wts=w,size=1,decay=10) object$levels = levels(data[[resp]]) object } if(T) { logistic <- function(formula,data,...) { environment(formula) = sys.frame(sys.nframe()) glm(formula,data,family=binomial,maxit=50,...) } } predict.logistic <- function(object,test,type=c("class","response","vector"), ...) { resp <- response.var(object) pred <- predictor.vars(object) xt <- scale(test[pred],center=object$shift,scale=object$scale) type <- match.arg(type) if(type == "class") { factor(predict(object$nnet,xt,type="class"),levels=object$levels) } else { # type vector p = predict(object$nnet,xt,type="raw") if(type == "response") { p } else { lev <- object$levels v <- array(0,c(length(p),length(lev)),list(rownames(test),lev)) v[,1] = 1-p v[,2] = p v } } } misclass.logistic = misclass.nnet model.frame.logistic = function(object) object$data