############################################################################### # sum.small: Sum a vector of very small numbers # loglik.np: Log-likelihood of np mixture model on arbitrary data set ############################################################################### # Sum a vector of very small numbers, using the fact that # sum(x) = max(x) * sum(x/max(x)) # so # log(sum(x)) = max(log(x)) + log(sum(exp(log(x) - max(log(x))))) # This reduces numerical "underflow" issues when dealing with very small # numbers # See also "Laplace approximation" for exponential sums or integrals # Inputs: vector of numbers to sum, whether the numbers have already been # logged, whether to deliver sum(x) or log(sum(x)) # Output: either sum(x) or log(sum(x)) sum.small <- function(x, initial.log=TRUE, return.log=TRUE) { # If we're not given the logs of the numbers initially, discard any # zeroes (since they don't contribute to the sum) and take the log if (! initial.log) { x <- x[x>0] x <- log(x) } # What's the largest summand (after logging)? largest <- max(x) # Adjust all the summands down by this x <- x-largest # Add everything up in logs --- you can double check that this will be # exactly sum(x) if all the arithmetic is done exactly total <- largest + log(sum(exp(x))) # If we're not returning the log sum, undo the log if (! return.log) { total <- exp(total) } # and return return(total) } loglik.np <- function(npmix, data) { stopifnot(require(mixtools)) stopifnot(require(plyr)) n.clusters <- length(npmix\$lambdahat) p <- length(npmix\$blockid) stopifnot(ncol(data)==p) loglik.3d.array <- array(NA, dim=c(p,n.clusters,nrow(data))) for (var in 1:p) { for (cluster in 1:n.clusters) { loglik.3d.array[var,cluster,] <- log(density.npEM(x=npmix, u=data[,var], component=cluster, block=var)\$y) } } loglik.2d.array <- apply(loglik.3d.array,MARGIN=c(2,3),sum) loglik.2d.weighted <- sweep(loglik.2d.array, MARGIN=1, STATS=log(npmix\$lambdahat), FUN="+") loglik.1d <- apply(loglik.2d.weighted,2,sum.small) return(list(sum(loglik.1d),loglik.3d.array)) }