# Code to accompany Lecture 14 (Local Linear Embedding)
# An R implementation of local linear embedding, following the
# description in Saul and Roweis, JMLR 4 (2003): 119--155
# URL: http://jmlr.csail.mit.edu/papers/v4/saul03a.html
# Probably not suitable for large data sets:
# I. The finding of the k nearest neighbors for each point is NOT done with
# any real efficiency
# II. The final eigenvalue problem is solved by brute force, without using
# the sparsity of the matrix
# Local linear embedding of data vectors
# Inputs: n*p matrix of vectors, number of dimensions q to find (< p),
# number of nearest neighbors per vector, scalar regularization setting
# Calls: find.kNNs, reconstruction.weights, coords.from.weights
# Output: n*q matrix of new coordinates
lle <- function(x,q,k=q+1,alpha=0.01) {
  # Local linear embedding of data vectors
  # Inputs: n*p matrix of vectors, number of dimensions q to find (< p),
  # number of nearest neighbors per vector, scalar regularization setting
  # Calls: find.kNNs, reconstruction.weights, coords.from.weights
  # Output: n*q matrix of new coordinates
  # Sanity checks: the target dimension must be positive and smaller than
  # the ambient dimension, and we need more neighbors than output
  # dimensions for the weight-finding step to be well-posed.
  # (The original line read "stopifnot(q>0, qq, alpha>0)" -- "qq" is an
  # undefined object, so the checks always crashed; restored here.)
  stopifnot(q > 0, q < ncol(x), k > q, alpha > 0)
  kNNs = find.kNNs(x,k)                    # n*k matrix of neighbor indices
  w = reconstruction.weights(x,kNNs,alpha) # n*n weight matrix
  coords = coords.from.weights(w,q)        # n*q coordinate matrix
  return(coords)
}
####### Finding nearest neighbors and sub-functions ############
# Find multiple nearest neighbors in a data frame
# Inputs: n*p matrix of data vectors, number of neighbors to find,
# optional arguments to dist function
# Calls: smallest.by.rows
# Output: n*k matrix of the indices of nearest neighbors
find.kNNs <- function(x,k,...) {
  # Find the k nearest neighbors of every row of a data matrix
  # Inputs: n*p matrix of data vectors, number of neighbors to find,
  # optional arguments passed on to dist()
  # Calls: smallest.by.rows
  # Output: n*k matrix of the indices of nearest neighbors
  # Pairwise distances via the built-in dist(), coerced to a full
  # square matrix so rows can be scanned
  dist.mat <- as.matrix(dist(x, ...))
  # Ask for k+1 smallest per row: each point is its own nearest
  # neighbor (distance zero), so the first column is the point itself
  nbrs <- smallest.by.rows(dist.mat, k + 1)
  # Drop that self-neighbor column, leaving the k true neighbors
  return(nbrs[, -1])
}
# Find the k smallest entries in each row of an array
# Inputs: n*p array, p >= k, number of smallest entries to find
# Output: n*k array of column indices for smallest entries per row
smallest.by.rows <- function(m,k) {
  # Find the k smallest entries in each row of an array
  # Inputs: n*p array (p >= k), number of smallest entries to find
  # Output: n*k matrix; row i holds the column indices of the k
  # smallest entries of m[i,], in increasing order of value
  stopifnot(ncol(m) >= k) # Otherwise "k smallest" is meaningless
  row.orders = t(apply(m,1,order))
  # drop=FALSE keeps the result an n*k matrix even when k == 1, as the
  # documented contract promises (without it, k == 1 silently returned
  # a plain vector)
  k.smallest = row.orders[,1:k,drop=FALSE]
  return(k.smallest)
}
###### Finding linear approximation weights and sub-functions ########
# Least-squares weights for linear approx. of data from neighbors
# Inputs: n*p matrix of vectors, n*k matrix of neighbor indices,
# scalar regularization setting
# Calls: local.weights
# Outputs: n*n matrix of weights
reconstruction.weights <- function(x,neighbors,alpha) {
  # Least-squares weights for linearly approximating each data vector
  # from its nearest neighbors
  # Inputs: n*p matrix of vectors, n*k matrix of neighbor indices,
  # scalar regularization setting
  # Calls: local.weights
  # Output: n*n matrix of weights; row i is zero except in the columns
  # of point i's neighbors
  stopifnot(is.matrix(x), is.matrix(neighbors), alpha > 0)
  n.points <- nrow(x)
  stopifnot(nrow(neighbors) == n.points)
  w <- matrix(0, nrow = n.points, ncol = n.points)
  for (point in seq_len(n.points)) {
    nbr.idx <- neighbors[point, ]
    # Weights live only at this point's neighbor positions
    w[point, nbr.idx] <- local.weights(x[point, ], x[nbr.idx, ], alpha)
  }
  return(w)
}
# Calculate local reconstruction weights from vectors
# Inputs: focal vector (1*p matrix), k*p matrix of neighbors,
# scalar regularization setting
# Outputs: length k vector of weights, summing to 1
local.weights <- function(focal,neighbors,alpha) {
  # Calculate local reconstruction weights from vectors
  # Inputs: focal vector (1*p matrix), k*p matrix of neighbors,
  # scalar regularization setting
  # Output: length-k vector of weights, summing to 1
  # basic matrix-shape sanity checks
  stopifnot(nrow(focal)==1,ncol(focal)==ncol(neighbors))
  # Should really sanity-check the rest (is.numeric, etc.)
  k = nrow(neighbors)
  # Center the neighbors on the focal vector; the double transpose
  # exploits R's recycling rule, which prefers columns
  neighbors=t(t(neighbors)-focal)
  # Gram matrix of the centered neighbors; the optimal weights solve
  # gram %*% w = 1 (up to the unit-sum normalization below)
  gram = neighbors %*% t(neighbors)
  # Try the unregularized problem first; silent=TRUE keeps try() from
  # printing the error message on the expected singular-matrix path
  weights = try(solve(gram,rep(1,k)), silent=TRUE)
  # inherits() is the robust way to test for the "try-error" class
  # (comparing class() vectors with identical() is fragile)
  if (inherits(weights,"try-error")) {
    # Unregularized solve failed (e.g. gram is rank-deficient when
    # there are more neighbors than input dimensions); fall back to a
    # ridge-regularized system
    # TODO: look at the error, check if it's something
    # regularization could fix!
    weights = solve(gram+alpha*diag(k),rep(1,k))
  }
  # Enforce the unit-sum constraint
  weights = weights/sum(weights)
  return(weights)
}
# Get approximation weights from indices of point and neighbors
# Inputs: index of focal point, n*p matrix of vectors, n*k matrix
# of nearest neighbor indices, scalar regularization setting
# Calls: local.weights
# Output: vector of n reconstruction weights
local.weights.for.index <- function(focal,x,NNs,alpha) {
  # Approximation weights for one point, given indices
  # Inputs: index of focal point, n*p matrix of vectors, n*k matrix
  # of nearest neighbor indices, scalar regularization setting
  # Calls: local.weights
  # Output: length-n vector of reconstruction weights, zero except at
  # the focal point's neighbors
  n.pts <- nrow(x)
  stopifnot(n.pts > 0, focal > 0, focal <= n.pts, nrow(NNs) == n.pts)
  nbr.idx <- NNs[focal, ]
  full.wts <- rep(0, n.pts)
  full.wts[nbr.idx] <- local.weights(x[focal, ], x[nbr.idx, ], alpha)
  return(full.wts)
}
# Local linear approximation weights, without iteration
# Inputs: n*p matrix of vectors, n*k matrix of neighbor indices,
# scalar regularization setting
# Calls: local.weights.for.index
# Outputs: n*n matrix of reconstruction weights
reconstruction.weights.2 <- function(x,neighbors,alpha) {
  # Local linear approximation weights, without an explicit loop
  # Inputs: n*p matrix of vectors, n*k matrix of neighbor indices,
  # scalar regularization setting
  # Calls: local.weights.for.index
  # Output: n*n matrix of reconstruction weights
  # Sanity checks (the original left a TODO here); mirror the checks
  # in reconstruction.weights so the two entry points agree
  stopifnot(is.matrix(x), is.matrix(neighbors), alpha > 0)
  n = nrow(x)
  stopifnot(nrow(neighbors) == n)
  # vapply (rather than sapply) guarantees an n*n numeric result,
  # one column per point, even in degenerate cases
  w = vapply(seq_len(n), local.weights.for.index, numeric(n),
             x=x, NNs=neighbors, alpha=alpha)
  w = t(w) # vapply returns the transpose of the matrix we want
  return(w)
}
########## Calculating new coordinates from local weights #############
# Find intrinsic coordinates from local linear approximation weights
# Inputs: n*n matrix of weights, number of dimensions q, numerical
# tolerance for checking the row-sum constraint on the weights
# Output: n*q matrix of new coordinates on the manifold
coords.from.weights <- function(w,q,tol=1e-7) {
  # Find intrinsic coordinates from local linear approximation weights
  # Inputs: n*n matrix of weights, number of dimensions q, numerical
  # tolerance for checking the row-sum constraint on the weights
  # Output: n*q matrix of new coordinates on the manifold
  n.pts <- nrow(w)
  stopifnot(ncol(w) == n.pts) # weight matrix must be square
  # Each row of weights should sum to one; tol > 0 absorbs round-off
  stopifnot(all(abs(rowSums(w) - 1) < tol))
  # Build the Laplacian-like matrix M = (I - w)^T (I - w)
  identity.mat <- diag(n.pts)     # n*n identity
  displacement <- identity.mat - w
  M <- t(displacement) %*% displacement
  # eigen() returns eigenvalues/eigenvectors in decreasing order of
  # eigenvalue, so the bottom eigenfunctions sit in the last columns;
  # skip the very last one, the trivial constant eigenvector with
  # eigenvalue zero
  spectrum <- eigen(M)
  return(spectrum$vectors[, (n.pts - q):(n.pts - 1)])
}