# All the R used in making the lecture
#### Plotting state centers with name labels
plot.new() # Start up a new plot
# How big is the plotting window? Set ranges from the data we'll be graphing
plot.window(xlim=range(state.center$x),ylim=range(state.center$y))
# Put the name of each state at its center. Shrink text 25% so there's less
# overlap between the names.
text(state.center,state.name,cex=0.75)
# Add the horizontal axis at the bottom and the vertical at the left.
axis(1)
axis(2)
# Draw a box.
box()
# Add the titles
title(main="Where R Thinks the US States Are",xlab="Longitude",ylab="Latitude")
##### PCA of unscaled states data, scree plot
prcomp(state.x77)
plot(prcomp(state.x77),type="l")
##### Check the standard deviations of the features
apply(state.x77,2,sd)
# the "2" means apply the function (sd) to each column; "1" would mean apply
# it to each row
##### Biplot of PCA
biplot(prcomp(state.x77),cex=(0.5,0.75))
# Plot original data points and feature vectors against principal
# components; shrink data-point names by 25% for contrast/legibility
##### Scale the features to unit variance and make another biplot
biplot(prcomp(state.x77,scale.=TRUE),cex=c(0.5,0.75))
##### Add latitude and longitude to the data set
state.x77.centers = cbind(state.x77,state.center$x,state.center$y)
colnames(state.x77.centers) = c(colnames(state.x77),"Longitude","Latitude")
# Fix column names
##### Make another biplot
biplot(prcomp(state.x77.centers,scale.=TRUE),cex=c(0.5,0.8))
##### Add density to the data set, fix column names, make biplot
states.density = cbind(state.x77,
state.x77[,"Population"]/state.x77[,"Area"],
state.center$x,state.center$y)
colnames(states.density) = c(colnames(state.x77),"Density","Longitude",
"Lattitude")
biplot(prcomp(states.density,scale.=TRUE),cex=c(0.5,0.75))
##### Randomly permute the Frost variable --- leaves marginal distribution
##### alone but destroys correlations
# Redo PCA and replot this partially randomized data
states.density.randfrost = states.density
states.density.randfrost[,"Frost"] = sample(states.density[,"Frost"])
biplot(prcomp(states.density.randfrost,scale.=TRUE),cex=c(0.5,0.8))
##### Plotting for factanal() models
# Make a biplot from the output of factanal()
# Presumes: fa.fit is a fitted factor model of the
# type returned by factanal
# fa.fit contains a scores object
# fa.fit has at least two factors!
# Inputs: fitted factor analysis model, additional
# parameters to pass to biplot()
# Side-effects: Makes biplot
# Outputs: None
biplot.factanal <- function (fa.fit,...)
{
# Get the first two columns of scores, i.e.,
# scores on first two factors
x = fa.fit$scores[,1:2]
# Get the loadings on the first two factors
y = fa.fit$loadings[,1:2]
biplot(x,y,...)
}
# Make scree (eigenvalue-magnitude) plots from the output of factanal()
# Input: fitted model of class factanal,
# x-axis label (default "factor"),
# y-axis label (default "eigenvalue")
# graphical parameters to pass to plot()
# Side-effects: Plots eigenvalues vs. factor number
# Output: None
screeplot.factanal <- function(fa.fit,xlab="factor",ylab="eigenvalue",...) {
# sum-of-squares function for repeated application
sosq <- function(v) {sum(v^2)}
# Get the matrix of loadings
my.loadings <- as.matrix(fa.fit$loadings)
# Eigenvalues can be recovered as sum of
# squares of each column
evalues <- apply(my.loadings,2,sosq)
plot(evalues,xlab=xlab,ylab=ylab,...)
}
##### Single-factor analysis
factanal(states.density,factors=1)
##### Plot the single factor against location
library(lattice) # For more graphics commands!
# You may have to install lattice from CRAN
state.g = cbind(state.center$x,
state.center$y,
factanal(states.density,factors=1,scores="regression")$scores)
colnames(state.g) = c("Longitude","Latitude","G")
levelplot(state.g[,"G"]~state.g[,"Longitude"]*state.g[,"Latitude"],
xlab="Longitude",ylab="Latitude")
# See help(levelplot) for more
##### Biplot of 2-factor analysis
biplot.factanal(factanal(states.density,factors=2,scores="regression"),
cex=c(0.5,0.8))
##### Plots for 6-factor analysis
states.density.fa6 <- factanal(states.density,factors=6,scores="regression")
biplot.factanal(states.density.fa6,cex=c(0.5,0.75))
screeplot.factanal(states.density.fa6,type="b")