FINGER EXERCISES FOR WEEK 08 TO BE DISCUSSED MON OCT 21 2002.
------------------------------------------------------------

######
# Q1 #
######

Y <- c(1.2, 2.4, 1.3, 1.3, 0.0, 1.0, 1.8, 0.8, 4.6, 1.4)

# (a)
######

# the loglikelihood
l1 <- function(loc, sc, data) {
  s <- 0
  for(i in 1:length(data))
    s <- s - log(pi*sc) - log(1 + ((data[i]-loc)/sc)^2)
  return(s)
}

# sc in the range [0, 0.02]
loc <- seq(0.5, 1.5, length=60)
sc  <- seq(0, 0.02, length=60)
z   <- outer(loc, sc, l1, data=Y)
persp(loc, sc, z)

# sc in the range [0, 1]
loc <- seq(0.5, 1.5, length=60)
sc  <- seq(0, 1, length=60)
z   <- outer(loc, sc, l1, data=Y)
persp(loc, sc, z)

# (b) & (c)
###########

# -loglikelihood
nll <- function(loc, sc, data) {
  s <- -l1(loc, sc, data)
  if(is.na(s)) return(.Machine$double.xmax)   # return a known large number
                                              # to avoid any NA in s
  return(s)
}

library(MASS)

# hessian matrix function
fn2 <- deriv3(~ log(pi*sc) + log(1 + ((Y-loc)/sc)^2), c("loc", "sc"),
              function(loc, sc) NULL)

summary(ans <- ms( ~ nll(loc, sc, Y), start=c(loc=1, sc=1)))
#
# yields
#
# Final value: 13.38277
#
# Solution:
#          Par.
# loc 1.2633491
# sc  0.3029818
#
# Convergence: RELATIVE FUNCTION CONVERGENCE.
#
# Computations done:
#  Iterations Function Gradient
#          12       20       32
#
# Now, to get the final hessian and standard errors of the parameters,
# we need the following:

tmp  <- attr(fn2(ans$parameters[1], ans$parameters[2]), "hessian")
                                  # retrieve the hessian matrices
hess <- apply(tmp, c(2,3), sum)   # sum hessian matrices over samples

# hessian matrix [ 66.749943  1.907957
#                   1.907957 42.243772 ]
#
# So the variance-covariance matrix of the parameter estimates is

solve(hess)
#              loc           sc
# loc  0.015006600 -0.000676286
# sc  -0.000676286  0.023720540
#
# with standard errors

sqrt(diag(solve(hess)))
# [1] 0.1225014 0.1540147

# NOTE: An alternative way to do this that is more in line with our
# class discussion last week would be:

fn3 <- deriv3(~ log(pi*sc) + log(1 + ((Y-loc)/sc)^2), c("loc", "sc"),
              function(loc, sc, Y) NULL)

summary(ans <- ms( ~ fn3(loc, sc, Y), start=c(loc=1, sc=1),
                   data=as.data.frame(Y)))
#
# yields:
#
# Solution:
#          Par.         Grad. Hessian.loc Hessian.sc
# loc 1.2633491 -2.315337e-11   66.723074   1.902314
# sc  0.3029818 -5.195755e-11    1.902314  42.211792
#
# Information:
#              loc            sc
# loc  0.0150066004 -0.0006762865
# sc  -0.0006762865  0.0237205404
#
# Convergence: RELATIVE FUNCTION CONVERGENCE.
#
# Computations done:
#  Iterations Function Gradient
#           8        9        9
#
# and the standard errors would be

sqrt(diag(summary(ans)$Information))
# [1] 0.1225014 0.1540147

# Note that
#
# (1) This method sums the log-likelihood over the data, rather than
#     precomputing the sum.
# (2) This method uses the analytic hessian, rather than a numerically
#     computed hessian (therefore it will be faster and, perhaps,
#     more accurate).
# (3) When the analytic hessian is fed to ms(), ms() can generate the
#     final variance-covariance matrix of the parameter estimates
#     explicitly (note that it is mislabelled "Information" in the
#     output!).
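# ASIDE for R users: ms() is S-Plus-only, but deriv3() also exists in R,
# so the analytic-hessian calculation above can be sketched in R roughly
# as follows.  The names negll and fn2r are introduced here only for
# illustration, and optim() merely stands in for ms().

negll <- function(theta) {                  # Cauchy -loglikelihood as a
  loc <- theta[1]; sc <- theta[2]           # function of one parameter vector;
  if(sc <= 0) return(.Machine$double.xmax)  # uses Y as defined at the top of Q1
  sum(log(pi*sc) + log(1 + ((Y-loc)/sc)^2))
}

fit <- optim(c(1, 1), negll)
fit$par                                     # roughly loc = 1.263, sc = 0.303

fn2r <- deriv3(~ log(pi*sc) + log(1 + ((Y-loc)/sc)^2), c("loc", "sc"),
               function(loc, sc, Y) NULL)
tmp  <- attr(fn2r(fit$par[1], fit$par[2], Y), "hessian")  # per-observation hessians
hess <- apply(tmp, c(2, 3), sum)                          # sum over the observations
sqrt(diag(solve(hess)))                                   # SEs, about 0.12 and 0.15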
# To compare this with the result of summing the log-likelihood over
# the data, but NOT supplying the Hessian, one could look at something
# like this:
#
# nll <- function(loc, sc, y) {
#   s <- log(pi*sc) + log(1 + ((y-loc)/sc)^2)
#   if(sum(is.na(s)) > 0) return(.Machine$double.xmax)  # return a known
#                                 # large number to avoid any NA in s
#   return(s)
# }
# ms( ~ nll(loc, sc, Y), start=c(loc=1, sc=1), data=as.data.frame(Y))

# -loglikelihood taking a single parameter vector,
# for the convenience of nlmin or nlminb
nll2 <- function(theta) {
  nll(theta[1], theta[2], Y)
}

Two S-Plus functions
====================

ans <- nlmin(nll2, c(1,1))
# converges to loc=1.2633491, sc=0.3029818

tmp  <- attr(fn2(ans$x[1], ans$x[2]), "hessian")
                                  # retrieve the hessian matrices
hess <- apply(tmp, c(2,3), sum)   # sum hessian matrices over samples

# hessian matrix [ 66.723075  1.902313
#                   1.902313 42.211792 ]

ans <- nlminb(c(1,1), nll2)
# converges to loc=1.2633491, sc=0.3029818

tmp  <- attr(fn2(ans$parameters[1], ans$parameters[2]), "hessian")
                                  # retrieve the hessian matrices
hess <- apply(tmp, c(2,3), sum)   # sum hessian matrices over samples

# hessian matrix [ 66.723074  1.902313
#                   1.902313 42.211791 ]

Two R functions
===============

nlm(nll2, c(1,1), hessian=T)
# converges to loc=1.2633485, sc=0.3029815
# hessian matrix [ 66.731771  1.878349
#                   1.878349 42.172896 ]

optim(c(1,1), nll2, hessian=T)
# converges to loc=1.2632898, sc=0.3029738
# hessian matrix [ 66.720740  1.922313
#                   1.922313 42.220467 ]

# All of the above methods produce very similar hessian matrices, and
# therefore very similar inverse hessian matrices, so the standard
# errors are about the same.

######
# Q2 #
######

# (a)
######

The following results are from Splus.

nll1() uses a double loop over all the x's and theta's, which should be
the slowest.  nll2() uses a single loop over the theta's and then sums
over the x's, which is faster than the double loop.  nll3() uses outer()
to generate an n*m matrix, operates on the matrix entries, and then sums
over the columns (the x's).  nll3() is faster than nll1() but slower
than nll2().

Tests show that when m and n are large, both outer("-") and apply() are
time consuming.  If you look at the code for "outer" you will see that
it is all Splus (or R) code.  Similarly, "apply" spends a great deal of
time reformatting the array before calling a fast apply function (and a
great deal more time reformatting the output before returning!).

# (b)
######

/****************************************
 * myexternc.c: My external C functions *
 ****************************************/
#include <math.h>
#define PI 3.1415926

void mynll(long *n, long *m, double *x, double *theta, double *y)
{
  int i, j;
  for(i = 0; i < *m; i++) {
    y[i] = -(*n)*log(2*PI)/2;
    for(j = 0; j < *n; j++) {
      y[i] -= (x[j]-theta[i])*(x[j]-theta[i])/2;
    }
  }
}

############## add the external C program to Splus ################

In Splus
========
Linux% Splus CHAPTER myexternc.c   # this generates a makefile
Linux% Splus make                  # this builds S.so
Linux% Splus -e
> dyn.open("S.so")

** If you want to give the library a name of your own instead of S.so,
   in this simple case you can use

Linux% gcc -c myexternc.c                       # to generate myexternc.o
Linux% Splus LIBRARY myexternc.so myexternc.o   # build myexternc.so
                                                # instead of S.so
Linux% Splus -e
> dyn.open("myexternc.so")

In R
====
Linux% R CMD SHLIB myexternc.c     # to build myexternc.so
Linux% R
> dyn.load("myexternc.so")

############ then we can call mynll in nllr4() #################
(one possible sketch of such a wrapper is given at the end of this file)

# (c)
######

The following results are from Splus.

Using nlltime(), we can get a rough estimate of the execution time of
each routine; a similar check in R is sketched below.  Once n > 500, the
differences become apparent even on very fast computers.
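As a rough illustration of how such timings can be collected in R
(nlltime() itself is not listed here; nll_loop and nll_outer below are
hypothetical stand-ins for the double-loop and outer()-based versions,
written only to show the mechanics):

nll_loop <- function(x, theta) {        # double loop, in the spirit of nll1()
  y <- numeric(length(theta))
  for(i in seq(along=theta)) {
    s <- 0
    for(j in seq(along=x)) s <- s + (x[j]-theta[i])^2/2
    y[i] <- length(x)*log(2*pi)/2 + s
  }
  y
}

nll_outer <- function(x, theta) {       # outer() + apply(), in the spirit of nll3()
  d <- outer(x, theta, "-")             # n*m matrix of x_j - theta_i
  length(x)*log(2*pi)/2 + apply(d^2/2, 2, sum)
}

x     <- rnorm(1000)
theta <- seq(-2, 2, length=200)
system.time(nll_loop(x, theta))         # interpreted double loop: slowest
system.time(nll_outer(x, theta))        # vectorized, but builds a large matrix
# Repeat with nll2() and with nllr4() from part (b) (once the shared
# library has been loaded) to reproduce the comparison below.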
The ordering by speed, from fastest to slowest, is:

    nllr4 >> nll2 > nll3 >> nll1
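For completeness, the nllr4() wrapper referred to in part (b) is not
listed above.  One possible R sketch, based only on the mynll()
signature given there (the argument order, the use of .C(), and the
final sign change to get a negative log-likelihood are assumptions):

nllr4 <- function(x, theta) {
  # assumes dyn.load("myexternc.so") in R (or dyn.open() in Splus) has
  # already been done, as in part (b)
  out <- .C("mynll",
            as.integer(length(x)),       # *n
            as.integer(length(theta)),   # *m
            as.double(x),                # x[]
            as.double(theta),            # theta[]
            y = double(length(theta)))   # filled in by the C routine
  -out$y   # mynll computes the N(theta, 1) log-likelihood for each theta;
           # negate it to match the negative-log-likelihood convention of
           # nll1() - nll3()
}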