%% Simulates Galton father/son data (see Friedman 78).
%% First set some graphical parameters
scatterPointSize = 6;
conditionalMeanMarkerSize = 10;
% Set bivariate normal parameters (from Freedman)
N = 100;
xMu = 68; yMu = 69;
xSD = 2.7; ySD = 2.7;
rho = 0.5;
covariance = rho * xSD * ySD;
% Set up arrays for function inputs.
mu = [xMu, yMu]; sigma = [xSD^2, covariance; covariance, ySD^2];
% Draw a random sample
Nsample = 1078;
sample = mvnrnd(mu, sigma, Nsample);
% Fit a linear model.
lmgalton = polyfit(sample(:, 1), sample(:, 2), 1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% First create the level sets plot %%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set levels for the level set plot.
levels = [0.8, 0.9, 0.95, 0.99];
inverseSigma = sigma^-1;
cband_y = sqrt(chi2inv(levels, 2)/inverseSigma(1,1));
confidenceBandPoints = [68+cband_y; 69+zeros(1, 4)]';
confidenceBandValues = mvnpdf(confidenceBandPoints, mu, sigma);
xgrid = (xMu-(3.5*xSD)):(7*xSD/N):(xMu+(3.5*xSD));
ygrid = (yMu-(3.5*ySD)):(7*ySD/N):(yMu+(3.5*ySD));
[x, y] = meshgrid(xgrid, ygrid);
pdfValues = mvnpdf([x(:), y(:)], mu, sigma);
pdfValues = reshape(pdfValues, size(x));
figure
set(gcf, 'Position', [200, 200, 1700, 600])
subplot(1, 2, 1)
[C,h] = contour(xgrid, ygrid, pdfValues, ...
'LevelList', confidenceBandValues, 'LineWidth', 2);
% Add in text labels for level sets.
v = {'80%', '90%', '95%', '99%'};
textHandles = clabel(C, h, 'FontSize', 15, 'LabelSpacing', 2000);
m = length(textHandles);
for i = 1:m
set(textHandles(i), 'String', v(i));
end
set(h,'ShowText','off')
colormap gray(1)
set(gca, 'FontSize', 20, 'xlim', [58, 78], 'ylim', [59, 79], 'TickDir', 'out')
xlabel('Father''s Height (inches)', 'FontSize', 20)
ylabel('Son''s Height (inches)', 'FontSize', 20)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% Create the scatter plot %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
subplot(1, 2, 2)
plot(X(:, 1), X(:, 2), '.', ...
'MarkerSize', scatterPointSize, 'MarkerEdgeColor', [0.6, 0.6, 0.6])
set(gca, 'FontSize', 20, 'xlim', [58, 78], 'ylim', [59, 79], 'TickDir', 'out')
xlabel('Father''s Height (inches)', 'FontSize', 20)
ylabel('Son''s Height (inches)', 'FontSize', 20)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%% Add lines to both %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% In the next section, we choose two points, at father's heights
% of 64 and 72 inches, and plot the mean of the observations
% falling within a small window of those points. The goal is to
% show that the conditional means are close to the regression
% line.
% Set the points
point1x = 64;
point2x = 72;
% Create small windows.
epsilon = 0.5;
window1 = point1x + epsilon * [-1, 1];
window2 = point2x + epsilon * [-1, 1];
% Find Xs close to the point's xs.
inWindow1 = abs( sample(:,1) - point1x ) < epsilon;
inWindow2 = abs( sample(:,1) - point2x ) < epsilon;
% Find the mean Ys for Xs close to point's xs.
point1y = mean( sample( inWindow1, 2 ) );
point2y = mean( sample( inWindow2, 2 ) );
% Now go through and add the lines and points.
for i = 1:2
subplot(1,2,i)
set(gca,'OuterPosition',get(gca,'OuterPosition')+[0, 0+0.05, 0, 0])
hold on;
% Draw the vertical lines creating boundaries around the small
% windows.
% line([x1, x2], [y1, y2]) plots a line from x1,y1 to x2,y2, so
% line([x, x], [yLow, yHigh]) plots a vertical line at x.
lineLocations = [window1, window2];
yLimits = [50, 90];
for j = 1:4
line([lineLocations(j), lineLocations(j)], yLimits, ...
'Color', 'k', 'LineStyle', '--')
end
% The theoretical line has slope 1 and intercept 1 (means are 69
% and 68, so sons are, on average, 1 inch taller than fathers).
theoreticalLine = refline(1, 1);
regressionLine = refline(lmgalton(1, 1), lmgalton(1, 2));
set(theoreticalLine, 'Color', [0, 0, 0], 'LineStyle', '-.')
set(regressionLine, 'Color', [0, 0, 0])
% Place xs ('xr' symbols) at the conditional means.
plot([point1x, point2x], [point1y, point2y], 'xr', ...
'LineWidth', 2, 'MarkerSize', conditionalMeanMarkerSize)
end