# (document-viewer residue: "You are on page 1 of 3")

#

# Sparrow data analysis


# Sullivan
#
#
# Read in data
#
# header = TRUE tells read.table() that the first line of sparrow.txt holds
# the column names. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
#
sparrow <- read.table("sparrow.txt", header = TRUE)
#
#
# Scatterplot of wing length against age, drawn two equivalent ways
#
# 1. Formula interface: response ~ predictor, columns looked up in sparrow
#
plot(wing.length ~ age.days, data = sparrow)
#
# 2. Explicit x and y vectors pulled out of the data frame
#
plot(x = sparrow$age.days, y = sparrow$wing.length)
#
#
# Conduct a linear regression of sparrow wing.length against age.days
# (two methods)
#
# Standard linear model with Gaussian distribution assumption
#
sparrow.lm <- lm(wing.length ~ age.days, data = sparrow)
#
#
# Generalized linear model (can be used with other distributions, later in
# the semester). No family is passed here, so glm() falls back on its
# default, gaussian().
#
sparrow.glm <- glm(wing.length ~ age.days, data = sparrow)
#
#
# Compare output summaries
# (each call is a bare top-level expression, so its result is printed when
# the line is run at the console)
#
summary(sparrow.lm)
summary(sparrow.glm)
#
# Is the slope significantly different from zero?
# Check to see t-value is above critical value.
# Also, check that p-value is smaller than 0.05 say
#
# Another summary of the data is through an ANOVA type table
#
anova(sparrow.lm)
#
# The MSE can be found in this table as the Mean Sq of the Residuals
#
# or it can be computed directly as the residual sum of squares divided by
# the residual degrees of freedom. df.residual() reads the degrees of
# freedom from the fit instead of hard-coding 11, so the line stays correct
# if the data set changes.
#
sum(resid(sparrow.lm)^2) / df.residual(sparrow.lm)
#
# 0.04770085 (value recorded for this data set)
#
# or as
#
beta0 <- coef(sparrow.lm)[1]  # first coefficient: the intercept
beta1 <- coef(sparrow.lm)[2]  # second coefficient: the slope on age.days
#
# Squared differences between observed and fitted wing lengths, summed and
# divided by n - 2 (two estimated coefficients)
#
MSE <- sum((sparrow$wing.length - (beta0 + beta1 * sparrow$age.days))^2) /
  (length(sparrow$age.days) - 2)
#
#
#
# Check the Gaussian assumption on the errors with a normal probability
# (quantile-quantile) plot of the model residuals
#
qqnorm(residuals(sparrow.lm))
#
# Add the reference line; points lying close to it support normality
#
qqline(residuals(sparrow.lm))
#
#
#
# Make predictions of the mean with standard error
# at prescribed data points (ages 3 through 17)
#
sparrow.newpoints <- data.frame(age.days = seq(3, 17))
#
# se.fit = TRUE makes predict() return a list whose $fit and $se.fit
# components are used below. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
#
sparrow.predict <- predict(sparrow.lm, newdata = sparrow.newpoints,
                           se.fit = TRUE)
#
#
# Plot just the predictions first as points
#
plot(sparrow.newpoints$age.days, sparrow.predict$fit)
#
#
# Now plot as line with (approximate) 95% confidence envelope included
#
# First create what is needed to make the envelope, mean +/- 2 se.
# This is done before plotting so the axis limits can cover the envelope.
#
sparrow.upper <- sparrow.predict$fit + 2 * sparrow.predict$se.fit
sparrow.lower <- sparrow.predict$fit - 2 * sparrow.predict$se.fit
#
# Set up the plot, by plotting without including points (type = "n").
# ylim spans the envelope rather than just the fitted values; otherwise the
# envelope lines can run off the plotting region at the ends and be clipped.
#
plot(sparrow.newpoints$age.days, sparrow.predict$fit, type = "n",
     ylim = range(sparrow.lower, sparrow.upper),
     xlab = "Age (days)", ylab = "Predicted Wing Length (inches)")
#
# then include a line based on the predicted points, lwd determines line width
#
lines(sparrow.newpoints$age.days, sparrow.predict$fit, lwd = 3)
#
# plot the two envelope lines in a different color
#
lines(sparrow.newpoints$age.days, sparrow.upper, col = 8, lwd = 3)
lines(sparrow.newpoints$age.days, sparrow.lower, col = 8, lwd = 3)
#
# add a title
#
title("Predicted Wing Length")
#
#
#
# Now some useful commands for Homework 1 (See also Lec02 and Chp 02):
#
#
# To make a prediction for one point,
# for example what is the wing length for a bird at age 12
#
sparrow.predict.12 <- predict(sparrow.lm, newdata = data.frame(age.days = 12),
                              se.fit = TRUE)
#
#
# For the interval for a new observation (a prediction interval, which is
# wider than the confidence interval for the mean):
#
# You get the estimate from the prediction above and
# calculate the variance for the new observation as follows.
# MSE is the mean squared error computed earlier in this script; it adds the
# scatter of an individual bird to the uncertainty in the fitted mean.
#
s2.yh.new.sparrow <- MSE + sparrow.predict.12$se.fit^2
#
# standard error of new prediction
#
s.yh.new.sparrow <- sqrt(s2.yh.new.sparrow)
#
# Approximate 95% prediction interval (estimate +/- 2 standard errors)
# for wing length of new sparrow of age 12
#
sparrow.predict.12$fit + c(-2, 2) * s.yh.new.sparrow
#
#
#
# Obtain a 99 percent confidence interval for the slope:
#
#
sparrow.sample.size <- length(sparrow$wing.length)
#
# Notice that we usually use plus or minus 2 standard errors, which
# corresponds to the following t critical value:
#
qt(.975, sparrow.sample.size - 2)
#
# Here however, we are looking for alpha = 0.01, or 1-alpha/2 = 0.995, so
#
qt(.995, sparrow.sample.size - 2)
#
# The confidence interval can be found using beta1 +/- t(1-alpha/2) SE.
# The estimate and its standard error are read from the age.days row of the
# coefficient table in summary(sparrow.lm) (about 0.2702 and 0.0135 for this
# data) rather than typed in by hand, so no rounding error is introduced and
# the interval follows the fit if the data change.
#
sparrow.coef.table <- coef(summary(sparrow.lm))
sparrow.slope.est <- sparrow.coef.table["age.days", "Estimate"]
sparrow.slope.se <- sparrow.coef.table["age.days", "Std. Error"]
#
sparrow.slope.est +
  c(-1, 1) * qt(0.995, sparrow.sample.size - 2) * sparrow.slope.se
#
#
#

# (document-viewer residue: "You might also like")