# Lab script (originally spread across 3 pages)

#
# EDA and Multiple Regression
#
# IGSS, Wuhan, Summer, 2015
#
library(maptools)
library(spdep)
library(pastecs)
library(e1071)
library(car)
#
# Set the working directory to your lab folder where "mapping.R" is located.
# Edit lab_dir for your own machine; the default is the original author's path.
#
lab_dir <- "C:\\Users\\bin li\\Documents\\wuhan 2015\\labs"
# Only change directory when the folder exists, so the script can also be run
# from a session that is already inside the lab folder.
if (dir.exists(lab_dir)) {
  setwd(lab_dir)
}
# Fail early with an actionable message instead of a bare source() error.
if (!file.exists("mapping.R")) {
  stop(
    "mapping.R not found in '", getwd(),
    "'; point lab_dir at your lab folder.",
    call. = FALSE
  )
}
# Presumably defines mapping.seq(), which is called later in this script.
source("mapping.R")
#
# Load the Columbus polygon shapefile found in the spdep installation
# and draw a first map of CRIME
#
col_file <- system.file("etc", "shapes", "columbus.shp", package = "spdep")
col_shp <- readShapePoly(col_file)
#
# Copy the attribute table into a plain data frame and attach it, so that
# its columns (CRIME, INC, ...) can be referenced by bare name below
#
col_tab <- data.frame(col_shp)
attach(col_tab)
#
# List the available columns, then map CRIME in 5 classes
#
colnames(col_tab)
mapping.seq(col_shp, CRIME, 5, main = "Crime")
#
# Numeric summaries of CRIME
#
stat.desc(CRIME) # table of descriptive statistics
fivenum(CRIME)   # five-number summary
#
# Graphical summaries
#
# Histogram, followed by the kurtosis and skewness of the same variable
#
hist(CRIME)
kurtosis(CRIME)
skewness(CRIME)
#
# Kernel density estimates; experiment with the bandwidth argument "bw"
#
plot(density(CRIME))                    # default bandwidth
lines(density(CRIME, bw = 10), lty = 2) # bw = 10 overlaid as a dashed line
#
# Stem-and-leaf display
#
stem(CRIME)
#
# Box-and-whisker plot
#
boxplot(CRIME)
#
# Normal quantile-quantile plot with a red reference line to check normality
#
qqnorm(
  CRIME,
  main = "Normal Q-Q Plot",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)
qqline(CRIME, distribution = qnorm, col = "red")
#
# Linear relationships between CRIME and the candidate predictors
#
# Pairwise correlation matrix
#
cor(cbind(CRIME, HOVAL, INC, OPEN, PLUMB, DISCBD))
#
# Scatter plot matrices (base graphics, then the car version).
# Look for monotonic relations (red line)
#
pairs(
  cbind(CRIME, HOVAL, INC, OPEN, PLUMB, DISCBD),
  pch = 1,
  cex = 0.5
)
scatterplotMatrix(~ CRIME + HOVAL + INC + OPEN + PLUMB + DISCBD)
#
# Scatter plot of CRIME against INC with two overlays:
# the least-squares regression line (red) and a LOWESS smoother (blue)
#
plot(INC, CRIME)
abline(lm(CRIME ~ INC), col = "red")     # fitted straight line
lines(lowess(CRIME ~ INC), col = "blue") # locally weighted smoother
#
# Spatial autocorrelation: Moran scatterplot
#
# Build the neighbour list from the polygons (queen = TRUE)
#
col_nb <- poly2nb(col_shp, queen = TRUE)
#
# Turn the neighbour list into two spatial weights objects:
# col.matw uses style "W", col.matc uses style "B"
#
col.matw <- nb2listw(col_nb, style = "W")
col.matc <- nb2listw(col_nb, style = "B")
#
# Moran scatter plot and Moran test of CRIME, both using the "W" weights
#
moran.plot(CRIME, col.matw, pch = 20)
moran.test(CRIME, col.matw)
#
# Local Moran's I to show clusters (computed with the style "B" weights)
#
col_localMC <- localmoran(CRIME, col.matc)
# localmoran() returns a matrix with one row per polygon, so map a single
# column at a time: column 1 holds the local statistic, column 5 its p-value.
mapping.seq(col_shp, col_localMC[, 1], 5, main = "Local Moran")
mapping.seq(col_shp, col_localMC[, 5], 5, main = "Local Moran P values")
#
# ---------------------
# Regression diagnostics
#
# Assessing leverage: hat values
#
crime.1 <- lm(CRIME ~ INC + HOVAL + DISCBD)
crime_hat <- hatvalues(crime.1)
# Derive the sample size and the mean leverage from the fitted model instead
# of hard-coding n = 49 and 4 coefficients (mean leverage = coefficients / n).
n_obs <- length(crime_hat)
mean_hat <- mean(crime_hat)
plot(crime_hat)
abline(h = c(2, 3) * mean_hat, lty = 2) # highlight points 2-3 times above mean
identify(seq_len(n_obs), crime_hat) # click points on graph to see ID
#
# back to R console and hit the stop button to exit the interactive mode
#
# CRIME observations with hat values 2 sd above the mean
#
which(crime_hat >= (mean_hat + 2 * sd(crime_hat)))
#
# Cook's distance
#
# cooks.distance() from stats replaces car's cookd(), which is no longer
# available in current car releases.
crime_cookd <- cooks.distance(crime.1)
# Rule-of-thumb cutoff 4 / n, with n taken from the data rather than fixed at 49
cook_cutoff <- 4 / length(crime_cookd)
plot(crime_cookd)
abline(h = cook_cutoff, lty = 2) # Cutoff display
identify(seq_along(crime_cookd), crime_cookd) # click points to see ID
which(crime_cookd >= cook_cutoff)
#
# Normality, homoscedasticity, residuals
#
crime_res <- residuals(crime.1)
#
# Density of the residuals; the extra bandwidths are overlaid on this plot,
# where their axes match (the bw = 12 curve was previously drawn onto the
# residual index plot further down).
#
plot(density(crime_res))
lines(density(crime_res, bw = 9), lty = 2)
lines(density(crime_res, bw = 12), lty = 2)
#
# Normal Q-Q plot of the residuals
#
qqnorm(
  crime_res,
  main = "Normal Q-Q Plot",
  xlab = "Theoretical Quantiles",
  ylab = "Sample Quantiles"
)
qqline(crime_res, distribution = qnorm, col = "red")
#
# Residuals in observation order, joined by a line, with a zero reference
#
plot(crime_res)
lines(crime_res)
abline(h = 0, lty = 2)
#
# Spatial autocorrelation, residuals
#
lm.morantest(crime.1, col.matw)
#
# --------------------------------------
# Refit model with outliers as indicator variables
#
# Create a new column "itor": 1 for observations 7, 10 and 20, 0 elsewhere.
# The column is rebuilt from scratch each time this section is run.
#
col_tab$itor <- 0
col_tab$itor[c(7, 10, 20)] <- 1
#
# Same predictors as crime.1 plus the indicator
#
crime.2 <- lm(CRIME ~ INC + HOVAL + DISCBD + itor, data = col_tab)
#
# As crime.2 but without HOVAL
#
crime.3 <- lm(CRIME ~ INC + DISCBD + itor, data = col_tab)
#
# Is the model improved? Also re-run the diagnoses to compare with
# previous models.
#
# --------------------------------------------
# Run the SAR model to take care of spatial autocorrelation
#
# NOTE(review): recent spdep releases are understood to have moved
# errorsarlm() to the spatialreg package; confirm the installed version.
#
crime3_sar <- errorsarlm(
  CRIME ~ INC + DISCBD + itor,
  data = col_tab,
  listw = col.matw
)
summary(crime3_sar)
#
# Moran test on the model residuals.
# NOTE(review): the residuals are rounded to whole numbers and tested with
# the style "B" weights, while the model was fitted with the style "W"
# weights; confirm both choices are intended.
#
sar_moran3.res <- round(residuals(crime3_sar, type = "response"))
moran.test(sar_moran3.res, col.matc)
#
# Detach col_tab once it is no longer needed on the search path.
#
detach(col_tab)

# End of lab script