Professional Documents
Culture Documents
mardia
> tr(S)
[1] 196.1
> eigen(S)
$values
[1] 167.76619 28.33381
$vectors
[,1] [,2]
[1,] 0.6929858 -0.7209512
[2,] 0.7209512 0.6929858
pottery, skulls
Warning message:
package ‘MVA’ was built under R version 3.2.5
> head_pca <- princomp(formula = ~ head1 + head2, data = head_dat, cor = FALSE, scores = TRUE)
> head_pca
Call:
princomp(formula = ~head1 + head2, data = head_dat, cor = FALSE,
scores = TRUE)
Standard deviations:
Comp.1 Comp.2
12.690766 5.215406
Loadings:
Comp.1 Comp.2
head1 0.693 -0.721
head2 0.721 0.693
> colMeans(head_dat)
head1 head2
185.72 183.84
> plot(x=head_dat$head1,y=head_dat$head2,xlab = "First son's head length (mm)",ylab = "Second son's head length")
> a1 <- 183.84+(0.721/0.693)*(-185.72)
> b1 <- 0.721/0.693
> b1 <- 0.721/0.693
> a2 <- 183.84+(-0.693/0.721)*(-185.72)
> b2 <- -0.693/0.721
> abline(a1,b1)
> abline(a2,b2,lty="dashed")
>
> #CONTOH 2
> heptathlon <- read.table(file="C:/data/heptathlon.txt",head=TRUE)
Error in file(file, "rt") : cannot open the connection
In addition: Warning message:
In file(file, "rt") :
cannot open file 'C:/data/heptathlon.txt': No such file or directory
> heptathlon <- read.table(file="d:/heptathlon.txt",head=TRUE)
> heptathlon
name country hurdles highjump shot run200m longjump javelin run800m score
1 Joyner-Kersee (USA) 12.69 1.86 15.80 22.56 7.27 45.66 128.51 7291
2 John (GDR) 12.85 1.80 16.23 23.65 6.71 42.56 126.12 6897
3 Behmer (GDR) 13.20 1.83 14.20 23.10 6.68 44.54 124.20 6858
4 Sablovskaite (URS) 13.61 1.80 15.23 23.92 6.25 42.78 132.24 6540
5 Choubenkova (URS) 13.51 1.74 14.76 23.93 6.32 47.46 127.90 6540
6 Schulz (GDR) 13.75 1.83 13.50 24.65 6.33 42.82 125.79 6411
7 Fleming (AUS) 13.38 1.80 12.88 23.59 6.37 40.28 132.54 6351
8 Greiner (USA) 13.55 1.80 14.13 24.48 6.47 38.00 133.65 6297
9 Lajbnerova (CZE) 13.63 1.83 14.28 24.86 6.11 42.20 136.05 6252
10 Bouraga (URS) 13.25 1.77 12.62 23.59 6.28 39.06 134.74 6252
11 Wijnsma (HOL) 13.75 1.86 13.01 25.03 6.34 37.86 131.49 6205
12 Dimitrova (BUL) 13.24 1.80 12.88 23.59 6.37 40.28 132.54 6171
13 Scheider (SWI) 13.85 1.86 11.58 24.87 6.05 47.50 134.93 6137
14 Braun (FRG) 13.71 1.83 13.16 24.78 6.12 44.58 142.82 6109
15 Ruotsalainen (FIN) 13.79 1.80 12.32 24.61 6.08 45.44 137.06 6101
16 Yuping (CHN) 13.93 1.86 14.21 25.00 6.40 38.60 146.67 6087
17 Hagger (GB) 13.47 1.80 12.75 25.47 6.34 35.76 138.48 5975
18 Brown (USA) 14.07 1.83 12.69 24.83 6.13 44.34 146.43 5972
19 Mulliner (GB) 14.39 1.71 12.68 24.92 6.10 37.76 138.02 5746
20 Hautenauve (BEL) 14.04 1.77 11.81 25.61 5.99 35.68 133.90 5734
21 Kytola (FIN) 14.31 1.77 11.66 25.69 5.75 39.48 133.35 5686
22 Geremias (BRA) 14.23 1.71 12.95 25.50 5.50 39.64 144.02 5508
23 Hui-Ing (TAI) 14.85 1.68 10.00 25.23 5.47 39.14 137.30 5290
24 Jeong-Mi (KOR) 14.53 1.71 10.83 26.61 5.50 39.26 139.17 5289
25 Launa (PNG) 16.42 1.50 11.78 26.16 4.88 46.38 163.43 4566
> head(heptathlon)
name country hurdles highjump shot run200m longjump javelin run800m score
1 Joyner-Kersee (USA) 12.69 1.86 15.80 22.56 7.27 45.66 128.51 7291
2 John (GDR) 12.85 1.80 16.23 23.65 6.71 42.56 126.12 6897
3 Behmer (GDR) 13.20 1.83 14.20 23.10 6.68 44.54 124.20 6858
4 Sablovskaite (URS) 13.61 1.80 15.23 23.92 6.25 42.78 132.24 6540
5 Choubenkova (URS) 13.51 1.74 14.76 23.93 6.32 47.46 127.90 6540
6 Schulz (GDR) 13.75 1.83 13.50 24.65 6.33 42.82 125.79 6411
> heptathlon$hurdles <- with(heptathlon, max(hurdles)-hurdles)
> heptathlon$run200m <- with(heptathlon, max(run200m)-run200m)
> heptathlon$run800m <- with(heptathlon, max(run800m)-run800m)
> R <- cor(heptathlon[,c("hurdles","highjump","shot","run200m","longjump","javelin", "run800m")])
> round(R,2)
hurdles highjump shot run200m longjump javelin run800m
hurdles 1.00 0.81 0.65 0.77 0.91 0.01 0.78
highjump 0.81 1.00 0.44 0.49 0.78 0.00 0.59
shot 0.65 0.44 1.00 0.68 0.74 0.27 0.42
run200m 0.77 0.49 0.68 1.00 0.82 0.33 0.62
longjump 0.91 0.78 0.74 0.82 1.00 0.07 0.70
javelin 0.01 0.00 0.27 0.33 0.07 1.00 -0.02
run800m 0.78 0.59 0.42 0.62 0.70 -0.02 1.00
> new.heptathlon <- heptathlon[-25,]
> R <- cor(new.heptathlon[,c("hurdles","highjump","shot","run200m","longjump","javelin", "run800m")])
> round(R,2)
hurdles highjump shot run200m longjump javelin run800m
hurdles 1.00 0.58 0.77 0.83 0.89 0.33 0.56
highjump 0.58 1.00 0.46 0.39 0.66 0.35 0.15
shot 0.77 0.46 1.00 0.67 0.78 0.34 0.41
run200m 0.83 0.39 0.67 1.00 0.81 0.47 0.57
longjump 0.89 0.66 0.78 0.81 1.00 0.29 0.52
javelin 0.33 0.35 0.34 0.47 0.29 1.00 0.26
run800m 0.56 0.15 0.41 0.57 0.52 0.26 1.00
> library(MVA)
> new.heptathlon_pca <- prcomp(formula = ~ hurdles+highjump+shot+run200m+longjump+javelin+run800m, data =new.heptathlon, scale = TRUE)
> new.heptathlon_pca
Standard deviations:
[1] 2.0793370 0.9481532 0.9109016 0.6831967 0.5461888 0.3374549 0.2620420
Rotation:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
hurdles -0.4503876 0.05772161 -0.1739345 0.04840598 -0.19889364 0.84665086 -0.06961672
highjump -0.3145115 -0.65133162 -0.2088272 -0.55694554 0.07076358 -0.09007544 0.33155910
shot -0.4024884 -0.02202088 -0.1534709 0.54826705 0.67166466 -0.09886359 0.22904298
run200m -0.4270860 0.18502783 0.1301287 0.23095946 -0.61781764 -0.33279359 0.46971934
longjump -0.4509639 -0.02492486 -0.2697589 -0.01468275 -0.12151793 -0.38294411 -0.74940781
javelin -0.2423079 -0.32572229 0.8806995 0.06024757 0.07874396 0.07193437 -0.21108138
run800m -0.3029068 0.65650503 0.1930020 -0.57418128 0.31880178 -0.05217664 0.07718616
> summary(new.heptathlon_pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 2.0793 0.9482 0.9109 0.68320 0.54619 0.33745 0.26204
Proportion of Variance 0.6177 0.1284 0.1185 0.06668 0.04262 0.01627 0.00981
Cumulative Proportion 0.6177 0.7461 0.8646 0.93131 0.97392 0.99019 1.00000
> a1 <- new.heptathlon_pca$rotation[,1]
> a1
hurdles highjump shot run200m longjump javelin run800m
-0.4503876 -0.3145115 -0.4024884 -0.4270860 -0.4509639 -0.2423079 -0.3029068
> plot(new.heptathlon_pca)
> install.packages("princomp")
Installing package into ‘C:/Users/Lenovoku/Documents/R/win-library/3.2’
(as ‘lib’ is unspecified)
Warning in install.packages :
package ‘princomp’ is not available (for R version 3.2.4 Revised)
> install.packages("Princomp")
Installing package into ‘C:/Users/Lenovoku/Documents/R/win-library/3.2’
(as ‘lib’ is unspecified)
Warning in install.packages :
package ‘Princomp’ is not available (for R version 3.2.4 Revised)
> library(princomp)
Error in library(princomp) : there is no package called ‘princomp’
> center <- new.heptathlon_pca$center
> scale <- new.heptathlon_pca$scale
> hm <- as.matrix(new.heptathlon[,c("hurdles","highjump","shot", "run200m","longjump","javelin", "run800m")])
> drop(scale(hm, center = center, scale = scale) %*% new.heptathlon_pca$rotation[,1])
1 2 3 4 5 6 7
-4.757530189 -3.147943402 -2.926184760 -1.288135516 -1.503450994 -0.958467101 -0.953445060
8 9 10 11 12 13 14
-0.633239267 -0.381571974 -0.522322004 -0.217701500 -1.075984276 0.003014986 0.109183759
15 16 17 18 19 20 21
0.208868056 0.232507119 0.659520046 0.756854602 1.880932819 1.828170404 2.118203163
22 23 24
2.770706272 3.901166920 3.896847898
> predict(new.heptathlon_pca)[,1]
1 2 3 4 5 6 7
-4.757530189 -3.147943402 -2.926184760 -1.288135516 -1.503450994 -0.958467101 -0.953445060
8 9 10 11 12 13 14
-0.633239267 -0.381571974 -0.522322004 -0.217701500 -1.075984276 0.003014986 0.109183759
15 16 17 18 19 20 21
0.208868056 0.232507119 0.659520046 0.756854602 1.880932819 1.828170404 2.118203163
22 23 24
2.770706272 3.901166920 3.896847898
> cor(new.heptathlon$score, new.heptathlon_pca$x[,1])
[1] -0.9931168
> plot(new.heptathlon$score,new.heptathlon_pca$x[,1],xlab="Heptathlon score",ylab="PC 1 score")
> biplot(new.heptathlon_pca,col = c("magenta","black"),cex=0.75,xlim=c(-0.5,0.6))
>
> #CONTOH 3
>
> USairpollution <- read.csv(file="D:/USairpollution.csv")
> USairpollution
City SO2 temp manu popul wind precip predays
1 Albany 46 47.6 44 116 8.8 33.36 135
2 Albuquerque 11 56.8 46 244 8.9 7.77 58
3 Atlanta 24 61.5 368 497 9.1 48.34 115
4 Baltimore 47 55.0 625 905 9.6 41.31 111
5 Buffalo 11 47.1 391 463 12.4 36.11 166
6 Charleston 31 55.2 35 71 6.5 40.75 148
7 Chicago 110 50.6 3344 3369 10.4 34.44 122
8 Cincinnati 23 54.0 462 453 7.1 39.04 132
9 Cleveland 65 49.7 1007 751 10.9 34.99 155
10 Columbus 26 51.5 266 540 8.6 37.01 134
11 Dallas 9 66.2 641 844 10.9 35.94 78
12 Denver 17 51.9 454 515 9.0 12.95 86
13 Des Moines 17 49.0 104 201 11.2 30.85 103
14 Detroit 35 49.9 1064 1513 10.1 30.96 129
15 Hartford 56 49.1 412 158 9.0 43.37 127
16 Houston 10 68.9 721 1233 10.8 48.19 103
17 Indianapolis 28 52.3 361 746 9.7 38.74 121
18 Jacksonville 14 68.4 136 529 8.8 54.47 116
19 Kansas City 14 54.5 381 507 10.0 37.00 99
20 Little Rock 13 61.0 91 132 8.2 48.52 100
21 Louisville 30 55.6 291 593 8.3 43.11 123
22 Memphis 10 61.6 337 624 9.2 49.10 105
23 Miami 10 75.5 207 335 9.0 59.80 128
24 Milwaukee 16 45.7 569 717 11.8 29.07 123
25 Minneapolis 29 43.5 699 744 10.6 25.94 137
26 Nashville 18 59.4 275 448 7.9 46.00 119
27 New Orleans 9 68.3 204 361 8.4 56.77 113
28 Norfolk 31 59.3 96 308 10.6 44.68 116
29 Omaha 14 51.5 181 347 10.9 30.18 98
30 Philadelphia 69 54.6 1692 1950 9.6 39.93 115
31 Phoenix 10 70.3 213 582 6.0 7.05 36
32 Pittsburgh 61 50.4 347 520 9.4 36.22 147
33 Providence 94 50.0 343 179 10.6 42.75 125
34 Richmond 26 57.8 197 299 7.6 42.59 115
35 Salt Lake City 28 51.0 137 176 8.7 15.17 89
36 San Francisco 12 56.7 453 716 8.7 20.66 67
37 Seattle 29 51.1 379 531 9.4 38.79 164
38 St. Louis 56 55.9 775 622 9.5 35.89 105
39 Washington 29 57.3 434 757 9.3 38.89 111
40 Wichita 8 56.6 125 277 12.7 30.58 82
41 Wilmington 36 54.0 80 80 9.0 40.25 114
> USairpollution$negtemp <- USairpollution$temp * (-1)
> head(USairpollution)
City SO2 temp manu popul wind precip predays negtemp
1 Albany 46 47.6 44 116 8.8 33.36 135 -47.6
2 Albuquerque 11 56.8 46 244 8.9 7.77 58 -56.8
3 Atlanta 24 61.5 368 497 9.1 48.34 115 -61.5
4 Baltimore 47 55.0 625 905 9.6 41.31 111 -55.0
5 Buffalo 11 47.1 391 463 12.4 36.11 166 -47.1
6 Charleston 31 55.2 35 71 6.5 40.75 148 -55.2
> round(cor(USairpollution[,-c(1:3)]),3)
manu popul wind precip predays negtemp
manu 1.000 0.955 0.238 -0.032 0.132 0.190
popul 0.955 1.000 0.213 -0.026 0.042 0.063
wind 0.238 0.213 1.000 -0.013 0.164 0.350
precip -0.032 -0.026 -0.013 1.000 0.496 -0.386
predays 0.132 0.042 0.164 0.496 1.000 0.430
negtemp 0.190 0.063 0.350 -0.386 0.430 1.000
> usair_pca <- princomp(USairpollution[,-c(1:3)], cor = TRUE)
> summary(usair_pca, loadings = TRUE)
Importance of components:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
Standard deviation 1.4819456 1.2247218 1.1809526 0.8719099 0.33848287 0.185599752
Proportion of Variance 0.3660271 0.2499906 0.2324415 0.1267045 0.01909511 0.005741211
Cumulative Proportion 0.3660271 0.6160177 0.8484592 0.9751637 0.99425879 1.000000000
Loadings:
        Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
manu    -0.612  0.168 -0.273 -0.137  0.102  0.703
popul   -0.578  0.222 -0.350               -0.695
wind    -0.354 -0.131  0.297  0.869 -0.113
precip         -0.623 -0.505  0.171  0.568
predays -0.238 -0.708        -0.311 -0.580
negtemp -0.330 -0.128  0.672 -0.306  0.558 -0.136
> usair_reg <- lm(SO2 ~ usair_pca$scores,data = USairpollution)
> summary(usair_reg)
Call:
lm(formula = SO2 ~ usair_pca$scores, data = USairpollution)
Residuals:
Min 1Q Median 3Q Max
-23.004 -8.542 -0.991 5.758 48.758
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 30.049 2.286 13.146 6.91e-15 ***
usair_pca$scoresComp.1 -9.942 1.542 -6.446 2.28e-07 ***
usair_pca$scoresComp.2 -2.240 1.866 -1.200 0.23845
usair_pca$scoresComp.3 -0.375 1.935 -0.194 0.84752
usair_pca$scoresComp.4 -8.549 2.622 -3.261 0.00253 **
usair_pca$scoresComp.5 15.176 6.753 2.247 0.03122 *
usair_pca$scoresComp.6 39.271 12.316 3.189 0.00306 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1