Professional Documents
Culture Documents
Group 4
January 9, 2018
library("party")
##
## Attaching package: 'zoo'
library(caret)
library(pROC)
##
## Attaching package: 'pROC'
## randomForest 4.6-12
##
## Attaching package: 'randomForest'
# Load the bank-marketing data and carve out the training and test subsets.
#
# stringsAsFactors = TRUE is passed explicitly: since R 4.0 read.csv() keeps
# text columns as character by default, whereas tree/forest fitters such as
# party::ctree() expect categorical predictors to be factors.
# NOTE(review): the file is read with read.csv()'s default "," separator --
# confirm that matches the copy of the CSV on disk.
bank <- read.csv("bank-additional-full.csv", stringsAsFactors = TRUE)
bank$y <- as.factor(bank$y)

# Fix the RNG seed so the random half-sample (and every result derived from
# it) can be reproduced from run to run.
set.seed(2018)
random <- sample(
  seq_len(nrow(bank)),
  size = (0.5 * nrow(bank)),
  replace = FALSE
)
bank1 <- bank[random, ]

# The split below uses hard-coded row positions up to 16475. Fail loudly if
# the sample is shorter, instead of silently producing all-NA rows.
stopifnot(nrow(bank1) >= 16475)
strain <- bank1[c(1:672, 1000:14567), ]
stest <- bank1[c(673:999, 14568:16475), ]
## no yes MeanDecreaseAccuracy
## age 1.627090e-04 0.0006525434 1.527186e-04
## job 1.457192e-04 0.0007457360 1.463129e-04
## marital 7.628708e-05 0.0004135245 8.018263e-05
## education 1.307121e-04 0.0006393608 1.362917e-04
## default 5.824307e-05 0.0003637880 5.356985e-05
## housing 6.076103e-05 0.0003546303 6.691713e-05
## loan 4.676190e-05 0.0002534952 5.014895e-05
## contact 4.954949e-04 0.0006227242 4.395141e-04
## month 1.871339e-03 0.0010763271 1.617304e-03
## day_of_week 1.562253e-04 0.0006181445 1.548155e-04
## duration 2.033313e-04 0.0012763713 2.511003e-04
## campaign 8.408737e-05 0.0004707685 9.046708e-05
## pdays 1.647175e-04 0.0011392946 1.936220e-04
## previous 1.922123e-04 0.0005925649 1.759257e-04
## poutcome 1.588325e-04 0.0011293179 1.912898e-04
## emp.var.rate 2.235286e-03 0.0022622801 1.990297e-03
## cons.price.idx 1.696626e-03 0.0009206261 1.491864e-03
## cons.conf.idx 1.249113e-03 0.0009723164 1.072625e-03
## euribor3m 1.891732e-03 0.0022420016 1.629565e-03
## nr.employed 1.516485e-03 0.0023723058 1.366323e-03
# Print the summary of the fitted model fit3, then draw its default plot.
summary(object = fit3)
plot(x = fit3)
## [1] 0.9181208
# Recall from the first row of CM_bank: the [1, 1] cell divided by the sum of
# that row's first two cells.
# NOTE(review): which class row 1 stands for depends on how CM_bank was built
# (not visible here) -- confirm before interpreting the value.
recall_bank <- CM_bank[1, 1] / sum(CM_bank[1, 1:2])
recall_bank
## [1] 0.9678392
# F1 score: harmonic mean of precision (presisi_bank) and recall.
F1_bank <- 2 * presisi_bank * recall_bank / (presisi_bank + recall_bank)
F1_bank
## [1] 0.9546468
library(pROC)
# ROC analysis of the fit2 predictions against the test-set labels.
# as.numeric() converts the predictions into the numeric score roc() needs.
# NOTE(review): if prediksi_fit2 holds hard class labels, the curve has a
# single operating point; predicted class probabilities would give a fuller
# curve -- confirm what prediksi_fit2 contains.
des <- as.numeric(prediksi_fit2)
# Build the ROC object once and reuse it for the plot and the printed summary.
# levels and direction are spelled out (control = "no", case = "yes",
# controls < cases) so the result does not depend on pROC's auto-detection.
roc_fit2 <- roc(stest$y, des, levels = c("no", "yes"), direction = "<")
plot(roc_fit2)
roc_fit2
##
## Call:
## roc.default(response = stest$y, predictor = des)
##
## Data: des in 1990 controls (stest$y no) < 245 cases (stest$y yes).
## Area under the curve: 0.7979
3. Decision Tree
library("party")
library(caret)
library(ggplot2)
# Fit a conditional inference tree for y on all remaining columns of the
# training set, then print the fitted tree structure.
fit1 <- ctree(formula = y ~ ., data = strain)
print(fit1)
##
## Conditional inference tree with 34 terminal nodes
##
## Response: y
## Inputs: age, job, marital, education, default, housing, loan, contact, month, day_of_week, duration, campaign, pdays, previous, poutcome, emp.var.rate, cons.price.idx, cons.conf.idx, euribor3m, nr.employed
## Number of observations: 14240
##
## 1) duration <= 505; criterion = 1, statistic = 2470.718
## 2) poutcome == {success}; criterion = 1, statistic = 2274.884
## 3) nr.employed <= 5076.2; criterion = 1, statistic = 50.438
## 4) duration <= 167; criterion = 1, statistic = 25.793
## 5)* weights = 80
## 4) duration > 167
## 6)* weights = 253
## 3) nr.employed > 5076.2
## 7) duration <= 156; criterion = 0.999, statistic = 20.702
## 8)* weights = 25
## 7) duration > 156
## 9) cons.conf.idx <= -47.1; criterion = 0.971, statistic = 12.569
## 10)* weights = 19
## 9) cons.conf.idx > -47.1
## 11)* weights = 34
## 2) poutcome == {failure, nonexistent}
## 12) nr.employed <= 5076.2; criterion = 1, statistic = 1366.082
## 13) duration <= 172; criterion = 1, statistic = 195.165
## 14) duration <= 77; criterion = 1, statistic = 20.23
## 15)* weights = 131
## 14) duration > 77
## 16)* weights = 414
## 13) duration > 172
## 17) duration <= 264; criterion = 0.999, statistic = 16.192
## 18)* weights = 281
## 17) duration > 264
## 19)* weights = 301
## 12) nr.employed > 5076.2
## 20) month == {apr, dec, mar, oct}; criterion = 1, statistic = 1327.131
## 21) month == {mar, oct}; criterion = 1, statistic = 64.885
## 22) duration <= 166; criterion = 1, statistic = 33.264
## 23)* weights = 71
## 22) duration > 166
## 24)* weights = 45
## 21) month == {apr, dec}
## 25) day_of_week == {fri, mon}; criterion = 1, statistic = 42.993
## 26) age <= 27; criterion = 0.996, statistic = 25.188
## 27)* weights = 32
## 26) age > 27
## 28)* weights = 326
## 25) day_of_week == {thu, tue, wed}
## 29) duration <= 277; criterion = 1, statistic = 29.294
## 30)* weights = 239
## 29) duration > 277
## 31)* weights = 103
## 20) month == {aug, jul, jun, may, nov}
## 32) duration <= 393; criterion = 1, statistic = 281.864
## 33) emp.var.rate <= -0.1; criterion = 1, statistic = 79.272
## 34) duration <= 282; criterion = 1, statistic = 45.444
## 35) contact == {telephone}; criterion = 0.998, statistic = 18.177
## 36) euribor3m <= 4.191; criterion = 0.985, statistic = 23.877
## 37) job == {admin., blue-collar, entrepreneur, management, services, student, technician, unemployed}; criterion = 1, statistic = 108
## 38)* weights = 207
## 37) job == {housemaid, retired, self-employed}
## 39)* weights = 11
## 36) euribor3m > 4.191
## 40)* weights = 18
## 35) contact == {cellular}
## 41) duration <= 130; criterion = 0.984, statistic = 14.293
## 42)* weights = 1068
## 41) duration > 130
## 43)* weights = 996
## 34) duration > 282
## 44)* weights = 388
## 33) emp.var.rate > -0.1
## 45) duration <= 355; criterion = 1, statistic = 31.705
## 46)* weights = 6566
## 45) duration > 355
## 47)* weights = 256
## 32) duration > 393
## 48) cons.price.idx <= 93.444; criterion = 1, statistic = 19.96
## 49)* weights = 272
## 48) cons.price.idx > 93.444
## 50)* weights = 401
## 1) duration > 505
## 51) duration <= 836; criterion = 1, statistic = 80.449
## 52) nr.employed <= 5076.2; criterion = 1, statistic = 74.224
## 53) poutcome == {success}; criterion = 0.996, statistic = 16.856
## 54)* weights = 45
## 53) poutcome == {failure, nonexistent}
## 55)* weights = 112
## 52) nr.employed > 5076.2
## 56) duration <= 647; criterion = 1, statistic = 21.914
## 57) cons.price.idx <= 92.893; criterion = 1, statistic = 19.682
## 58)* weights = 103
## 57) cons.price.idx > 92.893
## 59) month == {aug, oct}; criterion = 0.981, statistic = 22.597
## 60)* weights = 64
## 59) month == {apr, jul, jun, may, nov}
## 61)* weights = 424
## 56) duration > 647
## 62) contact == {telephone}; criterion = 0.987, statistic = 11.674
## 63)* weights = 145
## 62) contact == {cellular}
## 64)* weights = 275
## 51) duration > 836
## 65) education == {basic.4y, basic.9y, university.degree}; criterion = 0.974, statistic = 21.818
## 66)* weights = 279
## 65) education == {basic.6y, high.school, professional.course, unknown}
## 67)* weights = 256
# Inspect the fitted tree, then score it on the held-out test set.
summary(fit1)
plot(fit1)
prediksi_fit1 <- predict(fit1, stest)
# Keep the predicted class next to each test row.
stest$prediksi <- unlist(prediksi_fit1)
# caret::confusionMatrix() expects the predictions as `data` and the true
# labels as `reference`; passing them the other way round swaps the roles of
# sensitivity/specificity and of the predictive values in the report.
# NOTE(review): caret treats the first factor level as the positive class
# unless `positive` is supplied -- confirm that is the class of interest.
confusionMatrix(data = prediksi_fit1, reference = stest$y)
library(pROC)
# ROC analysis of the decision-tree predictions against the test-set labels.
# as.numeric() converts the predicted classes into the numeric score roc()
# needs.
# NOTE(review): prediksi_fit1 comes from predict(fit1, stest) without a
# probability type, so the curve is built from class labels and has a single
# operating point; class probabilities would give a fuller curve.
des <- as.numeric(prediksi_fit1)
# Build the ROC object once and reuse it for the plot and the printed summary.
# levels and direction are spelled out (control = "no", case = "yes",
# controls < cases) so the result does not depend on pROC's auto-detection.
roc_fit1 <- roc(stest$y, des, levels = c("no", "yes"), direction = "<")
plot(roc_fit1)
roc_fit1
##
## Call:
## roc.default(response = stest$y, predictor = des)
##
## Data: des in 1990 controls (stest$y no) < 245 cases (stest$y yes).
## Area under the curve: 0.7375