# You are on page 1 of 9

# ---- Package installation ----
# Install whatever this script needs that is not already present.
# Package names must be quoted strings; installing only the missing ones
# avoids re-downloading everything on each run.
# "party" is included because the script later calls library(party).
required_pkgs <- c(
  "tm", "wordcloud", "RWeka", "SnowballC",
  "caret", "rminer", "kernlab", "rpart", "party"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(tm)

library(wordcloud)

library(RWeka)

library(SnowballC)

library(caret)

library(rminer)

library(kernlab)

library(rpart)

# ---- Load the review data ----
# The original called setwd() with no argument, which is an error, and
# changing the working directory from a script is fragile anyway.
# Point `data_path` at the CSV instead (relative to where R is started).
data_path <- "BOOK1.csv"
stopifnot(file.exists(data_path))

# Keep the review text as character; only the class label becomes a factor.
book_reviews <- read.csv(data_path, header = TRUE, stringsAsFactors = FALSE)

# The rest of the script reads the `Label` and `review` columns.
stopifnot(all(c("Label", "review") %in% names(book_reviews)))

str(book_reviews)

book_reviews$Label <- factor(book_reviews$Label)

summary(book_reviews)

nrow(book_reviews)

# Class balance of the full data set.
prop.table(table(book_reviews$Label))

# ---- Split into train (50%), test1 (25%) and test2 (25%) ----
# createDataPartition() does stratified sampling on the label, so each
# subset keeps roughly the same class proportions as the full data.
set.seed(100)

inTrain <- createDataPartition(y = book_reviews$Label, p = 0.50, list = FALSE)

# Row subsetting needs the trailing comma; without it `[` selects columns.
train_m <- book_reviews[inTrain, ]

testdata <- book_reviews[-inTrain, ]

# Partition the held-out half itself. Indices computed on the full data
# would run past the end of `testdata` and produce NA rows.
inTest <- createDataPartition(y = testdata$Label, p = 0.50, list = FALSE)
test1_m <- testdata[inTest, ]

test2_m <- testdata[-inTest, ]

nrow(train_m)

summary(train_m)

nrow(test1_m)

summary(test1_m)

nrow(test2_m)

summary(test2_m)

# Check that the stratification preserved the class balance in each subset.
prop.table(table(train_m$Label))

prop.table(table(test1_m$Label))

prop.table(table(test2_m$Label))

# ---- Build the training corpus ----
# One document per review. VectorSource() treats each element of the
# character vector as a separate document.
train_corpus_m <- Corpus(VectorSource(train_m$review))

length(train_corpus_m)

?Corpus

?VectorSource

# The original rebuilt the corpus with DataframeSource(as.matrix(...)).
# Current tm requires DataframeSource() input to be a data frame with
# `doc_id` and `text` columns, so a one-column matrix is rejected.
# VectorSource() gives the same one-document-per-review corpus.
train_corpus_m <- Corpus(VectorSource(as.character(train_m$review)))

?DataframeSource

length(train_corpus_m)

# First raw review, for comparison with the cleaned versions later on.
train_m$review[1]

# ---- Step-by-step text cleaning of the training corpus ----
# Each StepN object is the corpus after one more transformation, so the
# effect of every step can be inspected on the first few documents.

# Step 1: lower-case. Wrapping base functions in content_transformer()
# keeps the documents as tm text documents rather than bare strings.
Step1 <- tm_map(train_corpus_m, content_transformer(tolower))

Step1[1]

Step1[[1]]

Step1[2:3]

inspect(Step1[2:3])

inspect(head(Step1, 3))

# What tolower() does on a plain string.
lowered_text <- tolower("TEXT")

# Step 2: drop digits.
Step2 <- tm_map(Step1, removeNumbers)

Step2[[1]]
inspect(Step2[1:2])

inspect(head(Step2, 3))

# Step 3: drop English stop words.
Step3 <- tm_map(Step2, removeWords, stopwords("english"))

# Same call relying on the default stop-word list of stopwords().
Step3 <- tm_map(Step2, removeWords, stopwords())

Step3[[1]]

# Step 3a: additionally remove a single custom word.
Step3a <- tm_map(Step3, removeWords, "two")

Step3a[[1]]

# Step 3b: remove several custom words at once.
Step3b <- tm_map(Step3, removeWords, c("two", "movie", "film", "films"))

Step3b[[1]]

# Step 3c: standard stop words plus a custom list in a single vector.
mystopwords <- c(
  stopwords("english"),
  "one", "two", "three", "make", "get",
  "movie", "movies", "film", "films"
)

Step3c <- tm_map(Step3, removeWords, mystopwords)

Step3c[[1]]

# Step 4: strip punctuation.
Step4 <- tm_map(Step3c, removePunctuation)

Step4[[1]]

# Step 5: collapse runs of whitespace left behind by the removals.
Step5 <- tm_map(Step4, stripWhitespace)

Step5[[1]]

# Small stand-alone example showing what stemming does.
texts <- c(
  "I am member of the XYZ association",
  "apply for our open associate position",
  "xyz memorial lecture takes place on wednesday",
  "vote for the most popular lecturer"
)

corpus <- Corpus(VectorSource(texts))

corpus.temp <- tm_map(corpus, stemDocument, language = "english")

corpus.temp[[2]][1]

# Step 6: stem the cleaned training corpus.
Step6 <- tm_map(Step5, stemDocument, language = "english")

Step6[[1]]

# ---- Document-term matrices, frequent terms and plots ----

# DTM from the manually cleaned corpus.
train_dtm_m <- DocumentTermMatrix(Step6)

dim(train_dtm_m)

# Same cleaning done in one call through the `control` list.
# Whitespace stripping is not a DocumentTermMatrix control option
# (tokenisation already splits on whitespace), so it is not listed.
dtm_control <- list(
  removeNumbers = TRUE,
  removePunctuation = TRUE,
  tolower = TRUE,
  stopwords = TRUE,
  stemming = TRUE
)
train_dtm_m <- DocumentTermMatrix(train_corpus_m, control = dtm_control)

dim(train_dtm_m)

# Keep only terms whose sparsity is below 0.80.
train_rmspa_m <- removeSparseTerms(train_dtm_m, 0.80)

?removeSparseTerms

dim(train_rmspa_m)

# Mean count per document for every retained term, most frequent first.
mean_train <- sort(colMeans(as.matrix(train_rmspa_m)), decreasing = TRUE)

# head() rather than [1:20]: fewer than 20 terms may survive the sparsity
# filter, and [1:20] would then pad the result with NA.
top20 <- head(mean_train, 20)
top20

average_top20 <- mean(top20)

average_top20

barplot(
  top20,
  border = NA, las = 3,
  xlab = "top 20 words", ylab = "Frequency", ylim = c(0, 3)
)

# Second pass: also drop domain-specific filler words.
mystopwords <- c(
  "one", "two", "three", "make", "get",
  "movie", "movies", "film", "films"
)

# Lower-case first so capitalised occurrences ("Movie") are removed too;
# removeWords() matching is case-sensitive.
train_corpus_m2 <- tm_map(train_corpus_m, content_transformer(tolower))
train_corpus_m2 <- tm_map(train_corpus_m2, removeWords, mystopwords)

train_dtm_m2 <- DocumentTermMatrix(train_corpus_m2, control = dtm_control)

dim(train_dtm_m2)

train_rmspa_m2 <- removeSparseTerms(train_dtm_m2, 0.80)

dim(train_rmspa_m2)
mean_train_m2 <- sort(colMeans(as.matrix(train_rmspa_m2)), decreasing = TRUE)

mean_train_m2

top20_m2 <- head(mean_train_m2, 20)
average_top20_m2 <- mean(top20_m2)

average_top20_m2

barplot(
  top20_m2,
  border = NA, las = 3,
  xlab = "top 20 words", ylab = "Frequency", ylim = c(0, 3)
)

# Word cloud of the (up to) 30 most frequent terms.
top30_m2 <- head(mean_train_m2, 30)
wordcloud(
  names(top30_m2), top30_m2,
  scale = c(5, 1), colors = brewer.pal(8, "Dark2")
)

?wordcloud

# ---- Sentiment classification with several methods, using the train, test1 and test2 sets ----

# ---- Modelling data frames for train and test1 ----

# Training frame: label in column `y`, one column per retained term.
# data.frame() prefixes the term columns with "x.".
train_BoWfreq_m <- as.matrix(train_rmspa_m)

train_data_m <- data.frame(y = train_m$Label, x = train_BoWfreq_m)

summary(train_data_m)

str(train_data_m)

# Vocabulary learned from the training set. It is reused as the
# `dictionary` for the test sets so their columns match the training ones.
train_BoW_m <- findFreqTerms(train_rmspa_m)

length(train_BoW_m)

# VectorSource() replaces DataframeSource(as.matrix(...)), which current
# tm rejects because its input is not a data frame with doc_id/text.
test1_corpus_m <- Corpus(VectorSource(test1_m$review))

# Same cleaning options as for training, restricted to the training terms.
BoW_test1_m <- DocumentTermMatrix(
  test1_corpus_m,
  control = list(
    removeNumbers = TRUE,
    removePunctuation = TRUE,
    tolower = TRUE,
    stopwords = TRUE,
    stemming = TRUE,
    dictionary = train_BoW_m
  )
)

str(BoW_test1_m)

dim(BoW_test1_m)

test1_BoWfreq_m <- as.matrix(BoW_test1_m)

test1_data_m <- data.frame(y = test1_m$Label, x = test1_BoWfreq_m)

str(test1_data_m)

summary(test1_data_m)

# ---- Conditional inference tree (party::ctree) ----
library(party)
BoW_ctree_m <- ctree(y ~ ., data = train_data_m)

summary(BoW_ctree_m)

plot(BoW_ctree_m)

plot(BoW_ctree_m, type = "simple")

test1Pred <- predict(BoW_ctree_m, newdata = test1_data_m)

# NOTE(review): assumes the positive class level is spelled "Positive";
# confirm against levels(train_data_m$y).
confusionMatrix(
  test1Pred, test1_data_m[, 1],
  positive = "Positive", dnn = c("Prediction", "True")
)

# rminer's metric function is mmetric(), and it takes the target first and
# the predictions second; swapping them exchanges TPR and PRECISION.
mmetric(test1_data_m[, 1], test1Pred, c("ACC", "TPR", "PRECISION", "F1"))

# ---- Naive Bayes, SVM, multilayer perceptron and k-NN on test1 ----
library(RWeka)

# make_Weka_classifier() needs the Weka class to wrap; it was empty here.
NB <- make_Weka_classifier("weka/classifiers/bayes/NaiveBayes")

BoW_NB_m <- NB(y ~ ., data = train_data_m)

library(kernlab)

# `data` is a named argument (=), not a formula (~).
BoW_ksvm_m <- ksvm(y ~ ., data = train_data_m)

test1Pred <- predict(BoW_ksvm_m, newdata = test1_data_m)

# NOTE(review): assumes the positive class level is spelled "Positive";
# confirm against levels(train_data_m$y).
confusionMatrix(
  test1Pred, test1_data_m[, 1],
  positive = "Positive", dnn = c("Prediction", "True")
)

# mmetric() takes the target first, then the predictions.
mmetric(test1_data_m[, 1], test1Pred, c("ACC", "TPR", "PRECISION", "F1"))

MLP <- make_Weka_classifier("weka/classifiers/functions/MultilayerPerceptron")

# Weka options are passed as named values: N = 100 and L = 0.2
# (presumably training epochs and learning rate; see the Weka docs).
BoW_MLP_m <- MLP(
  y ~ .,
  data = train_data_m,
  control = Weka_control(N = 100, L = 0.2)
)

test1Pred <- predict(BoW_MLP_m, newdata = test1_data_m)

confusionMatrix(
  test1Pred, test1_data_m[, 1],
  positive = "Positive", dnn = c("Prediction", "True")
)

mmetric(test1_data_m[, 1], test1Pred, c("ACC", "TPR", "PRECISION", "F1"))

# The k-nearest-neighbour model was evaluated below but never fitted in
# this section; fit it with RWeka's IBk interface using default options.
BoW_IBk_m <- IBk(y ~ ., data = train_data_m)

evaluate_Weka_classifier(
  BoW_IBk_m, test1_data_m,
  numFolds = 0, complexity = FALSE, seed = 1, class = TRUE
)
# ---- Evaluate every fitted model on the second hold-out set (test2) ----

# VectorSource() replaces DataframeSource(as.matrix(...)), which current
# tm rejects because its input is not a data frame with doc_id/text.
test2_corpus_m <- Corpus(VectorSource(test2_m$review))

# Same cleaning options as for training, restricted to the training
# vocabulary so the columns line up with what the models were fitted on.
BoW_test2_m <- DocumentTermMatrix(
  test2_corpus_m,
  control = list(
    removeNumbers = TRUE,
    removePunctuation = TRUE,
    tolower = TRUE,
    stopwords = TRUE,
    stemming = TRUE,
    dictionary = train_BoW_m
  )
)

dim(BoW_test2_m)

test2_BoWfreq_m <- as.matrix(BoW_test2_m)

summary(test2_BoWfreq_m)

test2_data_m <- data.frame(y = test2_m$Label, x = test2_BoWfreq_m)

# Models are looked up by name so that one missing from the session
# (for example, one fitted in a part of the script that was not run)
# is reported and skipped instead of aborting the whole evaluation.
model_names <- c(
  "BoW_ctree_m", "BoW_J48_m", "BoW_C50_m", "BoW_NB_m",
  "BoW_ksvm_m", "BoW_MLP_m", "BoW_IBk_m"
)
# Models that additionally get Weka's own evaluation summary.
weka_eval_models <- c("BoW_J48_m", "BoW_IBk_m")

for (model_name in model_names) {
  if (!exists(model_name)) {
    message("Skipping ", model_name, ": model has not been fitted.")
    next
  }
  message("Evaluating ", model_name, " on test2")
  model <- get(model_name)

  test2Pred <- predict(model, newdata = test2_data_m)

  # Explicit print(): values are not auto-printed inside a loop.
  # NOTE(review): assumes the positive class level is spelled "Positive".
  print(confusionMatrix(
    test2Pred, test2_data_m[, 1],
    positive = "Positive", dnn = c("Prediction", "True")
  ))

  # mmetric() takes the target first, then the predictions.
  print(mmetric(
    test2_data_m[, 1], test2Pred,
    c("ACC", "TPR", "PRECISION", "F1")
  ))

  if (model_name %in% weka_eval_models) {
    print(evaluate_Weka_classifier(
      model, test2_data_m,
      numFolds = 0, complexity = FALSE, seed = 1, class = TRUE
    ))
  }
}

# You might also like