# Classification using a Naive Bayes scheme
# Data : 20 Newsgroups
# Download link : http://www.cs.umb.edu/~smimarog/textmining/datasets/
# Load all the required libraries. Note : Packages need to be installed first.
library(dplyr)
library(caret)
library(tm)
library(RTextTools)
library(doMC)
library(e1071)
# Register the doMC parallel backend with one worker per detected core.
# detectCores() is documented to return NA when the core count cannot be
# determined, so fall back to a single worker instead of passing NA along.
n.cores <- parallel::detectCores()
if (is.na(n.cores)) {
  n.cores <- 1L
}
registerDoMC(cores = n.cores)
# Load data.
# We will use the 'train-all-terms' file, which contains roughly 11,300 messages.
# Each line is parsed as two tab-separated fields: the topic label and the text.
# Read the file as a data frame with character columns "topic" and "text".
#   quote = ""        : treat quote characters in the text as literal characters.
#   comment.char = "" : read.table defaults to "#" as a comment marker, which
#                       would silently cut off any message containing "#";
#                       disabling it is also faster on large files.
ng.df <- read.table(
  "20ng-train-all-terms.txt",
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE,
  col.names = c("topic", "text")
)