.Rhistory

# Print the version information of the running R session.
version
# Read three CSV files from the HotelReviewSA folder into data frames and open
# each one in the data viewer.
Final <- read.csv("~/R/RCode/TM/HotelReviewSA/Final.csv")
View(Final)
View(Final)
MEUSE <- read.csv("~/R/RCode/TM/HotelReviewSA/MEUSE.csv")
View(MEUSE)
SA <- read.csv("~/R/RCode/TM/HotelReviewSA/SA.csv")
View(SA)
# Replace SA with 100 of its rows drawn at random; no seed is set beforehand,
# so the selection differs from run to run.
SA <- SA[sample(nrow(SA), 100),]
View(SA)
# Repeated attempts to load openNLP, interleaved with installs of openNLP and
# rJava (presumably because openNLP would not load without a working rJava --
# TODO confirm). The commands are kept exactly as they were typed.
require(openNLP)
install.packages("openNLP")
install.packages("openNLP")
install.packages("openNLP")
require(openNLP)
require(openNLP)
install.packages('rJava')
install.packages('rJava')
# NOTE(review): `R CMD javareconf` is a shell command, not R code; typed at the
# R prompt it does not parse. Run it from a terminal instead.
R CMD javareconf
install.packages('rJava')
library(rJava)
install.packages('rJava')
install.packages('rJava')
install.packages('rJava')
require(openNLP) ## Loads the package for use in the task
install.packages('rJava')
install.packages('rJava')
install.packages('rJava')
library(rJava)
# NOTE(review): the unquoted name is evaluated as a variable here; the quoted
# form on the following line is the one that names the package.
remove.packages(rJava)
remove.packages('rJava')
install.packages('rJava')
install.packages('rJava')
# NOTE(review): `version` is an object, not a function, so calling it fails;
# the bare name on the next line prints the version information.
version()
version
# Remove the SA data frame from the workspace.
rm(SA)
install.packages('rJava')
library(rJava)
# NOTE(review): package names are case-sensitive; `rjava` is a typo for `rJava`.
library(rjava)
library(rJava)
install.packages('rJava')
version
library(rJava)
require(openNLP) ## Loads the package for use in the task
install.packages('rJava')
install.packages('rJava')
library(rJava)
install.packages('rJava')
install.packages('rJava')
# Conditional inference tree on the built-in iris data set using party::ctree.
data(iris)
View(iris)
str(iris)
# Fix the random seed so the train/test split below is reproducible.
set.seed(1234)
# creating training and testing data randomly
# Each row is labelled 1 (probability 0.7) or 2 (probability 0.3); rows labelled
# 1 form the training set and rows labelled 2 the test set.
ind <- sample(2, nrow(iris), replace=TRUE, prob=c(0.7, 0.3))
trainData <- iris[ind==1,]
testData <- iris[ind==2,]
library(party)
# Model Species from the four flower measurements.
myFormula <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
iris_ctree <- ctree(myFormula, data=trainData)
# check the prediction
# NOTE(review): party is installed only at this point, so the library()/ctree()
# calls above presumably failed; they are repeated below after the install.
install.packages('party')
library(party)
myFormula <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
iris_ctree <- ctree(myFormula, data=trainData)
# check the prediction
# Cross-tabulate fitted classes against the true species of the training rows.
table(predict(iris_ctree), trainData$Species)
print(iris_ctree)
plot(iris_ctree)
plot(iris_ctree, type="simple")
# predict on test data
testPred <- predict(iris_ctree, newdata = testData)
table(testPred, testData$Species)
# Help lookups for ctree (search, then direct help page).
??ctree
?ctree
# NOTE(review): the next line is not valid R (a call followed by `{}`) and does
# not parse; it is kept only because it is part of the recorded history.
install.packages(){}
install.packages('mboost')
# Download big.txt and read its whitespace-separated tokens into a character
# vector.
words <- scan("http://norvig.com/big.txt", what = character())
# NOTE(review): strip.text() is defined only by the lines that follow in this
# history, so this first call presumably failed; it is repeated after the
# definition.
a <- strip.text(words)
# Normalise raw text into a flat vector of lowercase, letters-only tokens.
#
# Args:
#   txt: character vector of raw text.
# Returns:
#   Character vector of tokens. Apostrophes are dropped ("don't" -> "dont",
#   "Jane's" -> "janes"), text is lower-cased, every other non-alphanumeric
#   character and every run of digits acts as a separator, and empty tokens
#   are discarded.
strip.text <- function(txt) {
  # Drop apostrophes first so contractions stay one token, then fold case.
  lowered <- tolower(gsub("'", "", txt))
  # Anything outside a-z / 0-9 becomes a space ...
  spaced <- gsub("[^a-z0-9]", " ", lowered)
  # ... and so does each run of digits (digits are removed, not kept).
  spaced <- gsub("[0-9]+", " ", spaced)
  # Split on single spaces and flatten; adjacent spaces produce "" entries,
  # which are filtered out on the way back.
  tokens <- unlist(strsplit(spaced, " "))
  tokens[tokens != ""]
}
# Tokenise the corpus with strip.text() and compare the first entries of the
# cleaned vector with the raw one.
a <- strip.text(words)
a[[1]]
a[[2]]
words[[2]]
words[2]
# NOTE(review): `sumary` is a typo for `summary`; the corrected call follows.
sumary(words)
summary(words)
# Adjacent-letter transpositions of a word.
#
# Args:
#   word: a single character string.
# Returns:
#   Character vector with one entry per pair of neighbouring positions
#   (nchar(word) - 1 entries), each being `word` with that pair swapped.
#   Words shorter than two characters have nothing to swap and are returned
#   unchanged.
Transpositions <- function(word = FALSE) {
  N <- nchar(word)
  if (N < 2) {
    return(paste(word, collapse = ""))
  }
  # Split once, up front, so every branch works on individual characters
  # (the two-letter case previously indexed the unsplit string and produced
  # "NA" followed by the original word).
  chars <- unlist(strsplit(word, NULL))
  out <- character(N - 1)
  for (i in seq_len(N - 1)) {
    swapped <- chars
    swapped[c(i, i + 1)] <- chars[c(i + 1, i)]
    out[i] <- paste(swapped, collapse = "")
  }
  out
}
# Single-letter deletions.
#
# Args:
#   word: a single character string.
# Returns:
#   Character vector with one entry per character of `word`, each being `word`
#   with that character removed. An empty word yields an empty vector.
Deletes <- function(word = FALSE) {
  chars <- unlist(strsplit(word, NULL))
  # Build the result directly instead of assigning into an `out` variable that
  # was never created; seq_along() also keeps the empty-word case from
  # iterating over c(1, 0).
  vapply(
    seq_along(chars),
    function(i) paste(chars[-i], collapse = ""),
    character(1)
  )
}
# Single-letter insertions.
#
# Args:
#   word: a single character string.
# Returns:
#   Named character vector holding, for each letter a-z in turn, `word` with
#   that letter inserted at every possible position (before the first
#   character through after the last), i.e. 26 * (nchar(word) + 1) entries.
Insertions <- function(word = FALSE) {
  n_chars <- nchar(word)
  slots <- seq_len(n_chars + 1)
  per_letter <- lapply(letters, function(ch) {
    vapply(slots, function(pos) {
      # Prefix is everything before `pos` (a start below 1 is clamped by
      # substr), suffix is everything from `pos` onwards.
      paste0(substr(word, pos - n_chars, pos - 1), ch,
             substr(word, pos, n_chars))
    }, character(1))
  })
  # Name the groups by letter so unlist() labels the entries as before.
  names(per_letter) <- letters
  unlist(per_letter)
}
# Single-letter replacements.
#
# Args:
#   word: a single character string.
# Returns:
#   Named character vector holding, for each letter a-z in turn, `word` with
#   each of its characters replaced by that letter (26 * nchar(word) entries;
#   this includes `word` itself wherever the letter already matches). An empty
#   word has no character to replace and yields an empty vector.
Replaces <- function(word = FALSE) {
  N <- nchar(word)
  out <- lapply(letters, function(letter) {
    # seq_len() rather than 1:N: with N == 0 the old loop still ran and
    # returned the 26 single letters, which are insertions, not replacements.
    vapply(seq_len(N), function(i) {
      paste0(substr(word, 1, i - 1), letter, substr(word, i + 1, N))
    }, character(1))
  })
  names(out) <- letters
  unlist(out)
}
# All Neighbors with distance "1"
#
# Args:
#   word: a single character string.
# Returns:
#   Character vector made of `word` itself followed by the results of
#   Replaces(), Deletes(), Insertions() and Transpositions(), in that order.
#   Duplicates are not removed.
Neighbors <- function(word) {
  c(
    word,
    Replaces(word),
    Deletes(word),
    Insertions(word),
    Transpositions(word)
  )
}
# Probability as determined by our corpus.
#
# Args:
#   word: a single character string.
#   dtm:  named vector of word counts (e.g. the result of table(words)).
# Returns:
#   The count of `word` divided by the total number of words counted in `dtm`,
#   as a length-one subset of `dtm`; length zero when `word` is not a name of
#   `dtm`.
Probability <- function(word, dtm) {
  # Total number of word occurrences. length(dtm) would be the number of
  # *distinct* words and lets the "probability" exceed 1.
  N <- sum(dtm)
  count <- dtm[which(names(dtm) == word)]
  count / N
}
# Correct a single word.
#
# Args:
#   word: a single character string.
#   dtm:  named vector of word counts; its names are the known words.
# Returns:
#   `word` itself when it is a known word. Otherwise the known word with the
#   highest Probability() among the distance-1 neighbours, or, when none of
#   those is known, among the neighbours of the neighbours. Falls back to
#   `word` when no known candidate is found. The result is an unnamed
#   length-one character vector.
Correct <- function(word, dtm) {
  vocab <- names(dtm)
  # If it is a word, just return it (no need to generate neighbours first).
  if (word %in% vocab) {
    return(word)
  }
  neighbors <- Neighbors(word)
  # If there are no known neighbors, look farther away.
  if (!any(neighbors %in% vocab)) {
    print(paste("Having a hard time matching '", word, "'...", sep = ""))
    neighbors <- unlist(lapply(neighbors, Neighbors))
  }
  # Throw out non-words. unique() keeps first-occurrence order, so ties are
  # broken as before, and it drops the names left over from unlist().
  candidates <- unique(neighbors[neighbors %in% vocab])
  # If no neighbors still, return the word. A single candidate is a valid
  # correction; the former `N > 1` test discarded it.
  if (length(candidates) == 0) {
    return(word)
  }
  P <- vapply(
    candidates,
    function(candidate) as.numeric(Probability(candidate, dtm)),
    numeric(1),
    USE.NAMES = FALSE
  )
  candidates[which.max(P)]
}
# Replace the raw corpus with its cleaned tokens and count how often each
# distinct token occurs.
words <- strip.text(words)
counts <- table(words)
# Try the corrector on a misspelling, using the count table as the dictionary.
Correct("speling", dtm = counts)
# Correct every word of a document.
#
# Args:
#   document: a single character string with words separated by single spaces.
#   dtm:      named vector of word counts, passed on to Correct().
# Returns:
#   A single string: the space-separated tokens of `document`, each replaced
#   by Correct(token, dtm), joined with single spaces again. Empty tokens
#   (from repeated spaces) are left untouched, and an empty document is
#   returned as "".
CorrectDocument <- function(document, dtm) {
  by.word <- unlist(strsplit(document, " "))
  # seq_along() so an empty document does not iterate over c(1, 0).
  for (i in seq_along(by.word)) {
    # Skip empty tokens: passing "" to the corrector could turn a doubled
    # space into a real word.
    if (nzchar(by.word[i])) {
      by.word[i] <- Correct(by.word[i], dtm = dtm)
    }
  }
  paste(by.word, collapse = " ")
}
# Re-run the corrector after CorrectDocument() was defined, then drop the
# temporary token vector `a`.
Correct("speling", dtm = counts)
rm(a)
# Repeated slidify builds of index.Rmd, interleaved with package installs
# (presumably for packages the deck needs -- TODO confirm).
library(slidify)
slidify()
# NOTE(review): the file name below contains a stray space ("index .Rmd"); the
# corrected call follows.
slidify("index .Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
# Switch the working directory to the Intro-to-R folder before building again.
setwd("/media/sda2/Data/RCode/DSTraining/Intro-to-R")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
install.packages('sqldf')
install.packages('sqldf')
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
install.packages("plyr")
library(plyr)
slidify("index.Rmd")
# Help lookups for knitr (direct help page, then search).
?knitr
??knitr
slidify("index.Rmd")
install.packages("sqldf")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
library(plyr)
slidify("index.Rmd")
slidify("index.Rmd")
summary(iris)
# NOTE(review): `stry` is a typo for `str`; the corrected call follows.
stry(iris)
str(iris)
str(iris)
summary(iris)
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")
# stringr experiments: splitting a sentence on commas and extracting words by
# their first letter.
s <- c("Go to Heaven for the climate, Hell for the company.")
library(stringr)
?str_split
str_split(s, ",")
s <- c("I am in India, I love Cricket, and I like Sachin.")
str_split(s, ",")
# Pattern: a capital H, one or more lowercase letters, then a space. The
# current `s` contains no capital H, so this presumably matches nothing.
str_extract_all(s, "[H][a-z]+ ")
# Same pattern with a capital I; the lone word "I" has no lowercase letter
# after it and therefore does not match.
str_extract_all(s, "[I][a-z]+ ")
s <- c("I am in India, I love Cricket, and I like Sachin.")
str_extract_all(s, "[I][a-z]+ ")
s <- c("I am in India, I love Cricket, and I like Sachin.")
# Lowercase l followed by one or more lowercase letters; the match is not
# anchored to the start of a word.
str_extract_all(s, "[l][a-z]+")
slidify("index.Rmd")
slidify("index.Rmd")
slidify("index.Rmd")