# Naive Bayes
# Libraries
library(naivebayes)  # naive_bayes() model and its predict method
library(dplyr)       # data manipulation (filter, summarize, pipes)
library(ggplot2)     # box plots and density plots
library(psych)       # pairs.panels() scatterplot matrix
# Read data file (graduate admissions data: admit, gre, gpa, rank)
getwd()  # check the working directory (not strictly needed, since the data are read from a URL)
data <- read.csv('https://raw.githubusercontent.com/bkrai/Statistical-Modeling-and-Graphs-with-R/main/binary.csv')
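# (Optional sketch, not in the original script) Quick look at the data before modeling;
# admit and rank arrive as integers, which is why they are converted to factors below.
str(data)
head(data)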
# Contingency table of admission status by undergraduate school rank
xtabs(~admit + rank, data = data)
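# (Optional sketch, not in the original script) A chi-squared test on the same
# contingency table gives a quick check of whether admit and rank are associated.
chisq.test(xtabs(~admit + rank, data = data))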
# rank & admit are categorical variables; convert them to factors
data$rank <- as.factor(data$rank)
data$admit <- as.factor(data$admit)
# Visualization
pairs.panels(data[-1])  # pairwise plots and correlations, excluding admit (1st column)
# Box plots of GRE and GPA by admission status
data %>%
  ggplot(aes(x = admit, y = gre, fill = admit)) +
  geom_boxplot() +
  ggtitle('Box Plot: GRE by Admission Status')
data %>%
  ggplot(aes(x = admit, y = gpa, fill = admit)) +
  geom_boxplot() +
  ggtitle('Box Plot: GPA by Admission Status')
# Density plots of GRE and GPA by admission status
data %>%
  ggplot(aes(x = gre, fill = admit)) +
  geom_density(alpha = 0.8, color = 'black') +
  ggtitle('Density Plot: GRE by Admission Status')
data %>%
  ggplot(aes(x = gpa, fill = admit)) +
  geom_density(alpha = 0.8, color = 'black') +
  ggtitle('Density Plot: GPA by Admission Status')
# Split data into training (~80%) and testing (~20%) sets
set.seed(1234)
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.8, 0.2))
train <- data[ind == 1, ]
test <- data[ind == 2, ]
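# (Optional check, not in the original script) Because the split is random,
# verify the approximate 80/20 proportions and the class balance in each set.
prop.table(table(ind))
prop.table(table(train$admit))
prop.table(table(test$admit))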
# Fit the Naive Bayes model on the training data
model <- naive_bayes(admit ~ ., data = train)
model
plot(model)  # class-conditional distributions of each predictor
# Numeric predictors: the model output reports class-conditional means (1st col) & sd's (2nd col);
# verify them for non-admitted applicants (admit == "0")
train %>%
  filter(admit == "0") %>%
  summarize(mean(gre), sd(gre))
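# (Sketch, not in the original script) The same class-conditional summaries for
# admitted applicants (admit == "1"), for comparison with the model output.
train %>%
  filter(admit == "1") %>%
  summarize(mean(gre), sd(gre), mean(gpa), sd(gpa))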
# Predicted class probabilities on the training data
p <- predict(model, train, type = 'prob')
head(cbind(p, train))
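# (Sketch, not in the original script) p has one probability column per class ("0" and "1"),
# so a custom decision threshold can be applied manually; the 0.4 cutoff below is hypothetical.
p_custom <- ifelse(p[, "1"] > 0.4, "1", "0")
table(Custom = p_custom, Actual = train$admit)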
# Misclassification error - training data
p1 <- predict(model, train)
(tab1 <- table(Predicted = p1, Actual = train$admit))
1 - sum(diag(tab1)) / sum(tab1)
# Misclassification error - test data
p2 <- predict(model, test)
(tab2 <- table(Predicted = p2, Actual = test$admit))
1 - sum(diag(tab2)) / sum(tab2)
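# (Sketch, not in the original script) Two common tuning options in the naivebayes package:
# usekernel = TRUE replaces the Gaussian assumption for numeric predictors with kernel density
# estimates, and laplace adds smoothing for factor levels with zero counts. Whether either
# helps should be judged on the test error, as above.
model2 <- naive_bayes(admit ~ ., data = train, usekernel = TRUE, laplace = 1)
p3 <- predict(model2, test)
(tab3 <- table(Predicted = p3, Actual = test$admit))
1 - sum(diag(tab3)) / sum(tab3)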