-
Notifications
You must be signed in to change notification settings - Fork 3
/
training_NaiveBayes.py
69 lines (60 loc) · 1.72 KB
/
training_NaiveBayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
from nltk_utils import token, bag_of_words
import numpy as np
from sklearn.naive_bayes import BernoulliNB
# Read the training intents and the test intents from their UTF-8 JSON files.
with open('content.json', 'r', encoding="utf-8") as train_file:
    contents = json.loads(train_file.read())
with open('test_content.json', 'r', encoding="utf-8") as test_file:
    test_contents = json.loads(test_file.read())
# Tokens that are excluded when the vocabulary is built.
punctuation = ['?', '.', ',', '!', ':', '/']

all_words = []
tags = []
xy = []
xy_test = []

# Training intents: record every tag, pool all tokens for the vocabulary,
# and keep one (tokens, tag) pair per pattern.
for intent in contents['intents']:
    intent_tag = intent['tag']
    tags.append(intent_tag)
    for sentence in intent['patterns']:
        tokens = token(sentence)
        all_words += tokens
        xy.append((tokens, intent_tag))

# Test intents: only the (tokens, tag) pairs are needed; they contribute
# neither to the vocabulary nor to the tag list.
for intent in test_contents['intents']:
    intent_tag = intent['tag']
    xy_test.extend(
        (token(sentence), intent_tag) for sentence in intent['patterns']
    )

# Vocabulary: lower-cased, punctuation removed, de-duplicated, sorted.
all_words = sorted(
    {word.lower() for word in all_words if word not in punctuation}
)
# Sorted unique tags; a tag's position in this list is its class label.
tags = sorted(set(tags))
print(tags)
def _vectorize(samples, vocabulary, tag_to_index):
    """Convert (tokens, tag) pairs into a feature array and a label array.

    Each token list is encoded by calling ``bag_of_words(tokens, vocabulary)``
    and each tag is replaced by its integer index from ``tag_to_index``.

    Args:
        samples: iterable of ``(tokens, tag)`` pairs.
        vocabulary: word list passed through to ``bag_of_words``.
        tag_to_index: mapping from tag to its integer class label.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays, one entry per sample.

    Raises:
        ValueError: if a sample's tag is missing from ``tag_to_index``.
    """
    features = []
    labels = []
    for tokens, tag in samples:
        if tag not in tag_to_index:
            # Same exception type list.index() would raise, but naming the tag
            # makes a mismatch between the two JSON files easy to diagnose.
            raise ValueError(
                "unknown tag {!r}: not present in the training intents".format(tag)
            )
        features.append(bag_of_words(tokens, vocabulary))
        labels.append(tag_to_index[tag])
    return np.array(features), np.array(labels)


# Class label of a tag is its position in the sorted tag list; build the
# lookup once instead of scanning the list for every sample.
tag_to_index = {tag: index for index, tag in enumerate(tags)}

X_train, y_train = _vectorize(xy, all_words, tag_to_index)
X_test, y_test = _vectorize(xy_test, all_words, tag_to_index)
# Model + training: fit() returns the estimator itself, so the classifier is
# constructed and trained in a single expression.
model = BernoulliNB().fit(X_train, y_train)

# Testing: predict a class label for every test sample.
prediction = model.predict(X_test)

# Report the training matrix shape, then predicted and expected labels.
print(X_train.shape)
print(prediction)
print(y_test)
# Accuracy: number of test samples whose predicted label matches the expected
# label, divided by the number of predictions.
correct = sum(
    1 for guess, expected in zip(prediction, y_test) if guess == expected
)
print(correct/len(prediction))