Skip to content

Latest commit

 

History

History
66 lines (50 loc) · 1.36 KB

classifier.adoc

File metadata and controls

66 lines (50 loc) · 1.36 KB

Classifier

# dog-cat classifier

import numpy as np

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

# Toy training corpus: one short sentence per sample.
sentences = [
    'I love dog',
    'This document is about dog',
    'I like cat',
    'This document is about cat',
    'Dogs are of many types',
]

# Target label for each training sentence, in the same order as `sentences`.
classes = [
    'dog',
    'dog',
    'cat',
    'cat',
    'dog',
]

# Bag-of-words: learn the vocabulary from the training sentences and turn
# each sentence into a row of token counts.
count_vect = CountVectorizer()

X_train_counts = count_vect.fit_transform(sentences)
# Single-argument print(...) calls run on Python 3 and produce the same
# output on Python 2, where the original print statements came from.
print(X_train_counts.shape)      # (5, 13)

# use_idf=False: only term-frequency scaling is applied, no inverse
# document-frequency weighting.
tf_transformer = TfidfTransformer(use_idf=False).fit(X_train_counts)
X_train_tf = tf_transformer.transform(X_train_counts)
print(X_train_tf.shape)      # (5, 13)

# Fit a multinomial Naive Bayes classifier on the term-frequency features.
clf = MultinomialNB().fit(X_train_tf, classes)

# Held-out sentences with their expected labels.
sentences_test = ['I dont like dogs', 'cats are very cute']
classes_test = ['dog', 'cat']

# Reuse the fitted vectorizer (transform, not fit_transform) so the test
# matrix has the same columns as the training matrix.
X_test_counts = count_vect.transform(sentences_test)
print(X_test_counts.shape)      # (2, 13)

# NOTE: despite the name, these are term-frequency features only, because
# the transformer was built with use_idf=False.
X_test_tfidf = tf_transformer.transform(X_test_counts)
print(X_test_tfidf.shape)      # (2, 13)

predicted = clf.predict(X_test_tfidf)

print(predicted)               # ['dog' 'dog']
# Accuracy: fraction of predictions equal to the expected label. The labels
# are converted to an array so the comparison is explicitly element-wise.
print(np.mean(predicted == np.asarray(classes_test)))    # 0.5