Skip to content

Commit

Permalink
Reorg/naive bayes (#145)
Browse files Browse the repository at this point in the history
* reorg algorithm
* reorg: test
* fix: var calc
  • Loading branch information
shenxiangzhuang authored Nov 11, 2024
1 parent 7df532c commit 3f7ed01
Show file tree
Hide file tree
Showing 8 changed files with 440 additions and 561 deletions.
49 changes: 0 additions & 49 deletions tests/classification/naive_bayes/test_categorical_naive_bayes.py

This file was deleted.

49 changes: 0 additions & 49 deletions tests/classification/naive_bayes/test_multinomial_naive_bayes.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import math

import numpy as np
import pytest

from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import CategoricalNB, GaussianNB, MultinomialNB

from toyml.classification.naive_bayes import GaussianNaiveBayes
from toyml.classification.naive_bayes import (
CategoricalNaiveBayes,
GaussianNaiveBayes,
MultinomialNaiveBayes,
)


@pytest.fixture
Expand Down Expand Up @@ -36,6 +41,17 @@ def wikipedia_person_classification_sample() -> list[float]:
return [6, 130, 8]


@pytest.fixture
def sklearn_example_random_dataset_label() -> tuple[list[list[int]], list[int]]:
    """Build the random count dataset from scikit-learn's MultinomialNB example.

    Returns a ``(dataset, label)`` pair of plain Python lists: six samples of
    100 integer features each, drawn from ``[0, 5)`` with a fixed seed, and the
    labels 1 through 6 (one distinct label per sample).

    References:
        https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html#multinomialnb
    """
    n_samples, n_features = 6, 100
    # Fixed seed keeps the generated features reproducible across runs.
    seeded_rng = np.random.RandomState(1)
    features = seeded_rng.randint(5, size=(n_samples, n_features))
    labels = list(range(1, n_samples + 1))
    return features.tolist(), labels


class TestGaussianNaiveBayesIntegration:
def test_same_result_with_wikipedia(
self,
Expand Down Expand Up @@ -79,3 +95,63 @@ def test_same_result_with_sklearn(
sklearn_prob = sklearn_clf.predict_proba([wikipedia_person_classification_sample])
assert math.isclose(sut_prob[0], sklearn_prob[0][0])
assert math.isclose(sut_prob[1], sklearn_prob[0][1])


class TestMultinomialNaiveBayesIntegration:
    """Cross-check ``MultinomialNaiveBayes`` against sklearn's ``MultinomialNB``."""

    def test_same_result_with_sklearn(
        self,
        sklearn_example_random_dataset_label: tuple[list[list[int]], list[int]],
    ) -> None:
        """Compare label, log-probabilities and probabilities on one sample.

        Both classifiers are fitted on the fixture dataset and queried with one
        of its training rows; every output must agree with sklearn's.
        """
        dataset, label = sklearn_example_random_dataset_label
        # sklearn's alpha is its additive smoothing parameter; pass the same
        # value to the implementation under test so results are comparable.
        sklearn_clf = MultinomialNB(alpha=1)
        sklearn_clf.fit(dataset, label)
        sut = MultinomialNaiveBayes(alpha=1).fit(dataset, label)

        # test same labels
        test_sample = dataset[2]
        sklearn_label = sklearn_clf.predict([test_sample])
        sut_label = sut.predict(test_sample)
        assert sut_label == sklearn_label[0]

        # sklearn orders its output columns by ``classes_`` while the sut
        # results are indexed by class label, so derive the mapping from
        # ``classes_`` instead of hard-coding the number of classes.
        class_labels = [int(class_label) for class_label in sklearn_clf.classes_]

        # test same log probs
        sut_log_prob = sut.predict_log_proba(test_sample)
        sklearn_log_prob = sklearn_clf.predict_log_proba([test_sample])
        for column, class_label in enumerate(class_labels):
            assert math.isclose(sut_log_prob[class_label], sklearn_log_prob[0][column])

        # test same probs
        sut_prob = sut.predict_proba(test_sample)
        sklearn_prob = sklearn_clf.predict_proba([test_sample])
        for column, class_label in enumerate(class_labels):
            assert math.isclose(sut_prob[class_label], sklearn_prob[0][column])


class TestCategoricalNaiveBayesIntegration:
    """Cross-check ``CategoricalNaiveBayes`` against sklearn's ``CategoricalNB``."""

    def test_same_result_with_sklearn(
        self,
        sklearn_example_random_dataset_label: tuple[list[list[int]], list[int]],
    ) -> None:
        """Compare label, log-probabilities and probabilities on one sample.

        Both classifiers are fitted on the fixture dataset and queried with one
        of its training rows; every output must agree with sklearn's.
        """
        dataset, label = sklearn_example_random_dataset_label
        # sklearn's alpha is its additive smoothing parameter; pass the same
        # value to the implementation under test so results are comparable.
        sklearn_clf = CategoricalNB(alpha=1)
        sklearn_clf.fit(dataset, label)
        sut = CategoricalNaiveBayes(alpha=1).fit(dataset, label)

        # test same labels
        test_sample = dataset[2]
        sklearn_label = sklearn_clf.predict([test_sample])
        sut_label = sut.predict(test_sample)
        assert sut_label == sklearn_label[0]

        # sklearn orders its output columns by ``classes_`` while the sut
        # results are indexed by class label, so derive the mapping from
        # ``classes_`` instead of hard-coding the number of classes.
        class_labels = [int(class_label) for class_label in sklearn_clf.classes_]

        # test same log probs
        sut_log_prob = sut.predict_log_proba(test_sample)
        sklearn_log_prob = sklearn_clf.predict_log_proba([test_sample])
        for column, class_label in enumerate(class_labels):
            assert math.isclose(sut_log_prob[class_label], sklearn_log_prob[0][column])

        # test same probs
        sut_prob = sut.predict_proba(test_sample)
        sklearn_prob = sklearn_clf.predict_proba([test_sample])
        for column, class_label in enumerate(class_labels):
            assert math.isclose(sut_prob[class_label], sklearn_prob[0][column])
Loading

0 comments on commit 3f7ed01

Please sign in to comment.