From 3f7ed01c40e3ad52713fdca317fac1e907a37b27 Mon Sep 17 00:00:00 2001
From: Mathew Shen <datahonor@gmail.com>
Date: Mon, 11 Nov 2024 16:30:53 +0800
Subject: [PATCH] Reorg/naive bayes (#145)

* reorg algorithm
* reorg: test
* fix: var calc
---
 .../test_categorical_naive_bayes.py           |  49 ---
 .../test_multinomial_naive_bayes.py           |  49 ---
 ...ian_naive_bayes.py => test_naive_bayes.py} |  80 +++-
 toyml/classification/naive_bayes.py           | 362 ++++++++++++++++++
 toyml/classification/naive_bayes/__init__.py  |   9 -
 .../naive_bayes/categorical_naive_bayes.py    | 159 --------
 .../naive_bayes/gaussian_naive_bayes.py       | 158 --------
 .../naive_bayes/multinomial_naive_bayes.py    | 135 -------
 8 files changed, 440 insertions(+), 561 deletions(-)
 delete mode 100644 tests/classification/naive_bayes/test_categorical_naive_bayes.py
 delete mode 100644 tests/classification/naive_bayes/test_multinomial_naive_bayes.py
 rename tests/classification/{naive_bayes/test_gaussian_naive_bayes.py => test_naive_bayes.py} (50%)
 create mode 100644 toyml/classification/naive_bayes.py
 delete mode 100644 toyml/classification/naive_bayes/__init__.py
 delete mode 100644 toyml/classification/naive_bayes/categorical_naive_bayes.py
 delete mode 100644 toyml/classification/naive_bayes/gaussian_naive_bayes.py
 delete mode 100644 toyml/classification/naive_bayes/multinomial_naive_bayes.py

diff --git a/tests/classification/naive_bayes/test_categorical_naive_bayes.py b/tests/classification/naive_bayes/test_categorical_naive_bayes.py
deleted file mode 100644
index 062eb24..0000000
--- a/tests/classification/naive_bayes/test_categorical_naive_bayes.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import math
-
-import numpy as np
-import pytest
-
-from sklearn.naive_bayes import CategoricalNB
-
-from toyml.classification.naive_bayes import CategoricalNaiveBayes
-
-
-@pytest.fixture
-def sklearn_example_random_dataset_label() -> tuple[list[list[int]], list[int]]:
-    """
-    References: https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html#multinomialnb
-    """
-    rng = np.random.RandomState(1)
-    dataset = rng.randint(5, size=(6, 100)).tolist()
-    label = np.array([1, 2, 3, 4, 5, 6]).tolist()
-    return dataset, label
-
-
-class TestMultinomialNaiveBayesIntegration:
-    def test_same_result_with_sklearn(
-        self,
-        sklearn_example_random_dataset_label: tuple[list[list[int]], list[int]],
-    ) -> None:
-        dataset, label = sklearn_example_random_dataset_label
-        sklearn_clf = CategoricalNB(alpha=1)
-        sklearn_clf.fit(dataset, label)
-        # use the same variance calculation config with sklearn
-        sut = CategoricalNaiveBayes(alpha=1).fit(dataset, label)
-        # test same labels
-        test_sample = dataset[2]
-        sklearn_label = sklearn_clf.predict([test_sample])
-        sut_label = sut.predict(test_sample)
-
-        assert sut_label == sklearn_label[0]
-
-        # test same log probs
-        sut_log_prob = sut.predict_log_proba(test_sample)
-        sklearn_log_prob = sklearn_clf.predict_log_proba([test_sample])
-        for i in range(6):
-            assert math.isclose(sut_log_prob[i + 1], sklearn_log_prob[0][i])
-
-        # # test same probs
-        sut_prob = sut.predict_proba(test_sample)
-        sklearn_prob = sklearn_clf.predict_proba([test_sample])
-        for i in range(6):
-            assert math.isclose(sut_prob[i + 1], sklearn_prob[0][i])
diff --git a/tests/classification/naive_bayes/test_multinomial_naive_bayes.py b/tests/classification/naive_bayes/test_multinomial_naive_bayes.py
deleted file mode 100644
index 79e5649..0000000
--- a/tests/classification/naive_bayes/test_multinomial_naive_bayes.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import math
-
-import numpy as np
-import pytest
-
-from sklearn.naive_bayes import MultinomialNB
-
-from toyml.classification.naive_bayes import MultinomialNaiveBayes
-
-
-@pytest.fixture
-def sklearn_example_random_dataset_label() -> tuple[list[list[int]], list[int]]:
-    """
-    References: https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html#multinomialnb
-    """
-    rng = np.random.RandomState(1)
-    dataset = rng.randint(5, size=(6, 100)).tolist()
-    label = np.array([1, 2, 3, 4, 5, 6]).tolist()
-    return dataset, label
-
-
-class TestMultinomialNaiveBayesIntegration:
-    def test_same_result_with_sklearn(
-        self,
-        sklearn_example_random_dataset_label: tuple[list[list[int]], list[int]],
-    ) -> None:
-        dataset, label = sklearn_example_random_dataset_label
-        sklearn_clf = MultinomialNB()
-        sklearn_clf.fit(dataset, label)
-        # use the same variance calculation config with sklearn
-        sut = MultinomialNaiveBayes(alpha=1).fit(dataset, label)
-        # test same labels
-        test_sample = dataset[2]
-        sklearn_label = sklearn_clf.predict([test_sample])
-        sut_label = sut.predict(test_sample)
-
-        assert sut_label == sklearn_label[0]
-
-        # test same log probs
-        sut_log_prob = sut.predict_log_proba(test_sample)
-        sklearn_log_prob = sklearn_clf.predict_log_proba([test_sample])
-        for i in range(6):
-            assert math.isclose(sut_log_prob[i + 1], sklearn_log_prob[0][i])
-
-        # # test same probs
-        sut_prob = sut.predict_proba(test_sample)
-        sklearn_prob = sklearn_clf.predict_proba([test_sample])
-        for i in range(6):
-            assert math.isclose(sut_prob[i + 1], sklearn_prob[0][i])
diff --git a/tests/classification/naive_bayes/test_gaussian_naive_bayes.py b/tests/classification/test_naive_bayes.py
similarity index 50%
rename from tests/classification/naive_bayes/test_gaussian_naive_bayes.py
rename to tests/classification/test_naive_bayes.py
index b151c33..36c7a68 100644
--- a/tests/classification/naive_bayes/test_gaussian_naive_bayes.py
+++ b/tests/classification/test_naive_bayes.py
@@ -1,10 +1,15 @@
 import math
 
+import numpy as np
 import pytest
 
-from sklearn.naive_bayes import GaussianNB
+from sklearn.naive_bayes import CategoricalNB, GaussianNB, MultinomialNB
 
-from toyml.classification.naive_bayes import GaussianNaiveBayes
+from toyml.classification.naive_bayes import (
+    CategoricalNaiveBayes,
+    GaussianNaiveBayes,
+    MultinomialNaiveBayes,
+)
 
 
 @pytest.fixture
@@ -36,6 +41,17 @@ def wikipedia_person_classification_sample() -> list[float]:
     return [6, 130, 8]
 
 
+@pytest.fixture
+def sklearn_example_random_dataset_label() -> tuple[list[list[int]], list[int]]:
+    """
+    References: https://scikit-learn.org/1.5/modules/generated/sklearn.naive_bayes.MultinomialNB.html#multinomialnb
+    """
+    rng = np.random.RandomState(1)
+    dataset = rng.randint(5, size=(6, 100)).tolist()
+    label = np.array([1, 2, 3, 4, 5, 6]).tolist()
+    return dataset, label
+
+
 class TestGaussianNaiveBayesIntegration:
     def test_same_result_with_wikipedia(
         self,
@@ -79,3 +95,63 @@ def test_same_result_with_sklearn(
         sklearn_prob = sklearn_clf.predict_proba([wikipedia_person_classification_sample])
         assert math.isclose(sut_prob[0], sklearn_prob[0][0])
         assert math.isclose(sut_prob[1], sklearn_prob[0][1])
+
+
+class TestMultinomialNaiveBayesIntegration:
+    def test_same_result_with_sklearn(
+        self,
+        sklearn_example_random_dataset_label: tuple[list[list[float]], list[int]],
+    ) -> None:
+        dataset, label = sklearn_example_random_dataset_label
+        sklearn_clf = MultinomialNB()
+        sklearn_clf.fit(dataset, label)
+        # use the same variance calculation config with sklearn
+        sut = MultinomialNaiveBayes(alpha=1).fit(dataset, label)
+        # test same labels
+        test_sample = dataset[2]
+        sklearn_label = sklearn_clf.predict([test_sample])
+        sut_label = sut.predict(test_sample)
+
+        assert sut_label == sklearn_label[0]
+
+        # test same log probs
+        sut_log_prob = sut.predict_log_proba(test_sample)
+        sklearn_log_prob = sklearn_clf.predict_log_proba([test_sample])
+        for i in range(6):
+            assert math.isclose(sut_log_prob[i + 1], sklearn_log_prob[0][i])
+
+        # # test same probs
+        sut_prob = sut.predict_proba(test_sample)
+        sklearn_prob = sklearn_clf.predict_proba([test_sample])
+        for i in range(6):
+            assert math.isclose(sut_prob[i + 1], sklearn_prob[0][i])
+
+
+class TestCategoricalNaiveBayesIntegration:
+    def test_same_result_with_sklearn(
+        self,
+        sklearn_example_random_dataset_label: tuple[list[list[float]], list[int]],
+    ) -> None:
+        dataset, label = sklearn_example_random_dataset_label
+        sklearn_clf = CategoricalNB(alpha=1)
+        sklearn_clf.fit(dataset, label)
+        # use the same variance calculation config with sklearn
+        sut = CategoricalNaiveBayes(alpha=1).fit(dataset, label)
+        # test same labels
+        test_sample = dataset[2]
+        sklearn_label = sklearn_clf.predict([test_sample])
+        sut_label = sut.predict(test_sample)
+
+        assert sut_label == sklearn_label[0]
+
+        # test same log probs
+        sut_log_prob = sut.predict_log_proba(test_sample)
+        sklearn_log_prob = sklearn_clf.predict_log_proba([test_sample])
+        for i in range(6):
+            assert math.isclose(sut_log_prob[i + 1], sklearn_log_prob[0][i])
+
+        # # test same probs
+        sut_prob = sut.predict_proba(test_sample)
+        sklearn_prob = sklearn_clf.predict_proba([test_sample])
+        for i in range(6):
+            assert math.isclose(sut_prob[i + 1], sklearn_prob[0][i])
diff --git a/toyml/classification/naive_bayes.py b/toyml/classification/naive_bayes.py
new file mode 100644
index 0000000..d750533
--- /dev/null
+++ b/toyml/classification/naive_bayes.py
@@ -0,0 +1,362 @@
+from __future__ import annotations
+
+import copy
+import math
+import statistics
+
+from abc import ABC, abstractmethod
+from collections import Counter
+from dataclasses import dataclass, field
+
+Class = int
+Dimension = int
+FeatureValue = float
+
+
+@dataclass
+class BaseNaiveBayes(ABC):
+    class_prior_: dict[Class, float] = field(default_factory=dict)
+    """The prior probability of each class in training dataset"""
+
+    @abstractmethod
+    def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> BaseNaiveBayes:
+        raise NotImplementedError
+
+    def predict(self, sample: list[FeatureValue]) -> int:
+        """Predict the class label for a given sample.
+
+        Args:
+            sample: A single sample to predict, represented as a list of feature values.
+
+        Returns:
+            int: Predicted class label.
+        """
+        label_posteriors = self.predict_proba(sample)
+        label = max(label_posteriors, key=lambda k: label_posteriors[k])
+        return label
+
+    def predict_proba(self, sample: list[FeatureValue], normalization: bool = True) -> dict[Class, float]:
+        """Predict class probabilities for a given sample.
+
+        Args:
+            sample: A single sample to predict, represented as a list of feature values.
+            normalization: Whether to normalize the probabilities. Default is True.
+
+        Returns:
+            dict[int, float]: Dictionary mapping class labels to their predicted probabilities.
+        """
+        label_posteriors = self.predict_log_proba(sample, normalization)
+        return {label: math.exp(log_prob) for label, log_prob in label_posteriors.items()}
+
+    def predict_log_proba(self, sample: list[FeatureValue], normalization: bool = True) -> dict[Class, float]:
+        """Predict log probabilities for a given sample.
+
+        Args:
+            sample: A single sample to predict, represented as a list of feature values.
+            normalization: Whether to normalize the log probabilities. Default is True.
+
+        Returns:
+            dict[int, float]: Dictionary mapping class labels to their predicted log probabilities.
+        """
+        label_likelihoods = self._log_likelihood(sample)
+        raw_label_posteriors: dict[int, float] = {}
+        for label, likelihood in label_likelihoods.items():
+            raw_label_posteriors[label] = likelihood + math.log(self.class_prior_[label])
+        if normalization is False:
+            return raw_label_posteriors
+        # ref: https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/naive_bayes.py#L97
+        max_log_prob = max(raw_label_posteriors.values())
+        logsumexp_prob = max_log_prob + math.log(
+            sum(math.exp(log_prob - max_log_prob) for log_prob in raw_label_posteriors.values())
+        )
+        label_posteriors = {
+            label: raw_posterior - logsumexp_prob for label, raw_posterior in raw_label_posteriors.items()
+        }
+        return label_posteriors
+
+    @abstractmethod
+    def _log_likelihood(self, sample: list[FeatureValue]) -> dict[Class, float]:
+        raise NotImplementedError
+
+
+@dataclass
+class GaussianNaiveBayes(BaseNaiveBayes):
+    """
+    Gaussian naive bayes classification algorithm implementation.
+
+    Examples:
+        >>> label = [0, 0, 0, 0, 1, 1, 1, 1]
+        >>> dataset = [[6.00, 180, 12], [5.92, 190, 11], [5.58, 170, 12], [5.92, 165, 10], [5.00, 100, 6], [5.50, 150, 8], [5.42, 130, 7], [5.75, 150, 9]]
+        >>> clf = GaussianNaiveBayes().fit(dataset, label)
+        >>> clf.predict([6.00, 130, 8])
+        1
+
+    """
+
+    unbiased_variance: bool = True
+    """Use the unbiased variance estimation or not. Default is True."""
+    var_smoothing: float = 1e-9
+    """Portion of the largest variance of all features that is added to variances for calculation stability."""
+    labels_: list[Class] = field(default_factory=list)
+    """The labels in training dataset"""
+    class_count_: int = 0
+    """The number of classes in training dataset"""
+    class_prior_: dict[Class, float] = field(default_factory=dict)
+    """The prior probability of each class in training dataset"""
+    means_: dict[Class, list[float]] = field(default_factory=dict)
+    """The means of each class in training dataset"""
+    variances_: dict[Class, list[float]] = field(default_factory=dict)
+    """The variance of each class in training dataset"""
+    epsilon_: float = 0
+    """The absolute additive value to variances."""
+
+    def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> GaussianNaiveBayes:
+        """Fit the Gaussian Naive Bayes classifier.
+
+        Args:
+            dataset: Training data, where each row is a sample and each column is a feature.
+            labels: Target labels for training data.
+
+        Returns:
+            self: Returns the instance itself.
+        """
+        self.labels_ = sorted(set(labels))
+        self.class_count_ = len(set(labels))
+        self.class_prior_ = {label: 1 / self.class_count_ for label in self.labels_}
+        self.epsilon_ = self.var_smoothing * max(self._variance(col) for col in zip(*dataset))
+        self.means_, self.variances_ = self._get_classes_means_variances(dataset, labels)
+        return self
+
+    def _log_likelihood(self, sample: list[FeatureValue]) -> dict[Class, float]:
+        """
+        Calculate the likelihood of each sample in each class
+        """
+        label_likelihoods: dict[Class, float] = {}
+        for label in self.labels_:
+            label_means = self.means_[label]
+            label_vars = self.variances_[label]
+            log_likelihood = 0.0
+            for i, xi in enumerate(sample):
+                # calculate the log-likelihood
+                log_likelihood += -0.5 * math.log(2 * math.pi * label_vars[i]) - (
+                    (xi - label_means[i]) ** 2 / (2 * label_vars[i])
+                )
+            label_likelihoods[label] = log_likelihood
+        return label_likelihoods
+
+    def _get_classes_means_variances(
+        self,
+        dataset: list[list[FeatureValue]],
+        labels: list[Class],
+    ) -> tuple[dict[Class, list[float]], dict[Class, list[float]]]:
+        means, variances = {}, {}
+        for label in self.labels_:
+            label_samples = [sample for (sample, sample_label) in zip(dataset, labels) if sample_label == label]
+            means[label] = self._dataset_column_means(label_samples)
+            variances[label] = self._dataset_column_variances(label_samples)
+        return means, variances
+
+    @staticmethod
+    def _dataset_column_means(dataset: list[list[FeatureValue]]) -> list[float]:
+        """
+        Calculate vectors mean
+        """
+        return [statistics.mean(column) for column in zip(*dataset, strict=True)]
+
+    def _dataset_column_variances(self, dataset: list[list[FeatureValue]]) -> list[float]:
+        """
+        Calculate vectors(every column) standard variance
+        """
+        return [self._variance(column) + self.epsilon_ for column in zip(*dataset, strict=True)]
+
+    def _variance(self, xs: list[FeatureValue] | tuple[FeatureValue, ...]) -> float:
+        n = len(xs)
+        mean = statistics.mean(xs)
+        ss = sum((x - mean) ** 2 for x in xs)
+        if self.unbiased_variance is True:
+            if n > 1:
+                variance = ss / (len(xs) - 1)
+            else:
+                variance = 0.0  # Variance is zero when there's only one sample
+        else:
+            variance = ss / len(xs)
+        return variance
+
+
+@dataclass
+class MultinomialNaiveBayes(BaseNaiveBayes):
+    """
+    Multinomial Naive Bayes classifier.
+
+    Examples:
+        >>> import random
+        >>> rng = random.Random(0)
+        >>> dataset = [[rng.randint(0, 5) for _ in range(100)] for _ in range(6)]
+        >>> label = [1, 2, 3, 4, 5, 6]
+        >>> clf = MultinomialNaiveBayes().fit(dataset, label)
+        >>> clf.predict(dataset[2])
+        3
+
+    """
+
+    alpha: float = 1.0
+    """Additive (Laplace/Lidstone) smoothing parameter"""
+    labels_: list[Class] = field(default_factory=list)
+    """The labels in training dataset"""
+    class_count_: int = 0
+    """The number of classes in training dataset"""
+    class_prior_: dict[Class, float] = field(default_factory=dict)
+    """The prior probability of each class in training dataset"""
+    class_feature_count_: dict[Class, list[int]] = field(default_factory=dict)
+    """The feature value counts of each class in training dataset"""
+    class_feature_log_prob_: dict[Class, list[float]] = field(default_factory=dict)
+    """The feature value probability of each class in training dataset"""
+
+    def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> MultinomialNaiveBayes:
+        """Fit the Multinomial Naive Bayes classifier.
+
+        Args:
+            dataset: Training data, where each row is a sample and each column is a feature.
+                     Features should be represented as counts (non-negative integers).
+            labels: Target labels for training data.
+
+        Returns:
+            self: Returns the instance itself.
+        """
+        self.labels_ = sorted(set(labels))
+        self.class_count_ = len(set(labels))
+        # get the prior from training dataset labels
+        self.class_prior_ = {label: count / len(dataset) for label, count in Counter(labels).items()}
+        self.class_feature_count_, self.class_feature_log_prob_ = self._get_classes_feature_count_prob(dataset, labels)
+        return self
+
+    def _log_likelihood(self, sample: list[FeatureValue]) -> dict[Class, float]:
+        """
+        Calculate the likelihood of each sample in each class
+        """
+        label_likelihoods: dict[int, float] = {}
+        for label in self.labels_:
+            likelihood = 0.0
+            for i, xi in enumerate(sample):
+                # calculate the log-likelihood
+                likelihood += xi * self.class_feature_log_prob_[label][i]
+            label_likelihoods[label] = likelihood
+        return label_likelihoods
+
+    def _get_classes_feature_count_prob(
+        self,
+        dataset: list[list[FeatureValue]],
+        labels: list[Class],
+    ) -> tuple[dict[Class, list[int]], dict[Class, list[float]]]:
+        feature_count, feature_prob = {}, {}
+        for label in self.labels_:
+            label_samples = [sample for (sample, sample_label) in zip(dataset, labels) if sample_label == label]
+            counts = self._dataset_feature_counts(label_samples)
+            feature_count[label] = counts
+            feature_prob[label] = [math.log(value_count / sum(counts)) for value_count in counts]
+
+        return feature_count, feature_prob
+
+    def _dataset_feature_counts(self, dataset: list[list[FeatureValue]]) -> list[int]:
+        """
+        Calculate feature value counts
+        """
+        return [sum(column) + self.alpha for column in zip(*dataset, strict=True)]
+
+
+@dataclass
+class CategoricalNaiveBayes(BaseNaiveBayes):
+    """
+    Categorical Naive Bayes classifier.
+
+    Examples:
+        >>> import random
+        >>> rng = random.Random(0)
+        >>> dataset = [[rng.randint(0, 5) for _ in range(100)] for _ in range(6)]
+        >>> label = [1, 2, 3, 4, 5, 6]
+        >>> clf = CategoricalNaiveBayes().fit(dataset, label)
+        >>> clf.predict(dataset[2])
+        3
+    """
+
+    alpha: float = 1.0
+    """Additive (Laplace/Lidstone) smoothing parameter"""
+    labels_: list[Class] = field(default_factory=list)
+    """The labels in training dataset"""
+    class_count_: int = 0
+    """The number of classes in training dataset"""
+    class_prior_: dict[Class, float] = field(default_factory=dict)
+    """The prior probability of each class in training dataset"""
+    class_feature_count_: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = field(default_factory=dict)
+    """The feature value counts of each class in training dataset"""
+    class_feature_log_prob_: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = field(default_factory=dict)
+    """The feature value probability of each class in training dataset"""
+
+    def fit(self, dataset: list[list[FeatureValue]], labels: list[Class]) -> CategoricalNaiveBayes:
+        """Fit the Categorical Naive Bayes classifier.
+
+        Args:
+            dataset: Training data, where each row is a sample and each column is a feature.
+            labels: Target labels for training data.
+
+        Returns:
+            self: Returns the instance itself.
+        """
+        self.labels_ = sorted(set(labels))
+        self.class_count_ = len(set(labels))
+        # get the prior from training dataset labels
+        self.class_prior_ = {label: count / len(dataset) for label, count in Counter(labels).items()}
+        self.class_feature_count_, self.class_feature_log_prob_ = self._get_classes_feature_count_prob(dataset, labels)
+        return self
+
+    def _log_likelihood(self, sample: list[FeatureValue]) -> dict[Class, float]:
+        """
+        Calculate the likelihood of each sample in each class
+        """
+        label_likelihoods: dict[Class, float] = {}
+        for label in self.labels_:
+            likelihood = 0.0
+            for i, xi in enumerate(sample):
+                # calculate the log-likelihood
+                likelihood += self.class_feature_log_prob_[label][i].get(xi, 0)
+            label_likelihoods[label] = likelihood
+        return label_likelihoods
+
+    def _get_classes_feature_count_prob(
+        self,
+        dataset: list[list[FeatureValue]],
+        labels: list[Class],
+    ) -> tuple:  # type: ignore[type-arg]
+        feature_smooth_count: dict[Dimension, dict[FeatureValue, float]] = {}
+        for dim, column in enumerate(zip(*dataset)):
+            feature_smooth_count[dim] = {value: self.alpha for value in set(column)}
+
+        feature_count: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = {}
+        feature_prob: dict[Class, dict[Dimension, dict[FeatureValue, float]]] = {}
+        for label in self.labels_:
+            label_samples = [sample for (sample, sample_label) in zip(dataset, labels) if sample_label == label]
+            counts = self._dataset_feature_counts(label_samples, feature_smooth_count)
+            feature_count[label] = counts
+            feature_prob[label] = {}
+            for dim, feature_value_count in counts.items():
+                feature_prob[label][dim] = {
+                    feature_value: math.log(count / sum(feature_value_count.values()))
+                    for feature_value, count in feature_value_count.items()
+                }
+
+        return feature_count, feature_prob
+
+    @staticmethod
+    def _dataset_feature_counts(
+        dataset: list[list[FeatureValue]],
+        feature_smooth_count: dict[Dimension, dict[FeatureValue, float]],
+    ) -> dict[Dimension, dict[FeatureValue, float]]:
+        """
+        Calculate feature value counts
+        """
+        # Note: here we should use deepcopy
+        feature_value_count = copy.deepcopy(feature_smooth_count)
+        for dim, column in enumerate(zip(*dataset, strict=True)):
+            for value, count in Counter(column).items():
+                feature_value_count[dim][value] += count
+        return feature_value_count
diff --git a/toyml/classification/naive_bayes/__init__.py b/toyml/classification/naive_bayes/__init__.py
deleted file mode 100644
index 4218e40..0000000
--- a/toyml/classification/naive_bayes/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from .categorical_naive_bayes import CategoricalNaiveBayes
-from .gaussian_naive_bayes import GaussianNaiveBayes
-from .multinomial_naive_bayes import MultinomialNaiveBayes
-
-__all__ = [
-    "GaussianNaiveBayes",
-    "MultinomialNaiveBayes",
-    "CategoricalNaiveBayes",
-]
diff --git a/toyml/classification/naive_bayes/categorical_naive_bayes.py b/toyml/classification/naive_bayes/categorical_naive_bayes.py
deleted file mode 100644
index 289e078..0000000
--- a/toyml/classification/naive_bayes/categorical_naive_bayes.py
+++ /dev/null
@@ -1,159 +0,0 @@
-from __future__ import annotations
-
-import copy
-import math
-
-from collections import Counter
-from dataclasses import dataclass, field
-
-Class = int
-Dimension = int
-FeatureValue = int
-Count = float
-Prob = float
-
-
-@dataclass
-class CategoricalNaiveBayes:
-    """
-    Categorical Naive Bayes classifier.
-
-    Examples:
-        >>> import random
-        >>> rng = random.Random(0)
-        >>> dataset = [[rng.randint(0, 5) for _ in range(100)] for _ in range(6)]
-        >>> label = [1, 2, 3, 4, 5, 6]
-        >>> clf = CategoricalNaiveBayes().fit(dataset, label)
-        >>> clf.predict(dataset[2])
-        3
-    """
-
-    alpha: float = 1.0
-    """Additive (Laplace/Lidstone) smoothing parameter"""
-    labels_: list[Class] = field(default_factory=list)
-    """The labels in training dataset"""
-    class_count_: int = 0
-    """The number of classes in training dataset"""
-    class_prior_: dict[Class, float] = field(default_factory=dict)
-    """The prior probability of each class in training dataset"""
-    class_feature_count_: dict[Class, dict[Dimension, dict[FeatureValue, Count]]] = field(default_factory=dict)
-    """The feature value counts of each class in training dataset"""
-    class_feature_log_prob_: dict[Class, dict[Dimension, dict[FeatureValue, Prob]]] = field(default_factory=dict)
-    """The feature value probability of each class in training dataset"""
-
-    def fit(self, dataset: list[list[int]], labels: list[int]) -> CategoricalNaiveBayes:
-        """Fit the Categorical Naive Bayes classifier.
-
-        Args:
-            dataset: Training data, where each row is a sample and each column is a feature.
-            labels: Target labels for training data.
-
-        Returns:
-            self: Returns the instance itself.
-        """
-        self.labels_ = sorted(set(labels))
-        self.class_count_ = len(set(labels))
-        # get the prior from training dataset labels
-        self.class_prior_ = {label: count / len(dataset) for label, count in Counter(labels).items()}
-        self.class_feature_count_, self.class_feature_log_prob_ = self._get_classes_feature_count_prob(dataset, labels)
-        return self
-
-    def predict(self, sample: list[int]) -> int:
-        """Predict the class label for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature values.
-
-        Returns:
-            int: Predicted class label.
-        """
-        label_posteriors = self.predict_log_proba(sample)
-        label = max(label_posteriors, key=lambda k: label_posteriors[k])
-        return label
-
-    def predict_proba(self, sample: list[int]) -> dict[int, float]:
-        """Predict class probabilities for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature values.
-
-        Returns:
-            dict[int, float]: Dictionary mapping class labels to their predicted probabilities.
-        """
-        label_posteriors = self.predict_log_proba(sample)
-        return {label: math.exp(log_prob) for label, log_prob in label_posteriors.items()}
-
-    def predict_log_proba(self, sample: list[int]) -> dict[int, float]:
-        """Predict log probabilities for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature values.
-
-        Returns:
-            dict[int, float]: Dictionary mapping class labels to their predicted log probabilities.
-        """
-        label_likelihoods = self._likelihood(sample)
-        raw_label_posteriors: dict[int, float] = {}
-        for label, likelihood in label_likelihoods.items():
-            raw_label_posteriors[label] = likelihood + math.log(self.class_prior_[label])
-        # ref: https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/naive_bayes.py#L97
-        max_log_prob = max(raw_label_posteriors.values())
-        logsumexp_prob = max_log_prob + math.log(
-            sum(math.exp(log_prob - max_log_prob) for log_prob in raw_label_posteriors.values())
-        )
-        label_posteriors = {
-            label: raw_posterior - logsumexp_prob for label, raw_posterior in raw_label_posteriors.items()
-        }
-        return label_posteriors
-
-    def _likelihood(self, sample: list[int]) -> dict[Class, float]:
-        """
-        Calculate the likelihood of each sample in each class
-        """
-        label_likelihoods: dict[Class, float] = {}
-        for label in self.labels_:
-            likelihood = 0.0
-            for i, xi in enumerate(sample):
-                # calculate the log-likelihood
-                likelihood += self.class_feature_log_prob_[label][i].get(xi, 0)
-            label_likelihoods[label] = likelihood
-        return label_likelihoods
-
-    def _get_classes_feature_count_prob(
-        self,
-        dataset: list[list[int]],
-        labels: list[int],
-    ) -> tuple:  # type: ignore[type-arg]
-        feature_smooth_count: dict[Dimension, dict[FeatureValue, Count]] = {}
-        for dim, column in enumerate(zip(*dataset)):
-            feature_smooth_count[dim] = {value: self.alpha for value in set(column)}
-
-        feature_count: dict[Class, dict[Dimension, dict[FeatureValue, Count]]] = {}
-        feature_prob: dict[Class, dict[Dimension, dict[FeatureValue, Prob]]] = {}
-        for label in self.labels_:
-            label_samples = [sample for (sample, sample_label) in zip(dataset, labels) if sample_label == label]
-            counts = self._dataset_feature_counts(label_samples, feature_smooth_count)
-            feature_count[label] = counts
-            feature_prob[label] = {}
-            for dim, feature_value_count in counts.items():
-                feature_prob[label][dim] = {
-                    feature_value: math.log(count / sum(feature_value_count.values()))
-                    for feature_value, count in feature_value_count.items()
-                }
-
-        return feature_count, feature_prob
-
-    @staticmethod
-    def _dataset_feature_counts(
-        dataset: list[list[FeatureValue]],
-        feature_smooth_count: dict[Dimension, dict[FeatureValue, Count]],
-    ) -> dict[Dimension, dict[FeatureValue, Count]]:
-        """
-        Calculate feature value counts
-        """
-        # Note: here we should use deepcopy
-        feature_value_count = copy.deepcopy(feature_smooth_count)
-        for dim, column in enumerate(zip(*dataset, strict=True)):
-            for value, count in Counter(column).items():
-                feature_value_count[dim][value] += count
-        return feature_value_count
diff --git a/toyml/classification/naive_bayes/gaussian_naive_bayes.py b/toyml/classification/naive_bayes/gaussian_naive_bayes.py
deleted file mode 100644
index 47b9cce..0000000
--- a/toyml/classification/naive_bayes/gaussian_naive_bayes.py
+++ /dev/null
@@ -1,158 +0,0 @@
-from __future__ import annotations
-
-import math
-import statistics
-
-from dataclasses import dataclass, field
-
-
-@dataclass
-class GaussianNaiveBayes:
-    """
-    Gaussian naive bayes classification algorithm implementation.
-
-    Examples:
-        >>> label = [0, 0, 0, 0, 1, 1, 1, 1]
-        >>> dataset = [[6.00, 180, 12], [5.92, 190, 11], [5.58, 170, 12], [5.92, 165, 10], [5.00, 100, 6], [5.50, 150, 8], [5.42, 130, 7], [5.75, 150, 9]]
-        >>> clf = GaussianNaiveBayes().fit(dataset, label)
-        >>> clf.predict([6.00, 130, 8])
-        1
-
-    """
-
-    unbiased_variance: bool = True
-    """Use the unbiased variance estimation or not. Default is True."""
-    var_smoothing: float = 1e-9
-    """Portion of the largest variance of all features that is added to variances for calculation stability."""
-    labels_: list[int] = field(default_factory=list)
-    """The labels in training dataset"""
-    class_count_: int = 0
-    """The number of classes in training dataset"""
-    class_prior_: dict[int, float] = field(default_factory=dict)
-    """The prior probability of each class in training dataset"""
-    means_: dict[int, list[float]] = field(default_factory=dict)
-    """The means of each class in training dataset"""
-    variances_: dict[int, list[float]] = field(default_factory=dict)
-    """The variance of each class in training dataset"""
-    epsilon_: float = 0
-    """The absolute additive value to variances."""
-
-    def fit(self, dataset: list[list[float]], labels: list[int]) -> GaussianNaiveBayes:
-        """Fit the Gaussian Naive Bayes classifier.
-
-        Args:
-            dataset: Training data, where each row is a sample and each column is a feature.
-            labels: Target labels for training data.
-
-        Returns:
-            self: Returns the instance itself.
-        """
-        self.labels_ = sorted(set(labels))
-        self.class_count_ = len(set(labels))
-        self.class_prior_ = {label: 1 / self.class_count_ for label in self.labels_}
-        self.epsilon_ = self.var_smoothing * max(self._variance(col) for col in zip(*dataset))
-        self.means_, self.variances_ = self._get_classes_means_variances(dataset, labels)
-        return self
-
-    def predict(self, sample: list[float]) -> int:
-        """Predict the class label for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature values.
-
-        Returns:
-            int: Predicted class label.
-        """
-        label_posteriors = self.predict_proba(sample)
-        label = max(label_posteriors, key=lambda k: label_posteriors[k])
-        return label
-
-    def predict_proba(self, sample: list[float], normalization: bool = True) -> dict[int, float]:
-        """Predict class probabilities for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature values.
-            normalization: Whether to normalize the probabilities. Default is True.
-
-        Returns:
-            dict[int, float]: Dictionary mapping class labels to their predicted probabilities.
-        """
-        label_posteriors = self.predict_log_proba(sample, normalization)
-        return {label: math.exp(log_prob) for label, log_prob in label_posteriors.items()}
-
-    def predict_log_proba(self, sample: list[float], normalization: bool = True) -> dict[int, float]:
-        """Predict log probabilities for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature values.
-            normalization: Whether to normalize the log probabilities. Default is True.
-
-        Returns:
-            dict[int, float]: Dictionary mapping class labels to their predicted log probabilities.
-        """
-        label_likelihoods = self._log_likelihood(sample)
-        raw_label_posteriors: dict[int, float] = {}
-        for label, likelihood in label_likelihoods.items():
-            raw_label_posteriors[label] = likelihood + math.log(self.class_prior_[label])
-        if normalization is False:
-            return raw_label_posteriors
-        # ref: https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/naive_bayes.py#L97
-        max_log_prob = max(raw_label_posteriors.values())
-        logsumexp_prob = max_log_prob + math.log(
-            sum(math.exp(log_prob - max_log_prob) for log_prob in raw_label_posteriors.values())
-        )
-        label_posteriors = {
-            label: raw_posterior - logsumexp_prob for label, raw_posterior in raw_label_posteriors.items()
-        }
-        return label_posteriors
-
-    def _log_likelihood(self, sample: list[float]) -> dict[int, float]:
-        """
-        Calculate the likelihood of each sample in each class
-        """
-        label_likelihoods: dict[int, float] = {}
-        for label in self.labels_:
-            label_means = self.means_[label]
-            label_vars = self.variances_[label]
-            log_likelihood = 0.0
-            for i, xi in enumerate(sample):
-                # calculate the log-likelihood
-                log_likelihood += -0.5 * math.log(2 * math.pi * label_vars[i]) - (
-                    (xi - label_means[i]) ** 2 / (2 * label_vars[i])
-                )
-            label_likelihoods[label] = log_likelihood
-        return label_likelihoods
-
-    def _get_classes_means_variances(
-        self,
-        dataset: list[list[float]],
-        labels: list[int],
-    ) -> tuple[dict[int, list[float]], dict[int, list[float]]]:
-        means, variances = {}, {}
-        for label in self.labels_:
-            label_samples = [sample for (sample, sample_label) in zip(dataset, labels) if sample_label == label]
-            means[label] = self._dataset_column_means(label_samples)
-            variances[label] = self._dataset_column_variances(label_samples)
-        return means, variances
-
-    @staticmethod
-    def _dataset_column_means(dataset: list[list[float]]) -> list[float]:
-        """
-        Calculate vectors mean
-        """
-        return [statistics.mean(column) for column in zip(*dataset, strict=True)]
-
-    def _dataset_column_variances(self, dataset: list[list[float]]) -> list[float]:
-        """
-        Calculate vectors(every column) standard variance
-        """
-        return [self._variance(column) + self.epsilon_ for column in zip(*dataset, strict=True)]
-
-    def _variance(self, xs: list[float] | tuple[float, ...]) -> float:
-        mean = statistics.mean(xs)
-        ss = sum((x - mean) ** 2 for x in xs)
-        if self.unbiased_variance is True:
-            variance = ss / (len(xs) - 1)
-        else:
-            variance = ss / len(xs)
-        return variance
diff --git a/toyml/classification/naive_bayes/multinomial_naive_bayes.py b/toyml/classification/naive_bayes/multinomial_naive_bayes.py
deleted file mode 100644
index 74ff70d..0000000
--- a/toyml/classification/naive_bayes/multinomial_naive_bayes.py
+++ /dev/null
@@ -1,135 +0,0 @@
-from __future__ import annotations
-
-import math
-
-from collections import Counter
-from dataclasses import dataclass, field
-
-
-@dataclass
-class MultinomialNaiveBayes:
-    """
-    Multinomial Naive Bayes classifier.
-
-    Examples:
-        >>> import random
-        >>> rng = random.Random(0)
-        >>> dataset = [[rng.randint(0, 5) for _ in range(100)] for _ in range(6)]
-        >>> label = [1, 2, 3, 4, 5, 6]
-        >>> clf = MultinomialNaiveBayes().fit(dataset, label)
-        >>> clf.predict(dataset[2])
-        3
-
-    """
-
-    alpha: float = 1.0
-    """Additive (Laplace/Lidstone) smoothing parameter"""
-    labels_: list[int] = field(default_factory=list)
-    """The labels in training dataset"""
-    class_count_: int = 0
-    """The number of classes in training dataset"""
-    class_prior_: dict[int, float] = field(default_factory=dict)
-    """The prior probability of each class in training dataset"""
-    class_feature_count_: dict[int, list[int]] = field(default_factory=dict)
-    """The feature value counts of each class in training dataset"""
-    class_feature_log_prob_: dict[int, list[float]] = field(default_factory=dict)
-    """The feature value probability of each class in training dataset"""
-
-    def fit(self, dataset: list[list[int]], labels: list[int]) -> MultinomialNaiveBayes:
-        """Fit the Multinomial Naive Bayes classifier.
-
-        Args:
-            dataset: Training data, where each row is a sample and each column is a feature.
-                    Features should be represented as counts (non-negative integers).
-            labels: Target labels for training data.
-
-        Returns:
-            self: Returns the instance itself.
-        """
-        self.labels_ = sorted(set(labels))
-        self.class_count_ = len(set(labels))
-        # get the prior from training dataset labels
-        self.class_prior_ = {label: count / len(dataset) for label, count in Counter(labels).items()}
-        self.class_feature_count_, self.class_feature_log_prob_ = self._get_classes_feature_count_prob(dataset, labels)
-        return self
-
-    def predict(self, sample: list[int]) -> int:
-        """Predict the class label for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature counts.
-
-        Returns:
-            int: Predicted class label.
-        """
-        label_posteriors = self.predict_log_proba(sample)
-        label = max(label_posteriors, key=lambda k: label_posteriors[k])
-        return label
-
-    def predict_proba(self, sample: list[int]) -> dict[int, float]:
-        """Predict class probabilities for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature counts.
-
-        Returns:
-            dict[int, float]: Dictionary mapping class labels to their predicted probabilities.
-        """
-        label_posteriors = self.predict_log_proba(sample)
-        return {label: math.exp(log_prob) for label, log_prob in label_posteriors.items()}
-
-    def predict_log_proba(self, sample: list[int]) -> dict[int, float]:
-        """Predict log probabilities for a given sample.
-
-        Args:
-            sample: A single sample to predict, represented as a list of feature counts.
-
-        Returns:
-            dict[int, float]: Dictionary mapping class labels to their predicted log probabilities.
-        """
-        label_likelihoods = self._likelihood(sample)
-        raw_label_posteriors: dict[int, float] = {}
-        for label, likelihood in label_likelihoods.items():
-            raw_label_posteriors[label] = likelihood + math.log(self.class_prior_[label])
-        # ref: https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/naive_bayes.py#L97
-        max_log_prob = max(raw_label_posteriors.values())
-        logsumexp_prob = max_log_prob + math.log(
-            sum(math.exp(log_prob - max_log_prob) for log_prob in raw_label_posteriors.values())
-        )
-        label_posteriors = {
-            label: raw_posterior - logsumexp_prob for label, raw_posterior in raw_label_posteriors.items()
-        }
-        return label_posteriors
-
-    def _likelihood(self, sample: list[int]) -> dict[int, float]:
-        """
-        Calculate the likelihood of each sample in each class
-        """
-        label_likelihoods: dict[int, float] = {}
-        for label in self.labels_:
-            likelihood = 0.0
-            for i, xi in enumerate(sample):
-                # calculate the log-likelihood
-                likelihood += xi * self.class_feature_log_prob_[label][i]
-            label_likelihoods[label] = likelihood
-        return label_likelihoods
-
-    def _get_classes_feature_count_prob(
-        self,
-        dataset: list[list[int]],
-        labels: list[int],
-    ) -> tuple[dict[int, list[int]], dict[int, list[float]]]:
-        feature_count, feature_prob = {}, {}
-        for label in self.labels_:
-            label_samples = [sample for (sample, sample_label) in zip(dataset, labels) if sample_label == label]
-            counts = self._dataset_feature_counts(label_samples)
-            feature_count[label] = counts
-            feature_prob[label] = [math.log(value_count / sum(counts)) for value_count in counts]
-
-        return feature_count, feature_prob
-
-    def _dataset_feature_counts(self, dataset: list[list[int]]) -> list[int]:
-        """
-        Calculate feature value counts
-        """
-        return [sum(column) + self.alpha for column in zip(*dataset, strict=True)]