Commit

add tests
noahnovsak committed Aug 25, 2023
1 parent 8eaab15 commit 58a679d
Showing 2 changed files with 94 additions and 104 deletions.
2 changes: 1 addition & 1 deletion Orange/base.py
@@ -444,7 +444,7 @@ def fix_dim(x):
# Call the predictor
backmappers = None
n_values = []
if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix)):
if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix, da.Array)):
prediction = self.predict(data)
elif isinstance(data, Table):
backmappers, n_values = self.get_backmappers(data)
196 changes: 93 additions & 103 deletions Orange/tests/test_naive_bayes.py
@@ -10,13 +10,12 @@
from Orange.classification import NaiveBayesLearner
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
from Orange.evaluation import CrossValidation, CA
from Orange.tests import test_filename
from Orange.tests.test_dasktable import with_dasktable, temp_dasktable


# This class is used to force predict_storage to fall back to the slower
# procedure instead of calling `predict`
from Orange.tests import test_filename


class NotATable(Table): # pylint: disable=too-many-ancestors,abstract-method
@classmethod
def from_file(cls, *args, **kwargs):
@@ -34,40 +33,45 @@ def setUpClass(cls):
def setUp(self):
self.model = self.learner(self.data)

def test_NaiveBayes(self):
@with_dasktable
def test_NaiveBayes(self, prep_table):
tables = [prep_table(tab) for tab in (self.table, Table("iris"))]
cv = CrossValidation(k=10)
results = cv(self.table, [self.learner])
results = cv(tables[0], [self.learner])
ca = CA(results)
self.assertGreater(ca, 0.7)
self.assertLess(ca, 0.9)

cv = CrossValidation(k=10)
results = cv(Table("iris"), [self.learner])
results = cv(tables[1], [self.learner])
ca = CA(results)
self.assertGreater(ca, 0.7)

def test_degenerate(self):
@with_dasktable
def test_degenerate(self, prep_table):
d = Domain((ContinuousVariable(name="A"),
ContinuousVariable(name="B"),
ContinuousVariable(name="C")),
DiscreteVariable(name="CLASS", values=("M", "F")))
t = Table.from_list(d, [[0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1]])
t = prep_table(Table.from_list(d, [[0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1]]))
nb = NaiveBayesLearner()
model = nb(t)
self.assertEqual(model.domain.attributes, ())
self.assertEqual(model(t[0]), 1)
self.assertTrue(all(model(t) == 1))

def test_allnan_cv(self):
@with_dasktable
def test_allnan_cv(self, prep_table):
# GH 2740
data = Table(test_filename('datasets/lenses.tab'))
data = prep_table(Table(test_filename('datasets/lenses.tab')))
cv = CrossValidation(stratified=False)
results = cv(data, [self.learner])
self.assertFalse(any(results.failed))

def test_prediction_routing(self):
data = self.data
predict = self.model.predict = Mock(return_value=(data.Y, None))
@with_dasktable
def test_prediction_routing(self, prep_table):
data = prep_table(self.data)
predict = self.model.predict = Mock(return_value=(np.asarray(data.Y), None))

self.model(data)
predict.assert_called()
@@ -84,12 +88,14 @@ def test_prediction_routing(self):
self.model.predict_storage(data[0])
predict.assert_called()

def test_compare_results_of_predict_and_predict_storage(self):
@with_dasktable
def test_compare_results_of_predict_and_predict_storage(self, prep_table):
data1 = prep_table(self.data)
data2 = NotATable("titanic")

self.model = self.learner(self.data[:50])
self.model = self.learner(data1[:50])
predict = self.model.predict = Mock(side_effect=self.model.predict)
values, probs = self.model.predict_storage(self.data[50:])
values, probs = self.model.predict_storage(data1[50:])
predict.assert_called()
predict.reset_mock()
values2, probs2 = self.model.predict_storage(data2[50:])
@@ -99,18 +105,18 @@ def test_compare_results_of_predict_and_predict_storage(self):
np.testing.assert_equal(probs, probs2)

def test_predictions(self):
self._test_predictions(sparse=None)
self._test_predictions_with_absent_class(sparse=None)
self._test_predictions(self._data, sparse=None)
self._test_predictions(self._data_with_absent_class, sparse=None)

def test_predictions_csr_matrix(self):
self._test_predictions(sparse=sp.csr_matrix)
self._test_predictions_with_absent_class(sparse=sp.csr_matrix)
self._test_predictions(self._data, sparse=sp.csr_matrix)
self._test_predictions(self._data_with_absent_class, sparse=sp.csr_matrix)

def test_predictions_csc_matrix(self):
self._test_predictions(sparse=sp.csc_matrix)
self._test_predictions_with_absent_class(sparse=sp.csc_matrix)
self._test_predictions(self._data, sparse=sp.csc_matrix)
self._test_predictions(self._data_with_absent_class, sparse=sp.csc_matrix)

def _test_predictions(self, sparse):
def _data(self, sparse):
x = np.array([
[1, 0, 0],
[0, np.nan, 0],
@@ -131,20 +137,12 @@ def _test_predictions(self, sparse):
DiscreteVariable("y", values="abc"))
data = Table.from_numpy(domain, x, y)

model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
[4/11, 4/11, 3/11]
)
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
[[3/7, 2/7], [2/7, 3/7], [2/7, 2/7]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
[[2/5, 1/3, 1/5], [2/5, 1/3, 2/5], [1/5, 1/3, 2/5]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
[[4/11], [4/11], [3/11]])
results = [
[4 / 11, 4 / 11, 3 / 11],
[[3 / 7, 2 / 7], [2 / 7, 3 / 7], [2 / 7, 2 / 7]],
[[2 / 5, 1 / 3, 1 / 5], [2 / 5, 1 / 3, 2 / 5], [1 / 5, 1 / 3, 2 / 5]],
[[4 / 11], [4 / 11], [3 / 11]]
]

test_x = np.array([[a, b, 0] for a in [0, 1] for b in [0, 1, 2]])
# Classifiers reject csc matrices in the base class
@@ -162,55 +160,9 @@ def _test_predictions(self, sparse):
[0.26086956521739, 0.39130434782609, 0.34782608695652],
[0.15000000000000, 0.45000000000000, 0.40000000000000]
])
return data, domain, results, test_x, test_y, exp_probs

# Test the faster algorithm for Table (numpy matrices)
test_data = Table.from_numpy(domain, test_x, test_y)
probs = model(test_data, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_data)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_data, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

# Test the slower algorithm for non-Table data (iteration in Python)
test_data = NotATable.from_numpy(domain, test_x, test_y)
probs = model(test_data, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_data)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_data, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

# Test prediction directly on numpy
probs = model(test_x, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_x)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_x, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

# Test prediction on instances
for inst, exp_prob in zip(test_data, exp_probs):
np.testing.assert_almost_equal(
model(inst, ret=model.Probs),
exp_prob)
self.assertEqual(model(inst), np.argmax(exp_prob))
value, prob = model(inst, ret=model.ValueProbs)
np.testing.assert_almost_equal(prob, exp_prob)
self.assertEqual(value, np.argmax(exp_prob))

# Test prediction by directly calling predict. This is needed to test
# csc_matrix, but doesn't hurt others
if sparse is sp.csc_matrix:
test_x = sparse(test_x)
values, probs = model.predict(test_x)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

def _test_predictions_with_absent_class(self, sparse):
def _data_with_absent_class(self, sparse):
"""Empty classes should not affect predictions"""
x = np.array([
[1, 0, 0],
@@ -232,20 +184,12 @@ def _test_predictions_with_absent_class(self, sparse):
DiscreteVariable("y", values="abcd"))
data = Table.from_numpy(domain, x, y)

model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
[4/11, 0, 4/11, 3/11]
)
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
[[3/7, 2/7], [0, 0], [2/7, 3/7], [2/7, 2/7]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
[[2/5, 1/3, 1/5], [0, 0, 0], [2/5, 1/3, 2/5], [1/5, 1/3, 2/5]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
[[4/11], [0], [4/11], [3/11]])
results = [
[4/11, 0, 4/11, 3/11],
[[3/7, 2/7], [0, 0], [2/7, 3/7], [2/7, 2/7]],
[[2/5, 1/3, 1/5], [0, 0, 0], [2/5, 1/3, 2/5], [1/5, 1/3, 2/5]],
[[4/11], [0], [4/11], [3/11]]
]

test_x = np.array([[a, b, 0] for a in [0, 1] for b in [0, 1, 2]])
# Classifiers reject csc matrices in the base class
@@ -263,6 +207,24 @@ def _test_predictions_with_absent_class(self, sparse):
[0.26086956521739, 0, 0.39130434782609, 0.34782608695652],
[0.15000000000000, 0, 0.45000000000000, 0.40000000000000]
])
return data, domain, results, test_x, test_y, exp_probs

def _test_predictions(self, make_data, sparse):
data, domain, results, test_x, test_y, exp_probs = make_data(sparse)

model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
results[0])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
results[1])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
results[2])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
results[3])

# Test the faster algorithm for Table (numpy matrices)
test_data = Table.from_numpy(domain, test_x, test_y)
@@ -311,22 +273,50 @@ def _test_predictions_with_absent_class(self, sparse):
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

def test_no_attributes(self):
def test_predictions_dask(self):
for make_data in (self._data, self._data_with_absent_class):
data, domain, results, test_x, test_y, exp_probs = make_data(None)
data = temp_dasktable(data)
model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
results[0])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
results[1])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
results[2])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
results[3])
test_data = temp_dasktable(Table.from_numpy(domain, test_x, test_y))
probs = model(test_data, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_data)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_data, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

@with_dasktable
def test_no_attributes(self, prep_table):
y = np.array([0, 0, 0, 1, 1, 1, 2, 2])
domain = Domain([], DiscreteVariable("y", values="abc"))
data = Table.from_numpy(domain, np.zeros((len(y), 0)), y.T)
data = prep_table(Table.from_numpy(domain, np.zeros((len(y), 0)), y.T))
model = self.learner(data)
np.testing.assert_almost_equal(
model.predict_storage(np.zeros((5, 0)))[1],
[[4/11, 4/11, 3/11]] * 5
)

def test_no_targets(self):
@with_dasktable
def test_no_targets(self, prep_table):
x = np.array([[0], [1], [2]])
y = np.full(3, np.nan)
domain = Domain([DiscreteVariable("x", values="abc")],
DiscreteVariable("y", values="abc"))
data = Table.from_numpy(domain, x, y)
data = prep_table(Table.from_numpy(domain, x, y))
self.assertRaises(ValueError, self.learner, data)
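
For orientation: the decorator `with_dasktable` and the helper `temp_dasktable` used throughout these tests come from Orange/tests/test_dasktable.py, which this diff does not show. The sketch below is an assumption inferred only from how the decorated tests receive and call `prep_table`; the wrapper and loop are hypothetical, and the real decorator may differ.

import functools

from Orange.tests.test_dasktable import temp_dasktable  # real converter, per the imports above


def with_dasktable(test_method):
    # Assumed behaviour: run the wrapped test twice, passing a `prep_table`
    # callable that first leaves the Table in memory and then converts it
    # to a dask-backed table, so each test exercises both code paths.
    @functools.wraps(test_method)
    def wrapper(self):
        for prep_table in (lambda table: table, temp_dasktable):
            test_method(self, prep_table)
    return wrapper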

