Commit

add tests
noahnovsak committed Aug 25, 2023
1 parent 8eaab15 commit 58a679d
Showing 2 changed files with 94 additions and 104 deletions.
2 changes: 1 addition & 1 deletion Orange/base.py
@@ -444,7 +444,7 @@ def fix_dim(x):
# Call the predictor
backmappers = None
n_values = []
if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix)):
if isinstance(data, (np.ndarray, scipy.sparse.csr_matrix, da.Array)):
prediction = self.predict(data)
elif isinstance(data, Table):
backmappers, n_values = self.get_backmappers(data)
196 changes: 93 additions & 103 deletions Orange/tests/test_naive_bayes.py
@@ -10,13 +10,12 @@
from Orange.classification import NaiveBayesLearner
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
from Orange.evaluation import CrossValidation, CA
from Orange.tests import test_filename
from Orange.tests.test_dasktable import with_dasktable, temp_dasktable


# This class is used to force predict_storage to fall back to the slower
# procedure instead of calling `predict`
from Orange.tests import test_filename


class NotATable(Table): # pylint: disable=too-many-ancestors,abstract-method
@classmethod
def from_file(cls, *args, **kwargs):
@@ -34,40 +33,45 @@ def setUpClass(cls):
def setUp(self):
self.model = self.learner(self.data)

def test_NaiveBayes(self):
@with_dasktable
def test_NaiveBayes(self, prep_table):
tables = [prep_table(tab) for tab in (self.table, Table("iris"))]
cv = CrossValidation(k=10)
results = cv(self.table, [self.learner])
results = cv(tables[0], [self.learner])
ca = CA(results)
self.assertGreater(ca, 0.7)
self.assertLess(ca, 0.9)

cv = CrossValidation(k=10)
results = cv(Table("iris"), [self.learner])
results = cv(tables[1], [self.learner])
ca = CA(results)
self.assertGreater(ca, 0.7)

def test_degenerate(self):
@with_dasktable
def test_degenerate(self, prep_table):
d = Domain((ContinuousVariable(name="A"),
ContinuousVariable(name="B"),
ContinuousVariable(name="C")),
DiscreteVariable(name="CLASS", values=("M", "F")))
t = Table.from_list(d, [[0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1]])
t = prep_table(Table.from_list(d, [[0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1]]))
nb = NaiveBayesLearner()
model = nb(t)
self.assertEqual(model.domain.attributes, ())
self.assertEqual(model(t[0]), 1)
self.assertTrue(all(model(t) == 1))

def test_allnan_cv(self):
@with_dasktable
def test_allnan_cv(self, prep_table):
# GH 2740
data = Table(test_filename('datasets/lenses.tab'))
data = prep_table(Table(test_filename('datasets/lenses.tab')))
cv = CrossValidation(stratified=False)
results = cv(data, [self.learner])
self.assertFalse(any(results.failed))

def test_prediction_routing(self):
data = self.data
predict = self.model.predict = Mock(return_value=(data.Y, None))
@with_dasktable
def test_prediction_routing(self, prep_table):
data = prep_table(self.data)
predict = self.model.predict = Mock(return_value=(np.asarray(data.Y), None))

self.model(data)
predict.assert_called()
@@ -84,12 +88,14 @@ def test_prediction_routing(self):
self.model.predict_storage(data[0])
predict.assert_called()

def test_compare_results_of_predict_and_predict_storage(self):
@with_dasktable
def test_compare_results_of_predict_and_predict_storage(self, prep_table):
data1 = prep_table(self.data)
data2 = NotATable("titanic")

self.model = self.learner(self.data[:50])
self.model = self.learner(data1[:50])
predict = self.model.predict = Mock(side_effect=self.model.predict)
values, probs = self.model.predict_storage(self.data[50:])
values, probs = self.model.predict_storage(data1[50:])
predict.assert_called()
predict.reset_mock()
values2, probs2 = self.model.predict_storage(data2[50:])
@@ -99,18 +105,18 @@ def test_compare_results_of_predict_and_predict_storage(self):
np.testing.assert_equal(probs, probs2)

def test_predictions(self):
self._test_predictions(sparse=None)
self._test_predictions_with_absent_class(sparse=None)
self._test_predictions(self._data, sparse=None)
self._test_predictions(self._data_with_absent_class, sparse=None)

def test_predictions_csr_matrix(self):
self._test_predictions(sparse=sp.csr_matrix)
self._test_predictions_with_absent_class(sparse=sp.csr_matrix)
self._test_predictions(self._data, sparse=sp.csr_matrix)
self._test_predictions(self._data_with_absent_class, sparse=sp.csr_matrix)

def test_predictions_csc_matrix(self):
self._test_predictions(sparse=sp.csc_matrix)
self._test_predictions_with_absent_class(sparse=sp.csc_matrix)
self._test_predictions(self._data, sparse=sp.csc_matrix)
self._test_predictions(self._data_with_absent_class, sparse=sp.csc_matrix)

def _test_predictions(self, sparse):
def _data(self, sparse):
x = np.array([
[1, 0, 0],
[0, np.nan, 0],
@@ -131,20 +137,12 @@ def _test_predictions(self, sparse):
DiscreteVariable("y", values="abc"))
data = Table.from_numpy(domain, x, y)

model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
[4/11, 4/11, 3/11]
)
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
[[3/7, 2/7], [2/7, 3/7], [2/7, 2/7]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
[[2/5, 1/3, 1/5], [2/5, 1/3, 2/5], [1/5, 1/3, 2/5]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
[[4/11], [4/11], [3/11]])
results = [
[4 / 11, 4 / 11, 3 / 11],
[[3 / 7, 2 / 7], [2 / 7, 3 / 7], [2 / 7, 2 / 7]],
[[2 / 5, 1 / 3, 1 / 5], [2 / 5, 1 / 3, 2 / 5], [1 / 5, 1 / 3, 2 / 5]],
[[4 / 11], [4 / 11], [3 / 11]]
]

test_x = np.array([[a, b, 0] for a in [0, 1] for b in [0, 1, 2]])
# Classifiers reject csc matrices in the base class
@@ -162,55 +160,9 @@ def _test_predictions(self, sparse):
[0.26086956521739, 0.39130434782609, 0.34782608695652],
[0.15000000000000, 0.45000000000000, 0.40000000000000]
])
return data, domain, results, test_x, test_y, exp_probs

# Test the faster algorithm for Table (numpy matrices)
test_data = Table.from_numpy(domain, test_x, test_y)
probs = model(test_data, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_data)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_data, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

# Test the slower algorithm for non-Table data (iteration in Python)
test_data = NotATable.from_numpy(domain, test_x, test_y)
probs = model(test_data, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_data)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_data, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

# Test prediction directly on numpy
probs = model(test_x, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_x)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_x, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

# Test prediction on instances
for inst, exp_prob in zip(test_data, exp_probs):
np.testing.assert_almost_equal(
model(inst, ret=model.Probs),
exp_prob)
self.assertEqual(model(inst), np.argmax(exp_prob))
value, prob = model(inst, ret=model.ValueProbs)
np.testing.assert_almost_equal(prob, exp_prob)
self.assertEqual(value, np.argmax(exp_prob))

# Test prediction by directly calling predict. This is needed to test
# csc_matrix, but doesn't hurt others
if sparse is sp.csc_matrix:
test_x = sparse(test_x)
values, probs = model.predict(test_x)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

def _test_predictions_with_absent_class(self, sparse):
def _data_with_absent_class(self, sparse):
"""Empty classes should not affect predictions"""
x = np.array([
[1, 0, 0],
@@ -232,20 +184,12 @@ def _test_predictions_with_absent_class(self, sparse):
DiscreteVariable("y", values="abcd"))
data = Table.from_numpy(domain, x, y)

model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
[4/11, 0, 4/11, 3/11]
)
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
[[3/7, 2/7], [0, 0], [2/7, 3/7], [2/7, 2/7]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
[[2/5, 1/3, 1/5], [0, 0, 0], [2/5, 1/3, 2/5], [1/5, 1/3, 2/5]])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
[[4/11], [0], [4/11], [3/11]])
results = [
[4/11, 0, 4/11, 3/11],
[[3/7, 2/7], [0, 0], [2/7, 3/7], [2/7, 2/7]],
[[2/5, 1/3, 1/5], [0, 0, 0], [2/5, 1/3, 2/5], [1/5, 1/3, 2/5]],
[[4/11], [0], [4/11], [3/11]]
]

test_x = np.array([[a, b, 0] for a in [0, 1] for b in [0, 1, 2]])
# Classifiers reject csc matrices in the base class
@@ -263,6 +207,24 @@ def _test_predictions_with_absent_class(self, sparse):
[0.26086956521739, 0, 0.39130434782609, 0.34782608695652],
[0.15000000000000, 0, 0.45000000000000, 0.40000000000000]
])
return data, domain, results, test_x, test_y, exp_probs

def _test_predictions(self, make_data, sparse):
data, domain, results, test_x, test_y, exp_probs = make_data(sparse)

model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
results[0])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
results[1])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
results[2])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
results[3])

# Test the faster algorithm for Table (numpy matrices)
test_data = Table.from_numpy(domain, test_x, test_y)
@@ -311,22 +273,50 @@ def _test_predictions_with_absent_class(self, sparse):
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

def test_no_attributes(self):
def test_predictions_dask(self):
for make_data in (self._data, self._data_with_absent_class):
data, domain, results, test_x, test_y, exp_probs = make_data(None)
data = temp_dasktable(data)
model = self.learner(data)
np.testing.assert_almost_equal(
model.class_prob,
results[0])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[0]) * model.class_prob[:, None],
results[1])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[1]) * model.class_prob[:, None],
results[2])
np.testing.assert_almost_equal(
np.exp(model.log_cont_prob[2]) * model.class_prob[:, None],
results[3])
test_data = temp_dasktable(Table.from_numpy(domain, test_x, test_y))
probs = model(test_data, ret=model.Probs)
np.testing.assert_almost_equal(exp_probs, probs)
values = model(test_data)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))
values, probs = model(test_data, ret=model.ValueProbs)
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

@with_dasktable
def test_no_attributes(self, prep_table):
y = np.array([0, 0, 0, 1, 1, 1, 2, 2])
domain = Domain([], DiscreteVariable("y", values="abc"))
data = Table.from_numpy(domain, np.zeros((len(y), 0)), y.T)
data = prep_table(Table.from_numpy(domain, np.zeros((len(y), 0)), y.T))
model = self.learner(data)
np.testing.assert_almost_equal(
model.predict_storage(np.zeros((5, 0)))[1],
[[4/11, 4/11, 3/11]] * 5
)

def test_no_targets(self):
@with_dasktable
def test_no_targets(self, prep_table):
x = np.array([[0], [1], [2]])
y = np.full(3, np.nan)
domain = Domain([DiscreteVariable("x", values="abc")],
DiscreteVariable("y", values="abc"))
data = Table.from_numpy(domain, x, y)
data = prep_table(Table.from_numpy(domain, x, y))
self.assertRaises(ValueError, self.learner, data)
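
For orientation: the decorator `with_dasktable` and the helper `temp_dasktable` used throughout these tests come from Orange/tests/test_dasktable.py, which this diff does not show. The sketch below is an assumption inferred only from how the decorated tests receive and call `prep_table`; the wrapper and loop are hypothetical, and the real decorator may differ.

import functools

from Orange.tests.test_dasktable import temp_dasktable  # real converter, per the imports above


def with_dasktable(test_method):
    # Assumed behaviour: run the wrapped test twice, passing a `prep_table`
    # callable that first leaves the Table in memory and then converts it
    # to a dask-backed table, so each test exercises both code paths.
    @functools.wraps(test_method)
    def wrapper(self):
        for prep_table in (lambda table: table, temp_dasktable):
            test_method(self, prep_table)
    return wrapper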

