Merge pull request #694 from markotoplak/pls-multitarget
[ENH] PLS supports multiple targets
markotoplak authored Oct 20, 2023
2 parents 930ac80 + 91846f6 commit 9909379
Showing 3 changed files with 95 additions and 36 deletions.
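Before the diffs, a minimal usage sketch (not part of this commit) of what the change enables: fitting PLS on a table with several continuous targets. Variable names, shapes, and the two-component setting are illustrative assumptions mirroring the tests added below.

# Hypothetical multi-target PLS usage with this add-on; names and shapes are
# assumptions taken from the tests in this PR, not from the commit itself.
import numpy as np
from Orange.data import Table, Domain, ContinuousVariable
from orangecontrib.spectroscopy.models.pls import PLSRegressionLearner

rng = np.random.RandomState(0)
domain = Domain([ContinuousVariable(f"x{i}") for i in range(5)],
                class_vars=[ContinuousVariable(f"y{i}") for i in range(3)])
data = Table.from_numpy(domain, X=rng.rand(10, 5), Y=rng.rand(10, 3))

model = PLSRegressionLearner(n_components=2)(data)
predictions = model(data)          # expected shape (10, 3): one column per target
coefficients = model.coefficients  # normalized orientation, see the note after pls.py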
37 changes: 31 additions & 6 deletions orangecontrib/spectroscopy/models/pls.py
@@ -1,4 +1,6 @@
import numpy as np
import pkg_resources
import sklearn
import sklearn.cross_decomposition as skl_pls

from Orange.data import Table, Domain, Variable, \
@@ -59,11 +61,17 @@ class PLSModel(SklModel):

@property
def coefficients(self):
return self.skl_model.coef_
coef = self.skl_model.coef_
# 1.3 has transposed coef_
if pkg_resources.parse_version(sklearn.__version__) < pkg_resources.parse_version("1.3.0"):
coef = coef.T
return coef

def predict(self, X):
vals = self.skl_model.predict(X)
return vals.ravel()
if len(self.domain.class_vars) == 1:
vals = vals.ravel()
return vals

def __str__(self):
return 'PLSModel {}'.format(self.skl_model)
@@ -117,16 +125,24 @@ def components(self):
components.name = 'components'
return components

def coefficients_table(self):
coeffs = self.coefficients.T
domain = Domain(
[ContinuousVariable(f"coef {i}") for i in range(coeffs.shape[1])],
metas=[StringVariable("name")]
)
waves = [[attr.name] for attr in self.domain.attributes]
coef_table = Table.from_numpy(domain, X=coeffs, metas=waves)
coef_table.name = "coefficients"
return coef_table


class PLSRegressionLearner(SklLearner, _FeatureScorerMixin):
__wraps__ = skl_pls.PLSRegression
__returns__ = PLSModel

supports_multiclass = True
preprocessors = SklLearner.preprocessors

# this learner enforces a single class because multitarget is not
# explicitly allowed

def fit(self, X, Y, W=None):
params = self.params.copy()
params["n_components"] = min(X.shape[1] - 1,
@@ -140,6 +156,15 @@ def __init__(self, n_components=2, scale=True,
super().__init__(preprocessors=preprocessors)
self.params = vars()

def incompatibility_reason(self, domain):
reason = None
if not domain.class_vars:
reason = "Numeric targets expected."
else:
for cv in domain.class_vars:
if not cv.is_continuous:
reason = "Only numeric target variables expected."
return reason

if __name__ == '__main__':
import Orange
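A note on the version guard in the coefficients property above: scikit-learn 1.3 changed the orientation of PLSRegression.coef_, which is what the transpose for older versions compensates for. A small sketch of the difference; the shapes are stated as an assumption about scikit-learn's behaviour, not taken from this commit.

import numpy as np
from sklearn.cross_decomposition import PLSRegression

X = np.random.RandomState(0).rand(10, 5)   # 5 features
Y = np.random.RandomState(1).rand(10, 3)   # 3 targets
pls = PLSRegression(n_components=2).fit(X, Y)

# Assumed behaviour: scikit-learn >= 1.3 gives coef_ of shape (3, 5), i.e.
# (n_targets, n_features); earlier releases give (5, 3). PLSModel.coefficients
# above normalizes both cases to (n_targets, n_features).
print(pls.coef_.shape)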
69 changes: 47 additions & 22 deletions orangecontrib/spectroscopy/tests/test_owpls.py
@@ -2,6 +2,8 @@

import numpy as np

import pkg_resources
import sklearn
from sklearn.cross_decomposition import PLSRegression

from Orange.data import Table, Domain, ContinuousVariable
@@ -21,28 +23,42 @@ def table(rows, attr, vars):
return Table.from_numpy(domain, X=X, Y=Y)


def coefficients(sklmodel):
coef = sklmodel.coef_
# 1.3 has transposed coef_
if pkg_resources.parse_version(sklearn.__version__) < pkg_resources.parse_version("1.3.0"):
coef = coef.T
return coef


class TestPLS(TestCase):

def test_allow_y_dim(self):
""" The current PLS version allows only a single Y dimension. """
d = table(10, 5, 1)
learner = PLSRegressionLearner(n_components=2)
learner(d)
for n_class_vars in [0, 2]:
d = table(10, 5, 0)
with self.assertRaises(ValueError):
learner(d)
for n_class_vars in [1, 2, 3]:
d = table(10, 5, n_class_vars)
with self.assertRaises(ValueError):
learner(d)
learner(d) # no exception

def test_compare_to_sklearn(self):
d = table(10, 5, 1)
with d.unlocked():
d.X = np.random.RandomState(0).rand(*d.X.shape)
d.Y = np.random.RandomState(0).rand(*d.Y.shape)
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
np.testing.assert_almost_equal(scikit_model.predict(d.X).ravel(),
orange_model(d))
np.testing.assert_almost_equal(scikit_model.coef_,
np.testing.assert_almost_equal(coefficients(scikit_model),
orange_model.coefficients)

def test_compare_to_sklearn_multid(self):
d = table(10, 5, 3)
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
np.testing.assert_almost_equal(scikit_model.predict(d.X),
orange_model(d))
np.testing.assert_almost_equal(coefficients(scikit_model),
orange_model.coefficients)

def test_too_many_components(self):
@@ -60,21 +76,30 @@ def test_too_many_components(self):
self.assertEqual(model.skl_model.n_components, 4)

def test_scores(self):
d = table(10, 5, 1)
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
scores = orange_model.project(d)
sx, sy = scikit_model.transform(d.X, d.Y)
np.testing.assert_almost_equal(sx, scores.X)
np.testing.assert_almost_equal(sy, scores.metas)
for d in [table(10, 5, 1), table(10, 5, 3)]:
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
scores = orange_model.project(d)
sx, sy = scikit_model.transform(d.X, d.Y)
np.testing.assert_almost_equal(sx, scores.X)
np.testing.assert_almost_equal(sy, scores.metas)

def test_components(self):
d = table(10, 5, 1)
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
components = orange_model.components()
np.testing.assert_almost_equal(scikit_model.x_loadings_, components.X.T)
np.testing.assert_almost_equal(scikit_model.y_loadings_, components.Y.reshape(1, -1))
def t2d(m):
return m.reshape(-1, 1) if len(m.shape) == 1 else m
for d in [table(10, 5, 1), table(10, 5, 3)]:
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
components = orange_model.components()
np.testing.assert_almost_equal(scikit_model.x_loadings_, components.X.T)
np.testing.assert_almost_equal(scikit_model.y_loadings_, t2d(components.Y).T)

def test_coefficients(self):
for d in [table(10, 5, 1), table(10, 5, 3)]:
orange_model = PLSRegressionLearner()(d)
scikit_model = PLSRegression().fit(d.X, d.Y)
coef_table = orange_model.coefficients_table()
np.testing.assert_almost_equal(coefficients(scikit_model).T, coef_table.X)


class TestOWPLS(WidgetTest, WidgetLearnerTestMixin):
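Before the widget changes, a sketch of the Table the new coefficients_table() helper is expected to produce for the multi-target case exercised by test_coefficients. The setup repeats the assumptions from the sketch at the top of this commit; the column names and shapes follow the helper's implementation in pls.py.

# Assumed setup: 10 rows, 5 attributes, 3 continuous targets, as in the tests.
import numpy as np
from Orange.data import Table, Domain, ContinuousVariable
from orangecontrib.spectroscopy.models.pls import PLSRegressionLearner

rng = np.random.RandomState(0)
domain = Domain([ContinuousVariable(f"x{i}") for i in range(5)],
                class_vars=[ContinuousVariable(f"y{i}") for i in range(3)])
data = Table.from_numpy(domain, X=rng.rand(10, 5), Y=rng.rand(10, 3))
model = PLSRegressionLearner(n_components=2)(data)

coef_table = model.coefficients_table()
print(coef_table.X.shape)                               # (5, 3): attributes x targets
print([v.name for v in coef_table.domain.attributes])   # ['coef 0', 'coef 1', 'coef 2']
print([str(m[0]) for m in coef_table.metas])            # attribute names 'x0' ... 'x4'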
25 changes: 17 additions & 8 deletions orangecontrib/spectroscopy/widgets/owpls.py
@@ -53,13 +53,7 @@ def update_model(self):
projection = None
components = None
if self.model is not None:
domain = Domain(
[ContinuousVariable("coef")], metas=[StringVariable("name")])
coefs = self.model.coefficients
coefs = coefs.reshape(-1, 1)
waves = [[attr.name] for attr in self.model.domain.attributes]
coef_table = Table.from_numpy(domain, X=coefs, metas=waves)
coef_table.name = "coefficients"
coef_table = self.model.coefficients_table()
projection = self.model.project(self.data)
components = self.model.components()
self.Outputs.coefsdata.send(coef_table)
@@ -68,8 +62,23 @@

@OWBaseLearner.Inputs.data
def set_data(self, data):
# reimplemented completely because the base learner does not
# allow multiclass

self.Warning.sparse_data.clear()
super().set_data(data)

self.Error.data_error.clear()
self.data = data

if data is not None and data.domain.class_var is None and not data.domain.class_vars:
self.Error.data_error(
"Data has no target variable.\n"
"Select one with the Select Columns widget.")
self.data = None

# invalidate the model so that handleNewSignals will update it
self.model = None

if self.data and sp.issparse(self.data.X):
self.Warning.sparse_data()

