From a32c0acaf4b8166585fb8cafbe37b679271fb6bd Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Thu, 8 Aug 2024 11:28:42 +0200 Subject: [PATCH 1/2] PLS: Adjust coeffs to newest sklearn --- Orange/widgets/model/owpls.py | 10 ++---- Orange/widgets/model/tests/test_owpls.py | 41 ++++++++++++++++-------- requirements-core.txt | 2 +- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/Orange/widgets/model/owpls.py b/Orange/widgets/model/owpls.py index f3f548966c..e648de7853 100644 --- a/Orange/widgets/model/owpls.py +++ b/Orange/widgets/model/owpls.py @@ -76,7 +76,6 @@ def _create_output_coeffs_loadings(self) -> Table: n_components = x_rotations.shape[1] names = [f"coef ({v.name})" for v in self.model.domain.class_vars] - names += [f"coef/X_sd ({v.name})" for v in self.model.domain.class_vars] names += [f"w*c {i + 1}" for i in range(n_components)] domain = Domain( [ContinuousVariable(n) for n in names], @@ -85,18 +84,13 @@ def _create_output_coeffs_loadings(self) -> Table: ) data = self.model.data_to_model_domain(self.data) - x_std = np.std(data.X, axis=0) - coeffs_x_std = coefficients.T / x_std X_features = np.hstack((coefficients, - coeffs_x_std.T, x_rotations)) X_targets = np.hstack((np.full((n_targets, n_targets), np.nan), - np.full((n_targets, n_targets), np.nan), y_loadings)) - coeffs = coeffs_x_std * np.mean(data.X, axis=0) - X_intercepts = np.hstack((intercept, - intercept - coeffs.sum(), + coeffs = coefficients.T * np.mean(data.X, axis=0) + X_intercepts = np.hstack((intercept - coeffs.sum(), np.full((1, n_components), np.nan))) X = np.vstack((X_features, X_targets, X_intercepts)) diff --git a/Orange/widgets/model/tests/test_owpls.py b/Orange/widgets/model/tests/test_owpls.py index c0055a90e6..e87e681733 100644 --- a/Orange/widgets/model/tests/test_owpls.py +++ b/Orange/widgets/model/tests/test_owpls.py @@ -1,5 +1,6 @@ import unittest import numpy as np +from sklearn.cross_decomposition import PLSRegression from Orange.data import Table, Domain, StringVariable from Orange.widgets.model.owpls import OWPLS @@ -30,15 +31,28 @@ def setUp(self): ParameterMapping('n_components', self.widget.controls.n_components) ] + def test_coeffs_compare_sklearn(self): + self.send_signal(self.widget.Inputs.data, self._data) + coefsdata = self.get_output(self.widget.Outputs.coefsdata) + intercept = coefsdata.X[-1, 0] + coeffs = coefsdata.X[:-2, 0] + Y_orange = self._data.X @ coeffs + intercept + + pls = PLSRegression(n_components=2) + pls.fit(self._data.X, self._data.Y) + Y_sklearn = pls.predict(self._data.X) + + np.testing.assert_almost_equal(Y_sklearn, Y_orange) + def test_output_coefsdata(self): self.send_signal(self.widget.Inputs.data, self._data) coefsdata = self.get_output(self.widget.Outputs.coefsdata) self.assertEqual(coefsdata.name, "Coefficients and Loadings") - self.assertEqual(coefsdata.X.shape, (15, 4)) + self.assertEqual(coefsdata.X.shape, (15, 3)) self.assertEqual(coefsdata.Y.shape, (15, 0)) self.assertEqual(coefsdata.metas.shape, (15, 2)) - self.assertEqual(["coef (MEDV)", "coef/X_sd (MEDV)", "w*c 1", "w*c 2"], + self.assertEqual(["coef (MEDV)", "w*c 1", "w*c 2"], [v.name for v in coefsdata.domain.attributes]) self.assertEqual(["Variable name", "Variable role"], [v.name for v in coefsdata.domain.metas]) @@ -47,21 +61,20 @@ def test_output_coefsdata(self): self.assertTrue((coefsdata.metas[:-2, 1] == 0).all()) self.assertTrue((coefsdata.metas[-2, 1] == 1)) self.assertTrue(np.isnan(coefsdata.metas[-1, 1])) - self.assertAlmostEqual(coefsdata.X[0, 2], 0.237, 3) - self.assertAlmostEqual(coefsdata.X[13, 2], -0.304, 3) - self.assertAlmostEqual(coefsdata.X[-1, 0], 22.5, 1) + self.assertAlmostEqual(coefsdata.X[0, 2], 0.012, 3) + self.assertAlmostEqual(coefsdata.X[13, 2], 0.389, 3) + self.assertAlmostEqual(coefsdata.X[-1, 0], 13.7, 1) self.assertTrue(np.isnan(coefsdata.X[-1, 2:]).all()) def test_output_coefsdata_multi_target(self): self.send_signal(self.widget.Inputs.data, self._data_multi_target) coefsdata = self.get_output(self.widget.Outputs.coefsdata) self.assertEqual(coefsdata.name, "Coefficients and Loadings") - self.assertEqual(coefsdata.X.shape, (15, 6)) + self.assertEqual(coefsdata.X.shape, (15, 4)) self.assertEqual(coefsdata.Y.shape, (15, 0)) self.assertEqual(coefsdata.metas.shape, (15, 2)) - attr_names = ["coef (MEDV)", "coef (CRIM)", "coef/X_sd (MEDV)", - "coef/X_sd (CRIM)", "w*c 1", "w*c 2"] + attr_names = ["coef (MEDV)", "coef (CRIM)", "w*c 1", "w*c 2"] self.assertEqual(attr_names, [v.name for v in coefsdata.domain.attributes]) self.assertEqual(["Variable name", "Variable role"], @@ -72,12 +85,12 @@ def test_output_coefsdata_multi_target(self): self.assertTrue((coefsdata.metas[:-3, 1] == 0).all()) self.assertTrue((coefsdata.metas[-2:-1, 1] == 1).all()) self.assertTrue(np.isnan(coefsdata.metas[-1, 1])) - self.assertAlmostEqual(coefsdata.X[0, 4], -0.198, 3) - self.assertAlmostEqual(coefsdata.X[12, 4], -0.288, 3) - self.assertAlmostEqual(coefsdata.X[13, 4], 0.243, 3) - self.assertAlmostEqual(coefsdata.X[-1, 0], 22.5, 1) - self.assertAlmostEqual(coefsdata.X[-1, 1], 3.6, 1) - self.assertTrue(np.isnan(coefsdata.X[-1, 4:]).all()) + self.assertAlmostEqual(coefsdata.X[0, 2], -0.198, 3) + self.assertAlmostEqual(coefsdata.X[12, 2], -0.288, 3) + self.assertAlmostEqual(coefsdata.X[13, 2], 0.243, 3) + self.assertAlmostEqual(coefsdata.X[-1, 0], 6.7, 1) + self.assertAlmostEqual(coefsdata.X[-1, 1], -12.2, 1) + self.assertTrue(np.isnan(coefsdata.X[-1, 2:]).all()) def test_output_data(self): self.send_signal(self.widget.Inputs.data, self._data) diff --git a/requirements-core.txt b/requirements-core.txt index b78f2934e5..260f63470b 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -18,7 +18,7 @@ pip>=19.3 python-louvain>=0.13 pyyaml requests -scikit-learn>=1.4.0 +scikit-learn>=1.5.1 scipy>=1.9 serverfiles # for Data Sets synchronization xgboost>=1.7.4,<2.1 From cc5a28119c47563789044ae44a51edaa7fe6b07b Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Mon, 19 Aug 2024 14:02:59 +0200 Subject: [PATCH 2/2] PLS: Add 'coef * X_sd' column to coeffs --- Orange/widgets/model/owpls.py | 12 ++++++++---- Orange/widgets/model/tests/test_owpls.py | 21 +++++++++++---------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/Orange/widgets/model/owpls.py b/Orange/widgets/model/owpls.py index e648de7853..14a20a3bfc 100644 --- a/Orange/widgets/model/owpls.py +++ b/Orange/widgets/model/owpls.py @@ -68,14 +68,15 @@ def update_model(self): def _create_output_coeffs_loadings(self) -> Table: intercept = self.model.intercept.T[None, :] - coefficients = self.model.coefficients.T + coefficients = self.model.coefficients _, y_loadings = self.model.loadings x_rotations, _ = self.model.rotations - n_features, n_targets = coefficients.shape + n_targets, n_features = coefficients.shape n_components = x_rotations.shape[1] names = [f"coef ({v.name})" for v in self.model.domain.class_vars] + names += [f"coef * X_sd ({v.name})" for v in self.model.domain.class_vars] names += [f"w*c {i + 1}" for i in range(n_components)] domain = Domain( [ContinuousVariable(n) for n in names], @@ -84,13 +85,16 @@ def _create_output_coeffs_loadings(self) -> Table: ) data = self.model.data_to_model_domain(self.data) - X_features = np.hstack((coefficients, + X_features = np.hstack((coefficients.T, + (coefficients * np.std(data.X, axis=0)).T, x_rotations)) X_targets = np.hstack((np.full((n_targets, n_targets), np.nan), + np.full((n_targets, n_targets), np.nan), y_loadings)) - coeffs = coefficients.T * np.mean(data.X, axis=0) + coeffs = coefficients * np.mean(data.X, axis=0) X_intercepts = np.hstack((intercept - coeffs.sum(), + intercept, np.full((1, n_components), np.nan))) X = np.vstack((X_features, X_targets, X_intercepts)) diff --git a/Orange/widgets/model/tests/test_owpls.py b/Orange/widgets/model/tests/test_owpls.py index e87e681733..c7dfa19dde 100644 --- a/Orange/widgets/model/tests/test_owpls.py +++ b/Orange/widgets/model/tests/test_owpls.py @@ -48,11 +48,11 @@ def test_output_coefsdata(self): self.send_signal(self.widget.Inputs.data, self._data) coefsdata = self.get_output(self.widget.Outputs.coefsdata) self.assertEqual(coefsdata.name, "Coefficients and Loadings") - self.assertEqual(coefsdata.X.shape, (15, 3)) + self.assertEqual(coefsdata.X.shape, (15, 4)) self.assertEqual(coefsdata.Y.shape, (15, 0)) self.assertEqual(coefsdata.metas.shape, (15, 2)) - self.assertEqual(["coef (MEDV)", "w*c 1", "w*c 2"], + self.assertEqual(["coef (MEDV)", "coef * X_sd (MEDV)", "w*c 1", "w*c 2"], [v.name for v in coefsdata.domain.attributes]) self.assertEqual(["Variable name", "Variable role"], [v.name for v in coefsdata.domain.metas]) @@ -61,8 +61,8 @@ def test_output_coefsdata(self): self.assertTrue((coefsdata.metas[:-2, 1] == 0).all()) self.assertTrue((coefsdata.metas[-2, 1] == 1)) self.assertTrue(np.isnan(coefsdata.metas[-1, 1])) - self.assertAlmostEqual(coefsdata.X[0, 2], 0.012, 3) - self.assertAlmostEqual(coefsdata.X[13, 2], 0.389, 3) + self.assertAlmostEqual(coefsdata.X[0, 3], 0.012, 3) + self.assertAlmostEqual(coefsdata.X[13, 3], 0.389, 3) self.assertAlmostEqual(coefsdata.X[-1, 0], 13.7, 1) self.assertTrue(np.isnan(coefsdata.X[-1, 2:]).all()) @@ -70,11 +70,12 @@ def test_output_coefsdata_multi_target(self): self.send_signal(self.widget.Inputs.data, self._data_multi_target) coefsdata = self.get_output(self.widget.Outputs.coefsdata) self.assertEqual(coefsdata.name, "Coefficients and Loadings") - self.assertEqual(coefsdata.X.shape, (15, 4)) + self.assertEqual(coefsdata.X.shape, (15, 6)) self.assertEqual(coefsdata.Y.shape, (15, 0)) self.assertEqual(coefsdata.metas.shape, (15, 2)) - attr_names = ["coef (MEDV)", "coef (CRIM)", "w*c 1", "w*c 2"] + attr_names = ["coef (MEDV)", "coef (CRIM)", "coef * X_sd (MEDV)", + "coef * X_sd (CRIM)", "w*c 1", "w*c 2"] self.assertEqual(attr_names, [v.name for v in coefsdata.domain.attributes]) self.assertEqual(["Variable name", "Variable role"], @@ -85,12 +86,12 @@ def test_output_coefsdata_multi_target(self): self.assertTrue((coefsdata.metas[:-3, 1] == 0).all()) self.assertTrue((coefsdata.metas[-2:-1, 1] == 1).all()) self.assertTrue(np.isnan(coefsdata.metas[-1, 1])) - self.assertAlmostEqual(coefsdata.X[0, 2], -0.198, 3) - self.assertAlmostEqual(coefsdata.X[12, 2], -0.288, 3) - self.assertAlmostEqual(coefsdata.X[13, 2], 0.243, 3) + self.assertAlmostEqual(coefsdata.X[0, 4], -0.198, 3) + self.assertAlmostEqual(coefsdata.X[12, 4], -0.288, 3) + self.assertAlmostEqual(coefsdata.X[13, 4], 0.243, 3) self.assertAlmostEqual(coefsdata.X[-1, 0], 6.7, 1) self.assertAlmostEqual(coefsdata.X[-1, 1], -12.2, 1) - self.assertTrue(np.isnan(coefsdata.X[-1, 2:]).all()) + self.assertTrue(np.isnan(coefsdata.X[-1, 4:]).all()) def test_output_data(self): self.send_signal(self.widget.Inputs.data, self._data)