Skip to content

Commit

Permalink
Merge pull request #6867 from VesnaT/pls_coeffs
Browse files Browse the repository at this point in the history
PLS: Adjust coeffs to newest sklearn
  • Loading branch information
lanzagar authored Oct 4, 2024
2 parents eed39ef + cc5a281 commit 08ac250
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 19 deletions.
18 changes: 8 additions & 10 deletions Orange/widgets/model/owpls.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ def update_model(self):

def _create_output_coeffs_loadings(self) -> Table:
intercept = self.model.intercept.T[None, :]
coefficients = self.model.coefficients.T
coefficients = self.model.coefficients
_, y_loadings = self.model.loadings
x_rotations, _ = self.model.rotations

n_features, n_targets = coefficients.shape
n_targets, n_features = coefficients.shape
n_components = x_rotations.shape[1]

names = [f"coef ({v.name})" for v in self.model.domain.class_vars]
names += [f"coef/X_sd ({v.name})" for v in self.model.domain.class_vars]
names += [f"coef * X_sd ({v.name})" for v in self.model.domain.class_vars]
names += [f"w*c {i + 1}" for i in range(n_components)]
domain = Domain(
[ContinuousVariable(n) for n in names],
Expand All @@ -85,18 +85,16 @@ def _create_output_coeffs_loadings(self) -> Table:
)

data = self.model.data_to_model_domain(self.data)
x_std = np.std(data.X, axis=0)
coeffs_x_std = coefficients.T / x_std
X_features = np.hstack((coefficients,
coeffs_x_std.T,
X_features = np.hstack((coefficients.T,
(coefficients * np.std(data.X, axis=0)).T,
x_rotations))
X_targets = np.hstack((np.full((n_targets, n_targets), np.nan),
np.full((n_targets, n_targets), np.nan),
y_loadings))

coeffs = coeffs_x_std * np.mean(data.X, axis=0)
X_intercepts = np.hstack((intercept,
intercept - coeffs.sum(),
coeffs = coefficients * np.mean(data.X, axis=0)
X_intercepts = np.hstack((intercept - coeffs.sum(),
intercept,
np.full((1, n_components), np.nan)))
X = np.vstack((X_features, X_targets, X_intercepts))

Expand Down
30 changes: 22 additions & 8 deletions Orange/widgets/model/tests/test_owpls.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import numpy as np
from sklearn.cross_decomposition import PLSRegression

from Orange.data import Table, Domain, StringVariable
from Orange.widgets.model.owpls import OWPLS
Expand Down Expand Up @@ -30,6 +31,19 @@ def setUp(self):
ParameterMapping('n_components', self.widget.controls.n_components)
]

def test_coeffs_compare_sklearn(self):
    """Check that the coefficient output reproduces sklearn's predictions.

    The first column of the widget's ``coefsdata`` output is read as a
    linear model: every row except the last two supplies a per-feature
    coefficient and the last row supplies the intercept. Predictions
    built from those values on the raw input data must match (to
    ``assert_almost_equal`` precision) the predictions of a
    ``PLSRegression`` with two components fitted on the same data.
    """
    self.send_signal(self.widget.Inputs.data, self._data)
    output = self.get_output(self.widget.Outputs.coefsdata)

    # Only the first output column is needed here.
    first_column = output.X[:, 0]
    weights = first_column[:-2]
    bias = first_column[-1]
    predicted_orange = self._data.X @ weights + bias

    # Reference model fitted directly with scikit-learn.
    reference = PLSRegression(n_components=2)
    reference.fit(self._data.X, self._data.Y)
    predicted_sklearn = reference.predict(self._data.X)

    np.testing.assert_almost_equal(predicted_sklearn, predicted_orange)

def test_output_coefsdata(self):
self.send_signal(self.widget.Inputs.data, self._data)
coefsdata = self.get_output(self.widget.Outputs.coefsdata)
Expand All @@ -38,7 +52,7 @@ def test_output_coefsdata(self):
self.assertEqual(coefsdata.Y.shape, (15, 0))
self.assertEqual(coefsdata.metas.shape, (15, 2))

self.assertEqual(["coef (MEDV)", "coef/X_sd (MEDV)", "w*c 1", "w*c 2"],
self.assertEqual(["coef (MEDV)", "coef * X_sd (MEDV)", "w*c 1", "w*c 2"],
[v.name for v in coefsdata.domain.attributes])
self.assertEqual(["Variable name", "Variable role"],
[v.name for v in coefsdata.domain.metas])
Expand All @@ -47,9 +61,9 @@ def test_output_coefsdata(self):
self.assertTrue((coefsdata.metas[:-2, 1] == 0).all())
self.assertTrue((coefsdata.metas[-2, 1] == 1))
self.assertTrue(np.isnan(coefsdata.metas[-1, 1]))
self.assertAlmostEqual(coefsdata.X[0, 2], 0.237, 3)
self.assertAlmostEqual(coefsdata.X[13, 2], -0.304, 3)
self.assertAlmostEqual(coefsdata.X[-1, 0], 22.5, 1)
self.assertAlmostEqual(coefsdata.X[0, 3], 0.012, 3)
self.assertAlmostEqual(coefsdata.X[13, 3], 0.389, 3)
self.assertAlmostEqual(coefsdata.X[-1, 0], 13.7, 1)
self.assertTrue(np.isnan(coefsdata.X[-1, 2:]).all())

def test_output_coefsdata_multi_target(self):
Expand All @@ -60,8 +74,8 @@ def test_output_coefsdata_multi_target(self):
self.assertEqual(coefsdata.Y.shape, (15, 0))
self.assertEqual(coefsdata.metas.shape, (15, 2))

attr_names = ["coef (MEDV)", "coef (CRIM)", "coef/X_sd (MEDV)",
"coef/X_sd (CRIM)", "w*c 1", "w*c 2"]
attr_names = ["coef (MEDV)", "coef (CRIM)", "coef * X_sd (MEDV)",
"coef * X_sd (CRIM)", "w*c 1", "w*c 2"]
self.assertEqual(attr_names,
[v.name for v in coefsdata.domain.attributes])
self.assertEqual(["Variable name", "Variable role"],
Expand All @@ -75,8 +89,8 @@ def test_output_coefsdata_multi_target(self):
self.assertAlmostEqual(coefsdata.X[0, 4], -0.198, 3)
self.assertAlmostEqual(coefsdata.X[12, 4], -0.288, 3)
self.assertAlmostEqual(coefsdata.X[13, 4], 0.243, 3)
self.assertAlmostEqual(coefsdata.X[-1, 0], 22.5, 1)
self.assertAlmostEqual(coefsdata.X[-1, 1], 3.6, 1)
self.assertAlmostEqual(coefsdata.X[-1, 0], 6.7, 1)
self.assertAlmostEqual(coefsdata.X[-1, 1], -12.2, 1)
self.assertTrue(np.isnan(coefsdata.X[-1, 4:]).all())

def test_output_data(self):
Expand Down
2 changes: 1 addition & 1 deletion requirements-core.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pip>=19.3
python-louvain>=0.13
pyyaml
requests
scikit-learn>=1.4.0
scikit-learn>=1.5.1
scipy>=1.9
serverfiles # for Data Sets synchronization
xgboost>=1.7.4,<2.1
Expand Down

0 comments on commit 08ac250

Please sign in to comment.