class _PLSCommonTransform:
    """Callable that maps a table to its PLS X- and Y-scores.

    A single instance is shared by every ``PLSProjector`` created for one
    projection, so the scores are computed once per table and each
    projector only picks its own column from the result.
    """

    def __init__(self, pls_model):
        # The fitted model wrapper; must expose ``skl_model`` and ``domain``.
        self.pls_model = pls_model

    def _transform_with_numpy_output(self, X, Y):
        """Return the X-scores and Y-scores stacked column-wise.

        X-score columns come first, followed by the Y-score columns.
        """
        pls = self.pls_model.skl_model
        # Original author's note: the call below does the following
        #   x_center = X - pls._x_mean
        #   y_center = Y - pls._y_mean
        #   t = x_center @ pls.x_rotations_
        #   u = y_center @ pls.y_rotations_
        # NOTE(review): scikit-learn may also scale the centered data;
        # confirm against the PLSRegression.transform documentation.
        t, u = pls.transform(X, Y)
        return np.hstack((t, u))

    def __call__(self, data):
        """Compute the stacked score matrix for ``data``.

        The table is first converted to the model's domain when its own
        domain differs, and a one-dimensional ``Y`` is reshaped into a
        single column before it is handed to the scikit-learn model.
        """
        if data.domain != self.pls_model.domain:
            data = data.transform(self.pls_model.domain)
        Y = data.Y
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)
        return self._transform_with_numpy_output(data.X, Y)

    def __eq__(self, other):
        # Two transforms are interchangeable when they wrap equal models;
        # without this, equal projections never compare equal.
        if self is other:
            return True
        return type(self) is type(other) \
            and self.pls_model == other.pls_model

    def __hash__(self):
        return hash(self.pls_model)


class PLSProjector(SharedComputeValue):
    """Compute value that extracts one score column from the shared transform.

    ``transform`` is the shared callable producing the full score matrix
    and ``feature`` is the index of the column this projector returns.
    """

    def __init__(self, transform, feature):
        super().__init__(transform)
        # Kept explicitly so equality and hashing do not depend on how the
        # base class stores the shared callable.
        self.transform = transform
        self.feature = feature

    def compute(self, _, space):
        """Return column ``feature`` of the precomputed score matrix."""
        return space[:, self.feature]

    def __eq__(self, other):
        if self is other:
            return True
        return type(self) is type(other) \
            and self.transform == other.transform \
            and self.feature == other.feature

    def __hash__(self):
        return hash((type(self), self.transform, self.feature))
def __str__(self):
    """Return a short description embedding the wrapped scikit-learn model."""
    return f'PLSModel {self.skl_model}'

def _get_var_names(self, n, prefix, extra_names=()):
    """Return ``n`` variable names ``prefix1 .. prefixN`` made unique.

    The proposed names are checked against every variable of the model's
    domain (attributes, class variables and metas) and against
    ``extra_names``, so they cannot clash with variables kept in an
    output domain.
    """
    proposed = [f"{prefix}{index}" for index in range(1, n + 1)]
    taken = [var.name
             for var in self.domain.variables + self.domain.metas]
    taken.extend(extra_names)
    return get_unique_names(taken, proposed)

def project(self, data):
    """Return ``data`` transformed into the PLS score space.

    The resulting domain has the X-score variables as attributes, keeps
    the class variables of ``data``, and appends the Y-score variables
    to the metas of ``data``.

    Raises:
        RuntimeError: if ``data`` is not a ``Table``.
    """
    if not isinstance(data, Table):
        raise RuntimeError("PLSModel can only project tables")

    # One transform shared by all projectors of this projection.
    transformer = _PLSCommonTransform(self)
    n_components = self.skl_model.x_loadings_.shape[1]

    def score_var(column, name):
        return ContinuousVariable(
            name, compute_value=PLSProjector(transformer, column))

    # Names must also be unique among the variables of the projected
    # table, since its class variables and metas are carried over.
    data_names = [var.name
                  for var in data.domain.variables + data.domain.metas]
    names_x = self._get_var_names(
        n_components, self.var_prefix_X, data_names)
    names_y = self._get_var_names(
        n_components, self.var_prefix_Y, data_names)

    # The shared transform stacks X-scores before Y-scores, hence the
    # column offset of n_components for the Y-score variables.
    x_score_vars = [score_var(i, name) for i, name in enumerate(names_x)]
    y_score_vars = [score_var(n_components + i, name)
                    for i, name in enumerate(names_y)]

    domain = Domain(
        x_score_vars,
        data.domain.class_vars,
        list(data.domain.metas) + y_score_vars,
    )
    return data.transform(domain)

def components(self):
    """Return a table with one row of loadings per PLS component.

    ``X`` holds the transposed X-loadings, ``Y`` the transposed
    Y-loadings, and a single string meta labels each row
    ``Component 1``, ``Component 2``, ...
    """
    orig_domain = self.domain
    n_components = self.skl_model.x_loadings_.shape[1]

    used_names = [var.name
                  for var in orig_domain.attributes + orig_domain.class_vars]
    meta_name = get_unique_names(used_names, 'components')

    # Column vector of row labels, one per component.
    labels = np.array(
        [f"Component {i + 1}" for i in range(n_components)], dtype=object
    ).reshape(-1, 1)

    dom = Domain(
        [ContinuousVariable(a.name) for a in orig_domain.attributes],
        [ContinuousVariable(a.name) for a in orig_domain.class_vars],
        metas=[StringVariable(name=meta_name)],
    )
    components = Table.from_numpy(
        dom,
        X=self.skl_model.x_loadings_.T,
        Y=self.skl_model.y_loadings_.T,
        metas=labels,
    )
    components.name = 'components'
    return components
def test_scores(self):
    """Projected scores agree with scikit-learn's X- and Y-scores."""
    data = table(10, 5, 1)
    fitted = PLSRegressionLearner()(data)
    reference = PLSRegression().fit(data.X, data.Y)

    expected_x, expected_y = reference.transform(data.X, data.Y)
    projected = fitted.project(data)

    # X-scores are the attributes, Y-scores are stored as metas.
    np.testing.assert_almost_equal(expected_x, projected.X)
    np.testing.assert_almost_equal(expected_y, projected.metas)

def test_components(self):
    """Component table holds scikit-learn's loadings, one row per component."""
    data = table(10, 5, 1)
    fitted = PLSRegressionLearner()(data)
    reference = PLSRegression().fit(data.X, data.Y)

    loadings = fitted.components()

    np.testing.assert_almost_equal(reference.x_loadings_, loadings.X.T)
    np.testing.assert_almost_equal(
        reference.y_loadings_, loadings.Y.reshape(1, -1))
components = None if self.model is not None: domain = Domain( [ContinuousVariable("coef")], metas=[StringVariable("name")]) @@ -58,7 +60,11 @@ def update_model(self): waves = [[attr.name] for attr in self.model.domain.attributes] coef_table = Table.from_numpy(domain, X=coefs, metas=waves) coef_table.name = "coefficients" + projection = self.model.project(self.data) + components = self.model.components() self.Outputs.coefsdata.send(coef_table) + self.Outputs.transformed.send(projection) + self.Outputs.components.send(components) @OWBaseLearner.Inputs.data def set_data(self, data):