Skip to content

Commit

Permalink
PLS: transformed and components outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
markotoplak committed Oct 19, 2023
1 parent e6924c0 commit 7d3796e
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 10 deletions.
98 changes: 90 additions & 8 deletions orangecontrib/spectroscopy/models/pls.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import numpy as np
import sklearn.cross_decomposition as skl_pls

from Orange.data import Variable, ContinuousVariable
from Orange.data import Table, Domain, Variable, \
ContinuousVariable, StringVariable
from Orange.data.util import get_unique_names, SharedComputeValue
from Orange.preprocess.score import LearnerScorer
from Orange.regression import SklLearner, SklModel

# Add any pre-processing of data here
# Normalization is only needed if and when the data
# is changing overall shape or the x axis is varying for every data row/instance

pls_pps = SklLearner.preprocessors


class _FeatureScorerMixin(LearnerScorer):
feature_type = Variable
Expand All @@ -21,7 +17,45 @@ def score(self, data):
return np.abs(model.coefficients), model.domain.attributes


class _PLSCommonTransform:

def __init__(self, pls_model):
self.pls_model = pls_model

def _transform_with_numpy_output(self, X, Y):
pls = self.pls_model.skl_model
"""
# the next command does the following
x_center = X - pls._x_mean
y_center = Y - pls._y_mean
t = x_center @ pls.x_rotations_
u = y_center @ pls.y_rotations_
"""
t, u = pls.transform(X, Y)
return np.hstack((t, u))

def __call__(self, data):
if data.domain != self.pls_model.domain:
data = data.transform(self.pls_model.domain)
if len(data.Y.shape) == 1:
Y = data.Y.reshape(-1, 1)
else:
Y = data.Y

Check warning on line 43 in orangecontrib/spectroscopy/models/pls.py

View check run for this annotation

Codecov / codecov/patch

orangecontrib/spectroscopy/models/pls.py#L43

Added line #L43 was not covered by tests
return self._transform_with_numpy_output(data.X, Y)


class PLSProjector(SharedComputeValue):
    """Compute value that selects one column of a shared PLS projection.

    ``transform`` is the shared callable passed on to ``SharedComputeValue``;
    ``feature`` is the column index this projector returns from the shared
    result.
    """

    def __init__(self, transform, feature):
        super().__init__(transform)
        self.feature = feature

    def compute(self, _, space):
        """Return column ``self.feature`` of the shared projection ``space``."""
        return space[:, self.feature]

    def __eq__(self, other):
        # Projectors that share a transform but select different columns
        # must not compare equal, so `feature` is part of the identity.
        if self is other:
            return True
        # NOTE(review): relies on SharedComputeValue.__eq__ comparing the
        # type and the shared transform -- confirm for the Orange version
        # in use. If the base class offers no comparison, defer to Python's
        # default (identity) handling.
        base_equal = super().__eq__(other)
        if base_equal is NotImplemented:
            return NotImplemented
        return base_equal and self.feature == other.feature

    def __hash__(self):
        return hash((super().__hash__(), self.feature))


class PLSModel(SklModel):
var_prefix_X = "PLS T"
var_prefix_Y = "PLS U"

@property
def coefficients(self):
Expand All @@ -34,12 +68,60 @@ def predict(self, X):
def __str__(self):
    """Return 'PLSModel ' followed by the wrapped scikit-learn model."""
    return f'PLSModel {self.skl_model}'

def _get_var_names(self, n, prefix):
    """Return ``n`` variable names ``prefix1`` .. ``prefixN``, made unique.

    The proposed names are checked against every variable of the model's
    domain (attributes, class variables and metas), not only the metas:
    the generated variables end up in a domain next to class variables,
    so a clash with any of those would produce duplicate names.
    """
    proposed = [f"{prefix}{index}" for index in range(1, n + 1)]
    taken = [var.name for var in self.domain.metas + self.domain.variables]
    return get_unique_names(taken, proposed)

def project(self, data):
    """Project ``data`` into the PLS score space.

    The returned table has the X scores as attributes, keeps the class
    variables of ``data``, and stores the Y scores as metas placed before
    the original metas of ``data``.

    Raises:
        RuntimeError: if ``data`` is not a ``Table``.
    """
    if not isinstance(data, Table):
        # NOTE: this error path was reported by Codecov as not covered
        # by tests.
        raise RuntimeError("PLSModel can only project tables")

    shared = _PLSCommonTransform(self)
    n_components = self.skl_model.x_loadings_.shape[1]

    def score_vars(names, offset):
        # One variable per component; `offset` selects the block of
        # columns (X scores first, then Y scores) in the shared result.
        return [
            ContinuousVariable(
                names[k], compute_value=PLSProjector(shared, offset + k))
            for k in range(n_components)
        ]

    x_names = self._get_var_names(n_components, self.var_prefix_X)
    y_names = self._get_var_names(n_components, self.var_prefix_Y)

    x_scores = score_vars(x_names, 0)
    y_scores = score_vars(y_names, n_components)

    domain = Domain(
        x_scores,
        data.domain.class_vars,
        y_scores + list(data.domain.metas)
    )
    return data.transform(domain)

def components(self):
    """Return the PLS loadings as a table named 'components'.

    Each row is one component: X holds the transposed ``x_loadings_``,
    Y the transposed ``y_loadings_``, and a single string meta holds the
    label "Component <k>". Column variables reuse the names of the model
    domain's attributes and class variables.
    """
    model_domain = self.domain
    skl = self.skl_model
    n_components = skl.x_loadings_.shape[1]

    taken = [var.name
             for var in model_domain.attributes + model_domain.class_vars]
    label_var = StringVariable(name=get_unique_names(taken, 'components'))

    # A single row of labels, transposed into one label per table row.
    label_row = [f"Component {k + 1}" for k in range(n_components)]
    labels = np.array([label_row], dtype=object).T

    attributes = [ContinuousVariable(var.name)
                  for var in model_domain.attributes]
    class_vars = [ContinuousVariable(var.name)
                  for var in model_domain.class_vars]
    loadings_domain = Domain(attributes, class_vars, metas=[label_var])

    result = Table(loadings_domain,
                   skl.x_loadings_.T,
                   Y=skl.y_loadings_.T,
                   metas=labels)
    result.name = 'components'
    return result


class PLSRegressionLearner(SklLearner, _FeatureScorerMixin):
__wraps__ = skl_pls.PLSRegression
__returns__ = PLSModel

preprocessors = pls_pps
preprocessors = SklLearner.preprocessors

# this learner enforces a single class because multitarget is not
# explicitly allowed
Expand Down
17 changes: 17 additions & 0 deletions orangecontrib/spectroscopy/tests/test_owpls.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,23 @@ def test_too_many_components(self):
model = PLSRegressionLearner(n_components=6)(d)
self.assertEqual(model.skl_model.n_components, 4)

def test_scores(self):
    """Projected scores must agree with scikit-learn's own transform."""
    data = table(10, 5, 1)
    fitted = PLSRegressionLearner()(data)
    reference = PLSRegression().fit(data.X, data.Y)
    projected = fitted.project(data)
    expected_x, expected_y = reference.transform(data.X, data.Y)
    # X scores land in the attributes, Y scores in the metas.
    np.testing.assert_almost_equal(expected_x, projected.X)
    np.testing.assert_almost_equal(expected_y, projected.metas)

def test_components(self):
    """The components table must hold scikit-learn's loadings."""
    data = table(10, 5, 1)
    fitted = PLSRegressionLearner()(data)
    reference = PLSRegression().fit(data.X, data.Y)
    loadings = fitted.components()
    # The table stores one component per row, hence the transpose.
    np.testing.assert_almost_equal(reference.x_loadings_, loadings.X.T)
    np.testing.assert_almost_equal(
        reference.y_loadings_, loadings.Y.reshape(1, -1))


class TestOWPLS(WidgetTest, WidgetLearnerTestMixin):
def setUp(self):
Expand Down
10 changes: 8 additions & 2 deletions orangecontrib/spectroscopy/widgets/owpls.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ class OWPLS(OWBaseLearner):

# Output signals of the widget, extending those of OWBaseLearner.
class Outputs(OWBaseLearner.Outputs):
# Table output labelled "Coefficients"; declared with explicit=True.
coefsdata = Output("Coefficients", Table, explicit=True)
# Table output labelled "Transformed Data" (sent from update_model).
transformed = Output("Transformed Data", Table)
# Table output labelled "Components" (sent from update_model).
components = Output("Components", Table)

# Warning messages of the widget, extending those of OWBaseLearner.
class Warning(OWBaseLearner.Warning):
# Message text for sparse input data.
sparse_data = Msg('Sparse input data: default preprocessing is to scale it.')

#: number of components
n_components = Setting(2)
#: maximum number of iterations
max_iter = Setting(500)

def add_main_layout(self):
Expand All @@ -50,6 +50,8 @@ def add_main_layout(self):
def update_model(self):
super().update_model()
coef_table = None
projection = None
components = None
if self.model is not None:
domain = Domain(
[ContinuousVariable("coef")], metas=[StringVariable("name")])
Expand All @@ -58,7 +60,11 @@ def update_model(self):
waves = [[attr.name] for attr in self.model.domain.attributes]
coef_table = Table.from_numpy(domain, X=coefs, metas=waves)
coef_table.name = "coefficients"
projection = self.model.project(self.data)
components = self.model.components()
self.Outputs.coefsdata.send(coef_table)
self.Outputs.transformed.send(projection)
self.Outputs.components.send(components)

@OWBaseLearner.Inputs.data
def set_data(self, data):
Expand Down

0 comments on commit 7d3796e

Please sign in to comment.