Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EMSC: __eq__ and __hash__ #697

Merged
merged 5 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 72 additions & 28 deletions orangecontrib/spectroscopy/preprocess/emsc.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,68 @@
import numpy as np

import Orange
from Orange.data import Table
from Orange.preprocess.preprocess import Preprocess
from Orange.data.util import get_unique_names

from orangecontrib.spectroscopy.data import getx, spectra_mean
from orangecontrib.spectroscopy.data import getx
from orangecontrib.spectroscopy.preprocess.utils import SelectColumn, CommonDomainOrderUnknowns, \
interp1d_with_unknowns_numpy, nan_extend_edges_and_interpolate, MissingReferenceException
interp1d_with_unknowns_numpy, MissingReferenceException, interpolate_extend_to, \
CommonDomainRef, table_eq_x
from orangecontrib.spectroscopy.preprocess.npfunc import Function, Segments


class SelectionFunction(Segments):
class SelectionFunction(Function):
"""
Weighted selection function. Includes min and max.
"""
def __init__(self, min_, max_, w):
super().__init__((lambda x: True,
lambda x: 0),
(lambda x: np.logical_and(x >= min_, x <= max_),
lambda x: w))
super().__init__(None)
self.min_ = min_
self.max_ = max_
self.w = w

def __call__(self, x):
    """Evaluate the weighted selection at ``x``.

    Delegates to a two-entry ``Segments``: a catch-all entry that maps
    to 0 and an entry for the closed interval ``[min_, max_]`` that
    maps to ``w``.
    """
    def within_bounds(pts):
        # Both bounds are inclusive.
        return np.logical_and(pts >= self.min_, pts <= self.max_)

    background = (lambda pts: True, lambda pts: 0)
    selected = (within_bounds, lambda pts: self.w)
    return Segments(background, selected)(x)

class SmoothedSelectionFunction(Segments):
def __eq__(self, other):
    """Compare via the base class first, then ``min_``, ``max_`` and ``w``."""
    # The base comparison runs first so that the attributes of ``other``
    # are only read after it has succeeded (short-circuit evaluation).
    base_equal = super().__eq__(other)
    return (base_equal
            and self.min_ == other.min_
            and self.max_ == other.max_
            and self.w == other.w)

def __hash__(self):
    """Hash the base-class hash together with ``min_``, ``max_`` and ``w``."""
    key = (super().__hash__(), self.min_, self.max_, self.w)
    return hash(key)


class SmoothedSelectionFunction(SelectionFunction):
"""
Weighted selection function. Min and max points are middle
points of smoothing with hyperbolic tangent.
"""
def __init__(self, min_, max_, s, w):
middle = (min_ + max_) / 2
super().__init__((lambda x: x < middle,
lambda x: (np.tanh((x - min_) / s) + 1) / 2 * w),
(lambda x: x >= middle,
lambda x: (-np.tanh((x - max_) / s) + 1) / 2 * w))
super().__init__(min_, max_, w)
self.s = s

def __call__(self, x):
    """Evaluate the tanh-smoothed selection at ``x``.

    Points below the midpoint of ``min_`` and ``max_`` use a tanh
    centred on ``min_``; points at or above the midpoint use a mirrored
    tanh centred on ``max_``. Both are scaled by ``w`` and use ``s`` as
    the smoothing width.
    """
    lower, upper, width, weight = self.min_, self.max_, self.s, self.w
    midpoint = (lower + upper) / 2

    def rising_edge(pts):
        return (np.tanh((pts - lower) / width) + 1) / 2 * weight

    def falling_edge(pts):
        return (-np.tanh((pts - upper) / width) + 1) / 2 * weight

    smoothed = Segments(
        (lambda pts: pts < midpoint, rising_edge),
        (lambda pts: pts >= midpoint, falling_edge),
    )
    return smoothed(x)

def __eq__(self, other):
    """Compare via the parent class first, then the smoothing width ``s``."""
    parent_equal = super().__eq__(other)
    # ``other.s`` is only read once the parent comparison has succeeded.
    return parent_equal and self.s == other.s

def __hash__(self):
    """Hash the parent-class hash together with the smoothing width ``s``."""
    key = (super().__hash__(), self.s)
    return hash(key)


def weighted_wavenumbers(weights, wavenumbers):
Expand Down Expand Up @@ -61,11 +92,12 @@ class EMSCModel(SelectColumn):
InheritEq = True


class _EMSC(CommonDomainOrderUnknowns):
class _EMSC(CommonDomainOrderUnknowns, CommonDomainRef):

def __init__(self, reference, badspectra, weights, order, scaling, domain):
super().__init__(domain)
self.reference = reference
CommonDomainOrderUnknowns.__init__(self, domain)
CommonDomainRef.__init__(self, reference, domain)
assert len(self.reference) == 1
self.badspectra = badspectra
self.weights = weights
self.order = order
Expand All @@ -74,17 +106,7 @@ def __init__(self, reference, badspectra, weights, order, scaling, domain):
def transformed(self, X, wavenumbers):
# wavenumbers have to be passed in sorted
# about 85% of the time in the __call__ function is spent in lstsq
# compute average spectrum from the reference
ref_X = np.atleast_2d(spectra_mean(self.reference.X))

def interpolate_to_data(other_xs, other_data):
# all input data needs to be interpolated (and NaNs removed)
interpolated = interp1d_with_unknowns_numpy(other_xs, other_data, wavenumbers)
# we know that X is not NaN. same handling of reference as of X
interpolated, _ = nan_extend_edges_and_interpolate(wavenumbers, interpolated)
return interpolated

ref_X = interpolate_to_data(getx(self.reference), ref_X)
ref_X = interpolate_extend_to(self.reference, wavenumbers)
wei_X = weighted_wavenumbers(self.weights, wavenumbers)

N = wavenumbers.shape[0]
Expand All @@ -93,7 +115,7 @@ def interpolate_to_data(other_xs, other_data):

n_badspec = len(self.badspectra) if self.badspectra is not None else 0
if self.badspectra:
badspectra_X = interpolate_to_data(getx(self.badspectra), self.badspectra.X)
badspectra_X = interpolate_extend_to(self.badspectra, wavenumbers)

M = []
for x in range(0, self.order+1):
Expand Down Expand Up @@ -122,6 +144,26 @@ def interpolate_to_data(other_xs, other_data):

return newspectra

def __eq__(self, other):
    """Equality over the common-domain reference and all EMSC settings.

    Delegates to ``CommonDomainRef.__eq__`` first, then compares
    ``badspectra`` (via ``table_eq_x``), ``order``, ``scaling`` and
    ``weights``. Weights are compared with ``table_eq_x`` when they are
    a ``Table`` and with ``==`` otherwise.
    """
    def same_weights():
        # Evaluated last, and only if every earlier check passed.
        if isinstance(self.weights, Table):
            return table_eq_x(self.weights, other.weights)
        return self.weights == other.weights

    return CommonDomainRef.__eq__(self, other) \
        and table_eq_x(self.badspectra, other.badspectra) \
        and self.order == other.order \
        and self.scaling == other.scaling \
        and same_weights()

def __hash__(self):
    """Hash consistent with ``__eq__``, built from cheap fingerprints.

    For table-valued ``badspectra`` and ``weights`` only the first ten
    values of the first row are hashed (plus the ``badspectra`` domain),
    so distinct tables may share a hash; ``__eq__`` does the full check.
    """
    if self.badspectra is None:
        bad_domain = None
        bad_head = None
    else:
        bad_domain = self.badspectra.domain
        bad_head = tuple(self.badspectra.X[0][:10])

    if isinstance(self.weights, Table):
        weights_key = tuple(self.weights.X[0][:10])
    else:
        weights_key = self.weights

    key = (CommonDomainRef.__hash__(self), bad_domain, bad_head, weights_key,
           self.order, self.scaling)
    return hash(key)


def average_table_x(data):
    """Return a new table whose ``X`` is the mean of ``data.X`` over axis 0.

    The mean keeps its dimensions (``keepdims=True``), and the new
    table's domain contains only the attributes of ``data``.
    """
    # NOTE(review): a plain ``mean`` propagates NaNs - confirm that this is
    # the intended handling for references with missing values.
    mean_row = data.X.mean(axis=0, keepdims=True)
    attributes_only = Orange.data.Domain(data.domain.attributes)
    return Orange.data.Table.from_numpy(attributes_only, X=mean_row)


class EMSC(Preprocess):

Expand All @@ -132,6 +174,8 @@ def __init__(self, reference=None, badspectra=None, weights=None, order=2, scali
if reference is None:
raise MissingReferenceException()
self.reference = reference
if len(self.reference) > 1:
self.reference = average_table_x(self.reference)
self.badspectra = badspectra
self.weights = weights
self.order = order
Expand Down
51 changes: 31 additions & 20 deletions orangecontrib/spectroscopy/preprocess/me_emsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,14 @@
from sklearn.decomposition import TruncatedSVD

import Orange
from Orange.data import Table
from Orange.preprocess.preprocess import Preprocess
from Orange.data.util import get_unique_names

from orangecontrib.spectroscopy.data import getx, spectra_mean
from orangecontrib.spectroscopy.data import getx
from orangecontrib.spectroscopy.preprocess.utils import SelectColumn, CommonDomainOrderUnknowns, \
interp1d_with_unknowns_numpy, nan_extend_edges_and_interpolate
from orangecontrib.spectroscopy.preprocess.emsc import weighted_wavenumbers


def interpolate_to_data(other_xs, other_data, wavenumbers):
# all input data needs to be interpolated (and NaNs removed)
interpolated = interp1d_with_unknowns_numpy(other_xs, other_data, wavenumbers)
# we know that X is not NaN. same handling of reference as of X
interpolated, _ = nan_extend_edges_and_interpolate(wavenumbers, interpolated)
return interpolated
interpolate_extend_to, CommonDomainRef, table_eq_x
from orangecontrib.spectroscopy.preprocess.emsc import weighted_wavenumbers, average_table_x


def calculate_complex_n(ref_X,wavenumbers):
Expand Down Expand Up @@ -95,12 +88,13 @@ class ME_EMSCModel(SelectColumn):
InheritEq = True


class _ME_EMSC(CommonDomainOrderUnknowns):
class _ME_EMSC(CommonDomainOrderUnknowns, CommonDomainRef):

def __init__(self, reference, weights, ncomp, alpha0, gamma, maxNiter, fixedNiter, positiveRef, domain):
super().__init__(domain)
self.reference = reference
self.weights = weights # !!! THIS SHOULD BE A NP ARRAY (or similar) with inflection points
CommonDomainOrderUnknowns.__init__(self, domain)
CommonDomainRef.__init__(self, reference, domain)
assert len(reference) == 1
self.weights = weights
self.ncomp = ncomp
self.alpha0 = alpha0
self.gamma = gamma
Expand Down Expand Up @@ -234,8 +228,7 @@ def iterate(spectra, correctedFirsIteration, residualsFirstIteration, wavenumber
break
return newspectra, RMSEall, numberOfIterations

ref_X = np.atleast_2d(spectra_mean(self.reference.X))
ref_X = interpolate_to_data(getx(self.reference), ref_X, wavenumbers)
ref_X = interpolate_extend_to(self.reference, wavenumbers)
ref_X = ref_X[0]

wei_X = weighted_wavenumbers(self.weights, wavenumbers)
Expand Down Expand Up @@ -284,6 +277,24 @@ def iterate(spectra, correctedFirsIteration, residualsFirstIteration, wavenumber
newspectra = np.hstack((newspectra, numberOfIterations.reshape(-1, 1),RMSEall.reshape(-1, 1)))
return newspectra

def __eq__(self, other):
    """Equality over the common-domain reference and all ME-EMSC settings.

    Delegates to ``CommonDomainRef.__eq__`` first, then compares
    ``ncomp``, ``alpha0`` and ``gamma`` (with ``np.array_equal``),
    ``maxNiter``, ``fixedNiter``, ``positiveRef`` and finally
    ``weights``. Weights are compared with ``table_eq_x`` when they are
    a ``Table`` and with ``==`` otherwise.
    """
    def same_weights():
        # Evaluated last, and only if every earlier check passed.
        if isinstance(self.weights, Table):
            return table_eq_x(self.weights, other.weights)
        return self.weights == other.weights

    return CommonDomainRef.__eq__(self, other) \
        and self.ncomp == other.ncomp \
        and np.array_equal(self.alpha0, other.alpha0) \
        and np.array_equal(self.gamma, other.gamma) \
        and self.maxNiter == other.maxNiter \
        and self.fixedNiter == other.fixedNiter \
        and self.positiveRef == other.positiveRef \
        and same_weights()

def __hash__(self):
    """Hash consistent with ``__eq__``.

    Table-valued ``weights`` contribute only the first ten values of
    their first row; ``alpha0`` and ``gamma`` are converted to tuples so
    that they can be hashed.
    """
    if isinstance(self.weights, Table):
        weights_key = tuple(self.weights.X[0][:10])
    else:
        weights_key = self.weights

    key = (CommonDomainRef.__hash__(self), weights_key, self.ncomp,
           tuple(self.alpha0), tuple(self.gamma),
           self.maxNiter, self.fixedNiter, self.positiveRef)
    return hash(key)


class MissingReferenceException(Exception):
    """Raised when a preprocessor is constructed without a reference."""
Expand All @@ -298,6 +309,8 @@ def __init__(self, reference=None, weights=None, ncomp=False, n0=np.linspace(1.1
if reference is None:
raise MissingReferenceException()
self.reference = reference
if len(self.reference) > 1:
self.reference = average_table_x(self.reference)
self.weights = weights
self.ncomp = ncomp
self.output_model = output_model
Expand All @@ -315,10 +328,8 @@ def __init__(self, reference=None, weights=None, ncomp=False, n0=np.linspace(1.1
self.gamma = self.h * np.log(10) / (4 * np.pi * 0.5 * np.pi * (self.n0 - 1) * self.a * 1e-6)

if not self.ncomp:
ref_X = np.atleast_2d(spectra_mean(self.reference.X))
wavenumbers_ref = np.array(sorted(getx(self.reference)))
ref_X = interpolate_to_data(getx(self.reference), ref_X, wavenumbers_ref)
ref_X = ref_X[0]
ref_X = interpolate_extend_to(self.reference, wavenumbers_ref)[0]
self.ncomp = cal_ncomp(ref_X, wavenumbers_ref, explainedVariance, self.alpha0, self.gamma)
else:
self.explainedVariance = False
Expand Down
35 changes: 33 additions & 2 deletions orangecontrib/spectroscopy/preprocess/npfunc.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,44 @@
import numpy as np


class Function():
class Function:

def __init__(self, fn):
    """Store the wrapped callable ``fn``.

    ``fn`` is what ``__call__`` invokes; it also takes part in
    ``__eq__`` and ``__hash__``.
    """
    self.fn = fn

def __call__(self, x):
    """Return the result of applying the stored ``fn`` to ``x``."""
    return self.fn(x)

def __eq__(self, other):
return type(self) is type(other) \
and self.fn == other.fn

def __hash__(self):
return hash((type(self), self.fn))


class Constant(Function):
    """Function that evaluates to the constant ``c`` at every input point."""

    def __init__(self, c):
        # There is no wrapped callable; the value comes from ``c`` alone.
        super().__init__(None)
        self.c = c

    def __call__(self, x):
        """Return an array with the shape of ``x`` filled with ``c``."""
        shape = np.asarray(x).shape
        return np.ones(shape) * self.c

    def __eq__(self, other):
        """Compare via the base class first, then the constant ``c``."""
        base_equal = super().__eq__(other)
        return base_equal and self.c == other.c

    def __hash__(self):
        """Hash the base-class hash together with ``c``."""
        key = (super().__hash__(), self.c)
        return hash(key)


class Identity(Function):

def __init__(self):
pass
super().__init__(None)

def __call__(self, x):
    """Return ``x`` unchanged."""
    return x
Expand All @@ -38,6 +53,7 @@ class Segments(Function):
"""

def __init__(self, *segments):
    """Store the given segments as a tuple.

    No wrapped callable is used, so ``None`` is passed to the base
    class.
    """
    # NOTE(review): callers in emsc.py pass (condition, function) pairs -
    # confirm against the class docstring.
    super().__init__(None)
    self.segments = segments

def __call__(self, x):
Expand All @@ -48,10 +64,18 @@ def __call__(self, x):
output[ind] = fn(x[ind])
return output

def __eq__(self, other):
    """Compare via the base class first, then the ``segments`` tuples.

    Segments built from plain functions or lambdas compare equal only
    when they hold the very same function objects.
    """
    base_equal = super().__eq__(other)
    return base_equal and self.segments == other.segments

def __hash__(self):
    """Hash the base-class hash together with the ``segments`` tuple."""
    key = (super().__hash__(), self.segments)
    return hash(key)


class Sum(Function):

def __init__(self, *elements):
    """Store the given elements as a tuple.

    No wrapped callable is used, so ``None`` is passed to the base
    class.
    """
    super().__init__(None)
    self.elements = elements

def __call__(self, x):
Expand All @@ -63,3 +87,10 @@ def __call__(self, x):
else:
acc = acc + current
return acc

def __eq__(self, other):
    """Return whether ``other`` is a ``Sum`` of equal elements.

    The base-class comparison runs first, so ``other.elements`` is only
    read after that check has succeeded.
    """
    # Compare ``elements`` on both sides: ``__init__`` stores the summands
    # in ``self.elements`` and never sets a ``segments`` attribute, so the
    # earlier ``self.segments`` lookup raised AttributeError whenever the
    # base comparison passed.
    return super().__eq__(other) \
        and self.elements == other.elements

def __hash__(self):
    """Hash the base-class hash together with the ``elements`` tuple."""
    key = (super().__hash__(), self.elements)
    return hash(key)
Loading
Loading