Skip to content

Commit

Permalink
Merge pull request #738 from markotoplak/refactor-preprocess-tests
Browse files Browse the repository at this point in the history
[MNT] Refactor preprocess tests
  • Loading branch information
markotoplak authored Aug 20, 2024
2 parents ff69669 + d856bf3 commit 86d7b11
Show file tree
Hide file tree
Showing 12 changed files with 323 additions and 283 deletions.
12 changes: 11 additions & 1 deletion orangecontrib/spectroscopy/tests/test_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,19 @@

from Orange.data import Table
from orangecontrib.spectroscopy.preprocess.als import ALSP, ARPLS, AIRPLS
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALLER_COLLAGEN


class Testals(unittest.TestCase):
class TestAls(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = [
ALSP(lam=100E+6, itermax=5, p=0.5),
ARPLS(lam=100E+5, itermax=5, ratio=0.5),
AIRPLS(lam=100, itermax=5, porder=1),
]
data = SMALLER_COLLAGEN

def test_als_Basic(self):
data = Table.from_numpy(None, [[1.0, 2.0, 10.0, 5.0],
[3.0, 5.0, 9.0, 4.0]])
Expand Down
9 changes: 8 additions & 1 deletion orangecontrib/spectroscopy/tests/test_atm_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@

from orangecontrib.spectroscopy.preprocess.atm_corr import AtmCorr
from orangecontrib.spectroscopy.tests.util import spectra_table
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALL_COLLAGEN, add_edge_case_data_parameter

class TestAtmCorr(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = list(
add_edge_case_data_parameter(AtmCorr, "reference", SMALL_COLLAGEN[0:1],
correct_ranges=[(1300, 2100)], smooth_win=5))
data = SMALL_COLLAGEN

class TestAtmCorr(unittest.TestCase):
def test_atm_corr(self):
# Fake atmospheric spectrum
def atm(wn):
Expand Down
65 changes: 4 additions & 61 deletions orangecontrib/spectroscopy/tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,17 @@
from Orange.evaluation.scoring import AUC
from Orange.data.table import DomainTransformationError

from orangecontrib.spectroscopy.tests.test_preprocess import \
PREPROCESSORS_INDEPENDENT_SAMPLES, \
PREPROCESSORS

from orangecontrib.spectroscopy.tests.test_preprocess import SMALL_COLLAGEN, preprocessor_data

from orangecontrib.spectroscopy.preprocess import Interpolate, \
Cut, SavitzkyGolayFiltering
from orangecontrib.spectroscopy.data import getx
from orangecontrib.spectroscopy.tests.util import smaller_data


logreg = LogisticRegressionLearner(max_iter=1000)

COLLAGEN = Orange.data.Table("collagen")
SMALL_COLLAGEN = smaller_data(COLLAGEN, 2, 2)


def separate_learn_test(data):
sf = ms.ShuffleSplit(n_splits=1, test_size=0.2, random_state=np.random.RandomState(0))
Expand Down Expand Up @@ -80,20 +78,6 @@ def test_predict_different_domain_interpolation(self):
# the more we cut the lower precision we get
self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)

def test_whole_and_train_separete(self):
""" Applying a preprocessor before splitting data into train and test
and applying it just on train data should yield the same transformation of
the test data. """
for proc in PREPROCESSORS_INDEPENDENT_SAMPLES:
with self.subTest(proc):
data = preprocessor_data(proc)
# path 1: preprocess the whole data set, then split and keep the test part
_, test1 = separate_learn_test(proc(data))
# path 2: split first and apply the preprocessor to the train part only
train, test = separate_learn_test(data)
train = proc(train)
# carry the untouched test part into the domain of the preprocessed train data
test_transformed = test.transform(train.domain)
# both paths have to give (almost) equal test values
np.testing.assert_almost_equal(test_transformed.X, test1.X,
err_msg="Preprocessor " + str(proc))

def test_predict_savgov_same_domain(self):
data = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(self.collagen)
train, test = separate_learn_test(data)
Expand All @@ -107,44 +91,3 @@ def test_predict_savgol_another_interpolate(self):
train = Interpolate(points=getx(train))(train)
aucai = AUC(TestOnTestData()(train, test, [logreg]))
self.assertAlmostEqual(auc, aucai, delta=0.02)

def test_slightly_different_domain(self):
""" If test data has a slightly different domain then (with interpolation)
we should obtain a similar classification score. """
# rows full of unknowns make LogisticRegression undefined
# we can obtain them, for example, with EMSC, if one of the badspectra
# is a spectrum from the data
learner = LogisticRegressionLearner(max_iter=1000, preprocessors=[_RemoveNaNRows()])

for proc in PREPROCESSORS:
# preprocessors that carry a "skip_add_zeros" attribute are excluded from this test
if hasattr(proc, "skip_add_zeros"):
continue
with self.subTest(proc):
# LR that can not handle unknown values
train, test = separate_learn_test(preprocessor_data(proc))
train1 = proc(train)
# reference score: preprocessed train data, unmodified test data
aucorig = AUC(TestOnTestData()(train1, test, [learner]))
test = slightly_change_wavenumbers(test, 0.00001)
test = odd_attr(test)
# a subset of points for training so that all test set points
# are within the train set points, which gives no unknowns
train = Interpolate(points=getx(train)[1:-3])(train) # interpolatable train
train = proc(train)
# explicit domain conversion test to catch exceptions that would
# otherwise be silently handled in TestOnTestData
_ = test.transform(train.domain)
aucnow = AUC(TestOnTestData()(train, test, [learner]))
self.assertAlmostEqual(aucnow, aucorig, delta=0.03, msg="Preprocessor " + str(proc))
test = Interpolate(points=getx(test) - 1.)(test) # also do a shift
_ = test.transform(train.domain) # explicit call again
aucnow = AUC(TestOnTestData()(train, test, [learner]))
# the difference should be slight
self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))


class _RemoveNaNRows(Orange.preprocess.preprocess.Preprocess):
    """Preprocessor that drops every row whose X values contain a NaN."""

    def __call__(self, data):
        # True for each row that has at least one NaN among its X values
        rows_with_nan = np.isnan(data.X).any(axis=1)
        return data[~rows_with_nan]
7 changes: 6 additions & 1 deletion orangecontrib/spectroscopy/tests/test_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
import unittest
from orangecontrib.spectroscopy.preprocess import Cut
from orangecontrib.spectroscopy.data import getx
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALL_COLLAGEN


class TestCut(unittest.TestCase):
class TestCut(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = [Cut(lowlim=1000, highlim=1800)]
data = SMALL_COLLAGEN

@classmethod
def setUpClass(cls):
Expand Down
8 changes: 7 additions & 1 deletion orangecontrib/spectroscopy/tests/test_despike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
import numpy as np
from Orange.data import Table
from orangecontrib.spectroscopy.preprocess import Despike
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALL_COLLAGEN


class TestSpikeremoval(unittest.TestCase):
class TestSpikeRemoval(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = [Despike(threshold=5, cutoff=60, dis=5)]
data = SMALL_COLLAGEN

def test_spikes(self):
data = Table.from_numpy(None, [[1000, 1, 1, 1, 1, 10, 1, 1, 1000, 1000, 1000, 1, 1000,
1, 1, 1, 1000, 1000, 1000, 1000],
Expand Down
12 changes: 11 additions & 1 deletion orangecontrib/spectroscopy/tests/test_emsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,19 @@
SelectionFunction, SmoothedSelectionFunction
from orangecontrib.spectroscopy.preprocess.npfunc import Sum
from orangecontrib.spectroscopy.tests.util import spectra_table
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALL_COLLAGEN, add_edge_case_data_parameter


class TestEMSC(unittest.TestCase):
class TestEMSC(unittest.TestCase, TestCommonIndpSamplesMixin):

different_reference = list(
add_edge_case_data_parameter(EMSC, "reference", SMALL_COLLAGEN[0:1]))
different_badspectra = list(
add_edge_case_data_parameter(EMSC, "badspectra", SMALL_COLLAGEN[0:2],
reference=SMALL_COLLAGEN[-1:]))
preprocessors = different_reference + different_badspectra
data = SMALL_COLLAGEN

def test_ab(self):
data = Table.from_numpy(None, [[1.0, 2.0, 1.0, 1.0],
Expand Down
17 changes: 16 additions & 1 deletion orangecontrib/spectroscopy/tests/test_integrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,24 @@
import numpy as np

from orangecontrib.spectroscopy.preprocess import Integrate
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALL_COLLAGEN


class TestIntegrate(unittest.TestCase):
class TestIntegrate(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = [
Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),
Integrate(methods=Integrate.PeakMax, limits=[[1100, 1200]]),
Integrate(methods=Integrate.PeakBaseline, limits=[[1100, 1200]]),
Integrate(methods=Integrate.PeakAt, limits=[[1100]]),
Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]),
Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]])
]
data = SMALL_COLLAGEN


def test_simple(self):
data = Table.from_numpy(None, [[1, 2, 3, 1, 1, 1],
Expand Down
7 changes: 6 additions & 1 deletion orangecontrib/spectroscopy/tests/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,14 @@
nan_extend_edges_and_interpolate
from orangecontrib.spectroscopy.data import getx
from orangecontrib.spectroscopy.tests.util import spectra_table
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALL_COLLAGEN


class TestInterpolate(unittest.TestCase):
class TestInterpolate(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = [Interpolate(np.linspace(1000, 1700, 100))]
data = SMALL_COLLAGEN

@classmethod
def setUpClass(cls):
Expand Down
8 changes: 7 additions & 1 deletion orangecontrib/spectroscopy/tests/test_me_emsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from orangecontrib.spectroscopy.preprocess.me_emsc import ME_EMSC
from orangecontrib.spectroscopy.preprocess.emsc import SelectionFunction, SmoothedSelectionFunction
from orangecontrib.spectroscopy.preprocess.npfunc import Sum
from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
SMALLER_COLLAGEN, add_edge_case_data_parameter


def weights_from_inflection_points_legacy(points, kappa, wavenumbers):
Expand Down Expand Up @@ -82,7 +84,11 @@ def weights_from_inflection_points_legacy(points, kappa, wavenumbers):
return data


class TestME_EMSC(unittest.TestCase):
class TestME_EMSC(unittest.TestCase, TestCommonIndpSamplesMixin):

preprocessors = list(
add_edge_case_data_parameter(ME_EMSC, "reference", SMALLER_COLLAGEN[0:1], max_iter=4))
data = SMALLER_COLLAGEN

@classmethod
def setUpClass(cls):
Expand Down
Loading

0 comments on commit 86d7b11

Please sign in to comment.