From e645b771a77ba0cda1c48e1ac1658a3d7b44cabe Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 16:02:54 +0200
Subject: [PATCH 01/13] Move conversion preprocessor tests into test_preprocess

---
 .../spectroscopy/tests/test_conversion.py     | 65 ++----------------
 .../spectroscopy/tests/test_preprocess.py     | 67 +++++++++++++++++++
 2 files changed, 71 insertions(+), 61 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_conversion.py b/orangecontrib/spectroscopy/tests/test_conversion.py
index 7f1f20618..c445c2323 100644
--- a/orangecontrib/spectroscopy/tests/test_conversion.py
+++ b/orangecontrib/spectroscopy/tests/test_conversion.py
@@ -10,19 +10,17 @@
 from Orange.evaluation.scoring import AUC
 from Orange.data.table import DomainTransformationError
 
-from orangecontrib.spectroscopy.tests.test_preprocess import \
-    PREPROCESSORS_INDEPENDENT_SAMPLES, \
-    PREPROCESSORS
-
-from orangecontrib.spectroscopy.tests.test_preprocess import SMALL_COLLAGEN, preprocessor_data
-
 from orangecontrib.spectroscopy.preprocess import Interpolate, \
     Cut, SavitzkyGolayFiltering
 from orangecontrib.spectroscopy.data import getx
+from orangecontrib.spectroscopy.tests.util import smaller_data
 
 
 logreg = LogisticRegressionLearner(max_iter=1000)
 
+COLLAGEN = Orange.data.Table("collagen")
+SMALL_COLLAGEN = smaller_data(COLLAGEN, 2, 2)
+
 
 def separate_learn_test(data):
     sf = ms.ShuffleSplit(n_splits=1, test_size=0.2, random_state=np.random.RandomState(0))
@@ -80,20 +78,6 @@ def test_predict_different_domain_interpolation(self):
         # the more we cut the lower precision we get
         self.assertTrue(aucorig > auccut1 > auccut2 > auccut3)
 
-    def test_whole_and_train_separete(self):
-        """ Applying a preprocessor before spliting data into train and test
-        and applying is just on train data should yield the same transformation of
-        the test data. """
-        for proc in PREPROCESSORS_INDEPENDENT_SAMPLES:
-            with self.subTest(proc):
-                data = preprocessor_data(proc)
-                _, test1 = separate_learn_test(proc(data))
-                train, test = separate_learn_test(data)
-                train = proc(train)
-                test_transformed = test.transform(train.domain)
-                np.testing.assert_almost_equal(test_transformed.X, test1.X,
-                                               err_msg="Preprocessor " + str(proc))
-
     def test_predict_savgov_same_domain(self):
         data = SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)(self.collagen)
         train, test = separate_learn_test(data)
@@ -107,44 +91,3 @@ def test_predict_savgol_another_interpolate(self):
         train = Interpolate(points=getx(train))(train)
         aucai = AUC(TestOnTestData()(train, test, [logreg]))
         self.assertAlmostEqual(auc, aucai, delta=0.02)
-
-    def test_slightly_different_domain(self):
-        """ If test data has a slightly different domain then (with interpolation)
-        we should obtain a similar classification score. """
-        # rows full of unknowns make LogisticRegression undefined
-        # we can obtain them, for example, with EMSC, if one of the badspectra
-        # is a spectrum from the data
-        learner = LogisticRegressionLearner(max_iter=1000, preprocessors=[_RemoveNaNRows()])
-
-        for proc in PREPROCESSORS:
-            if hasattr(proc, "skip_add_zeros"):
-                continue
-            with self.subTest(proc):
-                # LR that can not handle unknown values
-                train, test = separate_learn_test(preprocessor_data(proc))
-                train1 = proc(train)
-                aucorig = AUC(TestOnTestData()(train1, test, [learner]))
-                test = slightly_change_wavenumbers(test, 0.00001)
-                test = odd_attr(test)
-                # a subset of points for training so that all test sets points
-                # are within the train set points, which gives no unknowns
-                train = Interpolate(points=getx(train)[1:-3])(train)  # interpolatable train
-                train = proc(train)
-                # explicit domain conversion test to catch exceptions that would
-                # otherwise be silently handled in TestOnTestData
-                _ = test.transform(train.domain)
-                aucnow = AUC(TestOnTestData()(train, test, [learner]))
-                self.assertAlmostEqual(aucnow, aucorig, delta=0.03, msg="Preprocessor " + str(proc))
-                test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
-                _ = test.transform(train.domain)  # explicit call again
-                aucnow = AUC(TestOnTestData()(train, test, [learner]))
-                # the difference should be slight
-                self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
-
-
-class _RemoveNaNRows(Orange.preprocess.preprocess.Preprocess):
-
-    def __call__(self, data):
-        mask = np.isnan(data.X)
-        mask = np.any(mask, axis=1)
-        return data[~mask]
diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 98e13466e..8127d10ce 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -4,7 +4,9 @@
 import numpy as np
 
 import Orange
+from Orange.classification import LogisticRegressionLearner
 from Orange.data import Table
+from Orange.evaluation import TestOnTestData, AUC
 from Orange.preprocess.preprocess import PreprocessorList
 
 from orangecontrib.spectroscopy.data import getx
@@ -18,6 +20,7 @@
 from orangecontrib.spectroscopy.preprocess.me_emsc import ME_EMSC
 from orangecontrib.spectroscopy.preprocess.atm_corr import AtmCorr
 from orangecontrib.spectroscopy.preprocess.utils import replacex
+from orangecontrib.spectroscopy.tests.test_conversion import separate_learn_test, slightly_change_wavenumbers, odd_attr
 from orangecontrib.spectroscopy.tests.util import smaller_data
 
 
@@ -485,6 +488,70 @@ def test_reference_exceptions(self):
             NormalizeReference(reference=Table.from_numpy(None, [[2], [6]]))
 
 
+class TestConversion(unittest.TestCase):
+
+    preprocessors = PREPROCESSORS
+
+    def test_slightly_different_domain(self):
+        """ If test data has a slightly different domain then (with interpolation)
+        we should obtain a similar classification score. """
+        # rows full of unknowns make LogisticRegression undefined
+        # we can obtain them, for example, with EMSC, if one of the badspectra
+        # is a spectrum from the data
+        learner = LogisticRegressionLearner(max_iter=1000, preprocessors=[_RemoveNaNRows()])
+
+        for proc in self.preprocessors:
+            if hasattr(proc, "skip_add_zeros"):
+                continue
+            with self.subTest(proc):
+                # LR that can not handle unknown values
+                train, test = separate_learn_test(preprocessor_data(proc))
+                train1 = proc(train)
+                aucorig = AUC(TestOnTestData()(train1, test, [learner]))
+                test = slightly_change_wavenumbers(test, 0.00001)
+                test = odd_attr(test)
+                # a subset of points for training so that all test sets points
+                # are within the train set points, which gives no unknowns
+                train = Interpolate(points=getx(train)[1:-3])(train)  # interpolatable train
+                train = proc(train)
+                # explicit domain conversion test to catch exceptions that would
+                # otherwise be silently handled in TestOnTestData
+                _ = test.transform(train.domain)
+                aucnow = AUC(TestOnTestData()(train, test, [learner]))
+                self.assertAlmostEqual(aucnow, aucorig, delta=0.03, msg="Preprocessor " + str(proc))
+                test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
+                _ = test.transform(train.domain)  # explicit call again
+                aucnow = AUC(TestOnTestData()(train, test, [learner]))
+                # the difference should be slight
+                self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
+
+
+class TestConversionIndpSamples(TestConversion, unittest.TestCase):
+
+    preprocessors = PREPROCESSORS_INDEPENDENT_SAMPLES
+
+    def test_whole_and_train_separate(self):
+        """ Applying a preprocessor before spliting data into train and test
+        and applying is just on train data should yield the same transformation of
+        the test data. """
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                data = preprocessor_data(proc)
+                _, test1 = separate_learn_test(proc(data))
+                train, test = separate_learn_test(data)
+                train = proc(train)
+                test_transformed = test.transform(train.domain)
+                np.testing.assert_almost_equal(test_transformed.X, test1.X,
+                                               err_msg="Preprocessor " + str(proc))
+
+class _RemoveNaNRows(Orange.preprocess.preprocess.Preprocess):
+
+    def __call__(self, data):
+        mask = np.isnan(data.X)
+        mask = np.any(mask, axis=1)
+        return data[~mask]
+    
+
 class TestCommon(unittest.TestCase):
 
     def test_no_samples(self):

From 62c94af62d59f46c202803462e81f3c8b0a16cfd Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 16:15:43 +0200
Subject: [PATCH 02/13] TestCommon -> TestStrangeData

---
 .../spectroscopy/tests/test_preprocess.py      | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 8127d10ce..660fd4913 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -550,14 +550,16 @@ def __call__(self, data):
         mask = np.isnan(data.X)
         mask = np.any(mask, axis=1)
         return data[~mask]
-    
 
-class TestCommon(unittest.TestCase):
+
+class TestStrangeData(unittest.TestCase):
+
+    preprocessors = PREPROCESSORS
 
     def test_no_samples(self):
         """ Preprocessors should not crash when there are no input samples. """
         data = SMALL_COLLAGEN[:0]
-        for proc in PREPROCESSORS:
+        for proc in self.preprocessors:
             with self.subTest(proc):
                 _ = proc(data)
 
@@ -567,13 +569,13 @@ def test_no_attributes(self):
         data = data.transform(Orange.data.Domain([],
                                                  class_vars=data.domain.class_vars,
                                                  metas=data.domain.metas))
-        for proc in PREPROCESSORS:
+        for proc in self.preprocessors:
             with self.subTest(proc):
                 _ = proc(data)
 
     def test_all_nans(self):
         """ Preprocessors should not crash when there are all-nan samples. """
-        for proc in PREPROCESSORS:
+        for proc in self.preprocessors:
             with self.subTest(proc):
                 data = preprocessor_data(proc).copy()
                 with data.unlocked():
@@ -584,7 +586,7 @@ def test_all_nans(self):
                     continue  # allow explicit preprocessor exception
 
     def test_unordered_features(self):
-        for proc in PREPROCESSORS:
+        for proc in self.preprocessors:
             with self.subTest(proc):
                 data = preprocessor_data(proc)
                 data_reversed = reverse_attr(data)
@@ -599,7 +601,7 @@ def test_unordered_features(self):
                 np.testing.assert_almost_equal(X, X_shuffle, err_msg="Preprocessor " + str(proc))
 
     def test_unknown_no_propagate(self):
-        for proc in PREPROCESSORS:
+        for proc in self.preprocessors:
             with self.subTest(proc):
                 data = preprocessor_data(proc).copy()
                 # one unknown in line
@@ -615,7 +617,7 @@ def test_unknown_no_propagate(self):
 
     def test_no_infs(self):
         """ Preprocessors should not return (-)inf """
-        for proc in PREPROCESSORS:
+        for proc in self.preprocessors:
             with self.subTest(proc):
                 data = preprocessor_data(proc).copy()
                 # add some zeros to the dataset

From 4e370b274386e66d090caebe51c5043128c18a4c Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 16:41:58 +0200
Subject: [PATCH 03/13] test_tile_reader: no need to test all preprocessors

Tile reading uses domain transformations that are already tested in
TestConversionIndpSamplesMixin, so a short representative sequence of
preprocessors is sufficient here.
---
 .../spectroscopy/tests/test_tile_reader.py    | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_tile_reader.py b/orangecontrib/spectroscopy/tests/test_tile_reader.py
index 8a92fec4a..0e5a32d65 100644
--- a/orangecontrib/spectroscopy/tests/test_tile_reader.py
+++ b/orangecontrib/spectroscopy/tests/test_tile_reader.py
@@ -9,7 +9,8 @@
 from Orange.widgets.tests.base import WidgetTest
 
 from orangecontrib.spectroscopy import get_sample_datasets_dir
-from orangecontrib.spectroscopy.tests.test_preprocess import PREPROCESSORS_INDEPENDENT_SAMPLES
+from orangecontrib.spectroscopy.preprocess import Interpolate, SavitzkyGolayFiltering, Cut, \
+    GaussianSmoothing, Absorbance, Transmittance, Integrate
 from orangecontrib.spectroscopy.widgets.owintegrate import OWIntegrate
 from orangecontrib.spectroscopy.widgets.owpreprocess import OWPreprocess, \
     create_preprocessor
@@ -18,11 +19,18 @@
 
 AGILENT_TILE = "agilent/5_mosaic_agg1024.dmt"
 
-# EMSC test fails on this dataset with
-# "ValueError: On entry to DLASCL parameter number 4 had an illegal value"
-PREPROCESSORS_INDEPENDENT_SAMPLES_NO_EMSC = [
-    p for p in PREPROCESSORS_INDEPENDENT_SAMPLES
-    if type(p).__name__ not in ["EMSC", "ExtractEXAFSUsage", "ME_EMSC"]]
+# no need to test all preprocessors here because tile reading uses domain
+# transformations that are already tested in TestConversionIndpSamplesMixin
+PREPROCESSORS_SEQUENCE = [
+    Interpolate(np.linspace(1000, 1700, 100)),
+    SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2),
+    Cut(lowlim=1000, highlim=1800),
+    GaussianSmoothing(sd=3.),
+    Absorbance(),
+    Transmittance(),
+    Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]])
+]
+
 
 class TestTileReaders(unittest.TestCase):
 
@@ -51,7 +59,7 @@ def test_single_preproc(self):
         # TODO problematic interface design: should be able to use Orange.data.Table directly
         path = os.path.join(get_sample_datasets_dir(), AGILENT_TILE)
         reader = OWTilefile.get_tile_reader(path)
-        for p in PREPROCESSORS_INDEPENDENT_SAMPLES_NO_EMSC:
+        for p in PREPROCESSORS_SEQUENCE:
             reader.set_preprocessor(p)
             reader.read()
 
@@ -59,7 +67,7 @@ def test_preprocessor_list(self):
         # TODO problematic interface design: should be able to use Orange.data.Table directly
         path = os.path.join(get_sample_datasets_dir(), AGILENT_TILE)
         reader = OWTilefile.get_tile_reader(path)
-        pp = PreprocessorList(PREPROCESSORS_INDEPENDENT_SAMPLES[0:7])
+        pp = PreprocessorList(PREPROCESSORS_SEQUENCE)
         reader.set_preprocessor(pp)
         t = reader.read()
         assert len(t.domain.attributes) == 3

From 80141dd2e328feeb1303acf39c6e75519f9848dd Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 16:49:23 +0200
Subject: [PATCH 04/13] Move common preprocess tests into mixins; convert PCADenoising and ShiftAndScale

---
 .../spectroscopy/tests/test_preprocess.py     | 61 +++++++++----------
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 660fd4913..a1a54860f 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -29,16 +29,6 @@
 SMALLER_COLLAGEN = smaller_data(COLLAGEN[195:621], 40, 4)  # only glycogen and lipids
 
 
-def preprocessor_data(preproc):
-    """
-    Rerturn appropriate test file for a preprocessor.
-
-    Very slow preprocessors should get smaller files.
-    """
-    if isinstance(preproc, ME_EMSC):
-        return SMALLER_COLLAGEN
-    return SMALL_COLLAGEN
-
 # Preprocessors that work per sample and should return the same
 # result for a sample independent of the other samples
 PREPROCESSORS_INDEPENDENT_SAMPLES = [
@@ -61,7 +51,6 @@ def preprocessor_data(preproc):
     Normalize(method=Normalize.Vector),
     Normalize(method=Normalize.Area, int_method=Integrate.PeakMax, lower=0, upper=10000),
     Normalize(method=Normalize.MinMax),
-    ShiftAndScale(1, 2),
     Despike(threshold=5, cutoff=60, dis=5),
     ALSP(lam=100E+6, itermax=5, p=0.5),
     ARPLS(lam=100E+5, itermax=5, ratio=0.5),
@@ -168,9 +157,7 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
 
 # Preprocessors that use groups of input samples to infer
 # internal parameters.
-PREPROCESSORS_GROUPS_OF_SAMPLES = [
-    PCADenoising(components=2),
-]
+PREPROCESSORS_GROUPS_OF_SAMPLES = []
 
 PREPROCESSORS_INDEPENDENT_SAMPLES += list(
     add_edge_case_data_parameter(ME_EMSC, "reference", SMALLER_COLLAGEN[0:1], max_iter=4))
@@ -488,9 +475,7 @@ def test_reference_exceptions(self):
             NormalizeReference(reference=Table.from_numpy(None, [[2], [6]]))
 
 
-class TestConversion(unittest.TestCase):
-
-    preprocessors = PREPROCESSORS
+class TestConversionMixin:
 
     def test_slightly_different_domain(self):
         """ If test data has a slightly different domain then (with interpolation)
@@ -505,7 +490,7 @@ def test_slightly_different_domain(self):
                 continue
             with self.subTest(proc):
                 # LR that can not handle unknown values
-                train, test = separate_learn_test(preprocessor_data(proc))
+                train, test = separate_learn_test(self.data)
                 train1 = proc(train)
                 aucorig = AUC(TestOnTestData()(train1, test, [learner]))
                 test = slightly_change_wavenumbers(test, 0.00001)
@@ -526,9 +511,7 @@ def test_slightly_different_domain(self):
                 self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
 
 
-class TestConversionIndpSamples(TestConversion, unittest.TestCase):
-
-    preprocessors = PREPROCESSORS_INDEPENDENT_SAMPLES
+class TestConversionIndpSamplesMixin(TestConversionMixin):
 
     def test_whole_and_train_separate(self):
         """ Applying a preprocessor before spliting data into train and test
@@ -536,7 +519,7 @@ def test_whole_and_train_separate(self):
         the test data. """
         for proc in self.preprocessors:
             with self.subTest(proc):
-                data = preprocessor_data(proc)
+                data = self.data
                 _, test1 = separate_learn_test(proc(data))
                 train, test = separate_learn_test(data)
                 train = proc(train)
@@ -552,20 +535,18 @@ def __call__(self, data):
         return data[~mask]
 
 
-class TestStrangeData(unittest.TestCase):
-
-    preprocessors = PREPROCESSORS
+class TestStrangeDataMixin:
 
     def test_no_samples(self):
         """ Preprocessors should not crash when there are no input samples. """
-        data = SMALL_COLLAGEN[:0]
+        data = self.data[:0]
         for proc in self.preprocessors:
             with self.subTest(proc):
                 _ = proc(data)
 
     def test_no_attributes(self):
         """ Preprocessors should not crash when samples have no attributes. """
-        data = SMALL_COLLAGEN
+        data = self.data
         data = data.transform(Orange.data.Domain([],
                                                  class_vars=data.domain.class_vars,
                                                  metas=data.domain.metas))
@@ -577,7 +558,7 @@ def test_all_nans(self):
         """ Preprocessors should not crash when there are all-nan samples. """
         for proc in self.preprocessors:
             with self.subTest(proc):
-                data = preprocessor_data(proc).copy()
+                data = self.data.copy()
                 with data.unlocked():
                     data.X[0, :] = np.nan
                 try:
@@ -588,7 +569,7 @@ def test_all_nans(self):
     def test_unordered_features(self):
         for proc in self.preprocessors:
             with self.subTest(proc):
-                data = preprocessor_data(proc)
+                data = self.data
                 data_reversed = reverse_attr(data)
                 data_shuffle = shuffle_attr(data)
                 pdata = proc(data)
@@ -603,7 +584,7 @@ def test_unordered_features(self):
     def test_unknown_no_propagate(self):
         for proc in self.preprocessors:
             with self.subTest(proc):
-                data = preprocessor_data(proc).copy()
+                data = self.data.copy()
                 # one unknown in line
                 with data.unlocked():
                     for i in range(min(len(data), len(data.domain.attributes))):
@@ -619,7 +600,7 @@ def test_no_infs(self):
         """ Preprocessors should not return (-)inf """
         for proc in self.preprocessors:
             with self.subTest(proc):
-                data = preprocessor_data(proc).copy()
+                data = self.data.copy()
                 # add some zeros to the dataset
                 with data.unlocked():
                     for i in range(min(len(data), len(data.domain.attributes))):
@@ -634,7 +615,18 @@ def test_no_infs(self):
                 self.assertFalse(anyinfs, msg="Preprocessor " + str(proc))
 
 
-class TestPCADenoising(unittest.TestCase):
+class TestCommonMixin(TestStrangeDataMixin, TestConversionMixin):
+    pass
+
+
+class TestCommonIndpSamplesMixin(TestStrangeDataMixin, TestConversionIndpSamplesMixin):
+    pass
+
+
+class TestPCADenoising(unittest.TestCase, TestCommonMixin):
+
+    preprocessors = [PCADenoising(components=2)]
+    data = SMALL_COLLAGEN
 
     def test_no_samples(self):
         data = Orange.data.Table("iris")
@@ -657,7 +649,10 @@ def test_iris(self):
                                         [4.75015528, 3.15366444, 1.46254138, 0.23693223]])
 
 
-class TestShiftAndScale(unittest.TestCase):
+class TestShiftAndScale(unittest.TestCase, TestConversionIndpSamplesMixin):
+
+    preprocessors = [ShiftAndScale(1, 2)]
+    data = SMALL_COLLAGEN
 
     def test_simple(self):
         data = Table.from_numpy(None, [[1.0, 2.0, 3.0, 4.0]])

From d440f91dd8a5fb49a19860fd40e9ab7d0a0fe565 Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 16:53:18 +0200
Subject: [PATCH 05/13] test_preprocess: move common utilities to the top

---
 .../spectroscopy/tests/test_preprocess.py     | 296 +++++++++---------
 1 file changed, 148 insertions(+), 148 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index a1a54860f..96b2d14cd 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -165,6 +165,154 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
 PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES
 
 
+class TestConversionMixin:
+
+    def test_slightly_different_domain(self):
+        """ If test data has a slightly different domain then (with interpolation)
+        we should obtain a similar classification score. """
+        # rows full of unknowns make LogisticRegression undefined
+        # we can obtain them, for example, with EMSC, if one of the badspectra
+        # is a spectrum from the data
+        learner = LogisticRegressionLearner(max_iter=1000, preprocessors=[_RemoveNaNRows()])
+
+        for proc in self.preprocessors:
+            if hasattr(proc, "skip_add_zeros"):
+                continue
+            with self.subTest(proc):
+                # LR that can not handle unknown values
+                train, test = separate_learn_test(self.data)
+                train1 = proc(train)
+                aucorig = AUC(TestOnTestData()(train1, test, [learner]))
+                test = slightly_change_wavenumbers(test, 0.00001)
+                test = odd_attr(test)
+                # a subset of points for training so that all test sets points
+                # are within the train set points, which gives no unknowns
+                train = Interpolate(points=getx(train)[1:-3])(train)  # interpolatable train
+                train = proc(train)
+                # explicit domain conversion test to catch exceptions that would
+                # otherwise be silently handled in TestOnTestData
+                _ = test.transform(train.domain)
+                aucnow = AUC(TestOnTestData()(train, test, [learner]))
+                self.assertAlmostEqual(aucnow, aucorig, delta=0.03, msg="Preprocessor " + str(proc))
+                test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
+                _ = test.transform(train.domain)  # explicit call again
+                aucnow = AUC(TestOnTestData()(train, test, [learner]))
+                # the difference should be slight
+                self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
+
+
+class TestConversionIndpSamplesMixin(TestConversionMixin):
+
+    def test_whole_and_train_separate(self):
+        """ Applying a preprocessor before spliting data into train and test
+        and applying is just on train data should yield the same transformation of
+        the test data. """
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                data = self.data
+                _, test1 = separate_learn_test(proc(data))
+                train, test = separate_learn_test(data)
+                train = proc(train)
+                test_transformed = test.transform(train.domain)
+                np.testing.assert_almost_equal(test_transformed.X, test1.X,
+                                               err_msg="Preprocessor " + str(proc))
+
+class _RemoveNaNRows(Orange.preprocess.preprocess.Preprocess):
+
+    def __call__(self, data):
+        mask = np.isnan(data.X)
+        mask = np.any(mask, axis=1)
+        return data[~mask]
+
+
+class TestStrangeDataMixin:
+
+    def test_no_samples(self):
+        """ Preprocessors should not crash when there are no input samples. """
+        data = self.data[:0]
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                _ = proc(data)
+
+    def test_no_attributes(self):
+        """ Preprocessors should not crash when samples have no attributes. """
+        data = self.data
+        data = data.transform(Orange.data.Domain([],
+                                                 class_vars=data.domain.class_vars,
+                                                 metas=data.domain.metas))
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                _ = proc(data)
+
+    def test_all_nans(self):
+        """ Preprocessors should not crash when there are all-nan samples. """
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                data = self.data.copy()
+                with data.unlocked():
+                    data.X[0, :] = np.nan
+                try:
+                    _ = proc(data)
+                except PreprocessException:
+                    continue  # allow explicit preprocessor exception
+
+    def test_unordered_features(self):
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                data = self.data
+                data_reversed = reverse_attr(data)
+                data_shuffle = shuffle_attr(data)
+                pdata = proc(data)
+                X = pdata.X[:, np.argsort(getx(pdata))]
+                pdata_reversed = proc(data_reversed)
+                X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
+                np.testing.assert_almost_equal(X, X_reversed, err_msg="Preprocessor " + str(proc))
+                pdata_shuffle = proc(data_shuffle)
+                X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
+                np.testing.assert_almost_equal(X, X_shuffle, err_msg="Preprocessor " + str(proc))
+
+    def test_unknown_no_propagate(self):
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                data = self.data.copy()
+                # one unknown in line
+                with data.unlocked():
+                    for i in range(min(len(data), len(data.domain.attributes))):
+                        data.X[i, i] = np.nan
+
+                if hasattr(proc, "skip_add_zeros"):
+                    continue
+                pdata = proc(data)
+                sumnans = np.sum(np.isnan(pdata.X), axis=1)
+                self.assertFalse(np.any(sumnans > 1), msg="Preprocessor " + str(proc))
+
+    def test_no_infs(self):
+        """ Preprocessors should not return (-)inf """
+        for proc in self.preprocessors:
+            with self.subTest(proc):
+                data = self.data.copy()
+                # add some zeros to the dataset
+                with data.unlocked():
+                    for i in range(min(len(data), len(data.domain.attributes))):
+                        data.X[i, i] = 0
+                    data.X[0, :] = 0
+                    data.X[:, 0] = 0
+                try:
+                    pdata = proc(data)
+                except PreprocessException:
+                    continue  # allow explicit preprocessor exception
+                anyinfs = np.any(np.isinf(pdata.X))
+                self.assertFalse(anyinfs, msg="Preprocessor " + str(proc))
+
+
+class TestCommonMixin(TestStrangeDataMixin, TestConversionMixin):
+    pass
+
+
+class TestCommonIndpSamplesMixin(TestStrangeDataMixin, TestConversionIndpSamplesMixin):
+    pass
+
+
 class TestSpSubtract(unittest.TestCase):
 
     def test_simple(self):
@@ -475,154 +623,6 @@ def test_reference_exceptions(self):
             NormalizeReference(reference=Table.from_numpy(None, [[2], [6]]))
 
 
-class TestConversionMixin:
-
-    def test_slightly_different_domain(self):
-        """ If test data has a slightly different domain then (with interpolation)
-        we should obtain a similar classification score. """
-        # rows full of unknowns make LogisticRegression undefined
-        # we can obtain them, for example, with EMSC, if one of the badspectra
-        # is a spectrum from the data
-        learner = LogisticRegressionLearner(max_iter=1000, preprocessors=[_RemoveNaNRows()])
-
-        for proc in self.preprocessors:
-            if hasattr(proc, "skip_add_zeros"):
-                continue
-            with self.subTest(proc):
-                # LR that can not handle unknown values
-                train, test = separate_learn_test(self.data)
-                train1 = proc(train)
-                aucorig = AUC(TestOnTestData()(train1, test, [learner]))
-                test = slightly_change_wavenumbers(test, 0.00001)
-                test = odd_attr(test)
-                # a subset of points for training so that all test sets points
-                # are within the train set points, which gives no unknowns
-                train = Interpolate(points=getx(train)[1:-3])(train)  # interpolatable train
-                train = proc(train)
-                # explicit domain conversion test to catch exceptions that would
-                # otherwise be silently handled in TestOnTestData
-                _ = test.transform(train.domain)
-                aucnow = AUC(TestOnTestData()(train, test, [learner]))
-                self.assertAlmostEqual(aucnow, aucorig, delta=0.03, msg="Preprocessor " + str(proc))
-                test = Interpolate(points=getx(test) - 1.)(test)  # also do a shift
-                _ = test.transform(train.domain)  # explicit call again
-                aucnow = AUC(TestOnTestData()(train, test, [learner]))
-                # the difference should be slight
-                self.assertAlmostEqual(aucnow, aucorig, delta=0.05, msg="Preprocessor " + str(proc))
-
-
-class TestConversionIndpSamplesMixin(TestConversionMixin):
-
-    def test_whole_and_train_separate(self):
-        """ Applying a preprocessor before spliting data into train and test
-        and applying is just on train data should yield the same transformation of
-        the test data. """
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                data = self.data
-                _, test1 = separate_learn_test(proc(data))
-                train, test = separate_learn_test(data)
-                train = proc(train)
-                test_transformed = test.transform(train.domain)
-                np.testing.assert_almost_equal(test_transformed.X, test1.X,
-                                               err_msg="Preprocessor " + str(proc))
-
-class _RemoveNaNRows(Orange.preprocess.preprocess.Preprocess):
-
-    def __call__(self, data):
-        mask = np.isnan(data.X)
-        mask = np.any(mask, axis=1)
-        return data[~mask]
-
-
-class TestStrangeDataMixin:
-
-    def test_no_samples(self):
-        """ Preprocessors should not crash when there are no input samples. """
-        data = self.data[:0]
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                _ = proc(data)
-
-    def test_no_attributes(self):
-        """ Preprocessors should not crash when samples have no attributes. """
-        data = self.data
-        data = data.transform(Orange.data.Domain([],
-                                                 class_vars=data.domain.class_vars,
-                                                 metas=data.domain.metas))
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                _ = proc(data)
-
-    def test_all_nans(self):
-        """ Preprocessors should not crash when there are all-nan samples. """
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                data = self.data.copy()
-                with data.unlocked():
-                    data.X[0, :] = np.nan
-                try:
-                    _ = proc(data)
-                except PreprocessException:
-                    continue  # allow explicit preprocessor exception
-
-    def test_unordered_features(self):
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                data = self.data
-                data_reversed = reverse_attr(data)
-                data_shuffle = shuffle_attr(data)
-                pdata = proc(data)
-                X = pdata.X[:, np.argsort(getx(pdata))]
-                pdata_reversed = proc(data_reversed)
-                X_reversed = pdata_reversed.X[:, np.argsort(getx(pdata_reversed))]
-                np.testing.assert_almost_equal(X, X_reversed, err_msg="Preprocessor " + str(proc))
-                pdata_shuffle = proc(data_shuffle)
-                X_shuffle = pdata_shuffle.X[:, np.argsort(getx(pdata_shuffle))]
-                np.testing.assert_almost_equal(X, X_shuffle, err_msg="Preprocessor " + str(proc))
-
-    def test_unknown_no_propagate(self):
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                data = self.data.copy()
-                # one unknown in line
-                with data.unlocked():
-                    for i in range(min(len(data), len(data.domain.attributes))):
-                        data.X[i, i] = np.nan
-
-                if hasattr(proc, "skip_add_zeros"):
-                    continue
-                pdata = proc(data)
-                sumnans = np.sum(np.isnan(pdata.X), axis=1)
-                self.assertFalse(np.any(sumnans > 1), msg="Preprocessor " + str(proc))
-
-    def test_no_infs(self):
-        """ Preprocessors should not return (-)inf """
-        for proc in self.preprocessors:
-            with self.subTest(proc):
-                data = self.data.copy()
-                # add some zeros to the dataset
-                with data.unlocked():
-                    for i in range(min(len(data), len(data.domain.attributes))):
-                        data.X[i, i] = 0
-                    data.X[0, :] = 0
-                    data.X[:, 0] = 0
-                try:
-                    pdata = proc(data)
-                except PreprocessException:
-                    continue  # allow explicit preprocessor exception
-                anyinfs = np.any(np.isinf(pdata.X))
-                self.assertFalse(anyinfs, msg="Preprocessor " + str(proc))
-
-
-class TestCommonMixin(TestStrangeDataMixin, TestConversionMixin):
-    pass
-
-
-class TestCommonIndpSamplesMixin(TestStrangeDataMixin, TestConversionIndpSamplesMixin):
-    pass
-
-
 class TestPCADenoising(unittest.TestCase, TestCommonMixin):
 
     preprocessors = [PCADenoising(components=2)]

From c75c17fb330ca385693048b98a83c469559bfeb4 Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 17:11:14 +0200
Subject: [PATCH 06/13] test_preprocess: all preprocess tests in this file use
 the new mixin

---
 .../spectroscopy/tests/test_preprocess.py     | 95 ++++++++++---------
 1 file changed, 48 insertions(+), 47 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 96b2d14cd..0228580d5 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -33,11 +33,7 @@
 # result for a sample independent of the other samples
 PREPROCESSORS_INDEPENDENT_SAMPLES = [
     Interpolate(np.linspace(1000, 1700, 100)),
-    SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2),
     Cut(lowlim=1000, highlim=1800),
-    GaussianSmoothing(sd=3.),
-    Absorbance(),
-    Transmittance(),
     Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
     Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
     Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),
@@ -46,11 +42,6 @@
     Integrate(methods=Integrate.PeakAt, limits=[[1100]]),
     Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]),
     Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]]),
-    RubberbandBaseline(),
-    LinearBaseline(),
-    Normalize(method=Normalize.Vector),
-    Normalize(method=Normalize.Area, int_method=Integrate.PeakMax, lower=0, upper=10000),
-    Normalize(method=Normalize.MinMax),
     Despike(threshold=5, cutoff=60, dis=5),
     ALSP(lam=100E+6, itermax=5, p=0.5),
     ARPLS(lam=100E+5, itermax=5, ratio=0.5),
@@ -131,10 +122,6 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
         yield p
 
 
-for p in [Absorbance, Transmittance]:
-    # single reference
-    PREPROCESSORS_INDEPENDENT_SAMPLES += list(add_edge_case_data_parameter(p, "reference", SMALL_COLLAGEN[0:1]))
-
 # EMSC with different kinds of reference
 PREPROCESSORS_INDEPENDENT_SAMPLES += list(
     add_edge_case_data_parameter(EMSC, "reference", SMALL_COLLAGEN[0:1]))
@@ -148,12 +135,6 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
     add_edge_case_data_parameter(AtmCorr, "reference", SMALL_COLLAGEN[0:1],
                                  correct_ranges=[(1300, 2100)], smooth_win=5))
 
-PREPROCESSORS_INDEPENDENT_SAMPLES += \
-    list(add_edge_case_data_parameter(NormalizeReference, "reference", SMALL_COLLAGEN[:1]))
-
-PREPROCESSORS_INDEPENDENT_SAMPLES += \
-    list(add_edge_case_data_parameter(SpSubtract, "reference", SMALL_COLLAGEN[:1], amount=0.1))
-
 
 # Preprocessors that use groups of input samples to infer
 # internal parameters.
@@ -313,7 +294,11 @@ class TestCommonIndpSamplesMixin(TestStrangeDataMixin, TestConversionIndpSamples
     pass
 
 
-class TestSpSubtract(unittest.TestCase):
+class TestSpSubtract(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = list(add_edge_case_data_parameter(
+        SpSubtract, "reference", SMALL_COLLAGEN[:1], amount=0.1))
+    data = SMALL_COLLAGEN
 
     def test_simple(self):
         data = Table.from_numpy(None, [[1.0, 2.0, 3.0, 4.0]])
@@ -323,7 +308,12 @@ def test_simple(self):
         np.testing.assert_almost_equal(fdata.X, [[-1.0, -2.0, -3.0, -4.0]])
 
 
-class TestTransmittance(unittest.TestCase):
+class TestTransmittance(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors =  [Transmittance()] + \
+                      list(add_edge_case_data_parameter(
+                          Transmittance, "reference", SMALL_COLLAGEN[0:1]))
+    data = SMALL_COLLAGEN
 
     def test_domain_conversion(self):
         """Test whether a domain can be used for conversion."""
@@ -357,7 +347,13 @@ def disabled_test_eq(self):
         self.assertNotEqual(a.domain, t1.domain)
 
 
-class TestAbsorbance(unittest.TestCase):
+class TestAbsorbance(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors =  [Absorbance()] + \
+                      list(add_edge_case_data_parameter(
+                          Absorbance, "reference", SMALL_COLLAGEN[0:1]))
+    data = SMALL_COLLAGEN
+
 
     def test_domain_conversion(self):
         """Test whether a domain can be used for conversion."""
@@ -390,17 +386,10 @@ def disabled_test_eq(self):
         self.assertEqual(t4.domain, t5.domain)
 
 
-class TestSavitzkyGolay(unittest.TestCase):
+class TestSavitzkyGolay(unittest.TestCase, TestCommonIndpSamplesMixin):
 
-    def test_unknown_no_propagate(self):
-        data = Orange.data.Table("iris")[:5].copy()
-        f = SavitzkyGolayFiltering()
-        with data.unlocked():
-            for i in range(4):
-                data.X[i, i] = np.nan
-            data.X[4] = np.nan
-        fdata = f(data)
-        np.testing.assert_equal(np.sum(np.isnan(fdata.X), axis=1), [1, 1, 1, 1, 4])
+    preprocessors =  [SavitzkyGolayFiltering(window=9, polyorder=2, deriv=2)]
+    data = SMALL_COLLAGEN
 
     def test_simple(self):
         data = Orange.data.Table("iris")
@@ -429,17 +418,10 @@ def disabled_test_eq(self):
         self.assertEqual(p1.domain, s2.domain)
 
 
-class TestGaussian(unittest.TestCase):
+class TestGaussian(unittest.TestCase, TestCommonIndpSamplesMixin):
 
-    def test_unknown_no_propagate(self):
-        data = Orange.data.Table("iris")[:5].copy()
-        f = GaussianSmoothing()
-        with data.unlocked():
-            for i in range(4):
-                data.X[i, i] = np.nan
-            data.X[4] = np.nan
-        fdata = f(data)
-        np.testing.assert_equal(np.sum(np.isnan(fdata.X), axis=1), [1, 1, 1, 1, 4])
+    preprocessors = [GaussianSmoothing(sd=3.)]
+    data = SMALL_COLLAGEN
 
     def test_simple(self):
         data = Orange.data.Table("iris")
@@ -450,7 +432,10 @@ def test_simple(self):
                                        [[4.4907066, 3.2794677, 1.7641664, 0.6909083]])
 
 
-class TestRubberbandBaseline(unittest.TestCase):
+class TestRubberbandBaseline(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors =  [RubberbandBaseline()]
+    data = SMALL_COLLAGEN
 
     def test_whole(self):
         """ Every point belongs in the convex region. """
@@ -471,7 +456,10 @@ def test_simple(self):
         np.testing.assert_equal(i.X, [[0, 0, -0.5, 0]])
 
 
-class TestLinearBaseline(unittest.TestCase):
+class TestLinearBaseline(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors =  [LinearBaseline()]
+    data = SMALL_COLLAGEN
 
     def test_whole(self):
         data = Table.from_numpy(None, [[1, 5, 1]])
@@ -503,7 +491,14 @@ def test_edgepoints_out_of_data(self):
         np.testing.assert_almost_equal(i.X, [[0, 4, 0]])
 
 
-class TestNormalize(unittest.TestCase):
+class TestNormalize(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [Normalize(method=Normalize.Vector),
+                     Normalize(method=Normalize.Area,
+                               int_method=Integrate.PeakMax, lower=0, upper=10000),
+                     Normalize(method=Normalize.MinMax)]
+
+    data = SMALL_COLLAGEN
 
     def test_vector_norm(self):
         data = Table.from_numpy(None, [[2, 1, 2, 2, 3]])
@@ -606,7 +601,13 @@ def disabled_test_eq(self):
         self.assertEqual(p1.domain, p4.domain)
 
 
-class TestNormalizeReference(unittest.TestCase):
+class TestNormalizeReference(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = (list(add_edge_case_data_parameter(NormalizeReference,
+                                                      "reference", SMALL_COLLAGEN[:1])) +
+                     list(add_edge_case_data_parameter(NormalizePhaseReference,
+                                                      "reference", SMALL_COLLAGEN[:1])))
+    data = SMALL_COLLAGEN
 
     def test_reference(self):
         data = Table.from_numpy(None, [[2, 1, 3], [4, 2, 6]])
@@ -649,7 +650,7 @@ def test_iris(self):
                                         [4.75015528, 3.15366444, 1.46254138, 0.23693223]])
 
 
-class TestShiftAndScale(unittest.TestCase, TestConversionIndpSamplesMixin):
+class TestShiftAndScale(unittest.TestCase, TestCommonIndpSamplesMixin):
 
     preprocessors = [ShiftAndScale(1, 2)]
     data = SMALL_COLLAGEN

From 120135d6666afd746b5adc860d7b3f05b53e760b Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:03:17 +0200
Subject: [PATCH 07/13] emsc: test with new mixin

---
 orangecontrib/spectroscopy/tests/test_emsc.py      | 12 +++++++++++-
 orangecontrib/spectroscopy/tests/test_me_emsc.py   |  8 +++++++-
 .../spectroscopy/tests/test_preprocess.py          | 14 +-------------
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_emsc.py b/orangecontrib/spectroscopy/tests/test_emsc.py
index 94bcfe0e4..9e701705e 100644
--- a/orangecontrib/spectroscopy/tests/test_emsc.py
+++ b/orangecontrib/spectroscopy/tests/test_emsc.py
@@ -8,9 +8,19 @@
     SelectionFunction, SmoothedSelectionFunction
 from orangecontrib.spectroscopy.preprocess.npfunc import Sum
 from orangecontrib.spectroscopy.tests.util import spectra_table
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALL_COLLAGEN, add_edge_case_data_parameter
 
 
-class TestEMSC(unittest.TestCase):
+class TestEMSC(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    different_reference = list(
+        add_edge_case_data_parameter(EMSC, "reference", SMALL_COLLAGEN[0:1]))
+    different_badspectra = list(
+        add_edge_case_data_parameter(EMSC, "badspectra", SMALL_COLLAGEN[0:2],
+                                     reference=SMALL_COLLAGEN[-1:]))
+    preprocessors = different_reference + different_badspectra
+    data = SMALL_COLLAGEN
 
     def test_ab(self):
         data = Table.from_numpy(None, [[1.0, 2.0, 1.0, 1.0],
diff --git a/orangecontrib/spectroscopy/tests/test_me_emsc.py b/orangecontrib/spectroscopy/tests/test_me_emsc.py
index 5adac5556..c68236282 100644
--- a/orangecontrib/spectroscopy/tests/test_me_emsc.py
+++ b/orangecontrib/spectroscopy/tests/test_me_emsc.py
@@ -8,6 +8,8 @@
 from orangecontrib.spectroscopy.preprocess.me_emsc import ME_EMSC
 from orangecontrib.spectroscopy.preprocess.emsc import SelectionFunction, SmoothedSelectionFunction
 from orangecontrib.spectroscopy.preprocess.npfunc import Sum
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALLER_COLLAGEN, add_edge_case_data_parameter
 
 
 def weights_from_inflection_points_legacy(points, kappa, wavenumbers):
@@ -82,7 +84,11 @@ def weights_from_inflection_points_legacy(points, kappa, wavenumbers):
     return data
 
 
-class TestME_EMSC(unittest.TestCase):
+class TestME_EMSC(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = list(
+        add_edge_case_data_parameter(ME_EMSC, "reference", SMALLER_COLLAGEN[0:1], max_iter=4))
+    data = SMALLER_COLLAGEN
 
     @classmethod
     def setUpClass(cls):
diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 0228580d5..18b2e4c95 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -13,11 +13,10 @@
 from orangecontrib.spectroscopy.preprocess import Absorbance, Transmittance, \
     Integrate, Interpolate, Cut, SavitzkyGolayFiltering, \
     GaussianSmoothing, PCADenoising, RubberbandBaseline, \
-    Normalize, LinearBaseline, ShiftAndScale, EMSC, MissingReferenceException, \
+    Normalize, LinearBaseline, ShiftAndScale, MissingReferenceException, \
     WrongReferenceException, NormalizeReference, XASnormalization, ExtractEXAFS, \
     PreprocessException, NormalizePhaseReference, Despike, SpSubtract
 from orangecontrib.spectroscopy.preprocess.als import ALSP, ARPLS, AIRPLS
-from orangecontrib.spectroscopy.preprocess.me_emsc import ME_EMSC
 from orangecontrib.spectroscopy.preprocess.atm_corr import AtmCorr
 from orangecontrib.spectroscopy.preprocess.utils import replacex
 from orangecontrib.spectroscopy.tests.test_conversion import separate_learn_test, slightly_change_wavenumbers, odd_attr
@@ -122,14 +121,6 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
         yield p
 
 
-# EMSC with different kinds of reference
-PREPROCESSORS_INDEPENDENT_SAMPLES += list(
-    add_edge_case_data_parameter(EMSC, "reference", SMALL_COLLAGEN[0:1]))
-# EMSC with different kinds of bad spectra
-PREPROCESSORS_INDEPENDENT_SAMPLES += list(
-    add_edge_case_data_parameter(EMSC, "badspectra", SMALL_COLLAGEN[0:2],
-                                 reference=SMALL_COLLAGEN[-1:]))
-
 # AtmCorr with different kinds of reference
 PREPROCESSORS_INDEPENDENT_SAMPLES += list(
     add_edge_case_data_parameter(AtmCorr, "reference", SMALL_COLLAGEN[0:1],
@@ -140,9 +131,6 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
 # internal parameters.
 PREPROCESSORS_GROUPS_OF_SAMPLES = []
 
-PREPROCESSORS_INDEPENDENT_SAMPLES += list(
-    add_edge_case_data_parameter(ME_EMSC, "reference", SMALLER_COLLAGEN[0:1], max_iter=4))
-
 PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES
 
 

From 93d0d1016dfda9ec85962c97225b23c12ef03511 Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:06:07 +0200
Subject: [PATCH 08/13] atmcorr: test with new mixin

---
 orangecontrib/spectroscopy/tests/test_atm_corr.py   | 9 ++++++++-
 orangecontrib/spectroscopy/tests/test_preprocess.py | 7 -------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_atm_corr.py b/orangecontrib/spectroscopy/tests/test_atm_corr.py
index 4b1ab6413..8fd0fcced 100755
--- a/orangecontrib/spectroscopy/tests/test_atm_corr.py
+++ b/orangecontrib/spectroscopy/tests/test_atm_corr.py
@@ -4,9 +4,16 @@
 
 from orangecontrib.spectroscopy.preprocess.atm_corr import AtmCorr
 from orangecontrib.spectroscopy.tests.util import spectra_table
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALL_COLLAGEN, add_edge_case_data_parameter
 
+class TestAtmCorr(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = list(
+        add_edge_case_data_parameter(AtmCorr, "reference", SMALL_COLLAGEN[0:1],
+                                     correct_ranges=[(1300, 2100)], smooth_win=5))
+    data = SMALL_COLLAGEN
 
-class TestAtmCorr(unittest.TestCase):
     def test_atm_corr(self):
         # Fake atmospheric spectrum
         def atm(wn):
diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 18b2e4c95..d87f892fe 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -17,7 +17,6 @@
     WrongReferenceException, NormalizeReference, XASnormalization, ExtractEXAFS, \
     PreprocessException, NormalizePhaseReference, Despike, SpSubtract
 from orangecontrib.spectroscopy.preprocess.als import ALSP, ARPLS, AIRPLS
-from orangecontrib.spectroscopy.preprocess.atm_corr import AtmCorr
 from orangecontrib.spectroscopy.preprocess.utils import replacex
 from orangecontrib.spectroscopy.tests.test_conversion import separate_learn_test, slightly_change_wavenumbers, odd_attr
 from orangecontrib.spectroscopy.tests.util import smaller_data
@@ -121,12 +120,6 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
         yield p
 
 
-# AtmCorr with different kinds of reference
-PREPROCESSORS_INDEPENDENT_SAMPLES += list(
-    add_edge_case_data_parameter(AtmCorr, "reference", SMALL_COLLAGEN[0:1],
-                                 correct_ranges=[(1300, 2100)], smooth_win=5))
-
-
 # Preprocessors that use groups of input samples to infer
 # internal parameters.
 PREPROCESSORS_GROUPS_OF_SAMPLES = []

From 0f09f738f20e26e45d1d341d45f67416a875beec Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:08:27 +0200
Subject: [PATCH 09/13] cut: test with new mixin

---
 orangecontrib/spectroscopy/tests/test_cut.py        | 7 ++++++-
 orangecontrib/spectroscopy/tests/test_preprocess.py | 3 +--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_cut.py b/orangecontrib/spectroscopy/tests/test_cut.py
index 92e8053f8..0fa657632 100644
--- a/orangecontrib/spectroscopy/tests/test_cut.py
+++ b/orangecontrib/spectroscopy/tests/test_cut.py
@@ -3,9 +3,14 @@
 import unittest
 from orangecontrib.spectroscopy.preprocess import Cut
 from orangecontrib.spectroscopy.data import getx
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALL_COLLAGEN
 
 
-class TestCut(unittest.TestCase):
+class TestCut(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [Cut(lowlim=1000, highlim=1800)]
+    data = SMALL_COLLAGEN
 
     @classmethod
     def setUpClass(cls):
diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index d87f892fe..97ab7ec7f 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -11,7 +11,7 @@
 
 from orangecontrib.spectroscopy.data import getx
 from orangecontrib.spectroscopy.preprocess import Absorbance, Transmittance, \
-    Integrate, Interpolate, Cut, SavitzkyGolayFiltering, \
+    Integrate, Interpolate, SavitzkyGolayFiltering, \
     GaussianSmoothing, PCADenoising, RubberbandBaseline, \
     Normalize, LinearBaseline, ShiftAndScale, MissingReferenceException, \
     WrongReferenceException, NormalizeReference, XASnormalization, ExtractEXAFS, \
@@ -31,7 +31,6 @@
 # result for a sample independent of the other samples
 PREPROCESSORS_INDEPENDENT_SAMPLES = [
     Interpolate(np.linspace(1000, 1700, 100)),
-    Cut(lowlim=1000, highlim=1800),
     Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
     Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
     Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),

From be5406784fc54490761925e56bd03948014f4052 Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:15:06 +0200
Subject: [PATCH 10/13] interpolate, integrate: test with new mixin

---
 .../spectroscopy/tests/test_integrate.py        | 17 ++++++++++++++++-
 .../spectroscopy/tests/test_interpolate.py      |  7 ++++++-
 .../spectroscopy/tests/test_preprocess.py       |  9 ---------
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_integrate.py b/orangecontrib/spectroscopy/tests/test_integrate.py
index 928914d02..21f429c08 100644
--- a/orangecontrib/spectroscopy/tests/test_integrate.py
+++ b/orangecontrib/spectroscopy/tests/test_integrate.py
@@ -4,9 +4,24 @@
 import numpy as np
 
 from orangecontrib.spectroscopy.preprocess import Integrate
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALL_COLLAGEN
 
 
-class TestIntegrate(unittest.TestCase):
+class TestIntegrate(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [
+        Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
+        Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
+        Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),
+        Integrate(methods=Integrate.PeakMax, limits=[[1100, 1200]]),
+        Integrate(methods=Integrate.PeakBaseline, limits=[[1100, 1200]]),
+        Integrate(methods=Integrate.PeakAt, limits=[[1100]]),
+        Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]),
+        Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]])
+    ]
+    data = SMALL_COLLAGEN
+
 
     def test_simple(self):
         data = Table.from_numpy(None, [[1, 2, 3, 1, 1, 1],
diff --git a/orangecontrib/spectroscopy/tests/test_interpolate.py b/orangecontrib/spectroscopy/tests/test_interpolate.py
index ee57e8ded..51743c8db 100644
--- a/orangecontrib/spectroscopy/tests/test_interpolate.py
+++ b/orangecontrib/spectroscopy/tests/test_interpolate.py
@@ -16,9 +16,14 @@
     nan_extend_edges_and_interpolate
 from orangecontrib.spectroscopy.data import getx
 from orangecontrib.spectroscopy.tests.util import spectra_table
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALL_COLLAGEN
 
 
-class TestInterpolate(unittest.TestCase):
+class TestInterpolate(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [Interpolate(np.linspace(1000, 1700, 100))]
+    data = SMALL_COLLAGEN
 
     @classmethod
     def setUpClass(cls):
diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index 97ab7ec7f..bd2d56d96 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -30,15 +30,6 @@
 # Preprocessors that work per sample and should return the same
 # result for a sample independent of the other samples
 PREPROCESSORS_INDEPENDENT_SAMPLES = [
-    Interpolate(np.linspace(1000, 1700, 100)),
-    Integrate(limits=[[900, 100], [1100, 1200], [1200, 1300]]),
-    Integrate(methods=Integrate.Simple, limits=[[1100, 1200]]),
-    Integrate(methods=Integrate.Baseline, limits=[[1100, 1200]]),
-    Integrate(methods=Integrate.PeakMax, limits=[[1100, 1200]]),
-    Integrate(methods=Integrate.PeakBaseline, limits=[[1100, 1200]]),
-    Integrate(methods=Integrate.PeakAt, limits=[[1100]]),
-    Integrate(methods=Integrate.PeakX, limits=[[1100, 1200]]),
-    Integrate(methods=Integrate.PeakXBaseline, limits=[[1100, 1200]]),
     Despike(threshold=5, cutoff=60, dis=5),
     ALSP(lam=100E+6, itermax=5, p=0.5),
     ARPLS(lam=100E+5, itermax=5, ratio=0.5),

From a079d56ec368a3f337447eb95ee9e19d0023a4ca Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:20:17 +0200
Subject: [PATCH 11/13] despike, als: test with new mixin

---
 orangecontrib/spectroscopy/tests/test_als.py        | 12 +++++++++++-
 orangecontrib/spectroscopy/tests/test_despike.py    |  8 +++++++-
 orangecontrib/spectroscopy/tests/test_preprocess.py | 10 ++--------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_als.py b/orangecontrib/spectroscopy/tests/test_als.py
index 48b1dafe1..5d7ba7389 100644
--- a/orangecontrib/spectroscopy/tests/test_als.py
+++ b/orangecontrib/spectroscopy/tests/test_als.py
@@ -4,9 +4,19 @@
 
 from Orange.data import Table
 from orangecontrib.spectroscopy.preprocess.als import ALSP, ARPLS, AIRPLS
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALLER_COLLAGEN
 
 
-class Testals(unittest.TestCase):
+class TestAls(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [
+        ALSP(lam=100E+6, itermax=5, p=0.5),
+        ARPLS(lam=100E+5, itermax=5, ratio=0.5),
+        AIRPLS(lam=100, itermax=5, porder=1),
+    ]
+    data = SMALLER_COLLAGEN
+
     def test_als_Basic(self):
         data = Table.from_numpy(None, [[1.0, 2.0, 10.0, 5.0],
                                        [3.0, 5.0, 9.0, 4.0]])
diff --git a/orangecontrib/spectroscopy/tests/test_despike.py b/orangecontrib/spectroscopy/tests/test_despike.py
index 56aa4987e..eefb2ccf2 100644
--- a/orangecontrib/spectroscopy/tests/test_despike.py
+++ b/orangecontrib/spectroscopy/tests/test_despike.py
@@ -2,9 +2,15 @@
 import numpy as np
 from Orange.data import Table
 from orangecontrib.spectroscopy.preprocess import Despike
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALL_COLLAGEN
 
 
-class TestSpikeremoval(unittest.TestCase):
+class TestSpikeRemoval(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [Despike(threshold=5, cutoff=60, dis=5)]
+    data = SMALL_COLLAGEN
+
     def test_spikes(self):
         data = Table.from_numpy(None, [[1000, 1, 1, 1, 1, 10, 1, 1, 1000, 1000, 1000, 1, 1000,
                                         1, 1, 1, 1000, 1000, 1000, 1000],
diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index bd2d56d96..e8f759009 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -15,8 +15,7 @@
     GaussianSmoothing, PCADenoising, RubberbandBaseline, \
     Normalize, LinearBaseline, ShiftAndScale, MissingReferenceException, \
     WrongReferenceException, NormalizeReference, XASnormalization, ExtractEXAFS, \
-    PreprocessException, NormalizePhaseReference, Despike, SpSubtract
-from orangecontrib.spectroscopy.preprocess.als import ALSP, ARPLS, AIRPLS
+    PreprocessException, NormalizePhaseReference, SpSubtract
 from orangecontrib.spectroscopy.preprocess.utils import replacex
 from orangecontrib.spectroscopy.tests.test_conversion import separate_learn_test, slightly_change_wavenumbers, odd_attr
 from orangecontrib.spectroscopy.tests.util import smaller_data
@@ -29,12 +28,7 @@
 
 # Preprocessors that work per sample and should return the same
 # result for a sample independent of the other samples
-PREPROCESSORS_INDEPENDENT_SAMPLES = [
-    Despike(threshold=5, cutoff=60, dis=5),
-    ALSP(lam=100E+6, itermax=5, p=0.5),
-    ARPLS(lam=100E+5, itermax=5, ratio=0.5),
-    AIRPLS(lam=100, itermax=5, porder=1),
-]
+PREPROCESSORS_INDEPENDENT_SAMPLES = []
 
 xas_norm_collagen = XASnormalization(edge=1630,
                                      preedge_dict={'from': 1000, 'to': 1300, 'deg': 1},

From 1e69ea7bfc129505d4ab9e21084a4e02eb41b6ab Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:26:35 +0200
Subject: [PATCH 12/13] test_preprocess: finish porting tests to the new mixin

---
 .../spectroscopy/tests/test_preprocess.py     | 35 +++----------------
 orangecontrib/spectroscopy/tests/test_xas.py  | 22 +++++++++++-
 2 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index e8f759009..cf3001d2c 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -7,14 +7,13 @@
 from Orange.classification import LogisticRegressionLearner
 from Orange.data import Table
 from Orange.evaluation import TestOnTestData, AUC
-from Orange.preprocess.preprocess import PreprocessorList
 
 from orangecontrib.spectroscopy.data import getx
 from orangecontrib.spectroscopy.preprocess import Absorbance, Transmittance, \
     Integrate, Interpolate, SavitzkyGolayFiltering, \
     GaussianSmoothing, PCADenoising, RubberbandBaseline, \
     Normalize, LinearBaseline, ShiftAndScale, MissingReferenceException, \
-    WrongReferenceException, NormalizeReference, XASnormalization, ExtractEXAFS, \
+    WrongReferenceException, NormalizeReference, \
     PreprocessException, NormalizePhaseReference, SpSubtract
 from orangecontrib.spectroscopy.preprocess.utils import replacex
 from orangecontrib.spectroscopy.tests.test_conversion import separate_learn_test, slightly_change_wavenumbers, odd_attr
@@ -26,27 +25,6 @@
 SMALLER_COLLAGEN = smaller_data(COLLAGEN[195:621], 40, 4)  # only glycogen and lipids
 
 
-# Preprocessors that work per sample and should return the same
-# result for a sample independent of the other samples
-PREPROCESSORS_INDEPENDENT_SAMPLES = []
-
-xas_norm_collagen = XASnormalization(edge=1630,
-                                     preedge_dict={'from': 1000, 'to': 1300, 'deg': 1},
-                                     postedge_dict={'from': 1650, 'to': 1700, 'deg': 1})
-extract_exafs = ExtractEXAFS(edge=1630, extra_from=1630, extra_to=1800,
-                             poly_deg=1, kweight=0, m=0)
-
-
-class ExtractEXAFSUsage(PreprocessorList):
-    """ExtractEXAFS needs previous XAS normalization"""
-    def __init__(self):
-        super().__init__(preprocessors=[xas_norm_collagen,
-                                        extract_exafs])
-
-
-PREPROCESSORS_INDEPENDENT_SAMPLES += [xas_norm_collagen, ExtractEXAFSUsage()]
-
-
 def add_zeros(data):
     """ Every 5th value is zero """
     s = data.copy()
@@ -104,13 +82,6 @@ def add_edge_case_data_parameter(class_, data_arg_name, data_to_modify, *args, *
         yield p
 
 
-# Preprocessors that use groups of input samples to infer
-# internal parameters.
-PREPROCESSORS_GROUPS_OF_SAMPLES = []
-
-PREPROCESSORS = PREPROCESSORS_INDEPENDENT_SAMPLES + PREPROCESSORS_GROUPS_OF_SAMPLES
-
-
 class TestConversionMixin:
 
     def test_slightly_different_domain(self):
@@ -148,6 +119,10 @@ def test_slightly_different_domain(self):
 
 
 class TestConversionIndpSamplesMixin(TestConversionMixin):
+    """
+    Testing mixin for preprocessors that work per sample and should
+    return the same result for a sample independent of the other samples
+    """
 
     def test_whole_and_train_separate(self):
         """ Applying a preprocessor before spliting data into train and test
diff --git a/orangecontrib/spectroscopy/tests/test_xas.py b/orangecontrib/spectroscopy/tests/test_xas.py
index 19582a989..e5c91c5d3 100644
--- a/orangecontrib/spectroscopy/tests/test_xas.py
+++ b/orangecontrib/spectroscopy/tests/test_xas.py
@@ -2,11 +2,31 @@
 import numpy
 import Orange
 from Orange.data import Table
+from Orange.preprocess import PreprocessorList
 
 from orangecontrib.spectroscopy.preprocess import XASnormalization, ExtractEXAFS, NoEdgejumpProvidedException
+from orangecontrib.spectroscopy.tests.test_preprocess import TestCommonIndpSamplesMixin, \
+    SMALLER_COLLAGEN
 
 
-class TestXASnormalization(unittest.TestCase):
+xas_norm_collagen = XASnormalization(edge=1630,
+                                     preedge_dict={'from': 1000, 'to': 1300, 'deg': 1},
+                                     postedge_dict={'from': 1650, 'to': 1700, 'deg': 1})
+extract_exafs = ExtractEXAFS(edge=1630, extra_from=1630, extra_to=1800,
+                             poly_deg=1, kweight=0, m=0)
+
+
+class ExtractEXAFSUsage(PreprocessorList):
+    """Chain XAS normalization before ExtractEXAFS, which requires it."""
+    def __init__(self):
+        super().__init__(preprocessors=[xas_norm_collagen,
+                                        extract_exafs])
+
+
+class TestXASnormalization(unittest.TestCase, TestCommonIndpSamplesMixin):
+
+    preprocessors = [xas_norm_collagen, ExtractEXAFSUsage()]
+    data = SMALLER_COLLAGEN
 
     def test_flat(self):
         domain = Orange.data.Domain([Orange.data.ContinuousVariable(str(w))

From d856bf34ed1c6c4af9a48a6c0e04f4f2b04ae9b3 Mon Sep 17 00:00:00 2001
From: Marko Toplak <marko.toplak@gmail.com>
Date: Mon, 19 Aug 2024 21:35:44 +0200
Subject: [PATCH 13/13] test_preprocess: speedups

---
 .../spectroscopy/tests/test_preprocess.py     | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/orangecontrib/spectroscopy/tests/test_preprocess.py b/orangecontrib/spectroscopy/tests/test_preprocess.py
index cf3001d2c..a6e7412e8 100644
--- a/orangecontrib/spectroscopy/tests/test_preprocess.py
+++ b/orangecontrib/spectroscopy/tests/test_preprocess.py
@@ -237,8 +237,8 @@ class TestCommonIndpSamplesMixin(TestStrangeDataMixin, TestConversionIndpSamples
 class TestSpSubtract(unittest.TestCase, TestCommonIndpSamplesMixin):
 
     preprocessors = list(add_edge_case_data_parameter(
-        SpSubtract, "reference", SMALL_COLLAGEN[:1], amount=0.1))
-    data = SMALL_COLLAGEN
+        SpSubtract, "reference", SMALLER_COLLAGEN[:1], amount=0.1))
+    data = SMALLER_COLLAGEN
 
     def test_simple(self):
         data = Table.from_numpy(None, [[1.0, 2.0, 3.0, 4.0]])
@@ -252,12 +252,12 @@ class TestTransmittance(unittest.TestCase, TestCommonIndpSamplesMixin):
 
     preprocessors =  [Transmittance()] + \
                       list(add_edge_case_data_parameter(
-                          Transmittance, "reference", SMALL_COLLAGEN[0:1]))
-    data = SMALL_COLLAGEN
+                          Transmittance, "reference", SMALLER_COLLAGEN[0:1]))
+    data = SMALLER_COLLAGEN
 
     def test_domain_conversion(self):
         """Test whether a domain can be used for conversion."""
-        data = SMALL_COLLAGEN
+        data = self.data
         transmittance = Transmittance()(data)
         nt = Orange.data.Table.from_table(transmittance.domain, data)
         self.assertEqual(transmittance.domain, nt.domain)
@@ -266,12 +266,12 @@ def test_domain_conversion(self):
 
     def test_roundtrip(self):
         """Test AB -> TR -> AB calculation"""
-        data = SMALL_COLLAGEN
+        data = self.data
         calcdata = Absorbance()(Transmittance()(data))
         np.testing.assert_allclose(data.X, calcdata.X)
 
     def disabled_test_eq(self):
-        data = SMALL_COLLAGEN
+        data = self.data
         t1 = Transmittance()(data)
         t2 = Transmittance()(data)
         self.assertEqual(t1.domain, t2.domain)
@@ -291,13 +291,13 @@ class TestAbsorbance(unittest.TestCase, TestCommonIndpSamplesMixin):
 
     preprocessors =  [Absorbance()] + \
                       list(add_edge_case_data_parameter(
-                          Absorbance, "reference", SMALL_COLLAGEN[0:1]))
-    data = SMALL_COLLAGEN
+                          Absorbance, "reference", SMALLER_COLLAGEN[0:1]))
+    data = SMALLER_COLLAGEN
 
 
     def test_domain_conversion(self):
         """Test whether a domain can be used for conversion."""
-        data = Transmittance()(SMALL_COLLAGEN)
+        data = Transmittance()(self.data)
         absorbance = Absorbance()(data)
         nt = Orange.data.Table.from_table(absorbance.domain, data)
         self.assertEqual(absorbance.domain, nt.domain)
@@ -307,12 +307,12 @@ def test_domain_conversion(self):
     def test_roundtrip(self):
         """Test TR -> AB -> TR calculation"""
         # actually AB -> TR -> AB -> TR
-        data = Transmittance()(SMALL_COLLAGEN)
+        data = Transmittance()(self.data)
         calcdata = Transmittance()(Absorbance()(data))
         np.testing.assert_allclose(data.X, calcdata.X)
 
     def disabled_test_eq(self):
-        data = SMALL_COLLAGEN
+        data = self.data
         t1 = Absorbance()(data)
         t2 = Absorbance()(data)
         self.assertEqual(t1.domain, t2.domain)
@@ -375,7 +375,7 @@ def test_simple(self):
 class TestRubberbandBaseline(unittest.TestCase, TestCommonIndpSamplesMixin):
 
     preprocessors =  [RubberbandBaseline()]
-    data = SMALL_COLLAGEN
+    data = SMALLER_COLLAGEN
 
     def test_whole(self):
         """ Every point belongs in the convex region. """
@@ -544,10 +544,10 @@ def disabled_test_eq(self):
 class TestNormalizeReference(unittest.TestCase, TestCommonIndpSamplesMixin):
 
     preprocessors = (list(add_edge_case_data_parameter(NormalizeReference,
-                                                      "reference", SMALL_COLLAGEN[:1])) +
+                                                      "reference", SMALLER_COLLAGEN[:1])) +
                      list(add_edge_case_data_parameter(NormalizePhaseReference,
-                                                      "reference", SMALL_COLLAGEN[:1])))
-    data = SMALL_COLLAGEN
+                                                      "reference", SMALLER_COLLAGEN[:1])))
+    data = SMALLER_COLLAGEN
 
     def test_reference(self):
         data = Table.from_numpy(None, [[2, 1, 3], [4, 2, 6]])
@@ -567,10 +567,10 @@ def test_reference_exceptions(self):
 class TestPCADenoising(unittest.TestCase, TestCommonMixin):
 
     preprocessors = [PCADenoising(components=2)]
-    data = SMALL_COLLAGEN
+    data = SMALLER_COLLAGEN
 
     def test_no_samples(self):
-        data = Orange.data.Table("iris")
+        data = self.data
         proc = PCADenoising()
         d1 = proc(data[:0])
         newdata = data.transform(d1.domain)