From 22565b9934c37f7f8b171cdbc556e5a4754ccea0 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 13:28:36 -0800
Subject: [PATCH 01/23] Update sklearn to >= 1.5.0

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index cd5cd0b..e47a5bc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,7 @@ install_requires =
     pandas==2.1.4
     requests
    seaborn==0.13.0
-    scikit-learn==1.2.1
+    scikit-learn>=1.5.0
     sklearn_pandas>=2.0.0
     tqdm
     statsmodels==0.14.0

From 67abbf00c7279dd648c6368a3607a11e9fdc7fd0 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 14:44:48 -0800
Subject: [PATCH 02/23] update indices_to_mask import that was changed in
 sklearn 1.3.0

---
 afqinsight/_serial_bagging.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
index f9536b0..24a57a9 100644
--- a/afqinsight/_serial_bagging.py
+++ b/afqinsight/_serial_bagging.py
@@ -23,7 +23,8 @@
     _parallel_predict_regression,
 )
 from sklearn.ensemble._base import _partition_estimators
-from sklearn.utils import check_array, check_random_state, indices_to_mask, resample
+from sklearn.utils import check_array, check_random_state, resample
+from sklearn.utils._mask import indices_to_mask
 from sklearn.utils.metaestimators import if_delegate_has_method
 from sklearn.utils.random import sample_without_replacement
 from sklearn.utils.validation import (

From 6669ed08ede5e04f2e7f17636a2811bf635f279e Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 14:54:14 -0800
Subject: [PATCH 03/23] change if_delegate_has_method to available_if in
 metaestimators

---
 afqinsight/_serial_bagging.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
index 24a57a9..c86fb78 100644
--- a/afqinsight/_serial_bagging.py
+++ b/afqinsight/_serial_bagging.py
@@ -25,7 +25,7 @@
 from sklearn.ensemble._base import _partition_estimators
 from sklearn.utils import check_array, check_random_state, resample
 from sklearn.utils._mask import indices_to_mask
-from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.metaestimators import available_if
 from sklearn.utils.random import sample_without_replacement
 from sklearn.utils.validation import (
     _check_sample_weight,

From a6a9dac01bad36ad02f984d56b8b586647abd5b3 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 15:07:07 -0800
Subject: [PATCH 04/23] change more if_delegate_has_method

---
 afqinsight/_serial_bagging.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
index c86fb78..62de2f7 100644
--- a/afqinsight/_serial_bagging.py
+++ b/afqinsight/_serial_bagging.py
@@ -611,7 +611,7 @@ def predict_log_proba(self, X):
         else:
             return np.log(self.predict_proba(X))
 
-    @if_delegate_has_method(delegate="base_estimator")
+    @available_if(lambda est: hasattr(est.base_estimator, "decision_function"))
     def decision_function(self, X):
         """Average of the decision functions of the base classifiers.
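Note: patches 02-04 track two scikit-learn API moves. indices_to_mask is no
longer re-exported from sklearn.utils (it lives in sklearn.utils._mask as of
1.3), and the if_delegate_has_method decorator is gone in favor of
available_if, which takes a callable that receives the instance and decides
whether the method should exist. A minimal sketch of the new pattern; the
Wrapper class here is illustrative only, not code from the patch:

    from sklearn.utils.metaestimators import available_if

    def _inner_has(attr):
        # available_if calls this with the instance; the decorated method is
        # only visible (hasattr returns True) when the wrapped estimator has it.
        return lambda self: hasattr(self.inner, attr)

    class Wrapper:
        def __init__(self, inner):
            self.inner = inner

        @available_if(_inner_has("decision_function"))
        def decision_function(self, X):
            return self.inner.decision_function(X)

This is why patch 04 can swap the string-based delegate declaration for an
explicit hasattr check on the wrapped estimator.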
From 752b8865bc8ae51e5e364e078f0748f9b9308c98 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:18:13 -0800
Subject: [PATCH 05/23] Fix sklearn to 1.5.2

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index e47a5bc..89d4152 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,7 @@ install_requires =
     pandas==2.1.4
     requests
     seaborn==0.13.0
-    scikit-learn>=1.5.0
+    scikit-learn==1.5.2
     sklearn_pandas>=2.0.0
     tqdm
     statsmodels==0.14.0

From 77f8d41ed20942ce52fc9026a446cb0f1554a5a5 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:27:10 -0800
Subject: [PATCH 06/23] Update statsmodels pin from 0.14.0 to 0.14.4

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 89d4152..e28d53b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -44,7 +44,7 @@ install_requires =
     scikit-learn==1.5.2
     sklearn_pandas>=2.0.0
     tqdm
-    statsmodels==0.14.0
+    statsmodels==0.14.4
 zip_safe = False
 include_package_data = True
 packages = find:

From 879f24b5ea100e04cf1d407b2043ee185a85f04d Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:33:24 -0800
Subject: [PATCH 07/23] Update seaborn pin from 0.13.0 to 0.13.2

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index e28d53b..0618847 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,7 +40,7 @@ install_requires =
     numpy==1.23.5
     pandas==2.1.4
     requests
-    seaborn==0.13.0
+    seaborn==0.13.2
     scikit-learn==1.5.2
     sklearn_pandas>=2.0.0
     tqdm

From e9c80c9fa736a98a11a0cc37f712972b4b388e4d Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:37:50 -0800
Subject: [PATCH 08/23] Update pandas pin from 2.1.4 to 2.2.3

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 0618847..58a6e54 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -38,7 +38,7 @@ install_requires =
     matplotlib<3.9
     groupyr>=0.3.4
     numpy==1.23.5
-    pandas==2.1.4
+    pandas==2.2.3
     requests
     seaborn==0.13.2
     scikit-learn==1.5.2

From a61320dae2d3bd7d6803f904827120c455b21569 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:47:51 -0800
Subject: [PATCH 09/23] Update matplotlib pin from <3.9 to 3.9.3

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 58a6e54..b30a135 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,7 +35,7 @@ python_requires = >=3.10
 install_requires =
     scipy==1.13.1
     dipy>=1.0.0
-    matplotlib<3.9
+    matplotlib==3.9.3
     groupyr>=0.3.4
     numpy==1.23.5
     pandas==2.2.3

From 2b99096c12765b92585491358df18d532f705118 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:53:03 -0800
Subject: [PATCH 10/23] Update scipy pin from 1.13.1 to 1.14.1

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index b30a135..1b3f520 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,7 +33,7 @@ setup_requires =
     setuptools_scm
 python_requires = >=3.10
 install_requires =
-    scipy==1.13.1
+    scipy==1.14.1
     dipy>=1.0.0
     matplotlib==3.9.3
     groupyr>=0.3.4

From d25b82ab6900657d255a7270747efbc52073a0ba Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 17:01:06 -0800
Subject: [PATCH 11/23] Update dipy pin from 1.0.0 to 1.9.0

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 1b3f520..c8be5a6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,7 +34,7 @@ setup_requires =
 python_requires = >=3.10
 install_requires =
     scipy==1.14.1
-    dipy>=1.0.0
+    dipy==1.9.0
     matplotlib==3.9.3
     groupyr>=0.3.4
     numpy==1.23.5

From 8c2d152fd0ccbd404da92af09113d2d8e9784131 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 17:06:38 -0800
Subject: [PATCH 12/23] Update numpy pin from 1.23.5 to 2.1.3

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index c8be5a6..4ba765a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ install_requires =
     dipy==1.9.0
     matplotlib==3.9.3
     groupyr>=0.3.4
-    numpy==1.23.5
+    numpy==2.1.3
     pandas==2.2.3
     requests
     seaborn==0.13.2

From 23ffcf3ac2097c723c3b3e982f1b8e12f583648e Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 17:08:50 -0800
Subject: [PATCH 13/23] Update numpy pin from 2.1.3 to 1.26.4; numpy >= 2 is a
 breaking change

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 4ba765a..93b8a13 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ install_requires =
     dipy==1.9.0
     matplotlib==3.9.3
     groupyr>=0.3.4
-    numpy==2.1.3
+    numpy==1.26.4
     pandas==2.2.3
     requests
     seaborn==0.13.2
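Note: patches 05-13 land the dependency matrix one pin at a time, including
the numpy 2.x attempt and immediate walk-back in patches 12-13. For reference,
the install_requires block in setup.cfg after patch 13 reads:

    install_requires =
        scipy==1.14.1
        dipy==1.9.0
        matplotlib==3.9.3
        groupyr>=0.3.4
        numpy==1.26.4
        pandas==2.2.3
        requests
        seaborn==0.13.2
        scikit-learn==1.5.2
        sklearn_pandas>=2.0.0
        tqdm
        statsmodels==0.14.4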
From 4d2f753c7b130e53a4716a9262c78aa8e5e62919 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Wed, 4 Dec 2024 17:20:45 -0800
Subject: [PATCH 14/23] Update references from richford to tractometry in
 CONTRIBUTING.md

---
 CONTRIBUTING.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fb09e31..5d39ed2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -31,7 +31,7 @@ You can contribute in many ways:
 
 ### Report Bugs
 
-Report bugs at <https://github.com/richford/AFQ-Insight/issues>.
+Report bugs at <https://github.com/tractometry/AFQ-Insight/issues>.
 
 If you are reporting a bug, please include:
 
@@ -64,7 +64,7 @@ articles, and such.
 
 ### Submit Feedback
 
 The best way to send feedback is to file an issue at
-<https://github.com/richford/AFQ-Insight/issues>.
+<https://github.com/tractometry/AFQ-Insight/issues>.
 
 If you are proposing a feature:

From 8264584f69d0ded32684ee312221dddc0e4dbe1d Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Fri, 13 Dec 2024 11:28:24 +0800
Subject: [PATCH 15/23] Vendorize neurocombat_sklearn and update estimator
 variable

---
 neurocombat_sklearn | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 neurocombat_sklearn

diff --git a/neurocombat_sklearn b/neurocombat_sklearn
new file mode 160000
index 0000000..25306f0
--- /dev/null
+++ b/neurocombat_sklearn
@@ -0,0 +1 @@
+Subproject commit 25306f0a2f088764b40a376ba276a2126ff820b4

From 59cb6b724f967d4af6ec933779917565cb1234ec Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Fri, 13 Dec 2024 11:29:36 +0800
Subject: [PATCH 16/23] Rename base_estimator to estimator in bagging tests;
 use vendored CombatModel

---
 afqinsight/tests/test_bagging.py   | 70 +++++++++++++++---------------
 examples/plot_hbn_site_profiles.py |  2 +-
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/afqinsight/tests/test_bagging.py b/afqinsight/tests/test_bagging.py
index ca81abc..6e89a20 100644
--- a/afqinsight/tests/test_bagging.py
+++ b/afqinsight/tests/test_bagging.py
@@ -66,7 +66,7 @@ def test_classification():
         }
     )
 
-    for base_estimator in [
+    for estimator in [
         None,
         DummyClassifier(),
         Perceptron(),
@@ -76,7 +76,7 @@ def test_classification():
     ]:
         for params in grid:
             SerialBaggingClassifier(
-                base_estimator=base_estimator, random_state=rng, **params
+                estimator=estimator, random_state=rng, **params
             ).fit(X_train, y_train).predict(X_test)
@@ -124,7 +124,7 @@ def fit(self, X, y):
     ]:
         # Trained on sparse format
         sparse_classifier = SerialBaggingClassifier(
-            base_estimator=CustomSVC(decision_function_shape="ovr"),
+            estimator=CustomSVC(decision_function_shape="ovr"),
             random_state=1,
             **params,
         ).fit(X_train_sparse, y_train)
@@ -132,7 +132,7 @@ def fit(self, X, y):
 
         # Trained on dense format
         dense_classifier = SerialBaggingClassifier(
-            base_estimator=CustomSVC(decision_function_shape="ovr"),
+            estimator=CustomSVC(decision_function_shape="ovr"),
             random_state=1,
             **params,
         ).fit(X_train, y_train)
@@ -160,7 +160,7 @@ def test_regression():
         }
     )
 
-    for base_estimator in [
+    for estimator in [
         None,
         DummyRegressor(),
         DecisionTreeRegressor(),
@@ -169,7 +169,7 @@ def test_regression():
     ]:
         for params in grid:
             SerialBaggingRegressor(
-                base_estimator=base_estimator, random_state=rng, **params
+                estimator=estimator, random_state=rng, **params
             ).fit(X_train, y_train).predict(X_test)
@@ -211,14 +211,14 @@ def fit(self, X, y):
     for params in parameter_sets:
         # Trained on sparse format
         sparse_classifier = SerialBaggingRegressor(
-            base_estimator=CustomSVR(), random_state=1, **params
+            estimator=CustomSVR(), random_state=1, **params
         ).fit(X_train_sparse, y_train)
         sparse_results = sparse_classifier.predict(X_test_sparse)
 
         # Trained on dense format
         dense_results = (
             SerialBaggingRegressor(
-                base_estimator=CustomSVR(), random_state=1, **params
+                estimator=CustomSVR(), random_state=1, **params
             )
             .fit(X_train, y_train)
             .predict(X_test)
@@ -248,33 +248,33 @@ def test_bootstrap_samples():
         diabetes.data, diabetes.target, random_state=rng
     )
 
-    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)
+    estimator = DecisionTreeRegressor().fit(X_train, y_train)
 
     # without bootstrap, all trees are perfect on the training set
     ensemble = SerialBaggingRegressor(
-        base_estimator=DecisionTreeRegressor(),
+        estimator=DecisionTreeRegressor(),
         max_samples=1.0,
         bootstrap=False,
         random_state=rng,
     ).fit(X_train, y_train)
 
-    assert base_estimator.score(X_train, y_train) == ensemble.score(X_train, y_train)
+    assert estimator.score(X_train, y_train) == ensemble.score(X_train, y_train)
 
     # with bootstrap, trees are no longer perfect on the training set
     ensemble = SerialBaggingRegressor(
-        base_estimator=DecisionTreeRegressor(),
+        estimator=DecisionTreeRegressor(),
        max_samples=1.0,
         bootstrap=True,
         random_state=rng,
     ).fit(X_train, y_train)
 
-    assert base_estimator.score(X_train, y_train) > ensemble.score(X_train, y_train)
+    assert estimator.score(X_train, y_train) > ensemble.score(X_train, y_train)
 
     # check that each sampling correspond to a complete bootstrap resample.
     # the size of each bootstrap should be the same as the input data but
     # the data should be different (checked using the hash of the data).
     ensemble = SerialBaggingRegressor(
-        base_estimator=DummySizeEstimator(), bootstrap=True
+        estimator=DummySizeEstimator(), bootstrap=True
     ).fit(X_train, y_train)
     training_hash = []
     for estimator in ensemble.estimators_:
@@ -291,7 +291,7 @@ def test_bootstrap_features():
     )
 
     ensemble = SerialBaggingRegressor(
-        base_estimator=DecisionTreeRegressor(),
+        estimator=DecisionTreeRegressor(),
         max_features=1.0,
         bootstrap_features=False,
         random_state=rng,
@@ -301,7 +301,7 @@ def test_bootstrap_features():
         assert diabetes.data.shape[1] == np.unique(features).shape[0]
 
     ensemble = SerialBaggingRegressor(
-        base_estimator=DecisionTreeRegressor(),
+        estimator=DecisionTreeRegressor(),
         max_features=1.0,
         bootstrap_features=True,
         random_state=rng,
@@ -321,7 +321,7 @@ def test_probability():
     with np.errstate(divide="ignore", invalid="ignore"):
         # Normal case
         ensemble = SerialBaggingClassifier(
-            base_estimator=DecisionTreeClassifier(), random_state=rng
+            estimator=DecisionTreeClassifier(), random_state=rng
         ).fit(X_train, y_train)
 
         assert_array_almost_equal(
@@ -334,7 +334,7 @@ def test_probability():
 
         # Degenerate case, where some classes are missing
         ensemble = SerialBaggingClassifier(
-            base_estimator=LogisticRegression(), random_state=rng, max_samples=5
+            estimator=LogisticRegression(), random_state=rng, max_samples=5
         ).fit(X_train, y_train)
 
         assert_array_almost_equal(
@@ -354,9 +354,9 @@ def test_oob_score_classification():
         iris.data, iris.target, random_state=rng
     )
 
-    for base_estimator in [DecisionTreeClassifier(), SVC()]:
+    for estimator in [DecisionTreeClassifier(), SVC()]:
         clf = SerialBaggingClassifier(
-            base_estimator=base_estimator,
+            estimator=estimator,
             n_estimators=100,
             bootstrap=True,
             oob_score=True,
@@ -371,7 +371,7 @@ def test_oob_score_classification():
         assert_warns(
             UserWarning,
             SerialBaggingClassifier(
-                base_estimator=base_estimator,
+                estimator=estimator,
                 n_estimators=1,
                 bootstrap=True,
                 oob_score=True,
@@ -391,7 +391,7 @@ def test_oob_score_regression():
     )
 
     clf = SerialBaggingRegressor(
-        base_estimator=DecisionTreeRegressor(),
+        estimator=DecisionTreeRegressor(),
         n_estimators=50,
         bootstrap=True,
         oob_score=True,
@@ -406,7 +406,7 @@ def test_oob_score_regression():
     assert_warns(
         UserWarning,
         SerialBaggingRegressor(
-            base_estimator=DecisionTreeRegressor(),
+            estimator=DecisionTreeRegressor(),
             n_estimators=1,
             bootstrap=True,
             oob_score=True,
@@ -425,7 +425,7 @@ def test_single_estimator():
     )
 
     clf1 = SerialBaggingRegressor(
-        base_estimator=KNeighborsRegressor(),
+        estimator=KNeighborsRegressor(),
         n_estimators=1,
         bootstrap=False,
         bootstrap_features=False,
@@ -552,15 +552,15 @@ def test_gridsearch():
     y[y == 2] = 1
 
     # Grid search with scoring based on decision_function
-    parameters = {"n_estimators": (1, 2), "base_estimator__C": (1, 2)}
+    parameters = {"n_estimators": (1, 2), "estimator__C": (1, 2)}
 
     GridSearchCV(SerialBaggingClassifier(SVC()), parameters, scoring="roc_auc").fit(
         X, y
     )
 
 
-def test_base_estimator():
-    # Check base_estimator and its default values.
+def test_estimator():
+    # Check estimator and its default values.
     rng = check_random_state(0)
 
     # Classification
     X_train, X_test, y_train, y_test = train_test_split(
@@ -572,19 +572,19 @@ def test_base_estimator():
         X_train, y_train
     )
 
-    assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier)
+    assert isinstance(ensemble.estimator_, DecisionTreeClassifier)
 
     ensemble = SerialBaggingClassifier(
         DecisionTreeClassifier(), n_jobs=3, random_state=0
     ).fit(X_train, y_train)
 
-    assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier)
+    assert isinstance(ensemble.estimator_, DecisionTreeClassifier)
 
     ensemble = SerialBaggingClassifier(Perceptron(), n_jobs=3, random_state=0).fit(
         X_train, y_train
     )
 
-    assert isinstance(ensemble.base_estimator_, Perceptron)
+    assert isinstance(ensemble.estimator_, Perceptron)
 
     # Regression
     X_train, X_test, y_train, y_test = train_test_split(
@@ -595,18 +595,18 @@ def test_base_estimator():
         diabetes.data, diabetes.target, random_state=rng
     )
 
     ensemble = SerialBaggingRegressor().fit(
         X_train, y_train
     )
 
-    assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor)
+    assert isinstance(ensemble.estimator_, DecisionTreeRegressor)
 
     ensemble = SerialBaggingRegressor(
         DecisionTreeRegressor(), n_jobs=3, random_state=0
     ).fit(X_train, y_train)
 
-    assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor)
+    assert isinstance(ensemble.estimator_, DecisionTreeRegressor)
 
     ensemble = SerialBaggingRegressor(SVR(), n_jobs=3, random_state=0).fit(
         X_train, y_train
     )
 
-    assert isinstance(ensemble.base_estimator_, SVR)
+    assert isinstance(ensemble.estimator_, SVR)
 
 
 def test_bagging_with_pipeline():
@@ -799,7 +799,7 @@ def test_estimators_samples_deterministic():
         SparseRandomProjection(n_components=2), LogisticRegression()
     )
     clf = SerialBaggingClassifier(
-        base_estimator=base_pipeline, max_samples=0.5, random_state=0
+        estimator=base_pipeline, max_samples=0.5, random_state=0
     )
     clf.fit(X, y)
     pipeline_estimator_coef = clf.estimators_[0].steps[-1][1].coef_.copy()
@@ -939,7 +939,7 @@ def fit(self, X, y):
         self._sample_indices = y
 
     clf = SerialBaggingRegressor(
-        base_estimator=MyEstimator(), n_estimators=1, random_state=0
+        estimator=MyEstimator(), n_estimators=1, random_state=0
     )
     clf.fit(X, y)

diff --git a/examples/plot_hbn_site_profiles.py b/examples/plot_hbn_site_profiles.py
index ea84bff..5ad8278 100644
--- a/examples/plot_hbn_site_profiles.py
+++ b/examples/plot_hbn_site_profiles.py
@@ -38,7 +38,7 @@
 """
 
 import numpy as np
-from neurocombat_sklearn import CombatModel
+from afqinsight.neurocombat_sklearn import CombatModel
 from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
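Note: the renames in patch 16 (and patch 22 below) mirror scikit-learn's own
API change: the base_estimator constructor argument of the ensemble
meta-estimators was deprecated in 1.2 and removed in 1.4 in favor of
estimator, the fitted template is exposed as estimator_, and nested parameter
paths are spelled estimator__<param>. A short sketch of the new spelling
against stock scikit-learn, not code from this series:

    from sklearn.ensemble import BaggingClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    # `estimator=` replaces the removed `base_estimator=` keyword.
    bag = BaggingClassifier(estimator=SVC(), n_estimators=10, random_state=0)

    # Nested parameter paths are renamed the same way.
    search = GridSearchCV(bag, {"n_estimators": (1, 2), "estimator__C": (1, 2)})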
From 931c30b17c57096d250c5b45e2cf69b6f7c28618 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Mon, 16 Dec 2024 14:16:45 +0800
Subject: [PATCH 17/23] Changed folder structure for neurocombat, added
 pre-commit hooks

---
 afqinsight/neurocombat_sklearn/__init__.py          |   5 +
 .../neurocombat_sklearn.py                          | 488 ++++++++++++++++++
 afqinsight/neurocombat_sklearn/tests/__init__.py    |   0
 afqinsight/neurocombat_sklearn/tests/test_common.py |   8 +
 afqinsight/tests/test_bagging.py                    |  10 +-
 examples/plot_hbn_site_profiles.py                  |   2 +-
 neurocombat_sklearn                                 |   1 -
 7 files changed, 506 insertions(+), 8 deletions(-)
 create mode 100644 afqinsight/neurocombat_sklearn/__init__.py
 create mode 100644 afqinsight/neurocombat_sklearn/neurocombat_sklearn.py
 create mode 100644 afqinsight/neurocombat_sklearn/tests/__init__.py
 create mode 100644 afqinsight/neurocombat_sklearn/tests/test_common.py
 delete mode 160000 neurocombat_sklearn

diff --git a/afqinsight/neurocombat_sklearn/__init__.py b/afqinsight/neurocombat_sklearn/__init__.py
new file mode 100644
index 0000000..6a9d852
--- /dev/null
+++ b/afqinsight/neurocombat_sklearn/__init__.py
@@ -0,0 +1,5 @@
+from afqinsight.neurocombat_sklearn import CombatModel
+
+from ._version import __version__
+
+__all__ = ["CombatModel", "__version__"]
diff --git a/afqinsight/neurocombat_sklearn/neurocombat_sklearn.py b/afqinsight/neurocombat_sklearn/neurocombat_sklearn.py
new file mode 100644
index 0000000..a00c874
--- /dev/null
+++ b/afqinsight/neurocombat_sklearn/neurocombat_sklearn.py
@@ -0,0 +1,488 @@
+# Authors: Walter Hugo Lopez Pinaya
+# License: MIT
+import numpy as np
+import numpy.linalg as la
+from sklearn.base import BaseEstimator
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.utils import check_array
+from sklearn.utils.validation import (
+    FLOAT_DTYPES,
+    check_consistent_length,
+    check_is_fitted,
+)
+
+__all__ = [
+    "CombatModel",
+]
+
+
+class CombatModel(BaseEstimator):
+    """Harmonize/normalize features using Combat's [1]
+    parametric empirical Bayes framework
+
+    [1] Fortin, Jean-Philippe, et al. "Harmonization of cortical thickness
+    measurements across scanners and sites." Neuroimage 167 (2018): 104-120.
+    """
+
+    def __init__(self, copy=True):
+        self.copy = copy
+
+    def _reset(self):
+        """Reset internal data-dependent state, if necessary.
+
+        __init__ parameters are not touched.
+        """
+
+        # Checking one attribute is enough, because they are all set together
+        if hasattr(self, "n_sites"):
+            del self.n_sites
+            del self.sites_names
+            del self.discrete_covariates_used
+            del self.continuous_covariates_used
+            del self.site_encoder
+            del self.discrete_encoders
+            del self.beta_hat
+            del self.grand_mean
+            del self.var_pooled
+            del self.gamma_star
+            del self.delta_star
+
+    def fit(self, data, sites, discrete_covariates=None, continuous_covariates=None):
+        """Compute the parameters to perform the harmonization/normalization
+
+        Parameters
+        ----------
+        data : array-like, shape [n_samples, n_features]
+            The data used to compute the per-feature statistics
+            used for later harmonization along the acquisition sites.
+        sites : array-like, shape [n_samples, 1]
+            The target variable for harmonization problems
+            (e.g. acquisition sites or batches).
+        discrete_covariates : array-like, shape [n_samples, n_discrete_covariates]
+            The covariates which are categorical
+            (e.g. schizophrenia patient or healthy control).
+        continuous_covariates : array-like, shape [n_samples, n_continuous_covariates]
+            The covariates which are continuous
+            (e.g. age and clinical scores)
+        """
+
+        # Reset internal state before fitting
+        self._reset()
+
+        data = check_array(data, copy=self.copy, estimator=self, dtype=FLOAT_DTYPES)
+        sites = check_array(sites, copy=self.copy, estimator=self)
+
+        check_consistent_length(data, sites)
+
+        if discrete_covariates is not None:
+            self.discrete_covariates_used = True
+            discrete_covariates = check_array(
+                discrete_covariates, copy=self.copy, dtype=None, estimator=self
+            )
+
+        if continuous_covariates is not None:
+            self.continuous_covariates_used = True
+            continuous_covariates = check_array(
+                continuous_covariates,
+                copy=self.copy,
+                estimator=self,
+                dtype=FLOAT_DTYPES,
+            )
+
+        # To have a similar code to neuroCombat and Combat original scripts
+        data = data.T
+
+        sites_names, n_samples_per_site = np.unique(sites, return_counts=True)
+
+        self.sites_names = sites_names
+        self.n_sites = len(sites_names)
+
+        n_samples = sites.shape[0]
+        idx_per_site = [list(np.where(sites == idx)[0]) for idx in sites_names]
+
+        design = self._make_design_matrix(
+            sites, discrete_covariates, continuous_covariates, fitting=True
+        )
+
+        standardized_data, _ = self._standardize_across_features(
+            data, design, n_samples, n_samples_per_site, fitting=True
+        )
+
+        gamma_hat, delta_hat = self._fit_ls_model(
+            standardized_data, design, idx_per_site
+        )
+
+        gamma_bar, tau_2, a_prior, b_prior = self._find_priors(gamma_hat, delta_hat)
+
+        self.gamma_star, self.delta_star = self._find_parametric_adjustments(
+            standardized_data,
+            idx_per_site,
+            gamma_hat,
+            delta_hat,
+            gamma_bar,
+            tau_2,
+            a_prior,
+            b_prior,
+        )
+
+        return self
+
+    def transform(
+        self, data, sites, discrete_covariates=None, continuous_covariates=None
+    ):
+        """Transform data to harmonized space
+
+        Parameters
+        ----------
+        data : array-like
+            Input data that will be transformed.
+        sites : array-like
+            Site info of the inputted data
+        discrete_covariates : array-like
+            The covariates which are categorical
+        continuous_covariates : array-like
+            The covariates which are continuous
+        """
+
+        check_is_fitted(self, "n_sites")
+
+        data = check_array(data, copy=self.copy, estimator=self, dtype=FLOAT_DTYPES)
+        sites = check_array(sites, copy=self.copy, estimator=self)
+
+        check_consistent_length(data, sites)
+
+        if hasattr(self, "discrete_covariates_used"):
+            discrete_covariates = check_array(
+                discrete_covariates, copy=self.copy, dtype=None, estimator=self
+            )
+
+        if hasattr(self, "continuous_covariates_used"):
+            continuous_covariates = check_array(
+                continuous_covariates,
+                copy=self.copy,
+                estimator=self,
+                dtype=FLOAT_DTYPES,
+            )
+
+        # To have a similar code to neuroCombat and Combat original scripts
+        data = data.T
+
+        new_data_sites_name = np.unique(sites)
+
+        # Check all sites from new_data were seen
+        if not all(site_name in self.sites_names for site_name in new_data_sites_name):
+            raise ValueError(
+                "There is a site unseen during the fit method in the data."
+            )
+
+        n_samples = sites.shape[0]
+        n_samples_per_site = np.array(
+            [np.sum(sites == site_name) for site_name in self.sites_names]
+        )
+        idx_per_site = [
+            list(np.where(sites == site_name)[0]) for site_name in self.sites_names
+        ]
+
+        design = self._make_design_matrix(
+            sites, discrete_covariates, continuous_covariates, fitting=False
+        )
+
+        standardized_data, standardized_mean = self._standardize_across_features(
+            data, design, n_samples, n_samples_per_site, fitting=False
+        )
+
+        bayes_data = self._adjust_data_final(
+            standardized_data,
+            design,
+            standardized_mean,
+            n_samples_per_site,
+            n_samples,
+            idx_per_site,
+        )
+
+        return bayes_data.T
+
+    def fit_transform(self, data, sites, *args):
+        """Fit to data, then transform it"""
+        return self.fit(data, sites, *args).transform(data, sites, *args)
+
+    def _make_design_matrix(
+        self, sites, discrete_covariates, continuous_covariates, fitting=False
+    ):
+        """Method to create a design matrix that contain:
+
+        - One-hot encoding of the sites [n_samples, n_sites]
+        - One-hot encoding of each discrete covariates (removing
+          the first column) [n_samples,
+          (n_discrete_covivariate_names-1) * n_discrete_covariates]
+        - Each continuous covariates
+
+        Parameters
+        ----------
+        sites : array-like
+        discrete_covariates : array-like
+        continuous_covariates : array-like
+        fitting : boolean, default is False
+
+        Returns
+        -------
+        design : array-like
+            The design matrix.
+        """
+        design_list = []
+        # Sites
+        if fitting:
+            self.site_encoder = OneHotEncoder(sparse_output=False)
+            self.site_encoder.fit(sites)
+
+        sites_design = self.site_encoder.transform(sites)
+        design_list.append(sites_design)
+
+        # Discrete covariates
+        if discrete_covariates is not None:
+            n_discrete_covariates = discrete_covariates.shape[1]
+
+            if fitting:
+                self.discrete_encoders = []
+
+                for i in range(n_discrete_covariates):
+                    discrete_encoder = OneHotEncoder(sparse_output=False)
+                    discrete_encoder.fit(discrete_covariates[:, i][:, np.newaxis])
+                    self.discrete_encoders.append(discrete_encoder)
+
+            for i in range(n_discrete_covariates):
+                discrete_encoder = self.discrete_encoders[i]
+                discrete_covariate_one_hot = discrete_encoder.transform(
+                    discrete_covariates[:, i][:, np.newaxis]
+                )
+                discrete_covariate_design = discrete_covariate_one_hot[:, 1:]
+                design_list.append(discrete_covariate_design)
+
+        # Continuous covariates
+        if continuous_covariates is not None:
+            design_list.append(continuous_covariates)
+
+        design = np.hstack(design_list)
+        return design
+
+    def _standardize_across_features(
+        self, data, design, n_samples, n_samples_per_site, fitting=False
+    ):
+        """Standardization of the features
+
+        The magnitude of the features could create bias in the empirical Bayes
+        estimates of the prior distribution.
+        To avoid this, the features are standardized to all of them have similar
+        overall mean and variance.
+
+        Parameters
+        ----------
+        data : array-like
+        design : array-like
+        n_samples : integer
+        n_samples_per_site : list of integer
+        fitting : boolean, default is False
+            Indicates if this method is executed inside the
+            fit method (in order to save the parameters to use later).
+
+        Returns
+        -------
+        standardized_data : array-like
+        standardized_mean : array-like
+            Standardized mean used during the process
+        """
+        if fitting:
+            self.beta_hat = np.dot(
+                np.dot(la.inv(np.dot(design.T, design)), design.T), data.T
+            )
+
+            # Standardization Model
+            self.grand_mean = np.dot(
+                (n_samples_per_site / float(n_samples)).T,
+                self.beta_hat[: self.n_sites, :],
+            )
+            self.var_pooled = np.dot(
+                ((data - np.dot(design, self.beta_hat).T) ** 2),
+                np.ones((n_samples, 1)) / float(n_samples),
+            )
+
+        standardized_mean = np.dot(
+            self.grand_mean.T[:, np.newaxis], np.ones((1, n_samples))
+        )
+
+        tmp = np.array(design.copy())
+        tmp[:, : self.n_sites] = 0
+        standardized_mean += np.dot(tmp, self.beta_hat).T
+
+        standardized_data = (data - standardized_mean) / np.dot(
+            np.sqrt(self.var_pooled), np.ones((1, n_samples))
+        )
+
+        return standardized_data, standardized_mean
+
+    def _fit_ls_model(self, standardized_data, design, idx_per_site):
+        """Location and scale (L/S) adjustments
+
+        Parameters
+        ----------
+        standardized_data : array-like
+        design : array-like
+        idx_per_site : list of list of integer
+        """
+        site_design = design[:, : self.n_sites]
+        gamma_hat = np.dot(
+            np.dot(la.inv(np.dot(site_design.T, site_design)), site_design.T),
+            standardized_data.T,
+        )
+
+        delta_hat = []
+        for site_idxs in idx_per_site:
+            delta_hat.append(np.var(standardized_data[:, site_idxs], axis=1, ddof=1))
+
+        return gamma_hat, delta_hat
+
+    def _find_priors(self, gamma_hat, delta_hat):
+        """Compute a and b priors"""
+        gamma_bar = np.mean(gamma_hat, axis=1)
+        tau_2 = np.var(gamma_hat, axis=1, ddof=1)
+
+        def aprior_fn(gamma_hat):
+            m = np.mean(gamma_hat)
+            s2 = np.var(gamma_hat, ddof=1, dtype=np.float32)
+            return (2 * s2 + m**2) / s2
+
+        a_prior = list(map(aprior_fn, delta_hat))
+
+        def bprior_fn(gamma_hat):
+            m = np.mean(gamma_hat)
+            s2 = np.var(gamma_hat, ddof=1, dtype=np.float32)
+            return (m * s2 + m**3) / s2
+
+        b_prior = list(map(bprior_fn, delta_hat))
+
+        return gamma_bar, tau_2, a_prior, b_prior
+
+    def _find_parametric_adjustments(
+        self,
+        standardized_data,
+        idx_per_site,
+        gamma_hat,
+        delta_hat,
+        gamma_bar,
+        tau_2,
+        a_prior,
+        b_prior,
+    ):
+        """Compute empirical Bayes site/batch effect parameter
+        estimates using parametric empirical priors"""
+
+        gamma_star, delta_star = [], []
+
+        for i, site_idxs in enumerate(idx_per_site):
+            gamma_hat_adjust, delta_hat_adjust = self._iteration_solver(
+                standardized_data[:, site_idxs],
+                gamma_hat[i],
+                delta_hat[i],
+                gamma_bar[i],
+                tau_2[i],
+                a_prior[i],
+                b_prior[i],
+            )
+
+            gamma_star.append(gamma_hat_adjust)
+            delta_star.append(delta_hat_adjust)
+
+        return np.array(gamma_star), np.array(delta_star)
+
+    def _iteration_solver(
+        self,
+        standardized_data,
+        gamma_hat,
+        delta_hat,
+        gamma_bar,
+        tau_2,
+        a_prior,
+        b_prior,
+        convergence=0.0001,
+    ):
+        """Compute iterative method to find the
+        parametric site/batch effect adjustments"""
+        n = (1 - np.isnan(standardized_data)).sum(axis=1)
+        gamma_hat_old = gamma_hat.copy()
+        delta_hat_old = delta_hat.copy()
+
+        def postmean(gamma_hat, gamma_bar, n, delta_star, tau_2):
+            return (tau_2 * n * gamma_hat + delta_star * gamma_bar) / (
+                tau_2 * n + delta_star
+            )
+
+        def postvar(sum_2, n, a_prior, b_prior):
+            return (0.5 * sum_2 + b_prior) / (n / 2.0 + a_prior - 1.0)
+
+        change = 1
+        count = 0
+
+        while change > convergence:
+            gamma_hat_new = postmean(gamma_hat, gamma_bar, n, delta_hat_old, tau_2)
+            sum_2 = (
+                (
+                    standardized_data
+                    - np.dot(
+                        gamma_hat_new[:, np.newaxis],
+                        np.ones((1, standardized_data.shape[1])),
+                    )
+                )
+                ** 2
+            ).sum(axis=1)
+
+            delta_hat_new = postvar(sum_2, n, a_prior, b_prior)
+
+            change = max(
+                (abs(gamma_hat_new - gamma_hat_old) / gamma_hat_old).max(),
+                (abs(delta_hat_new - delta_hat_old) / delta_hat_old).max(),
+            )
+
+            gamma_hat_old = gamma_hat_new
+            delta_hat_old = delta_hat_new
+
+            count = count + 1
+
+        return gamma_hat_new, delta_hat_new
+
+    def _adjust_data_final(
+        self,
+        standardized_data,
+        design,
+        standardized_mean,
+        n_samples_per_site,
+        n_samples,
+        idx_per_site,
+    ):
+        """Compute the harmonized/normalized data"""
+        n_sites = self.n_sites
+        var_pooled = self.var_pooled
+        gamma_star = self.gamma_star
+        delta_star = self.delta_star
+
+        site_design = design[:, :n_sites]
+
+        bayes_data = standardized_data
+
+        for j, site_idxs in enumerate(idx_per_site):
+            denominator = np.dot(
+                np.sqrt(delta_star[j, :])[:, np.newaxis],
+                np.ones((1, n_samples_per_site[j])),
+            )
+            numerator = (
+                bayes_data[:, site_idxs]
+                - np.dot(site_design[site_idxs, :], gamma_star).T
+            )
+
+            bayes_data[:, site_idxs] = numerator / denominator
+
+        bayes_data = (
+            bayes_data * np.dot(np.sqrt(var_pooled), np.ones((1, n_samples)))
+            + standardized_mean
+        )
+
+        return bayes_data
diff --git a/afqinsight/neurocombat_sklearn/tests/__init__.py b/afqinsight/neurocombat_sklearn/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/afqinsight/neurocombat_sklearn/tests/test_common.py b/afqinsight/neurocombat_sklearn/tests/test_common.py
new file mode 100644
index 0000000..3881a14
--- /dev/null
+++ b/afqinsight/neurocombat_sklearn/tests/test_common.py
@@ -0,0 +1,8 @@
+import pytest
+from neurocombat_sklearn import CombatModel
+from sklearn.utils.estimator_checks import check_estimator
+
+
+@pytest.mark.parametrize("Estimator", [CombatModel])
+def test_all_transformers(Estimator):
+    return check_estimator(Estimator)
diff --git a/afqinsight/tests/test_bagging.py b/afqinsight/tests/test_bagging.py
index 6e89a20..c708807 100644
--- a/afqinsight/tests/test_bagging.py
+++ b/afqinsight/tests/test_bagging.py
@@ -168,9 +168,9 @@ def test_regression():
         SVR(),
     ]:
         for params in grid:
-            SerialBaggingRegressor(
-                estimator=estimator, random_state=rng, **params
-            ).fit(X_train, y_train).predict(X_test)
+            SerialBaggingRegressor(estimator=estimator, random_state=rng, **params).fit(
+                X_train, y_train
+            ).predict(X_test)
@@ -217,9 +217,7 @@ def fit(self, X, y):
 
         # Trained on dense format
         dense_results = (
-            SerialBaggingRegressor(
-                estimator=CustomSVR(), random_state=1, **params
-            )
+            SerialBaggingRegressor(estimator=CustomSVR(), random_state=1, **params)
             .fit(X_train, y_train)
             .predict(X_test)
         )
diff --git a/examples/plot_hbn_site_profiles.py b/examples/plot_hbn_site_profiles.py
index 5ad8278..6294ec9 100644
--- a/examples/plot_hbn_site_profiles.py
+++ b/examples/plot_hbn_site_profiles.py
@@ -38,11 +38,11 @@
 """
 
 import numpy as np
-from afqinsight.neurocombat_sklearn import CombatModel
 from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
 
 from afqinsight import AFQDataset
+from afqinsight.neurocombat_sklearn import CombatModel
 from afqinsight.plot import plot_tract_profiles
 
 #############################################################################
diff --git a/neurocombat_sklearn b/neurocombat_sklearn
deleted file mode 160000
index 25306f0..0000000
--- a/neurocombat_sklearn
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 25306f0a2f088764b40a376ba276a2126ff820b4
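Note: for readers of the vendored module above, ComBat models feature g of
subject j at site i as y_ijg = alpha_g + X_j beta_g + gamma_ig + delta_ig eps_ijg.
In the code, _standardize_across_features computes
z_ijg = (y_ijg - alpha_hat_g - X_j beta_hat_g) / sigma_hat_g (var_pooled is
sigma_hat_g squared and standardized_mean is alpha_hat_g + X_j beta_hat_g),
_iteration_solver alternates the posterior mean of gamma and the posterior
variance of delta until the relative change drops below the convergence
default of 1e-4, and _adjust_data_final then inverts the model with the
empirical Bayes estimates:

    y*_ijg = sigma_hat_g * (z_ijg - gamma*_ig) / sqrt(delta*_ig)
             + alpha_hat_g + X_j beta_hat_g

The notation follows Fortin et al. (2018), cited in the module docstring.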
From 4fa05908acb121d33e45ae93b37d87638a820454 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Mon, 16 Dec 2024 23:45:40 +0800
Subject: [PATCH 18/23] Rename python file to make it unique from folder name

---
 .../{neurocombat_sklearn.py => neurocombat.py}               | 0
 neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py | 9 +++++++++
 2 files changed, 9 insertions(+)
 rename afqinsight/neurocombat_sklearn/{neurocombat_sklearn.py => neurocombat.py} (100%)
 create mode 100644 neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py

diff --git a/afqinsight/neurocombat_sklearn/neurocombat_sklearn.py b/afqinsight/neurocombat_sklearn/neurocombat.py
similarity index 100%
rename from afqinsight/neurocombat_sklearn/neurocombat_sklearn.py
rename to afqinsight/neurocombat_sklearn/neurocombat.py
diff --git a/neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py b/neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py
new file mode 100644
index 0000000..837ecd4
--- /dev/null
+++ b/neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py
@@ -0,0 +1,9 @@
+import pytest
+from sklearn.utils.estimator_checks import check_estimator
+
+from afqinsight.neurocombat_sklearn import CombatModel
+
+
+@pytest.mark.parametrize("Estimator", [CombatModel])
+def test_all_transformers(Estimator):
+    return check_estimator(Estimator)

From 92f849935810947598c71b616cba59d8726551c3 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Tue, 17 Dec 2024 10:49:33 +0800
Subject: [PATCH 19/23] update __init__.py in neurocombat folder

---
 afqinsight/neurocombat_sklearn/__init__.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/afqinsight/neurocombat_sklearn/__init__.py b/afqinsight/neurocombat_sklearn/__init__.py
index 6a9d852..e7a6b18 100644
--- a/afqinsight/neurocombat_sklearn/__init__.py
+++ b/afqinsight/neurocombat_sklearn/__init__.py
@@ -1,5 +1,3 @@
-from afqinsight.neurocombat_sklearn import CombatModel
+from .neurocombat import CombatModel
 
-from ._version import __version__
-
-__all__ = ["CombatModel", "__version__"]
+__all__ = ["CombatModel"]

From 6ea828350af4583dc1a41f9a7f76da7157ded98e Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Tue, 17 Dec 2024 13:37:56 +0800
Subject: [PATCH 20/23] Use absolute import in test

---
 afqinsight/neurocombat_sklearn/tests/test_common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/afqinsight/neurocombat_sklearn/tests/test_common.py b/afqinsight/neurocombat_sklearn/tests/test_common.py
index 3881a14..fcb8a14 100644
--- a/afqinsight/neurocombat_sklearn/tests/test_common.py
+++ b/afqinsight/neurocombat_sklearn/tests/test_common.py
@@ -1,7 +1,8 @@
 import pytest
-from neurocombat_sklearn import CombatModel
 from sklearn.utils.estimator_checks import check_estimator
 
+from afqinsight.neurocombat_sklearn.neurocombat import CombatModel
+
 
 @pytest.mark.parametrize("Estimator", [CombatModel])
 def test_all_transformers(Estimator):

From 0aa5180f92cdae2783dc787bff7d5d757e78355d Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Tue, 17 Dec 2024 13:43:35 +0800
Subject: [PATCH 21/23] Update import in nn/utils.py

---
 afqinsight/nn/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/afqinsight/nn/utils.py b/afqinsight/nn/utils.py
index 1692f34..34ffb6f 100644
--- a/afqinsight/nn/utils.py
+++ b/afqinsight/nn/utils.py
@@ -2,11 +2,12 @@
 import tensorflow as tf
 import torch
 import torch.nn as nn
-from neurocombat_sklearn import CombatModel
 from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
 from tensorflow.keras import layers
 
+from afqinsight.neurocombat_sklearn import CombatModel
+
 
 def extract_layer_info_pytorch(layer):
     info = {}

From 44d150d182174cdb4193484d515f828aacbb6c11 Mon Sep 17 00:00:00 2001
From: Howard Chiu <137316255+chiuhoward@users.noreply.github.com>
Date: Tue, 17 Dec 2024 14:17:54 +0800
Subject: [PATCH 22/23] Finish the base_estimator to estimator rename in
 serial bagging, pipelines, and tests

---
 afqinsight/_serial_bagging.py                 | 28 +++++++++----------
 .../neurocombat_sklearn/tests/test_common.py |  2 +-
 afqinsight/pipeline.py                        | 22 +++++++--------
 afqinsight/tests/test_pipelines.py            | 12 ++++----
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py
index 62de2f7..c6c938c 100644
--- a/afqinsight/_serial_bagging.py
+++ b/afqinsight/_serial_bagging.py
@@ -103,7 +103,7 @@ def _parallel_build_estimators(
     max_samples = ensemble._max_samples
     bootstrap = ensemble.bootstrap
     bootstrap_features = ensemble.bootstrap_features
-    support_sample_weight = has_fit_parameter(ensemble.base_estimator_, "sample_weight")
+    support_sample_weight = has_fit_parameter(ensemble.estimator_, "sample_weight")
     if not support_sample_weight and sample_weight is not None:
         raise ValueError("The base estimator doesn't support sample weight")
 
@@ -182,7 +182,7 @@ class SerialBaggingClassifier(BaggingClassifier):
 
     Parameters
     ----------
-    base_estimator : object, default=None
+    estimator : object, default=None
         The base estimator to fit on random subsets of the dataset.
         If None, then the base estimator is a decision tree.
 
@@ -236,7 +236,7 @@ class SerialBaggingClassifier(BaggingClassifier):
 
     Attributes
     ----------
-    base_estimator_ : estimator
+    estimator_ : estimator
         The base estimator from which the ensemble is grown.
 
     n_features_in_ : int
@@ -287,7 +287,7 @@ class SerialBaggingClassifier(BaggingClassifier):
 
     def __init__(
         self,
-        base_estimator=None,
+        estimator=None,
         n_estimators=10,
         *,
         max_samples=1.0,
@@ -301,7 +301,7 @@ def __init__(
         verbose=0,
     ):
         super().__init__(
-            base_estimator=base_estimator,
+            estimator=estimator,
             n_estimators=n_estimators,
             max_samples=max_samples,
             max_features=max_features,
@@ -367,7 +367,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         self._validate_estimator()
 
         if max_depth is not None:  # pragma: no cover
-            self.base_estimator_.max_depth = max_depth
+            self.estimator_.max_depth = max_depth
 
         # Validate max_samples
         if max_samples is None:  # pragma: no cover
@@ -569,7 +569,7 @@ def predict_log_proba(self, X):
             classes corresponds to that in the attribute :term:`classes_`.
         """
         check_is_fitted(self)
-        if hasattr(self.base_estimator_, "predict_log_proba"):
+        if hasattr(self.estimator_, "predict_log_proba"):
             # Check data
             X = check_array(
                 X, accept_sparse=["csr", "csc"], dtype=None, force_all_finite=False
@@ -611,7 +611,7 @@ def predict_log_proba(self, X):
         else:
             return np.log(self.predict_proba(X))
 
-    @available_if(lambda est: hasattr(est.base_estimator, "decision_function"))
+    @available_if(lambda est: hasattr(est.estimator, "decision_function"))
     def decision_function(self, X):
         """Average of the decision functions of the base classifiers.
 
@@ -691,7 +691,7 @@ class SerialBaggingRegressor(BaggingRegressor):
 
     Parameters
    ----------
-    base_estimator : object, default=None
+    estimator : object, default=None
         The base estimator to fit on random subsets of the dataset.
         If None, then the base estimator is a decision tree.
 
@@ -746,7 +746,7 @@ class SerialBaggingRegressor(BaggingRegressor):
 
     Attributes
     ----------
-    base_estimator_ : estimator
+    estimator_ : estimator
         The base estimator from which the ensemble is grown.
 
     n_features_in_ : int
@@ -781,7 +781,7 @@ class SerialBaggingRegressor(BaggingRegressor):
     >>> X, y = make_regression(n_samples=100, n_features=4,
     ...                        n_informative=2, n_targets=1,
     ...                        random_state=0, shuffle=False)
-    >>> regr = BaggingRegressor(base_estimator=SVR(),
+    >>> regr = BaggingRegressor(estimator=SVR(),
     ...                         n_estimators=10, random_state=0).fit(X, y)
     >>> regr.predict([[0, 0, 0, 0]])
     array([-2.8720...])
@@ -804,7 +804,7 @@ class SerialBaggingRegressor(BaggingRegressor):
 
     def __init__(
         self,
-        base_estimator=None,
+        estimator=None,
         n_estimators=10,
         max_samples=1.0,
         max_features=1.0,
@@ -817,7 +817,7 @@ def __init__(
         verbose=0,
     ):
         super().__init__(
-            base_estimator=base_estimator,
+            estimator=estimator,
             n_estimators=n_estimators,
             max_samples=max_samples,
             max_features=max_features,
@@ -882,7 +882,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):
         self._validate_estimator()
 
         if max_depth is not None:  # pragma: no cover
-            self.base_estimator_.max_depth = max_depth
+            self.estimator_.max_depth = max_depth
 
         # Validate max_samples
         if max_samples is None:  # pragma: no cover
diff --git a/afqinsight/neurocombat_sklearn/tests/test_common.py b/afqinsight/neurocombat_sklearn/tests/test_common.py
index fcb8a14..117168d 100644
--- a/afqinsight/neurocombat_sklearn/tests/test_common.py
+++ b/afqinsight/neurocombat_sklearn/tests/test_common.py
@@ -4,6 +4,6 @@
 from afqinsight.neurocombat_sklearn.neurocombat import CombatModel
 
 
-@pytest.mark.parametrize("Estimator", [CombatModel])
+@pytest.mark.parametrize("Estimator", [CombatModel()])
 def test_all_transformers(Estimator):
     return check_estimator(Estimator)
diff --git a/afqinsight/pipeline.py b/afqinsight/pipeline.py
index 213f218..3171eca 100755
--- a/afqinsight/pipeline.py
+++ b/afqinsight/pipeline.py
@@ -270,7 +270,7 @@ def call_with_kwargs(Transformer, kwargs):
 
     if estimator is not None:
         if inspect.isclass(estimator) and issubclass(estimator, BaseEstimator):
-            base_estimator = call_with_kwargs(estimator, estimator_kwargs)
+            estimator = call_with_kwargs(estimator, estimator_kwargs)
 
             if ensemble_meta_estimator is not None:
                 allowed = ["bagging", "adaboost", "serial-bagging"]
@@ -284,33 +284,33 @@ def call_with_kwargs(Transformer, kwargs):
                 else:
                     ensembler_kwargs = {}
 
-                ensembler_kwargs["base_estimator"] = base_estimator
+                ensembler_kwargs["estimator"] = estimator
 
                 if isinstance(ensemble_meta_estimator, str):
                     if ensemble_meta_estimator.lower() == "bagging":
-                        if is_classifier(base_estimator):
+                        if is_classifier(estimator):
                             ensembler = call_with_kwargs(
                                 BaggingClassifier, ensembler_kwargs
                             )
-                        elif is_regressor(base_estimator):
+                        elif is_regressor(estimator):
                             ensembler = call_with_kwargs(
                                 BaggingRegressor, ensembler_kwargs
                             )
                     elif ensemble_meta_estimator.lower() == "serial-bagging":
-                        if is_classifier(base_estimator):
+                        if is_classifier(estimator):
                             ensembler = call_with_kwargs(
                                 SerialBaggingClassifier, ensembler_kwargs
                             )
-                        elif is_regressor(base_estimator):
+                        elif is_regressor(estimator):
                             ensembler = call_with_kwargs(
                                 SerialBaggingRegressor, ensembler_kwargs
                             )
                     elif ensemble_meta_estimator.lower() == "adaboost":
-                        if is_classifier(base_estimator):
+                        if is_classifier(estimator):
                             ensembler = call_with_kwargs(
                                 AdaBoostClassifier, ensembler_kwargs
                             )
-                        elif is_regressor(base_estimator):
+                        elif is_regressor(estimator):
                             ensembler = call_with_kwargs(
                                 AdaBoostRegressor, ensembler_kwargs
                             )
@@ -330,7 +330,7 @@ def call_with_kwargs(Transformer, kwargs):
             else:
                 raise ValueError(err_msg.substitute(input=ensemble_meta_estimator))
 
-            base_estimator = ensembler
+            estimator = ensembler
 
         if any(
             [
@@ -340,14 +340,14 @@ def call_with_kwargs(Transformer, kwargs):
             ]
         ):
             pl_estimator = TransformedTargetRegressor(
-                base_estimator,
+                estimator,
                 transformer=target_transformer,
                 func=target_transform_func,
                 inverse_func=target_transform_inverse_func,
                 check_inverse=target_transform_check_inverse,
             )
         else:
-            pl_estimator = base_estimator
+            pl_estimator = estimator
     else:
         raise ValueError(
             "If provided, estimator must inherit from sklearn.base.BaseEstimator; "
diff --git a/afqinsight/tests/test_pipelines.py b/afqinsight/tests/test_pipelines.py
index d28c24d..71aa930 100644
--- a/afqinsight/tests/test_pipelines.py
+++ b/afqinsight/tests/test_pipelines.py
@@ -123,9 +123,9 @@ def test_classifier_pipeline_steps(
         else:
             assert isinstance(pipeline.named_steps["estimate"], EnsembleStep)  # nosec
             ensemble_params = pipeline.named_steps["estimate"].get_params()
-            correct_params = EnsembleStep(base_estimator=EstimatorStep()).get_params()
-            ensemble_base_est = ensemble_params.pop("base_estimator")
-            correct_params.pop("base_estimator")
+            correct_params = EnsembleStep(estimator=EstimatorStep()).get_params()
+            ensemble_base_est = ensemble_params.pop("estimator")
+            correct_params.pop("estimator")
             assert ensemble_params == correct_params  # nosec
             assert isinstance(ensemble_base_est, EstimatorStep)  # nosec
     else:
@@ -142,9 +142,9 @@ def test_classifier_pipeline_steps(
             pipeline.named_steps["estimate"].regressor, EnsembleStep
         )
         ensemble_params = pipeline.named_steps["estimate"].regressor.get_params()
-        correct_params = EnsembleStep(base_estimator=EstimatorStep()).get_params()
-        ensemble_base_est = ensemble_params.pop("base_estimator")
-        correct_params.pop("base_estimator")
+        correct_params = EnsembleStep(estimator=EstimatorStep()).get_params()
+        ensemble_base_est = ensemble_params.pop("estimator")
+        correct_params.pop("estimator")
         assert ensemble_params == correct_params  # nosec
         assert isinstance(ensemble_base_est, EstimatorStep)  # nosec

From 193b0aea1c941eb37a592dd0027aaa37e5b3a21f Mon Sep 17 00:00:00 2001
From: Ariel Rokem
Date: Fri, 20 Dec 2024 16:42:16 -0800
Subject: [PATCH 23/23] Removes an extraneous (?) copy of neurocombat sklearn

---
 .../neurocombat_sklearn/tests/test_common.py | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py

diff --git a/neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py b/neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py
deleted file mode 100644
index 837ecd4..0000000
--- a/neurocombat_sklearn/neurocombat_sklearn/tests/test_common.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import pytest
-from sklearn.utils.estimator_checks import check_estimator
-
-from afqinsight.neurocombat_sklearn import CombatModel
-
-
-@pytest.mark.parametrize("Estimator", [CombatModel])
-def test_all_transformers(Estimator):
-    return check_estimator(Estimator)
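Note: with the series applied, the harmonizer is importable from the package
itself. A minimal smoke test against the vendored API; the data here are
synthetic, the shapes follow the docstrings in neurocombat.py, and it assumes
every site label occurs at least twice so the per-site variances (ddof=1) are
defined:

    import numpy as np

    from afqinsight.neurocombat_sklearn import CombatModel

    rng = np.random.default_rng(0)
    data = rng.normal(size=(20, 5))           # [n_samples, n_features]
    sites = rng.integers(0, 2, size=(20, 1))  # [n_samples, 1] site labels
    age = rng.uniform(5, 21, size=(20, 1))    # continuous covariate to preserve

    combat = CombatModel()
    # Extra positional args are forwarded as
    # (discrete_covariates, continuous_covariates).
    harmonized = combat.fit_transform(data, sites, None, age)
    assert harmonized.shape == data.shape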