From a302d910a9a021508a243e70d8f8896db1dbe1a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ot=C3=A1vio=20Vasques?= Date: Wed, 8 Nov 2023 18:41:55 -0300 Subject: [PATCH] Remove the scikit learn restriction and bump minimal python version to 3.8 (#233) * Remove the scikit learn restriction * Set minimal version to 3.7 * Fix linter and swap the type check in the metalearners * Replace boston dataset by california * rollback the type check change, linter will break * Remove list accessor of the california dataset * Reformat imports * Change acessors * Fix feature name * Remove trailing space * Put the correct test value * Change test value * Change test value * Fix test pd extractors * Fix transformation * Fix type annotations * Lint fix * Lint fix * Put the correct version * Add changelog * Bump lightgbm * Add upper limits to deps * Bump major * Increase major constraint of pandas * Remove upper limitation on xgboost * Remove silent unused keyword * Update a few types * Lint fix * Add typing extensions for python 3.7 support * trick to avoid type checking for lists * Fix classification tests * Try to replace ndarrays by numpy typing NDArrays * Change back ndarray * Reduce type list * Add one more type * Add other types * Remove all other types * Try to use numpy typing * Drop python 3.7 support * Swap utils by testing in pandas assertion functions * In order to support pandas 2 it is required to bump xgboost up to version 2 * Fix xgboost dmatrix tests * Fix rank categorical * Solve pd extractors test * Fix hash eval test * Fix lookup in ensemble learner * Add type annotation to the new functions * Create conditional assertions based on python version * Remove necessity for typing extension and fix hash values * Lint fix * Fix mypi for multiclass classification for lgbm classifier * Bump catboost and joblib * Bump pytest * Bump coverage packages * Bump xdist * Bump mypy * Bump hypothesis * Rollback coverage bumps * Update changelog * Update changelog * Change hash test to match exactly 8 minor version --- .github/workflows/push.yaml | 2 +- CHANGELOG.md | 6 + requirements.txt | 12 +- requirements_catboost.txt | 2 +- requirements_lgbm.txt | 2 +- requirements_test.txt | 8 +- requirements_tools.txt | 4 +- requirements_xgboost.txt | 2 +- setup.py | 4 +- src/fklearn/resources/VERSION | 2 +- src/fklearn/training/classification.py | 194 +++++++++++------- src/fklearn/training/ensemble.py | 10 +- src/fklearn/training/regression.py | 187 +++++++++-------- src/fklearn/training/transformation.py | 23 ++- src/fklearn/tuning/selectors.py | 28 ++- src/fklearn/types/types.py | 4 +- .../cate_learning/test_meta_learners.py | 31 ++- tests/metrics/test_pd_extractors.py | 24 +-- tests/training/test_calibration.py | 2 +- tests/training/test_classification.py | 2 +- tests/training/test_pipeline.py | 10 +- tests/validation/test_evaluators.py | 17 +- 22 files changed, 348 insertions(+), 228 deletions(-) diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index be99952f..acfaf886 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -50,7 +50,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9"] + python-version: ["3.8", "3.9"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 58d75737..a467db27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## [3.0.0] - 2023-11-08 +- **Enhancement** + - Remove support for python 3.6 and 3.7. 
+ - Bumps in joblib, numpy, pandas, scikit-learn, statsmodels, toolz, catboost, lightgbm, shap, xgboost + and test auxiliary packages. + ## [2.3.1] - 2023-04-11 - **Bugfix** - Remove incorrect `lightgbm` import from common paths diff --git a/requirements.txt b/requirements.txt index b33f771c..29ad3628 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -joblib>=0.13.2,<2 -numpy>=1.16.4,<2 -pandas>=0.24.1,<2 -scikit-learn>=0.21.2,<0.25.0 -statsmodels>=0.9.0,<1 -toolz>=0.9.0,<1 +joblib>=1.3.2,<2 +numpy>=1.24.4,<2 +pandas>=2,<3 +scikit-learn>=1,<2 +statsmodels>=0.14.0,<1 +toolz>=0.12.0,<1 diff --git a/requirements_catboost.txt b/requirements_catboost.txt index 79257b11..48319690 100644 --- a/requirements_catboost.txt +++ b/requirements_catboost.txt @@ -1 +1 @@ -catboost>=0.14.2,<2 +catboost>=1.2.2,<2 diff --git a/requirements_lgbm.txt b/requirements_lgbm.txt index 89ea48ba..eb2520df 100644 --- a/requirements_lgbm.txt +++ b/requirements_lgbm.txt @@ -1 +1 @@ -lightgbm>=2.2.2,<4 +lightgbm>=4,<5 diff --git a/requirements_test.txt b/requirements_test.txt index 5a351e2e..83968359 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,7 +1,7 @@ -pytest>=4.2.1,<7 +pytest>=7.4.3,<8 pytest-cov>=2.6.1,<3 -pytest-xdist>=1.26.1,<3 -mypy>=0.670,<1 +pytest-xdist>=3.3.1,<4 +mypy>=1.6.1,<2 coverage<5 codecov>=2.0,<3 -hypothesis>=5.5.4,<7 +hypothesis>=6.88.3,<7 diff --git a/requirements_tools.txt b/requirements_tools.txt index 8792381a..cc23f836 100644 --- a/requirements_tools.txt +++ b/requirements_tools.txt @@ -1,2 +1,2 @@ -shap>=0.31.0,<=0.40 -swifter>=0.284,<2 +shap>=0.43,<1 +swifter>=0.24,<2 diff --git a/requirements_xgboost.txt b/requirements_xgboost.txt index 0254ec0c..f72dd67f 100644 --- a/requirements_xgboost.txt +++ b/requirements_xgboost.txt @@ -1 +1 @@ -xgboost>=0.81,<1.5 +xgboost>=2,<3 diff --git a/setup.py b/setup.py index 065ec867..79f44d95 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def requirements_from_pip(filename='requirements.txt'): long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/nubank/{:s}'.format(REPO_NAME), - python_requires='>=3.6.2,<3.10', + python_requires='>=3.8,<3.10', author="Nubank", package_dir={'': 'src'}, packages=find_packages('src'), @@ -52,8 +52,6 @@ def requirements_from_pip(filename='requirements.txt'): include_package_data=True, zip_safe=False, classifiers=[ - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9' ]) diff --git a/src/fklearn/resources/VERSION b/src/fklearn/resources/VERSION index 2bf1c1cc..4a36342f 100644 --- a/src/fklearn/resources/VERSION +++ b/src/fklearn/resources/VERSION @@ -1 +1 @@ -2.3.1 +3.0.0 diff --git a/src/fklearn/training/classification.py b/src/fklearn/training/classification.py index a27eaa28..75feabad 100644 --- a/src/fklearn/training/classification.py +++ b/src/fklearn/training/classification.py @@ -1,6 +1,7 @@ -from typing import List, Any, Optional, Callable, Tuple, Union, TYPE_CHECKING +from typing import List, Any, Optional, Callable, Tuple, Union, TYPE_CHECKING, Literal import numpy as np +import numpy.typing as npt import pandas as pd from pathlib import Path from toolz import curry, merge, assoc @@ -8,7 +9,7 @@ from sklearn.linear_model import LogisticRegression from sklearn import __version__ as sk_version -from fklearn.types import LearnerReturnType, LogType +from fklearn.types import LearnerReturnType, LearnerLogType, 
LogType from fklearn.common_docstrings import learner_return_docstring, learner_pred_fn_docstring from fklearn.training.utils import log_learner_time, expand_features_encoded @@ -83,16 +84,19 @@ def p(new_df: pd.DataFrame) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("logistic_classification_learner") - log = {'logistic_classification_learner': { - 'features': features, - 'target': target, - 'parameters': merged_params, - 'prediction_column': prediction_column, - 'package': "sklearn", - 'package_version': sk_version, - 'feature_importance': dict(zip(features, clf.coef_.flatten())), - 'training_samples': len(df)}, - 'object': clf} + log = { + 'logistic_classification_learner': { + 'features': features, + 'target': target, + 'parameters': merged_params, + 'prediction_column': prediction_column, + 'package': "sklearn", + 'package_version': sk_version, + 'feature_importance': dict(zip(features, clf.coef_.flatten())), + 'training_samples': len(df) + }, + 'object': clf + } return p, p(df), log @@ -174,13 +178,21 @@ def xgb_classification_learner(df: pd.DataFrame, features = features if not encode_extra_cols else expand_features_encoded(df, features) - dtrain = xgb.DMatrix(df[features].values, label=df[target].values, feature_names=map(str, features), weight=weights) + dtrain = xgb.DMatrix( + df[features].values, + label=df[target].values, + feature_names=list(map(str, features)), + weight=weights + ) bst = xgb.train(params, dtrain, num_estimators) def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: - dtest = xgb.DMatrix(new_df[features].values, feature_names=map(str, features)) + dtest = xgb.DMatrix( + new_df[features].values, + feature_names=list(map(str, features)) + ) pred = bst.predict(dtest) if params["objective"] == "multi:softprob": @@ -218,16 +230,19 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("xgb_classification_learner", shap=True) - log = {'xgb_classification_learner': { - 'features': features, - 'target': target, - 'prediction_column': prediction_column, - 'package': "xgboost", - 'package_version': xgb.__version__, - 'parameters': assoc(params, "num_estimators", num_estimators), - 'feature_importance': bst.get_score(), - 'training_samples': len(df)}, - 'object': bst} + log = { + 'xgb_classification_learner': { + 'features': features, + 'target': target, + 'prediction_column': prediction_column, + 'package': "xgboost", + 'package_version': xgb.__version__, + 'parameters': assoc(params, "num_estimators", num_estimators), + 'feature_importance': bst.get_score(), + 'training_samples': len(df) + }, + 'object': bst + } return p, p(df), log @@ -393,16 +408,19 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("catboost_classification_learner", shap=True) - log = {'catboost_classification_learner': { - 'features': features, - 'target': target, - 'prediction_column': prediction_column, - 'package': "catboost", - 'package_version': catboost.__version__, - 'parameters': assoc(params, "num_estimators", num_estimators), - 'feature_importance': cbr.feature_importances_, - 'training_samples': len(df)}, - 'object': cbr} + log = { + 'catboost_classification_learner': { + 'features': features, + 'target': target, + 'prediction_column': prediction_column, + 'package': "catboost", + 'package_version': catboost.__version__, + 'parameters': assoc(params, "num_estimators", num_estimators), + 'feature_importance': cbr.feature_importances_, + 
'training_samples': len(df) + }, + 'object': cbr + } return p, p(df), log @@ -501,29 +519,34 @@ def p(new_df: pd.DataFrame) -> pd.DataFrame: @curry @log_learner_time(learner_name='lgbm_classification_learner') -def lgbm_classification_learner(df: pd.DataFrame, - features: List[str], - target: str, - learning_rate: float = 0.1, - num_estimators: int = 100, - extra_params: Optional[LogType] = None, - prediction_column: str = "prediction", - weight_column: Optional[str] = None, - encode_extra_cols: bool = True, - valid_sets: Optional[List[pd.DataFrame]] = None, - valid_names: Optional[List[str]] = None, - feval: Optional[Union[ - Callable[[np.ndarray, pd.DataFrame], Tuple[str, float, bool]], - List[Callable[[np.ndarray, pd.DataFrame], Tuple[str, float, bool]]]] - ] = None, - init_model: Optional[Union[str, Path, 'Booster']] = None, - feature_name: Union[List[str], str] = 'auto', - categorical_feature: Union[List[str], List[int], str] = 'auto', - keep_training_booster: bool = False, - callbacks: Optional[List[Callable]] = None, - dataset_init_score: Optional[Union[ - List, List[List], np.ndarray, pd.Series, pd.DataFrame] - ] = None) -> LearnerReturnType: +def lgbm_classification_learner( + df: pd.DataFrame, + features: List[str], + target: str, + learning_rate: float = 0.1, + num_estimators: int = 100, + extra_params: Optional[LogType] = None, + prediction_column: str = "prediction", + weight_column: Optional[str] = None, + encode_extra_cols: bool = True, + valid_sets: Optional[List[pd.DataFrame]] = None, + valid_names: Optional[List[str]] = None, + feval: Optional[Union[ + Union[Callable[[npt.NDArray, Any], Tuple[str, float, bool]], + Callable[[npt.NDArray, Any], List[Tuple[str, float, bool]]]], + List[Union[Callable[[npt.NDArray, Any], + Tuple[str, float, bool]], + Callable[[npt.NDArray, Any], + List[Tuple[str, float, bool]]]]], + None + ]] = None, + init_model: Optional[Union[str, Path, 'Booster']] = None, + feature_name: Union[List[str], Literal['auto']] = 'auto', + categorical_feature: Union[List[str], List[int], Literal['auto']] = 'auto', + keep_training_booster: bool = False, + callbacks: Optional[List[Callable]] = None, + dataset_init_score: Optional[Union[List, List[List], npt.NDArray, pd.Series, pd.DataFrame]] = None +) -> LearnerReturnType: """ Fits an LGBM classifier to the dataset. 
@@ -632,20 +655,37 @@ def lgbm_classification_learner(df: pd.DataFrame, features = features if not encode_extra_cols else expand_features_encoded(df, features) - dtrain = lgbm.Dataset(df[features].values, label=df[target], feature_name=list(map(str, features)), weight=weights, - silent=True, init_score=dataset_init_score) - - bst = lgbm.train(params=params, train_set=dtrain, num_boost_round=num_estimators, valid_sets=valid_sets, - valid_names=valid_names, feval=feval, init_model=init_model, feature_name=feature_name, - categorical_feature=categorical_feature, keep_training_booster=keep_training_booster, - callbacks=callbacks) + dtrain = lgbm.Dataset( + df[features].values, + label=df[target], + feature_name=list(map(str, features)), + weight=weights, + init_score=dataset_init_score + ) + + bst = lgbm.train( + params=params, + train_set=dtrain, + num_boost_round=num_estimators, + valid_sets=valid_sets, + valid_names=valid_names, + feval=feval, + init_model=init_model, + feature_name=feature_name, + categorical_feature=categorical_feature, + keep_training_booster=keep_training_booster, + callbacks=callbacks + ) def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: + predictions = bst.predict(new_df[features].values) + if isinstance(predictions, list): + predictions = np.array(predictions) if is_multiclass_classification: col_dict = {prediction_column + "_" + str(key): value - for (key, value) in enumerate(bst.predict(new_df[features].values).T)} + for (key, value) in enumerate(predictions.T)} else: - col_dict = {prediction_column: bst.predict(new_df[features].values)} + col_dict = {prediction_column: predictions} if apply_shap: import shap @@ -675,16 +715,18 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("lgbm_classification_learner", shap=True) - log = {'lgbm_classification_learner': { - 'features': features, - 'target': target, - 'prediction_column': prediction_column, - 'package': "lightgbm", - 'package_version': lgbm.__version__, - 'parameters': assoc(params, "num_estimators", num_estimators), - 'feature_importance': dict(zip(features, bst.feature_importance().tolist())), - 'training_samples': len(df)}, - 'object': bst} + log: LearnerLogType = { + 'lgbm_classification_learner': { + 'features': features, + 'target': target, + 'prediction_column': prediction_column, + 'package': "lightgbm", + 'package_version': lgbm.__version__, + 'parameters': assoc(params, "num_estimators", num_estimators), + 'feature_importance': dict(zip(features, bst.feature_importance().tolist())), + 'training_samples': len(df)}, + 'object': bst + } return p, p(df), log diff --git a/src/fklearn/training/ensemble.py b/src/fklearn/training/ensemble.py index 1c265505..fec94b84 100644 --- a/src/fklearn/training/ensemble.py +++ b/src/fklearn/training/ensemble.py @@ -1,5 +1,7 @@ from typing import Any, Dict, List, TypeVar +import numpy as np +import numpy.typing as npt import pandas as pd from toolz import curry, assoc, compose @@ -136,10 +138,14 @@ def xgb_octopus_classification_learner(train_set: pd.DataFrame, def p(df: pd.DataFrame) -> pd.DataFrame: pred_fn = compose(*pred_fns.values()) + def lookup(df: pd.DataFrame) -> npt.NDArray: + idx, cols = pd.factorize(df.pred_bin.values.squeeze()) + output = df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx] + return output + return (pred_fn(df) .assign(pred_bin=prediction_column + "_bin_" + df[train_split_col].astype(str)) - .assign(prediction=lambda d: d.lookup(d.index.values, -
d.pred_bin.values.squeeze())) + .assign(prediction=lookup) .rename(index=str, columns={"prediction": prediction_column}) .drop("pred_bin", axis=1)) diff --git a/src/fklearn/training/regression.py b/src/fklearn/training/regression.py index 9131abfb..2820a83f 100644 --- a/src/fklearn/training/regression.py +++ b/src/fklearn/training/regression.py @@ -70,16 +70,19 @@ def p(new_df: pd.DataFrame) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("linear_regression_learner") - log = {'linear_regression_learner': { - 'features': features, - 'target': target, - 'parameters': params, - 'prediction_column': prediction_column, - 'package': "sklearn", - 'package_version': sk_version, - 'feature_importance': dict(zip(features, regr.coef_.flatten())), - 'training_samples': len(df)}, - 'object': regr} + log = { + 'linear_regression_learner': { + 'features': features, + 'target': target, + 'parameters': params, + 'prediction_column': prediction_column, + 'package': "sklearn", + 'package_version': sk_version, + 'feature_importance': dict(zip(features, regr.coef_.flatten())), + 'training_samples': len(df) + }, + 'object': regr + } return p, p(df), log @@ -159,12 +162,20 @@ def xgb_regression_learner(df: pd.DataFrame, features = features if not encode_extra_cols else expand_features_encoded(df, features) - dtrain = xgb.DMatrix(df[features].values, label=df[target].values, weight=weights, feature_names=map(str, features)) + dtrain = xgb.DMatrix( + df[features].values, + label=df[target].values, + weight=weights, + feature_names=list(map(str, features)) + ) bst = xgb.train(params, dtrain, num_estimators) def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: - dtest = xgb.DMatrix(new_df[features].values, feature_names=map(str, features)) + dtest = xgb.DMatrix( + new_df[features].values, + feature_names=list(map(str, features)) + ) col_dict = {prediction_column: bst.predict(dtest)} if apply_shap: @@ -182,16 +193,19 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("xgb_regression_learner", shap=True) - log = {'xgb_regression_learner': { - 'features': features, - 'target': target, - 'prediction_column': prediction_column, - 'package': "xgboost", - 'package_version': xgb.__version__, - 'parameters': assoc(params, "num_estimators", num_estimators), - 'feature_importance': bst.get_score(), - 'training_samples': len(df)}, - 'object': bst} + log = { + 'xgb_regression_learner': { + 'features': features, + 'target': target, + 'prediction_column': prediction_column, + 'package': "xgboost", + 'package_version': xgb.__version__, + 'parameters': assoc(params, "num_estimators", num_estimators), + 'feature_importance': bst.get_score(), + 'training_samples': len(df) + }, + 'object': bst + } return p, p(df), log @@ -287,16 +301,19 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("CatBoostRegressor", shap=False) - log = {'catboost_regression_learner': { - 'features': features, - 'target': target, - 'prediction_column': prediction_column, - 'package': "catboost", - 'package_version': catboost.__version__, - 'parameters': assoc(params, "num_estimators", num_estimators), - 'feature_importance': cbr.feature_importances_, - 'training_samples': len(df)}, - 'object': cbr} + log = { + 'catboost_regression_learner': { + 'features': features, + 'target': target, + 'prediction_column': prediction_column, + 'package': "catboost", + 'package_version': catboost.__version__, + 'parameters': 
assoc(params, "num_estimators", num_estimators), + 'feature_importance': cbr.feature_importances_, + 'training_samples': len(df) + }, + 'object': cbr + } return p, p(df), log @@ -387,16 +404,18 @@ def p(new_df: pd.DataFrame) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("gp_regression_learner") - log = {'gp_regression_learner': { - 'features': features, - 'target': target, - 'parameters': merge(params, {'extra_variance': extra_variance, - 'return_std': return_std}), - 'prediction_column': prediction_column, - 'package': "sklearn", - 'package_version': sk_version, - 'training_samples': len(df)}, - 'object': gp} + log = { + 'gp_regression_learner': { + 'features': features, + 'target': target, + 'parameters': merge(params, {'extra_variance': extra_variance, + 'return_std': return_std}), + 'prediction_column': prediction_column, + 'package': "sklearn", + 'package_version': sk_version, + 'training_samples': len(df)}, + 'object': gp + } return p, p(df), log @@ -406,15 +425,17 @@ def p(new_df: pd.DataFrame) -> pd.DataFrame: @curry @log_learner_time(learner_name='lgbm_regression_learner') -def lgbm_regression_learner(df: pd.DataFrame, - features: List[str], - target: str, - learning_rate: float = 0.1, - num_estimators: int = 100, - extra_params: Dict[str, Any] = None, - prediction_column: str = "prediction", - weight_column: str = None, - encode_extra_cols: bool = True) -> LearnerReturnType: +def lgbm_regression_learner( + df: pd.DataFrame, + features: List[str], + target: str, + learning_rate: float = 0.1, + num_estimators: int = 100, + extra_params: Dict[str, Any] = None, + prediction_column: str = "prediction", + weight_column: str = None, + encode_extra_cols: bool = True +) -> LearnerReturnType: """ Fits an LGBM regressor to the dataset. @@ -478,8 +499,7 @@ def lgbm_regression_learner(df: pd.DataFrame, features = features if not encode_extra_cols else expand_features_encoded(df, features) - dtrain = lgbm.Dataset(df[features].values, label=df[target], feature_name=list(map(str, features)), weight=weights, - silent=True) + dtrain = lgbm.Dataset(df[features].values, label=df[target], feature_name=list(map(str, features)), weight=weights) bst = lgbm.train(params, dtrain, num_estimators) @@ -501,16 +521,18 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("lgbm_regression_learner", shap=True) - log = {'lgbm_regression_learner': { - 'features': features, - 'target': target, - 'prediction_column': prediction_column, - 'package': "lightgbm", - 'package_version': lgbm.__version__, - 'parameters': assoc(params, "num_estimators", num_estimators), - 'feature_importance': dict(zip(features, bst.feature_importance().tolist())), - 'training_samples': len(df)}, - 'object': bst} + log = { + 'lgbm_regression_learner': { + 'features': features, + 'target': target, + 'prediction_column': prediction_column, + 'package': "lightgbm", + 'package_version': lgbm.__version__, + 'parameters': assoc(params, "num_estimators", num_estimators), + 'feature_importance': dict(zip(features, bst.feature_importance().tolist())), + 'training_samples': len(df)}, + 'object': bst + } return p, p(df), log @@ -520,13 +542,15 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame: @curry @log_learner_time(learner_name='custom_supervised_model_learner') -def custom_supervised_model_learner(df: pd.DataFrame, - features: List[str], - target: str, - model: Any, - supervised_type: str, - log: Dict[str, Dict], - prediction_column: str = "prediction") -> 
LearnerReturnType: +def custom_supervised_model_learner( + df: pd.DataFrame, + features: List[str], + target: str, + model: Any, + supervised_type: str, + log: Dict[str, Dict], + prediction_column: str = "prediction" +) -> LearnerReturnType: """ Fits a custom model to the dataset. Return the predict function, the predictions for the input dataset and a log describing the model. @@ -658,16 +682,19 @@ def p(new_df: pd.DataFrame) -> pd.DataFrame: p.__doc__ = learner_pred_fn_docstring("elasticnet_regression_learner") - log = {'elasticnet_regression_learner': { - 'features': features, - 'target': target, - 'parameters': params, - 'prediction_column': prediction_column, - 'package': "sklearn", - 'package_version': sk_version, - 'feature_importance': dict(zip(features, regr.coef_.flatten())), - 'training_samples': len(df)}, - 'object': regr} + log = { + 'elasticnet_regression_learner': { + 'features': features, + 'target': target, + 'parameters': params, + 'prediction_column': prediction_column, + 'package': "sklearn", + 'package_version': sk_version, + 'feature_importance': dict(zip(features, regr.coef_.flatten())), + 'training_samples': len(df) + }, + 'object': regr + } return p, p(df), log diff --git a/src/fklearn/training/transformation.py b/src/fklearn/training/transformation.py index 7243c57a..1a117fae 100644 --- a/src/fklearn/training/transformation.py +++ b/src/fklearn/training/transformation.py @@ -500,12 +500,14 @@ def rank_categorical(df: pd.DataFrame, Whether to store the feature value -> integer dictionary in the log """ - col_categ_getter = lambda col: (df[col] - .value_counts() - .reset_index() - .sort_values([col, "index"], ascending=[False, True]) - .set_index("index")[col] - .rank(method="first", ascending=False).to_dict()) + def col_categ_getter(col: str) -> Dict: + return (df[col] + .value_counts() + .reset_index() + .sort_values([col, "count"], ascending=[True, False]) + .set_index(col)["count"] + .rank(method="first", ascending=False) + .to_dict()) vec = {column: col_categ_getter(column) for column in columns_to_rank} @@ -1027,11 +1029,11 @@ def missing_warner(df: pd.DataFrame, cols_list: List[str], cols_without_missing = df_selected.loc[:, df_selected.isna().sum(axis=0) == 0].columns.tolist() def p(dataset: pd.DataFrame) -> pd.DataFrame: - def detailed_assignment(df: pd.DataFrame, cols_to_check: List[str]) -> np.ndarray: + def detailed_assignment(df: pd.DataFrame, cols_to_check: List[str]) -> list: cols_with_missing = np.array([np.where(df[col].isna(), col, "") for col in cols_to_check]).T - missing_by_row_list = np.array([list(filter(None, x)) for x in cols_with_missing]).reshape(-1, 1) - if missing_by_row_list.size == 0: - return np.empty((df.shape[0], 0)).tolist() + missing_by_row_list: list = [list(filter(None, x)) for x in cols_with_missing] + if len(missing_by_row_list) == 0: + return np.empty((df.shape[0], 0)).tolist() else: return missing_by_row_list diff --git a/src/fklearn/tuning/selectors.py b/src/fklearn/tuning/selectors.py index e5ac1f6c..f6c710b9 100644 --- a/src/fklearn/tuning/selectors.py +++ b/src/fklearn/tuning/selectors.py @@ -3,12 +3,30 @@ from toolz.curried import pipe, first, mapcat import pandas as pd -from fklearn.tuning.samplers import remove_features_subsets, remove_by_feature_importance, remove_by_feature_shuffling -from fklearn.tuning.stoppers import stop_by_num_features, stop_by_num_features_parallel, stop_by_iter_num, \ stop_by_no_improvement, stop_by_no_improvement_parallel, aggregate_stop_funcs +from
fklearn.tuning.samplers import ( + remove_features_subsets, + remove_by_feature_importance, + remove_by_feature_shuffling +) +from fklearn.tuning.stoppers import ( + stop_by_num_features, + stop_by_num_features_parallel, + stop_by_iter_num, + stop_by_no_improvement, + stop_by_no_improvement_parallel, + aggregate_stop_funcs +) from fklearn.validation.validator import parallel_validator -from fklearn.types import EvalFnType, ExtractorFnType, LearnerReturnType, ListLogListType, LogListType, SplitterFnType,\ - ValidatorReturnType, LogType +from fklearn.types import ( + EvalFnType, + ExtractorFnType, + LearnerReturnType, + ListLogListType, + LogListType, + SplitterFnType, + ValidatorReturnType, + LogType +) SaveIntermediaryFnType = Callable[[List[ValidatorReturnType]], None] TuningLearnerFnType = Callable[[pd.DataFrame, List[str]], LearnerReturnType] diff --git a/src/fklearn/types/types.py b/src/fklearn/types/types.py index 31e9e3d7..d2f62fa9 100644 --- a/src/fklearn/types/types.py +++ b/src/fklearn/types/types.py @@ -12,8 +12,8 @@ ListLogListType = List[LogListType] # Learner types -PredictFnType = Callable[[pd.DataFrame], pd.DataFrame] -LearnerLogType = Dict[str, LogType] +PredictFnType = Callable[..., pd.DataFrame] +LearnerLogType = Dict[str, Any] LearnerReturnType = Tuple[PredictFnType, pd.DataFrame, LearnerLogType] UncurriedLearnerFnType = Callable[..., LearnerReturnType] diff --git a/tests/causal/cate_learning/test_meta_learners.py b/tests/causal/cate_learning/test_meta_learners.py index dd7d297e..24b6a73d 100644 --- a/tests/causal/cate_learning/test_meta_learners.py +++ b/tests/causal/cate_learning/test_meta_learners.py @@ -8,14 +8,25 @@ from pandas.testing import assert_frame_equal from fklearn.causal.cate_learning.meta_learners import ( - TREATMENT_FEATURE, _append_treatment_feature, _create_treatment_flag, - _filter_by_treatment, _fit_by_treatment, _get_learners, _get_model_fcn, - _get_unique_treatments, _predict_by_treatment_flag, - _simulate_t_learner_treatment_effect, _simulate_treatment_effect, - causal_s_classification_learner, causal_t_classification_learner) -from fklearn.exceptions.exceptions import (MissingControlError, - MissingTreatmentError, - MultipleTreatmentsError) + TREATMENT_FEATURE, + _append_treatment_feature, + _create_treatment_flag, + _filter_by_treatment, + _fit_by_treatment, + _get_learners, + _get_model_fcn, + _get_unique_treatments, + _predict_by_treatment_flag, + _simulate_t_learner_treatment_effect, + _simulate_treatment_effect, + causal_s_classification_learner, + causal_t_classification_learner +) +from fklearn.exceptions.exceptions import ( + MissingControlError, + MissingTreatmentError, + MultipleTreatmentsError +) from fklearn.training.classification import logistic_classification_learner from fklearn.types import LearnerFnType @@ -235,8 +246,8 @@ def test__fit_by_treatment(base_input_df): assert len(learners) == len(treatments) assert len(logs) == len(treatments) - assert type(logs) == dict - assert [type(learner) == LearnerFnType for learner in learners] + assert type(logs) is dict + assert [type(learner) is LearnerFnType for learner in learners] def ones_or_zeros_model(df): diff --git a/tests/metrics/test_pd_extractors.py b/tests/metrics/test_pd_extractors.py index 21a24aeb..93547d4a 100644 --- a/tests/metrics/test_pd_extractors.py +++ b/tests/metrics/test_pd_extractors.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd import pytest -from sklearn.datasets import load_boston +from sklearn.datasets import fetch_california_housing from 
fklearn.data.datasets import make_tutorial_data from fklearn.metrics.pd_extractors import (combined_evaluator_extractor, @@ -119,7 +119,7 @@ def test__split_evaluator_extractor__when_split_value_is_missing(): results = feature3_date_evaluator(data) - date_values = [ + date_values = pd.to_datetime([ np.datetime64("2015-01-06T00:00:00.000000000"), np.datetime64("2015-01-14T00:00:00.000000000"), np.datetime64("2015-01-22T00:00:00.000000000"), @@ -127,7 +127,7 @@ def test__split_evaluator_extractor__when_split_value_is_missing(): np.datetime64("2015-03-08T00:00:00.000000000"), np.datetime64("2015-03-09T00:00:00.000000000"), np.datetime64("2015-04-04T00:00:00.000000000"), - ] + ]) base_evaluator = evaluator_extractor(evaluator_name="mse_evaluator__target") feature3_extractor = split_evaluator_extractor( @@ -142,15 +142,15 @@ def test__split_evaluator_extractor__when_split_value_is_missing(): def test_extract(): - boston = load_boston() - df = pd.DataFrame(boston['data'], columns=boston['feature_names']) - df['target'] = boston['target'] + california = fetch_california_housing() + df = pd.DataFrame(california['data'], columns=california['feature_names']) + df['target'] = california['target'] df['time'] = pd.date_range(start='2015-01-01', periods=len(df)) np.random.seed(42) df['space'] = np.random.randint(0, 100, size=len(df)) # Define train function - train_fn = linear_regression_learner(features=boston['feature_names'].tolist(), target="target") + train_fn = linear_regression_learner(features=california['feature_names'], target="target") # Define evaluator function base_evaluator = combined_evaluators(evaluators=[ @@ -158,7 +158,7 @@ def test_extract(): spearman_evaluator(target_column='target', prediction_column='prediction') ]) - splitter = split_evaluator(eval_fn=base_evaluator, split_col='RAD', split_values=[4.0, 5.0, 24.0]) + splitter = split_evaluator(eval_fn=base_evaluator, split_col='MedInc', split_values=[0.5, 10.0, 20.0]) temporal_week_splitter = temporal_split_evaluator(eval_fn=base_evaluator, time_col='time', time_format='%Y-%W') temporal_year_splitter = temporal_split_evaluator(eval_fn=base_evaluator, time_col='time', time_format='%Y') @@ -216,11 +216,11 @@ def test_extract(): assert extract(tlc_results, base_extractors).shape == (12, 9) assert extract(tlc_results, splitter_extractor).shape == (36, 10) - assert extract(sc_results, base_extractors).shape == (5, 9) - assert extract(sc_results, splitter_extractor).shape == (15, 10) + assert extract(sc_results, base_extractors).shape == (667, 9) + assert extract(sc_results, splitter_extractor).shape == (2001, 10) - assert extract(fw_sc_results, base_extractors).shape == (3, 9) - assert extract(fw_sc_results, splitter_extractor).shape == (9, 10) + assert extract(fw_sc_results, base_extractors).shape == (674, 9) + assert extract(fw_sc_results, splitter_extractor).shape == (2022, 10) n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique()) n_time_year_folds = len(df['time'].dt.strftime('%Y').unique()) diff --git a/tests/training/test_calibration.py b/tests/training/test_calibration.py index e5b28107..500eb974 100644 --- a/tests/training/test_calibration.py +++ b/tests/training/test_calibration.py @@ -56,4 +56,4 @@ def test_find_thresholds_with_same_risk(): df_with_ecdf["fair"] = pred_df assert fair_thresholds == log["find_thresholds_with_same_risk"]["fair_thresholds"] - pd.util.testing.assert_frame_equal(df_expected, df_with_ecdf) + pd.testing.assert_frame_equal(df_expected, df_with_ecdf) diff --git 
a/tests/training/test_classification.py b/tests/training/test_classification.py index e28393ea..2d657040 100644 --- a/tests/training/test_classification.py +++ b/tests/training/test_classification.py @@ -507,7 +507,7 @@ def test_lgbm_classification_learner_params(): } ) - lgbm_dataset = lightgbm.Dataset(df[features].values, label=df[target], silent=True) + lgbm_dataset = lightgbm.Dataset(df[features].values, label=df[target]) mock_lgbm = MagicMock() mock_lgbm.predict.return_value = df_result["prediction"] diff --git a/tests/training/test_pipeline.py b/tests/training/test_pipeline.py index 4bf462ad..22c3f9fb 100644 --- a/tests/training/test_pipeline.py +++ b/tests/training/test_pipeline.py @@ -49,7 +49,7 @@ def test_build_pipeline(has_repeated_learners): pred_test_without_shap = predict_fn(df_test) assert set(pred_test_without_shap.columns) == set(pred_train.columns) - pd.util.testing.assert_frame_equal(pred_test_with_shap[pred_test_without_shap.columns], pred_test_without_shap) + pd.testing.assert_frame_equal(pred_test_with_shap[pred_test_without_shap.columns], pred_test_without_shap) @pytest.mark.parametrize("has_repeated_learners", [False, True]) @@ -73,7 +73,7 @@ def p(dataset, mult=2): side_effect_learner, kwargs_learner, has_repeated_learners=has_repeated_learners) side_effect_pipeline(test_df) - pd.util.testing.assert_frame_equal(test_df, orig_df) + pd.testing.assert_frame_equal(test_df, orig_df) @pytest.mark.parametrize("has_repeated_learners", [False, True]) @@ -97,9 +97,9 @@ def dummy_learner(df): side_effect_pipeline = build_pipeline(*variation, has_repeated_learners=has_repeated_learners) predict_fn, result_df, log = side_effect_pipeline(test_df) - pd.util.testing.assert_frame_equal(test_df, orig_df) - pd.util.testing.assert_frame_equal(result_df, expected_df) - pd.util.testing.assert_frame_equal(predict_fn(test_df, mult=mult_constant), expected_df) + pd.testing.assert_frame_equal(test_df, orig_df) + pd.testing.assert_frame_equal(result_df, expected_df) + pd.testing.assert_frame_equal(predict_fn(test_df, mult=mult_constant), expected_df) @pytest.mark.parametrize("has_repeated_learners", [False, True]) diff --git a/tests/validation/test_evaluators.py b/tests/validation/test_evaluators.py index a658fcaf..c4d14c16 100644 --- a/tests/validation/test_evaluators.py +++ b/tests/validation/test_evaluators.py @@ -1,3 +1,4 @@ +import sys import string import numpy as np @@ -468,10 +469,18 @@ def test_hash_evaluator(): assert eval_fn(df1)["eval_name"] != eval_fn(df3)["eval_name"] # if we consider all the features in the dataframe, it should return different hashes for different dataframes assert eval_fn_all(df1)["eval_name"] != eval_fn_all(df2)["eval_name"] - # Assert that the hashes stay the same everytime this is run - assert eval_fn_all(df1)["eval_name"] == -6356943988420224450 - assert eval_fn_all(df2)["eval_name"] == -4865376220991082723 - assert eval_fn_all(df3)["eval_name"] == 141388279445698461 + + # Assert that the hashes stay the same every time this is run. + # The hash function was updated in Python 3.9, requiring different checks for each version.
+ python_version = sys.version_info + if python_version.minor == 8: + assert eval_fn_all(df1)["eval_name"] == -6356943988420224450 + assert eval_fn_all(df2)["eval_name"] == -4865376220991082723 + assert eval_fn_all(df3)["eval_name"] == 141388279445698461 + else: + assert eval_fn_all(df1)["eval_name"] == 12089800085289327166 + assert eval_fn_all(df2)["eval_name"] == 13581367852718468893 + assert eval_fn_all(df3)["eval_name"] == 141388279445698461 def test_exponential_coefficient_evaluator():
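A few of the migration patterns in this patch are worth illustrating on their own. First, the recurring change from feature_names=map(str, features) to feature_names=list(map(str, features)): recent xgboost releases validate feature_names when building a DMatrix, including checking its length against the data, and a bare map object does not pass that validation. A minimal sketch, assuming synthetic data and hypothetical feature names:

    import numpy as np
    import xgboost as xgb

    features = ["f0", "f1", "f2"]  # hypothetical feature names
    X = np.random.rand(10, 3)
    y = np.random.randint(0, 2, size=10)

    # Materialize the map() into a list so xgboost can validate its length
    # against the number of columns in X.
    dtrain = xgb.DMatrix(X, label=y, feature_names=list(map(str, features)))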
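The lgbm_classification_learner signature also narrows feature_name and categorical_feature from plain str to Literal['auto'], so a type checker accepts only the sentinel string 'auto' or an explicit list. The same pattern in isolation, on a hypothetical function:

    from typing import List, Literal, Union

    def configure(feature_name: Union[List[str], Literal["auto"]] = "auto") -> None:
        # mypy accepts configure(["f0"]) and configure("auto"), but rejects
        # any other bare string such as configure("atuo").
        ...

typing.Literal exists from Python 3.8 onward, which is why dropping 3.7 support also removes the need for the typing_extensions fallback mentioned in the commit history.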
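The new prediction path in lgbm_classification_learner materializes bst.predict(...) once and normalizes a possible list result to an ndarray before transposing for the multiclass case. The coercion has to go through np.array (or np.asarray): np.ndarray is the low-level constructor that treats its first argument as a shape, not as data. A sketch of the normalization, with a hypothetical helper name:

    import numpy as np

    def as_prediction_array(predictions):
        # lightgbm's Booster.predict is annotated as returning either an
        # ndarray or a list; normalize so that .T works for multiclass output.
        if isinstance(predictions, list):
            predictions = np.array(predictions)
        return predictions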
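pandas 2 removed DataFrame.lookup, which ensemble.py used to pick, for each row, the value of the prediction column named by that row's pred_bin label. The replacement factorizes the per-row labels and fancy-indexes the underlying array. The same idiom on a self-contained frame with hypothetical column names:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        "prediction_bin_a": [0.1, 0.2, 0.3],
        "prediction_bin_b": [0.9, 0.8, 0.7],
        "pred_bin": ["prediction_bin_a", "prediction_bin_b", "prediction_bin_a"],
    })

    # Equivalent of the removed df.lookup(df.index, df["pred_bin"]):
    # factorize encodes each row's target label as an integer position and
    # reindex(cols, axis=1) orders the columns to match those positions.
    idx, cols = pd.factorize(df["pred_bin"].to_numpy())
    picked = df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
    print(picked)  # [0.1 0.8 0.3]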
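The rank_categorical rewrite tracks a pandas 2 rename: Series.value_counts().reset_index() now labels its columns with the original column name (holding the values) and 'count', where pandas 1 used 'index' for the values and the column name for the counts. What the new col_categ_getter computes, shown on a toy column:

    import pandas as pd

    df = pd.DataFrame({"feat": ["b", "a", "b", "c", "b", "a"]})

    counts = df["feat"].value_counts().reset_index()
    print(counts.columns.tolist())  # ['feat', 'count'] under pandas 2

    # Most frequent value gets rank 1; ties are broken by value order.
    ranks = (counts
             .sort_values(["feat", "count"], ascending=[True, False])
             .set_index("feat")["count"]
             .rank(method="first", ascending=False)
             .to_dict())
    print(ranks)  # {'a': 2.0, 'b': 1.0, 'c': 3.0}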
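Finally, the extractor tests migrate from load_boston, which scikit-learn deprecated in 1.0 and removed in 1.2, to fetch_california_housing; note that feature_names is already a plain list there, hence the dropped .tolist(). The dataset is also much larger (20640 rows against Boston's 506), which is behind the updated shape assertions. The new fixture, in isolation (downloads the data on first use):

    import pandas as pd
    from sklearn.datasets import fetch_california_housing

    california = fetch_california_housing()
    df = pd.DataFrame(california["data"], columns=california["feature_names"])
    df["target"] = california["target"]
    print(df.shape)  # (20640, 9): eight features plus the target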