From 7a70a0b6e076bd7e4f54674ea2148697f80916f4 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 22 Apr 2024 18:54:41 -0400
Subject: [PATCH 1/5] added regression forest benchmark

---
 asv_benchmarks/benchmarks/ensemble.py | 45 ++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index c336d1e5f8805..a519cece3ac27 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -2,6 +2,7 @@
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
     RandomForestClassifier,
+    RandomForestRegressor
 )
 
 from .common import Benchmark, Estimator, Predictor
@@ -9,8 +10,50 @@
     _20newsgroups_highdim_dataset,
     _20newsgroups_lowdim_dataset,
     _synth_classification_dataset,
+    _synth_regression_dataset,
+    _synth_regression_sparse_dataset
 )
-from .utils import make_gen_classif_scorers
+from .utils import make_gen_classif_scorers, make_gen_reg_scorers
+
+
+class RandomForestRegressorBenchmark(Predictor, Estimator, Benchmark):
+    """
+    Benchmarks for RandomForestRegressor.
+    """
+
+    param_names = ["representation", "n_jobs"]
+    params = (["dense", "sparse"], Benchmark.n_jobs_vals)
+
+    def setup_cache(self):
+        super().setup_cache()
+
+    def make_data(self, params):
+        representation, n_jobs = params
+
+        if representation == "sparse":
+            data = _synth_regression_sparse_dataset()
+        else:
+            data = _synth_regression_dataset()
+
+        return data
+
+    def make_estimator(self, params):
+        representation, n_jobs = params
+
+        n_estimators = 500 if Benchmark.data_size == "large" else 100
+
+        estimator = RandomForestRegressor(
+            n_estimators=n_estimators,
+            min_samples_split=10,
+            max_features="log2",
+            n_jobs=n_jobs,
+            random_state=0,
+        )
+
+        return estimator
+
+    def make_scorers(self):
+        make_gen_reg_scorers(self)
 
 
 class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):

From 893d588bccabbd063d1d385a6da7e2d52556c3a6 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 22 Apr 2024 21:30:25 -0400
Subject: [PATCH 2/5] ran black for linting check

---
 .github/scripts/label_title_regex.py | 1 +
 asv_benchmarks/benchmarks/ensemble.py | 4 +-
 benchmarks/bench_glm.py | 1 +
 benchmarks/bench_glmnet.py | 1 +
 benchmarks/bench_isotonic.py | 1 +
 ...kernel_pca_solvers_time_vs_n_components.py | 1 +
 ...ch_kernel_pca_solvers_time_vs_n_samples.py | 1 +
 benchmarks/bench_lasso.py | 1 +
 benchmarks/bench_plot_lasso_path.py | 1 +
 benchmarks/bench_plot_neighbors.py | 1 +
 benchmarks/bench_plot_nmf.py | 7 ++-
 benchmarks/bench_plot_omp_lars.py | 1 +
 ...ch_plot_polynomial_kernel_approximation.py | 1 +
 benchmarks/bench_plot_svd.py | 1 +
 benchmarks/bench_random_projections.py | 1 +
 benchmarks/bench_saga.py | 5 +-
 .../bench_sample_without_replacement.py | 1 +
 benchmarks/bench_text_vectorizers.py | 1 +
 benchmarks/bench_tree.py | 1 +
 benchmarks/bench_tsne_mnist.py | 6 ++-
 build_tools/generate_authors_table.py | 1 +
 build_tools/get_comment.py | 3 +-
 build_tools/github/check_wheels.py | 1 +
 build_tools/github/vendor.py | 1 -
 .../update_environments_and_lock_files.py | 33 ++++++++-----
 doc/sphinxext/doi_role.py | 26 +++++-----
 doc/sphinxext/sphinx_issues.py | 1 +
 .../applications/plot_face_recognition.py | 1 +
 examples/calibration/plot_calibration.py | 1 +
 examples/cluster/plot_affinity_propagation.py | 1 +
 examples/cluster/plot_bisect_kmeans.py | 1 +
 .../covariance/plot_covariance_estimation.py | 1 -
 .../ensemble/plot_feature_transformation.py | 1 -
 .../plot_gradient_boosting_early_stopping.py | 1 +
.../ensemble/plot_monotonic_constraints.py | 1 + .../linear_model/plot_quantile_regression.py | 12 +++-- examples/manifold/plot_swissroll.py | 1 + .../plot_kernel_ridge_regression.py | 1 + .../miscellaneous/plot_metadata_routing.py | 1 + examples/mixture/plot_gmm_init.py | 1 - .../plot_semi_supervised_newsgroups.py | 1 - examples/tree/plot_iris_dtc.py | 1 + maint_tools/check_pxd_in_installation.py | 8 ++- sklearn/__check_build/__init__.py | 10 ++-- sklearn/_build_utils/__init__.py | 1 + sklearn/_build_utils/openmp_helpers.py | 12 +++-- sklearn/_build_utils/pre_build_helpers.py | 6 ++- sklearn/_build_utils/version.py | 3 +- sklearn/_config.py | 4 +- sklearn/_distributor_init.py | 2 +- sklearn/_loss/link.py | 1 + sklearn/_loss/loss.py | 1 + sklearn/_min_dependencies.py | 1 + sklearn/base.py | 5 +- sklearn/cluster/_agglomerative.py | 1 + sklearn/cluster/_bicluster.py | 1 + sklearn/cluster/_bisect_k_means.py | 1 + sklearn/cluster/_feature_agglomeration.py | 1 + sklearn/cluster/_hdbscan/hdbscan.py | 1 + sklearn/cluster/_spectral.py | 3 +- .../tests/test_feature_agglomeration.py | 1 + sklearn/cluster/tests/test_hdbscan.py | 1 + sklearn/cluster/tests/test_hierarchical.py | 1 + sklearn/cluster/tests/test_k_means.py | 1 + sklearn/cluster/tests/test_spectral.py | 1 + sklearn/covariance/_robust_covariance.py | 1 + .../covariance/tests/test_graphical_lasso.py | 4 +- sklearn/datasets/__init__.py | 7 ++- sklearn/datasets/_arff_parser.py | 1 + sklearn/datasets/_california_housing.py | 1 + sklearn/datasets/_samples_generator.py | 4 +- sklearn/datasets/tests/test_20news.py | 1 + sklearn/datasets/tests/test_arff_parser.py | 24 ++++++--- .../datasets/tests/test_california_housing.py | 1 + sklearn/datasets/tests/test_common.py | 1 + sklearn/datasets/tests/test_covtype.py | 1 + sklearn/datasets/tests/test_openml.py | 4 +- sklearn/decomposition/__init__.py | 1 - sklearn/decomposition/_dict_learning.py | 4 +- sklearn/decomposition/_nmf.py | 7 ++- sklearn/decomposition/_pca.py | 3 +- sklearn/decomposition/_sparse_pca.py | 1 + sklearn/decomposition/_truncated_svd.py | 3 +- sklearn/decomposition/tests/test_fastica.py | 1 + .../tests/test_incremental_pca.py | 1 + sklearn/ensemble/__init__.py | 1 + sklearn/ensemble/_forest.py | 3 +- sklearn/ensemble/_gb.py | 6 +-- .../_hist_gradient_boosting/binning.py | 1 + .../_hist_gradient_boosting/grower.py | 1 + .../_hist_gradient_boosting/predictor.py | 1 + .../ensemble/_hist_gradient_boosting/utils.py | 1 + .../ensemble/tests/test_gradient_boosting.py | 1 + .../enable_hist_gradient_boosting.py | 1 + sklearn/feature_extraction/text.py | 6 +-- sklearn/feature_selection/_sequential.py | 1 + .../tests/test_feature_select.py | 1 + sklearn/gaussian_process/_gpr.py | 8 +-- sklearn/gaussian_process/kernels.py | 4 +- sklearn/gaussian_process/tests/test_gpc.py | 14 ++---- sklearn/gaussian_process/tests/test_gpr.py | 14 ++---- sklearn/impute/__init__.py | 1 + sklearn/impute/_base.py | 5 +- sklearn/inspection/__init__.py | 1 - .../tests/test_partial_dependence.py | 1 + .../tests/test_permutation_importance.py | 4 +- sklearn/linear_model/_glm/_newton_solver.py | 3 +- sklearn/linear_model/_glm/tests/test_glm.py | 3 +- sklearn/linear_model/_least_angle.py | 4 +- sklearn/linear_model/_linear_loss.py | 1 + sklearn/linear_model/_logistic.py | 9 ++-- sklearn/linear_model/_omp.py | 3 +- sklearn/linear_model/_stochastic_gradient.py | 3 +- .../linear_model/tests/test_linear_loss.py | 1 + sklearn/manifold/_spectral_embedding.py | 3 +- sklearn/metrics/__init__.py | 1 - sklearn/metrics/_base.py | 1 + 
sklearn/metrics/_classification.py | 3 +- sklearn/metrics/cluster/__init__.py | 1 + sklearn/metrics/tests/test_classification.py | 15 ++---- sklearn/mixture/_bayesian_mixture.py | 1 + sklearn/model_selection/_search.py | 3 +- sklearn/model_selection/tests/test_split.py | 1 + .../model_selection/tests/test_validation.py | 1 + sklearn/neighbors/_base.py | 10 ++-- sklearn/neighbors/_kde.py | 1 + sklearn/neighbors/_unsupervised.py | 1 + .../neighbors/tests/test_nearest_centroid.py | 1 + sklearn/neural_network/_base.py | 3 +- .../neural_network/_multilayer_perceptron.py | 6 +-- sklearn/neural_network/_rbm.py | 3 +- .../neural_network/_stochastic_optimizers.py | 3 +- sklearn/neural_network/tests/test_mlp.py | 3 +- sklearn/pipeline.py | 1 + sklearn/preprocessing/_polynomial.py | 1 + sklearn/random_projection.py | 1 + .../tests/test_label_propagation.py | 2 +- sklearn/svm/_base.py | 6 +-- sklearn/svm/_bounds.py | 1 + sklearn/svm/tests/test_svm.py | 1 + sklearn/tests/random_seed.py | 1 + sklearn/tests/test_build.py | 6 ++- sklearn/tests/test_common.py | 6 ++- sklearn/tests/test_metaestimators.py | 1 + sklearn/tests/test_pipeline.py | 1 + sklearn/tree/tests/test_export.py | 49 +++++++++++++------ sklearn/utils/_response.py | 1 + sklearn/utils/_show_versions.py | 1 + sklearn/utils/estimator_checks.py | 9 ++-- sklearn/utils/extmath.py | 1 + sklearn/utils/fixes.py | 1 + sklearn/utils/optimize.py | 1 + sklearn/utils/tests/test_extmath.py | 4 +- sklearn/utils/tests/test_fast_dict.py | 4 +- 154 files changed, 309 insertions(+), 222 deletions(-) diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py index a022c3c4dd2a7..9a689b8db09b4 100644 --- a/.github/scripts/label_title_regex.py +++ b/.github/scripts/label_title_regex.py @@ -1,5 +1,6 @@ """Labels PRs based on title. Must be run in a github action with the pull_request_target event.""" + import json import os import re diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py index a519cece3ac27..877fcdb09fe68 100644 --- a/asv_benchmarks/benchmarks/ensemble.py +++ b/asv_benchmarks/benchmarks/ensemble.py @@ -2,7 +2,7 @@ GradientBoostingClassifier, HistGradientBoostingClassifier, RandomForestClassifier, - RandomForestRegressor + RandomForestRegressor, ) from .common import Benchmark, Estimator, Predictor @@ -11,7 +11,7 @@ _20newsgroups_lowdim_dataset, _synth_classification_dataset, _synth_regression_dataset, - _synth_regression_sparse_dataset + _synth_regression_sparse_dataset, ) from .utils import make_gen_classif_scorers, make_gen_reg_scorers diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py index 803043398d1ac..84cf31858afa7 100644 --- a/benchmarks/bench_glm.py +++ b/benchmarks/bench_glm.py @@ -4,6 +4,7 @@ Data comes from a random square matrix. """ + from datetime import datetime import numpy as np diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py index 7b111f95044e2..1aaad99c10587 100644 --- a/benchmarks/bench_glmnet.py +++ b/benchmarks/bench_glmnet.py @@ -16,6 +16,7 @@ In both cases, only 10% of the features are informative. """ + import gc from time import time diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py index 221e6fb12da75..556c452fa3323 100644 --- a/benchmarks/bench_isotonic.py +++ b/benchmarks/bench_isotonic.py @@ -10,6 +10,7 @@ This allows the scaling of the algorithm with the problem size to be visualized and understood. 
""" + import argparse import gc from datetime import datetime diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index 6551cb74ff86e..26789c173688f 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -35,6 +35,7 @@ You can also set `arpack_all=True` to activate arpack solver for large number of components (this takes more time). """ + # Authors: Sylvain MARIE, Schneider Electric import time diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index 26a45ca9f09ca..cae74c6f442ff 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -37,6 +37,7 @@ Solvers comparison benchmark: time vs n_components", where this time the number of examples is fixed, and the desired number of components varies. """ + # Author: Sylvain MARIE, Schneider Electric import time diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index 1c49c6f5cabdf..9bae570505a75 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -11,6 +11,7 @@ In both cases, only 10% of the features are informative. """ + import gc from time import time diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index c996c9c09520f..3b46e447401cb 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -2,6 +2,7 @@ The input data is mostly low rank but is a fat infinite tail. """ + import gc import sys from collections import defaultdict diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index 2d9cf2b08b71d..2cedb19fb23c4 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -1,6 +1,7 @@ """ Plot the scaling of the nearest neighbors algorithms with k, D, and N """ + from time import time import matplotlib.pyplot as plt diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 3484850011c1f..f05ede117191b 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -1,6 +1,7 @@ """ Benchmarks of Non-Negative Matrix Factorization """ + # Authors: Tom Dupre la Tour (benchmark) # Chih-Jen Linn (original projected gradient NMF implementation) # Anthony Di Franco (projected gradient, Python and NumPy port) @@ -258,8 +259,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0: raise ValueError( "Maximum number of iterations must be a positive " - "integer; got (max_iter=%r)" - % self.max_iter + "integer; got (max_iter=%r)" % self.max_iter ) if not isinstance(self.tol, numbers.Number) or self.tol < 0: raise ValueError( @@ -305,8 +305,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( "Maximum number of iteration %d reached. Increase it" - " to improve convergence." - % self.max_iter, + " to improve convergence." % self.max_iter, ConvergenceWarning, ) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index ec1bf3281f3a4..8a4bc9b1a34fe 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -3,6 +3,7 @@ The input data is mostly low rank but is a fat infinite tail. 
""" + import gc import sys from time import time diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index 1cd9f70a38f44..a80455e21c255 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -38,6 +38,7 @@ (https://people.cs.rutgers.edu/~farach/pubs/FrequentStream.pdf) """ + # Author: Daniel Lopez-Sanchez # License: BSD 3 clause diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index abd2c6fe9d4d4..ed99d1c44e2fd 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -2,6 +2,7 @@ The data is mostly low rank but is a fat infinite tail. """ + import gc from collections import defaultdict from time import time diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index bd8c62ecba484..6551de690994b 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -6,6 +6,7 @@ Benchmarks for random projections. """ + import collections import gc import optparse diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py index dc2ed093f11d0..c5b3e7728e2ec 100644 --- a/benchmarks/bench_saga.py +++ b/benchmarks/bench_saga.py @@ -3,6 +3,7 @@ Benchmarks of sklearn SAGA vs lightning SAGA vs Liblinear. Shows the gain in using multinomial logistic regression in term of learning time. """ + import json import os import time @@ -118,9 +119,7 @@ def fit_single( # Lightning predict_proba is not implemented for n_classes > 2 y_pred = _predict_proba(lr, X) score = log_loss(y, y_pred, normalize=False) / n_samples - score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum( - np.abs(lr.coef_) - ) + score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum(np.abs(lr.coef_)) scores.append(score) train_score, test_score = tuple(scores) diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 743292ca5fa61..39cf1a11ffed6 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -2,6 +2,7 @@ Benchmarks for sampling without replacement of integer. """ + import gc import operator import optparse diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 31d4141d1af97..2eab7071544f9 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -8,6 +8,7 @@ * psutil (optional, but recommended) """ + import itertools import timeit diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index 29cd7584432b7..c522bcb39e994 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -13,6 +13,7 @@ training set, classify a sample and plot the time taken as a function of the number of dimensions. 
""" + import gc from datetime import datetime diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index dfd4c4e92f848..813fffcf29141 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -130,7 +130,8 @@ def sanitize(filename): try: from bhtsne.bhtsne import run_bh_tsne except ImportError as e: - raise ImportError("""\ + raise ImportError( + """\ If you want comparison with the reference implementation, build the binary from source (https://github.com/lvdmaaten/bhtsne) in the folder benchmarks/bhtsne and add an empty `__init__.py` file in the folder: @@ -140,7 +141,8 @@ def sanitize(filename): $ g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2 $ touch __init__.py $ cd .. -""") from e +""" + ) from e def bhtsne(X): """Wrapper for the reference lvdmaaten/bhtsne implementation.""" diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index f438927772619..28bb267b6f721 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,6 +6,7 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. """ + import getpass import sys import time diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py index 64c5784e0cd06..466396b640302 100644 --- a/build_tools/get_comment.py +++ b/build_tools/get_comment.py @@ -88,8 +88,7 @@ def get_message(log_file, repo, pr_number, sha, run_id, details, versions): "https://scikit-learn.org/dev/developers/contributing.html" "#how-to-contribute)) and push the changes. If you already have done " "that, please send an empty commit with `git commit --allow-empty` " - "and push the changes to trigger the CI.\n\n" - + sub_text + "and push the changes to trigger the CI.\n\n" + sub_text ) message = "" diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 2289709fdc037..5579d86c5ce3e 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,5 +1,6 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" + import sys from pathlib import Path diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index 3bc1aceb3437c..28b44be3c9aa9 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -1,6 +1,5 @@ """Embed vcomp140.dll and msvcp140.dll.""" - import os import os.path as op import shutil diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index ab0f3e590d560..fd77cfd3c0721 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -102,7 +102,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + [ + "conda_dependencies": common_dependencies + + [ "ccache", "pytorch", "pytorch-cpu", @@ -123,7 +124,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "osx-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + [ + "conda_dependencies": common_dependencies + + [ "ccache", "compilers", "llvm-openmp", @@ -160,7 +162,8 @@ def remove_from(alist, to_remove): "channel": "defaults", "conda_dependencies": remove_from( common_dependencies, ["pandas", "cython", "pip", "ninja", "meson-python"] - ) + ["ccache"], + ) + + ["ccache"], 
"package_constraints": { "python": "3.9", "blas": "[build=openblas]", @@ -268,7 +271,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "win-64", "channel": "conda-forge", - "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + [ + "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + + [ "wheel", "pip", ], @@ -284,7 +288,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + [ + "conda_dependencies": common_dependencies_without_coverage + + [ "scikit-image", "seaborn", "memory_profiler", @@ -324,7 +329,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + [ + "conda_dependencies": common_dependencies_without_coverage + + [ "scikit-image", "seaborn", "memory_profiler", @@ -353,7 +359,8 @@ def remove_from(alist, to_remove): "channel": "conda-forge", "conda_dependencies": remove_from( common_dependencies_without_coverage, ["pandas", "pyamg"] - ) + ["pip", "ccache"], + ) + + ["pip", "ccache"], "package_constraints": { "python": "3.9", }, @@ -460,7 +467,8 @@ def get_package_with_constraint(package_name, build_metadata, uses_pip=False): def get_conda_environment_content(build_metadata): - template = environment.from_string(""" + template = environment.from_string( + """ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py @@ -476,7 +484,8 @@ def get_conda_environment_content(build_metadata): {% for pip_dep in build_metadata.get('pip_dependencies', []) %} - {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} {% endfor %} - {% endif %}""".strip()) + {% endif %}""".strip() + ) return template.render(build_metadata=build_metadata) @@ -532,13 +541,15 @@ def write_all_conda_lock_files(build_metadata_list): def get_pip_requirements_content(build_metadata): - template = environment.from_string(""" + template = environment.from_string( + """ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py {% for pip_dep in build_metadata['pip_dependencies'] %} {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} -{% endfor %}""".strip()) +{% endfor %}""".strip() + ) return template.render(build_metadata=build_metadata) diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index 32e905fe650ea..9f117b07fa6a3 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -1,17 +1,17 @@ """ - doilinks - ~~~~~~~~ - Extension to add links to DOIs. With this extension you can use e.g. - :doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will - create a link to a DOI resolver - (``https://doi.org/10.1016/S0022-2836(05)80360-2``). - The link caption will be the raw DOI. - You can also give an explicit caption, e.g. - :doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. - - :copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by - the Sphinx team. - :license: BSD. +doilinks +~~~~~~~~ +Extension to add links to DOIs. With this extension you can use e.g. 
+:doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will +create a link to a DOI resolver +(``https://doi.org/10.1016/S0022-2836(05)80360-2``). +The link caption will be the raw DOI. +You can also give an explicit caption, e.g. +:doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. + +:copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by + the Sphinx team. +:license: BSD. """ from docutils import nodes, utils diff --git a/doc/sphinxext/sphinx_issues.py b/doc/sphinxext/sphinx_issues.py index 5cd532319cbd7..206359a1bd703 100644 --- a/doc/sphinxext/sphinx_issues.py +++ b/doc/sphinxext/sphinx_issues.py @@ -18,6 +18,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + import re from docutils import nodes, utils diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 1ff4399d60739..97a67fad52776 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -11,6 +11,7 @@ .. _LFW: http://vis-www.cs.umass.edu/lfw/ """ + # %% from time import time diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index f928ae631b78b..91dca761d1fe3 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -22,6 +22,7 @@ Brier score. """ + # Authors: # Mathieu Blondel # Alexandre Gramfort diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index 5816ae298f419..e286104636d67 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -8,6 +8,7 @@ Between Data Points", Science Feb. 2007 """ + import numpy as np from sklearn import metrics diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index 3aebdffddaf63..a562ebbc96ba5 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -13,6 +13,7 @@ present for regular K-Means. """ + import matplotlib.pyplot as plt from sklearn.cluster import BisectingKMeans, KMeans diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index df9af8ea330ba..04baa0fd98bc0 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -15,7 +15,6 @@ trade-off. 
""" - # %% # Generate sample data # -------------------- diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py index de6f92bad9dfe..d492de07fec87 100644 --- a/examples/ensemble/plot_feature_transformation.py +++ b/examples/ensemble/plot_feature_transformation.py @@ -20,7 +20,6 @@ """ - # Author: Tim Head # # License: BSD 3 clause diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py index 1eaba2e852f28..6c239e97d66ee 100644 --- a/examples/ensemble/plot_gradient_boosting_early_stopping.py +++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py @@ -31,6 +31,7 @@ License: BSD 3 clause """ + # %% # Data Preparation # ---------------- diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py index 15ad8e9524243..dcd5f05af626c 100644 --- a/examples/ensemble/plot_monotonic_constraints.py +++ b/examples/ensemble/plot_monotonic_constraints.py @@ -19,6 +19,7 @@ `_. """ + # %% import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py index 715e6129cdef8..70dda86fabd60 100644 --- a/examples/linear_model/plot_quantile_regression.py +++ b/examples/linear_model/plot_quantile_regression.py @@ -261,14 +261,16 @@ y_pred_lr = linear_regression.fit(X, y_pareto).predict(X) y_pred_qr = quantile_regression.fit(X, y_pareto).predict(X) -print(f"""Training error (in-sample performance) +print( + f"""Training error (in-sample performance) {linear_regression.__class__.__name__}: MAE = {mean_absolute_error(y_pareto, y_pred_lr):.3f} MSE = {mean_squared_error(y_pareto, y_pred_lr):.3f} {quantile_regression.__class__.__name__}: MAE = {mean_absolute_error(y_pareto, y_pred_qr):.3f} MSE = {mean_squared_error(y_pareto, y_pred_qr):.3f} - """) + """ +) # %% # On the training set, we see that MAE is lower for @@ -298,14 +300,16 @@ cv=3, scoring=["neg_mean_absolute_error", "neg_mean_squared_error"], ) -print(f"""Test error (cross-validated performance) +print( + f"""Test error (cross-validated performance) {linear_regression.__class__.__name__}: MAE = {-cv_results_lr["test_neg_mean_absolute_error"].mean():.3f} MSE = {-cv_results_lr["test_neg_mean_squared_error"].mean():.3f} {quantile_regression.__class__.__name__}: MAE = {-cv_results_qr["test_neg_mean_absolute_error"].mean():.3f} MSE = {-cv_results_qr["test_neg_mean_squared_error"].mean():.3f} - """) + """ +) # %% # We reach similar conclusions on the out-of-sample evaluation. diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py index fe17d9f80030f..65df88588efef 100644 --- a/examples/manifold/plot_swissroll.py +++ b/examples/manifold/plot_swissroll.py @@ -8,6 +8,7 @@ Then, we will explore how they both deal with the addition of a hole in the data. """ + # %% # Swiss Roll # --------------------------------------------------- diff --git a/examples/miscellaneous/plot_kernel_ridge_regression.py b/examples/miscellaneous/plot_kernel_ridge_regression.py index 6d2288936179a..b865778156c3c 100644 --- a/examples/miscellaneous/plot_kernel_ridge_regression.py +++ b/examples/miscellaneous/plot_kernel_ridge_regression.py @@ -17,6 +17,7 @@ datapoint. 
""" + # %% # Authors: Jan Hendrik Metzen # License: BSD 3 clause diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index 9984bb6183348..9cad255b763af 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -20,6 +20,7 @@ First a few imports and some random data for the rest of the script. """ + # %% import warnings diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index aa0266c98ff7a..410a843cf78db 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -33,7 +33,6 @@ time to initialize and low number of GaussianMixture iterations to converge. """ - # Author: Gordon Walsh # Data generation code from Jake Vanderplas diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py index 58c7f6e42f408..19bcb13c5a99b 100644 --- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py +++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py @@ -11,7 +11,6 @@ """ - import numpy as np from sklearn.datasets import fetch_20newsgroups diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py index b3d834da5d067..4c54a4119ced3 100644 --- a/examples/tree/plot_iris_dtc.py +++ b/examples/tree/plot_iris_dtc.py @@ -14,6 +14,7 @@ We also show the tree structure of a model built on all of the features. """ + # %% # First load the copy of the Iris dataset shipped with scikit-learn: from sklearn.datasets import load_iris diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py index 996d45d64d42a..380edbd6350b6 100644 --- a/maint_tools/check_pxd_in_installation.py +++ b/maint_tools/check_pxd_in_installation.py @@ -36,7 +36,9 @@ # We set the language to c++ and we use numpy.get_include() because # some modules require it. with open(tmpdir / "setup_tst.py", "w") as f: - f.write(textwrap.dedent(""" + f.write( + textwrap.dedent( + """ from setuptools import setup, Extension from Cython.Build import cythonize import numpy @@ -47,7 +49,9 @@ include_dirs=[numpy.get_include()])] setup(ext_modules=cythonize(extensions)) - """)) + """ + ) + ) subprocess.run( ["python", "setup_tst.py", "build_ext", "-i"], check=True, cwd=tmpdir diff --git a/sklearn/__check_build/__init__.py b/sklearn/__check_build/__init__.py index 3895a0e430082..ad1a3a818b14d 100644 --- a/sklearn/__check_build/__init__.py +++ b/sklearn/__check_build/__init__.py @@ -1,6 +1,7 @@ -""" Module to give helpful messages to the user that did not +"""Module to give helpful messages to the user that did not compile scikit-learn properly. """ + import os INPLACE_MSG = """ @@ -28,7 +29,8 @@ def raise_build_error(e): dir_content.append(filename.ljust(26)) else: dir_content.append(filename + "\n") - raise ImportError("""%s + raise ImportError( + """%s ___________________________________________________________________________ Contents of %s: %s @@ -38,7 +40,9 @@ def raise_build_error(e): If you have installed scikit-learn from source, please do not forget to build the package before using it: run `python setup.py install` or `make` in the source directory. 
-%s""" % (e, local_dir, "".join(dir_content).strip(), msg)) +%s""" + % (e, local_dir, "".join(dir_content).strip(), msg) + ) try: diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index a8ced8aa9d292..ceb72441000c3 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -1,6 +1,7 @@ """ Utilities useful during the build. """ + # author: Andy Mueller, Gael Varoquaux # license: BSD diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py index 9172d40830bb9..ed9bf0ea3eea0 100644 --- a/sklearn/_build_utils/openmp_helpers.py +++ b/sklearn/_build_utils/openmp_helpers.py @@ -38,7 +38,8 @@ def check_openmp_support(): # Pyodide doesn't support OpenMP return False - code = textwrap.dedent("""\ + code = textwrap.dedent( + """\ #include #include int main(void) { @@ -46,7 +47,8 @@ def check_openmp_support(): printf("nthreads=%d\\n", omp_get_num_threads()); return 0; } - """) + """ + ) extra_preargs = os.getenv("LDFLAGS", None) if extra_preargs is not None: @@ -94,7 +96,8 @@ def check_openmp_support(): "Failed to build scikit-learn with OpenMP support" ) from openmp_exception else: - message = textwrap.dedent(""" + message = textwrap.dedent( + """ *********** * WARNING * @@ -117,7 +120,8 @@ def check_openmp_support(): parallelism. *** - """) + """ + ) warnings.warn(message) return openmp_supported diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index f3eb054bb037e..b73fa8658739f 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -64,10 +64,12 @@ def basic_check_build(): # The following check won't work in pyodide return - code = textwrap.dedent("""\ + code = textwrap.dedent( + """\ #include int main(void) { return 0; } - """) + """ + ) compile_test_program(code) diff --git a/sklearn/_build_utils/version.py b/sklearn/_build_utils/version.py index 1f8688a008e9d..49a3cfb82bebd 100644 --- a/sklearn/_build_utils/version.py +++ b/sklearn/_build_utils/version.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -""" Extract version number from __init__.py -""" +"""Extract version number from __init__.py""" import os diff --git a/sklearn/_config.py b/sklearn/_config.py index d4ccaca0a98f7..fc9392de68df6 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -1,5 +1,5 @@ -"""Global configuration state and functions for management -""" +"""Global configuration state and functions for management""" + import os import threading from contextlib import contextmanager as contextmanager diff --git a/sklearn/_distributor_init.py b/sklearn/_distributor_init.py index a0142ac80878f..f0901034e83e4 100644 --- a/sklearn/_distributor_init.py +++ b/sklearn/_distributor_init.py @@ -1,4 +1,4 @@ -""" Distributor init file +"""Distributor init file Distributors: you can add custom code here to support particular distributions of scikit-learn. diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index 9459844f6b89a..a6560d58d91e6 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -1,6 +1,7 @@ """ Module contains classes for invertible (and differentiable) link functions. """ + # Author: Christian Lorentzen from abc import ABC, abstractmethod diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py index a3b205ed10687..96863cc00fe01 100644 --- a/sklearn/_loss/loss.py +++ b/sklearn/_loss/loss.py @@ -5,6 +5,7 @@ Specific losses are used for regression, binary classification or multiclass classification. 
""" + # Goals: # - Provide a common private module for loss functions/classes. # - To be used in: diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index a7b9c48466a5d..b015a375b2bb0 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -1,4 +1,5 @@ """All minimum dependencies for scikit-learn.""" + import argparse from collections import defaultdict diff --git a/sklearn/base.py b/sklearn/base.py index e73ae4c8a180e..d6014332f7cc0 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -1353,9 +1353,8 @@ class _UnstableArchMixin: def _more_tags(self): return { - "non_deterministic": _IS_32BIT or platform.machine().startswith( - ("ppc", "powerpc") - ) + "non_deterministic": _IS_32BIT + or platform.machine().startswith(("ppc", "powerpc")) } diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 2da9d8c5a0f43..fcecacc9ca57c 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -7,6 +7,7 @@ Gael Varoquaux License: BSD 3 clause """ + import warnings from heapq import heapify, heappop, heappush, heappushpop from numbers import Integral, Real diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index 18c98ad5348b5..b22f6a369fcc1 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -1,4 +1,5 @@ """Spectral biclustering algorithms.""" + # Authors : Kemal Eren # License: BSD 3 clause diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index a1f7716ced822..1d4a9e1d84c26 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -1,4 +1,5 @@ """Bisecting K-means clustering.""" + # Author: Michal Krawczyk import warnings diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index f84f18c1c18b3..218db48ad2331 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -2,6 +2,7 @@ Feature agglomeration. Base classes and functions for performing feature agglomeration. """ + # Author: V. Michel, A. 
Gramfort # License: BSD 3 clause diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py index 380448f1f8589..e77baaf4b1146 100644 --- a/sklearn/cluster/_hdbscan/hdbscan.py +++ b/sklearn/cluster/_hdbscan/hdbscan.py @@ -2,6 +2,7 @@ HDBSCAN: Hierarchical Density-Based Spatial Clustering of Applications with Noise """ + # Authors: Leland McInnes # Steve Astels # John Healy diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index d323a6b8afd03..91606056c17aa 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -793,7 +793,8 @@ def fit_predict(self, X, y=None): def _more_tags(self): return { - "pairwise": self.affinity in [ + "pairwise": self.affinity + in [ "precomputed", "precomputed_nearest_neighbors", ] diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py index 121e8f2cfe400..abeb81dca50aa 100644 --- a/sklearn/cluster/tests/test_feature_agglomeration.py +++ b/sklearn/cluster/tests/test_feature_agglomeration.py @@ -1,6 +1,7 @@ """ Tests for sklearn.cluster._feature_agglomeration """ + # Authors: Sergul Aydore 2017 import warnings diff --git a/sklearn/cluster/tests/test_hdbscan.py b/sklearn/cluster/tests/test_hdbscan.py index 6db2d4387de18..d586d203747c2 100644 --- a/sklearn/cluster/tests/test_hdbscan.py +++ b/sklearn/cluster/tests/test_hdbscan.py @@ -2,6 +2,7 @@ Tests for HDBSCAN clustering algorithm Based on the DBSCAN test code """ + import numpy as np import pytest from scipy import stats diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 3c99dd50ea85f..0a139bf3c4571 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -2,6 +2,7 @@ Several basic tests for hierarchical clustering procedures """ + # Authors: Vincent Michel, 2010, Gael Varoquaux 2012, # Matteo Visconti di Oleggio Castello 2014 # License: BSD 3 clause diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 4a112a30b29ed..1f2f8c390c909 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -1,4 +1,5 @@ """Testing for K-means""" + import re import sys from io import StringIO diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 682df64044bf9..689a159851f50 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -1,4 +1,5 @@ """Testing for Spectral Clustering methods""" + import pickle import re diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index c90e855ca6768..980bf964e6dfa 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -4,6 +4,7 @@ Here are implemented estimators that are resistant to outliers. """ + # Author: Virgile Fritsch # # License: BSD 3 clause diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py index a7d251a5bbdfe..c0e2deb20de16 100644 --- a/sklearn/covariance/tests/test_graphical_lasso.py +++ b/sklearn/covariance/tests/test_graphical_lasso.py @@ -1,5 +1,5 @@ -""" Test the graphical_lasso module. 
-""" +"""Test the graphical_lasso module.""" + import sys from io import StringIO diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 7ae7902f3365c..6f61e027dceaa 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -3,6 +3,7 @@ including methods to load and fetch popular reference datasets. It also features some artificial data generators. """ + import textwrap from ._base import ( @@ -106,7 +107,8 @@ def __getattr__(name): if name == "load_boston": - msg = textwrap.dedent(""" + msg = textwrap.dedent( + """ `load_boston` has been removed from scikit-learn since version 1.2. The Boston housing prices dataset has an ethical problem: as @@ -153,7 +155,8 @@ def __getattr__(name): "Hedonic housing prices and the demand for clean air." Journal of environmental economics and management 5.1 (1978): 81-102. - """) + """ + ) raise ImportError(msg) try: return globals()[name] diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py index 5c427441012d6..86dfeb37a6ef5 100644 --- a/sklearn/datasets/_arff_parser.py +++ b/sklearn/datasets/_arff_parser.py @@ -1,4 +1,5 @@ """Implementation of ARFF parsers: via LIAC-ARFF and pandas.""" + import itertools import re from collections import OrderedDict diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index e94996ccdec65..a1e4b911f1bef 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -18,6 +18,7 @@ Statistics and Probability Letters, 33 (1997) 291-297. """ + # Authors: Peter Prettenhofer # License: BSD 3 clause diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index 396e4af9389e6..224978bd70770 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -221,9 +221,7 @@ def make_classification( msg = "n_classes({}) * n_clusters_per_class({}) must be" msg += " smaller or equal 2**n_informative({})={}" raise ValueError( - msg.format( - n_classes, n_clusters_per_class, n_informative, 2**n_informative - ) + msg.format(n_classes, n_clusters_per_class, n_informative, 2**n_informative) ) if weights is not None: diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 4072d9c8ec67f..84e7c91d3176f 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -1,6 +1,7 @@ """Test the 20news downloader, if the data is available, or if specifically requested via environment variable (e.g. 
for CI jobs).""" + from functools import partial from unittest.mock import patch diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py index b675439cd2e9d..c4f9e3eb00ffd 100644 --- a/sklearn/datasets/tests/test_arff_parser.py +++ b/sklearn/datasets/tests/test_arff_parser.py @@ -83,7 +83,9 @@ def test_pandas_arff_parser_strip_single_quotes(parser_func): """Check that we properly strip single quotes from the data.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO(textwrap.dedent(""" + arff_file = BytesIO( + textwrap.dedent( + """ @relation 'toy' @attribute 'cat_single_quote' {'A', 'B', 'C'} @attribute 'str_single_quote' string @@ -91,7 +93,9 @@ def test_pandas_arff_parser_strip_single_quotes(parser_func): @attribute 'class' numeric @data 'A','some text','\"expect double quotes\"',0 - """).encode("utf-8")) + """ + ).encode("utf-8") + ) columns_info = { "cat_single_quote": { @@ -150,7 +154,9 @@ def test_pandas_arff_parser_strip_double_quotes(parser_func): """Check that we properly strip double quotes from the data.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO(textwrap.dedent(""" + arff_file = BytesIO( + textwrap.dedent( + """ @relation 'toy' @attribute 'cat_double_quote' {"A", "B", "C"} @attribute 'str_double_quote' string @@ -158,7 +164,9 @@ def test_pandas_arff_parser_strip_double_quotes(parser_func): @attribute 'class' numeric @data "A","some text","\'expect double quotes\'",0 - """).encode("utf-8")) + """ + ).encode("utf-8") + ) columns_info = { "cat_double_quote": { @@ -217,7 +225,9 @@ def test_pandas_arff_parser_strip_no_quotes(parser_func): """Check that we properly parse with no quotes characters.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO(textwrap.dedent(""" + arff_file = BytesIO( + textwrap.dedent( + """ @relation 'toy' @attribute 'cat_without_quote' {A, B, C} @attribute 'str_without_quote' string @@ -225,7 +235,9 @@ def test_pandas_arff_parser_strip_no_quotes(parser_func): @attribute 'class' numeric @data A,some text,'internal' quote,0 - """).encode("utf-8")) + """ + ).encode("utf-8") + ) columns_info = { "cat_without_quote": { diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index ef6fc95db80bf..b24fb5bd66a56 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -1,6 +1,7 @@ """Test the california_housing loader, if the data is available, or if specifically requested via environment variable (e.g. for CI jobs).""" + from functools import partial import pytest diff --git a/sklearn/datasets/tests/test_common.py b/sklearn/datasets/tests/test_common.py index 8048a31041ddc..5bed37837718b 100644 --- a/sklearn/datasets/tests/test_common.py +++ b/sklearn/datasets/tests/test_common.py @@ -1,4 +1,5 @@ """Test loaders for common functionality.""" + import inspect import os diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index e44fdaae69ec3..018505bc4fa05 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -1,6 +1,7 @@ """Test the covtype loader, if the data is available, or if specifically requested via environment variable (e.g. 
for CI jobs).""" + from functools import partial import pytest diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index e48e361909603..70bb33e22adb7 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -1,4 +1,5 @@ """Test the openml loader.""" + import gzip import json import os @@ -1457,8 +1458,7 @@ def _mock_urlopen_raise(request, *args, **kwargs): raise ValueError( "This mechanism intends to test correct cache" "handling. As such, urlopen should never be " - "accessed. URL: %s" - % request.get_full_url() + "accessed. URL: %s" % request.get_full_url() ) data_id = 61 diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index 1f9cfe07dc0e8..3d33938a755a7 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -4,7 +4,6 @@ this module can be regarded as dimensionality reduction techniques. """ - from ..utils.extmath import randomized_svd from ._dict_learning import ( DictionaryLearning, diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 177d6960033da..267e1cbfe756b 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1,5 +1,5 @@ -""" Dictionary learning. -""" +"""Dictionary learning.""" + # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort # License: BSD 3 clause diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index db46540e26708..75266c5f64b2b 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -1,5 +1,5 @@ -""" Non-negative matrix factorization. -""" +"""Non-negative matrix factorization.""" + # Author: Vlad Niculae # Lars Buitinck # Mathieu Blondel @@ -1769,8 +1769,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( "Maximum number of iterations %d reached. Increase " - "it to improve convergence." - % self.max_iter, + "it to improve convergence." % self.max_iter, ConvergenceWarning, ) diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py index abd2fda2d5d2f..4c49337e88093 100644 --- a/sklearn/decomposition/_pca.py +++ b/sklearn/decomposition/_pca.py @@ -1,5 +1,4 @@ -""" Principal Component Analysis. -""" +"""Principal Component Analysis.""" # Author: Alexandre Gramfort # Olivier Grisel diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index b14df8c5f4d22..fa711ce8c0703 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -1,4 +1,5 @@ """Matrix factorization with Sparse PCA.""" + # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort # License: BSD 3 clause diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index 725683e8d46c6..d238f35cb2167 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -1,5 +1,4 @@ -"""Truncated SVD for sparse matrices, aka latent semantic analysis (LSA). -""" +"""Truncated SVD for sparse matrices, aka latent semantic analysis (LSA).""" # Author: Lars Buitinck # Olivier Grisel diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 6a376b01ecb19..bd7a35bb8a96f 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -1,6 +1,7 @@ """ Test the fastica algorithm. 
""" + import itertools import os import warnings diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 5d7c8aa03f174..646aad2db795d 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -1,4 +1,5 @@ """Tests for Incremental PCA.""" + import warnings import numpy as np diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index f4a3756bdaf1d..8ddf05084f1be 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -2,6 +2,7 @@ The :mod:`sklearn.ensemble` module includes ensemble-based methods for classification, regression and anomaly detection. """ + from ._bagging import BaggingClassifier, BaggingRegressor from ._base import BaseEnsemble from ._forest import ( diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index b5ee64b6e708c..6e5a7e47b0c10 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -1198,8 +1198,7 @@ def _validate_y_class_weight(self, y, classes=None): raise ValueError( "Valid presets for class_weight include " '"balanced" and "balanced_subsample".' - 'Given "%s".' - % self.class_weight + 'Given "%s".' % self.class_weight ) if self.warm_start: warn( diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 49575cefa5090..bd11e373d3915 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -741,8 +741,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): if ( "pass parameters to specific steps of " "your pipeline using the " - "stepname__parameter" - in str(e) + "stepname__parameter" in str(e) ): # pipeline raise ValueError(msg) from e else: # regular estimator whose input checking failed @@ -1060,8 +1059,7 @@ def _compute_partial_dependence_recursion(self, grid, target_features): warnings.warn( "Using recursion method with a non-constant init predictor " "will lead to incorrect partial dependence values. " - "Got init=%s." - % self.init, + "Got init=%s." % self.init, UserWarning, ) grid = np.asarray(grid, dtype=DTYPE, order="C") diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index 98d01ea5cb9f2..d23f6e7b00a82 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -5,6 +5,7 @@ Bin thresholds are computed with the quantiles so that each bin contains approximately the same number of samples. """ + # Author: Nicolas Hug import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 15f92cd324768..c9b1b56bc7999 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -4,6 +4,7 @@ TreeGrower builds a regression tree fitting a Newton-Raphson step, based on the gradients and hessians of the training data. """ + # Author: Nicolas Hug import numbers diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index b939712d18893..799c25aadcec3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -1,6 +1,7 @@ """ This module contains the TreePredictor class which is used for prediction. 
""" + # Author: Nicolas Hug import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.py b/sklearn/ensemble/_hist_gradient_boosting/utils.py index 12f49b6cdce50..1ff17217164c8 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/utils.py +++ b/sklearn/ensemble/_hist_gradient_boosting/utils.py @@ -1,4 +1,5 @@ """This module contains utility routines.""" + from ...base import is_classifier from .binning import _BinMapper diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 4bfbf7c2ff6ee..f13f5983d1f4b 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1,6 +1,7 @@ """ Testing for the gradient boosting module (sklearn.ensemble.gradient_boosting). """ + import re import warnings diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index d287400c7999f..6fa4512ce39c6 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -6,6 +6,7 @@ :term:`experimental`, but these estimators are now stable and can be imported normally from `sklearn.ensemble`. """ + # Don't remove this file, we don't want to break users code just because the # feature isn't experimental anymore. diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index ea6686ef45eaa..d50c489e6b852 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -409,8 +409,7 @@ def _check_stop_words_consistency(self, stop_words, preprocess, tokenize): "Your stop_words may be inconsistent with " "your preprocessing. Tokenizing the stop " "words generated tokens %r not in " - "stop_words." - % sorted(inconsistent) + "stop_words." % sorted(inconsistent) ) return not inconsistent except Exception: @@ -516,8 +515,7 @@ def _validate_ngram_range(self): if min_n > max_m: raise ValueError( "Invalid value for ngram_range=%s " - "lower boundary larger than the upper boundary." - % str(self.ngram_range) + "lower boundary larger than the upper boundary." 
% str(self.ngram_range) ) def _warn_for_unused_params(self): diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 5a90d46c9758b..9c393724f9cea 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -1,6 +1,7 @@ """ Sequential feature selection """ + from numbers import Integral, Real import numpy as np diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 3815a88c374e8..d7bffec5159bf 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -1,6 +1,7 @@ """ Todo: cross-check the F-value with stats model """ + import itertools import warnings diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index d3723016be127..67bba2e29c857 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -456,9 +456,7 @@ def predict(self, X, return_std=False, return_cov=False): y_cov = self.kernel_(X) - V.T @ V # undo normalisation - y_cov = np.outer(y_cov, self._y_train_std**2).reshape( - *y_cov.shape, -1 - ) + y_cov = np.outer(y_cov, self._y_train_std**2).reshape(*y_cov.shape, -1) # if y_cov has shape (n_samples, n_samples, 1), reshape to # (n_samples, n_samples) if y_cov.shape[2] == 1: @@ -483,9 +481,7 @@ def predict(self, X, return_std=False, return_cov=False): y_var[y_var_negative] = 0.0 # undo normalisation - y_var = np.outer(y_var, self._y_train_std**2).reshape( - *y_var.shape, -1 - ) + y_var = np.outer(y_var, self._y_train_std**2).reshape(*y_var.shape, -1) # if y_var has shape (n_samples, 1), reshape to (n_samples,) if y_var.shape[1] == 1: diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 3b995c48b1f71..c31335696944c 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -1750,9 +1750,7 @@ def __call__(self, X, Y=None, eval_gradient=False): # We need to recompute the pairwise dimension-wise distances if self.anisotropic: - D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / ( - length_scale**2 - ) + D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (length_scale**2) else: D = squareform(dists**2)[:, :, np.newaxis] diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index 842159f13ac04..bd8bd39e1cc01 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -1,4 +1,4 @@ -"""Testing for Gaussian process classification """ +"""Testing for Gaussian process classification""" # Author: Jan Hendrik Metzen # License: BSD 3 clause @@ -218,8 +218,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k1__noise_level is close to the " "specified upper bound 0.001. " @@ -229,8 +228,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k2__length_scale is close to the " "specified lower bound 1000.0. 
" @@ -250,8 +248,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "length_scale is close to the " "specified upper bound 100.0. " @@ -261,8 +258,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 1 of parameter " "length_scale is close to the " "specified upper bound 100.0. " diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index d890dc05d9f02..e280827926d28 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -1,4 +1,4 @@ -"""Testing for Gaussian process regression """ +"""Testing for Gaussian process regression""" # Author: Jan Hendrik Metzen # Modified by: Pete Green @@ -493,8 +493,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k1__noise_level is close to the " "specified upper bound 0.001. " @@ -504,8 +503,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k2__length_scale is close to the " "specified lower bound 1000.0. " @@ -525,8 +523,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "length_scale is close to the " "specified lower bound 10.0. " @@ -536,8 +533,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 1 of parameter " "length_scale is close to the " "specified lower bound 10.0. 
" diff --git a/sklearn/impute/__init__.py b/sklearn/impute/__init__.py index e305bc2a657dc..380bcecaf65b5 100644 --- a/sklearn/impute/__init__.py +++ b/sklearn/impute/__init__.py @@ -1,4 +1,5 @@ """Transformers for missing value imputation""" + import typing from ._base import MissingIndicator, SimpleImputer diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index af298ae8c380e..04a4dffd10e68 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -701,9 +701,8 @@ def inverse_transform(self, X): def _more_tags(self): return { - "allow_nan": is_pandas_na(self.missing_values) or is_scalar_nan( - self.missing_values - ) + "allow_nan": is_pandas_na(self.missing_values) + or is_scalar_nan(self.missing_values) } def get_feature_names_out(self, input_features=None): diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index f8e08785e8358..f254967f96166 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,6 +1,5 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" - from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index b052609a85a2b..3cb4999eb0833 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -1,6 +1,7 @@ """ Testing for the partial dependence module. """ + import warnings import numpy as np diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py index 2869e84c78bf8..8b3ed78cdd368 100644 --- a/sklearn/inspection/tests/test_permutation_importance.py +++ b/sklearn/inspection/tests/test_permutation_importance.py @@ -437,9 +437,7 @@ def test_permutation_importance_sample_weight(): # the second half of the samples approaches to infinity, the ratio of # the two features importance should equal to 2 on expectation (when using # mean absolutes error as the loss function). - w = np.hstack( - [np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)] - ) + w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)]) lr.fit(x, y, w) pi = permutation_importance( lr, diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index fa9b431fd2377..0b6adbe44e686 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -502,8 +502,7 @@ def inner_solve(self, X, y, sample_weight): "Further options are to use another solver or to avoid such situation " "in the first place. Possible remedies are removing collinear features" " of X or increasing the penalization strengths.\n" - "The original Linear Algebra message was:\n" - + str(e), + "The original Linear Algebra message was:\n" + str(e), scipy.linalg.LinAlgWarning, ) # Possible causes: diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 5256a5f370272..26f6bdc08d254 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -1107,6 +1107,5 @@ def test_newton_solver_verbosity(capsys, verbose): if verbose >= 1: assert ( "The inner solver detected a pointwise Hessian with many negative values" - " and resorts to lbfgs instead." 
- in captured.out + " and resorts to lbfgs instead." in captured.out ) diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index efea6c6b4c5f9..4e038ecb28da9 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -2,6 +2,7 @@ Least Angle Regression algorithm. See the documentation on the Generalized Linear Model for a complete discussion. """ + # Author: Fabian Pedregosa # Alexandre Gramfort # Gael Varoquaux @@ -1737,8 +1738,7 @@ def fit(self, X, y, **params): if hasattr(Gram, "__array__"): warnings.warn( 'Parameter "precompute" cannot be an array in ' - '%s. Automatically switch to "auto" instead.' - % self.__class__.__name__ + '%s. Automatically switch to "auto" instead.' % self.__class__.__name__ ) Gram = "auto" diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index 4255706e284f1..e8c1466b30623 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -1,6 +1,7 @@ """ Loss functions for linear models with raw_prediction = X @ coef """ + import numpy as np from scipy import sparse diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 259ce54d3f11e..a8ecc29715886 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -1246,8 +1246,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError( "This solver needs samples of at least 2 classes" " in the data, but the data contains only one" - " class: %r" - % classes_[0] + " class: %r" % classes_[0] ) if len(self.classes_) == 2: @@ -1787,8 +1786,7 @@ def fit(self, X, y, sample_weight=None, **params): ): raise ValueError( "l1_ratios must be a list of numbers between " - "0 and 1; got (l1_ratios=%r)" - % self.l1_ratios + "0 and 1; got (l1_ratios=%r)" % self.l1_ratios ) l1_ratios_ = self.l1_ratios else: @@ -1856,8 +1854,7 @@ def fit(self, X, y, sample_weight=None, **params): raise ValueError( "This solver needs samples of at least 2 classes" " in the data, but the data contains only one" - " class: %r" - % classes[0] + " class: %r" % classes[0] ) if n_classes == 2: diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py index efac0508963ba..2d6fe48869742 100644 --- a/sklearn/linear_model/_omp.py +++ b/sklearn/linear_model/_omp.py @@ -1,5 +1,4 @@ -"""Orthogonal matching pursuit algorithms -""" +"""Orthogonal matching pursuit algorithms""" # Author: Vlad Niculae # diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 67187bbdb5934..e0fad5d8be8b8 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -1358,8 +1358,7 @@ def predict_proba(self, X): raise NotImplementedError( "predict_(log_)proba only supported when" " loss='log_loss' or loss='modified_huber' " - "(%r given)" - % self.loss + "(%r given)" % self.loss ) @available_if(_check_proba) diff --git a/sklearn/linear_model/tests/test_linear_loss.py b/sklearn/linear_model/tests/test_linear_loss.py index 659ff134198db..230966db1ceaf 100644 --- a/sklearn/linear_model/tests/test_linear_loss.py +++ b/sklearn/linear_model/tests/test_linear_loss.py @@ -4,6 +4,7 @@ Note that correctness of losses (which compose LinearModelLoss) is already well covered in the _loss module. 
""" + import numpy as np import pytest from numpy.testing import assert_allclose diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index f1707fad1c950..2e2e262183a17 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -650,7 +650,8 @@ def __init__( def _more_tags(self): return { - "pairwise": self.affinity in [ + "pairwise": self.affinity + in [ "precomputed", "precomputed_nearest_neighbors", ] diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 713c5fe651dbb..8a818c885043c 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -3,7 +3,6 @@ and pairwise metrics and distance computations. """ - from . import cluster from ._classification import ( accuracy_score, diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py index 53ff14b039e0c..c344008755004 100644 --- a/sklearn/metrics/_base.py +++ b/sklearn/metrics/_base.py @@ -2,6 +2,7 @@ Common code for all metrics. """ + # Authors: Alexandre Gramfort # Mathieu Blondel # Olivier Grisel diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 999d3795b8dd9..c5290fd39eb7e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -583,8 +583,7 @@ def multilabel_confusion_matrix( raise ValueError( "All labels must be in [0, n labels) for " "multilabel targets. " - "Got %d < 0" - % np.min(labels) + "Got %d < 0" % np.min(labels) ) if n_labels is not None: diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index a332997a84414..44da911061bc8 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -5,6 +5,7 @@ - supervised, which uses a ground truth class values for each sample. - unsupervised, which does not and measures the 'quality' of the model itself. """ + from ._bicluster import consensus_score from ._supervised import ( adjusted_mutual_info_score, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ec26ef7dcd399..bbebe2cba2197 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2217,8 +2217,7 @@ def test_recall_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "Recall is ill-defined and " + str(record.pop().message) == "Recall is ill-defined and " "being set to 0.0 due to no true samples." " Use `zero_division` parameter to control" " this behavior." @@ -2229,8 +2228,7 @@ def test_recall_warnings(zero_division): recall_score([0, 0], [0, 0]) if zero_division == "warn": assert ( - str(record.pop().message) - == "Recall is ill-defined and " + str(record.pop().message) == "Recall is ill-defined and " "being set to 0.0 due to no true samples." " Use `zero_division` parameter to control" " this behavior." @@ -2249,8 +2247,7 @@ def test_precision_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "Precision is ill-defined and " + str(record.pop().message) == "Precision is ill-defined and " "being set to 0.0 due to no predicted samples." " Use `zero_division` parameter to control" " this behavior." 
@@ -2261,8 +2258,7 @@ def test_precision_warnings(zero_division): precision_score([0, 0], [0, 0]) if zero_division == "warn": assert ( - str(record.pop().message) - == "Precision is ill-defined and " + str(record.pop().message) == "Precision is ill-defined and " "being set to 0.0 due to no predicted samples." " Use `zero_division` parameter to control" " this behavior." @@ -2307,8 +2303,7 @@ def test_fscore_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "F-score is ill-defined and " + str(record.pop().message) == "F-score is ill-defined and " "being set to 0.0 due to no true nor predicted " "samples. Use `zero_division` parameter to " "control this behavior." diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index e361ce8f61a1c..fda1a83702bbf 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -1,4 +1,5 @@ """Bayesian Gaussian Mixture Model.""" + # Author: Wei Xue # Thierry Guillemot # License: BSD 3 clause diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 6b546c6bc9441..9b9072f1491a2 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -484,8 +484,7 @@ def score(self, X, y=None, **params): if self.scorer_ is None: raise ValueError( "No score function explicitly defined, " - "and the estimator doesn't provide one %s" - % self.best_estimator_ + "and the estimator doesn't provide one %s" % self.best_estimator_ ) if isinstance(self.scorer_, dict): if self.multimetric_: diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 2afb9ae6adce7..fa425a5e6a18b 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -1,4 +1,5 @@ """Test the split module""" + import re import warnings from itertools import combinations, combinations_with_replacement, permutations diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 22306d88e021f..43916d8cecb2e 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -1,4 +1,5 @@ """Test the validation module""" + import os import re import sys diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index e1e8bdbb09d7c..776d462928fbb 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -1,4 +1,5 @@ """Base and mixin classes for nearest neighbors.""" + # Authors: Jake Vanderplas # Fabian Pedregosa # Alexandre Gramfort @@ -444,8 +445,7 @@ def _check_algorithm_metric(self): raise ValueError( "kd_tree does not support callable metric '%s'" "Function call overhead will result" - "in very poor performance." - % self.metric + "in very poor performance." % self.metric ) elif self.metric not in VALID_METRICS[alg_check] and not isinstance( self.metric, DistanceMetric @@ -898,8 +898,7 @@ class from an array representing our data set and ask who's if issparse(X): raise ValueError( "%s does not work with sparse matrices. Densify the data, " - "or set algorithm='brute'" - % self._fit_method + "or set algorithm='brute'" % self._fit_method ) chunked_results = Parallel(n_jobs, prefer="threads")( delayed(_tree_query_parallel_helper)( @@ -1253,8 +1252,7 @@ class from an array representing our data set and ask who's if issparse(X): raise ValueError( "%s does not work with sparse matrices. 
Densify the data, " - "or set algorithm='brute'" - % self._fit_method + "or set algorithm='brute'" % self._fit_method ) n_jobs = effective_n_jobs(self.n_jobs) diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index 8885fb4c8c5d0..a9e5fe011150a 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -2,6 +2,7 @@ Kernel Density Estimation ------------------------- """ + # Author: Jake Vanderplas import itertools from numbers import Integral, Real diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index a4ff66786340a..4185bbe15826b 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,4 +1,5 @@ """Unsupervised nearest neighbors learner""" + from ..base import _fit_context from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index ee548d8017810..09c2501818fd3 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -1,6 +1,7 @@ """ Testing for the nearest centroid module. """ + import numpy as np import pytest from numpy.testing import assert_array_equal diff --git a/sklearn/neural_network/_base.py b/sklearn/neural_network/_base.py index 73d62f9543e98..60ef660ef917d 100644 --- a/sklearn/neural_network/_base.py +++ b/sklearn/neural_network/_base.py @@ -1,5 +1,4 @@ -"""Utilities for the neural network modules -""" +"""Utilities for the neural network modules""" # Author: Issam H. Laradji # License: BSD 3 clause diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index cc419b57f2410..f56f68ac852c2 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -1,5 +1,4 @@ -"""Multi-layer Perceptron -""" +"""Multi-layer Perceptron""" # Authors: Issam H. Laradji # Andreas Mueller @@ -755,8 +754,7 @@ def _check_solver(self): if self.solver not in _STOCHASTIC_SOLVERS: raise AttributeError( "partial_fit is only available for stochastic" - " optimizers. %s is not stochastic." - % self.solver + " optimizers. %s is not stochastic." % self.solver ) return True diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index e3814f45d3633..4b7f0f9422625 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -1,5 +1,4 @@ -"""Restricted Boltzmann Machine -""" +"""Restricted Boltzmann Machine""" # Authors: Yann N. Dauphin # Vlad Niculae diff --git a/sklearn/neural_network/_stochastic_optimizers.py b/sklearn/neural_network/_stochastic_optimizers.py index d9fbaec0098d0..ab87300aff110 100644 --- a/sklearn/neural_network/_stochastic_optimizers.py +++ b/sklearn/neural_network/_stochastic_optimizers.py @@ -1,5 +1,4 @@ -"""Stochastic optimization methods for MLP -""" +"""Stochastic optimization methods for MLP""" # Authors: Jiyuan Qian # License: BSD 3 clause diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 6b94e2703f7e1..64ad4c5edc019 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -732,8 +732,7 @@ def test_warm_start(): message = ( "warm_start can only be used where `y` has the same " "classes as in the previous call to fit." 
- " Previously got [0 1 2], `y` has %s" - % np.unique(y_i) + " Previously got [0 1 2], `y` has %s" % np.unique(y_i) ) with pytest.raises(ValueError, match=re.escape(message)): clf.fit(X, y_i) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 4ee0622c699b7..b26b83e66510f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -2,6 +2,7 @@ The :mod:`sklearn.pipeline` module implements utilities to build a composite estimator, as a chain of transforms and estimators. """ + # Author: Edouard Duchesnay # Gael Varoquaux # Virgile Fritsch diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index 2512f411a5a9c..f4c9fb032cfb0 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -1,6 +1,7 @@ """ This file contains preprocessing tools based on polynomials. """ + import collections from itertools import chain, combinations from itertools import combinations_with_replacement as combinations_w_r diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index c8c0193ac9b0b..886a805960d52 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -22,6 +22,7 @@ and can even be taken to be an orthogonal projection. """ + # Authors: Olivier Grisel , # Arnaud Joly # License: BSD 3 clause diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 8812c3c352a03..4b046aa111250 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -1,4 +1,4 @@ -""" test the label propagation module """ +"""test the label propagation module""" import warnings diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 6d154c99dc669..47d4027c50754 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -297,8 +297,7 @@ def _warn_from_fit_status(self): warnings.warn( "Solver terminated early (max_iter=%i)." " Consider pre-processing your data with" - " StandardScaler or MinMaxScaler." - % self.max_iter, + " StandardScaler or MinMaxScaler." 
% self.max_iter, ConvergenceWarning, ) @@ -1174,8 +1173,7 @@ def _fit_liblinear( raise ValueError( "This solver needs samples of at least 2 classes" " in the data, but the data contains only one" - " class: %r" - % classes_[0] + " class: %r" % classes_[0] ) class_weight_ = compute_class_weight(class_weight, classes=classes_, y=y) diff --git a/sklearn/svm/_bounds.py b/sklearn/svm/_bounds.py index d14297230af4c..b02720637c03b 100644 --- a/sklearn/svm/_bounds.py +++ b/sklearn/svm/_bounds.py @@ -1,4 +1,5 @@ """Determination of parameter bounds""" + # Author: Paolo Losi # License: BSD 3 clause diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index e1c6e36af28fb..f728136b0f98c 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -3,6 +3,7 @@ TODO: remove hard coded numerical results when possible """ + import re import numpy as np diff --git a/sklearn/tests/random_seed.py b/sklearn/tests/random_seed.py index 0fffd57a1016d..ecda17e36d2bf 100644 --- a/sklearn/tests/random_seed.py +++ b/sklearn/tests/random_seed.py @@ -8,6 +8,7 @@ https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed """ + from os import environ from random import Random diff --git a/sklearn/tests/test_build.py b/sklearn/tests/test_build.py index 72cab1dfcb174..40a960cba6283 100644 --- a/sklearn/tests/test_build.py +++ b/sklearn/tests/test_build.py @@ -15,7 +15,8 @@ def test_openmp_parallelism_enabled(): pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)") base_url = "dev" if __version__.endswith(".dev0") else "stable" - err_msg = textwrap.dedent(""" + err_msg = textwrap.dedent( + """ This test fails because scikit-learn has been built without OpenMP. This is not recommended since some estimators will run in sequential mode instead of leveraging thread-based parallelism. @@ -27,6 +28,7 @@ def test_openmp_parallelism_enabled(): You can skip this test by setting the environment variable SKLEARN_SKIP_OPENMP_TEST to any value. - """).format(base_url) + """ + ).format(base_url) assert _openmp_parallelism_enabled(), err_msg diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index fccc58f9fa2a5..ea84eec258d83 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -255,11 +255,13 @@ def test_all_tests_are_importable(): # Ensure that for each contentful subpackage, there is a test directory # within it that is also a subpackage (i.e. a directory with __init__.py) - HAS_TESTS_EXCEPTIONS = re.compile(r"""(?x) + HAS_TESTS_EXCEPTIONS = re.compile( + r"""(?x) \.externals(\.|$)| \.tests(\.|$)| \._ - """) + """ + ) resource_modules = { "sklearn.datasets.data", "sklearn.datasets.descr", diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index b3c6820faefc2..e06d2f59a6c10 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -1,4 +1,5 @@ """Common tests for metaestimators""" + import functools from inspect import signature diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index f5ed64a094063..150dcc287e651 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1,6 +1,7 @@ """ Test the pipeline module. 
""" + import itertools import re import shutil diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index f8c612b6029c2..cd4a106ee7606 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -1,6 +1,7 @@ """ Testing for export functions of decision trees (sklearn.tree.export). """ + from io import StringIO from re import finditer, search from textwrap import dedent @@ -375,12 +376,14 @@ def test_export_text(): clf = DecisionTreeClassifier(max_depth=2, random_state=0) clf.fit(X, y) - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf) == expected_report # testing that leaves at level 1 are not truncated @@ -388,32 +391,38 @@ def test_export_text(): # testing that the rest of the tree is truncated assert export_text(clf, max_depth=10) == expected_report - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- weights: [3.00, 0.00] class: -1 |--- feature_1 > 0.00 | |--- weights: [0.00, 3.00] class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf, show_weights=True) == expected_report - expected_report = dedent(""" + expected_report = dedent( + """ |- feature_1 <= 0.00 | |- class: -1 |- feature_1 > 0.00 | |- class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf, spacing=1) == expected_report X_l = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, 1]] y_l = [-1, -1, -1, 1, 1, 1, 2] clf = DecisionTreeClassifier(max_depth=4, random_state=0) clf.fit(X_l, y_l) - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- truncated branch of depth 2 - """).lstrip() + """ + ).lstrip() assert export_text(clf, max_depth=0) == expected_report X_mo = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] @@ -422,12 +431,14 @@ def test_export_text(): reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_mo, y_mo) - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.0 | |--- value: [-1.0, -1.0] |--- feature_1 > 0.0 | |--- value: [1.0, 1.0] - """).lstrip() + """ + ).lstrip() assert export_text(reg, decimals=1) == expected_report assert export_text(reg, decimals=1, show_weights=True) == expected_report @@ -435,12 +446,14 @@ def test_export_text(): reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_single, y_mo) - expected_report = dedent(""" + expected_report = dedent( + """ |--- first <= 0.0 | |--- value: [-1.0, -1.0] |--- first > 0.0 | |--- value: [1.0, 1.0] - """).lstrip() + """ + ).lstrip() assert export_text(reg, decimals=1, feature_names=["first"]) == expected_report assert ( export_text(reg, decimals=1, show_weights=True, feature_names=["first"]) @@ -455,20 +468,24 @@ def test_export_text_feature_class_names_array_support(constructor): clf = DecisionTreeClassifier(max_depth=2, random_state=0) clf.fit(X, y) - expected_report = dedent(""" + expected_report = dedent( + """ |--- b <= 0.00 | |--- class: -1 |--- b > 0.00 | |--- class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf, feature_names=constructor(["a", "b"])) == expected_report - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- class: cat |--- feature_1 > 0.00 | |--- class: dog - """).lstrip() + """ + ).lstrip() assert export_text(clf, class_names=constructor(["cat", "dog"])) == 
expected_report diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py index e647ba3a4f009..0207cc1205120 100644 --- a/sklearn/utils/_response.py +++ b/sklearn/utils/_response.py @@ -2,6 +2,7 @@ It allows to make uniform checks and validation. """ + import numpy as np from ..base import is_classifier diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py index 89052e88b65fe..1431108477263 100644 --- a/sklearn/utils/_show_versions.py +++ b/sklearn/utils/_show_versions.py @@ -3,6 +3,7 @@ adapted from :func:`pandas.show_versions` """ + # License: BSD 3 clause import platform diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index d2559cb66b2ad..b466a7765b819 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1461,8 +1461,7 @@ def check_dont_overwrite_parameters(name, estimator_orig): " the fit method." " Estimators are only allowed to add private attributes" " either started with _ or ended" - " with _ but %s added" - % ", ".join(attrs_added_by_fit) + " with _ but %s added" % ", ".join(attrs_added_by_fit) ) # check that fit doesn't change any public attribute @@ -1477,8 +1476,7 @@ def check_dont_overwrite_parameters(name, estimator_orig): " the fit method. Estimators are only allowed" " to change attributes started" " or ended with _, but" - " %s changed" - % ", ".join(attrs_changed_by_fit) + " %s changed" % ", ".join(attrs_changed_by_fit) ) @@ -2927,8 +2925,7 @@ def check_supervised_y_2d(name, estimator_orig): assert len(w) > 0, msg assert ( "DataConversionWarning('A column-vector y" - " was passed when a 1d array was expected" - in msg + " was passed when a 1d array was expected" in msg ) assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index be93464353832..2fe7dbc3cc179 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -2,6 +2,7 @@ The :mod:`sklearn.utils.extmath` module includes utilities to perform optimal mathematical operations in scikit-learn that are not available in SciPy. """ + # Authors: Gael Varoquaux # Alexandre Gramfort # Alexandre T. Passos diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 8eca047b1a844..33be9f4ab3473 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -3,6 +3,7 @@ If you add content to this file, please give the version of the package at which the fix is no longer needed. """ + # Authors: Emmanuelle Gouillart # Gael Varoquaux # Fabian Pedregosa diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index 024b0bcaf95ee..d79f514aae778 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -8,6 +8,7 @@ regression with large design matrix), this approach gives very significant speedups. 
""" + # This is a modified file from scipy.optimize # Original authors: Travis Oliphant, Eric Jones # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index c167a7e9d8f59..5ec962433d7c0 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -703,9 +703,7 @@ def test_incremental_weighted_mean_and_variance_simple(rng, dtype): mean, var, _ = _incremental_mean_and_var(X, 0, 0, 0, sample_weight=sample_weight) expected_mean = np.average(X, weights=sample_weight, axis=0) - expected_var = ( - np.average(X**2, weights=sample_weight, axis=0) - expected_mean**2 - ) + expected_var = np.average(X**2, weights=sample_weight, axis=0) - expected_mean**2 assert_almost_equal(mean, expected_mean) assert_almost_equal(var, expected_var) diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 8fada45db3f52..c44250c36daac 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -1,5 +1,5 @@ -""" Test fast_dict. -""" +"""Test fast_dict.""" + import numpy as np from numpy.testing import assert_allclose, assert_array_equal From cf52ff582facba8232cfe0c517a30c6de2cfd187 Mon Sep 17 00:00:00 2001 From: scarliles Date: Fri, 5 Jul 2024 17:06:44 -0400 Subject: [PATCH 3/5] broke sort functions, partitioners out of _splitter.pyx --- sklearn/tree/_partitioner.pxd | 101 +++++ sklearn/tree/_partitioner.pyx | 607 +++++++++++++++++++++++++++ sklearn/tree/_sort.pxd | 13 + sklearn/tree/_sort.pyx | 123 ++++++ sklearn/tree/_splitter.pxd | 1 + sklearn/tree/_splitter.pyx | 769 +--------------------------------- sklearn/tree/meson.build | 6 + 7 files changed, 852 insertions(+), 768 deletions(-) create mode 100644 sklearn/tree/_partitioner.pxd create mode 100644 sklearn/tree/_partitioner.pyx create mode 100644 sklearn/tree/_sort.pxd create mode 100644 sklearn/tree/_sort.pyx diff --git a/sklearn/tree/_partitioner.pxd b/sklearn/tree/_partitioner.pxd new file mode 100644 index 0000000000000..880d9a2a52478 --- /dev/null +++ b/sklearn/tree/_partitioner.pxd @@ -0,0 +1,101 @@ +from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t + +# Constant to switch between algorithm non zero value extract algorithm +# in SparsePartitioner +cdef float32_t EXTRACT_NNZ_SWITCH = 0.1 + + +# Introduce a fused-class to make it possible to share the split implementation +# between the dense and sparse cases in the node_split_best and node_split_random +# functions. The alternative would have been to use inheritance-based polymorphism +# but it would have resulted in a ~10% overall tree fitting performance +# degradation caused by the overhead frequent virtual method lookups. +ctypedef fused Partitioner: + DensePartitioner + SparsePartitioner + + +cdef class DensePartitioner: + """Partitioner specialized for dense data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
+ """ + cdef: + const float32_t[:, :] X + cdef intp_t[::1] samples + cdef float32_t[::1] feature_values + cdef intp_t start + cdef intp_t end + cdef intp_t n_missing + cdef const unsigned char[::1] missing_values_in_feature_mask + + inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil + inline void sort_samples_and_feature_values( + self, + intp_t current_feature + ) noexcept nogil + inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil + inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil + inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil + inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, + ) noexcept nogil + + +cdef class SparsePartitioner: + """Partitioner specialized for sparse CSC data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). + """ + cdef: + intp_t[::1] samples + float32_t[::1] feature_values + intp_t start + intp_t end + intp_t n_missing + const unsigned char[::1] missing_values_in_feature_mask + + const float32_t[::1] X_data + const int32_t[::1] X_indices + const int32_t[::1] X_indptr + + intp_t n_total_samples + + intp_t[::1] index_to_samples + intp_t[::1] sorted_samples + + intp_t start_positive + intp_t end_negative + bint is_samples_sorted + + inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil + inline void sort_samples_and_feature_values( + self, + intp_t current_feature + ) noexcept nogil + inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil + inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil + inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil + inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, + ) noexcept nogil + inline intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil + inline void extract_nnz(self, intp_t feature) noexcept nogil diff --git a/sklearn/tree/_partitioner.pyx b/sklearn/tree/_partitioner.pyx new file mode 100644 index 0000000000000..e0a991577d56a --- /dev/null +++ b/sklearn/tree/_partitioner.pyx @@ -0,0 +1,607 @@ +from cython cimport final +from libc.math cimport isnan, log +from libc.stdlib cimport qsort +from libc.string cimport memcpy +from scipy.sparse import issparse + +import numpy as np + +from ._sort cimport sort, sparse_swap, swap, FEATURE_THRESHOLD + + +@final +cdef class DensePartitioner: + """Partitioner specialized for dense data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
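+
+    When missing_values_in_feature_mask flags the current feature, NaN entries
+    are moved to the end of the node's sample range while sorting and counted
+    in self.n_missing; they are excluded from the threshold search.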
+ """ + def __init__( + self, + const float32_t[:, :] X, + intp_t[::1] samples, + float32_t[::1] feature_values, + const unsigned char[::1] missing_values_in_feature_mask, + ): + self.X = X + self.samples = samples + self.feature_values = feature_values + self.missing_values_in_feature_mask = missing_values_in_feature_mask + + cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: + """Initialize splitter at the beginning of node_split.""" + self.start = start + self.end = end + self.n_missing = 0 + + cdef inline void sort_samples_and_feature_values( + self, intp_t current_feature + ) noexcept nogil: + """Simultaneously sort based on the feature_values. + + Missing values are stored at the end of feature_values. + The number of missing values observed in feature_values is stored + in self.n_missing. + """ + cdef: + intp_t i, current_end + float32_t[::1] feature_values = self.feature_values + const float32_t[:, :] X = self.X + intp_t[::1] samples = self.samples + intp_t n_missing = 0 + const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask + + # Sort samples along that feature; by + # copying the values into an array and + # sorting the array in a manner which utilizes the cache more + # effectively. + if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]: + i, current_end = self.start, self.end - 1 + # Missing values are placed at the end and do not participate in the sorting. + while i <= current_end: + # Finds the right-most value that is not missing so that + # it can be swapped with missing values at its left. + if isnan(X[samples[current_end], current_feature]): + n_missing += 1 + current_end -= 1 + continue + + # X[samples[current_end], current_feature] is a non-missing value + if isnan(X[samples[i], current_feature]): + samples[i], samples[current_end] = samples[current_end], samples[i] + n_missing += 1 + current_end -= 1 + + feature_values[i] = X[samples[i], current_feature] + i += 1 + else: + # When there are no missing values, we only need to copy the data into + # feature_values + for i in range(self.start, self.end): + feature_values[i] = X[samples[i], current_feature] + + sort(&feature_values[self.start], &samples[self.start], self.end - self.start - n_missing) + self.n_missing = n_missing + + cdef inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil: + """Find the minimum and maximum value for current_feature.""" + cdef: + intp_t p + float32_t current_feature_value + const float32_t[:, :] X = self.X + intp_t[::1] samples = self.samples + float32_t min_feature_value = X[samples[self.start], current_feature] + float32_t max_feature_value = min_feature_value + float32_t[::1] feature_values = self.feature_values + + feature_values[self.start] = min_feature_value + + for p in range(self.start + 1, self.end): + current_feature_value = X[samples[p], current_feature] + feature_values[p] = current_feature_value + + if current_feature_value < min_feature_value: + min_feature_value = current_feature_value + elif current_feature_value > max_feature_value: + max_feature_value = current_feature_value + + min_feature_value_out[0] = min_feature_value + max_feature_value_out[0] = max_feature_value + + cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: + """Compute the next p_prev and p for iteratiing over feature values. 
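+        Feature values closer than FEATURE_THRESHOLD are treated as ties, so p
+        is advanced past runs of (nearly) equal values before a new split point
+        is considered.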
+ + The missing values are not included when iterating through the feature values. + """ + cdef: + float32_t[::1] feature_values = self.feature_values + intp_t end_non_missing = self.end - self.n_missing + + while ( + p[0] + 1 < end_non_missing and + feature_values[p[0] + 1] <= feature_values[p[0]] + FEATURE_THRESHOLD + ): + p[0] += 1 + + p_prev[0] = p[0] + + # By adding 1, we have + # (feature_values[p] >= end) or (feature_values[p] > feature_values[p - 1]) + p[0] += 1 + + cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: + """Partition samples for feature_values at the current_threshold.""" + cdef: + intp_t p = self.start + intp_t partition_end = self.end + intp_t[::1] samples = self.samples + float32_t[::1] feature_values = self.feature_values + + while p < partition_end: + if feature_values[p] <= current_threshold: + p += 1 + else: + partition_end -= 1 + + feature_values[p], feature_values[partition_end] = ( + feature_values[partition_end], feature_values[p] + ) + samples[p], samples[partition_end] = samples[partition_end], samples[p] + + return partition_end + + cdef inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, + ) noexcept nogil: + """Partition samples for X at the best_threshold and best_feature. + + If missing values are present, this method partitions `samples` + so that the `best_n_missing` missing values' indices are in the + right-most end of `samples`, that is `samples[end_non_missing:end]`. + """ + cdef: + # Local invariance: start <= p <= partition_end <= end + intp_t start = self.start + intp_t p = start + intp_t end = self.end - 1 + intp_t partition_end = end - best_n_missing + intp_t[::1] samples = self.samples + const float32_t[:, :] X = self.X + float32_t current_value + + if best_n_missing != 0: + # Move samples with missing values to the end while partitioning the + # non-missing samples + while p < partition_end: + # Keep samples with missing values at the end + if isnan(X[samples[end], best_feature]): + end -= 1 + continue + + # Swap sample with missing values with the sample at the end + current_value = X[samples[p], best_feature] + if isnan(current_value): + samples[p], samples[end] = samples[end], samples[p] + end -= 1 + + # The swapped sample at the end is always a non-missing value, so + # we can continue the algorithm without checking for missingness. + current_value = X[samples[p], best_feature] + + # Partition the non-missing samples + if current_value <= best_threshold: + p += 1 + else: + samples[p], samples[partition_end] = samples[partition_end], samples[p] + partition_end -= 1 + else: + # Partitioning routine when there are no missing values + while p < partition_end: + if X[samples[p], best_feature] <= best_threshold: + p += 1 + else: + samples[p], samples[partition_end] = samples[partition_end], samples[p] + partition_end -= 1 + + +@final +cdef class SparsePartitioner: + """Partitioner specialized for sparse CSC data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
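+
+    For each feature, the non-zero values of the CSC column that belong to the
+    current node are extracted into feature_values, negatives before positives,
+    with the column's zeros represented in between (see extract_nnz).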
+ """ + def __init__( + self, + object X, + intp_t[::1] samples, + intp_t n_samples, + float32_t[::1] feature_values, + const unsigned char[::1] missing_values_in_feature_mask, + ): + if not (issparse(X) and X.format == "csc"): + raise ValueError("X should be in csc format") + + self.samples = samples + self.feature_values = feature_values + + # Initialize X + cdef intp_t n_total_samples = X.shape[0] + + self.X_data = X.data + self.X_indices = X.indices + self.X_indptr = X.indptr + self.n_total_samples = n_total_samples + + # Initialize auxiliary array used to perform split + self.index_to_samples = np.full(n_total_samples, fill_value=-1, dtype=np.intp) + self.sorted_samples = np.empty(n_samples, dtype=np.intp) + + cdef intp_t p + for p in range(n_samples): + self.index_to_samples[samples[p]] = p + + self.missing_values_in_feature_mask = missing_values_in_feature_mask + + cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: + """Initialize splitter at the beginning of node_split.""" + self.start = start + self.end = end + self.is_samples_sorted = 0 + self.n_missing = 0 + + cdef inline void sort_samples_and_feature_values( + self, intp_t current_feature + ) noexcept nogil: + """Simultaneously sort based on the feature_values.""" + cdef: + float32_t[::1] feature_values = self.feature_values + intp_t[::1] index_to_samples = self.index_to_samples + intp_t[::1] samples = self.samples + + self.extract_nnz(current_feature) + # Sort the positive and negative parts of `feature_values` + sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start) + if self.start_positive < self.end: + sort( + &feature_values[self.start_positive], + &samples[self.start_positive], + self.end - self.start_positive + ) + + # Update index_to_samples to take into account the sort + for p in range(self.start, self.end_negative): + index_to_samples[samples[p]] = p + for p in range(self.start_positive, self.end): + index_to_samples[samples[p]] = p + + # Add one or two zeros in feature_values, if there is any + if self.end_negative < self.start_positive: + self.start_positive -= 1 + feature_values[self.start_positive] = 0. + + if self.end_negative != self.start_positive: + feature_values[self.end_negative] = 0. 
+ self.end_negative += 1 + + # XXX: When sparse supports missing values, this should be set to the + # number of missing values for current_feature + self.n_missing = 0 + + cdef inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil: + """Find the minimum and maximum value for current_feature.""" + cdef: + intp_t p + float32_t current_feature_value, min_feature_value, max_feature_value + float32_t[::1] feature_values = self.feature_values + + self.extract_nnz(current_feature) + + if self.end_negative != self.start_positive: + # There is a zero + min_feature_value = 0 + max_feature_value = 0 + else: + min_feature_value = feature_values[self.start] + max_feature_value = min_feature_value + + # Find min, max in feature_values[start:end_negative] + for p in range(self.start, self.end_negative): + current_feature_value = feature_values[p] + + if current_feature_value < min_feature_value: + min_feature_value = current_feature_value + elif current_feature_value > max_feature_value: + max_feature_value = current_feature_value + + # Update min, max given feature_values[start_positive:end] + for p in range(self.start_positive, self.end): + current_feature_value = feature_values[p] + + if current_feature_value < min_feature_value: + min_feature_value = current_feature_value + elif current_feature_value > max_feature_value: + max_feature_value = current_feature_value + + min_feature_value_out[0] = min_feature_value + max_feature_value_out[0] = max_feature_value + + cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: + """Compute the next p_prev and p for iteratiing over feature values.""" + cdef: + intp_t p_next + float32_t[::1] feature_values = self.feature_values + + if p[0] + 1 != self.end_negative: + p_next = p[0] + 1 + else: + p_next = self.start_positive + + while (p_next < self.end and + feature_values[p_next] <= feature_values[p[0]] + FEATURE_THRESHOLD): + p[0] = p_next + if p[0] + 1 != self.end_negative: + p_next = p[0] + 1 + else: + p_next = self.start_positive + + p_prev[0] = p[0] + p[0] = p_next + + cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: + """Partition samples for feature_values at the current_threshold.""" + return self._partition(current_threshold, self.start_positive) + + cdef inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t n_missing, + ) noexcept nogil: + """Partition samples for X at the best_threshold and best_feature.""" + self.extract_nnz(best_feature) + self._partition(best_threshold, best_pos) + + cdef inline intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil: + """Partition samples[start:end] based on threshold.""" + cdef: + intp_t p, partition_end + intp_t[::1] index_to_samples = self.index_to_samples + float32_t[::1] feature_values = self.feature_values + intp_t[::1] samples = self.samples + + if threshold < 0.: + p = self.start + partition_end = self.end_negative + elif threshold > 0.: + p = self.start_positive + partition_end = self.end + else: + # Data are already split + return zero_pos + + while p < partition_end: + if feature_values[p] <= threshold: + p += 1 + + else: + partition_end -= 1 + + feature_values[p], feature_values[partition_end] = ( + feature_values[partition_end], feature_values[p] + ) + sparse_swap(index_to_samples, samples, p, partition_end) + + return partition_end + + cdef inline void 
extract_nnz(self, intp_t feature) noexcept nogil: + """Extract and partition values for a given feature. + + The extracted values are partitioned between negative values + feature_values[start:end_negative[0]] and positive values + feature_values[start_positive[0]:end]. + The samples and index_to_samples are modified according to this + partition. + + The extraction corresponds to the intersection between the arrays + X_indices[indptr_start:indptr_end] and samples[start:end]. + This is done efficiently using either an index_to_samples based approach + or binary search based approach. + + Parameters + ---------- + feature : intp_t, + Index of the feature we want to extract non zero value. + """ + cdef intp_t[::1] samples = self.samples + cdef float32_t[::1] feature_values = self.feature_values + cdef intp_t indptr_start = self.X_indptr[feature], + cdef intp_t indptr_end = self.X_indptr[feature + 1] + cdef intp_t n_indices = (indptr_end - indptr_start) + cdef intp_t n_samples = self.end - self.start + cdef intp_t[::1] index_to_samples = self.index_to_samples + cdef intp_t[::1] sorted_samples = self.sorted_samples + cdef const int32_t[::1] X_indices = self.X_indices + cdef const float32_t[::1] X_data = self.X_data + + # Use binary search if n_samples * log(n_indices) < + # n_indices and index_to_samples approach otherwise. + # O(n_samples * log(n_indices)) is the running time of binary + # search and O(n_indices) is the running time of index_to_samples + # approach. + if ((1 - self.is_samples_sorted) * n_samples * log(n_samples) + + n_samples * log(n_indices) < EXTRACT_NNZ_SWITCH * n_indices): + extract_nnz_binary_search(X_indices, X_data, + indptr_start, indptr_end, + samples, self.start, self.end, + index_to_samples, + feature_values, + &self.end_negative, &self.start_positive, + sorted_samples, &self.is_samples_sorted) + + # Using an index to samples technique to extract non zero values + # index_to_samples is a mapping from X_indices to samples + else: + extract_nnz_index_to_samples(X_indices, X_data, + indptr_start, indptr_end, + samples, self.start, self.end, + index_to_samples, + feature_values, + &self.end_negative, &self.start_positive) + + +cdef int compare_SIZE_t(const void* a, const void* b) noexcept nogil: + """Comparison function for sort. + + This must return an `int` as it is used by stdlib's qsort, which expects + an `int` return value. + """ + return ((a)[0] - (b)[0]) + + +cdef inline void binary_search(const int32_t[::1] sorted_array, + int32_t start, int32_t end, + intp_t value, intp_t* index, + int32_t* new_start) noexcept nogil: + """Return the index of value in the sorted array. + + If not found, return -1. new_start is the last pivot + 1 + """ + cdef int32_t pivot + index[0] = -1 + while start < end: + pivot = start + (end - start) / 2 + + if sorted_array[pivot] == value: + index[0] = pivot + start = pivot + 1 + break + + if sorted_array[pivot] < value: + start = pivot + 1 + else: + end = pivot + new_start[0] = start + + +cdef inline void extract_nnz_index_to_samples(const int32_t[::1] X_indices, + const float32_t[::1] X_data, + int32_t indptr_start, + int32_t indptr_end, + intp_t[::1] samples, + intp_t start, + intp_t end, + intp_t[::1] index_to_samples, + float32_t[::1] feature_values, + intp_t* end_negative, + intp_t* start_positive) noexcept nogil: + """Extract and partition values for a feature using index_to_samples. + + Complexity is O(indptr_end - indptr_start). 
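+
+    Each non-zero entry in X_indices[indptr_start:indptr_end] is looked up in
+    index_to_samples; entries whose position falls inside [start, end) belong
+    to the current node and are swapped into the negative or positive block of
+    feature_values.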
+ """ + cdef int32_t k + cdef intp_t index + cdef intp_t end_negative_ = start + cdef intp_t start_positive_ = end + + for k in range(indptr_start, indptr_end): + if start <= index_to_samples[X_indices[k]] < end: + if X_data[k] > 0: + start_positive_ -= 1 + feature_values[start_positive_] = X_data[k] + index = index_to_samples[X_indices[k]] + sparse_swap(index_to_samples, samples, index, start_positive_) + + elif X_data[k] < 0: + feature_values[end_negative_] = X_data[k] + index = index_to_samples[X_indices[k]] + sparse_swap(index_to_samples, samples, index, end_negative_) + end_negative_ += 1 + + # Returned values + end_negative[0] = end_negative_ + start_positive[0] = start_positive_ + + +cdef inline void extract_nnz_binary_search(const int32_t[::1] X_indices, + const float32_t[::1] X_data, + int32_t indptr_start, + int32_t indptr_end, + intp_t[::1] samples, + intp_t start, + intp_t end, + intp_t[::1] index_to_samples, + float32_t[::1] feature_values, + intp_t* end_negative, + intp_t* start_positive, + intp_t[::1] sorted_samples, + bint* is_samples_sorted) noexcept nogil: + """Extract and partition values for a given feature using binary search. + + If n_samples = end - start and n_indices = indptr_end - indptr_start, + the complexity is + + O((1 - is_samples_sorted[0]) * n_samples * log(n_samples) + + n_samples * log(n_indices)). + """ + cdef intp_t n_samples + + if not is_samples_sorted[0]: + n_samples = end - start + memcpy(&sorted_samples[start], &samples[start], + n_samples * sizeof(intp_t)) + qsort(&sorted_samples[start], n_samples, sizeof(intp_t), + compare_SIZE_t) + is_samples_sorted[0] = 1 + + while (indptr_start < indptr_end and + sorted_samples[start] > X_indices[indptr_start]): + indptr_start += 1 + + while (indptr_start < indptr_end and + sorted_samples[end - 1] < X_indices[indptr_end - 1]): + indptr_end -= 1 + + cdef intp_t p = start + cdef intp_t index + cdef intp_t k + cdef intp_t end_negative_ = start + cdef intp_t start_positive_ = end + + while (p < end and indptr_start < indptr_end): + # Find index of sorted_samples[p] in X_indices + binary_search(X_indices, indptr_start, indptr_end, + sorted_samples[p], &k, &indptr_start) + + if k != -1: + # If k != -1, we have found a non zero value + + if X_data[k] > 0: + start_positive_ -= 1 + feature_values[start_positive_] = X_data[k] + index = index_to_samples[X_indices[k]] + sparse_swap(index_to_samples, samples, index, start_positive_) + + elif X_data[k] < 0: + feature_values[end_negative_] = X_data[k] + index = index_to_samples[X_indices[k]] + sparse_swap(index_to_samples, samples, index, end_negative_) + end_negative_ += 1 + p += 1 + + # Returned values + end_negative[0] = end_negative_ + start_positive[0] = start_positive_ diff --git a/sklearn/tree/_sort.pxd b/sklearn/tree/_sort.pxd new file mode 100644 index 0000000000000..5a0b3d20d0f35 --- /dev/null +++ b/sklearn/tree/_sort.pxd @@ -0,0 +1,13 @@ +from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t + + +# Mitigate precision differences between 32 bit and 64 bit +cdef float32_t FEATURE_THRESHOLD = 1e-7 + +# Sort n-element arrays pointed to by feature_values and samples, simultaneously, +# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997). 
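+# Introsort here is a median-of-3 quicksort that falls back to heapsort once the
+# recursion depth exceeds 2 * log(n), keeping the worst case at O(n log n).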
+cdef void sort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil + +cdef void swap(float32_t* feature_values, intp_t* samples, intp_t i, intp_t j) noexcept nogil +cdef void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples, + intp_t pos_1, intp_t pos_2) noexcept nogil diff --git a/sklearn/tree/_sort.pyx b/sklearn/tree/_sort.pyx new file mode 100644 index 0000000000000..9a9db6edf6e00 --- /dev/null +++ b/sklearn/tree/_sort.pyx @@ -0,0 +1,123 @@ +from ._utils cimport log + + +cdef inline void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples, + intp_t pos_1, intp_t pos_2) noexcept nogil: + """Swap sample pos_1 and pos_2 preserving sparse invariant.""" + samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1] + index_to_samples[samples[pos_1]] = pos_1 + index_to_samples[samples[pos_2]] = pos_2 + + +# Sort n-element arrays pointed to by feature_values and samples, simultaneously, +# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997). +cdef inline void sort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil: + if n == 0: + return + cdef intp_t maxd = 2 * log(n) + introsort(feature_values, samples, n, maxd) + + +# Introsort with median of 3 pivot selection and 3-way partition function +# (robust to repeated elements, e.g. lots of zero features). +cdef void introsort(float32_t* feature_values, intp_t *samples, + intp_t n, intp_t maxd) noexcept nogil: + cdef float32_t pivot + cdef intp_t i, l, r + + while n > 1: + if maxd <= 0: # max depth limit exceeded ("gone quadratic") + heapsort(feature_values, samples, n) + return + maxd -= 1 + + pivot = median3(feature_values, n) + + # Three-way partition. + i = l = 0 + r = n + while i < r: + if feature_values[i] < pivot: + swap(feature_values, samples, i, l) + i += 1 + l += 1 + elif feature_values[i] > pivot: + r -= 1 + swap(feature_values, samples, i, r) + else: + i += 1 + + introsort(feature_values, samples, l, maxd) + feature_values += r + samples += r + n -= r + + +cdef void heapsort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil: + cdef intp_t start, end + + # heapify + start = (n - 2) / 2 + end = n + while True: + sift_down(feature_values, samples, start, end) + if start == 0: + break + start -= 1 + + # sort by shrinking the heap, putting the max element immediately after it + end = n - 1 + while end > 0: + swap(feature_values, samples, 0, end) + sift_down(feature_values, samples, 0, end) + end = end - 1 + + +cdef inline float32_t median3(float32_t* feature_values, intp_t n) noexcept nogil: + # Median of three pivot selection, after Bentley and McIlroy (1993). + # Engineering a sort function. SP&E. Requires 8/3 comparisons on average. + cdef float32_t a = feature_values[0], b = feature_values[n / 2], c = feature_values[n - 1] + if a < b: + if b < c: + return b + elif a < c: + return c + else: + return a + elif b < c: + if a < c: + return a + else: + return c + else: + return b + + +cdef inline void swap(float32_t* feature_values, intp_t* samples, + intp_t i, intp_t j) noexcept nogil: + # Helper for sort + feature_values[i], feature_values[j] = feature_values[j], feature_values[i] + samples[i], samples[j] = samples[j], samples[i] + + +cdef inline void sift_down(float32_t* feature_values, intp_t* samples, + intp_t start, intp_t end) noexcept nogil: + # Restore heap order in feature_values[start:end] by moving the max element to start. 
+ cdef intp_t child, maxind, root + + root = start + while True: + child = root * 2 + 1 + + # find max of root, left child, right child + maxind = root + if child < end and feature_values[maxind] < feature_values[child]: + maxind = child + if child + 1 < end and feature_values[maxind] < feature_values[child + 1]: + maxind = child + 1 + + if maxind == root: + break + else: + swap(feature_values, samples, root, maxind) + root = maxind diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index b630252b329f2..a55cf2786cbef 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -4,6 +4,7 @@ # See _splitter.pyx for details. from libcpp.vector cimport vector +from ._partitioner cimport Partitioner, DensePartitioner, SparsePartitioner from ._criterion cimport BaseCriterion, Criterion from ._tree cimport ParentInfo from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, uint32_t diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 8bf71765355b3..eb08ec34ea2a2 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -1,30 +1,20 @@ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause -from cython cimport final -from libc.math cimport isnan -from libc.stdlib cimport qsort from libc.string cimport memcpy from ._criterion cimport Criterion -from ._utils cimport log +from ._sort cimport FEATURE_THRESHOLD from ._utils cimport rand_int from ._utils cimport rand_uniform from ._utils cimport RAND_R_MAX from ..utils._typedefs cimport int8_t import numpy as np -from scipy.sparse import issparse cdef float64_t INFINITY = np.inf -# Mitigate precision differences between 32 bit and 64 bit -cdef float32_t FEATURE_THRESHOLD = 1e-7 - -# Constant to switch between algorithm non zero value extract algorithm -# in SparsePartitioner -cdef float32_t EXTRACT_NNZ_SWITCH = 0.1 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil: self.impurity_left = INFINITY @@ -405,15 +395,6 @@ cdef inline void shift_missing_values_to_left_if_required( best.pos += best.n_missing -# Introduce a fused-class to make it possible to share the split implementation -# between the dense and sparse cases in the node_split_best and node_split_random -# functions. The alternative would have been to use inheritance-based polymorphism -# but it would have resulted in a ~10% overall tree fitting performance -# degradation caused by the overhead frequent virtual method lookups. -ctypedef fused Partitioner: - DensePartitioner - SparsePartitioner - cdef inline intp_t node_split_best( Splitter splitter, Partitioner partitioner, @@ -682,119 +663,6 @@ cdef inline intp_t node_split_best( return 0 -# Sort n-element arrays pointed to by feature_values and samples, simultaneously, -# by the values in feature_values. Algorithm: Introsort (Musser, SP&E, 1997). -cdef inline void sort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil: - if n == 0: - return - cdef intp_t maxd = 2 * log(n) - introsort(feature_values, samples, n, maxd) - - -cdef inline void swap(float32_t* feature_values, intp_t* samples, - intp_t i, intp_t j) noexcept nogil: - # Helper for sort - feature_values[i], feature_values[j] = feature_values[j], feature_values[i] - samples[i], samples[j] = samples[j], samples[i] - - -cdef inline float32_t median3(float32_t* feature_values, intp_t n) noexcept nogil: - # Median of three pivot selection, after Bentley and McIlroy (1993). - # Engineering a sort function. SP&E. 
Requires 8/3 comparisons on average. - cdef float32_t a = feature_values[0], b = feature_values[n / 2], c = feature_values[n - 1] - if a < b: - if b < c: - return b - elif a < c: - return c - else: - return a - elif b < c: - if a < c: - return a - else: - return c - else: - return b - - -# Introsort with median of 3 pivot selection and 3-way partition function -# (robust to repeated elements, e.g. lots of zero features). -cdef void introsort(float32_t* feature_values, intp_t *samples, - intp_t n, intp_t maxd) noexcept nogil: - cdef float32_t pivot - cdef intp_t i, l, r - - while n > 1: - if maxd <= 0: # max depth limit exceeded ("gone quadratic") - heapsort(feature_values, samples, n) - return - maxd -= 1 - - pivot = median3(feature_values, n) - - # Three-way partition. - i = l = 0 - r = n - while i < r: - if feature_values[i] < pivot: - swap(feature_values, samples, i, l) - i += 1 - l += 1 - elif feature_values[i] > pivot: - r -= 1 - swap(feature_values, samples, i, r) - else: - i += 1 - - introsort(feature_values, samples, l, maxd) - feature_values += r - samples += r - n -= r - - -cdef inline void sift_down(float32_t* feature_values, intp_t* samples, - intp_t start, intp_t end) noexcept nogil: - # Restore heap order in feature_values[start:end] by moving the max element to start. - cdef intp_t child, maxind, root - - root = start - while True: - child = root * 2 + 1 - - # find max of root, left child, right child - maxind = root - if child < end and feature_values[maxind] < feature_values[child]: - maxind = child - if child + 1 < end and feature_values[maxind] < feature_values[child + 1]: - maxind = child + 1 - - if maxind == root: - break - else: - swap(feature_values, samples, root, maxind) - root = maxind - - -cdef void heapsort(float32_t* feature_values, intp_t* samples, intp_t n) noexcept nogil: - cdef intp_t start, end - - # heapify - start = (n - 2) / 2 - end = n - while True: - sift_down(feature_values, samples, start, end) - if start == 0: - break - start -= 1 - - # sort by shrinking the heap, putting the max element immediately after it - end = n - 1 - while end > 0: - swap(feature_values, samples, 0, end) - sift_down(feature_values, samples, 0, end) - end = end - 1 - cdef inline int node_split_random( Splitter splitter, Partitioner partitioner, @@ -982,641 +850,6 @@ cdef inline int node_split_random( return 0 -@final -cdef class DensePartitioner: - """Partitioner specialized for dense data. - - Note that this partitioner is agnostic to the splitting strategy (best vs. random). - """ - cdef: - const float32_t[:, :] X - cdef intp_t[::1] samples - cdef float32_t[::1] feature_values - cdef intp_t start - cdef intp_t end - cdef intp_t n_missing - cdef const unsigned char[::1] missing_values_in_feature_mask - - def __init__( - self, - const float32_t[:, :] X, - intp_t[::1] samples, - float32_t[::1] feature_values, - const unsigned char[::1] missing_values_in_feature_mask, - ): - self.X = X - self.samples = samples - self.feature_values = feature_values - self.missing_values_in_feature_mask = missing_values_in_feature_mask - - cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: - """Initialize splitter at the beginning of node_split.""" - self.start = start - self.end = end - self.n_missing = 0 - - cdef inline void sort_samples_and_feature_values( - self, intp_t current_feature - ) noexcept nogil: - """Simultaneously sort based on the feature_values. - - Missing values are stored at the end of feature_values. 
- The number of missing values observed in feature_values is stored - in self.n_missing. - """ - cdef: - intp_t i, current_end - float32_t[::1] feature_values = self.feature_values - const float32_t[:, :] X = self.X - intp_t[::1] samples = self.samples - intp_t n_missing = 0 - const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask - - # Sort samples along that feature; by - # copying the values into an array and - # sorting the array in a manner which utilizes the cache more - # effectively. - if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]: - i, current_end = self.start, self.end - 1 - # Missing values are placed at the end and do not participate in the sorting. - while i <= current_end: - # Finds the right-most value that is not missing so that - # it can be swapped with missing values at its left. - if isnan(X[samples[current_end], current_feature]): - n_missing += 1 - current_end -= 1 - continue - - # X[samples[current_end], current_feature] is a non-missing value - if isnan(X[samples[i], current_feature]): - samples[i], samples[current_end] = samples[current_end], samples[i] - n_missing += 1 - current_end -= 1 - - feature_values[i] = X[samples[i], current_feature] - i += 1 - else: - # When there are no missing values, we only need to copy the data into - # feature_values - for i in range(self.start, self.end): - feature_values[i] = X[samples[i], current_feature] - - sort(&feature_values[self.start], &samples[self.start], self.end - self.start - n_missing) - self.n_missing = n_missing - - cdef inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil: - """Find the minimum and maximum value for current_feature.""" - cdef: - intp_t p - float32_t current_feature_value - const float32_t[:, :] X = self.X - intp_t[::1] samples = self.samples - float32_t min_feature_value = X[samples[self.start], current_feature] - float32_t max_feature_value = min_feature_value - float32_t[::1] feature_values = self.feature_values - - feature_values[self.start] = min_feature_value - - for p in range(self.start + 1, self.end): - current_feature_value = X[samples[p], current_feature] - feature_values[p] = current_feature_value - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - min_feature_value_out[0] = min_feature_value - max_feature_value_out[0] = max_feature_value - - cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: - """Compute the next p_prev and p for iteratiing over feature values. - - The missing values are not included when iterating through the feature values. 
- """ - cdef: - float32_t[::1] feature_values = self.feature_values - intp_t end_non_missing = self.end - self.n_missing - - while ( - p[0] + 1 < end_non_missing and - feature_values[p[0] + 1] <= feature_values[p[0]] + FEATURE_THRESHOLD - ): - p[0] += 1 - - p_prev[0] = p[0] - - # By adding 1, we have - # (feature_values[p] >= end) or (feature_values[p] > feature_values[p - 1]) - p[0] += 1 - - cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: - """Partition samples for feature_values at the current_threshold.""" - cdef: - intp_t p = self.start - intp_t partition_end = self.end - intp_t[::1] samples = self.samples - float32_t[::1] feature_values = self.feature_values - - while p < partition_end: - if feature_values[p] <= current_threshold: - p += 1 - else: - partition_end -= 1 - - feature_values[p], feature_values[partition_end] = ( - feature_values[partition_end], feature_values[p] - ) - samples[p], samples[partition_end] = samples[partition_end], samples[p] - - return partition_end - - cdef inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t best_n_missing, - ) noexcept nogil: - """Partition samples for X at the best_threshold and best_feature. - - If missing values are present, this method partitions `samples` - so that the `best_n_missing` missing values' indices are in the - right-most end of `samples`, that is `samples[end_non_missing:end]`. - """ - cdef: - # Local invariance: start <= p <= partition_end <= end - intp_t start = self.start - intp_t p = start - intp_t end = self.end - 1 - intp_t partition_end = end - best_n_missing - intp_t[::1] samples = self.samples - const float32_t[:, :] X = self.X - float32_t current_value - - if best_n_missing != 0: - # Move samples with missing values to the end while partitioning the - # non-missing samples - while p < partition_end: - # Keep samples with missing values at the end - if isnan(X[samples[end], best_feature]): - end -= 1 - continue - - # Swap sample with missing values with the sample at the end - current_value = X[samples[p], best_feature] - if isnan(current_value): - samples[p], samples[end] = samples[end], samples[p] - end -= 1 - - # The swapped sample at the end is always a non-missing value, so - # we can continue the algorithm without checking for missingness. - current_value = X[samples[p], best_feature] - - # Partition the non-missing samples - if current_value <= best_threshold: - p += 1 - else: - samples[p], samples[partition_end] = samples[partition_end], samples[p] - partition_end -= 1 - else: - # Partitioning routine when there are no missing values - while p < partition_end: - if X[samples[p], best_feature] <= best_threshold: - p += 1 - else: - samples[p], samples[partition_end] = samples[partition_end], samples[p] - partition_end -= 1 - - -@final -cdef class SparsePartitioner: - """Partitioner specialized for sparse CSC data. - - Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
- """ - cdef intp_t[::1] samples - cdef float32_t[::1] feature_values - cdef intp_t start - cdef intp_t end - cdef intp_t n_missing - cdef const unsigned char[::1] missing_values_in_feature_mask - - cdef const float32_t[::1] X_data - cdef const int32_t[::1] X_indices - cdef const int32_t[::1] X_indptr - - cdef intp_t n_total_samples - - cdef intp_t[::1] index_to_samples - cdef intp_t[::1] sorted_samples - - cdef intp_t start_positive - cdef intp_t end_negative - cdef bint is_samples_sorted - - def __init__( - self, - object X, - intp_t[::1] samples, - intp_t n_samples, - float32_t[::1] feature_values, - const unsigned char[::1] missing_values_in_feature_mask, - ): - if not (issparse(X) and X.format == "csc"): - raise ValueError("X should be in csc format") - - self.samples = samples - self.feature_values = feature_values - - # Initialize X - cdef intp_t n_total_samples = X.shape[0] - - self.X_data = X.data - self.X_indices = X.indices - self.X_indptr = X.indptr - self.n_total_samples = n_total_samples - - # Initialize auxiliary array used to perform split - self.index_to_samples = np.full(n_total_samples, fill_value=-1, dtype=np.intp) - self.sorted_samples = np.empty(n_samples, dtype=np.intp) - - cdef intp_t p - for p in range(n_samples): - self.index_to_samples[samples[p]] = p - - self.missing_values_in_feature_mask = missing_values_in_feature_mask - - cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: - """Initialize splitter at the beginning of node_split.""" - self.start = start - self.end = end - self.is_samples_sorted = 0 - self.n_missing = 0 - - cdef inline void sort_samples_and_feature_values( - self, intp_t current_feature - ) noexcept nogil: - """Simultaneously sort based on the feature_values.""" - cdef: - float32_t[::1] feature_values = self.feature_values - intp_t[::1] index_to_samples = self.index_to_samples - intp_t[::1] samples = self.samples - - self.extract_nnz(current_feature) - # Sort the positive and negative parts of `feature_values` - sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start) - if self.start_positive < self.end: - sort( - &feature_values[self.start_positive], - &samples[self.start_positive], - self.end - self.start_positive - ) - - # Update index_to_samples to take into account the sort - for p in range(self.start, self.end_negative): - index_to_samples[samples[p]] = p - for p in range(self.start_positive, self.end): - index_to_samples[samples[p]] = p - - # Add one or two zeros in feature_values, if there is any - if self.end_negative < self.start_positive: - self.start_positive -= 1 - feature_values[self.start_positive] = 0. - - if self.end_negative != self.start_positive: - feature_values[self.end_negative] = 0. 
- self.end_negative += 1 - - # XXX: When sparse supports missing values, this should be set to the - # number of missing values for current_feature - self.n_missing = 0 - - cdef inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil: - """Find the minimum and maximum value for current_feature.""" - cdef: - intp_t p - float32_t current_feature_value, min_feature_value, max_feature_value - float32_t[::1] feature_values = self.feature_values - - self.extract_nnz(current_feature) - - if self.end_negative != self.start_positive: - # There is a zero - min_feature_value = 0 - max_feature_value = 0 - else: - min_feature_value = feature_values[self.start] - max_feature_value = min_feature_value - - # Find min, max in feature_values[start:end_negative] - for p in range(self.start, self.end_negative): - current_feature_value = feature_values[p] - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - # Update min, max given feature_values[start_positive:end] - for p in range(self.start_positive, self.end): - current_feature_value = feature_values[p] - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - min_feature_value_out[0] = min_feature_value - max_feature_value_out[0] = max_feature_value - - cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: - """Compute the next p_prev and p for iteratiing over feature values.""" - cdef: - intp_t p_next - float32_t[::1] feature_values = self.feature_values - - if p[0] + 1 != self.end_negative: - p_next = p[0] + 1 - else: - p_next = self.start_positive - - while (p_next < self.end and - feature_values[p_next] <= feature_values[p[0]] + FEATURE_THRESHOLD): - p[0] = p_next - if p[0] + 1 != self.end_negative: - p_next = p[0] + 1 - else: - p_next = self.start_positive - - p_prev[0] = p[0] - p[0] = p_next - - cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: - """Partition samples for feature_values at the current_threshold.""" - return self._partition(current_threshold, self.start_positive) - - cdef inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t n_missing, - ) noexcept nogil: - """Partition samples for X at the best_threshold and best_feature.""" - self.extract_nnz(best_feature) - self._partition(best_threshold, best_pos) - - cdef inline intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil: - """Partition samples[start:end] based on threshold.""" - cdef: - intp_t p, partition_end - intp_t[::1] index_to_samples = self.index_to_samples - float32_t[::1] feature_values = self.feature_values - intp_t[::1] samples = self.samples - - if threshold < 0.: - p = self.start - partition_end = self.end_negative - elif threshold > 0.: - p = self.start_positive - partition_end = self.end - else: - # Data are already split - return zero_pos - - while p < partition_end: - if feature_values[p] <= threshold: - p += 1 - - else: - partition_end -= 1 - - feature_values[p], feature_values[partition_end] = ( - feature_values[partition_end], feature_values[p] - ) - sparse_swap(index_to_samples, samples, p, partition_end) - - return partition_end - - cdef inline void 
extract_nnz(self, intp_t feature) noexcept nogil: - """Extract and partition values for a given feature. - - The extracted values are partitioned between negative values - feature_values[start:end_negative[0]] and positive values - feature_values[start_positive[0]:end]. - The samples and index_to_samples are modified according to this - partition. - - The extraction corresponds to the intersection between the arrays - X_indices[indptr_start:indptr_end] and samples[start:end]. - This is done efficiently using either an index_to_samples based approach - or binary search based approach. - - Parameters - ---------- - feature : intp_t, - Index of the feature we want to extract non zero value. - """ - cdef intp_t[::1] samples = self.samples - cdef float32_t[::1] feature_values = self.feature_values - cdef intp_t indptr_start = self.X_indptr[feature], - cdef intp_t indptr_end = self.X_indptr[feature + 1] - cdef intp_t n_indices = (indptr_end - indptr_start) - cdef intp_t n_samples = self.end - self.start - cdef intp_t[::1] index_to_samples = self.index_to_samples - cdef intp_t[::1] sorted_samples = self.sorted_samples - cdef const int32_t[::1] X_indices = self.X_indices - cdef const float32_t[::1] X_data = self.X_data - - # Use binary search if n_samples * log(n_indices) < - # n_indices and index_to_samples approach otherwise. - # O(n_samples * log(n_indices)) is the running time of binary - # search and O(n_indices) is the running time of index_to_samples - # approach. - if ((1 - self.is_samples_sorted) * n_samples * log(n_samples) + - n_samples * log(n_indices) < EXTRACT_NNZ_SWITCH * n_indices): - extract_nnz_binary_search(X_indices, X_data, - indptr_start, indptr_end, - samples, self.start, self.end, - index_to_samples, - feature_values, - &self.end_negative, &self.start_positive, - sorted_samples, &self.is_samples_sorted) - - # Using an index to samples technique to extract non zero values - # index_to_samples is a mapping from X_indices to samples - else: - extract_nnz_index_to_samples(X_indices, X_data, - indptr_start, indptr_end, - samples, self.start, self.end, - index_to_samples, - feature_values, - &self.end_negative, &self.start_positive) - - -cdef int compare_SIZE_t(const void* a, const void* b) noexcept nogil: - """Comparison function for sort. - - This must return an `int` as it is used by stdlib's qsort, which expects - an `int` return value. - """ - return ((a)[0] - (b)[0]) - - -cdef inline void binary_search(const int32_t[::1] sorted_array, - int32_t start, int32_t end, - intp_t value, intp_t* index, - int32_t* new_start) noexcept nogil: - """Return the index of value in the sorted array. - - If not found, return -1. new_start is the last pivot + 1 - """ - cdef int32_t pivot - index[0] = -1 - while start < end: - pivot = start + (end - start) / 2 - - if sorted_array[pivot] == value: - index[0] = pivot - start = pivot + 1 - break - - if sorted_array[pivot] < value: - start = pivot + 1 - else: - end = pivot - new_start[0] = start - - -cdef inline void extract_nnz_index_to_samples(const int32_t[::1] X_indices, - const float32_t[::1] X_data, - int32_t indptr_start, - int32_t indptr_end, - intp_t[::1] samples, - intp_t start, - intp_t end, - intp_t[::1] index_to_samples, - float32_t[::1] feature_values, - intp_t* end_negative, - intp_t* start_positive) noexcept nogil: - """Extract and partition values for a feature using index_to_samples. - - Complexity is O(indptr_end - indptr_start). 
- """ - cdef int32_t k - cdef intp_t index - cdef intp_t end_negative_ = start - cdef intp_t start_positive_ = end - - for k in range(indptr_start, indptr_end): - if start <= index_to_samples[X_indices[k]] < end: - if X_data[k] > 0: - start_positive_ -= 1 - feature_values[start_positive_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, start_positive_) - - elif X_data[k] < 0: - feature_values[end_negative_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, end_negative_) - end_negative_ += 1 - - # Returned values - end_negative[0] = end_negative_ - start_positive[0] = start_positive_ - - -cdef inline void extract_nnz_binary_search(const int32_t[::1] X_indices, - const float32_t[::1] X_data, - int32_t indptr_start, - int32_t indptr_end, - intp_t[::1] samples, - intp_t start, - intp_t end, - intp_t[::1] index_to_samples, - float32_t[::1] feature_values, - intp_t* end_negative, - intp_t* start_positive, - intp_t[::1] sorted_samples, - bint* is_samples_sorted) noexcept nogil: - """Extract and partition values for a given feature using binary search. - - If n_samples = end - start and n_indices = indptr_end - indptr_start, - the complexity is - - O((1 - is_samples_sorted[0]) * n_samples * log(n_samples) + - n_samples * log(n_indices)). - """ - cdef intp_t n_samples - - if not is_samples_sorted[0]: - n_samples = end - start - memcpy(&sorted_samples[start], &samples[start], - n_samples * sizeof(intp_t)) - qsort(&sorted_samples[start], n_samples, sizeof(intp_t), - compare_SIZE_t) - is_samples_sorted[0] = 1 - - while (indptr_start < indptr_end and - sorted_samples[start] > X_indices[indptr_start]): - indptr_start += 1 - - while (indptr_start < indptr_end and - sorted_samples[end - 1] < X_indices[indptr_end - 1]): - indptr_end -= 1 - - cdef intp_t p = start - cdef intp_t index - cdef intp_t k - cdef intp_t end_negative_ = start - cdef intp_t start_positive_ = end - - while (p < end and indptr_start < indptr_end): - # Find index of sorted_samples[p] in X_indices - binary_search(X_indices, indptr_start, indptr_end, - sorted_samples[p], &k, &indptr_start) - - if k != -1: - # If k != -1, we have found a non zero value - - if X_data[k] > 0: - start_positive_ -= 1 - feature_values[start_positive_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, start_positive_) - - elif X_data[k] < 0: - feature_values[end_negative_] = X_data[k] - index = index_to_samples[X_indices[k]] - sparse_swap(index_to_samples, samples, index, end_negative_) - end_negative_ += 1 - p += 1 - - # Returned values - end_negative[0] = end_negative_ - start_positive[0] = start_positive_ - - -cdef inline void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples, - intp_t pos_1, intp_t pos_2) noexcept nogil: - """Swap sample pos_1 and pos_2 preserving sparse invariant.""" - samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1] - index_to_samples[samples[pos_1]] = pos_1 - index_to_samples[samples[pos_2]] = pos_2 - - cdef class BestSplitter(Splitter): """Splitter for finding the best split on dense data.""" cdef DensePartitioner partitioner diff --git a/sklearn/tree/meson.build b/sklearn/tree/meson.build index 0fff299e32205..8ed696cd2481e 100644 --- a/sklearn/tree/meson.build +++ b/sklearn/tree/meson.build @@ -2,9 +2,15 @@ tree_extension_metadata = { '_tree': {'sources': ['_tree.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']}, + '_sort': + 
{'sources': ['_sort.pyx'], + 'override_options': ['cython_language=cpp', 'optimization=3']}, '_splitter': {'sources': ['_splitter.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']}, + '_partitioner': + {'sources': ['_partitioner.pyx'], + 'override_options': ['cython_language=cpp', 'optimization=3']}, '_criterion': {'sources': ['_criterion.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']}, From 8e433a69303e7287e3fc032aa76f9bbf8297d087 Mon Sep 17 00:00:00 2001 From: scarliles Date: Fri, 5 Jul 2024 21:58:19 -0400 Subject: [PATCH 4/5] refactored partitioner --- sklearn/tree/_partitioner.pxd | 105 +++-- sklearn/tree/_partitioner.pyx | 837 +++++++++++++++++++--------------- 2 files changed, 523 insertions(+), 419 deletions(-) diff --git a/sklearn/tree/_partitioner.pxd b/sklearn/tree/_partitioner.pxd index 880d9a2a52478..fd4e7c721424b 100644 --- a/sklearn/tree/_partitioner.pxd +++ b/sklearn/tree/_partitioner.pxd @@ -10,24 +10,51 @@ cdef float32_t EXTRACT_NNZ_SWITCH = 0.1 # functions. The alternative would have been to use inheritance-based polymorphism # but it would have resulted in a ~10% overall tree fitting performance # degradation caused by the overhead frequent virtual method lookups. -ctypedef fused Partitioner: - DensePartitioner - SparsePartitioner +#ctypedef fused Partitioner: +# DensePartitioner +# SparsePartitioner -cdef class DensePartitioner: - """Partitioner specialized for dense data. +ctypedef void (*InitNodeSplitFunction)( + Partitioner partitioner, intp_t start, intp_t end +) noexcept nogil - Note that this partitioner is agnostic to the splitting strategy (best vs. random). - """ +ctypedef void (*SortSamplesAndFeatureValuesFunction)( + Partitioner partitioner, intp_t current_feature +) noexcept nogil + +ctypedef void (*FindMinMaxFunction)( + Partitioner partitioner, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, +) noexcept nogil + +ctypedef void (*NextPFunction)( + Partitioner partitioner, intp_t* p_prev, intp_t* p +) noexcept nogil + +ctypedef intp_t (*PartitionSamplesFunction)( + Partitioner partitioner, float64_t current_threshold +) noexcept nogil + +ctypedef void (*PartitionSamplesFinalFunction)( + Partitioner partitioner, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, +) noexcept nogil + + +cdef class Partitioner: cdef: - const float32_t[:, :] X - cdef intp_t[::1] samples - cdef float32_t[::1] feature_values - cdef intp_t start - cdef intp_t end - cdef intp_t n_missing - cdef const unsigned char[::1] missing_values_in_feature_mask + intp_t[::1] samples + float32_t[::1] feature_values + intp_t start + intp_t end + intp_t n_missing + const unsigned char[::1] missing_values_in_feature_mask inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil inline void sort_samples_and_feature_values( @@ -41,7 +68,7 @@ cdef class DensePartitioner: float32_t* max_feature_value_out, ) noexcept nogil inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil - inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil + inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil inline void partition_samples_final( self, intp_t best_pos, @@ -50,20 +77,29 @@ cdef class DensePartitioner: intp_t best_n_missing, ) noexcept nogil + InitNodeSplitFunction _init_node_split + SortSamplesAndFeatureValuesFunction _sort_samples_and_feature_values + FindMinMaxFunction _find_min_max + 
NextPFunction _next_p + PartitionSamplesFunction _partition_samples + PartitionSamplesFinalFunction _partition_samples_final -cdef class SparsePartitioner: - """Partitioner specialized for sparse CSC data. + +cdef class DensePartitioner(Partitioner): + """Partitioner specialized for dense data. Note that this partitioner is agnostic to the splitting strategy (best vs. random). """ cdef: - intp_t[::1] samples - float32_t[::1] feature_values - intp_t start - intp_t end - intp_t n_missing - const unsigned char[::1] missing_values_in_feature_mask + const float32_t[:, :] X + +cdef class SparsePartitioner(Partitioner): + """Partitioner specialized for sparse CSC data. + + Note that this partitioner is agnostic to the splitting strategy (best vs. random). + """ + cdef: const float32_t[::1] X_data const int32_t[::1] X_indices const int32_t[::1] X_indptr @@ -76,26 +112,3 @@ cdef class SparsePartitioner: intp_t start_positive intp_t end_negative bint is_samples_sorted - - inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil - inline void sort_samples_and_feature_values( - self, - intp_t current_feature - ) noexcept nogil - inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil - inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil - inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil - inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t best_n_missing, - ) noexcept nogil - inline intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil - inline void extract_nnz(self, intp_t feature) noexcept nogil diff --git a/sklearn/tree/_partitioner.pyx b/sklearn/tree/_partitioner.pyx index e0a991577d56a..024360d16499e 100644 --- a/sklearn/tree/_partitioner.pyx +++ b/sklearn/tree/_partitioner.pyx @@ -9,8 +9,43 @@ import numpy as np from ._sort cimport sort, sparse_swap, swap, FEATURE_THRESHOLD +cdef class Partitioner: + cdef: + inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: + self._init_node_split(self, start, end) + + inline void sort_samples_and_feature_values( + self, + intp_t current_feature + ) noexcept nogil: + self._sort_samples_and_feature_values(self, current_feature) + + inline void find_min_max( + self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, + ) noexcept nogil: + self._find_min_max(self, current_feature, min_feature_value_out, max_feature_value_out) + + inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: + self._next_p(self, p_prev, p) + + inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: + return self._partition_samples(self, current_threshold) + + inline void partition_samples_final( + self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, + ) noexcept nogil: + self._partition_samples_final(self, best_pos, best_threshold, best_feature, best_n_missing) + + @final -cdef class DensePartitioner: +cdef class DensePartitioner(Partitioner): """Partitioner specialized for dense data. Note that this partitioner is agnostic to the splitting strategy (best vs. random). 
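The hunks above replace the fused-type Partitioner with a concrete base class that dispatches through explicitly stored function pointers: each typedef in _partitioner.pxd names one operation of the node-splitting loop, and each concrete partitioner installs its own implementations in __init__. A rough pure-Python model of that dispatch pattern — names are illustrative only, and the real classes store C-level function pointers so every call stays noexcept nogil — might look like this:

    class Partitioner:
        """Base class: public methods forward to the installed implementations."""

        def init_node_split(self, start, end):
            self._init_node_split(self, start, end)

        def sort_samples_and_feature_values(self, current_feature):
            self._sort_samples_and_feature_values(self, current_feature)


    def dense_init_node_split(partitioner, start, end):
        # Free function playing the role of a stored C function pointer.
        partitioner.start, partitioner.end = start, end
        partitioner.n_missing = 0


    def dense_sort_samples_and_feature_values(partitioner, current_feature):
        print(f"dense sort of samples[{partitioner.start}:{partitioner.end}] "
              f"on feature {current_feature}")


    class DensePartitioner(Partitioner):
        def __init__(self):
            # Install the dense implementations; a sparse partitioner would
            # install its own sparse_* functions here instead.
            self._init_node_split = dense_init_node_split
            self._sort_samples_and_feature_values = dense_sort_samples_and_feature_values


    partitioner = DensePartitioner()
    partitioner.init_node_split(0, 10)
    partitioner.sort_samples_and_feature_values(3)

The same shape repeats in the .pyx hunks that follow: the dense_* and sparse_* free functions hold the former method bodies, and the subclass constructors wire them into the base-class slots.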
@@ -27,189 +62,203 @@ cdef class DensePartitioner: self.feature_values = feature_values self.missing_values_in_feature_mask = missing_values_in_feature_mask - cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: - """Initialize splitter at the beginning of node_split.""" - self.start = start - self.end = end - self.n_missing = 0 - - cdef inline void sort_samples_and_feature_values( - self, intp_t current_feature - ) noexcept nogil: - """Simultaneously sort based on the feature_values. - - Missing values are stored at the end of feature_values. - The number of missing values observed in feature_values is stored - in self.n_missing. - """ - cdef: - intp_t i, current_end - float32_t[::1] feature_values = self.feature_values - const float32_t[:, :] X = self.X - intp_t[::1] samples = self.samples - intp_t n_missing = 0 - const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask - - # Sort samples along that feature; by - # copying the values into an array and - # sorting the array in a manner which utilizes the cache more - # effectively. - if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]: - i, current_end = self.start, self.end - 1 - # Missing values are placed at the end and do not participate in the sorting. - while i <= current_end: - # Finds the right-most value that is not missing so that - # it can be swapped with missing values at its left. - if isnan(X[samples[current_end], current_feature]): - n_missing += 1 - current_end -= 1 - continue - - # X[samples[current_end], current_feature] is a non-missing value - if isnan(X[samples[i], current_feature]): - samples[i], samples[current_end] = samples[current_end], samples[i] - n_missing += 1 - current_end -= 1 - - feature_values[i] = X[samples[i], current_feature] - i += 1 - else: - # When there are no missing values, we only need to copy the data into - # feature_values - for i in range(self.start, self.end): - feature_values[i] = X[samples[i], current_feature] - - sort(&feature_values[self.start], &samples[self.start], self.end - self.start - n_missing) - self.n_missing = n_missing + self._init_node_split = dense_init_node_split + self._sort_samples_and_feature_values = dense_sort_samples_and_feature_values + self._find_min_max = dense_find_min_max + self._next_p = dense_next_p + self._partition_samples = dense_partition_samples + self._partition_samples_final = dense_partition_samples_final + + +cdef inline void dense_init_node_split( + Partitioner self, intp_t start, intp_t end +) noexcept nogil: + """Initialize splitter at the beginning of node_split.""" + self.start = start + self.end = end + self.n_missing = 0 + +cdef inline void dense_sort_samples_and_feature_values( + Partitioner self, intp_t current_feature +) noexcept nogil: + """Simultaneously sort based on the feature_values. + + Missing values are stored at the end of feature_values. + The number of missing values observed in feature_values is stored + in self.n_missing. + """ + cdef: + intp_t i, current_end + float32_t[::1] feature_values = self.feature_values + const float32_t[:, :] X = (self).X + intp_t[::1] samples = self.samples + intp_t n_missing = 0 + const unsigned char[::1] missing_values_in_feature_mask = self.missing_values_in_feature_mask + + # Sort samples along that feature; by + # copying the values into an array and + # sorting the array in a manner which utilizes the cache more + # effectively. 
+ if missing_values_in_feature_mask is not None and missing_values_in_feature_mask[current_feature]: + i, current_end = self.start, self.end - 1 + # Missing values are placed at the end and do not participate in the sorting. + while i <= current_end: + # Finds the right-most value that is not missing so that + # it can be swapped with missing values at its left. + if isnan(X[samples[current_end], current_feature]): + n_missing += 1 + current_end -= 1 + continue + + # X[samples[current_end], current_feature] is a non-missing value + if isnan(X[samples[i], current_feature]): + samples[i], samples[current_end] = samples[current_end], samples[i] + n_missing += 1 + current_end -= 1 + + feature_values[i] = X[samples[i], current_feature] + i += 1 + else: + # When there are no missing values, we only need to copy the data into + # feature_values + for i in range(self.start, self.end): + feature_values[i] = X[samples[i], current_feature] + + sort(&feature_values[self.start], &samples[self.start], self.end - self.start - n_missing) + self.n_missing = n_missing + +cdef inline void dense_find_min_max( + Partitioner self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, +) noexcept nogil: + """Find the minimum and maximum value for current_feature.""" + cdef: + intp_t p + float32_t current_feature_value + const float32_t[:, :] X = (self).X + intp_t[::1] samples = self.samples + float32_t min_feature_value = X[samples[self.start], current_feature] + float32_t max_feature_value = min_feature_value + float32_t[::1] feature_values = self.feature_values + + feature_values[self.start] = min_feature_value + + for p in range(self.start + 1, self.end): + current_feature_value = X[samples[p], current_feature] + feature_values[p] = current_feature_value + + if current_feature_value < min_feature_value: + min_feature_value = current_feature_value + elif current_feature_value > max_feature_value: + max_feature_value = current_feature_value + + min_feature_value_out[0] = min_feature_value + max_feature_value_out[0] = max_feature_value + +cdef inline void dense_next_p( + Partitioner self, intp_t* p_prev, intp_t* p +) noexcept nogil: + """Compute the next p_prev and p for iteratiing over feature values. + + The missing values are not included when iterating through the feature values. 
+ """ + cdef: + float32_t[::1] feature_values = self.feature_values + intp_t end_non_missing = self.end - self.n_missing - cdef inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil: - """Find the minimum and maximum value for current_feature.""" - cdef: - intp_t p - float32_t current_feature_value - const float32_t[:, :] X = self.X - intp_t[::1] samples = self.samples - float32_t min_feature_value = X[samples[self.start], current_feature] - float32_t max_feature_value = min_feature_value - float32_t[::1] feature_values = self.feature_values - - feature_values[self.start] = min_feature_value - - for p in range(self.start + 1, self.end): - current_feature_value = X[samples[p], current_feature] - feature_values[p] = current_feature_value - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - min_feature_value_out[0] = min_feature_value - max_feature_value_out[0] = max_feature_value - - cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: - """Compute the next p_prev and p for iteratiing over feature values. - - The missing values are not included when iterating through the feature values. - """ - cdef: - float32_t[::1] feature_values = self.feature_values - intp_t end_non_missing = self.end - self.n_missing - - while ( - p[0] + 1 < end_non_missing and - feature_values[p[0] + 1] <= feature_values[p[0]] + FEATURE_THRESHOLD - ): - p[0] += 1 - - p_prev[0] = p[0] - - # By adding 1, we have - # (feature_values[p] >= end) or (feature_values[p] > feature_values[p - 1]) + while ( + p[0] + 1 < end_non_missing and + feature_values[p[0] + 1] <= feature_values[p[0]] + FEATURE_THRESHOLD + ): p[0] += 1 - cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: - """Partition samples for feature_values at the current_threshold.""" - cdef: - intp_t p = self.start - intp_t partition_end = self.end - intp_t[::1] samples = self.samples - float32_t[::1] feature_values = self.feature_values + p_prev[0] = p[0] + + # By adding 1, we have + # (feature_values[p] >= end) or (feature_values[p] > feature_values[p - 1]) + p[0] += 1 + +cdef inline intp_t dense_partition_samples( + Partitioner self, float64_t current_threshold +) noexcept nogil: + """Partition samples for feature_values at the current_threshold.""" + cdef: + intp_t p = self.start + intp_t partition_end = self.end + intp_t[::1] samples = self.samples + float32_t[::1] feature_values = self.feature_values + + while p < partition_end: + if feature_values[p] <= current_threshold: + p += 1 + else: + partition_end -= 1 + feature_values[p], feature_values[partition_end] = ( + feature_values[partition_end], feature_values[p] + ) + samples[p], samples[partition_end] = samples[partition_end], samples[p] + + return partition_end + +cdef inline void dense_partition_samples_final( + Partitioner self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t best_n_missing, +) noexcept nogil: + """Partition samples for X at the best_threshold and best_feature. + + If missing values are present, this method partitions `samples` + so that the `best_n_missing` missing values' indices are in the + right-most end of `samples`, that is `samples[end_non_missing:end]`. 
+ """ + cdef: + # Local invariance: start <= p <= partition_end <= end + intp_t start = self.start + intp_t p = start + intp_t end = self.end - 1 + intp_t partition_end = end - best_n_missing + intp_t[::1] samples = self.samples + const float32_t[:, :] X = (self).X + float32_t current_value + + if best_n_missing != 0: + # Move samples with missing values to the end while partitioning the + # non-missing samples while p < partition_end: - if feature_values[p] <= current_threshold: + # Keep samples with missing values at the end + if isnan(X[samples[end], best_feature]): + end -= 1 + continue + + # Swap sample with missing values with the sample at the end + current_value = X[samples[p], best_feature] + if isnan(current_value): + samples[p], samples[end] = samples[end], samples[p] + end -= 1 + + # The swapped sample at the end is always a non-missing value, so + # we can continue the algorithm without checking for missingness. + current_value = X[samples[p], best_feature] + + # Partition the non-missing samples + if current_value <= best_threshold: p += 1 else: + samples[p], samples[partition_end] = samples[partition_end], samples[p] partition_end -= 1 - - feature_values[p], feature_values[partition_end] = ( - feature_values[partition_end], feature_values[p] - ) + else: + # Partitioning routine when there are no missing values + while p < partition_end: + if X[samples[p], best_feature] <= best_threshold: + p += 1 + else: samples[p], samples[partition_end] = samples[partition_end], samples[p] - - return partition_end - - cdef inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t best_n_missing, - ) noexcept nogil: - """Partition samples for X at the best_threshold and best_feature. - - If missing values are present, this method partitions `samples` - so that the `best_n_missing` missing values' indices are in the - right-most end of `samples`, that is `samples[end_non_missing:end]`. - """ - cdef: - # Local invariance: start <= p <= partition_end <= end - intp_t start = self.start - intp_t p = start - intp_t end = self.end - 1 - intp_t partition_end = end - best_n_missing - intp_t[::1] samples = self.samples - const float32_t[:, :] X = self.X - float32_t current_value - - if best_n_missing != 0: - # Move samples with missing values to the end while partitioning the - # non-missing samples - while p < partition_end: - # Keep samples with missing values at the end - if isnan(X[samples[end], best_feature]): - end -= 1 - continue - - # Swap sample with missing values with the sample at the end - current_value = X[samples[p], best_feature] - if isnan(current_value): - samples[p], samples[end] = samples[end], samples[p] - end -= 1 - - # The swapped sample at the end is always a non-missing value, so - # we can continue the algorithm without checking for missingness. 
- current_value = X[samples[p], best_feature] - - # Partition the non-missing samples - if current_value <= best_threshold: - p += 1 - else: - samples[p], samples[partition_end] = samples[partition_end], samples[p] - partition_end -= 1 - else: - # Partitioning routine when there are no missing values - while p < partition_end: - if X[samples[p], best_feature] <= best_threshold: - p += 1 - else: - samples[p], samples[partition_end] = samples[partition_end], samples[p] - partition_end -= 1 + partition_end -= 1 @final @@ -250,217 +299,259 @@ cdef class SparsePartitioner: self.missing_values_in_feature_mask = missing_values_in_feature_mask - cdef inline void init_node_split(self, intp_t start, intp_t end) noexcept nogil: - """Initialize splitter at the beginning of node_split.""" - self.start = start - self.end = end - self.is_samples_sorted = 0 - self.n_missing = 0 - - cdef inline void sort_samples_and_feature_values( - self, intp_t current_feature - ) noexcept nogil: - """Simultaneously sort based on the feature_values.""" - cdef: - float32_t[::1] feature_values = self.feature_values - intp_t[::1] index_to_samples = self.index_to_samples - intp_t[::1] samples = self.samples - - self.extract_nnz(current_feature) - # Sort the positive and negative parts of `feature_values` - sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start) - if self.start_positive < self.end: - sort( - &feature_values[self.start_positive], - &samples[self.start_positive], - self.end - self.start_positive - ) - - # Update index_to_samples to take into account the sort - for p in range(self.start, self.end_negative): - index_to_samples[samples[p]] = p - for p in range(self.start_positive, self.end): - index_to_samples[samples[p]] = p - - # Add one or two zeros in feature_values, if there is any - if self.end_negative < self.start_positive: - self.start_positive -= 1 - feature_values[self.start_positive] = 0. - - if self.end_negative != self.start_positive: - feature_values[self.end_negative] = 0. 
- self.end_negative += 1 - - # XXX: When sparse supports missing values, this should be set to the - # number of missing values for current_feature - self.n_missing = 0 - - cdef inline void find_min_max( - self, - intp_t current_feature, - float32_t* min_feature_value_out, - float32_t* max_feature_value_out, - ) noexcept nogil: - """Find the minimum and maximum value for current_feature.""" - cdef: - intp_t p - float32_t current_feature_value, min_feature_value, max_feature_value - float32_t[::1] feature_values = self.feature_values - - self.extract_nnz(current_feature) + self._init_node_split = sparse_init_node_split + self._sort_samples_and_feature_values = sparse_sort_samples_and_feature_values + # self._find_min_max = sparse_find_min_max + # self._next_p = sparse_next_p + # self._partition_samples = sparse_partition_samples + # self._partition_samples_final = sparse_partition_samples_final + + +cdef inline void sparse_init_node_split(Partitioner self, intp_t start, intp_t end) noexcept nogil: + """Initialize splitter at the beginning of node_split.""" + self.start = start + self.end = end + (self).is_samples_sorted = 0 + self.n_missing = 0 + + +cdef inline void sparse_sort_samples_and_feature_values( + Partitioner self, intp_t current_feature +) noexcept nogil: + _sparse_sort_samples_and_feature_values(self, current_feature) + + +cdef inline void _sparse_sort_samples_and_feature_values( + SparsePartitioner self, intp_t current_feature +) noexcept nogil: + """Simultaneously sort based on the feature_values.""" + cdef: + float32_t[::1] feature_values = self.feature_values + intp_t[::1] index_to_samples = self.index_to_samples + intp_t[::1] samples = self.samples + + sparse_extract_nnz(self, current_feature) + # Sort the positive and negative parts of `feature_values` + sort(&feature_values[self.start], &samples[self.start], self.end_negative - self.start) + if self.start_positive < self.end: + sort( + &feature_values[self.start_positive], + &samples[self.start_positive], + self.end - self.start_positive + ) + + # Update index_to_samples to take into account the sort + for p in range(self.start, self.end_negative): + index_to_samples[samples[p]] = p + for p in range(self.start_positive, self.end): + index_to_samples[samples[p]] = p + + # Add one or two zeros in feature_values, if there is any + if self.end_negative < self.start_positive: + self.start_positive -= 1 + feature_values[self.start_positive] = 0. 
if self.end_negative != self.start_positive: - # There is a zero - min_feature_value = 0 - max_feature_value = 0 - else: - min_feature_value = feature_values[self.start] - max_feature_value = min_feature_value - - # Find min, max in feature_values[start:end_negative] - for p in range(self.start, self.end_negative): - current_feature_value = feature_values[p] - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - # Update min, max given feature_values[start_positive:end] - for p in range(self.start_positive, self.end): - current_feature_value = feature_values[p] - - if current_feature_value < min_feature_value: - min_feature_value = current_feature_value - elif current_feature_value > max_feature_value: - max_feature_value = current_feature_value - - min_feature_value_out[0] = min_feature_value - max_feature_value_out[0] = max_feature_value - - cdef inline void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil: - """Compute the next p_prev and p for iteratiing over feature values.""" - cdef: - intp_t p_next - float32_t[::1] feature_values = self.feature_values - + feature_values[self.end_negative] = 0. + self.end_negative += 1 + + # XXX: When sparse supports missing values, this should be set to the + # number of missing values for current_feature + self.n_missing = 0 + + +cdef inline void sparse_find_min_max( + Partitioner self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, +) noexcept nogil: + _sparse_find_min_max( + self, + current_feature, + min_feature_value_out, + max_feature_value_out + ) + +cdef inline void _sparse_find_min_max( + SparsePartitioner self, + intp_t current_feature, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, +) noexcept nogil: + """Find the minimum and maximum value for current_feature.""" + cdef: + intp_t p + float32_t current_feature_value, min_feature_value, max_feature_value + float32_t[::1] feature_values = self.feature_values + + sparse_extract_nnz(self, current_feature) + + if self.end_negative != self.start_positive: + # There is a zero + min_feature_value = 0 + max_feature_value = 0 + else: + min_feature_value = feature_values[self.start] + max_feature_value = min_feature_value + + # Find min, max in feature_values[start:end_negative] + for p in range(self.start, self.end_negative): + current_feature_value = feature_values[p] + + if current_feature_value < min_feature_value: + min_feature_value = current_feature_value + elif current_feature_value > max_feature_value: + max_feature_value = current_feature_value + + # Update min, max given feature_values[start_positive:end] + for p in range(self.start_positive, self.end): + current_feature_value = feature_values[p] + + if current_feature_value < min_feature_value: + min_feature_value = current_feature_value + elif current_feature_value > max_feature_value: + max_feature_value = current_feature_value + + min_feature_value_out[0] = min_feature_value + max_feature_value_out[0] = max_feature_value + + +cdef inline void sparse_next_p(Partitioner self, intp_t* p_prev, intp_t* p) noexcept nogil: + _sparse_next_p(self, p_prev, p) + + +cdef inline void _sparse_next_p(SparsePartitioner self, intp_t* p_prev, intp_t* p) noexcept nogil: + """Compute the next p_prev and p for iteratiing over feature values.""" + cdef: + intp_t p_next + float32_t[::1] feature_values = self.feature_values + + if p[0] + 1 != 
self.end_negative: + p_next = p[0] + 1 + else: + p_next = self.start_positive + + while (p_next < self.end and + feature_values[p_next] <= feature_values[p[0]] + FEATURE_THRESHOLD): + p[0] = p_next if p[0] + 1 != self.end_negative: p_next = p[0] + 1 else: p_next = self.start_positive - while (p_next < self.end and - feature_values[p_next] <= feature_values[p[0]] + FEATURE_THRESHOLD): - p[0] = p_next - if p[0] + 1 != self.end_negative: - p_next = p[0] + 1 - else: - p_next = self.start_positive + p_prev[0] = p[0] + p[0] = p_next + + +cdef inline intp_t sparse_partition_samples( + Partitioner self, float64_t current_threshold +) noexcept nogil: + """Partition samples for feature_values at the current_threshold.""" + return sparse_partition( + self, current_threshold, (self).start_positive + ) + + +cdef inline void sparse_partition_samples_final( + Partitioner self, + intp_t best_pos, + float64_t best_threshold, + intp_t best_feature, + intp_t n_missing, +) noexcept nogil: + """Partition samples for X at the best_threshold and best_feature.""" + sparse_extract_nnz(self, best_feature) + sparse_partition(self, best_threshold, best_pos) + + +cdef inline intp_t sparse_partition(SparsePartitioner self, float64_t threshold, intp_t zero_pos) noexcept nogil: + """Partition samples[start:end] based on threshold.""" + cdef: + intp_t p, partition_end + intp_t[::1] index_to_samples = self.index_to_samples + float32_t[::1] feature_values = self.feature_values + intp_t[::1] samples = self.samples + + if threshold < 0.: + p = self.start + partition_end = self.end_negative + elif threshold > 0.: + p = self.start_positive + partition_end = self.end + else: + # Data are already split + return zero_pos + + while p < partition_end: + if feature_values[p] <= threshold: + p += 1 - p_prev[0] = p[0] - p[0] = p_next + else: + partition_end -= 1 - cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: - """Partition samples for feature_values at the current_threshold.""" - return self._partition(current_threshold, self.start_positive) + feature_values[p], feature_values[partition_end] = ( + feature_values[partition_end], feature_values[p] + ) + sparse_swap(index_to_samples, samples, p, partition_end) - cdef inline void partition_samples_final( - self, - intp_t best_pos, - float64_t best_threshold, - intp_t best_feature, - intp_t n_missing, - ) noexcept nogil: - """Partition samples for X at the best_threshold and best_feature.""" - self.extract_nnz(best_feature) - self._partition(best_threshold, best_pos) - - cdef inline intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil: - """Partition samples[start:end] based on threshold.""" - cdef: - intp_t p, partition_end - intp_t[::1] index_to_samples = self.index_to_samples - float32_t[::1] feature_values = self.feature_values - intp_t[::1] samples = self.samples - - if threshold < 0.: - p = self.start - partition_end = self.end_negative - elif threshold > 0.: - p = self.start_positive - partition_end = self.end - else: - # Data are already split - return zero_pos + return partition_end - while p < partition_end: - if feature_values[p] <= threshold: - p += 1 - else: - partition_end -= 1 +cdef inline void sparse_extract_nnz(SparsePartitioner self, intp_t feature) noexcept nogil: + """Extract and partition values for a given feature. 
+
-            feature_values[p], feature_values[partition_end] = (
-                feature_values[partition_end], feature_values[p]
-            )
-            sparse_swap(index_to_samples, samples, p, partition_end)
-
-        return partition_end
-
-    cdef inline void extract_nnz(self, intp_t feature) noexcept nogil:
-        """Extract and partition values for a given feature.
-
-        The extracted values are partitioned between negative values
-        feature_values[start:end_negative[0]] and positive values
-        feature_values[start_positive[0]:end].
-        The samples and index_to_samples are modified according to this
-        partition.
-
-        The extraction corresponds to the intersection between the arrays
-        X_indices[indptr_start:indptr_end] and samples[start:end].
-        This is done efficiently using either an index_to_samples based approach
-        or binary search based approach.
-
-        Parameters
-        ----------
-        feature : intp_t,
-            Index of the feature we want to extract non zero value.
-        """
-        cdef intp_t[::1] samples = self.samples
-        cdef float32_t[::1] feature_values = self.feature_values
-        cdef intp_t indptr_start = self.X_indptr[feature],
-        cdef intp_t indptr_end = self.X_indptr[feature + 1]
-        cdef intp_t n_indices = (indptr_end - indptr_start)
-        cdef intp_t n_samples = self.end - self.start
-        cdef intp_t[::1] index_to_samples = self.index_to_samples
-        cdef intp_t[::1] sorted_samples = self.sorted_samples
-        cdef const int32_t[::1] X_indices = self.X_indices
-        cdef const float32_t[::1] X_data = self.X_data
-
-        # Use binary search if n_samples * log(n_indices) <
-        # n_indices and index_to_samples approach otherwise.
-        # O(n_samples * log(n_indices)) is the running time of binary
-        # search and O(n_indices) is the running time of index_to_samples
-        # approach.
-        if ((1 - self.is_samples_sorted) * n_samples * log(n_samples) +
-                n_samples * log(n_indices) < EXTRACT_NNZ_SWITCH * n_indices):
-            extract_nnz_binary_search(X_indices, X_data,
-                                      indptr_start, indptr_end,
-                                      samples, self.start, self.end,
-                                      index_to_samples,
-                                      feature_values,
-                                      &self.end_negative, &self.start_positive,
-                                      sorted_samples, &self.is_samples_sorted)
-
-        # Using an index to samples technique to extract non zero values
-        # index_to_samples is a mapping from X_indices to samples
-        else:
-            extract_nnz_index_to_samples(X_indices, X_data,
-                                         indptr_start, indptr_end,
-                                         samples, self.start, self.end,
-                                         index_to_samples,
-                                         feature_values,
-                                         &self.end_negative, &self.start_positive)
+    The extracted values are partitioned between negative values
+    feature_values[start:end_negative[0]] and positive values
+    feature_values[start_positive[0]:end].
+    The samples and index_to_samples are modified according to this
+    partition.
+
+    The extraction corresponds to the intersection between the arrays
+    X_indices[indptr_start:indptr_end] and samples[start:end].
+    This is done efficiently using either an index_to_samples based approach
+    or binary search based approach.
+
+    Parameters
+    ----------
+    feature : intp_t,
+        Index of the feature we want to extract non zero value.
+    """
+    cdef intp_t[::1] samples = self.samples
+    cdef float32_t[::1] feature_values = self.feature_values
+    cdef intp_t indptr_start = self.X_indptr[feature],
+    cdef intp_t indptr_end = self.X_indptr[feature + 1]
+    cdef intp_t n_indices = (indptr_end - indptr_start)
+    cdef intp_t n_samples = self.end - self.start
+    cdef intp_t[::1] index_to_samples = self.index_to_samples
+    cdef intp_t[::1] sorted_samples = self.sorted_samples
+    cdef const int32_t[::1] X_indices = self.X_indices
+    cdef const float32_t[::1] X_data = self.X_data
+
+    # Use binary search if n_samples * log(n_indices) <
+    # n_indices and index_to_samples approach otherwise.
+    # O(n_samples * log(n_indices)) is the running time of binary
+    # search and O(n_indices) is the running time of index_to_samples
+    # approach.
+    if ((1 - self.is_samples_sorted) * n_samples * log(n_samples) +
+            n_samples * log(n_indices) < EXTRACT_NNZ_SWITCH * n_indices):
+        extract_nnz_binary_search(X_indices, X_data,
+                                  indptr_start, indptr_end,
+                                  samples, self.start, self.end,
+                                  index_to_samples,
+                                  feature_values,
+                                  &self.end_negative, &self.start_positive,
+                                  sorted_samples, &self.is_samples_sorted)
+
+    # Using an index to samples technique to extract non zero values
+    # index_to_samples is a mapping from X_indices to samples
+    else:
+        extract_nnz_index_to_samples(X_indices, X_data,
+                                     indptr_start, indptr_end,
+                                     samples, self.start, self.end,
+                                     index_to_samples,
+                                     feature_values,
+                                     &self.end_negative, &self.start_positive)
 
 cdef int compare_SIZE_t(const void* a, const void* b) noexcept nogil:

From 09a8ec5a94651911179f12d3009ae6a88ccc406a Mon Sep 17 00:00:00 2001
From: scarliles
Date: Fri, 5 Jul 2024 22:46:53 -0400
Subject: [PATCH 5/5] fixed some unintended commented out lines in SparsePartitioner

---
 sklearn/tree/_partitioner.pyx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/_partitioner.pyx b/sklearn/tree/_partitioner.pyx
index 024360d16499e..7f21e716272f4 100644
--- a/sklearn/tree/_partitioner.pyx
+++ b/sklearn/tree/_partitioner.pyx
@@ -301,10 +301,10 @@ cdef class SparsePartitioner:
 
         self._init_node_split = sparse_init_node_split
         self._sort_samples_and_feature_values = sparse_sort_samples_and_feature_values
-        # self._find_min_max = sparse_find_min_max
-        # self._next_p = sparse_next_p
-        # self._partition_samples = sparse_partition_samples
-        # self._partition_samples_final = sparse_partition_samples_final
+        self._find_min_max = sparse_find_min_max
+        self._next_p = sparse_next_p
+        self._partition_samples = sparse_partition_samples
+        self._partition_samples_final = sparse_partition_samples_final
 
 
 cdef inline void sparse_init_node_split(Partitioner self, intp_t start, intp_t end) noexcept nogil:
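Note on the sparse min/max logic in PATCH 4/5 (this sketch is not part of the patch): the nonzero values of the current feature are packed into feature_values[start:end_negative) for negatives and feature_values[start_positive:end) for positives, and any gap between the two ranges stands for implicit zeros, so the running min/max is seeded with 0 whenever that gap is non-empty. A rough pure-Python equivalent, with invented names:

    def sparse_min_max(negatives, positives, n_node_samples):
        # negatives/positives are the explicitly stored nonzero values of one
        # feature restricted to the node; the remaining entries are implicit zeros.
        nonzeros = list(negatives) + list(positives)
        has_implicit_zero = len(nonzeros) < n_node_samples
        if has_implicit_zero:
            lo = hi = 0.0
        else:
            lo = hi = nonzeros[0]
        for v in nonzeros:
            lo = min(lo, v)
            hi = max(hi, v)
        return lo, hi

    # Two stored values among four node samples -> two implicit zeros.
    assert sparse_min_max([-2.0], [3.0], 4) == (-2.0, 3.0)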
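Note on the extraction heuristic (not from the patch; the value of EXTRACT_NNZ_SWITCH below is an assumption): the comment in sparse_extract_nnz weighs an O(n_samples * log(n_indices)) binary-search route, plus an O(n_samples * log(n_samples)) sort when the node's samples are not yet sorted, against an O(n_indices) index_to_samples scan. A hedged sketch of that decision rule:

    from math import log

    EXTRACT_NNZ_SWITCH = 0.1  # assumed trade-off constant

    def use_binary_search(n_samples, n_indices, samples_already_sorted):
        # True when the binary-search extraction is expected to be cheaper
        # than scanning every stored nonzero of the feature's CSC column.
        sort_cost = 0.0 if samples_already_sorted else n_samples * log(n_samples)
        search_cost = n_samples * log(n_indices)
        scan_cost = EXTRACT_NNZ_SWITCH * n_indices
        return sort_cost + search_cost < scan_cost

    # A very dense column visited from a small node favours binary search.
    print(use_binary_search(n_samples=32, n_indices=100_000, samples_already_sorted=True))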
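Note on PATCH 5/5 (illustrative only, not the actual Cython declarations): re-enabling those four assignments is what wires SparsePartitioner to the new module-level sparse_* functions, so splitting code can call through stored function pointers rather than class methods. The pattern, in plain Python with made-up names and data:

    def sparse_find_min_max(partitioner):
        return min(partitioner.values), max(partitioner.values)

    def sparse_partition_samples(partitioner, threshold):
        # Number of node samples that would go to the left child.
        return sum(v <= threshold for v in partitioner.values)

    class SparsePartitioner:
        def __init__(self, values):
            self.values = values
            # Fill the dispatch slots with free functions, mirroring
            # `self._find_min_max = sparse_find_min_max` in the patch.
            self._find_min_max = sparse_find_min_max
            self._partition_samples = sparse_partition_samples

        def find_min_max(self):
            return self._find_min_max(self)

        def partition_samples(self, threshold):
            return self._partition_samples(self, threshold)

    p = SparsePartitioner([-1.0, 0.0, 2.0, 5.0])
    print(p.find_min_max())          # (-1.0, 5.0)
    print(p.partition_samples(0.0))  # 2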