From 1eadfa56283557eb6106232ad7eb5e84e6b5600f Mon Sep 17 00:00:00 2001
From: Candice Moyet <cmoyet@quantmetry.com>
Date: Mon, 10 Jul 2023 10:41:38 +0200
Subject: [PATCH] FIX: trim EnsembleEstimator interface, fix OOF return docs

---
 mapie/estimator/estimator.py |  11 +-
 mapie/estimator/interface.py | 256 -----------------------------------
 2 files changed, 3 insertions(+), 264 deletions(-)

diff --git a/mapie/estimator/estimator.py b/mapie/estimator/estimator.py
index 167bd2a63..ed05b850d 100644
--- a/mapie/estimator/estimator.py
+++ b/mapie/estimator/estimator.py
@@ -205,13 +205,8 @@ def _fit_oof_estimator(
 
         Returns
         -------
-        Tuple[RegressorMixin, NDArray, ArrayLike]
-
-        - [0]: RegressorMixin, fitted estimator
-        - [1]: NDArray of shape (n_samples_val,),
-          estimator predictions on the validation fold.
-        - [2]: ArrayLike of shape (n_samples_val,),
-          validation data indices.
+        RegressorMixin
+            Fitted estimator.
         """
         X_train = _safe_indexing(X, train_index)
         y_train = _safe_indexing(y, train_index)
@@ -229,7 +224,7 @@ def _predict_oof_estimator(
         estimator: RegressorMixin,
         X: ArrayLike,
         val_index: ArrayLike,
-    ):
+    ) -> Tuple[NDArray, ArrayLike]:
         """
         Perform predictions on a single out-of-fold model on a validation set.
 
diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py
index 25435fe6a..dafbb382d 100644
--- a/mapie/estimator/interface.py
+++ b/mapie/estimator/interface.py
@@ -12,263 +12,7 @@ class EnsembleEstimator(RegressorMixin):
     This class implements methods to handle the training and usage of the
     estimator. This estimator can be unique or composed by cross validated
     estimators.
-
-    Parameters
-    ----------
-    estimator: Optional[RegressorMixin]
-        Any regressor with scikit-learn API
-        (i.e. with ``fit`` and ``predict`` methods).
-        If ``None``, estimator defaults to a ``LinearRegression`` instance.
-
-        By default ``None``.
-
-    method: str
-        Method to choose for prediction interval estimates.
-        Choose among:
-
-        - ``"naive"``, based on training set conformity scores,
-        - ``"base"``, based on validation sets conformity scores,
-        - ``"plus"``, based on validation conformity scores and
-          testing predictions,
-        - ``"minmax"``, based on validation conformity scores and
-          testing predictions (min/max among cross-validation clones).
-
-        By default ``"plus"``.
-
-    cv: Optional[Union[int, str, BaseCrossValidator]]
-        The cross-validation strategy for computing conformity scores.
-        It directly drives the distinction between jackknife and cv variants.
-        Choose among:
-
-        - ``None``, to use the default 5-fold cross-validation
-        - integer, to specify the number of folds.
-          If equal to ``-1``, equivalent to
-          ``sklearn.model_selection.LeaveOneOut()``.
-        - CV splitter: any ``sklearn.model_selection.BaseCrossValidator``
-          Main variants are:
-            - ``sklearn.model_selection.LeaveOneOut`` (jackknife),
-            - ``sklearn.model_selection.KFold`` (cross-validation),
-            - ``subsample.Subsample`` object (bootstrap).
-        - ``"split"``, does not involve cross-validation but a division
-          of the data into training and calibration subsets. The splitter
-          used is the following: ``sklearn.model_selection.ShuffleSplit``.
-        - ``"prefit"``, assumes that ``estimator`` has been fitted already,
-          and the ``method`` parameter is ignored.
-          All data provided in the ``fit`` method is then used
-          for computing conformity scores only.
-          At prediction time, quantiles of these conformity scores are used
-          to provide a prediction interval with fixed width.
-          The user has to take care manually that data for model fitting and
-          conformity scores estimate are disjoint.
-
-        By default ``None``.
-
-    test_size: Optional[Union[int, float]]
-        If ``float``, should be between ``0.0`` and ``1.0`` and represent the
-        proportion of the dataset to include in the test split. If ``int``,
-        represents the absolute number of test samples. If ``None``,
-        it will be set to ``0.1``.
-
-        If cv is not ``"split"``, ``test_size`` is ignored.
-
-        By default ``None``.
-
-    n_jobs: Optional[int]
-        Number of jobs for parallel processing using joblib
-        via the "locky" backend.
-        If ``-1`` all CPUs are used.
-        If ``1`` is given, no parallel computing code is used at all,
-        which is useful for debugging.
-        For ``n_jobs`` below ``-1``, ``(n_cpus + 1 - n_jobs)`` are used.
-        ``None`` is a marker for `unset` that will be interpreted as
-        ``n_jobs=1`` (sequential execution).
-
-        By default ``None``.
-
-    agg_function: Optional[str]
-        Determines how to aggregate predictions from perturbed models, both at
-        training and prediction time.
-
-        If ``None``, it is ignored except if ``cv`` class is ``Subsample``,
-        in which case an error is raised.
-        If ``"mean"`` or ``"median"``, returns the mean or median of the
-        predictions computed from the out-of-folds models.
-        Note: if you plan to set the ``ensemble`` argument to ``True`` in the
-        ``predict`` method, you have to specify an aggregation function.
-        Otherwise an error would be raised.
-
-        The Jackknife+ interval can be interpreted as an interval around the
-        median prediction, and is guaranteed to lie inside the interval,
-        unlike the single estimator predictions.
-
-        When the cross-validation strategy is ``Subsample`` (i.e. for the
-        Jackknife+-after-Bootstrap method), this function is also used to
-        aggregate the training set in-sample predictions.
-
-        If ``cv`` is ``"prefit"`` or ``"split"``, ``agg_function`` is ignored.
-
-        By default ``"mean"``.
-
-    verbose: int
-        The verbosity level, used with joblib for multiprocessing.
-        The frequency of the messages increases with the verbosity level.
-        If it more than ``10``, all iterations are reported.
-        Above ``50``, the output is sent to stdout.
-
-        By default ``0``.
-
-    random_state: Optional[Union[int, RandomState]]
-        Pseudo random number generator state used for random sampling.
-        Pass an int for reproducible output across multiple function calls.
-
-        By default ``None``.
-
-    Attributes
-    ----------
-    single_estimator_: sklearn.RegressorMixin
-        Estimator fitted on the whole training set.
-
-    estimators_: list
-        List of out-of-folds estimators.
-
-    k_: ArrayLike
-        - Array of nans, of shape (len(y), 1) if ``cv`` is ``"prefit"``
-          (defined but not used)
-        - Dummy array of folds containing each training sample, otherwise.
-          Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)).
     """
-    no_agg_cv_ = ["prefit", "split"]
-    no_agg_methods_ = ["naive", "base"]
-    fit_attributes = [
-        "single_estimator_",
-        "estimators_",
-        "k_",
-    ]
-
-    @staticmethod
-    def _fit_oof_estimator(
-        estimator: RegressorMixin,
-        X: ArrayLike,
-        y: ArrayLike,
-        train_index: ArrayLike,
-        sample_weight: Optional[ArrayLike] = None,
-    ) -> RegressorMixin:
-        """
-        Fit a single out-of-fold model on a given training set.
-
-        Parameters
-        ----------
-        estimator: RegressorMixin
-            Estimator to train.
-
-        X: ArrayLike of shape (n_samples, n_features)
-            Input data.
-
-        y: ArrayLike of shape (n_samples,)
-            Input labels.
-
-        train_index: ArrayLike of shape (n_samples_train)
-            Training data indices.
-
-        sample_weight: Optional[ArrayLike] of shape (n_samples,)
-            Sample weights. If None, then samples are equally weighted.
-            By default ``None``.
-
-        Returns
-        -------
-        Tuple[RegressorMixin, NDArray, ArrayLike]
-
-        - [0]: RegressorMixin, fitted estimator
-        - [1]: NDArray of shape (n_samples_val,),
-          estimator predictions on the validation fold.
-        - [2]: ArrayLike of shape (n_samples_val,),
-          validation data indices.
-        """
-
-    @staticmethod
-    def _predict_oof_estimator(
-        estimator: RegressorMixin,
-        X: ArrayLike,
-        val_index: ArrayLike,
-    ):
-        """
-        Perform predictions on a single out-of-fold model on a validation set.
-
-        Parameters
-        ----------
-        estimator: RegressorMixin
-            Estimator to train.
-
-        X: ArrayLike of shape (n_samples, n_features)
-            Input data.
-
-        val_index: ArrayLike of shape (n_samples_val)
-            Validation data indices.
-
-        Returns
-        -------
-        Tuple[NDArray, ArrayLike]
-            Predictions of estimator from val_index of X.
-        """
-
-    def _aggregate_with_mask(
-        self,
-        x: NDArray,
-        k: NDArray
-    ) -> NDArray:
-        """
-        Take the array of predictions, made by the refitted estimators,
-        on the testing set, and the 1-or-nan array indicating for each training
-        sample which one to integrate, and aggregate to produce phi-{t}(x_t)
-        for each training sample x_t.
-
-        Parameters
-        ----------
-        x: ArrayLike of shape (n_samples_test, n_estimators)
-            Array of predictions, made by the refitted estimators,
-            for each sample of the testing set.
-
-        k: ArrayLike of shape (n_samples_training, n_estimators)
-            1-or-nan array: indicates whether to integrate the prediction
-            of a given estimator into the aggregation, for each training
-            sample.
-
-        Returns
-        -------
-        ArrayLike of shape (n_samples_test,)
-            Array of aggregated predictions for each testing sample.
-        """
-
-    def _pred_multi(self, X: ArrayLike) -> NDArray:
-        """
-        Return a prediction per train sample for each test sample, by
-        aggregation with matrix ``k_``.
-
-        Parameters
-        ----------
-        X: ArrayLike of shape (n_samples_test, n_features)
-            Input data
-
-        Returns
-        -------
-        NDArray of shape (n_samples_test, n_samples_train)
-        """
-
-    def predict_calib(self, X: ArrayLike) -> NDArray:
-        """
-        Perform predictions on X : the calibration set. This method is
-        called in the ConformityScore class to compute the conformity scores.
-
-        Parameters
-        ----------
-        X: ArrayLike of shape (n_samples_test, n_features)
-            Input data
-
-        Returns
-        -------
-        NDArray of shape (n_samples_test, 1)
-            The predictions.
-        """
 
     def fit(
         self,