Skip to content

Commit

Permalink
ENH: beautify handling of nan in crf score
Browse files Browse the repository at this point in the history
  • Loading branch information
Candice Moyet committed Jul 24, 2023
1 parent fac0252 commit 867a386
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 48 deletions.
65 changes: 21 additions & 44 deletions mapie/conformity_scores/residual_conformity_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class ConformalResidualFittingScore(ConformityScore):
only with split and prefit methods (not with cross methods).
Warning: if the estimator provided is not fitted, a subset of the
calibration data will be used to fit the model (50% by default).
calibration data will be used to fit the model (20% by default).
Parameters
----------
Expand Down Expand Up @@ -291,8 +291,6 @@ def _fit_residual_estimator(
X: NDArray,
y: NDArray,
y_pred: NDArray,
full_indexes: NDArray,
random_state: Optional[Union[int, np.random.RandomState]]
) -> Tuple[NDArray, NDArray]:
"""
Fit the residual estimator and return the indexes used for the
Expand All @@ -309,47 +307,20 @@ def _fit_residual_estimator(
y_pred: NDArray
Predicted targets.
full_indexes: NDArray
Indexes used for the training of the estimator and the calibration.
random_state: Optional[Union[int, np.random.RandomState]]
Random state.
Returns
-------
Tuple[NDArray, NDArray]
- indexes needed for the calibration.
- indexes used for the training of the base estimator.
RegressorMixin
Fitted residual estimator
"""
(X_res_indexes,
X_cal_indexes,
y_res_indexes,
y_cal_indexes) = train_test_split(
full_indexes,
full_indexes,
test_size=self.split_size,
random_state=random_state,
)

residuals = np.abs(np.subtract(
y[y_res_indexes],
y_pred[y_res_indexes]
))
residual_estimator_targets = np.log(np.maximum(
residuals = np.abs(np.subtract(y, y_pred))
targets = np.log(np.maximum(
residuals,
np.full(residuals.shape, self.eps)
))

residual_estimator_ = residual_estimator_.fit(
X[X_res_indexes],
residual_estimator_targets
)
residual_estimator_ = residual_estimator_.fit(X, targets)

cal_index = X_cal_indexes
train_index = list(set(np.arange(y_pred.shape[0])) - set(cal_index))

self.residual_estimator_ = residual_estimator_

return cal_index, np.array(train_index)
return residual_estimator_

def get_signed_conformity_scores(
self,
Expand All @@ -375,28 +346,34 @@ def get_signed_conformity_scores(
).reshape((-1,))

if not self.prefit:
cal_indexes, train_indexes = self._fit_residual_estimator(
clone(self.residual_estimator_), X, y, y_pred, full_indexes,
random_state
cal_indexes, res_indexes = train_test_split(
full_indexes,
test_size=self.split_size,
random_state=random_state,
)
self.residual_estimator_ = self._fit_residual_estimator(
clone(self.residual_estimator_),
X[res_indexes], y[res_indexes], y_pred[res_indexes]
)
else:
cal_indexes = full_indexes
train_indexes = np.argwhere(np.isnan(y_pred)).reshape((-1,))

residuals_pred = np.maximum(
np.exp(self.residual_estimator_.predict(X[cal_indexes])),
self.eps
)
signed_conformity_scores = np.divide(
np.subtract(y[cal_indexes], y_pred[cal_indexes]),
np.abs(np.subtract(y[cal_indexes], y_pred[cal_indexes])),
residuals_pred
)

# reconstruct array with nan and conformity scores
complete_signed_cs = np.zeros_like(y_pred, dtype=float)
complete_signed_cs = np.full(
y_pred.shape, fill_value=np.nan, dtype=float
)
complete_signed_cs[cal_indexes] = signed_conformity_scores
complete_signed_cs[train_indexes] = np.nan
return signed_conformity_scores

return complete_signed_cs

def get_estimation_distribution(
self,
Expand Down
6 changes: 2 additions & 4 deletions mapie/tests/test_conformity_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

Expand Down Expand Up @@ -254,7 +253,7 @@ def test_crf_conformity_score_get_conformity_scores(y_pred: NDArray) -> None:
X_toy, y_toy, y_pred
)
expected_signed_conf_scores = np.array(
[0.38167789, 0.]
[np.nan, np.nan, 1.e+08, 1.e+08, 0.e+00, 3.e+08]
)
np.testing.assert_allclose(conf_scores, expected_signed_conf_scores)

Expand All @@ -276,8 +275,7 @@ def test_crf_score_prefit_with_default_params() -> None:
conf_scores = crf_conf_score.get_conformity_scores(
X_toy, y_toy, y_pred_list
)
_, X, _, y = train_test_split(X_toy, y_toy, test_size=0.2)
crf_conf_score.get_estimation_distribution(X, y, conf_scores)
crf_conf_score.get_estimation_distribution(X_toy, y_toy, conf_scores)


def test_invalid_estimator() -> None:
Expand Down

0 comments on commit 867a386

Please sign in to comment.