From 27fa741d427747c955dcebb355b0857cdf9ca716 Mon Sep 17 00:00:00 2001
From: Gianluca Detommaso
Date: Thu, 14 Dec 2023 10:51:40 +0100
Subject: [PATCH] edits to temp scaling methods

---
 .../breast_cancer_temp_scaling.py             | 46 ++++++-----
 fortuna/calibration/__init__.py               |  6 +-
 .../temp_scaling/base.py                      | 60 +++++++++++++-
 .../temp_scaling/bias_binary_temp_scaling.py  | 10 ++-
 .../temp_scaling/brier_binary_temp_scaling.py | 20 +++++
 .../crossentropy_binary_temp_scaling.py       | 28 ++++---
 .../temp_scaling/f1_temp_scaling.py           | 25 ++++--
 .../temp_scaling/mse_binary_temp_scaling.py   | 12 ---
 .../classification/temp_scaling/base.py       | 79 +++++++++++++++----
 fortuna/plot.py                               | 18 ++++-
 pyproject.toml                                |  2 +-
 11 files changed, 230 insertions(+), 76 deletions(-)
 create mode 100644 fortuna/calibration/binary_classification/temp_scaling/brier_binary_temp_scaling.py
 delete mode 100644 fortuna/calibration/binary_classification/temp_scaling/mse_binary_temp_scaling.py

diff --git a/benchmarks/calibration/temp_scaling/breast_cancer_temp_scaling.py b/benchmarks/calibration/temp_scaling/breast_cancer_temp_scaling.py
index 59d98c89..33649ec9 100644
--- a/benchmarks/calibration/temp_scaling/breast_cancer_temp_scaling.py
+++ b/benchmarks/calibration/temp_scaling/breast_cancer_temp_scaling.py
@@ -6,10 +6,10 @@
 from fortuna.calibration import (
     BiasBinaryClassificationTemperatureScaling,
+    BrierBinaryClassificationTemperatureScaling,
     ClassificationTemperatureScaling,
     CrossEntropyBinaryClassificationTemperatureScaling,
     F1BinaryClassificationTemperatureScaling,
-    MSEBinaryClassificationTemperatureScaling,
 )
 from fortuna.metric.classification import (
     brier_score,
@@ -73,22 +73,26 @@ def binary_cross_entropy(probs: np.array, targets: np.ndarray) -> float:
     before_f1 = f1(test_probs, test_targets)
     before_ce = binary_cross_entropy(test_probs, test_targets)
 
-    mse_temp_scaler = MSEBinaryClassificationTemperatureScaling()
-    mse_temp_scaler.fit(probs=calib_probs, targets=calib_targets)
-    mse_temp_scaled_test_probs = mse_temp_scaler.predict_proba(probs=test_probs)
-    mse_temp_scaled_test_preds = mse_temp_scaler.predict(probs=test_probs)
-    mse_temp_scaled_brier_score = brier_score(mse_temp_scaled_test_probs, test_targets)
-    mse_temp_scaled_ece = expected_calibration_error(
+    brier_temp_scaler = BrierBinaryClassificationTemperatureScaling()
+    brier_temp_scaler.fit(probs=calib_probs, targets=calib_targets)
+    brier_temp_scaled_test_probs = brier_temp_scaler.predict_proba(probs=test_probs)
+    brier_temp_scaled_test_preds = brier_temp_scaler.predict(probs=test_probs)
+    brier_temp_scaled_brier_score = brier_score(
+        brier_temp_scaled_test_probs, test_targets
+    )
+    brier_temp_scaled_ece = expected_calibration_error(
         probs=np.stack(
-            (1 - mse_temp_scaled_test_probs, mse_temp_scaled_test_probs), axis=1
+            (1 - brier_temp_scaled_test_probs, brier_temp_scaled_test_probs), axis=1
         ),
-        preds=mse_temp_scaled_test_preds,
+        preds=brier_temp_scaled_test_preds,
         targets=test_targets,
     )
-    mse_temp_scaled_prec = precision(mse_temp_scaled_test_preds, test_targets)
-    mse_temp_scaled_rec = recall(mse_temp_scaled_test_preds, test_targets)
-    mse_temp_scaled_f1 = f1(mse_temp_scaled_test_preds, test_targets)
-    mse_temp_scaled_ce = binary_cross_entropy(mse_temp_scaled_test_probs, test_targets)
+    brier_temp_scaled_prec = precision(brier_temp_scaled_test_preds, test_targets)
+    brier_temp_scaled_rec = recall(brier_temp_scaled_test_preds, test_targets)
+    brier_temp_scaled_f1 = f1(brier_temp_scaled_test_preds, test_targets)
+    brier_temp_scaled_ce = binary_cross_entropy(
+        brier_temp_scaled_test_probs, test_targets
+    )
 
     ce_temp_scaler = CrossEntropyBinaryClassificationTemperatureScaling()
     ce_temp_scaler.fit(probs=calib_probs, targets=calib_targets)
@@ -185,13 +189,13 @@ def binary_cross_entropy(probs: np.array, targets: np.ndarray) -> float:
             before_f1,
         ],
         [
-            "MSE binary temperature scaling",
-            mse_temp_scaled_brier_score,
-            mse_temp_scaled_ce,
-            mse_temp_scaled_ece,
-            mse_temp_scaled_prec,
-            mse_temp_scaled_rec,
-            mse_temp_scaled_f1,
+            "Brier binary temperature scaling",
+            brier_temp_scaled_brier_score,
+            brier_temp_scaled_ce,
+            brier_temp_scaled_ece,
+            brier_temp_scaled_prec,
+            brier_temp_scaled_rec,
+            brier_temp_scaled_f1,
         ],
         [
             "Cross-Entropy binary temperature scaling",
@@ -246,7 +250,7 @@ def binary_cross_entropy(probs: np.array, targets: np.ndarray) -> float:
     print(
         tabulate(
             [
-                ["MSE binary temperature scaling", mse_temp_scaler.temperature],
+                ["Brier binary temperature scaling", brier_temp_scaler.temperature],
                 [
                     "Cross-Entropy binary temperature scaling",
                     ce_temp_scaler.temperature,
diff --git a/fortuna/calibration/__init__.py b/fortuna/calibration/__init__.py
index f1c7f287..ff2aaa10 100644
--- a/fortuna/calibration/__init__.py
+++ b/fortuna/calibration/__init__.py
@@ -1,15 +1,15 @@
 from fortuna.calibration.binary_classification.temp_scaling.bias_binary_temp_scaling import (
     BiasBinaryClassificationTemperatureScaling,
 )
+from fortuna.calibration.binary_classification.temp_scaling.brier_binary_temp_scaling import (
+    BrierBinaryClassificationTemperatureScaling,
+)
 from fortuna.calibration.binary_classification.temp_scaling.crossentropy_binary_temp_scaling import (
     CrossEntropyBinaryClassificationTemperatureScaling,
 )
 from fortuna.calibration.binary_classification.temp_scaling.f1_temp_scaling import (
     F1BinaryClassificationTemperatureScaling,
 )
-from fortuna.calibration.binary_classification.temp_scaling.mse_binary_temp_scaling import (
-    MSEBinaryClassificationTemperatureScaling,
-)
 from fortuna.calibration.classification.temp_scaling.base import (
     ClassificationTemperatureScaling,
 )
diff --git a/fortuna/calibration/binary_classification/temp_scaling/base.py b/fortuna/calibration/binary_classification/temp_scaling/base.py
index 7333887f..85a8aa5b 100644
--- a/fortuna/calibration/binary_classification/temp_scaling/base.py
+++ b/fortuna/calibration/binary_classification/temp_scaling/base.py
@@ -8,15 +8,67 @@ def __init__(self):
         self._temperature = None
 
     @abc.abstractmethod
-    def fit(self, probs: np.ndarray, targets: np.ndarray, **kwargs):
+    def fit(self, probs: np.ndarray, targets: np.ndarray):
+        """
+        Fit the temperature scaling method.
+
+        Parameters
+        ----------
+        probs: np.ndarray
+            A one-dimensional array of probabilities of the positive target variable for each input.
+        targets: np.ndarray
+            A one-dimensional array of integer target variables for each input.
+        """
         pass
 
-    def predict_proba(self, probs: np.ndarray):
+    def predict_proba(self, probs: np.ndarray) -> np.ndarray:
+        """
+        Predict the scaled probabilities for each input.
+
+        Parameters
+        ----------
+        probs: np.ndarray
+            A one-dimensional array of probabilities of the positive target variable for each input.
+
+        Returns
+        -------
+        np.ndarray
+            The predicted probabilities.
+        """
+        self._check_probs(probs)
         return np.clip(probs / self._temperature, 0.0, 1.0)
 
-    def predict(self, probs: np.ndarray):
-        return (self.predict_proba(probs) >= 0.5).astype(int)
+    def predict(self, probs: np.ndarray, threshold: float = 0.5) -> np.ndarray:
+        """
+        Predict the target variable for each input.
+
+        Parameters
+        ----------
+        probs: np.ndarray
+            A one-dimensional array of probabilities of the positive target variable for each input.
+        threshold: float
+            The threshold on the predicted probabilities to decide whether a target variable is positive or
+            negative.
+
+        Returns
+        -------
+        np.ndarray
+            The predicted target variables.
+        """
+        self._check_probs(probs)
+        return (self.predict_proba(probs) >= threshold).astype(int)
 
     @property
     def temperature(self):
         return self._temperature
+
+    @staticmethod
+    def _check_probs(probs: np.ndarray):
+        if probs.ndim != 1:
+            raise ValueError("The array of probabilities must be one-dimensional.")
+
+    @staticmethod
+    def _check_targets(targets: np.ndarray):
+        if targets.ndim != 1:
+            raise ValueError("The array of targets must be one-dimensional.")
+        if targets.dtype != int:
+            raise ValueError("Each element in the array of targets must be an integer.")
diff --git a/fortuna/calibration/binary_classification/temp_scaling/bias_binary_temp_scaling.py b/fortuna/calibration/binary_classification/temp_scaling/bias_binary_temp_scaling.py
index ee6d874b..56328a1d 100644
--- a/fortuna/calibration/binary_classification/temp_scaling/bias_binary_temp_scaling.py
+++ b/fortuna/calibration/binary_classification/temp_scaling/bias_binary_temp_scaling.py
@@ -8,5 +8,13 @@
 class BiasBinaryClassificationTemperatureScaling(
     BaseBinaryClassificationTemperatureScaling
 ):
-    def fit(self, probs: np.ndarray, targets: np.ndarray, **kwargs):
+    """
+    A temperature scaling class for binary classification.
+    It scales the probability that the target variable is positive with a single learnable parameter.
+    The method minimizes the expected bias.
+    """
+
+    def fit(self, probs: np.ndarray, targets: np.ndarray):
+        self._check_probs(probs)
+        self._check_targets(targets)
         self._temperature = np.mean(probs) / np.mean(targets)
diff --git a/fortuna/calibration/binary_classification/temp_scaling/brier_binary_temp_scaling.py b/fortuna/calibration/binary_classification/temp_scaling/brier_binary_temp_scaling.py
new file mode 100644
index 00000000..ddd85cad
--- /dev/null
+++ b/fortuna/calibration/binary_classification/temp_scaling/brier_binary_temp_scaling.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+from fortuna.calibration.binary_classification.temp_scaling.base import (
+    BaseBinaryClassificationTemperatureScaling,
+)
+
+
+class BrierBinaryClassificationTemperatureScaling(
+    BaseBinaryClassificationTemperatureScaling
+):
+    """
+    A temperature scaling class for binary classification.
+    It scales the probability that the target variable is positive with a single learnable parameter.
+    The method attempts to minimize the MSE, or Brier score.
+    """
+
+    def fit(self, probs: np.ndarray, targets: np.ndarray):
+        self._check_probs(probs)
+        self._check_targets(targets)
+        self._temperature = np.mean(probs**2) / np.mean(probs * targets)
diff --git a/fortuna/calibration/binary_classification/temp_scaling/crossentropy_binary_temp_scaling.py b/fortuna/calibration/binary_classification/temp_scaling/crossentropy_binary_temp_scaling.py
index bbe4d1a7..6dc82544 100644
--- a/fortuna/calibration/binary_classification/temp_scaling/crossentropy_binary_temp_scaling.py
+++ b/fortuna/calibration/binary_classification/temp_scaling/crossentropy_binary_temp_scaling.py
@@ -1,7 +1,5 @@
-from typing import Dict
-
 import numpy as np
-from scipy.optimize import newton
+from scipy.optimize import brute
 
 from fortuna.calibration.binary_classification.temp_scaling.base import (
     BaseBinaryClassificationTemperatureScaling,
 )
@@ -11,12 +9,22 @@
 class CrossEntropyBinaryClassificationTemperatureScaling(
     BaseBinaryClassificationTemperatureScaling
 ):
-    def fit(self, probs: np.ndarray, targets: np.ndarray, **kwargs) -> Dict:
-        scaled_probs = (1 - 1e-6) * (1e-6 + probs)
+    """
+    A temperature scaling class for binary classification.
+    It scales the probability that the target variable is positive with a single learnable parameter.
+    The method minimizes the binary cross-entropy loss.
+    """
+
+    def fit(self, probs: np.ndarray, targets: np.ndarray):
+        self._check_probs(probs)
+        self._check_targets(targets)
 
-        def temp_scaling_fn(phi):
-            return np.mean((1 - targets) / (1 - scaled_probs * np.exp(-phi))) - 1
+        def temp_scaling_fn(tau):
+            temp_probs = np.clip(probs / tau, 1e-9, 1 - 1e-9)
+            return -np.mean(
+                targets * np.log(temp_probs) + (1 - targets) * np.log(1 - temp_probs)
+            )
 
-        phi, status = newton(temp_scaling_fn, x0=0.0, full_output=True, disp=False)
-        self._temperature = np.exp(phi)
-        return status
+        self._temperature = brute(
+            temp_scaling_fn, ranges=[(np.min(probs), 10)], Ns=1000
+        )[0]
diff --git a/fortuna/calibration/binary_classification/temp_scaling/f1_temp_scaling.py b/fortuna/calibration/binary_classification/temp_scaling/f1_temp_scaling.py
index a0a20f5c..3994b152 100644
--- a/fortuna/calibration/binary_classification/temp_scaling/f1_temp_scaling.py
+++ b/fortuna/calibration/binary_classification/temp_scaling/f1_temp_scaling.py
@@ -1,14 +1,29 @@
 import numpy as np
 from scipy.optimize import brute
 
+from fortuna.calibration.binary_classification.temp_scaling.base import (
+    BaseBinaryClassificationTemperatureScaling,
+)
+
+
+class F1BinaryClassificationTemperatureScaling(
+    BaseBinaryClassificationTemperatureScaling
+):
+    """
+    A temperature scaling class for binary classification.
+    It scales the probability that the target variable is positive with a single learnable parameter.
+    The method attempts to maximize the F1 score.
+    """
-class F1BinaryClassificationTemperatureScaling:
     def __init__(self):
         super().__init__()
         self._threshold = None
         self._temperature = None
 
     def fit(self, probs: np.ndarray, targets: np.ndarray, threshold: float):
+        self._check_probs(probs)
+        self._check_targets(targets)
+
         self._threshold = threshold
 
         n_pos_targets = np.sum(targets)
 
@@ -26,16 +41,10 @@ def loss_fn(tau):
             loss_fn, ranges=[(np.min(probs), 1 / threshold)], Ns=1000
         )[0]
 
-    def predict_proba(self, probs: np.ndarray):
-        return np.clip(probs / self._temperature, 0.0, 1.0)
-
     def predict(self, probs: np.ndarray):
+        self._check_probs(probs)
         return (self.predict_proba(probs) >= self._threshold).astype(int)
 
     @property
     def threshold(self):
         return self._threshold
-
-    @property
-    def temperature(self):
-        return self._temperature
diff --git a/fortuna/calibration/binary_classification/temp_scaling/mse_binary_temp_scaling.py b/fortuna/calibration/binary_classification/temp_scaling/mse_binary_temp_scaling.py
deleted file mode 100644
index a863a7b6..00000000
--- a/fortuna/calibration/binary_classification/temp_scaling/mse_binary_temp_scaling.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import numpy as np
-
-from fortuna.calibration.binary_classification.temp_scaling.base import (
-    BaseBinaryClassificationTemperatureScaling,
-)
-
-
-class MSEBinaryClassificationTemperatureScaling(
-    BaseBinaryClassificationTemperatureScaling
-):
-    def fit(self, probs: np.ndarray, targets: np.ndarray, **kwargs):
-        self._temperature = np.mean(probs**2) / np.mean(probs * targets)
diff --git a/fortuna/calibration/classification/temp_scaling/base.py b/fortuna/calibration/classification/temp_scaling/base.py
index b27d401d..3c9e64e6 100644
--- a/fortuna/calibration/classification/temp_scaling/base.py
+++ b/fortuna/calibration/classification/temp_scaling/base.py
@@ -1,8 +1,7 @@
 import abc
-from typing import Dict
 
 import numpy as np
-from scipy.optimize import minimize
+from scipy.optimize import brute
 from scipy.special import (
     log_softmax,
     softmax,
@@ -11,29 +10,79 @@
 
 class ClassificationTemperatureScaling(abc.ABC):
     def __init__(self):
+        """
+        A temperature scaling class for classification. It scales the logits with a shared learnable parameter.
+        """
         self._temperature = None
 
-    def fit(self, probs: np.ndarray, targets: np.ndarray, **kwargs) -> Dict:
+    def fit(self, probs: np.ndarray, targets: np.ndarray):
+        """
+        Fit the temperature.
+
+        Parameters
+        ----------
+        probs: np.ndarray
+            A two-dimensional array of probabilities, for each input and class.
+        targets: np.ndarray
+            A one-dimensional array of integer target variables.
+        """
+        self._check_probs(probs)
+        self._check_targets(targets)
         log_probs = np.log(probs)
         one_hot_targets = np.eye(probs.shape[1])[targets.reshape(-1)]
-        n_data = probs.shape[0]
 
-        def cross_entropy_fn(phi):
-            log_temp_probs = log_softmax(log_probs * np.exp(-phi), axis=1)
-            return -np.sum(one_hot_targets * log_temp_probs) / n_data
+        def temp_scaling_fn(tau):
+            log_temp_probs = log_softmax(log_probs / tau, axis=1)
+            return -np.sum(one_hot_targets * log_temp_probs)
+
+        self._temperature = brute(temp_scaling_fn, ranges=[(1e-6, 10)], Ns=1000)[0]
 
-        res = minimize(
-            cross_entropy_fn, np.array(0.0), options=dict(disp=False, **kwargs)
-        )
-        self._temperature = np.exp(float(res.x))
-        return dict(message=res.message, success=res.success)
+    def predict_proba(self, probs: np.ndarray) -> np.ndarray:
+        """
+        Predict the scaled probabilities.
 
-    def predict_proba(self, probs: np.ndarray):
+        Parameters
+        ----------
+        probs: np.ndarray
+            A two-dimensional array of probabilities, for each input and class.
+
+        Returns
+        -------
+        np.ndarray
+            The predicted probabilities for each input and class.
+        """
+        self._check_probs(probs)
         return softmax(np.log(probs) / self._temperature, axis=1)
 
-    def predict(self, probs: np.ndarray):
-        return np.argmax(self.predict_proba(probs), axis=1)
+    def predict(self, probs: np.ndarray) -> np.ndarray:
+        """
+        Predict the target variable with the largest probability.
+
+        Parameters
+        ----------
+        probs: np.ndarray
+            A two-dimensional array of probabilities, for each input and class.
+
+        Returns
+        -------
+        np.ndarray
+            The predicted target variables for each input.
+        """
+        self._check_probs(probs)
+        return np.argmax(probs, axis=1)
 
     @property
     def temperature(self):
         return self._temperature
+
+    @staticmethod
+    def _check_probs(probs: np.ndarray):
+        if probs.ndim != 2:
+            raise ValueError("The array of probabilities must be two-dimensional.")
+
+    @staticmethod
+    def _check_targets(targets: np.ndarray):
+        if targets.ndim != 1:
+            raise ValueError("The array of targets must be one-dimensional.")
+        if targets.dtype != int:
+            raise ValueError("Each element in the array of targets must be an integer.")
diff --git a/fortuna/plot.py b/fortuna/plot.py
index 5b524fd4..28b42a03 100755
--- a/fortuna/plot.py
+++ b/fortuna/plot.py
@@ -197,6 +197,9 @@ def plot_reliability_diagram(
     fontsize: int = 10,
     title: Optional[str] = None,
     ylim: Optional[Tuple[float, float]] = None,
+    linewidth: Optional[int] = None,
+    linestyle: Optional[str] = None,
+    alpha: float = 1.0,
     show: bool = False,
     **save_options,
 ) -> None:
@@ -225,6 +228,12 @@
         Plot title.
     ylim: Optional[Tuple[float, float]]
         Bottom and top limits on the y-axis.
+    linewidth: Optional[int]
+        Line width.
+    linestyle: Optional[str]
+        Line style.
+    alpha: float
+        Opacity.
     show: bool
         Whether to show the plot.
     save_options: dict
@@ -251,8 +260,15 @@
     ax.grid()
     ax.plot([0, 1], [0, 0], color="grey", linestyle="--", alpha=0.3)
     for i, (a, c) in enumerate(zip(accs, confs)):
+        idx = np.argsort(c)
         ax.plot(
-            c, c - a, marker=".", linestyle="-", label=labels[i] if labels else None
+            c[idx],
+            (c - a)[idx],
+            marker=".",
+            linestyle=linestyle,
+            linewidth=linewidth,
+            alpha=alpha,
+            label=labels[i] if labels else None,
         )
     if labels:
         ax.legend(fontsize=fontsize, loc=legend_loc if legend_loc else None)
diff --git a/pyproject.toml b/pyproject.toml
index 4ae82905..c7a3d425 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "aws-fortuna"
-version = "0.1.43"
+version = "0.1.44"
 description = "A Library for Uncertainty Quantification."
 authors = ["Gianluca Detommaso ", "Alberto Gasparin "]
 license = "Apache-2.0"
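
Usage note (not part of the patch): the snippet below is a minimal sketch of how the calibration classes touched by this patch are meant to be called, based only on the fit / predict_proba / predict / temperature API visible in the diff. The synthetic probabilities and targets are illustrative assumptions; in practice you would pass the held-out calibration split's predicted probabilities and labels, as the breast-cancer benchmark above does.

import numpy as np

from fortuna.calibration import (
    BrierBinaryClassificationTemperatureScaling,
    ClassificationTemperatureScaling,
)

# Synthetic, roughly calibrated binary data: 1-D probabilities of the positive
# class and 1-D integer targets drawn from those probabilities.
rng = np.random.default_rng(0)
calib_probs = rng.uniform(size=1000)
calib_targets = (rng.uniform(size=1000) < calib_probs).astype(int)

# Binary case: fit a single temperature on the calibration split, then rescale
# and threshold new probabilities.
binary_scaler = BrierBinaryClassificationTemperatureScaling()
binary_scaler.fit(probs=calib_probs, targets=calib_targets)
scaled_probs = binary_scaler.predict_proba(probs=calib_probs)
preds = binary_scaler.predict(probs=calib_probs)
print("binary temperature:", binary_scaler.temperature)

# Multi-class case: probabilities form a 2-D array (inputs by classes) whose rows sum to one.
multi_probs = rng.dirichlet(np.ones(3), size=1000)
multi_targets = rng.integers(0, 3, size=1000).astype(int)
multi_scaler = ClassificationTemperatureScaling()
multi_scaler.fit(probs=multi_probs, targets=multi_targets)
scaled_multi_probs = multi_scaler.predict_proba(probs=multi_probs)
print("multi-class temperature:", multi_scaler.temperature)

The Brier scaler has a closed-form temperature, np.mean(probs**2) / np.mean(probs * targets), so fitting it is immediate; the cross-entropy and multi-class scalers instead run scipy's brute search over a bounded temperature range, as implemented in the diff.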