add logging of losses to user attrs, add EVR_CV loss function
RichieHakim committed Mar 21, 2024
1 parent 3e0d3b5 commit f7c099f
Showing 1 changed file with 141 additions and 14 deletions: bnpm/automatic_regression.py
@@ -320,6 +320,11 @@ def _objective(self, trial: optuna.trial.Trial) -> float:
self.model_best = model
self.params_best = kwargs_model

# Log the trial results in optuna: losses
trial.set_user_attr('loss', loss)
trial.set_user_attr('loss_train', loss_train)
trial.set_user_attr('loss_test', loss_test)

return loss
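
The per-trial losses logged above can be read back from the Optuna study after tuning. A minimal sketch, assuming a completed study (the study name and storage URL here are illustrative):

import optuna

study = optuna.load_study(study_name='autotune', storage='sqlite:///optuna.db')  ## illustrative names
for trial in study.trials:
    print(trial.number, trial.user_attrs.get('loss'), trial.user_attrs.get('loss_train'), trial.user_attrs.get('loss_test'))
## The user attrs also appear as 'user_attrs_*' columns in study.trials_dataframe().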

def fit(self) -> Union[sklearn.base.BaseEstimator, Optional[Dict[str, Any]]]:
@@ -334,11 +339,11 @@ def fit(self) -> Union[sklearn.base.BaseEstimator, Optional[Dict[str, Any]]]:
The best parameters obtained from hyperparameter tuning.
"""
# Set verbosity
-        if int(self.verbose) < 1:
+        if int(self.verbose) <= 1:
             optuna.logging.set_verbosity(optuna.logging.WARNING)
-        elif int(self.verbose) == 1:
+        elif int(self.verbose) == 2:
             optuna.logging.set_verbosity(optuna.logging.INFO)
-        elif int(self.verbose) > 1:
+        elif int(self.verbose) > 2:
             optuna.logging.set_verbosity(optuna.logging.DEBUG)

# Initialize an Optuna study
@@ -645,7 +650,7 @@ def __call__(
sample_weight_test: Optional[List[float]] = None,
):
"""
-        Calculates the cross-entropy loss using cross-validation.
+        Calculates the mean-squared error loss using cross-validation.
Args:
y_train_pred (np.ndarray):
@@ -667,9 +672,9 @@ def __call__(
loss (float):
The calculated loss after applying the penalty.
                loss_train (float):
-                    The cross-entropy loss of the training set.
+                    The mean-squared error loss of the training set.
                loss_test (float):
-                    The cross-entropy loss of the test set.
+                    The mean-squared error loss of the test set.
"""
# Normalize the y values such that the variance of the true values is 1.
y_train_pred = y_train_pred / y_train_true.std()
@@ -695,6 +700,114 @@ def __call__(
return loss, loss_train, loss_test
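
For intuition, a worked example of the test/train penalty applied here (numbers are illustrative):

## test_or_train='test', penalty_testTrainRatio=1.0
## loss_train = 0.20, loss_test = 0.40
## loss = loss_test * ((loss_test / loss_train) ** 1.0) = 0.40 * 2.0 = 0.80
## With penalty_testTrainRatio=0.0 the ratio term becomes 1, so loss == loss_test.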


class LossFunction_EVR_CV():
"""
Calculates the explained variance ratio loss of a model using
cross-validation. Output loss is 1 - EVR.
RH 2024
Args:
penalty_testTrainRatio (float):
The amount of penalty for the test loss to the train loss.
Penalty is applied with formula:
``loss = loss_test_or_train * ((loss_test / loss_train) ** penalty_testTrainRatio)``.
test_or_train (str):
A string indicating whether to apply the penalty to the test or
train loss.
It should be either ``'test'`` or ``'train'``.
"""
def __init__(
self,
penalty_testTrainRatio: float = 1.0,
test_or_train: str = 'test',
) -> None:
"""
Initializes the class with the given penalty, and test_or_train setting.
"""
self.penalty_testTrainRatio = penalty_testTrainRatio
## Set the penalty function
if test_or_train == 'test':
self.fn_penalty_testTrainRatio = lambda test, train: test * ((test / train) ** self.penalty_testTrainRatio)
elif test_or_train == 'train':
self.fn_penalty_testTrainRatio = lambda test, train: train * ((train / test) ** self.penalty_testTrainRatio)
else:
raise ValueError('test_or_train must be either "test" or "train".')


    def explainable_variance_ratio(self, v1, v2, sample_weight=None):
        """
        Computes the explained variance ratio (EVR) of ``v2`` relative to
        ``v1``: ``v2`` is projected out of ``v1``, and the EVR is one minus
        the ratio of residual variance to total variance, computed per column
        and then aggregated with each column weighted by its variance in
        ``v1``.
        """
        if isinstance(v1, torch.Tensor):
            ## Center both arrays along the sample axis
            v1 = v1 - torch.nanmean(v1, dim=0)
            v2 = v2 - torch.nanmean(v2, dim=0)

            ## Remove from v1 its projection onto v2; the residual is the part of v1 not explained by v2
            v1_orth = v1 - (torch.nansum(v1 * v2, dim=0) / torch.nansum(v2 * v2, dim=0)) * v2

            v1_var = torch.var(v1, dim=0)
            EVR = 1 - (torch.var(v1_orth, dim=0) / v1_var)
            if sample_weight is not None:
                ## Scale EVR by the normalized sample weights
                EVR = EVR * (sample_weight / torch.mean(sample_weight))

            ## Aggregate across columns, weighting each column by its total variance
            EVR_total_weighted = torch.nansum(v1_var * EVR) / torch.sum(v1_var)
            return EVR_total_weighted
        elif isinstance(v1, np.ndarray):
            ## Same procedure as the torch branch, using numpy
            v1 = v1 - np.nanmean(v1, axis=0)
            v2 = v2 - np.nanmean(v2, axis=0)

            v1_orth = v1 - (np.nansum(v1 * v2, axis=0) / np.nansum(v2 * v2, axis=0)) * v2

            v1_var = np.var(v1, axis=0)
            EVR = 1 - (np.var(v1_orth, axis=0) / v1_var)
            if sample_weight is not None:
                EVR = EVR * (sample_weight / np.mean(sample_weight))

            EVR_total_weighted = np.nansum(v1_var * EVR) / np.sum(v1_var)
            return EVR_total_weighted
        else:
            raise ValueError(f'Expected v1 to be of type np.ndarray or torch.Tensor, but got type {type(v1)}.')

def __call__(
self,
y_train_pred: np.ndarray,
y_test_pred: np.ndarray,
y_train_true: np.ndarray,
y_test_true: np.ndarray,
sample_weight_train: Optional[List[float]] = None,
sample_weight_test: Optional[List[float]] = None,
):
"""
Calculates the explained variance ratio loss using cross-validation.
Args:
y_train_pred (np.ndarray):
Predicted output data for the training set. (shape:
*(n_samples,)*)
y_test_pred (np.ndarray):
Predicted output data for the test set. (shape: *(n_samples,)*)
y_train_true (np.ndarray):
True output data for the training set. (shape: *(n_samples,)*)
y_test_true (np.ndarray):
True output data for the test set. (shape: *(n_samples,)*)
sample_weight_train (Optional[List[float]]):
Weights assigned to each training sample.
sample_weight_test (Optional[List[float]]):
Weights assigned to each test sample.
Returns:
(tuple): tuple containing:
loss (float):
The calculated loss after applying the penalty.
loss_train (float):
1 - explained variance ratio loss of the training set.
loss_test (float):
1 - explained variance ratio loss of the test set.
"""
# Calculate 1 - explained variance ratio loss using cross-validation.

loss_train = 1 - self.explainable_variance_ratio(y_train_true, y_train_pred, sample_weight_train)
loss_test = 1 - self.explainable_variance_ratio(y_test_true, y_test_pred, sample_weight_test)
loss = self.fn_penalty_testTrainRatio(loss_test, loss_train)

return loss, loss_train, loss_test
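
A minimal usage sketch of the new loss function on synthetic 1-D data (all data values are illustrative; assumes numpy is imported as in this module):

import numpy as np

rng = np.random.default_rng(0)
y_train_true = rng.normal(size=200)
y_train_pred = y_train_true + 0.1 * rng.normal(size=200)  ## good fit on train
y_test_true = rng.normal(size=100)
y_test_pred = y_test_true + 0.5 * rng.normal(size=100)    ## worse fit on test

fn_loss = LossFunction_EVR_CV(penalty_testTrainRatio=1.0, test_or_train='test')
loss, loss_train, loss_test = fn_loss(
    y_train_pred=y_train_pred,
    y_test_pred=y_test_pred,
    y_train_true=y_train_true,
    y_test_true=y_test_true,
)
## loss_test > loss_train here, so the penalty inflates the reported loss:
## loss == loss_test * (loss_test / loss_train) ** 1.0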


class Auto_Classifier(Autotuner_BaseEstimator):
"""
Implements automatic hyperparameter tuning for a user defined classification model.
@@ -777,6 +890,9 @@ class Auto_Classifier(Autotuner_BaseEstimator):
test_size (float):
Test set ratio.
Only used if ``cv`` is ``None``.
        loss_function (Optional[Callable]):
            A custom loss function. If ``None``, then the default loss
            function is used (``LossFunction_CrossEntropy_CV``).
verbose (bool):
Whether to print progress messages.
optuna_storage_url (Optional[str]):
@@ -861,6 +977,7 @@ def __init__(
cv: Optional[sklearn.model_selection._split.BaseCrossValidator] = None,
groups: Optional[np.ndarray] = None,
test_size: float = 0.3,
loss_function: Optional[Callable] = None,
verbose: bool = True,
optuna_storage_url: Optional[str] = None,
optuna_storage_name: Optional[str] = None,
@@ -879,11 +996,14 @@ def __init__(
)

## Prepare the loss function
-        self.fn_loss = LossFunction_CrossEntropy_CV(
-            penalty_testTrainRatio=penalty_testTrainRatio,
-            labels=y,
-            test_or_train='test',
-        )
+        if loss_function is None:
+            self.fn_loss = LossFunction_CrossEntropy_CV(
+                penalty_testTrainRatio=penalty_testTrainRatio,
+                labels=y,
+                test_or_train='test',
+            )
+        else:
+            self.fn_loss = loss_function
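
A sketch of the new ``loss_function`` argument for classification, keeping the default cross-entropy loss but with a heavier test/train penalty (the ``X``/``y`` data are illustrative and assume the usual ``Auto_Classifier`` call signature):

fn_loss = LossFunction_CrossEntropy_CV(
    penalty_testTrainRatio=2.0,  ## penalize train/test divergence more than the default
    labels=y,
    test_or_train='test',
)
autoclf = Auto_Classifier(X=X, y=y, loss_function=fn_loss)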

## Prepare the cross-validation
self.cv = sklearn.model_selection.StratifiedShuffleSplit(
@@ -1062,6 +1182,9 @@ class Auto_Regression(Autotuner_BaseEstimator):
test_size (float):
Test set ratio.
Only used if ``cv`` is ``None``.
        loss_function (Optional[Callable]):
            A custom loss function. If ``None``, then the default loss
            function is used (``LossFunction_MSE_CV``).
verbose (bool):
Whether to print progress messages.
optuna_storage_url (Optional[str]):
@@ -1144,6 +1267,7 @@ def __init__(
cv: Optional[sklearn.model_selection._split.BaseCrossValidator] = None,
groups: Optional[np.ndarray] = None,
test_size: float = 0.3,
loss_function: Optional[Callable] = None,
verbose: bool = True,
optuna_storage_url: Optional[str] = None,
optuna_storage_name: Optional[str] = None,
@@ -1157,9 +1281,12 @@ def __init__(
self.sample_weight = sample_weight

## Prepare the loss function
-        self.fn_loss = LossFunction_MSE_CV(
-            penalty_testTrainRatio=penalty_testTrainRatio,
-        )
+        if loss_function is None:
+            self.fn_loss = LossFunction_MSE_CV(
+                penalty_testTrainRatio=penalty_testTrainRatio,
+            )
+        else:
+            self.fn_loss = loss_function

## Prepare the cross-validation
self.cv = sklearn.model_selection.ShuffleSplit(
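Correspondingly for regression, the new EVR loss can be swapped in. A sketch, assuming the usual ``Auto_Regression`` call signature with illustrative ``X``/``y``:

fn_loss = LossFunction_EVR_CV(penalty_testTrainRatio=1.0, test_or_train='test')
autoreg = Auto_Regression(X=X, y=y, loss_function=fn_loss)
autoreg.fit()  ## the best model and params are then available as autoreg.model_best / autoreg.params_best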
