Skip to content
This repository has been archived by the owner on Dec 20, 2024. It is now read-only.

Commit

Permalink
enh: initial implementation of a cross-validated hyperparameter selec…
Browse files Browse the repository at this point in the history
…tion

Initial implementation of a cross-validated hyperparameter selection.

Add `ConfigSpace` and `smac` to the list of dependencies.
  • Loading branch information
oesteban authored and jhlegarreta committed Nov 10, 2024
1 parent 2ac0106 commit e259c33
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ classifiers = [
license = "Apache-2.0"
requires-python = ">=3.10"
dependencies = [
"ConfigSpace",
"dipy>=1.3.0",
"joblib",
"nipype>= 1.5.1,<2.0",
Expand All @@ -30,6 +31,7 @@ dependencies = [
"scikit-image>=0.14.2",
"scikit_learn>=0.18",
"scipy>=1.8.0",
"smac",
]
dynamic = ["version"]

Expand Down
72 changes: 72 additions & 0 deletions src/eddymotion/model/gpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,27 +28,33 @@
from typing import Callable, Mapping, Sequence

import numpy as np
from ConfigSpace import Configuration
from scipy import optimize
from scipy.optimize._minimize import Bounds
from sklearn.base import clone
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
Hyperparameter,
Kernel,
)
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import RepeatedKFold, cross_val_score
from sklearn.utils._param_validation import Interval, StrOptions

BOUNDS_A: tuple[float, float] = (0.1, 2.35)
"""The limits for the parameter *a* (angular distance in rad)."""
BOUNDS_LAMBDA: tuple[float, float] = (1e-3, 1000)
"""The limits for the parameter λ (signal scaling factor)."""
BOUNDS_ALPHA: tuple[float, float] = (1e-3, 500)
"""The limits for the parameter σ² (noise adjustment, alpha in Scikit-learn's GP regressor)."""
THETA_EPSILON: float = 1e-5
"""Minimum nonzero angle."""
LBFGS_CONFIGURABLE_OPTIONS = {"disp", "maxiter", "ftol", "gtol"}
"""The set of extended options that can be set on the default BFGS."""
CONFIGURABLE_OPTIONS: Mapping[str, set] = {
"Nelder-Mead": {"disp", "maxiter", "adaptive", "fatol"},
"CG": {"disp", "maxiter", "gtol"},
"cross-validation": {"scoring", "n_folds", "n_evaluations"},
}
"""
A mapping from optimizer names to the option set they allow.
Expand Down Expand Up @@ -161,6 +167,9 @@ class EddyMotionGPR(GaussianProcessRegressor):
"normalize_y": ["boolean"],
"n_targets": [Interval(Integral, 1, None, closed="left"), None],
"random_state": ["random_state"],
"n_folds": [Interval(Integral, 3, None, closed="left")],
"n_evaluations": [Interval(Integral, 3, None, closed="left")],
"n_trials": [Interval(Integral, 3, None, closed="left")],
}

def __init__(
Expand All @@ -182,6 +191,10 @@ def __init__(
gtol: float | None = None,
adaptive: bool | int | None = None,
fatol: float | None = None,
scoring: str = "neg_root_mean_squared_error",
n_folds: int | None = 10,
n_evaluations: int | None = 40,
n_trials: int | None = 200,
):
super().__init__(
kernel,
Expand All @@ -202,6 +215,10 @@ def __init__(
self.gtol = gtol
self.adaptive = adaptive
self.fatol = fatol
self.scoring = scoring
self.n_folds = n_folds
self.n_evaluations = n_evaluations
self.n_trials = n_trials

def _constrained_optimization(
self,
Expand All @@ -210,6 +227,40 @@ def _constrained_optimization(
bounds: Sequence[tuple[float, float]] | Bounds,
) -> tuple[float, float]:
options = {}

if self.optimizer == "cross-validation":
from ConfigSpace import ConfigurationSpace, Float
from smac import HyperparameterOptimizationFacade, Scenario

cs = ConfigurationSpace()
beta_a = Float(
"kernel__beta_a",
tuple(self.kernel.a_bounds),
default=self.kernel_.beta_a,
log=True,
)
beta_l = Float(
"kernel__beta_l",
tuple(self.kernel.l_bounds),
default=self.kernel_.beta_l,
log=True,
)
cs.add([beta_a, beta_l])

# Scenario object specifying the optimization environment
scenario = Scenario(cs, n_trials=self.n_trials)

# Use SMAC to find the best configuration/hyperparameters
smac = HyperparameterOptimizationFacade(
scenario,
self.cross_validation,
)
incumbent = smac.optimize()
return (
np.log([incumbent["kernel__beta_a"], incumbent["kernel__beta_l"]]),
0,
)

if self.optimizer == "fmin_l_bfgs_b":
from sklearn.utils.optimize import _check_optimize_result

Expand Down Expand Up @@ -252,6 +303,27 @@ def _constrained_optimization(

raise ValueError(f"Unknown optimizer {self.optimizer}.")

def cross_validation(
self,
config: Configuration,
seed: int | None = None,
) -> float:
rkf = RepeatedKFold(
n_splits=self.n_folds,
n_repeats=max(self.n_evaluations // self.n_folds, 1),
)
gpr = clone(self)
gpr.set_params(**dict(config))
gpr.optimizer = None
scores = cross_val_score(
gpr,
self.X_train_,
self.y_train_,
scoring=self.scoring,
cv=rkf,
)
return np.mean(scores)


class ExponentialKriging(Kernel):
"""A scikit-learn's kernel for DWI signals."""
Expand Down

0 comments on commit e259c33

Please sign in to comment.