This repository has been archived by the owner on Dec 20, 2024. It is now read-only.

ENH: GP hyperparameter selection by cross-validation #250

Draft · wants to merge 2 commits into main. Changes from all commits are shown below.
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
license = "Apache-2.0"
requires-python = ">=3.10"
dependencies = [
+    "ConfigSpace",
    "dipy>=1.3.0",
    "joblib",
    "nipype>= 1.5.1,<2.0",
@@ -30,6 +31,7 @@ dependencies = [
"scikit-image>=0.14.2",
"scikit_learn>=0.18",
"scipy>=1.8.0",
"smac",
]
dynamic = ["version"]

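The two added dependencies back the feature introduced below: ConfigSpace declares the hyperparameter search space, and SMAC runs the Bayesian optimization loop over it.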
4 changes: 2 additions & 2 deletions scripts/dwi_gp_estimation_error_analysis.py
@@ -195,8 +195,8 @@ def main() -> None:
    gpr = EddyMotionGPR(
        kernel=SphericalKriging(beta_a=a, beta_l=lambda_s),
        alpha=alpha,
-        optimizer=None,
-        # optimizer="Nelder-Mead",
+        # optimizer=None,
+        optimizer="cross-validation",
        # disp=True,
        # ftol=1,
        # max_iter=2e5,
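Pulling the new knobs together, here is a minimal sketch of constructing the regressor with the cross-validation optimizer. The import path and the numeric values are assumptions for illustration; the keyword names all come from this PR:

from eddymotion.model.gpr import EddyMotionGPR, SphericalKriging

# Hypothetical placeholder values; the real script derives a, lambda_s,
# and alpha from the data being analyzed.
a, lambda_s, alpha = 1.15, 120.0, 0.1

gpr = EddyMotionGPR(
    kernel=SphericalKriging(beta_a=a, beta_l=lambda_s),
    alpha=alpha,
    optimizer="cross-validation",
    scoring="neg_root_mean_squared_error",  # any scikit-learn scorer name
    n_folds=10,  # splits in each RepeatedKFold repeat
    n_evaluations=40,  # total CV fits per candidate configuration
    n_trials=200,  # number of configurations SMAC may evaluate
)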
72 changes: 72 additions & 0 deletions src/eddymotion/model/gpr.py
@@ -28,27 +28,33 @@
from typing import Callable, Mapping, Sequence

import numpy as np
+from ConfigSpace import Configuration
from scipy import optimize
from scipy.optimize._minimize import Bounds
+from sklearn.base import clone
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
    Hyperparameter,
    Kernel,
)
from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.model_selection import RepeatedKFold, cross_val_score
from sklearn.utils._param_validation import Interval, StrOptions

BOUNDS_A: tuple[float, float] = (0.1, 2.35)
"""The limits for the parameter *a* (angular distance in rad)."""
BOUNDS_LAMBDA: tuple[float, float] = (1e-3, 1000)
"""The limits for the parameter λ (signal scaling factor)."""
+BOUNDS_ALPHA: tuple[float, float] = (1e-3, 500)
+"""The limits for the parameter σ² (noise adjustment, alpha in Scikit-learn's GP regressor)."""
THETA_EPSILON: float = 1e-5
"""Minimum nonzero angle."""
LBFGS_CONFIGURABLE_OPTIONS = {"disp", "maxiter", "ftol", "gtol"}
"""The set of extended options that can be set on the default BFGS."""
CONFIGURABLE_OPTIONS: Mapping[str, set] = {
    "Nelder-Mead": {"disp", "maxiter", "adaptive", "fatol"},
    "CG": {"disp", "maxiter", "gtol"},
+    "cross-validation": {"scoring", "n_folds", "n_evaluations"},
}
"""
A mapping from optimizer names to the option set they allow.
@@ -161,6 +167,9 @@
"normalize_y": ["boolean"],
"n_targets": [Interval(Integral, 1, None, closed="left"), None],
"random_state": ["random_state"],
"n_folds": [Interval(Integral, 3, None, closed="left")],
"n_evaluations": [Interval(Integral, 3, None, closed="left")],
"n_trials": [Interval(Integral, 3, None, closed="left")],
}

def __init__(
@@ -182,6 +191,10 @@
        gtol: float | None = None,
        adaptive: bool | int | None = None,
        fatol: float | None = None,
+        scoring: str = "neg_root_mean_squared_error",
+        n_folds: int | None = 10,
+        n_evaluations: int | None = 40,
+        n_trials: int | None = 200,
    ):
        super().__init__(
            kernel,
@@ -202,6 +215,10 @@
        self.gtol = gtol
        self.adaptive = adaptive
        self.fatol = fatol
+        self.scoring = scoring
+        self.n_folds = n_folds
+        self.n_evaluations = n_evaluations
+        self.n_trials = n_trials

    def _constrained_optimization(
        self,
@@ -210,6 +227,40 @@
        bounds: Sequence[tuple[float, float]] | Bounds,
    ) -> tuple[float, float]:
        options = {}

+        if self.optimizer == "cross-validation":
+            from ConfigSpace import ConfigurationSpace, Float
+            from smac import HyperparameterOptimizationFacade, Scenario
+
+            cs = ConfigurationSpace()
+            beta_a = Float(
+                "kernel__beta_a",
+                tuple(self.kernel.a_bounds),
+                default=self.kernel_.beta_a,
+                log=True,
+            )
+            beta_l = Float(
+                "kernel__beta_l",
+                tuple(self.kernel.l_bounds),
+                default=self.kernel_.beta_l,
+                log=True,
+            )
+            cs.add([beta_a, beta_l])
+
+            # Scenario object specifying the optimization environment
+            scenario = Scenario(cs, n_trials=self.n_trials)
+
+            # Use SMAC to find the best configuration/hyperparameters
+            smac = HyperparameterOptimizationFacade(
+                scenario,
+                self.cross_validation,
+            )
+            incumbent = smac.optimize()
+            # Return the incumbent in scikit-learn's (theta_opt, func_min)
+            # convention, where theta holds log-transformed hyperparameters.
+            return (
+                np.log([incumbent["kernel__beta_a"], incumbent["kernel__beta_l"]]),
+                0,
+            )
+
        if self.optimizer == "fmin_l_bfgs_b":
            from sklearn.utils.optimize import _check_optimize_result

@@ -252,6 +303,27 @@

        raise ValueError(f"Unknown optimizer {self.optimizer}.")

+    def cross_validation(
+        self,
+        config: Configuration,
+        seed: int | None = None,
+    ) -> float:
+        rkf = RepeatedKFold(
+            n_splits=self.n_folds,
+            n_repeats=max(self.n_evaluations // self.n_folds, 1),
+        )
+        # Evaluate a clone so the fitted estimator stays untouched, and
+        # disable the inner optimizer to avoid recursing into SMAC.
+        gpr = clone(self)
+        gpr.set_params(**dict(config))
+        gpr.optimizer = None
+        scores = cross_val_score(
+            gpr,
+            self.X_train_,
+            self.y_train_,
+            scoring=self.scoring,
+            cv=rkf,
+        )
+        # SMAC minimizes the returned value, and sklearn scorers are
+        # greater-is-better, so negate the mean score to obtain a cost.
+        return -np.mean(scores)

class ExponentialKriging(Kernel):
"""A scikit-learn's kernel for DWI signals."""
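With the defaults above (n_folds=10, n_evaluations=40), cross_validation runs RepeatedKFold with max(40 // 10, 1) = 4 repeats, i.e. 40 fits per configuration that SMAC proposes. For readers new to SMAC, the following self-contained sketch shows the same pattern on a toy objective; it assumes smac>=2.0 with a recent ConfigSpace, and the objective, bounds, and trial budget are made up for illustration:

import numpy as np
from ConfigSpace import Configuration, ConfigurationSpace, Float
from smac import HyperparameterOptimizationFacade, Scenario

# One log-scaled hyperparameter, mirroring how the PR declares beta_a/beta_l.
cs = ConfigurationSpace()
cs.add([Float("x", (1e-3, 10.0), default=1.0, log=True)])

def objective(config: Configuration, seed: int = 0) -> float:
    # SMAC *minimizes* the returned value, which is why the estimator's
    # cross_validation hook must hand back a cost rather than a reward.
    return float((np.log(config["x"]) - 1.0) ** 2)

scenario = Scenario(cs, n_trials=50)  # evaluation budget
smac = HyperparameterOptimizationFacade(scenario, objective)
incumbent = smac.optimize()  # best configuration found within the budget
print(incumbent["x"])  # close to e (~2.718), the minimizer of the toy objective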