ENH: GP hyperparameter selection by cross-validation #34

Draft · wants to merge 2 commits into main
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
license = "Apache-2.0"
requires-python = ">=3.10"
dependencies = [
"ConfigSpace",
"dipy>=1.3.0",
"joblib",
"nipype>= 1.5.1,<2.0",
@@ -30,6 +31,7 @@ dependencies = [
"scikit-image>=0.14.2",
"scikit_learn>=0.18",
"scipy>=1.8.0",
"smac",
]
dynamic = ["version"]

4 changes: 2 additions & 2 deletions scripts/dwi_gp_estimation_error_analysis.py
@@ -195,8 +195,8 @@ def main() -> None:
gpr = DiffusionGPR(
kernel=SphericalKriging(beta_a=a, beta_l=lambda_s),
alpha=alpha,
- optimizer=None,
- # optimizer="Nelder-Mead",
+ # optimizer=None,
+ optimizer="cross-validation",
# disp=True,
# ftol=1,
# max_iter=2e5,
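For orientation, here is a minimal sketch of how the script drives the new optimizer; the kernel values, the placeholder data, and the import path for SphericalKriging are illustrative assumptions, and only optimizer="cross-validation" is the behavior this PR adds:

import numpy as np

from nifreeze.model.gpr import DiffusionGPR, SphericalKriging

rng = np.random.default_rng(1234)
X = rng.random((60, 3))    # placeholder diffusion gradient directions
y = rng.random((60, 100))  # placeholder DWI signals, one row per gradient

gpr = DiffusionGPR(
    kernel=SphericalKriging(beta_a=1.38, beta_l=0.5),  # illustrative values
    alpha=0.5,
    optimizer="cross-validation",  # SMAC-driven hyperparameter selection
)
gpr = gpr.fit(X, y)  # hyperparameters are tuned during fit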
72 changes: 72 additions & 0 deletions src/nifreeze/model/gpr.py
@@ -28,27 +28,33 @@
from typing import Callable, Mapping, Sequence

import numpy as np
from ConfigSpace import Configuration
from scipy import optimize
from scipy.optimize._minimize import Bounds
from sklearn.base import clone
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import (
Hyperparameter,
Kernel,
)
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import RepeatedKFold, cross_val_score
from sklearn.utils._param_validation import Interval, StrOptions

BOUNDS_A: tuple[float, float] = (0.1, 2.35)
"""The limits for the parameter *a* (angular distance in rad)."""
BOUNDS_LAMBDA: tuple[float, float] = (1e-3, 1000)
"""The limits for the parameter λ (signal scaling factor)."""
BOUNDS_ALPHA: tuple[float, float] = (1e-3, 500)
"""The limits for the parameter σ² (noise adjustment, alpha in Scikit-learn's GP regressor)."""
THETA_EPSILON: float = 1e-5
"""Minimum nonzero angle."""
LBFGS_CONFIGURABLE_OPTIONS = {"disp", "maxiter", "ftol", "gtol"}
"""The set of extended options that can be set on the default BFGS."""
CONFIGURABLE_OPTIONS: Mapping[str, set] = {
"Nelder-Mead": {"disp", "maxiter", "adaptive", "fatol"},
"CG": {"disp", "maxiter", "gtol"},
"cross-validation": {"scoring", "n_folds", "n_evaluations"},
}
"""
A mapping from optimizer names to the option set they allow.
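To illustrate the mapping, a hedged sketch of passing the cross-validation options through the constructor; the argument values are arbitrary, and scoring, n_folds, and n_evaluations are the options this mapping allows for "cross-validation":

gpr = DiffusionGPR(
    kernel=SphericalKriging(beta_a=1.38, beta_l=0.5),
    optimizer="cross-validation",
    scoring="neg_root_mean_squared_error",  # any sklearn scorer name
    n_folds=10,        # K of the repeated K-fold splitter
    n_evaluations=40,  # total fit/score cycles per tried configuration
)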
@@ -162,6 +168,9 @@
"normalize_y": ["boolean"],
"n_targets": [Interval(Integral, 1, None, closed="left"), None],
"random_state": ["random_state"],
"n_folds": [Interval(Integral, 3, None, closed="left")],
"n_evaluations": [Interval(Integral, 3, None, closed="left")],
"n_trials": [Interval(Integral, 3, None, closed="left")],
}

def __init__(
@@ -183,6 +192,10 @@
gtol: float | None = None,
adaptive: bool | int | None = None,
fatol: float | None = None,
scoring: str = "neg_root_mean_squared_error",
n_folds: int | None = 10,
n_evaluations: int | None = 40,
n_trials: int | None = 200,
):
super().__init__(
kernel,
@@ -203,6 +216,10 @@
self.gtol = gtol
self.adaptive = adaptive
self.fatol = fatol
self.scoring = scoring
self.n_folds = n_folds
self.n_evaluations = n_evaluations
self.n_trials = n_trials

def _constrained_optimization(
self,
@@ -211,6 +228,40 @@
bounds: Sequence[tuple[float, float]] | Bounds,
) -> tuple[float, float]:
options = {}

if self.optimizer == "cross-validation":
from ConfigSpace import ConfigurationSpace, Float
from smac import HyperparameterOptimizationFacade, Scenario

cs = ConfigurationSpace()
beta_a = Float(
"kernel__beta_a",
tuple(self.kernel.a_bounds),
default=self.kernel_.beta_a,
log=True,
)
beta_l = Float(
"kernel__beta_l",
tuple(self.kernel.l_bounds),
default=self.kernel_.beta_l,
log=True,
)
cs.add([beta_a, beta_l])

# Scenario object specifying the optimization environment
scenario = Scenario(cs, n_trials=self.n_trials)

# Use SMAC to find the best configuration/hyperparameters
smac = HyperparameterOptimizationFacade(
scenario,
self.cross_validation,
)
incumbent = smac.optimize()
return (
np.log([incumbent["kernel__beta_a"], incumbent["kernel__beta_l"]]),
0,
)

if self.optimizer == "fmin_l_bfgs_b":
from sklearn.utils.optimize import _check_optimize_result

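To make the branch above easier to follow in isolation, here is a self-contained sketch of the same ConfigSpace/SMAC pattern with a toy objective; the names, bounds, and budget are illustrative, not part of this PR:

import numpy as np
from ConfigSpace import Configuration, ConfigurationSpace, Float
from smac import HyperparameterOptimizationFacade, Scenario

# Toy search space mirroring the two kernel hyperparameters.
cs = ConfigurationSpace()
cs.add(
    [
        Float("kernel__beta_a", (0.1, 2.35), default=1.0, log=True),
        Float("kernel__beta_l", (1e-3, 1000.0), default=1.0, log=True),
    ]
)

def toy_cost(config: Configuration, seed: int = 0) -> float:
    # SMAC minimizes the returned value; a real target function would run
    # cross-validation here and should return a positive error.
    return (config["kernel__beta_a"] - 1.2) ** 2 + np.log(config["kernel__beta_l"]) ** 2

scenario = Scenario(cs, n_trials=50)  # evaluation budget
smac = HyperparameterOptimizationFacade(scenario, toy_cost)
incumbent = smac.optimize()  # best configuration found within the budget
print(dict(incumbent))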
@@ -253,6 +304,27 @@

raise ValueError(f"Unknown optimizer {self.optimizer}.")

def cross_validation(
self,
config: Configuration,
seed: int | None = None,
) -> float:
rkf = RepeatedKFold(
n_splits=self.n_folds,
n_repeats=max(self.n_evaluations // self.n_folds, 1),
random_state=seed,  # reuse the seed SMAC passes in, for reproducible folds
)
gpr = clone(self)
gpr.set_params(**dict(config))
gpr.optimizer = None
scores = cross_val_score(
gpr,
self.X_train_,
self.y_train_,
scoring=self.scoring,
cv=rkf,
)
# SMAC minimizes its objective, while sklearn's neg_* scorers are
# larger-is-better, so negate the mean score to obtain a cost.
return -np.mean(scores)


class ExponentialKriging(Kernel):
"""A scikit-learn's kernel for DWI signals."""
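A closing note on the evaluation budget implied by the new defaults (my arithmetic, not stated in the PR): cross_validation derives n_repeats = max(n_evaluations // n_folds, 1), so the defaults n_folds=10 and n_evaluations=40 give 4 repeats, i.e. 40 GP fits per SMAC trial; with n_trials=200 the full search is on the order of 8,000 fits.

# Budget arithmetic for the DiffusionGPR defaults (sketch).
n_folds, n_evaluations, n_trials = 10, 40, 200
n_repeats = max(n_evaluations // n_folds, 1)  # 4
fits_per_trial = n_folds * n_repeats          # 40
total_fits = fits_per_trial * n_trials        # 8000
print(n_repeats, fits_per_trial, total_fits)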