Merge pull request #252 from alan-turing-institute/pr-243
Pr 243
mastoffel authored Sep 30, 2024
2 parents a7b278d + b05091c commit 1f92574
Showing 6 changed files with 432 additions and 17 deletions.
133 changes: 116 additions & 17 deletions autoemulate/emulators/gaussian_process_torch.py
@@ -8,15 +8,27 @@
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.exceptions import DataConversionWarning
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.utils import check_array
from sklearn.utils import check_X_y
from sklearn.utils.validation import check_is_fitted
from skopt.space import Categorical
from skopt.space import Integer
from skopt.space import Real
from skorch.callbacks import Checkpoint
from skorch.callbacks import EarlyStopping
from skorch.callbacks import EpochScoring
from skorch.callbacks import LRScheduler
from skorch.callbacks import ProgressBar
from skorch.dataset import Dataset
from skorch.dataset import ValidSplit
from skorch.helper import predefined_split
from skorch.probabilistic import ExactGPRegressor

from autoemulate.emulators.gaussian_process_utils import EarlyStoppingCustom
from autoemulate.emulators.gaussian_process_utils import PolyMean
from autoemulate.emulators.neural_networks.gp_module import CorrGPModule
from autoemulate.utils import set_random_seed

@@ -109,6 +121,16 @@ def fit(self, X, y):
self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False)
y = (y - self._y_train_mean) / self._y_train_std

mean_module = (
self.mean_module(self.n_features_in_)
if callable(self.mean_module)
else self.mean_module
)
covar_module = (
self.covar_module(self.n_features_in_)
if callable(self.covar_module)
else self.covar_module
)
self.model_ = ExactGPRegressor(
CorrGPModule,
module__mean=self._get_module(
@@ -125,15 +147,19 @@
max_epochs=self.max_epochs,
lr=self.lr,
optimizer=self.optimizer,
# callbacks=[EarlyStopping(patience=5)],
callbacks=[
(
"lr_scheduler",
LRScheduler(policy="ReduceLROnPlateau", patience=5, factor=0.5),
),
(
"early_stopping",
EarlyStopping(monitor="train_loss", patience=10, threshold=1e-3),
EarlyStoppingCustom(
monitor="train_loss",
patience=10,
threshold=1e-3,
load_best=True,
),
),
],
verbose=0,
@@ -191,21 +217,94 @@ def predict(self, X, return_std=False):

def get_grid_params(self, search_type="random"):
"""Returns the grid parameters for the emulator."""
param_space = {
"covar_module": [
# TODO: initialize lengthscale for other kernels?
gpytorch.kernels.RBFKernel().initialize(lengthscale=1.0),
gpytorch.kernels.MaternKernel(nu=2.5),
gpytorch.kernels.MaternKernel(nu=1.5),
gpytorch.kernels.PeriodicKernel(),
gpytorch.kernels.RQKernel(),
],
"mean_module": [gpytorch.means.ConstantMean(), gpytorch.means.ZeroMean()],
"optimizer": [torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD],
"lr": [5e-1, 1e-1, 5e-2, 1e-2],
"max_epochs": [50, 100, 200],
"normalize_y": [True, False],
}
if search_type == "random":
param_space = {
"covar_module": [
# TODO: initialize lengthscale for other kernels?
lambda n_features: gpytorch.kernels.RBFKernel(
ard_num_dims=n_features
).initialize(lengthscale=torch.ones(n_features) * 1.0),
lambda n_features: gpytorch.kernels.MaternKernel(
nu=2.5, ard_num_dims=n_features
),
lambda n_features: gpytorch.kernels.MaternKernel(
nu=1.5, ard_num_dims=n_features
),
gpytorch.kernels.PeriodicKernel(),
lambda n_features: gpytorch.kernels.RQKernel(
ard_num_dims=n_features
),
],
"mean_module": [
gpytorch.means.ConstantMean(),
gpytorch.means.ZeroMean(),
lambda n_features: gpytorch.means.LinearMean(input_size=n_features),
lambda n_features: PolyMean(degree=2, input_size=n_features),
],
"optimizer": [torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD],
"lr": [5e-1, 1e-1, 5e-2, 1e-2],
"max_epochs": [
50,
100,
200,
400,
800,
],
"normalize_y": [True, False],
}
else:
param_space = {
"covar_module": Categorical(
[
# TODO: initialize lengthscale for other kernels?
lambda n_features: gpytorch.kernels.RBFKernel(
ard_num_dims=n_features
).initialize(lengthscale=torch.ones(n_features) * 1.0),
lambda n_features: gpytorch.kernels.MaternKernel(
nu=2.5, ard_num_dims=n_features
),
lambda n_features: gpytorch.kernels.MaternKernel(
nu=1.5, ard_num_dims=n_features
),
gpytorch.kernels.PeriodicKernel(),
lambda n_features: gpytorch.kernels.RQKernel(
ard_num_dims=n_features
),
]
),
"mean_module": Categorical(
[
gpytorch.means.ConstantMean(),
gpytorch.means.ZeroMean(),
lambda n_features: gpytorch.means.LinearMean(
input_size=n_features
),
lambda n_features: PolyMean(degree=2, input_size=n_features),
]
),
"optimizer": Categorical(
[
# torch.optim.AdamW,
torch.optim.Adam,
# torch.optim.SGD,
]
),
"lr": Categorical([5e-1, 1e-1, 5e-2, 1e-2]),
"max_epochs": Categorical(
[
50,
100,
200,
400,
800,
]
),
"normalize_y": Categorical(
[
True,
]
),
}
return param_space

@property
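The search spaces above store dimension-dependent kernels and means as callables taking `n_features`, which `fit` resolves once `n_features_in_` is known. Below is a minimal sketch of that pattern, assuming a feature count of 4 purely for illustration:

import gpytorch
import torch

# A kernel that needs the feature dimension is stored as a lambda, as in
# param_space above: ard_num_dims is only known once the data is seen.
covar_module = lambda n_features: gpytorch.kernels.RBFKernel(
    ard_num_dims=n_features
).initialize(lengthscale=torch.ones(n_features) * 1.0)

n_features_in_ = 4  # assumed feature count for this sketch
# Same resolution logic as in fit(): call the entry if it is callable.
resolved = covar_module(n_features_in_) if callable(covar_module) else covar_module
print(resolved.lengthscale.shape)  # torch.Size([1, 4])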
3 changes: 3 additions & 0 deletions autoemulate/emulators/gaussian_process_utils/__init__.py
@@ -0,0 +1,3 @@
from .early_stopping_criterion import EarlyStoppingCustom
from .poly_mean import PolyMean
from .polynomial_features import PolynomialFeatures
64 changes: 64 additions & 0 deletions autoemulate/emulators/gaussian_process_utils/early_stopping_criterion.py
@@ -0,0 +1,64 @@
import numpy as np
from skorch.callbacks import EarlyStopping


class EarlyStoppingCustom(EarlyStopping):
"""Callback for stopping training when scores don't improve.
Stop training early if the specified `monitor` metric has not
improved by at least `threshold` within `patience` epochs.
**Note**: This version is virtually identical to `EarlyStopping`,
except that the method `_calc_new_threshold` is corrected to
ensure monotonicity. See also
https://github.com/skorch-dev/skorch/pull/1065.
Parameters
----------
monitor : str (default='valid_loss')
Value of the history to monitor to decide whether to stop
training or not. The value is expected to be double and is
commonly provided by scoring callbacks such as
:class:`skorch.callbacks.EpochScoring`.
lower_is_better : bool (default=True)
Whether lower scores should be considered better or worse.
patience : int (default=5)
Number of epochs to wait for improvement of the monitor value
until the training process is stopped.
threshold : float (default=1e-4)
Ignore score improvements smaller than `threshold`.
threshold_mode : str (default='rel')
One of `rel`, `abs`. Decides whether the `threshold` value is
interpreted in absolute terms or as a fraction of the best
score so far (relative).
sink : callable (default=print)
The target that the information about early stopping is
sent to. By default, the output is printed to stdout, but the
sink could also be a logger or :func:`~skorch.utils.noop`.
load_best: bool (default=False)
Whether to restore module weights from the epoch with the best value of
the monitored quantity. If False, the module weights obtained at the
last step of training are used. Note that only the module is restored.
Use the ``Checkpoint`` callback with the :attr:`~Checkpoint.load_best`
argument set to ``True`` if you need to restore the whole object.
"""

def _calc_new_threshold(self, score):
"""Determine threshold based on score."""
if self.threshold_mode == "rel":
abs_threshold_change = self.threshold * np.abs(score)
else:
abs_threshold_change = self.threshold

if self.lower_is_better:
new_threshold = score - abs_threshold_change
else:
new_threshold = score + abs_threshold_change
return new_threshold
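A small numeric check of the corrected relative threshold, assuming `lower_is_better=True` and a negative monitored score such as a GP `train_loss` (the non-monotonic behaviour without the absolute value is what the linked skorch PR addresses):

import numpy as np

score, threshold = -2.0, 1e-3  # assumed values for this sketch

# Corrected update, as in _calc_new_threshold: the change is always
# non-negative, so the bar to beat sits strictly below the best score.
new_threshold = score - threshold * np.abs(score)  # -2.002

# Without np.abs, threshold * score is negative here, the bar (-1.998)
# lands above the best score, and a slightly worse score such as -1.999
# would still be counted as an improvement.
broken_threshold = score - threshold * score  # -1.998
print(new_threshold, broken_threshold)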
53 changes: 53 additions & 0 deletions autoemulate/emulators/gaussian_process_utils/poly_mean.py
@@ -0,0 +1,53 @@
import gpytorch
import torch

from .polynomial_features import PolynomialFeatures


class PolyMean(gpytorch.means.Mean):
"""A geneneral polynomial mean module to be used to construct
`guassian_process_torch` emulators.
Parameters
--------
degree : int
The degree of the polynomial for which we are defining
the mapping.
input_size : int
The number of features to be mapped.
barch_shape : int
bias : bool
Flag for including a bias in the defnition of the polymial.
If set to `False` polynomial includes weights only.
"""

def __init__(self, degree, input_size, batch_shape=torch.Size(), bias=True):
super().__init__()
self.degree = degree
self.input_size = input_size

self.poly = PolynomialFeatures(self.degree, self.input_size)
self.poly.fit()

n_weights = len(self.poly.indices)
self.register_parameter(
name="weights",
parameter=torch.nn.Parameter(torch.randn(*batch_shape, n_weights, 1)),
)

if bias:
self.register_parameter(
name="bias", parameter=torch.nn.Parameter(torch.randn(*batch_shape, 1))
)
else:
self.bias = None

def forward(self, x):
x_ = self.poly.transform(x)
res = x_.matmul(self.weights).squeeze(-1)
if self.bias is not None:
res = res + self.bias
return res

def __repr__(self):
return f"Polymean(degree={self.degree}, input_size={self.input_size})"
74 changes: 74 additions & 0 deletions autoemulate/emulators/gaussian_process_utils/polynomial_features.py
@@ -0,0 +1,74 @@
import itertools

import numpy as np
import torch


class PolynomialFeatures:
"""
This class is used to map an existing feature set to a
polynomial feature set.
Examples
--------
Initialize the class to map the feature `X` (`n1` samples x `n2` features):
>>> pf = PolynomialFeatures(degree=2, input_size=X.shape[1])
Fit the instance to predefine the feature combinations that will be multiplied to create the new features:
>>> pf.fit()
Generate the new polynomial feature set:
>>> X_deg_2 = pf.transform(X)
Parameters
----------
degree : int
The degree of the polynomial for which we are defining
the mapping.
input_size : int
The number of features to be mapped.
"""

def __init__(self, degree: int, input_size: int):
assert degree > 0, "`degree` input must be greater than 0."
assert (
input_size > 0
), "`input_size`, which defines the number of features, has to be greater than 0."
self.degree = degree
self.indices = None
self.input_size = input_size

def fit(self):
# Enumerate all sorted index combinations (with repetition) of the
# feature indices, for each degree from `self.degree` down to 2, then
# append the single-feature (degree-1) terms.
x = list(range(self.input_size))

d = self.degree
L = []
while d > 1:
l = [list(p) for p in itertools.product(x, repeat=d)]
for li in l:
li.sort()
L += list(map(list, np.unique(l, axis=0)))
d -= 1
L += [[i] for i in x]

# Reorder the index tuples by ascending degree, sorting within each degree.
Ls = []
for d in range(1, self.degree + 1):
ld = []
for l in L:
if len(l) == d:
ld.append(l)
ld.sort()
Ls += ld
self.indices = Ls

def transform(self, x):
if not self.indices:
raise ValueError(
"self.indices is set to None. Did you forget to call 'fit'?"
)

x_ = torch.stack([torch.prod(x[..., i], dim=-1) for i in self.indices], dim=-1)
return x_
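A worked example of the fit/transform contract for degree 2 and two features: `fit` enumerates the sorted index tuples and `transform` multiplies the corresponding feature columns:

import torch

from autoemulate.emulators.gaussian_process_utils import PolynomialFeatures

pf = PolynomialFeatures(degree=2, input_size=2)
pf.fit()
# Degree-1 terms first, then degree-2: x0, x1, x0^2, x0*x1, x1^2.
print([[int(i) for i in idx] for idx in pf.indices])  # [[0], [1], [0, 0], [0, 1], [1, 1]]

x = torch.tensor([[2.0, 3.0]])
print(pf.transform(x))  # tensor([[2., 3., 4., 6., 9.]])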