Merge pull request #42 from alan-turing-institute/pytorch-net

Add a Pytorch MLP wrapped in Skorch
alan-turing-institute · Nov 8, 2023 · 2d4f7d9 · 2d4f7d9
2 parents b46578f + 9460368
commit 2d4f7d9
Show file tree

Hide file tree

Showing 10 changed files with 313 additions and 19 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -26,14 +26,14 @@ jobs:
           python-version: ${{ matrix.python-version }}
 
       # Cache Poetry dependencies
-      - name: Cache dependencies
-        uses: actions/cache@v2
-        with:
-          path: ~/.cache/pypoetry
-          key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-poetry-
-            
+      # - name: Cache dependencies
+      #   uses: actions/cache@v2
+      #   with:
+      #     path: ~/.cache/pypoetry
+      #     key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-cpu
+      #     restore-keys: |
+      #       ${{ runner.os }}-poetry-
+
       - name: Install poetry
         run: |
           curl -sSL https://install.python-poetry.org | python -
@@ -42,10 +42,20 @@ jobs:
         run: |
           poetry config virtualenvs.create false
 
+      - name: Switch to CPU version of PyTorch
+        run: |
+          poetry remove torch
+          poetry source add -p explicit pytorch https://download.pytorch.org/whl/cpu
+          poetry add --source pytorch torch
+
       - name: Install dependencies
         run: |
           poetry install
 
+      - name: Verify PyTorch installation
+        run: |
+          poetry run python -c "import torch; print(torch.__version__); print('CUDA available:', torch.cuda.is_available())"
+      
       - name: Run Tests with Coverage
         run: |
           poetry run coverage run -m pytest

diff --git a/autoemulate/compare.py b/autoemulate/compare.py
@@ -51,7 +51,10 @@ def setup(
         log_to_file : bool
             Whether to log to file.
         """
-        self.X, self.y = check_X_y(X, y, multi_output=True, y_numeric=True)
+        self.X, self.y = check_X_y(
+            X, y, multi_output=True, y_numeric=True, dtype="float32"
+        )
+        self.y = self.y.astype("float32")  # needed for pytorch models
         self.models = [model() for model in MODEL_REGISTRY.values()]
         self.metrics = [metric for metric in METRIC_REGISTRY.keys()]
         self.cv = CV_REGISTRY[fold_strategy](folds=folds, shuffle=True)

diff --git a/autoemulate/emulators/__init__.py b/autoemulate/emulators/__init__.py
@@ -1,14 +1,16 @@
 from .base import Emulator
 from .gaussian_process import GaussianProcess
 from .gaussian_process_sk import GaussianProcessSk
-from .neural_network import NeuralNetwork
+from .neural_net_sk import NeuralNetSk
 from .random_forest import RandomForest
 from .radial_basis import RadialBasis
+from .neural_net_torch import NeuralNetTorch
 
 MODEL_REGISTRY = {
-    "GaussianProcess": GaussianProcess,
+    # "GaussianProcess": GaussianProcess,
     "GaussianProcessSk": GaussianProcessSk,
-    "NeuralNetwork": NeuralNetwork,
+    "NeuralNetSk": NeuralNetSk,
     "RandomForest": RandomForest,
     "RadialBasis": RadialBasis,
+    # "NeuralNetTorch": NeuralNetTorch,
 }
diff --git a/autoemulate/emulators/gaussian_process.py b/autoemulate/emulators/gaussian_process.py
@@ -33,7 +33,7 @@ def fit(self, X, y):
         X, y = check_X_y(X, y, multi_output=False, y_numeric=True)
         self.n_features_in_ = X.shape[1]
         self.model_ = mogp_emulator.GaussianProcess(X, y, nugget=self.nugget)
-        self.model_ = mogp_emulator.fit_GP_MAP(self.model_, n_tries=2)
+        self.model_ = mogp_emulator.fit_GP_MAP(self.model_, n_tries=15)
         self.is_fitted_ = True
         return self
 

diff --git a/autoemulate/emulators/neural_network.py → autoemulate/emulators/neural_net_sk.py b/autoemulate/emulators/neural_network.py → autoemulate/emulators/neural_net_sk.py
@@ -7,7 +7,7 @@
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 
 
-class NeuralNetwork(BaseEstimator, RegressorMixin):
+class NeuralNetSk(BaseEstimator, RegressorMixin):
     """Multi-layer perceptron Emulator.
 
     Implements MLPRegressor from scikit-learn.

diff --git a/autoemulate/emulators/neural_net_torch.py b/autoemulate/emulators/neural_net_torch.py
@@ -0,0 +1,90 @@
+# Experimental PyTorch neural network emulator, wrapped in Skorch to make it
+# compatible with scikit-learn. It works with cross_validate and GridSearchCV,
+# but does not pass the tests yet, because it subclasses NeuralNetRegressor.
+
+import torch
+import numpy as np
+import skorch
+from torch import nn
+from skorch import NeuralNetRegressor
+
+
+class InputShapeSetter(skorch.callbacks.Callback):
+    """Callback to set input and output layer sizes dynamically."""
+
+    def on_train_begin(self, net, X, y):
+        output_size = 1 if y.ndim == 1 else y.shape[1]
+        net.set_params(module__input_size=X.shape[1], module__output_size=output_size)
+
+
+# Step 1: Define the PyTorch Module for the MLP
+class MLPModule(nn.Module):
+    def __init__(self, input_size=10, hidden_layer_sizes=(50,), output_size=1):
+        super().__init__()
+        self.hidden_layers = nn.ModuleList()
+        self.output_layer = None
+
+        if input_size is not None and output_size is not None:
+            self.build_module(input_size, output_size, hidden_layer_sizes)
+
+    def build_module(self, input_size, output_size, hidden_layer_sizes):
+        hs = [input_size] + list(hidden_layer_sizes)
+        for i in range(len(hs) - 1):
+            self.hidden_layers.append(nn.Linear(hs[i], hs[i + 1]))
+        self.output_layer = nn.Linear(hidden_layer_sizes[-1], output_size)
+
+    def forward(self, X):
+        for layer in self.hidden_layers:
+            X = torch.relu(layer(X))
+        if self.output_layer is not None:
+            X = self.output_layer(X)
+        return X
+
+
+# Step 2: Create the Skorch wrapper for the NeuralNetRegressor
+class NeuralNetTorch(NeuralNetRegressor):
+    def __init__(
+        self,
+        module=MLPModule,
+        criterion=torch.nn.MSELoss,
+        optimizer=torch.optim.Adam,
+        lr=0.01,
+        batch_size=128,
+        max_epochs=10,
+        module__input_size=10,
+        module__output_size=1,
+        module__hidden_layer_sizes=(100,),
+        optimizer__weight_decay=0.0001,
+        iterator_train__shuffle=True,
+        callbacks=[InputShapeSetter()],
+        train_split=False,  # to run cross_validate without splitting the data
+        verbose=0,
+        **kwargs
+    ):
+        super().__init__(
+            module=module,
+            criterion=criterion,
+            optimizer=optimizer,
+            lr=lr,
+            batch_size=batch_size,
+            max_epochs=max_epochs,
+            module__input_size=module__input_size,
+            module__output_size=module__output_size,
+            module__hidden_layer_sizes=module__hidden_layer_sizes,
+            optimizer__weight_decay=optimizer__weight_decay,
+            iterator_train__shuffle=iterator_train__shuffle,
+            callbacks=callbacks,
+            train_split=train_split,
+            verbose=verbose,
+            **kwargs
+        )
+
+    def get_grid_params(self):
+        return {
+            "lr": [0.001, 0.01, 0.05],
+            "max_epochs": [10, 20, 30],
+            "module__hidden_layer_sizes": [(100,), (100, 100), (100, 100, 100)],
+        }
+
+    def _more_tags(self):
+        return {"multioutput": True}
diff --git a/autoemulate/emulators/radial_basis.py b/autoemulate/emulators/radial_basis.py
@@ -80,4 +80,9 @@ def get_grid_params(self):
         return param_grid
 
     def _more_tags(self):
-        return {"multioutput": True}
+        return {
+            "multioutput": True,
+            "_xfail_checks": {
+                "check_estimators_pickle": "Can't be pickled, written in C++"
+            },
+        }