Skip to content

Commit

Permalink
Merge pull request #42 from alan-turing-institute/pytorch-net
Browse files Browse the repository at this point in the history
Add a Pytorch MLP wrapped in Skorch
  • Loading branch information
mastoffel authored Nov 8, 2023
2 parents b46578f + 9460368 commit 2d4f7d9
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 19 deletions.
26 changes: 18 additions & 8 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ jobs:
python-version: ${{ matrix.python-version }}

# Cache Poetry dependencies
- name: Cache dependencies
uses: actions/cache@v2
with:
path: ~/.cache/pypoetry
key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-poetry-
# - name: Cache dependencies
# uses: actions/cache@v2
# with:
# path: ~/.cache/pypoetry
# key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-cpu
# restore-keys: |
# ${{ runner.os }}-poetry-

- name: Install poetry
run: |
curl -sSL https://install.python-poetry.org | python -
Expand All @@ -42,10 +42,20 @@ jobs:
run: |
poetry config virtualenvs.create false
- name: Switch to CPU version of PyTorch
run: |
poetry remove torch
poetry source add -p explicit pytorch https://download.pytorch.org/whl/cpu
poetry add --source pytorch torch
- name: Install dependencies
run: |
poetry install
- name: Verify PyTorch installation
run: |
poetry run python -c "import torch; print(torch.__version__); print('CUDA available:', torch.cuda.is_available())"
- name: Run Tests with Coverage
run: |
poetry run coverage run -m pytest
Expand Down
5 changes: 4 additions & 1 deletion autoemulate/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ def setup(
log_to_file : bool
Whether to log to file.
"""
self.X, self.y = check_X_y(X, y, multi_output=True, y_numeric=True)
self.X, self.y = check_X_y(
X, y, multi_output=True, y_numeric=True, dtype="float32"
)
self.y = self.y.astype("float32") # needed for pytorch models
self.models = [model() for model in MODEL_REGISTRY.values()]
self.metrics = [metric for metric in METRIC_REGISTRY.keys()]
self.cv = CV_REGISTRY[fold_strategy](folds=folds, shuffle=True)
Expand Down
8 changes: 5 additions & 3 deletions autoemulate/emulators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from .base import Emulator
from .gaussian_process import GaussianProcess
from .gaussian_process_sk import GaussianProcessSk
from .neural_network import NeuralNetwork
from .neural_net_sk import NeuralNetSk
from .random_forest import RandomForest
from .radial_basis import RadialBasis
from .neural_net_torch import NeuralNetTorch

MODEL_REGISTRY = {
"GaussianProcess": GaussianProcess,
# "GaussianProcess": GaussianProcess,
"GaussianProcessSk": GaussianProcessSk,
"NeuralNetwork": NeuralNetwork,
"NeuralNetSk": NeuralNetSk,
"RandomForest": RandomForest,
"RadialBasis": RadialBasis,
# "NeuralNetTorch": NeuralNetTorch,
}
2 changes: 1 addition & 1 deletion autoemulate/emulators/gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def fit(self, X, y):
X, y = check_X_y(X, y, multi_output=False, y_numeric=True)
self.n_features_in_ = X.shape[1]
self.model_ = mogp_emulator.GaussianProcess(X, y, nugget=self.nugget)
self.model_ = mogp_emulator.fit_GP_MAP(self.model_, n_tries=2)
self.model_ = mogp_emulator.fit_GP_MAP(self.model_, n_tries=15)
self.is_fitted_ = True
return self

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted


class NeuralNetwork(BaseEstimator, RegressorMixin):
class NeuralNetSk(BaseEstimator, RegressorMixin):
"""Multi-layer perceptron Emulator.
Implements MLPRegressor from scikit-learn.
Expand Down
90 changes: 90 additions & 0 deletions autoemulate/emulators/neural_net_torch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Experimental version of a PyTorch neural network emulator wrapped in skorch
# to make it compatible with scikit-learn. It works with cross_validate and
# GridSearchCV, but doesn't pass tests, because we're subclassing
# NeuralNetRegressor.

import torch
import numpy as np
import skorch
from torch import nn
from skorch import NeuralNetRegressor


class InputShapeSetter(skorch.callbacks.Callback):
    """Skorch callback that sizes the module's input/output layers from the data.

    When training begins, the number of columns of ``X`` is used as the
    module's ``input_size`` and the number of target columns of ``y`` (1 for
    a one-dimensional ``y``) as its ``output_size``.
    """

    def on_train_begin(self, net, X, y):
        # A 1-D target means a single output; otherwise one output per column.
        if y.ndim == 1:
            n_outputs = 1
        else:
            n_outputs = y.shape[1]
        n_inputs = X.shape[1]
        net.set_params(module__input_size=n_inputs, module__output_size=n_outputs)


# Step 1: Define the PyTorch Module for the MLP
class MLPModule(nn.Module):
    """Multi-layer perceptron with ReLU activations on the hidden layers.

    Parameters
    ----------
    input_size : int or None
        Number of input features. If None, no layers are built.
    hidden_layer_sizes : sequence of int
        Width of each hidden layer. May be empty, in which case the module
        consists of a single linear layer from input to output.
    output_size : int or None
        Number of outputs. If None, no layers are built.
    """

    def __init__(self, input_size=10, hidden_layer_sizes=(50,), output_size=1):
        super().__init__()
        self.hidden_layers = nn.ModuleList()
        self.output_layer = None

        # Layers can only be created once both end sizes are known.
        if input_size is not None and output_size is not None:
            self.build_module(input_size, output_size, hidden_layer_sizes)

    def build_module(self, input_size, output_size, hidden_layer_sizes):
        """Create the hidden layers and the output layer.

        Hidden layers are appended to ``self.hidden_layers`` and the output
        layer is stored in ``self.output_layer``.
        """
        layer_sizes = [input_size, *hidden_layer_sizes]
        for n_in, n_out in zip(layer_sizes, layer_sizes[1:]):
            self.hidden_layers.append(nn.Linear(n_in, n_out))
        # Use the last entry of the full size list (not hidden_layer_sizes[-1])
        # so that an empty hidden_layer_sizes connects input directly to
        # output instead of raising IndexError.
        self.output_layer = nn.Linear(layer_sizes[-1], output_size)

    def forward(self, X):
        """Apply each hidden layer followed by ReLU, then the output layer if built."""
        for layer in self.hidden_layers:
            X = torch.relu(layer(X))
        if self.output_layer is not None:
            X = self.output_layer(X)
        return X


# Step 2: Create the Skorch wrapper for the NeuralNetRegressor
class NeuralNetTorch(NeuralNetRegressor):
    """MLP regressor built on skorch's ``NeuralNetRegressor``.

    The constructor only supplies project defaults (``MLPModule`` as module,
    MSE loss, Adam, an ``InputShapeSetter`` callback, no internal train/valid
    split) and forwards every argument unchanged to the parent class.
    """

    def __init__(
        self,
        module=MLPModule,
        criterion=torch.nn.MSELoss,
        optimizer=torch.optim.Adam,
        lr=0.01,
        batch_size=128,
        max_epochs=10,
        module__input_size=10,
        module__output_size=1,
        module__hidden_layer_sizes=(100,),
        optimizer__weight_decay=0.0001,
        iterator_train__shuffle=True,
        # NOTE(review): this default list is created once, at definition time,
        # so every instance built with the default shares the same list and
        # the same InputShapeSetter object.
        callbacks=[InputShapeSetter()],
        train_split=False,  # to run cross_validate without splitting the data
        verbose=0,
        **kwargs
    ):
        # Collect the explicit settings first, then hand them to the parent
        # together with any extra keyword arguments.
        net_settings = {
            "module": module,
            "criterion": criterion,
            "optimizer": optimizer,
            "lr": lr,
            "batch_size": batch_size,
            "max_epochs": max_epochs,
            "module__input_size": module__input_size,
            "module__output_size": module__output_size,
            "module__hidden_layer_sizes": module__hidden_layer_sizes,
            "optimizer__weight_decay": optimizer__weight_decay,
            "iterator_train__shuffle": iterator_train__shuffle,
            "callbacks": callbacks,
            "train_split": train_split,
            "verbose": verbose,
        }
        super().__init__(**net_settings, **kwargs)

    def get_grid_params(self):
        """Return the hyperparameter grid used for grid search."""
        learning_rates = [0.001, 0.01, 0.05]
        epoch_counts = [10, 20, 30]
        architectures = [(100,), (100, 100), (100, 100, 100)]
        return {
            "lr": learning_rates,
            "max_epochs": epoch_counts,
            "module__hidden_layer_sizes": architectures,
        }

    def _more_tags(self):
        """Return extra estimator tags; marks the model as multi-output."""
        tags = {"multioutput": True}
        return tags
7 changes: 6 additions & 1 deletion autoemulate/emulators/radial_basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,9 @@ def get_grid_params(self):
return param_grid

def _more_tags(self):
return {"multioutput": True}
return {
"multioutput": True,
"_xfail_checks": {
"check_estimators_pickle": "Can't be pickled, written in C++"
},
}
Loading

0 comments on commit 2d4f7d9

Please sign in to comment.