Conditional Neural Processes #217

Merged · 38 commits · Aug 12, 2024

Commits
4311cc9
add conditional neural process
mastoffel Jun 27, 2024
98ec3b4
adjust cnp
mastoffel Jun 27, 2024
d561350
rename cnp and fix dtypes
mastoffel Jul 2, 2024
79245b5
add random state to cnp
mastoffel Jul 2, 2024
d152f57
change context points from number to proportion
mastoffel Jul 2, 2024
5aea65e
make loss more robust
mastoffel Jul 2, 2024
cf0ae1c
change context proportion to n_points again
mastoffel Jul 2, 2024
e0d6bb9
fix slow training issue
mastoffel Jul 2, 2024
d5c479d
fix dtype issue
mastoffel Jul 3, 2024
88df55a
make predictions 1d when input y is 1d to satisfy tests
mastoffel Jul 3, 2024
e5011b7
slight renaming
mastoffel Jul 3, 2024
7bb0983
make random state work with cnp
mastoffel Jul 3, 2024
72fcba7
add cnp loss
mastoffel Jul 4, 2024
8fea19d
adjust encoder and decoder to work with b x n x d inputs, including t…
mastoffel Jul 5, 2024
ef566fc
half way into correcting decoder
mastoffel Jul 5, 2024
75dbea4
simplify encoder, decoder
mastoffel Jul 8, 2024
a9d4d8e
add tests for CNPModule
mastoffel Jul 8, 2024
37367c7
add CNPDataset class
mastoffel Jul 8, 2024
9622b21
fix cnp predict
mastoffel Jul 8, 2024
f3c66ca
add custom dataset to deal with unequal batch samples
mastoffel Jul 8, 2024
78dea84
rename to max_context_points
mastoffel Jul 8, 2024
fec4029
add loss function
mastoffel Jul 10, 2024
4ba1dc7
add flexible activation
mastoffel Jul 10, 2024
b0077d6
update tests
mastoffel Jul 10, 2024
fc9e16e
cleanup
mastoffel Jul 11, 2024
792ba39
adjust cnp params
mastoffel Jul 11, 2024
55c122f
clean docs
mastoffel Jul 12, 2024
e0a42f0
fix __repr__
mastoffel Jul 12, 2024
11d33d8
update docs w cnp
mastoffel Jul 12, 2024
dbdb188
add collate tests
mastoffel Jul 16, 2024
1cfb10c
add masking to cnp to make unequal sample sizes stable
mastoffel Jul 16, 2024
89418c6
implement normalize_y for cnp
mastoffel Jul 17, 2024
f422e3b
modify cnp param search space
mastoffel Jul 19, 2024
1df5521
start attentive cnp
mastoffel Jul 19, 2024
7aea4ab
add cross-attention cnp
mastoffel Jul 23, 2024
a77f9cc
adjust attentive cnp model name
mastoffel Jul 23, 2024
6d417f3
add plotnine as dev dependency
mastoffel Aug 9, 2024
4659143
remove attn cnp
mastoffel Aug 12, 2024
2 changes: 1 addition & 1 deletion autoemulate/compare.py

@@ -83,7 +83,7 @@ def setup(
             Number of parameter settings that are sampled. Only used if
             param_search=True and param_search_type="random".
         scale : bool, default=True
-            Whether to scale the data before fitting the models using a scaler.
+            Whether to scale features/parameters in X before fitting the models using a scaler.
         scaler : sklearn.preprocessing.StandardScaler
            Scaler to use. Defaults to StandardScaler. Can be any sklearn scaler.
         reduce_dim : bool, default=False
4 changes: 3 additions & 1 deletion autoemulate/emulators/__init__.py

@@ -1,12 +1,13 @@
+from .conditional_neural_process import ConditionalNeuralProcess
 from .gaussian_process import GaussianProcess
 from .gaussian_process_mogp import GaussianProcessMOGP
 from .gradient_boosting import GradientBoosting
 from .light_gbm import LightGBM
 from .neural_net_sk import NeuralNetSk
 from .neural_net_torch import NeuralNetTorch
 from .polynomials import SecondOrderPolynomial
+from .radial_basis_functions import RadialBasisFunctions
 from .random_forest import RandomForest
-from .rbf import RadialBasisFunctions
 from .support_vector_machines import SupportVectorMachines

 MODEL_REGISTRY = {
@@ -24,4 +25,5 @@
         module="RadialBasisFunctionsNetwork"
     ),
     NeuralNetSk().model_name: NeuralNetSk(),
+    ConditionalNeuralProcess().model_name: ConditionalNeuralProcess(),
 }
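
MODEL_REGISTRY maps each emulator's model_name to a ready-made instance, which is what makes the new model discoverable by name. A minimal lookup sketch (assuming only the registry shape shown above; this snippet is illustrative, not code from this PR):

    from autoemulate.emulators import MODEL_REGISTRY

    # "ConditionalNeuralProcess" is the value of the model_name property
    cnp = MODEL_REGISTRY["ConditionalNeuralProcess"]
    print(type(cnp).__name__)  # ConditionalNeuralProcess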
302 changes: 302 additions & 0 deletions autoemulate/emulators/conditional_neural_process.py

@@ -0,0 +1,302 @@
import warnings

import numpy as np
import torch
from scipy.stats import loguniform
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.preprocessing._data import _handle_zeros_in_scale
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
from skopt.space import Real
from skorch import NeuralNetRegressor
from skorch.callbacks import EarlyStopping
from skorch.callbacks import GradientNormClipping
from skorch.callbacks import LRScheduler
from torch import nn

from autoemulate.emulators.neural_networks.cnp_module import CNPModule
from autoemulate.emulators.neural_networks.datasets import cnp_collate_fn
from autoemulate.emulators.neural_networks.datasets import CNPDataset
from autoemulate.emulators.neural_networks.losses import CNPLoss
from autoemulate.utils import set_random_seed


class ConditionalNeuralProcess(RegressorMixin, BaseEstimator):
    """
    Conditional Neural Process (CNP) Regressor.

    This model wraps a meta-learning approach in a scikit-learn regressor, so fit() accepts standard X, y inputs.
    A CNP operates on context points and target points, organised into episodes. For each episode, the model
    samples n_episode data points and designates a random subset of them as context points; all points in the
    episode, including the context points, are used as target points. Each episode therefore represents a single
    sample in the meta-learning framework. During each epoch, X.shape[0] episodes are sampled, ensuring that each
    data point is part of different contexts across episodes. The model can predict from a single input X, like
    other scikit-learn models; however, since the CNP requires context points for prediction, it stores the
    original X, y data as attributes, which are then used as context points when predicting for new X data.

    Parameters
    ----------
    hidden_dim : int, default=64
        The number of hidden units in the neural network layers.
    latent_dim : int, default=64
        The dimensionality of the latent space.
    hidden_layers : int, default=3
        The number of hidden layers in the neural network.
    min_context_points : int, default=3
        The minimum number of context points to use during training.
    max_context_points : int, default=10
        The maximum number of context points to use during training.
    n_episode : int, default=32
        The number of episodes to sample during each epoch.
    max_epochs : int, default=100
        The maximum number of epochs to train the model.
    lr : float, default=0.01
        The learning rate for the optimizer.
    batch_size : int, default=16
        The number of samples per batch.
    activation : callable, default=torch.nn.ReLU
        The activation function to use in the neural network layers.
    optimizer : callable, default=torch.optim.AdamW
        The optimizer to use for training the model.
    normalize_y : bool, default=True
        Whether to center the targets and scale them to unit variance before fitting.
    device : str, default="cpu"
        The device to use for training. Options are "cpu" or "cuda".
    random_state : int, default=None
        The seed used by the random number generator.
    attention : bool, default=False
        Whether to use an attentive CNP. Not implemented yet; always reset to False.

    References
    ----------
    [1] Garnelo, M., Rosenbaum, D., Maddison, C., Ramalho, T., Saxton, D., Shanahan, M., Teh, Y.W., Rezende, D., & Eslami, S.M.A. (2018).
        Conditional Neural Processes. In International Conference on Machine Learning (pp. 1704-1713). PMLR.

    Attributes
    ----------
    input_dim_ : int
        The number of features in the input data.
    output_dim_ : int
        The number of targets in the output data.
    model_ : skorch.NeuralNetRegressor
        The neural network model used for regression.
    X_train_ : ndarray of shape (n_samples, n_features)
        The training input samples.
    y_train_ : ndarray of shape (n_samples,) or (n_samples, n_outputs)
        The target values (real numbers) in the training set.

    Methods
    -------
    fit(X, y)
        Fit the model to the training data.
    predict(X, return_std=False)
        Predict using the trained model.

    Examples
    --------
    >>> import numpy as np
    >>> import torch
    >>> from autoemulate.emulators.conditional_neural_process import ConditionalNeuralProcess
    >>> X = np.random.rand(100, 10)
    >>> y = np.random.rand(100, 1)
    >>> cnp = ConditionalNeuralProcess(
    ...     hidden_dim=128, latent_dim=128, hidden_layers=4,
    ...     min_context_points=3, max_context_points=10, n_episode=32,
    ...     max_epochs=100, lr=0.01, batch_size=16,
    ...     activation=torch.nn.ReLU, optimizer=torch.optim.AdamW,
    ...     device="cpu", random_state=42,
    ... )
    >>> cnp.fit(X, y)
    >>> y_pred = cnp.predict(X)
    >>> y_pred.shape
    (100, 1)
    """

    def __init__(
        self,
        # architecture
        hidden_dim=64,
        latent_dim=64,
        hidden_layers=3,
        # data per episode
        min_context_points=3,
        max_context_points=10,
        n_episode=32,
        # training
        max_epochs=100,
        lr=1e-2,
        batch_size=16,
        activation=nn.ReLU,
        optimizer=torch.optim.AdamW,
        normalize_y=True,
        # misc
        device="cpu",
        random_state=None,
        attention=False,
    ):
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.hidden_layers = hidden_layers
        self.min_context_points = min_context_points
        self.max_context_points = max_context_points
        self.n_episode = n_episode
        self.max_epochs = max_epochs
        self.lr = lr
        self.batch_size = batch_size
        self.activation = activation
        self.optimizer = optimizer
        self.normalize_y = normalize_y
        if attention:
            warnings.warn("Attention is not implemented yet, setting to False.")
            attention = False
        self.attention = attention
        self.device = device
        self.random_state = random_state
        if self.random_state is not None:
            set_random_seed(self.random_state)

    def fit(self, X, y):
        X, y = check_X_y(
            X,
            y,
            multi_output=True,
            dtype=np.float32,
            copy=True,
            ensure_2d=True,
            # ensure_min_samples=self.n_episode,
            y_numeric=True,
        )
        # y also needs to be float32 and 2d
        y = y.astype(np.float32)
        self.y_dim_ = y.ndim
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)

        self.input_dim_ = X.shape[1]
        self.output_dim_ = y.shape[1]

        # Normalize target value
        # the zero handler is from sklearn
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False)

            # Remove mean and make unit variance
            y = (y - self._y_train_mean) / self._y_train_std

        if self.random_state is not None:
            set_random_seed(self.random_state)

        self.model_ = NeuralNetRegressor(
            CNPModule,
            module__input_dim=self.input_dim_,
            module__output_dim=self.output_dim_,
            module__hidden_dim=self.hidden_dim,
            module__latent_dim=self.latent_dim,
            module__hidden_layers=self.hidden_layers,
            module__activation=self.activation,
            dataset__min_context_points=self.min_context_points,
            dataset__max_context_points=self.max_context_points,
            dataset__n_episode=self.n_episode,
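            # the module__*/dataset__* prefixes above are routed by skorch to CNPModule and CNPDataset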
            max_epochs=self.max_epochs,
            lr=self.lr,
            batch_size=self.batch_size,
            optimizer=self.optimizer,
            device=self.device,
            dataset=CNPDataset,  # special dataset to sample context and target sets
            criterion=CNPLoss,
            iterator_train__collate_fn=cnp_collate_fn,  # special collate to handle different n per episode
            iterator_valid__collate_fn=cnp_collate_fn,
            callbacks=[
                ("early_stopping", EarlyStopping(patience=10)),
                (
                    "lr_scheduler",
                    LRScheduler(policy="ReduceLROnPlateau", patience=5, factor=0.5),
                ),
                ("grad_norm", GradientNormClipping(gradient_clip_value=1.0)),
            ],
            # train_split=None,
            verbose=0,
        )
        self.model_.fit(X, y)
        self.X_train_ = X
        self.y_train_ = y
        self.n_features_in_ = X.shape[1]
        return self

    def predict(self, X, return_std=False):
        check_is_fitted(self)
        X = check_array(X, dtype=np.float32)
        X_context = torch.from_numpy(self.X_train_).float().unsqueeze(0)
        y_context = torch.from_numpy(self.y_train_).float().unsqueeze(0)
        X_target = torch.from_numpy(X).float().unsqueeze(0)

        with torch.no_grad():
            predictions = self.model_.module_.forward(X_context, y_context, X_target)

        # needs to be float64 to pass estimator tests
        # squeeze out batch dimension again so that score() etc. runs
        mean, logvar = predictions
        mean = mean[-X.shape[0] :].numpy().astype(np.float64).squeeze()
        logvar = logvar[-X.shape[0] :].numpy().astype(np.float64).squeeze()

        # undo normalization
        if self.normalize_y:
            mean = mean * self._y_train_std + self._y_train_mean
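            # scaling y back by std rescales the variance by std**2 (Var[a*Y] = a**2 * Var[Y])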
            var = np.exp(logvar) * (self._y_train_std**2)
            logvar = np.log(var)

        # if y is 1d, make predictions same shape
        if self.y_dim_ == 1:
            mean = mean.ravel()
            logvar = logvar.ravel()

        if return_std:
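            # std = sqrt(var) = exp(0.5 * logvar)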
            std = np.exp(0.5 * logvar)
            return mean, std
        else:
            return mean

    @staticmethod
    def get_grid_params(search_type: str = "random"):
        param_space = {
            "max_epochs": [100, 200, 300],
            "batch_size": [16, 32, 64],
            "hidden_dim": [32, 64, 128],
            "latent_dim": [32, 64, 128],
            "max_context_points": [10, 20, 30],
            "hidden_layers": [1, 2, 3, 4, 5],
            "activation": [
                nn.ReLU,
                # nn.Tanh,
                nn.GELU,
                # nn.Sigmoid,
            ],
            "optimizer": [torch.optim.AdamW, torch.optim.SGD],
        }
        # lr search is currently disabled:
        # match search_type:
        #     case "random":
        #         param_space |= {
        #             "lr": loguniform(1e-4, 1e-2),
        #         }
        #     case "bayes":
        #         param_space |= {
        #             "lr": Real(1e-4, 1e-2, prior="log-uniform"),
        #         }
        #     case _:
        #         raise ValueError(f"Invalid search type: {search_type}")

        return param_space

    @property
    def model_name(self):
        if self.attention:
            return "AttentiveConditionalNeuralProcess"
        else:
            return "ConditionalNeuralProcess"

    def _more_tags(self):
        return {
            "multioutput": True,
            "poor_score": True,  # can be removed when max_epochs is ~1000 by default
            "non_deterministic": True,
        }

    def __repr__(self):
        # show architecture once fitted
        if hasattr(self, "model_"):
            return self.model_.__repr__()
        else:
            return super().__repr__()
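
To make the episode mechanism described in the class docstring concrete, here is a minimal sketch of sampling a single episode (a hypothetical illustration of the docstring's description, not the CNPDataset code from this PR; the names n_episode, min_context_points, and max_context_points mirror the estimator's parameters):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.random((100, 10)).astype(np.float32)
    y = rng.random((100, 1)).astype(np.float32)

    # one episode: sample n_episode points, then mark a random subset as context
    n_episode, min_context_points, max_context_points = 32, 3, 10
    idx = rng.choice(len(X), size=n_episode, replace=False)
    n_context = rng.integers(min_context_points, max_context_points + 1)

    # all episode points serve as targets; the context is a subset of them
    X_target, y_target = X[idx], y[idx]
    X_context, y_context = X[idx[:n_context]], y[idx[:n_context]]

At prediction time the stored training data plays the context role, which is why fit() keeps X_train_ and y_train_ as attributes.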