From 594f7b18fc84e55b93be97ca7fa55dd7e85cb24b Mon Sep 17 00:00:00 2001
From: Ryan Roussel <rroussel@slac.stanford.edu>
Date: Sun, 29 Dec 2024 14:35:47 -0800
Subject: [PATCH 1/4] fix some of the bugs introduced by updating botorch

---
 pyproject.toml                                |  4 +-
 xopt/generators/bayesian/bax/acquisition.py   |  4 +-
 xopt/generators/bayesian/bax/algorithms.py    |  3 +-
 .../custom_botorch/heteroskedastic.py         | 45 ++++++++++++++-----
 xopt/generators/bayesian/turbo.py             |  4 +-
 .../bayesian/test_bayesian_generator.py       |  2 +-
 .../bayesian/test_model_constructor.py        |  4 +-
 7 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 77bf115a..c7761bef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
   "numpy",
   "pydantic>=2.3",
   "pyyaml",
-  "botorch>=0.9.2,<=0.10.0",
+  "botorch",
   "scipy>=1.10.1",
   "pandas",
   "ipywidgets",
@@ -34,7 +34,7 @@ dynamic = [ "version" ]
 keywords = []
 name = "xopt"
 readme = {file = "README.md", content-type = "text/markdown"}
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 
 [project.optional-dependencies]
 dev = [
diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py
index fb059106..fdcfc72b 100644
--- a/xopt/generators/bayesian/bax/acquisition.py
+++ b/xopt/generators/bayesian/bax/acquisition.py
@@ -37,14 +37,14 @@ def __init__(self, model: Model, algorithm: Algorithm, bounds: Tensor) -> None:
         ) = self.algorithm.get_execution_paths(self.model, bounds)
 
         # Need to call the model on some data before we can condition_on_observations
-        self.model(*[self.xs_exe[:1, 0:1, 0:] for m in model.models])
+        self.model(*[self.xs_exe[:1, 0:1, 0:] for m in model.modules()])
 
         # construct a batch of size n_samples fantasy models,
         # where each fantasy model is produced by taking the model
         # at the current iteration and conditioning it
         # on one of the sampled execution path subsequences:
         xs_exe_t = [
-            model.models[i].input_transform(self.xs_exe)
+            list(model.modules())[i].input_transform(self.xs_exe)
             for i in range(len(model.models))
         ]
         ys_exe_t = [
diff --git a/xopt/generators/bayesian/bax/algorithms.py b/xopt/generators/bayesian/bax/algorithms.py
index f3ddcb89..cf67e682 100644
--- a/xopt/generators/bayesian/bax/algorithms.py
+++ b/xopt/generators/bayesian/bax/algorithms.py
@@ -76,7 +76,8 @@ def get_execution_paths(
         """get execution paths that minimize the objective function"""
 
         # build evaluation mesh
-        test_points = self.create_mesh(bounds).to(model.models[0].train_targets)
+        print(5)
+        test_points = self.create_mesh(bounds).to(list(model.modules())[0].train_targets)
 
         # get samples of the model posterior at mesh points
         posterior_samples = self.evaluate_virtual_objective(
diff --git a/xopt/generators/bayesian/custom_botorch/heteroskedastic.py b/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
index 9e013e23..ad960f13 100644
--- a/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
+++ b/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
@@ -23,6 +23,29 @@
 
 
 class XoptHeteroskedasticSingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
+    r"""
+   Xopt copy of HeteroskedasticSingleTaskGP from botorch which allows for a user
+   to specify mean and covariance modules.
+
+   A single-task exact GP model using a heteroskedastic noise model.
+
+   This model differs from `SingleTaskGP` with observed observation noise
+   variances (`train_Yvar`) in that it can predict noise levels out of sample.
+   This is achieved by internally wrapping another GP (a `SingleTaskGP`) to model
+   the (log of) the observation noise. Noise levels must be provided to
+   `HeteroskedasticSingleTaskGP` as `train_Yvar`.
+
+   Examples of cases in which noise levels are known include online
+   experimentation and simulation optimization.
+
+   Example:
+       >>> train_X = torch.rand(20, 2)
+       >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
+       >>> se = torch.linalg.norm(train_X, dim=1, keepdim=True)
+       >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
+       >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
+   """
+
     def __init__(
         self,
         train_X: Tensor,
@@ -34,12 +57,7 @@ def __init__(
         covar_module: Optional[Module] = None,
     ) -> None:
         r"""
-        Xopt copy of HeteroskedasticSingleTaskGP from botorch which allows for a user
-        to specify mean and covariance modules.
-
-
-        Parameters
-        ----------
+        Args:
             train_X: A `batch_shape x n x d` tensor of training features.
             train_Y: A `batch_shape x n x m` tensor of training observations.
             train_Yvar: A `batch_shape x n x m` tensor of observed measurement
@@ -53,6 +71,7 @@ def __init__(
             input_transform: An input transfrom that is applied in the model's
                 forward pass.
         """
+
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
         self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
@@ -65,12 +84,19 @@ def __init__(
                 MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0
             ),
         )
+        # Likelihood will always get evaluated with transformed X, so we need to
+        # transform the training data before constructing the noise model.
+        with torch.no_grad():
+            transformed_X = self.transform_inputs(
+                X=train_X, input_transform=input_transform
+            )
         noise_model = SingleTaskGP(
-            train_X=train_X,
+            train_X=transformed_X,
             train_Y=train_Yvar,
             likelihood=noise_likelihood,
             outcome_transform=Log(),
-            input_transform=input_transform,
+            mean_module=mean_module,
+            covar_module=covar_module
         )
         likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model))
         # This is hacky -- this class used to inherit from SingleTaskGP, but it
@@ -82,9 +108,8 @@ def __init__(
             train_X=train_X,
             train_Y=train_Y,
             likelihood=likelihood,
+            outcome_transform=None,
             input_transform=input_transform,
-            mean_module=mean_module,
-            covar_module=covar_module,
         )
         self.register_added_loss_term("noise_added_loss")
         self.update_added_loss_term(
diff --git a/xopt/generators/bayesian/turbo.py b/xopt/generators/bayesian/turbo.py
index ea5b4441..a81fbc3f 100644
--- a/xopt/generators/bayesian/turbo.py
+++ b/xopt/generators/bayesian/turbo.py
@@ -113,10 +113,10 @@ def get_trust_region(self, generator) -> Tensor:
             weights = 1.0
 
             if model is not None:
-                if model.models[0].covar_module.base_kernel.lengthscale is not None:
+                if model.models[0].covar_module.lengthscale is not None:
                     lengthscales = model.models[
                         0
-                    ].covar_module.base_kernel.lengthscale.detach()
+                    ].covar_module.lengthscale.detach()
 
                     # calculate the ratios of lengthscales for each axis
                     weights = lengthscales / torch.prod(lengthscales) ** (1 / self.dim)
diff --git a/xopt/tests/generators/bayesian/test_bayesian_generator.py b/xopt/tests/generators/bayesian/test_bayesian_generator.py
index e9722c22..dba7cf95 100644
--- a/xopt/tests/generators/bayesian/test_bayesian_generator.py
+++ b/xopt/tests/generators/bayesian/test_bayesian_generator.py
@@ -133,7 +133,7 @@ def test_transforms(self):
         input_transform = Normalize(1, bounds=torch.tensor(sinusoid_vocs.bounds))
         for inputs in model.train_inputs:
             assert torch.allclose(
-                inputs[0], input_transform(torch.from_numpy(X.data["x1"].to_numpy())).T
+                inputs[0].unsqueeze(-1).T, input_transform(torch.from_numpy(X.data["x1"].to_numpy()).unsqueeze(-1)).T
             )
 
         # test outcome transform(s)
diff --git a/xopt/tests/generators/bayesian/test_model_constructor.py b/xopt/tests/generators/bayesian/test_model_constructor.py
index e5c01186..326ce93d 100644
--- a/xopt/tests/generators/bayesian/test_model_constructor.py
+++ b/xopt/tests/generators/bayesian/test_model_constructor.py
@@ -536,8 +536,8 @@ def test_model_caching(self):
 
         state = deepcopy(constructor._hyperparameter_store)
         assert torch.equal(
-            old_model.models[0].covar_module.base_kernel.raw_lengthscale,
-            state["models.0.covar_module.base_kernel.raw_lengthscale"],
+            old_model.models[0].covar_module.raw_lengthscale,
+            state["models.0.covar_module.raw_lengthscale"],
         )
 
         # add data and use the cached model hyperparameters

From bd1d68774e17bd40ad5c08a35a6eb4d3e0245ffc Mon Sep 17 00:00:00 2001
From: Ryan Roussel <rroussel@slac.stanford.edu>
Date: Fri, 3 Jan 2025 13:16:30 -0600
Subject: [PATCH 2/4] fix bugs in bax

---
 xopt/generators/bayesian/bax/acquisition.py   |  4 +--
 xopt/generators/bayesian/bax/algorithms.py    |  9 +++--
 xopt/generators/bayesian/bax_generator.py     |  5 +++
 .../custom_botorch/heteroskedastic.py         | 36 +++++++++----------
 xopt/generators/bayesian/turbo.py             |  4 +--
 .../bayesian/test_bayesian_generator.py       |  5 ++-
 6 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/xopt/generators/bayesian/bax/acquisition.py b/xopt/generators/bayesian/bax/acquisition.py
index fdcfc72b..39092d48 100644
--- a/xopt/generators/bayesian/bax/acquisition.py
+++ b/xopt/generators/bayesian/bax/acquisition.py
@@ -37,14 +37,14 @@ def __init__(self, model: Model, algorithm: Algorithm, bounds: Tensor) -> None:
         ) = self.algorithm.get_execution_paths(self.model, bounds)
 
         # Need to call the model on some data before we can condition_on_observations
-        self.model(*[self.xs_exe[:1, 0:1, 0:] for m in model.modules()])
+        self.model.posterior(*[self.xs_exe[:1, 0:1, 0:] for m in model.models])
 
         # construct a batch of size n_samples fantasy models,
         # where each fantasy model is produced by taking the model
         # at the current iteration and conditioning it
         # on one of the sampled execution path subsequences:
         xs_exe_t = [
-            list(model.modules())[i].input_transform(self.xs_exe)
+            list(model.models)[i].input_transform(self.xs_exe)
             for i in range(len(model.models))
         ]
         ys_exe_t = [
diff --git a/xopt/generators/bayesian/bax/algorithms.py b/xopt/generators/bayesian/bax/algorithms.py
index cf67e682..dd1d40e5 100644
--- a/xopt/generators/bayesian/bax/algorithms.py
+++ b/xopt/generators/bayesian/bax/algorithms.py
@@ -2,7 +2,7 @@
 from typing import ClassVar, Dict, List, Tuple
 
 import torch
-from botorch.models.model import Model
+from botorch.models.model import Model, ModelList
 from pydantic import Field, PositiveInt
 from torch import Tensor
 
@@ -76,8 +76,11 @@ def get_execution_paths(
         """get execution paths that minimize the objective function"""
 
         # build evaluation mesh
-        print(5)
-        test_points = self.create_mesh(bounds).to(list(model.modules())[0].train_targets)
+        test_points = self.create_mesh(bounds)
+        if isinstance(model, ModelList):
+            test_points = test_points.to(model.models[0].train_targets)
+        else:
+            test_points = test_points.to(model.train_targets)
 
         # get samples of the model posterior at mesh points
         posterior_samples = self.evaluate_virtual_objective(
diff --git a/xopt/generators/bayesian/bax_generator.py b/xopt/generators/bayesian/bax_generator.py
index 5467dc1f..58b015a1 100644
--- a/xopt/generators/bayesian/bax_generator.py
+++ b/xopt/generators/bayesian/bax_generator.py
@@ -3,6 +3,7 @@
 from copy import deepcopy
 from typing import Dict
 
+from botorch.models import ModelListGP, SingleTaskGP
 from pydantic import Field, field_validator
 from pydantic_core.core_schema import ValidationInfo
 
@@ -53,6 +54,10 @@ def _get_acquisition(self, model):
             for name in self.algorithm.observable_names_ordered
         ]
         bax_model = model.subset_output(bax_model_ids)
+
+        if isinstance(bax_model, SingleTaskGP):
+            bax_model = ModelListGP(bax_model)
+
         eig = ModelListExpectedInformationGain(
             bax_model, self.algorithm, self._get_optimization_bounds()
         )
diff --git a/xopt/generators/bayesian/custom_botorch/heteroskedastic.py b/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
index ad960f13..c92a356c 100644
--- a/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
+++ b/xopt/generators/bayesian/custom_botorch/heteroskedastic.py
@@ -24,27 +24,27 @@
 
 class XoptHeteroskedasticSingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP):
     r"""
-   Xopt copy of HeteroskedasticSingleTaskGP from botorch which allows for a user
-   to specify mean and covariance modules.
+    Xopt copy of HeteroskedasticSingleTaskGP from botorch which allows for a user
+    to specify mean and covariance modules.
 
-   A single-task exact GP model using a heteroskedastic noise model.
+    A single-task exact GP model using a heteroskedastic noise model.
 
-   This model differs from `SingleTaskGP` with observed observation noise
-   variances (`train_Yvar`) in that it can predict noise levels out of sample.
-   This is achieved by internally wrapping another GP (a `SingleTaskGP`) to model
-   the (log of) the observation noise. Noise levels must be provided to
-   `HeteroskedasticSingleTaskGP` as `train_Yvar`.
+    This model differs from `SingleTaskGP` with observed observation noise
+    variances (`train_Yvar`) in that it can predict noise levels out of sample.
+    This is achieved by internally wrapping another GP (a `SingleTaskGP`) to model
+    the (log of the) observation noise. Noise levels must be provided to
+    `XoptHeteroskedasticSingleTaskGP` as `train_Yvar`.
 
-   Examples of cases in which noise levels are known include online
-   experimentation and simulation optimization.
+    Examples of cases in which noise levels are known include online
+    experimentation and simulation optimization.
 
-   Example:
-       >>> train_X = torch.rand(20, 2)
-       >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
-       >>> se = torch.linalg.norm(train_X, dim=1, keepdim=True)
-       >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
-       >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
-   """
+    Example:
+        >>> train_X = torch.rand(20, 2)
+        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
+        >>> se = torch.linalg.norm(train_X, dim=1, keepdim=True)
+        >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
+        >>> model = XoptHeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
+    """
 
     def __init__(
         self,
@@ -96,7 +96,7 @@ def __init__(
             likelihood=noise_likelihood,
             outcome_transform=Log(),
             mean_module=mean_module,
-            covar_module=covar_module
+            covar_module=covar_module,
         )
         likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model))
         # This is hacky -- this class used to inherit from SingleTaskGP, but it
diff --git a/xopt/generators/bayesian/turbo.py b/xopt/generators/bayesian/turbo.py
index a81fbc3f..0cc32e2b 100644
--- a/xopt/generators/bayesian/turbo.py
+++ b/xopt/generators/bayesian/turbo.py
@@ -114,9 +114,7 @@ def get_trust_region(self, generator) -> Tensor:
 
             if model is not None:
                 if model.models[0].covar_module.lengthscale is not None:
-                    lengthscales = model.models[
-                        0
-                    ].covar_module.lengthscale.detach()
+                    lengthscales = model.models[0].covar_module.lengthscale.detach()
 
                     # calculate the ratios of lengthscales for each axis
                     weights = lengthscales / torch.prod(lengthscales) ** (1 / self.dim)
diff --git a/xopt/tests/generators/bayesian/test_bayesian_generator.py b/xopt/tests/generators/bayesian/test_bayesian_generator.py
index dba7cf95..ff200fb4 100644
--- a/xopt/tests/generators/bayesian/test_bayesian_generator.py
+++ b/xopt/tests/generators/bayesian/test_bayesian_generator.py
@@ -133,7 +133,10 @@ def test_transforms(self):
         input_transform = Normalize(1, bounds=torch.tensor(sinusoid_vocs.bounds))
         for inputs in model.train_inputs:
             assert torch.allclose(
-                inputs[0].unsqueeze(-1).T, input_transform(torch.from_numpy(X.data["x1"].to_numpy()).unsqueeze(-1)).T
+                inputs[0].unsqueeze(-1).T,
+                input_transform(
+                    torch.from_numpy(X.data["x1"].to_numpy()).unsqueeze(-1)
+                ).T,
             )
 
         # test outcome transform(s)

From d452e96c7f0b4d61ba7b49934a5e482945e7bbfb Mon Sep 17 00:00:00 2001
From: Ryan Roussel <rroussel@slac.stanford.edu>
Date: Fri, 3 Jan 2025 13:19:52 -0600
Subject: [PATCH 3/4] make changes to github actions and setup

---
 .github/actions/conda-setup/action.yml | 2 +-
 .github/workflows/tests.yml            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/actions/conda-setup/action.yml b/.github/actions/conda-setup/action.yml
index 75b7acc0..e70cd540 100644
--- a/.github/actions/conda-setup/action.yml
+++ b/.github/actions/conda-setup/action.yml
@@ -8,7 +8,7 @@ inputs:
   python-version:
     description: "Conda environment Python version"
     required: false
-    default: "3.9"
+    default: "3.10"
   env_name:
     description: "Conda environment name to create"
     required: false
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 86db75ca..94cd26c2 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
 
     name: Test Suite
     steps:

From 1a19e81e5b878e307c8bc069f741ca930e47960b Mon Sep 17 00:00:00 2001
From: Ryan Roussel <rroussel@slac.stanford.edu>
Date: Fri, 3 Jan 2025 13:20:25 -0600
Subject: [PATCH 4/4] Update environment.yml

---
 environment.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 1991efc9..bd7ce5d2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,12 +3,12 @@ name: xopt-dev
 channels:
   - conda-forge
 dependencies:
-  - python>=3.9
+  - python>=3.10
   - deap
   - numpy
   - pydantic>=2.3
   - pyyaml
-  - botorch>=0.9.2,<=0.10.0
+  - botorch
   - scipy>=1.10.1
   - pandas
   - ipywidgets