Merge: Remove BoTorch Workaround (new) (#151)

- removes botorch workaround due to fixed type issue
- removes support for Python 3.8

Reimplements #88
emdgroup · Feb 28, 2024 · b0d37f8 · b0d37f8
2 parents 12d4d67 + 4ed8c3d
commit b0d37f8
Show file tree

Hide file tree

Showing 12 changed files with 58 additions and 68 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,6 +1,6 @@
 # NOTES:
 # - The map syntax used for matrix is flagged red but actually works
-# - This runs everything in Python 3.11, except the fulltest which is also run in 3.8
+# - This runs everything in Python 3.11, except the fulltest which is also run in 3.9
 # - Only coretest and fulltest environments are cached due to space limit
 
 name: Continuous Integration
@@ -124,7 +124,7 @@ jobs:
     needs: [typecheck, audit]
     strategy:
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'} ]
     name: Core Tests ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:
@@ -146,7 +146,7 @@ jobs:
     needs: [typecheck, audit]
     strategy:
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'}, {semantic: '3.11', tox: 'py311'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'}, {semantic: '3.11', tox: 'py311'} ]
     name: Full Tests ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -29,7 +29,7 @@ jobs:
           pip install tox
           tox -e docs-py39
       - name: Upload docs artifact
-        uses: actions/upload-pages-artifact@v1
+        uses: actions/upload-pages-artifact@v3
         with:
           path: 'build/docs'
 

diff --git a/.github/workflows/regular.yml b/.github/workflows/regular.yml
@@ -1,6 +1,6 @@
 # NOTES:
 # - The map syntax used for matrix is flagged red but actually works
-# - This runs everything in Python 3.8, 3.9, 3.10 and 3.11
+# - This runs everything in Python 3.9, 3.10 and 3.11
 # - No environments are cached due to space limit
 
 name: Regular Checks
@@ -50,7 +50,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'}, {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
     name: Lint ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:
@@ -69,7 +69,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'}, {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
     name: Type Check ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:
@@ -88,7 +88,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'}, {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
     name: Audit ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:
@@ -107,7 +107,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'}, {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
     name: Core Tests ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:
@@ -130,7 +130,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        py-version: [ {semantic: '3.8', tox: 'py38'}, {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
+        py-version: [ {semantic: '3.9', tox: 'py39'}, {semantic: '3.10', tox: 'py310'}, {semantic: '3.11', tox: 'py311'} ]
     name: Full Tests ${{ matrix.py-version.semantic }}
     runs-on: ubuntu-latest
     steps:

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -38,7 +38,7 @@ jobs:
           pip install check-wheel-contents     
           check-wheel-contents dist/*.whl
       - name: Upload Artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: Dist_${{  github.ref_name }}
           path: dist
@@ -52,7 +52,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download packages built
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: Dist_${{  github.ref_name }}
           path: dist
@@ -78,7 +78,7 @@ jobs:
         run: |
           pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ baybe==${{  github.ref_name }}
       - name: Download packages built
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: Dist_${{  github.ref_name }}
           path: dist

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+### Changed
+- BoTorch dependency bumped to `>=0.9.3`
+
+### Removed
+- Workaround for BoTorch hybrid recommender data type
+- Support for Python 3.8
+
 ## [0.7.4] - 2024-02-28
 ### Added
 - Subpackages for the available recommender types
@@ -119,7 +127,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Specifying target configs without explicit type information is deprecated
 - Specifying parameters/constraints at the top level of a campaign configuration JSON is
   deprecated. Instead, an explicit `searchspace` field must be provided with an optional
-  `constructor` entry.
+  `constructor` entry
 
 ## [0.7.1] - 2023-12-07
 ### Added

diff --git a/README.md b/README.md
@@ -139,15 +139,16 @@ searchspace = SearchSpace.from_product(parameters)
 As an optional step, we can specify details on how the optimization should be
 conducted. If omitted, BayBE will choose a default setting.
 
-For our example, we combine two selection strategies:
+For our example, we combine two recommenders via a so-called meta recommender named
+`TwoPhaseMetaRecommender`:
 
 1. In cases where no measurements have been made prior to the interaction with BayBE,
    a selection via `initial_recommender` is used.
 2. As soon as the first measurements are available, we switch to `recommender`.
 
-For more details on the different strategies, their underlying algorithmic
+For more details on the different recommenders, their underlying algorithmic
 details, and their configuration settings, see the
-[strategies section](https://emdgroup.github.io/baybe/userguide/strategies.html)
+[recommenders section](https://emdgroup.github.io/baybe/userguide/recommenders.html)
 of the user guide.
 
 ```python
@@ -157,7 +158,7 @@ from baybe.recommenders import (
     TwoPhaseMetaRecommender,
 )
 
-strategy = TwoPhaseMetaRecommender(
+recommender = TwoPhaseMetaRecommender(
     initial_recommender=FPSRecommender(),  # farthest point sampling
     recommender=SequentialGreedyRecommender(),  # Bayesian model-based optimization
 )
@@ -170,7 +171,7 @@ We can now construct a campaign object that brings all pieces of the puzzle toge
 ```python
 from baybe import Campaign
 
-campaign = Campaign(searchspace, objective, strategy)
+campaign = Campaign(searchspace, objective, recommender)
 ```
 
 With this object at hand, we can start our experimentation cycle.

diff --git a/baybe/recommenders/pure/bayesian/sequential_greedy.py b/baybe/recommenders/pure/bayesian/sequential_greedy.py
@@ -2,11 +2,9 @@
 
 from typing import Any, ClassVar
 
-import numpy as np
 import pandas as pd
 from attrs import define, field, validators
 from botorch.optim import optimize_acqf, optimize_acqf_discrete, optimize_acqf_mixed
-from sklearn.metrics import pairwise_distances_argmin
 
 from baybe.exceptions import NoMCAcquisitionFunctionError
 from baybe.recommenders.pure.bayesian.base import BayesianRecommender
@@ -95,7 +93,6 @@ def _recommend_discrete(
                 on=list(candidates_comp),
             )["index"]
         )
-        assert len(points) == len(idxs)
 
         return idxs
 
@@ -178,9 +175,9 @@ def _recommend_hybrid(
             # TODO: Currently assumes that discrete parameters are first and continuous
             #   second. Once parameter redesign [11611] is completed, we might adjust
             #   this.
-            candidates_comp.columns = list(range(len(candidates_comp.columns)))  # type: ignore[assignment]
+            num_comp_columns = len(candidates_comp.columns)
+            candidates_comp.columns = list(range(num_comp_columns))  # type: ignore
             fixed_features_list = candidates_comp.to_dict("records")
-
         else:
             fixed_features_list = None
 
@@ -204,7 +201,7 @@ def _recommend_hybrid(
                 inequality_constraints=[
                     c.to_botorch(
                         searchspace.continuous.parameters,
-                        idx_offset=len(candidates_comp.columns),
+                        idx_offset=num_comp_columns,
                     )
                     for c in searchspace.continuous.constraints_lin_ineq
                 ]
@@ -216,39 +213,26 @@ def _recommend_hybrid(
                 f"acquisition functions."
             ) from ex
 
-        # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
-        # TODO [14819]: The following code is necessary due to floating point
-        #   inaccuracies introduced by BoTorch (potentially due to some float32
-        #   conversion?). The current workaround is the match the recommendations back
-        #   to the closest candidate points.
-
-        # Split discrete and continuous parts
-        disc_points = points[:, : len(candidates_comp.columns)]
-        cont_points = points[:, len(candidates_comp.columns) :]
-
-        # Find the closest match with the discrete candidates
-        candidates_comp_np = candidates_comp.to_numpy()
-        disc_points_np = disc_points.numpy()
-        if not disc_points_np.flags["C_CONTIGUOUS"]:
-            disc_points_np = np.ascontiguousarray(disc_points_np)
-        if not candidates_comp_np.flags["C_CONTIGUOUS"]:
-            candidates_comp_np = np.ascontiguousarray(candidates_comp_np)
-        disc_idxs_iloc = pairwise_distances_argmin(
-            disc_points_np, candidates_comp_np, metric="manhattan"
-        )
+        disc_points = points[:, :num_comp_columns]
+        cont_points = points[:, num_comp_columns:]
 
-        # Get the actual search space dataframe indices
-        disc_idxs_loc = candidates_comp.iloc[disc_idxs_iloc].index
+        # Get selected candidate indices
+        idxs = pd.Index(
+            pd.merge(
+                candidates_comp.reset_index(),
+                pd.DataFrame(disc_points, columns=candidates_comp.columns),
+                on=list(candidates_comp),
+            )["index"]
+        )
 
         # Get experimental representation of discrete and continuous parts
-        rec_disc_exp = searchspace.discrete.exp_rep.loc[disc_idxs_loc]
+        rec_disc_exp = searchspace.discrete.exp_rep.loc[idxs]
         rec_cont_exp = pd.DataFrame(
             cont_points, columns=searchspace.continuous.param_names
         )
 
-        # Adjust the index of the continuous part and concatenate both
+        # Adjust the index of the continuous part and create overall recommendations
         rec_cont_exp.index = rec_disc_exp.index
         rec_exp = pd.concat([rec_disc_exp, rec_cont_exp], axis=1)
-        # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
         return rec_exp
diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py
@@ -42,7 +42,6 @@ def _recommend_hybrid(
             replace=len(disc_candidates) < batch_size,
         )
 
-        cont_random.reset_index(drop=True)
         cont_random.index = disc_random.index
         return pd.concat([disc_random, cont_random], axis=1)
 

diff --git a/examples/Serialization/basic_serialization.py b/examples/Serialization/basic_serialization.py
@@ -66,7 +66,7 @@
 # We next serialize the campaign to JSON.
 # This yields a JSON representation in string format.
 # Since it is rather complex, we do not print this string here.
-# Note: Dataframes are encoded via binary parquet and are hence not human-readable.
+# Note: Dataframes are binary-encoded and are hence not human-readable.
 
 string = campaign.to_json()
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -6,14 +6,13 @@ authors = [
 ]
 readme = "README.md"
 license = { text = "Apache-2.0" }
-requires-python =">=3.8,<3.12"
+requires-python =">=3.9,<3.12"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -30,20 +29,20 @@ keywords = [
 dynamic = ['version']
 dependencies = [
     "attrs>=22.2.0",
-    "botorch>=0.8.1",
+    "botorch>=0.9.3",
     "cattrs>=23.2.0",
     "exceptiongroup",
     "funcy>=1.17",
     "gpytorch>=1.9.1",
     "ngboost>=0.3.12",
     "numpy>=1.24.1",
-    "pandas[parquet]>=1.4.2",
+    "pandas>=1.4.2",
     "protobuf<=3.20.3",
     "scikit-learn>=1.1.1",
     "scikit-learn-extra>=0.3.0",
     "scipy>=1.10.1",
     "setuptools-scm>=7.1.0",
-    "torch>=1.11.0",
+    "torch>=1.13.1",
     "baybe[telemetry]",
 ]
 

diff --git a/tests/README.md b/tests/README.md
@@ -42,7 +42,6 @@ pytest --cov=baybe
 
 This will produce something like this:
 ```
----------- coverage: platform darwin, python 3.8.6-final-0 -----------
 Name                                 Stmts   Miss  Cover
 --------------------------------------------------------
 baybe/acquisition.py                    58      0   100%
@@ -59,7 +58,7 @@ possibility to test different python variants as well.
 
 ### Environments
 In `tox.ini`, we have configured several environments for running different actions 
-(`fulltest`, `coretest`, `lint`, `audit`) against different versions of python (e.g. `py38`, `py39`, .
+(`fulltest`, `coretest`, `lint`, `audit`) against different versions of python (e.g. `py39`, `py310`, .
 ..). 
 You can specify both in `tox` to call a certain combination. 
 
@@ -85,13 +84,13 @@ tox -l
 ### Shortcuts
 In case you want to run several combinations, you can specify them like
 ```bash
-tox -e audit-py38,audit-py311
+tox -e audit-py39,audit-py311
 ```
 
 If you omit the python version from the environment, `tox` will use the version 
 from the command-executing environment:
 ```bash
-tox -e coretest  # runs like '-e coretest-py38' if called from a python 3.8 environment
+tox -e coretest  # runs like '-e coretest-py39' if called from a python 3.9 environment
 ```
 
 If you simply want to run all combinations, you can use