Merge pull request #206 from alan-turing-institute/docs-update

Docs update
alan-turing-institute · Mar 14, 2024 · 7df4610 · 7df4610
2 parents b512fe3 + 60dba01
commit 7df4610
Show file tree

Hide file tree

Showing 26 changed files with 845 additions and 865 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@
 
 Simulations of physical systems are often slow and need lots of compute, which makes them impractical for applications like digital twins, or when they have to run thousands of times to do uncertainty quantification or sensitivity analyses. The goal of `AutoEmulate` is to make it easy to replace simulations with fast, accurate emulators. To do this, `AutoEmulate` automatically fits and compares lots of models, like *Radial Basis Functions*, *Gaussian Processes* or *Neural Networks* to find the best emulator for a simulation.
 
-The project is in very early development. 
+The project is in early development. 
 
 <img src="misc/robot2.png" alt="emulating simulations with ML" width="61.8%">
 
@@ -36,12 +36,12 @@ poetry shell
 import numpy as np
 from autoemulate.compare import AutoEmulate
 from autoemulate.experimental_design import LatinHypercube
-from autoemulate.simulations.projectile import simulator
+from autoemulate.simulations.projectile import simulate_projectile
 
 # sample from a simulation
-lhd = LatinHypercube([(-5., 1.), (0., 1000.)])
+lhd = LatinHypercube([(-5., 1.), (0., 1000.)]) # (lower, upper) bounds for parameters
 X = lhd.sample(100)
-y = np.array([simulator(x) for x in X])
+y = np.array([simulate_projectile(x) for x in X])
 
 # compare emulator models
 ae = AutoEmulate()
@@ -68,11 +68,7 @@ best_emulator.predict(X)
 
 ## documentation
 
-The documentation is currently created using JupyterBook. To build the documentation locally, run:
-
-```bash
-poetry run jupyter-book build docs
-```
+You can find tutorials, FAQs and the API reference [here](https://alan-turing-institute.github.io/autoemulate/), but the documentation is still a work in progress.
 
 ## Contributors
 

diff --git a/autoemulate/simulations/epidemic.py b/autoemulate/simulations/epidemic.py
@@ -0,0 +1,55 @@
+import numpy as np
+from scipy.integrate import solve_ivp
+
+
+def simulate_epidemic(x, N=1000, I0=1):
+    """Simulate an epidemic using the SIR model.
+
+    Integrates the susceptible / infected / recovered ODE system with
+    ``scipy.integrate.solve_ivp`` over the time span ``[0, 160]`` and returns
+    the largest infected count found at the evaluated time points, divided
+    by ``N``.
+
+    Parameters
+    ----------
+    x : array-like
+        The parameters of the SIR model. The first element is the transmission rate (beta) and the second element is the recovery rate (gamma). Must have length 2 and both elements must be strictly positive.
+    N : int, optional
+        The total population size. Must be positive. Defaults to 1000.
+    I0 : int, optional
+        The initial number of infected individuals. Must satisfy ``0 < I0 < N``. Defaults to 1.
+
+    Returns
+    -------
+    peak_infection_rate : float
+        The peak infection rate as a fraction of the total population.
+
+    Raises
+    ------
+    AssertionError
+        If any of the input checks below fails.
+    """
+
+    # check inputs
+    # NOTE(review): these are plain ``assert`` statements, so they are skipped
+    # entirely when Python runs with ``-O``.
+    assert len(x) == 2
+    assert N > 0
+    assert I0 > 0 and I0 < N
+    assert x[0] > 0
+    assert x[1] > 0
+
+    # unpack parameters
+    beta = x[0]
+    gamma = x[1]
+
+    # initial state: everyone who is not infected is susceptible, nobody has
+    # recovered yet
+    S0 = N - I0
+    R0 = 0
+    t_span = [0, 160]
+    y0 = [S0, I0, R0]
+
+    # right-hand side of the SIR equations; N, beta and gamma are supplied by
+    # solve_ivp through its ``args`` parameter
+    def sir_model(t, y, N, beta, gamma):
+        S, I, R = y
+        dSdt = -beta * S * I / N
+        dIdt = beta * S * I / N - gamma * I
+        dRdt = gamma * I
+        return [dSdt, dIdt, dRdt]
+
+    # NOTE(review): np.linspace includes both endpoints, so 160 points over
+    # [0, 160] are spaced 160/159 apart rather than exactly 1 time unit apart.
+    # If an exact unit (presumably daily) step is intended, 161 points would
+    # be needed -- confirm before changing, since it alters the sampled peak.
+    t_eval = np.linspace(
+        t_span[0], t_span[1], 160
+    )  # 160 evenly spaced evaluation points covering the time span
+    sol = solve_ivp(sir_model, t_span, y0, args=(N, beta, gamma), t_eval=t_eval)
+
+    # rows of sol.y follow the order of y0: susceptible, infected, recovered
+    S, I, R = sol.y
+    # the peak is taken over the t_eval samples only
+    I_max = np.max(I)
+    peak_infection_rate = I_max / N
+
+    return peak_infection_rate
diff --git a/autoemulate/simulations/projectile.py b/autoemulate/simulations/projectile.py
@@ -108,7 +108,7 @@ def simulator_base(x):
     return results
 
 
-def simulator(x):
+def simulate_projectile(x):
     """
     Simulator to solve ODE system for projectile motion with drag. Returns distance projectile travels.
 
@@ -128,7 +128,7 @@ def simulator(x):
     return results.y_events[0][0][2]
 
 
-def simulator_multioutput(x):
+def simulate_projectile_multioutput(x):
     """
     Simulator to solve ODE system with multiple outputs.
 

diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -29,7 +29,6 @@ chapters:
   sections:
   - file: reference/compare
   - file: reference/cross_validate
-  - file: reference/cv
   - file: reference/datasets
   - file: reference/experimental_design
   - file: reference/hyperparam_searching
@@ -43,15 +42,15 @@ chapters:
   - file: reference/simulations/index
     sections:
     - file: reference/simulations/projectile
+    - file: reference/simulations/epidemic
   - file: reference/emulators/index
     sections:
     - file: reference/emulators/gaussian_process
-    - file: reference/emulators/gaussian_process_mogp
     - file: reference/emulators/gradient_boosting
     - file: reference/emulators/neural_net_sk
     - file: reference/emulators/neural_net_torch
     - file: reference/emulators/polynomials
     - file: reference/emulators/random_forest
     - file: reference/emulators/rbf
     - file: reference/emulators/support_vector_machines
-    - file: reference/emulators/xgboost
+    - file: reference/emulators/light_gbm
diff --git a/docs/getting-started/best_model b/docs/getting-started/best_model
diff --git a/docs/getting-started/best_model_meta.json b/docs/getting-started/best_model_meta.json
diff --git a/docs/getting-started/emulator.pkl b/docs/getting-started/emulator.pkl
diff --git a/docs/getting-started/emulator_meta.json b/docs/getting-started/emulator_meta.json
diff --git a/docs/getting-started/quickstart.ipynb b/docs/getting-started/quickstart.ipynb
@@ -19,12 +19,21 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/mstoffel/turing/projects/autoemulate/autoemulate/compare.py:15: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
+      "  from tqdm.autonotebook import tqdm\n"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "from autoemulate.compare import AutoEmulate\n",
     "from autoemulate.experimental_design import LatinHypercube\n",
-    "from autoemulate.simulations.projectile import simulator"
+    "from autoemulate.simulations.projectile import simulate_projectile"
    ]
   },
   {
@@ -56,9 +65,9 @@
    ],
    "source": [
     "# sample from a simulation\n",
-    "lhd = LatinHypercube([(-5., 1.), (0., 1000.)])\n",
+    "lhd = LatinHypercube([(-5., 1.), (0., 1000.)]) # (lower, upper) bounds for each parameter\n",
     "X = lhd.sample(100)\n",
-    "y = np.array([simulator(x) for x in X])\n",
+    "y = np.array([simulate_projectile(x) for x in X])\n",
     "X.shape, y.shape"
    ]
   },
@@ -77,29 +86,99 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "autoemulate - Cross-validating SecondOrderPolynomial...\n",
-      "autoemulate - Parameters: {'degree': 2}\n",
-      "autoemulate - Cross-validating RBF...\n",
-      "autoemulate - Parameters: {'degree': 1, 'epsilon': 1.0, 'kernel': 'thin_plate_spline', 'smoothing': 0.0}\n",
-      "autoemulate - Cross-validating RandomForest...\n",
-      "autoemulate - Parameters: {'bootstrap': True, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_samples': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100, 'oob_score': False, 'random_state': None}\n",
-      "autoemulate - Cross-validating GradientBoosting...\n",
-      "autoemulate - Parameters: {'ccp_alpha': 0.0, 'learning_rate': 0.1, 'loss': 'squared_error', 'max_depth': 3, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100, 'n_iter_no_change': None, 'random_state': None, 'subsample': 1.0}\n",
-      "autoemulate - Cross-validating GaussianProcessSk...\n",
-      "autoemulate - Parameters: {'alpha': 1e-10, 'copy_X_train': True, 'kernel__length_scale': 1.0, 'kernel__length_scale_bounds': (1e-05, 100000.0), 'kernel': RBF(length_scale=1), 'n_restarts_optimizer': 20, 'normalize_y': True, 'optimizer': 'fmin_l_bfgs_b', 'random_state': None}\n",
-      "autoemulate - Cross-validating SupportVectorMachines...\n",
-      "autoemulate - Parameters: {'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'normalise_y': True, 'shrinking': True, 'tol': 0.001, 'verbose': False}\n",
-      "autoemulate - Cross-validating XGBoost...\n",
-      "autoemulate - Parameters: {'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'gamma': 0, 'learning_rate': 0.3, 'max_delta_step': 0, 'max_depth': 6, 'max_leaves': 0, 'min_child_weight': 1, 'n_estimators': 100, 'n_jobs': None, 'objective': 'reg:squarederror', 'random_state': None, 'reg_alpha': 0, 'reg_lambda': 1, 'subsample': 1, 'tree_method': 'auto', 'verbosity': 0}\n",
-      "autoemulate - Cross-validating NeuralNetSk...\n",
-      "autoemulate - Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100, 100), 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_iter': 200, 'random_state': None, 'solver': 'adam', 'tol': 0.0001}\n",
-      "autoemulate - Cross-validating NeuralNetTorch...\n",
-      "autoemulate - Parameters: {'module': <class 'autoemulate.emulators.neural_networks.mlp.MLPModule'>, 'criterion': <class 'torch.nn.modules.loss.MSELoss'>, 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'lr': 0.001, 'max_epochs': 1, 'batch_size': 128, 'iterator_train': <class 'torch.utils.data.dataloader.DataLoader'>, 'iterator_valid': <class 'torch.utils.data.dataloader.DataLoader'>, 'dataset': <class 'skorch.dataset.Dataset'>, 'train_split': False, 'callbacks': [<autoemulate.emulators.neural_net_torch.InputShapeSetter object at 0x2a51fc1c0>], 'predict_nonlinearity': 'auto', 'warm_start': False, 'verbose': 0, 'device': 'cpu', 'compile': False, 'use_caching': 'auto', '_params_to_validate': {'module__input_size', 'module__output_size', 'iterator_train__shuffle', 'optimizer__weight_decay'}, 'module__input_size': 2, 'module__output_size': 1, 'optimizer__weight_decay': 0.0, 'iterator_train__shuffle': True, 'callbacks__epoch_timer': <skorch.callbacks.logging.EpochTimer object at 0x2a890aa10>, 'callbacks__train_loss': <skorch.callbacks.scoring.PassthroughScoring object at 0x2a6871c90>, 'callbacks__train_loss__name': 'train_loss', 'callbacks__train_loss__lower_is_better': True, 'callbacks__train_loss__on_train': True, 'callbacks__valid_loss': <skorch.callbacks.scoring.PassthroughScoring object at 0x2a894aa70>, 'callbacks__valid_loss__name': 'valid_loss', 'callbacks__valid_loss__lower_is_better': True, 'callbacks__valid_loss__on_train': False, 'callbacks__InputShapeSetter': <autoemulate.emulators.neural_net_torch.InputShapeSetter object at 0x2a51fc1c0>, 'callbacks__print_log': <skorch.callbacks.logging.PrintLog object at 0x2a8949db0>, 'callbacks__print_log__keys_ignored': None, 'callbacks__print_log__sink': <built-in function print>, 'callbacks__print_log__tablefmt': 'simple', 'callbacks__print_log__floatfmt': '.4f', 'callbacks__print_log__stralign': 'right'}\n",
-      "autoemulate - RBF is the best model with R^2 = 0.993\n"
-     ]
+     "data": {
+      "text/html": [
+       "<p>AutoEmulate is set up with the following settings:</p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Values</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Simulation input shape (X)</th>\n",
+       "      <td>(100, 2)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Simulation output shape (y)</th>\n",
+       "      <td>(100,)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th># hold-out set samples (test_set_size)</th>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Do hyperparameter search (param_search)</th>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Type of hyperparameter search (search_type)</th>\n",
+       "      <td>random</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th># sampled parameter settings (param_search_iters)</th>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Scale data before fitting (scale)</th>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Scaler (scaler)</th>\n",
+       "      <td>StandardScaler</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Dimensionality reduction before fitting (reduce_dim)</th>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Dimensionality reduction method (dim_reducer)</th>\n",
+       "      <td>PCA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Cross-validation strategy (cross_validator)</th>\n",
+       "      <td>KFold</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th># parallel jobs (n_jobs)</th>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7cba9586ae6348659ab0b182ad926137",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Initializing:   0%|          | 0/10 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -278,11 +357,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Saving models\n",
+    "## Refitting the model on the entire dataset\n",
     "\n",
-    "Lastly, we can save and load the best model. Note: there are some checks that the environment in which the model is loaded is similar to the environment in which it was saved.\n",
+    "Before using the emulator model, we usually want to refit it on the entire dataset. This is done with the `refit_model()` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "best_emulator = ae.refit_model(best_model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Saving / loading models\n",
     "\n",
-    "Before saving a model, we usually want to retrain it on the full dataset."
+    "Lastly, we can save and load the best model. Note: there are some checks that the environment in which the model is loaded is similar to the environment in which it was saved. These are based on dependencies specified in a _meta.json file which is saved when the model is saved."
    ]
   },
   {
@@ -291,19 +386,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# refit best model on full dataset\n",
-    "best_emulator = ae.refit_model(best_model)\n",
-    "\n",
     "# save & load best model\n",
-    "ae.save_model(best_emulator, \"best_model\")\n",
-    "best_emulator = ae.load_model(\"best_model\")"
+    "# ae.save_model(best_emulator, \"best_model\")\n",
+    "# best_emulator = ae.load_model(\"best_model\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Lastly, we can use the best model to make predictions for new inputs. "
+    "Lastly, we can use the best model to make predictions for new inputs. Emulator models in `AutoEmulate` are `scikit-learn` estimators, so we can use the `predict` method to make predictions. "
    ]
   },
   {
@@ -346,7 +438,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,

diff --git a/docs/reference/cv.rst b/docs/reference/cv.rst
diff --git a/docs/reference/emulators/gaussian_process_mogp.rst b/docs/reference/emulators/gaussian_process_mogp.rst
diff --git a/docs/reference/emulators/light_gbm.rst b/docs/reference/emulators/light_gbm.rst
@@ -0,0 +1,6 @@
+autoemulate.emulators.light_gbm
+===============================
+
+.. automodule:: autoemulate.emulators.light_gbm
+   :members:
+   :show-inheritance:
diff --git a/docs/reference/emulators/neural_networks/base.rst b/docs/reference/emulators/neural_networks/base.rst
@@ -0,0 +1,6 @@
+autoemulate.emulators.neural_networks.base
+==========================================
+
+.. automodule:: autoemulate.emulators.neural_networks.base
+   :members:
+   :show-inheritance: