alan-turing-institute · kallewesterling · Mar 1, 2024 · Feb 27, 2024 · Feb 28, 2024 · Feb 29, 2024
diff --git a/autoemulate/compare.py b/autoemulate/compare.py
@@ -24,6 +24,7 @@
 from autoemulate.plotting import _plot_model
 from autoemulate.plotting import _plot_results
 from autoemulate.printing import _print_cv_results
+from autoemulate.printing import _print_setup
 from autoemulate.save import ModelSerialiser
 from autoemulate.utils import get_mean_scores
 from autoemulate.utils import get_model_name
@@ -125,8 +126,13 @@ def setup(
         self.n_jobs = n_jobs
         self.logger = _configure_logging(log_to_file=log_to_file)
         self.is_set_up = True
+        self.dim_reducer = dim_reducer
+        self.reduce_dim = reduce_dim
+        self.folds = folds
         self.cv_results = {}
 
+        self.print_setup()
+
     def _check_input(self, X, y):
         """Checks and possibly converts the input data.
 
@@ -198,7 +204,12 @@ def compare(self):
         self.scores_df = pd.DataFrame(
             columns=["model", "metric", "fold", "score"]
         ).astype(
-            {"model": "object", "metric": "object", "fold": "int64", "score": "float64"}
+            {
+                "model": "object",
+                "metric": "object",
+                "fold": "int64",
+                "score": "float64",
+            }
         )
 
         for i in range(len(self.models)):
@@ -368,6 +379,17 @@ def load_model(self, path=None):
 
         return serialiser._load_model(path)
 
+    def print_setup(self) -> None:
+        """Print the setup, rendered as HTML in IPython and as plain text otherwise.
+
+        Raises
+        ------
+        RuntimeError
+            If `is_set_up` is false, i.e. `setup()` has not been run.
+        """
+        # All formatting lives in `autoemulate.printing._print_setup`.
+        _print_setup(self)
+
     def print_results(self, model=None, sort_by="r2"):
         """Print cv results.
 
@@ -470,5 +492,10 @@ def plot_model(self, model, plot="standard", n_cols=2, figsize=None):
             Number of columns in the plot grid for multi-output. Default is 2.
         """
         _plot_model(
-            model, self.X[self.test_idxs], self.y[self.test_idxs], plot, n_cols, figsize
+            model,
+            self.X[self.test_idxs],
+            self.y[self.test_idxs],
+            plot,
+            n_cols,
+            figsize,
         )
diff --git a/autoemulate/printing.py b/autoemulate/printing.py
@@ -1,7 +1,18 @@
+import pandas as pd
+
 from autoemulate.utils import get_mean_scores
 from autoemulate.utils import get_model_name
 from autoemulate.utils import get_model_scores
 
+# `__IPYTHON__` only resolves inside an IPython session; elsewhere it is a NameError.
+try:
+    __IPYTHON__
+except NameError:
+    _in_ipython_session = False
+else:
+    _in_ipython_session = True
+    from IPython.display import display, HTML
+
 
 def _print_cv_results(models, scores_df, model=None, sort_by="r2"):
     """Print cv results.
@@ -34,3 +45,84 @@ def _print_cv_results(models, scores_df, model=None, sort_by="r2"):
         scores = get_model_scores(scores_df, model)
         print(f"Scores for {model} across all folds:")
         print(scores)
+
+
+def _print_setup(cls):
+    """Print the setup of the AutoEmulate object.
+
+    In an IPython session the settings, models and metrics are displayed as HTML;
+    otherwise they are printed as plain text.
+
+    Parameters
+    ----------
+    cls : AutoEmulate
+        The AutoEmulate object (an instance, despite the parameter name).
+
+    Raises
+    ------
+    RuntimeError
+        If `cls.is_set_up` is false, i.e. `setup()` has not been run.
+    """
+    if not cls.is_set_up:
+        raise RuntimeError("Must run setup() before print_setup()")
+
+    def _class_name(obj):
+        # Components that are switched off are shown as the string "None".
+        return "None" if obj is None else obj.__class__.__name__
+
+    # Report each pipeline by the class of its step named "model".
+    model_names = [
+        step.__class__.__name__
+        for pipeline in cls.models
+        for step_name, step in pipeline.steps
+        if step_name == "model"
+    ]
+    metric_names = [metric.__name__ for metric in cls.metrics]
+
+    # (label, value) pairs in display order; values are stringified below.
+    rows = [
+        ("Simulation input shape (X)", cls.X.shape),
+        ("Simulation output shape (y)", cls.y.shape),
+        ("# training set samples (train_idxs)", cls.train_idxs.shape[0]),
+        ("# test set samples (test_idxs)", cls.test_idxs.shape[0]),
+        ("Do hyperparameter search (param_search)", cls.param_search),
+        ("Type of hyperparameter search (search_type)", cls.search_type),
+        ("# sampled parameter settings (param_search_iters)", cls.param_search_iters),
+        ("Scale data before fitting (scale)", cls.scale),
+        ("Scaler (scaler)", _class_name(cls.scaler)),
+        ("Dimensionality reduction before fitting (reduce_dim)", cls.reduce_dim),
+        ("Dimensionality reduction method (dim_reducer)", _class_name(cls.dim_reducer)),
+        ("Cross-validation strategy (cv)", _class_name(cls.cv)),
+        ("# folds (folds)", cls.folds),
+        ("# parallel jobs (n_jobs)", cls.n_jobs if cls.n_jobs is not None else "1"),
+    ]
+    settings = pd.DataFrame(
+        [str(value) for _, value in rows],
+        index=[label for label, _ in rows],
+        columns=["Values"],
+    )
+
+    if _in_ipython_session:
+        from html import escape
+
+        def _html_list(title, names):
+            # Escape the names: a lambda metric, for example, is "<lambda>".
+            items = "".join(f"<li>{escape(name)}</li>" for name in names)
+            return f"<p>{title}</p><ul>{items}</ul>"
+
+        display(HTML("<p>AutoEmulate is set up with the following settings:</p>"))
+        display(HTML(settings.to_html()))
+        display(HTML(_html_list("Models:", model_names)))
+        display(HTML(_html_list("Metrics:", metric_names)))
+        return
+
+    settings_str = settings.to_string(index=True, header=False)
+    rule = "-" * len(settings_str.split("\n")[0])  # as wide as the first table line
+    print("AutoEmulate is set up with the following settings:")
+    print(rule)
+    print(settings_str)
+    print(rule)
+    print("Models:" + "".join(f"\n- {name}" for name in model_names))
+    print(rule)
+    print("Metrics:" + "".join(f"\n- {name}" for name in metric_names))
+    print(rule)