From e76fae522b09c5fb33a02fc2e03beb14005eadc0 Mon Sep 17 00:00:00 2001 From: mastoffel Date: Fri, 9 Feb 2024 15:10:25 +0000 Subject: [PATCH 1/8] add split --- autoemulate/compare.py | 72 +++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/autoemulate/compare.py b/autoemulate/compare.py index cc26c428..d44dad2b 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -1,8 +1,11 @@ import matplotlib.pyplot as plt +import numpy as np import pandas as pd from sklearn.decomposition import PCA from sklearn.metrics import make_scorer from sklearn.model_selection import cross_validate +from sklearn.model_selection import PredefinedSplit +from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.utils.validation import check_X_y @@ -88,6 +91,9 @@ def setup( Whether to log to file. """ self.X, self.y = self._check_input(X, y) + self.train_idxs, self.test_idxs = self._split_data( + self.X, test_size=0.2, param_search=param_search + ) self.models = get_and_process_models( MODEL_REGISTRY, model_subset, @@ -130,6 +136,37 @@ def _check_input(self, X, y): y = y.astype("float32") # needed for pytorch models return X, y + def _split_data(self, X, test_size=0.2, random_state=None, param_search=False): + """Splits the data into training and testing sets. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Simulation input. + test_size : float, default=0.2 + Proportion of the dataset to include in the test split. + random_state : int, RandomState instance or None, default=None + Controls the shuffling applied to the data before applying the split. + param_search : bool + Whether to split the data for hyperparameter search. + + Returns + ------- + train_idx : array-like + Indices of the training set. + test_idx : array-like + Indices of the testing set. 
+ """ + + if param_search: + idxs = np.arange(X.shape[0]) + train_idxs, test_idxs = train_test_split( + idxs, test_size=test_size, random_state=random_state + ) + else: + train_idxs, test_idxs = None, None + return train_idxs, test_idxs + def _get_metrics(self, METRIC_REGISTRY): """ Get metrics from REGISTRY @@ -187,8 +224,8 @@ def compare(self): # hyperparameter search if self.param_search: self.models[i] = optimize_params( - X=self.X, - y=self.y, + X=self.X[self.train_idxs], + y=self.y[self.train_idxs], cv=self.cv, model=self.models[i], search_type=self.search_type, @@ -197,16 +234,27 @@ def compare(self): n_jobs=self.n_jobs, logger=self.logger, ) - # run cross validation and store results - self.cv_results[get_model_name(self.models[i])] = run_cv( - X=self.X, - y=self.y, - cv=self.cv, - model=self.models[i], - metrics=self.metrics, - n_jobs=self.n_jobs, - logger=self.logger, - ) + # only predict on one test set + self.cv_results[get_model_name(self.models[i])] = run_cv( + X=self.X, + y=self.y, + cv=PredefinedSplit(test_fold=self.test_idxs), + model=self.models[i], + metrics=self.metrics, + n_jobs=self.n_jobs, + logger=self.logger, + ) + else: + # run cross validation and store results + self.cv_results[get_model_name(self.models[i])] = run_cv( + X=self.X, + y=self.y, + cv=self.cv, + model=self.models[i], + metrics=self.metrics, + n_jobs=self.n_jobs, + logger=self.logger, + ) # update scores dataframe self.scores_df = update_scores_df( self.scores_df, From 2fe14cee89c1c0953d06f91bb03f74fb1c5ef0c7 Mon Sep 17 00:00:00 2001 From: mastoffel Date: Fri, 9 Feb 2024 15:57:55 +0000 Subject: [PATCH 2/8] fix indexing --- autoemulate/compare.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/autoemulate/compare.py b/autoemulate/compare.py index d44dad2b..07cd4098 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -202,6 +202,12 @@ def _get_cv(self, CV_REGISTRY, fold_strategy, folds): """ return CV_REGISTRY[fold_strategy](folds=folds, shuffle=True) + def _custom_split(self, X, test_idxs): + split_index = np.full(X.shape[0], -1) + split_index[test_idxs] = 0 + + return PredefinedSplit(test_fold=split_index) + def compare(self): """Compares the emulator models on the data. self.setup() must be run first. 
@@ -238,12 +244,13 @@ def compare(self): self.cv_results[get_model_name(self.models[i])] = run_cv( X=self.X, y=self.y, - cv=PredefinedSplit(test_fold=self.test_idxs), + cv=self._custom_split(self.X, self.test_idxs), model=self.models[i], metrics=self.metrics, n_jobs=self.n_jobs, logger=self.logger, ) + print(f"cv output: {self.cv_results[get_model_name(self.models[i])]}") else: # run cross validation and store results self.cv_results[get_model_name(self.models[i])] = run_cv( From 16d5d606ba8a4988c9869b2946f09b5bdc505b12 Mon Sep 17 00:00:00 2001 From: mastoffel Date: Fri, 9 Feb 2024 16:59:17 +0000 Subject: [PATCH 3/8] refactor single split --- autoemulate/compare.py | 47 ++++------------------------ autoemulate/cross_validate.py | 58 +++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/autoemulate/compare.py b/autoemulate/compare.py index 07cd4098..b531d4cd 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -10,6 +10,8 @@ from sklearn.utils.validation import check_X_y from autoemulate.cross_validate import run_cv +from autoemulate.cross_validate import single_split +from autoemulate.cross_validate import split_data from autoemulate.cross_validate import update_scores_df from autoemulate.cv import CV_REGISTRY from autoemulate.emulators import MODEL_REGISTRY @@ -91,7 +93,7 @@ def setup( Whether to log to file. """ self.X, self.y = self._check_input(X, y) - self.train_idxs, self.test_idxs = self._split_data( + self.train_idxs, self.test_idxs = split_data( self.X, test_size=0.2, param_search=param_search ) self.models = get_and_process_models( @@ -136,37 +138,6 @@ def _check_input(self, X, y): y = y.astype("float32") # needed for pytorch models return X, y - def _split_data(self, X, test_size=0.2, random_state=None, param_search=False): - """Splits the data into training and testing sets. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - Simulation input. - test_size : float, default=0.2 - Proportion of the dataset to include in the test split. - random_state : int, RandomState instance or None, default=None - Controls the shuffling applied to the data before applying the split. - param_search : bool - Whether to split the data for hyperparameter search. - - Returns - ------- - train_idx : array-like - Indices of the training set. - test_idx : array-like - Indices of the testing set. - """ - - if param_search: - idxs = np.arange(X.shape[0]) - train_idxs, test_idxs = train_test_split( - idxs, test_size=test_size, random_state=random_state - ) - else: - train_idxs, test_idxs = None, None - return train_idxs, test_idxs - def _get_metrics(self, METRIC_REGISTRY): """ Get metrics from REGISTRY @@ -202,12 +173,6 @@ def _get_cv(self, CV_REGISTRY, fold_strategy, folds): """ return CV_REGISTRY[fold_strategy](folds=folds, shuffle=True) - def _custom_split(self, X, test_idxs): - split_index = np.full(X.shape[0], -1) - split_index[test_idxs] = 0 - - return PredefinedSplit(test_fold=split_index) - def compare(self): """Compares the emulator models on the data. self.setup() must be run first. 
@@ -240,17 +205,17 @@ def compare(self): n_jobs=self.n_jobs, logger=self.logger, ) - # only predict on one test set + self.cv_results[get_model_name(self.models[i])] = run_cv( X=self.X, y=self.y, - cv=self._custom_split(self.X, self.test_idxs), + cv=single_split(self.X, self.test_idxs), # predict on test set model=self.models[i], metrics=self.metrics, n_jobs=self.n_jobs, logger=self.logger, ) - print(f"cv output: {self.cv_results[get_model_name(self.models[i])]}") + else: # run cross validation and store results self.cv_results[get_model_name(self.models[i])] = run_cv( diff --git a/autoemulate/cross_validate.py b/autoemulate/cross_validate.py index 1ff0d843..2022b0f0 100644 --- a/autoemulate/cross_validate.py +++ b/autoemulate/cross_validate.py @@ -1,12 +1,14 @@ +import numpy as np import pandas as pd from sklearn.metrics import make_scorer from sklearn.model_selection import cross_validate +from sklearn.model_selection import PredefinedSplit +from sklearn.model_selection import train_test_split from autoemulate.utils import get_model_name def run_cv(X, y, cv, model, metrics, n_jobs, logger): - # Get model name model_name = get_model_name(model) # The metrics we want to use for cross-validation @@ -16,7 +18,6 @@ def run_cv(X, y, cv, model, metrics, n_jobs, logger): logger.info(f"Parameters: {model.named_steps['model'].get_params()}") try: - # Cross-validate cv_results = cross_validate( model, X, @@ -65,3 +66,56 @@ def update_scores_df(scores_df, model, cv_results): "score": score, } return scores_df + + +def split_data(X, test_size=0.2, random_state=None, param_search=False): + """Splits the data into training and testing sets. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Simulation input. + test_size : float, default=0.2 + Proportion of the dataset to include in the test split. + random_state : int, RandomState instance or None, default=None + Controls the shuffling applied to the data before applying the split. + param_search : bool + Whether to split the data for hyperparameter search. + + Returns + ------- + train_idx : array-like + Indices of the training set. + test_idx : array-like + Indices of the testing set. + """ + + if param_search: + idxs = np.arange(X.shape[0]) + train_idxs, test_idxs = train_test_split( + idxs, test_size=test_size, random_state=random_state + ) + else: + train_idxs, test_idxs = None, None + return train_idxs, test_idxs + + +def single_split(X, test_idxs): + """Create a single split for sklearn's `cross_validate` function. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Simulation input. + test_idxs : array-like + Indices of the testing set. + + Returns + ------- + split_index : sklearn.model_selection.PredefinedSplit + An instance of the PredefinedSplit class. 
+ """ + split_index = np.full(X.shape[0], -1) + split_index[test_idxs] = 0 + + return PredefinedSplit(test_fold=split_index) From 5ec602f1c4d90787371113fd2c8d58d9f2d43973 Mon Sep 17 00:00:00 2001 From: mastoffel Date: Fri, 9 Feb 2024 17:26:28 +0000 Subject: [PATCH 4/8] change printing text --- autoemulate/compare.py | 11 +++++++++-- autoemulate/printing.py | 32 +++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/autoemulate/compare.py b/autoemulate/compare.py index b531d4cd..fc0542c1 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -40,6 +40,7 @@ def setup( param_search=False, param_search_type="random", param_search_iters=20, + param_search_test_size=0.2, scale=True, scaler=StandardScaler(), reduce_dim=False, @@ -94,7 +95,7 @@ def setup( """ self.X, self.y = self._check_input(X, y) self.train_idxs, self.test_idxs = split_data( - self.X, test_size=0.2, param_search=param_search + self.X, test_size=param_search_test_size, param_search=param_search ) self.models = get_and_process_models( MODEL_REGISTRY, @@ -306,7 +307,13 @@ def print_results(self, sort_by="r2", model=None): The name of the model to print. If None, the best fold from each model will be printed. If a model name is provided, the scores for that model across all folds will be printed. """ - print_cv_results(self.models, self.scores_df, model=model, sort_by=sort_by) + print_cv_results( + self.models, + self.scores_df, + model=model, + sort_by=sort_by, + param_search=self.param_search, + ) def plot_results( self, diff --git a/autoemulate/printing.py b/autoemulate/printing.py index c74e4da1..f801c994 100644 --- a/autoemulate/printing.py +++ b/autoemulate/printing.py @@ -2,7 +2,7 @@ from autoemulate.utils import get_model_name -def print_cv_results(models, scores_df, model=None, sort_by="r2"): +def print_cv_results(models, scores_df, model=None, sort_by="r2", param_search=False): """Print cv results. Parameters @@ -26,12 +26,26 @@ def print_cv_results(models, scores_df, model=None, sort_by="r2"): f"Model {model} not found. 
Available models are: {model_names}" ) if model is None: - means = get_mean_scores(scores_df, metric=sort_by) - print("Average scores across all models:") - print(means) + if param_search: + means = get_mean_scores(scores_df, metric=sort_by) + print("Test score for each model:") + print(means) + else: + means = get_mean_scores(scores_df, metric=sort_by) + print("Average scores across all models:") + print(means) else: - scores = scores_df[scores_df["model"] == model].pivot( - index="fold", columns="metric", values="score" - ) - print(f"Scores for {model} across all folds:") - print(scores) + if param_search: + scores = scores_df[scores_df["model"] == model].pivot( + index="fold", columns="metric", values="score" + ) + # drop metric column + # get index of scores + print(f"Test score for {model}:") + print(scores) + else: + scores = scores_df[scores_df["model"] == model].pivot( + index="fold", columns="metric", values="score" + ) + print(f"Scores for {model} across all folds:") + print(scores) From 4b14b75ebde1727b7e907048e15a42c2eb333911 Mon Sep 17 00:00:00 2001 From: mastoffel Date: Mon, 12 Feb 2024 08:47:09 +0000 Subject: [PATCH 5/8] modify printing to fit the new hold-out set strategy --- autoemulate/compare.py | 6 +++--- autoemulate/printing.py | 11 +++-------- autoemulate/utils.py | 8 ++++++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/autoemulate/compare.py b/autoemulate/compare.py index fc0542c1..f5fcc6a5 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -296,16 +296,16 @@ def load_model(self, filepath): serialiser = ModelSerialiser() return serialiser.load_model(filepath) - def print_results(self, sort_by="r2", model=None): + def print_results(self, model=None, sort_by="r2"): """Print cv results. Parameters ---------- - sort_by : str, optional - The metric to sort by. Default is "r2", can also be "rmse". model : str, optional The name of the model to print. If None, the best fold from each model will be printed. If a model name is provided, the scores for that model across all folds will be printed. + sort_by : str, optional + The metric to sort by. Default is "r2", can also be "rmse". 
""" print_cv_results( self.models, diff --git a/autoemulate/printing.py b/autoemulate/printing.py index f801c994..a05eaeb0 100644 --- a/autoemulate/printing.py +++ b/autoemulate/printing.py @@ -1,5 +1,6 @@ from autoemulate.utils import get_mean_scores from autoemulate.utils import get_model_name +from autoemulate.utils import get_model_scores def print_cv_results(models, scores_df, model=None, sort_by="r2", param_search=False): @@ -36,16 +37,10 @@ def print_cv_results(models, scores_df, model=None, sort_by="r2", param_search=F print(means) else: if param_search: - scores = scores_df[scores_df["model"] == model].pivot( - index="fold", columns="metric", values="score" - ) - # drop metric column - # get index of scores + scores = get_model_scores(scores_df, model) print(f"Test score for {model}:") print(scores) else: - scores = scores_df[scores_df["model"] == model].pivot( - index="fold", columns="metric", values="score" - ) + scores = get_model_scores(scores_df, model) print(f"Scores for {model} across all folds:") print(scores) diff --git a/autoemulate/utils.py b/autoemulate/utils.py index 948d5a2a..190c1454 100644 --- a/autoemulate/utils.py +++ b/autoemulate/utils.py @@ -308,6 +308,14 @@ def get_mean_scores(scores_df, metric): return means_df +def get_model_scores(scores_df, model_name): + model_scores = scores_df[scores_df["model"] == model_name].pivot( + index="fold", columns="metric", values="score" + ) + + return model_scores + + def set_random_seed(seed: int, deterministic: bool = False): """Set random seed for Python, Numpy and PyTorch. Args: From 44d81431238c4708510bf08794555f3e03ad147f Mon Sep 17 00:00:00 2001 From: mastoffel Date: Mon, 12 Feb 2024 09:12:59 +0000 Subject: [PATCH 6/8] change plotting text for test set plots --- autoemulate/compare.py | 8 ++++---- autoemulate/plotting.py | 27 ++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/autoemulate/compare.py b/autoemulate/compare.py index f5fcc6a5..d0977b60 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -317,7 +317,7 @@ def print_results(self, model=None, sort_by="r2"): def plot_results( self, - model_name=None, + model=None, plot_type="actual_vs_predicted", n_cols=3, figsize=None, @@ -327,8 +327,7 @@ def plot_results( Parameters ---------- - model_name : str - + model : str Name of the model to plot. If None, plots best folds of each models. If a model name is specified, plots all folds of that model. plot_type : str, optional @@ -346,9 +345,10 @@ def plot_results( self.cv_results, self.X, self.y, - model_name=model_name, + model_name=model, n_cols=n_cols, plot_type=plot_type, figsize=figsize, output_index=output_index, + param_search=self.param_search, ) diff --git a/autoemulate/plotting.py b/autoemulate/plotting.py index 2984126c..ed54f993 100644 --- a/autoemulate/plotting.py +++ b/autoemulate/plotting.py @@ -46,6 +46,7 @@ def plot_single_fold( plot_type="actual_vs_predicted", annotation=" ", output_index=0, + param_search=False, ): """Plots a single cv fold for a given model. 
@@ -89,7 +90,8 @@ def plot_single_fold( display = PredictionErrorDisplay.from_predictions( y_true=true_values, y_pred=predicted_values, kind=plot_type, ax=ax ) - ax.set_title(f"{model_name} - {annotation}: {fold_index}") + title_suffix = "Test set" if param_search else f"{annotation}: {fold_index}" + ax.set_title(f"{model_name} - {title_suffix}") def plot_best_fold_per_model( @@ -100,6 +102,7 @@ def plot_best_fold_per_model( plot_type="actual_vs_predicted", figsize=None, output_index=0, + param_search=False, ): """Plots results of the best (highest R^2) cv-fold for each model in cv_results. @@ -145,6 +148,7 @@ def plot_best_fold_per_model( plot_type=plot_type, annotation="Best CV-fold", output_index=output_index, + param_search=param_search, ) plt.tight_layout() plt.show() @@ -159,6 +163,7 @@ def plot_model_folds( plot_type="actual_vs_predicted", figsize=None, output_index=0, + param_search=False, ): """Plots all the folds for a given model. @@ -182,6 +187,8 @@ def plot_model_folds( Overrides the default figure size. output_index : int, optional The index of the output to plot. Default is 0. + param_search : bool, optional + Whether there was a hyperparameter search. """ n_folds = len(cv_results[model_name]["estimator"]) @@ -206,6 +213,7 @@ def plot_model_folds( plot_type, annotation="CV-fold", output_index=output_index, + param_search=param_search, ) plt.tight_layout() plt.show() @@ -220,6 +228,7 @@ def plot_results( plot_type="actual_vs_predicted", figsize=None, output_index=0, + param_search=False, ): """Plots the results of cross-validation. @@ -241,6 +250,10 @@ def plot_results( “residual_vs_predicted” draws the residuals, i.e. difference between observed and predicted values, (y-axis) vs. the predicted values (x-axis). figsize : tuple, optional Overrides the default figure size. + output_index : int, optional + For multi-output: Index of the output variable to plot. + param_search : bool, optional + Whether hyperparameter search was done. 
""" validate_inputs(cv_results, model_name) @@ -248,9 +261,17 @@ def plot_results( if model_name: plot_model_folds( - cv_results, X, y, model_name, n_cols, plot_type, figsize, output_index + cv_results, + X, + y, + model_name, + n_cols, + plot_type, + figsize, + output_index, + param_search, ) else: plot_best_fold_per_model( - cv_results, X, y, n_cols, plot_type, figsize, output_index + cv_results, X, y, n_cols, plot_type, figsize, output_index, param_search ) From d97a25596118c7e4e3c526174946968203cfb95e Mon Sep 17 00:00:00 2001 From: mastoffel Date: Mon, 12 Feb 2024 09:19:09 +0000 Subject: [PATCH 7/8] cleanup --- autoemulate/plotting.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/autoemulate/plotting.py b/autoemulate/plotting.py index ed54f993..26001a44 100644 --- a/autoemulate/plotting.py +++ b/autoemulate/plotting.py @@ -133,8 +133,6 @@ def plot_best_fold_per_model( plt.figure(figsize=figsize) - if n_models == 1: - axes = [axes] for i, model_name in enumerate(cv_results): best_fold_index = np.argmax(cv_results[model_name]["test_r2"]) ax = plt.subplot(n_rows, n_cols, i + 1) @@ -199,8 +197,6 @@ def plot_model_folds( plt.figure(figsize=figsize) - if n_folds == 1: - axes = [axes] for i in range(n_folds): ax = plt.subplot(n_rows, n_cols, i + 1) plot_single_fold( From c36ecc8c24d1bffee81add5ca42d99bd230058a2 Mon Sep 17 00:00:00 2001 From: mastoffel Date: Mon, 12 Feb 2024 09:35:53 +0000 Subject: [PATCH 8/8] add tests for hold-out set splitting --- tests/test_cross_validate.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_cross_validate.py b/tests/test_cross_validate.py index dd23d7a7..041330d3 100644 --- a/tests/test_cross_validate.py +++ b/tests/test_cross_validate.py @@ -6,15 +6,20 @@ import pytest from sklearn.datasets import make_regression from sklearn.model_selection import KFold +from sklearn.model_selection import PredefinedSplit +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from autoemulate.compare import AutoEmulate from autoemulate.cross_validate import run_cv +from autoemulate.cross_validate import single_split +from autoemulate.cross_validate import split_data from autoemulate.cross_validate import update_scores_df from autoemulate.emulators import RandomForest from autoemulate.metrics import METRIC_REGISTRY + # import make_regression_data X, y = make_regression(n_samples=20, n_features=2, random_state=0) @@ -53,3 +58,26 @@ def test_update_scores_df(cv_results): assert scores_df_new.shape[0] == 10 assert scores_df_new.shape[1] == 4 assert scores_df_new["model"][0] == "RandomForest" + + +def test_single_split(): + X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + test_idxs = [1, 3] + split_index = single_split(X, test_idxs) + + assert isinstance(split_index, PredefinedSplit) + assert np.array_equal(split_index.test_fold, [-1, 0, -1, 0]) + + +def test_split_data(): + X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + test_size = 0.2 + random_state = 42 + param_search = True + + train_idxs, test_idxs = split_data(X, test_size, random_state, param_search) + + assert isinstance(train_idxs, np.ndarray) + assert isinstance(test_idxs, np.ndarray) + assert len(train_idxs) == 3 + assert len(test_idxs) == 1