From 2783343b474fab933d6e6bdc7f989c2d2591061a Mon Sep 17 00:00:00 2001
From: mastoffel <martin.adam.stoffel@gmail.com>
Date: Wed, 16 Oct 2024 12:04:33 +0100
Subject: [PATCH] add multi-output sensitivity analysis

---
 autoemulate/compare.py              |  27 -------
 autoemulate/sensitivity_analysis.py | 119 ++++++++++++++++++++++------
 tests/test_sensitivity_analysis.py  |  54 +++++++++++++
 3 files changed, 148 insertions(+), 52 deletions(-)
 create mode 100644 tests/test_sensitivity_analysis.py

diff --git a/autoemulate/compare.py b/autoemulate/compare.py
index dea1eaee..39c58eed 100644
--- a/autoemulate/compare.py
+++ b/autoemulate/compare.py
@@ -525,30 +525,3 @@ def plot_eval(
         )
 
         return fig
-
-    def sensitivity_analysis(self, model, problem, N=1000, plot=True):
-        """
-        Perform sensitivity analysis on a fitted emulator.
-
-        Parameters:
-        -----------
-        model_name : str
-            The name of the fitted model to analyze.
-        problem : dict
-            The problem definition, including 'num_vars', 'names', and 'bounds'.
-        N : int, optional
-            The number of samples to generate (default is 1000).
-        plot : bool, optional
-            Whether to plot the results (default is True).
-
-        Returns:
-        --------
-        dict
-            A dictionary containing the Sobol indices.
-        """
-        Si = perform_sobol_analysis(model, problem, N)
-
-        if plot:
-            plot_sensitivity_indices(Si, problem)
-
-        return Si
diff --git a/autoemulate/sensitivity_analysis.py b/autoemulate/sensitivity_analysis.py
index 2f8f0a23..02c0e66a 100644
--- a/autoemulate/sensitivity_analysis.py
+++ b/autoemulate/sensitivity_analysis.py
@@ -1,9 +1,19 @@
 import numpy as np
-from SALib.analyze import sobol
-from SALib.sample import saltelli
+import pandas as pd
+from SALib.analyze.sobol import analyze
+from SALib.sample.sobol import sample
 
 
-def perform_sobol_analysis(model, problem, N=1000):
+def sensitivity_analysis(model, problem, N=1000, as_df=False):
+    Si = sobol_analysis(model, problem, N)
+
+    if as_df:
+        return sobol_results_to_df(Si)
+    else:
+        return Si
+
+
+def sobol_analysis(model, problem, N=1024):
     """
     Perform Sobol sensitivity analysis on a fitted emulator.
 
@@ -21,39 +31,98 @@ def perform_sobol_analysis(model, problem, N=1000):
     dict
         A dictionary containing the Sobol indices.
     """
-    # Generate samples
-    param_values = saltelli.sample(problem, N)
+    # samples
+    param_values = sample(problem, N)
 
-    # Evaluate model
+    # evaluate
     Y = model.predict(param_values)
 
-    # Perform analysis
-    Si = sobol.analyze(problem, Y)
+    # multiple outputs
+    if Y.ndim == 1:
+        Y = Y.reshape(-1, 1)
+    num_outputs = Y.shape[1]
+    output_names = [f"y{i}" for i in range(num_outputs)]
+
+    results = {}
+    for i in range(num_outputs):
+        Si = analyze(problem, Y[:, i])
+        results[output_names[i]] = Si
 
-    return Si
+    return results
 
 
-def plot_sensitivity_indices(Si, problem):
+def sobol_results_to_df(results):
     """
-    Plot the Sobol sensitivity indices.
+    Convert Sobol results to a pandas DataFrame.
 
     Parameters:
     -----------
-    Si : dict
-        The Sobol indices returned by perform_sobol_analysis.
-    problem : dict
-        The problem definition used in the analysis.
+    results : dict
+        The Sobol indices returned by sobol_analysis.
+
+    Returns:
+    --------
+    pd.DataFrame
     """
-    import matplotlib.pyplot as plt
+    rows = []
+    for output, indices in results.items():
+        for index_type in ["S1", "ST", "S2"]:
+            values = indices.get(index_type)
+            conf_values = indices.get(f"{index_type}_conf")
+            if values is None or conf_values is None:
+                continue
+
+            if index_type in ["S1", "ST"]:
+                rows.extend(
+                    {
+                        "output": output,
+                        "parameter": f"X{i+1}",
+                        "index": index_type,
+                        "value": value,
+                        "confidence": conf,
+                    }
+                    for i, (value, conf) in enumerate(zip(values, conf_values))
+                )
+
+            elif index_type == "S2":
+                n = values.shape[0]
+                rows.extend(
+                    {
+                        "output": output,
+                        "parameter": f"X{i+1}-X{j+1}",
+                        "index": index_type,
+                        "value": values[i, j],
+                        "confidence": conf_values[i, j],
+                    }
+                    for i in range(n)
+                    for j in range(i + 1, n)
+                    if not np.isnan(values[i, j])
+                )
+
+    return pd.DataFrame(rows)
+
+
+# def plot_sensitivity_indices(Si, problem):
+#     """
+#     Plot the Sobol sensitivity indices.
+
+#     Parameters:
+#     -----------
+#     Si : dict
+#         The Sobol indices returned by perform_sobol_analysis.
+#     problem : dict
+#         The problem definition used in the analysis.
+#     """
+#     import matplotlib.pyplot as plt
 
-    fig, ax = plt.subplots(figsize=(10, 6))
+#     fig, ax = plt.subplots(figsize=(10, 6))
 
-    indices = Si["S1"]
-    names = problem["names"]
+#     indices = Si["S1"]
+#     names = problem["names"]
 
-    ax.bar(names, indices)
-    ax.set_ylabel("First-order Sobol index")
-    ax.set_title("Sensitivity Analysis Results")
-    plt.xticks(rotation=45)
-    plt.tight_layout()
-    plt.show()
+#     ax.bar(names, indices)
+#     ax.set_ylabel("First-order Sobol index")
+#     ax.set_title("Sensitivity Analysis Results")
+#     plt.xticks(rotation=45)
+#     plt.tight_layout()
+#     plt.show()
diff --git a/tests/test_sensitivity_analysis.py b/tests/test_sensitivity_analysis.py
new file mode 100644
index 00000000..58519b22
--- /dev/null
+++ b/tests/test_sensitivity_analysis.py
@@ -0,0 +1,54 @@
+import numpy as np
+import pytest
+from sklearn.datasets import make_regression
+
+from autoemulate.emulators import RandomForest
+from autoemulate.experimental_design import LatinHypercube
+from autoemulate.sensitivity_analysis import sobol_analysis
+from autoemulate.sensitivity_analysis import sobol_results_to_df
+from autoemulate.simulations.projectile import simulate_projectile
+from autoemulate.simulations.projectile import simulate_projectile_multioutput
+
+
+@pytest.fixture
+def Xy_1d():
+    lhd = LatinHypercube([(-5.0, 1.0), (0.0, 1000.0)])
+    X = lhd.sample(100)
+    y = np.array([simulate_projectile(x) for x in X])
+    return X, y
+
+
+@pytest.fixture
+def Xy_2d():
+    lhd = LatinHypercube([(-5.0, 1.0), (0.0, 1000.0)])
+    X = lhd.sample(100)
+    y = np.array([simulate_projectile_multioutput(x) for x in X])
+    return X, y
+
+
+@pytest.fixture
+def model_1d(Xy_1d):
+    X, y = Xy_1d
+    rf = RandomForest()
+    rf.fit(X, y)
+    return rf
+
+
+@pytest.fixture
+def model_2d(Xy_2d):
+    X, y = Xy_2d
+    rf = RandomForest()
+    rf.fit(X, y)
+    return rf
+
+
+def test_sensitivity_analysis(model_2d):
+    problem = {
+        "num_vars": 2,
+        "names": ["c", "v0"],
+        "bounds": [(-5.0, 1.0), (0.0, 1000.0)],
+    }
+
+    Si = sobol_analysis(model_2d, problem)
+    df = sobol_results_to_df(Si)
+    print(df)