From 2783343b474fab933d6e6bdc7f989c2d2591061a Mon Sep 17 00:00:00 2001 From: mastoffel Date: Wed, 16 Oct 2024 12:04:33 +0100 Subject: [PATCH] add multi-output sensitivity analysis --- autoemulate/compare.py | 27 ------- autoemulate/sensitivity_analysis.py | 119 ++++++++++++++++++++++------ tests/test_sensitivity_analysis.py | 54 +++++++++++++ 3 files changed, 148 insertions(+), 52 deletions(-) create mode 100644 tests/test_sensitivity_analysis.py diff --git a/autoemulate/compare.py b/autoemulate/compare.py index dea1eaee..39c58eed 100644 --- a/autoemulate/compare.py +++ b/autoemulate/compare.py @@ -525,30 +525,3 @@ def plot_eval( ) return fig - - def sensitivity_analysis(self, model, problem, N=1000, plot=True): - """ - Perform sensitivity analysis on a fitted emulator. - - Parameters: - ----------- - model_name : str - The name of the fitted model to analyze. - problem : dict - The problem definition, including 'num_vars', 'names', and 'bounds'. - N : int, optional - The number of samples to generate (default is 1000). - plot : bool, optional - Whether to plot the results (default is True). - - Returns: - -------- - dict - A dictionary containing the Sobol indices. - """ - Si = perform_sobol_analysis(model, problem, N) - - if plot: - plot_sensitivity_indices(Si, problem) - - return Si diff --git a/autoemulate/sensitivity_analysis.py b/autoemulate/sensitivity_analysis.py index 2f8f0a23..02c0e66a 100644 --- a/autoemulate/sensitivity_analysis.py +++ b/autoemulate/sensitivity_analysis.py @@ -1,9 +1,19 @@ import numpy as np -from SALib.analyze import sobol -from SALib.sample import saltelli +import pandas as pd +from SALib.analyze.sobol import analyze +from SALib.sample.sobol import sample -def perform_sobol_analysis(model, problem, N=1000): +def sensitivity_analysis(model, problem, N=1000, as_df=False): + Si = sobol_analysis(model, problem, N) + + if as_df: + return sobol_results_to_df(Si) + else: + return Si + + +def sobol_analysis(model, problem, N=1024): """ Perform Sobol sensitivity analysis on a fitted emulator. @@ -21,39 +31,98 @@ def perform_sobol_analysis(model, problem, N=1000): dict A dictionary containing the Sobol indices. """ - # Generate samples - param_values = saltelli.sample(problem, N) + # samples + param_values = sample(problem, N) - # Evaluate model + # evaluate Y = model.predict(param_values) - # Perform analysis - Si = sobol.analyze(problem, Y) + # multiple outputs + if Y.ndim == 1: + Y = Y.reshape(-1, 1) + num_outputs = Y.shape[1] + output_names = [f"y{i}" for i in range(num_outputs)] + + results = {} + for i in range(num_outputs): + Si = analyze(problem, Y[:, i]) + results[output_names[i]] = Si - return Si + return results -def plot_sensitivity_indices(Si, problem): +def sobol_results_to_df(results): """ - Plot the Sobol sensitivity indices. + Convert Sobol results to a pandas DataFrame. Parameters: ----------- - Si : dict - The Sobol indices returned by perform_sobol_analysis. - problem : dict - The problem definition used in the analysis. + results : dict + The Sobol indices returned by sobol_analysis. + + Returns: + -------- + pd.DataFrame """ - import matplotlib.pyplot as plt + rows = [] + for output, indices in results.items(): + for index_type in ["S1", "ST", "S2"]: + values = indices.get(index_type) + conf_values = indices.get(f"{index_type}_conf") + if values is None or conf_values is None: + continue + + if index_type in ["S1", "ST"]: + rows.extend( + { + "output": output, + "parameter": f"X{i+1}", + "index": index_type, + "value": value, + "confidence": conf, + } + for i, (value, conf) in enumerate(zip(values, conf_values)) + ) + + elif index_type == "S2": + n = values.shape[0] + rows.extend( + { + "output": output, + "parameter": f"X{i+1}-X{j+1}", + "index": index_type, + "value": values[i, j], + "confidence": conf_values[i, j], + } + for i in range(n) + for j in range(i + 1, n) + if not np.isnan(values[i, j]) + ) + + return pd.DataFrame(rows) + + +# def plot_sensitivity_indices(Si, problem): +# """ +# Plot the Sobol sensitivity indices. + +# Parameters: +# ----------- +# Si : dict +# The Sobol indices returned by perform_sobol_analysis. +# problem : dict +# The problem definition used in the analysis. +# """ +# import matplotlib.pyplot as plt - fig, ax = plt.subplots(figsize=(10, 6)) +# fig, ax = plt.subplots(figsize=(10, 6)) - indices = Si["S1"] - names = problem["names"] +# indices = Si["S1"] +# names = problem["names"] - ax.bar(names, indices) - ax.set_ylabel("First-order Sobol index") - ax.set_title("Sensitivity Analysis Results") - plt.xticks(rotation=45) - plt.tight_layout() - plt.show() +# ax.bar(names, indices) +# ax.set_ylabel("First-order Sobol index") +# ax.set_title("Sensitivity Analysis Results") +# plt.xticks(rotation=45) +# plt.tight_layout() +# plt.show() diff --git a/tests/test_sensitivity_analysis.py b/tests/test_sensitivity_analysis.py new file mode 100644 index 00000000..58519b22 --- /dev/null +++ b/tests/test_sensitivity_analysis.py @@ -0,0 +1,54 @@ +import numpy as np +import pytest +from sklearn.datasets import make_regression + +from autoemulate.emulators import RandomForest +from autoemulate.experimental_design import LatinHypercube +from autoemulate.sensitivity_analysis import sobol_analysis +from autoemulate.sensitivity_analysis import sobol_results_to_df +from autoemulate.simulations.projectile import simulate_projectile +from autoemulate.simulations.projectile import simulate_projectile_multioutput + + +@pytest.fixture +def Xy_1d(): + lhd = LatinHypercube([(-5.0, 1.0), (0.0, 1000.0)]) + X = lhd.sample(100) + y = np.array([simulate_projectile(x) for x in X]) + return X, y + + +@pytest.fixture +def Xy_2d(): + lhd = LatinHypercube([(-5.0, 1.0), (0.0, 1000.0)]) + X = lhd.sample(100) + y = np.array([simulate_projectile_multioutput(x) for x in X]) + return X, y + + +@pytest.fixture +def model_1d(Xy_1d): + X, y = Xy_1d + rf = RandomForest() + rf.fit(X, y) + return rf + + +@pytest.fixture +def model_2d(Xy_2d): + X, y = Xy_2d + rf = RandomForest() + rf.fit(X, y) + return rf + + +def test_sensitivity_analysis(model_2d): + problem = { + "num_vars": 2, + "names": ["c", "v0"], + "bounds": [(-5.0, 1.0), (0.0, 1000.0)], + } + + Si = sobol_analysis(model_2d, problem) + df = sobol_results_to_df(Si) + print(df)