From 0b02007eb7400221ae2b366d49845281f4ce9e26 Mon Sep 17 00:00:00 2001 From: Lucas de Oliveira Prates Date: Thu, 28 Nov 2024 18:22:40 -0300 Subject: [PATCH 1/3] ENH: implementing a draft version of the Multivarite Rejectio Sampler (MRS). --- .../multivariate_rejection_sampler.py | 220 ++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 rocketpy/simulation/multivariate_rejection_sampler.py diff --git a/rocketpy/simulation/multivariate_rejection_sampler.py b/rocketpy/simulation/multivariate_rejection_sampler.py new file mode 100644 index 000000000..160ba78cf --- /dev/null +++ b/rocketpy/simulation/multivariate_rejection_sampler.py @@ -0,0 +1,220 @@ +""" +Multivariate Rejection Sampling Module for RocketPy + +Notes +----- +This module is still under active development, and some features or attributes may +change in future versions. Users are encouraged to check for updates and read the +latest documentation. +""" + +import json +from random import random + +from rocketpy._encoders import RocketPyEncoder + + +class MultivariateRejectionSampler: + """Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo + results. + """ + + def __init__( + self, + montecarlo_filepath, + mrs_filepath, + distribution_dict, + ): + """Initializes Multivariate Rejection Sampler (MRS) class + + Parameters + ---------- + montecarlo_filepath : str + Filepath prefixes to the files created from a MonteCarlo simulation + results. + mrs_filepath : str + Filepath prefix to MRS obtained samples. The files created follow the same + structure as those created by the MonteCarlo class. + distribution : dict + Dictionary whose keys contain the name whose distribution changed. The values + are tuples or lists with two entries. The first entry is a probability + density (mass) function for the old distribution, while the second entry + is the probability density function for the new distribution. + + Returns + ------- + None + """ + self.montecarlo_filepath = montecarlo_filepath + self.mrs_filepath = mrs_filepath + self.distribution_dict = distribution_dict + self.original_sample_size = 0 + self.sup_ratio = 1 + self.expected_sample_size = None + self.final_sample_size = None + # TODO: is there a better way to construct input/output_list? + # Iterating and appending over lists is costly. However, the + # alternative, reading the file twice to get the number of lines, + # also does not seem to be a good option. + self.output_list = [] + self.input_list = [] + self.__setup_input() + self.__load_output() + + def __setup_input(self): + """Loads, validate and compute information from monte carlo + input with a single read from the file. + + This function does three things: + 1) Load: Loads the input data from MonteCarlo into python + objects so the sampling process does not require reading from + disk; + 2) Validate: Validates that the keys in 'distribution_dict' exist in + the input json created by the monte carlo; + 3) Compute: Computes the supremum of the probability ratios, used in the + sample function. + + While these three tasks could be disentangled to get clearer + code, the implementation as done here only requires a single + read from disk. + """ + input_filename = f"{self.montecarlo_filepath}.inputs.txt" + + try: + input_file = open(input_filename, "r+", encoding="utf-8") + except FileNotFoundError as e: + raise FileNotFoundError( + f"Input file from monte carlo {input_filename} " "not found!" + ) from e + + for line in input_file.readlines(): + try: + # loads data + line_json = json.loads(line) + self.input_list.append(line_json) + self.original_sample_size += 1 + + prob_ratio = 1 + for parameter in self.distribution_dict.keys(): + # checks dictionary keys + if parameter not in line_json.keys(): + raise ValueError( + f"Parameter key {parameter} from 'distribution_dict' " + "not found in input file!" + ) + parameter_value = line_json[parameter] + + prob_ratio *= self.__compute_probability_ratio( + parameter, parameter_value + ) + # updates the supremum of the ratio + self.sup_ratio = max(self.sup_ratio, prob_ratio) + except Exception as e: + raise ValueError( + "An error occurred while reading " + f"the monte carlo input file {input_filename}!" + ) from e + + self.expected_sample_size = self.original_sample_size // self.sup_ratio + input_file.close() + + def __load_output(self): + """Load data from monte carlo outputs.""" + output_filename = f"{self.montecarlo_filepath}.outputs.txt" + sample_size_output = 0 # sanity check + + try: + output_file = open(output_filename, "r+", encoding="utf-8") + except FileNotFoundError as e: + raise FileNotFoundError( + f"Output file from monte carlo {output_filename} " "not found!" + ) from e + + for line in output_file.readlines(): + try: + line_json = json.loads(line) + self.output_list.append(line_json) + sample_size_output += 1 + except Exception as e: + raise ValueError( + "An error occurred while reading " + f"the monte carlo output file {output_filename}!" + ) from e + + if self.original_sample_size != sample_size_output: + raise ValueError( + "Monte carlo input and output files have a different " + "number of samples!" + ) + + output_file.close() + + def sample(self): + """Performs rejection sampling and saves data + + Returns + ------- + None + """ + + mrs_input_file = open(f"{self.mrs_filepath}.inputs.txt", "w+", encoding="utf-8") + mrs_output_file = open( + f"{self.mrs_filepath}.outputs.txt", "w+", encoding="utf-8" + ) + mrs_error_file = open(f"{self.mrs_filepath}.errors.txt", "w+", encoding="utf-8") + + # compute sup ratio + for line_input_json, line_output_json in zip(self.input_list, self.output_list): + acceptance_prob = 1 / self.sup_ratio # probability the sample is accepted + for parameter in self.distribution_dict.keys(): + parameter_value = line_input_json[parameter] + acceptance_prob *= self.__compute_probability_ratio( + parameter, + parameter_value, + ) + # sample is accepted, write output + if random() < acceptance_prob: + mrs_input_file.write( + json.dumps(line_input_json, cls=RocketPyEncoder) + "\n" + ) + mrs_output_file.write( + json.dumps(line_output_json, cls=RocketPyEncoder) + "\n" + ) + + mrs_input_file.close() + mrs_output_file.close() + mrs_error_file.close() + + def __compute_probability_ratio(self, parameter, parameter_value): + """Computes the ratio of the new probability to the old probability + + Parameters + ---------- + parameter : str + Name of the parameter to evaluate the probability. + parameter_value : any + Value of the parameter to be passed to the density functions. + + Returns + ------- + float + The ratio of the new probability density function (numerator) + to the old one (denominator). + + Raises + ------ + ValueError + Raises exception if an error occurs when computing the ratio. + """ + try: + old_pdf = self.distribution_dict[parameter][0] + new_pdf = self.distribution_dict[parameter][1] + probability_ratio = new_pdf(parameter_value) / old_pdf(parameter_value) + except Exception as e: + raise ValueError( + "An error occurred while evaluating the " + "ratio for 'distribution_dict' probability " + f"parameter key {parameter}!" + ) from e + + return probability_ratio From d3ab5f21f3681bb18ef28a0d43c0ef6dc56470d5 Mon Sep 17 00:00:00 2001 From: Lucas de Oliveira Prates Date: Thu, 28 Nov 2024 18:23:22 -0300 Subject: [PATCH 2/3] MNT: quick notebook to test MRS during development --- test_mrs.ipynb | 176 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 test_mrs.ipynb diff --git a/test_mrs.ipynb b/test_mrs.ipynb new file mode 100644 index 000000000..09015b197 --- /dev/null +++ b/test_mrs.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quick test notebook for MRS" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "# We import these lines for debugging purposes, only works on Jupyter Notebook\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "from rocketpy.simulation.multivariate_rejection_sampler import MultivariateRejectionSampler\n", + "from rocketpy import MonteCarlo\n", + "from scipy.stats import norm\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "montecarlo_filepath = \"docs/notebooks/monte_carlo_analysis/monte_carlo_analysis_outputs/monte_carlo_class_example\"\n", + "mrs_filepath = \"mrs\"\n", + "old_mass_pdf = norm(15.426, 0.5).pdf\n", + "new_mass_pdf = norm(15, 0.5) .pdf\n", + "distribution_dict = {\n", + " \"mass\": (old_mass_pdf, new_mass_pdf),\n", + "}\n", + "mrs = MultivariateRejectionSampler(\n", + " montecarlo_filepath=montecarlo_filepath,\n", + " mrs_filepath=mrs_filepath,\n", + " distribution_dict=distribution_dict,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "107.0" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mrs.expected_sample_size" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "mrs.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following input file was imported: mrs.inputs.txt\n", + "A total of 109 simulations results were loaded from the following output file: mrs.outputs.txt\n", + "\n", + "The following error file was imported: mrs.errors.txt\n" + ] + } + ], + "source": [ + "mrs_results = MonteCarlo(mrs_filepath, None, None, None)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A total of 109 simulations results were loaded from the following output file: mrs.outputs.txt\n", + "\n", + "The following input file was imported: mrs.inputs.txt\n", + "The following error file was imported: mrs.errors.txt\n" + ] + } + ], + "source": [ + "mrs_results.import_results()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MRS mass mean after resample: 15.029610376989238\n", + "MRS mass std after resample: 0.5213162519453568\n" + ] + } + ], + "source": [ + "mrs_mass_list = []\n", + "for single_input_dict in mrs_results.inputs_log:\n", + " mrs_mass_list.append(single_input_dict[\"mass\"])\n", + "\n", + "print(f\"MRS mass mean after resample: {np.mean(mrs_mass_list)}\")\n", + "print(f\"MRS mass std after resample: {np.std(mrs_mass_list)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "testnotebook", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 3a3c9e64a828af40558e3849c6040450e7611430 Mon Sep 17 00:00:00 2001 From: Lucas de Oliveira Prates Date: Thu, 19 Dec 2024 21:56:02 -0300 Subject: [PATCH 3/3] MNT: refactoring class to match review suggestions --- .vscode/settings.json | 1 + .../notebooks/test_mrs.ipynb | 98 ++++--- .../multivariate_rejection_sampler.py | 257 ++++++++++-------- 3 files changed, 203 insertions(+), 153 deletions(-) rename test_mrs.ipynb => docs/notebooks/test_mrs.ipynb (62%) diff --git a/.vscode/settings.json b/.vscode/settings.json index 0af75e918..cda3677b1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -285,6 +285,7 @@ "statsmodels", "STFT", "subintervals", + "supremum", "suptitle", "supxlabel", "supylabel", diff --git a/test_mrs.ipynb b/docs/notebooks/test_mrs.ipynb similarity index 62% rename from test_mrs.ipynb rename to docs/notebooks/test_mrs.ipynb index 09015b197..d25e7c9a2 100644 --- a/test_mrs.ipynb +++ b/docs/notebooks/test_mrs.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -29,11 +29,13 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "from rocketpy.simulation.multivariate_rejection_sampler import MultivariateRejectionSampler\n", + "from rocketpy.simulation.multivariate_rejection_sampler import (\n", + " MultivariateRejectionSampler,\n", + ")\n", "from rocketpy import MonteCarlo\n", "from scipy.stats import norm\n", "import numpy as np" @@ -41,66 +43,47 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ - "montecarlo_filepath = \"docs/notebooks/monte_carlo_analysis/monte_carlo_analysis_outputs/monte_carlo_class_example\"\n", - "mrs_filepath = \"mrs\"\n", + "monte_carlo_filepath = (\n", + " \"monte_carlo_analysis/monte_carlo_analysis_outputs/monte_carlo_class_example\"\n", + ")\n", + "mrs_filepath = \"monte_carlo_analysis/monte_carlo_analysis_outputs/mrs\"\n", "old_mass_pdf = norm(15.426, 0.5).pdf\n", - "new_mass_pdf = norm(15, 0.5) .pdf\n", + "new_mass_pdf = norm(15, 0.5).pdf\n", "distribution_dict = {\n", " \"mass\": (old_mass_pdf, new_mass_pdf),\n", "}\n", "mrs = MultivariateRejectionSampler(\n", - " montecarlo_filepath=montecarlo_filepath,\n", + " monte_carlo_filepath=monte_carlo_filepath,\n", " mrs_filepath=mrs_filepath,\n", - " distribution_dict=distribution_dict,\n", ")" ] }, { "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "107.0" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mrs.expected_sample_size" - ] - }, - { - "cell_type": "code", - "execution_count": 90, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ - "mrs.sample()" + "mrs.sample(distribution_dict=distribution_dict)" ] }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The following input file was imported: mrs.inputs.txt\n", - "A total of 109 simulations results were loaded from the following output file: mrs.outputs.txt\n", + "The following input file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.inputs.txt\n", + "A total of 116 simulations results were loaded from the following output file: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.outputs.txt\n", "\n", - "The following error file was imported: mrs.errors.txt\n" + "The following error file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.errors.txt\n" ] } ], @@ -110,17 +93,17 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "A total of 109 simulations results were loaded from the following output file: mrs.outputs.txt\n", + "A total of 116 simulations results were loaded from the following output file: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.outputs.txt\n", "\n", - "The following input file was imported: mrs.inputs.txt\n", - "The following error file was imported: mrs.errors.txt\n" + "The following input file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.inputs.txt\n", + "The following error file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.errors.txt\n" ] } ], @@ -130,15 +113,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "MRS mass mean after resample: 15.029610376989238\n", - "MRS mass std after resample: 0.5213162519453568\n" + "MRS mass mean after resample: 15.041934326472004\n", + "MRS mass std after resample: 0.48924085702427966\n" ] } ], @@ -150,11 +133,38 @@ "print(f\"MRS mass mean after resample: {np.mean(mrs_mass_list)}\")\n", "print(f\"MRS mass std after resample: {np.std(mrs_mass_list)}\")" ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "107.0" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mrs.expected_sample_size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "testnotebook", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -168,7 +178,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/rocketpy/simulation/multivariate_rejection_sampler.py b/rocketpy/simulation/multivariate_rejection_sampler.py index 160ba78cf..e1e63893e 100644 --- a/rocketpy/simulation/multivariate_rejection_sampler.py +++ b/rocketpy/simulation/multivariate_rejection_sampler.py @@ -9,118 +9,109 @@ """ import json +from dataclasses import dataclass from random import random from rocketpy._encoders import RocketPyEncoder +@dataclass +class SampleInformation: + """Sample information used in the MRS""" + + inputs_json: dict = None + outputs_json: dict = None + probability_ratio: float = None + acceptance_probability: float = None + + class MultivariateRejectionSampler: """Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo - results. + results. The class currently assumes that all input variables are independent + when performing the + + Attributes + ---------- """ def __init__( self, - montecarlo_filepath, + monte_carlo_filepath, mrs_filepath, - distribution_dict, ): """Initializes Multivariate Rejection Sampler (MRS) class Parameters ---------- - montecarlo_filepath : str + monte_carlo_filepath : str Filepath prefixes to the files created from a MonteCarlo simulation - results. + results and used as input for resampling. mrs_filepath : str Filepath prefix to MRS obtained samples. The files created follow the same - structure as those created by the MonteCarlo class. - distribution : dict - Dictionary whose keys contain the name whose distribution changed. The values - are tuples or lists with two entries. The first entry is a probability - density (mass) function for the old distribution, while the second entry - is the probability density function for the new distribution. + structure as those created by the MonteCarlo class but now containing the + selected sub-samples. Returns ------- None """ - self.montecarlo_filepath = montecarlo_filepath + self.monte_carlo_filepath = monte_carlo_filepath self.mrs_filepath = mrs_filepath - self.distribution_dict = distribution_dict + self.distribution_dict = None self.original_sample_size = 0 self.sup_ratio = 1 self.expected_sample_size = None self.final_sample_size = None - # TODO: is there a better way to construct input/output_list? - # Iterating and appending over lists is costly. However, the - # alternative, reading the file twice to get the number of lines, - # also does not seem to be a good option. - self.output_list = [] - self.input_list = [] + self.input_variables_names = set() + self.output_variables_names = set() + self.all_sample_list = [] + self.accepted_sample_list = [] self.__setup_input() self.__load_output() def __setup_input(self): - """Loads, validate and compute information from monte carlo - input with a single read from the file. - - This function does three things: - 1) Load: Loads the input data from MonteCarlo into python - objects so the sampling process does not require reading from - disk; - 2) Validate: Validates that the keys in 'distribution_dict' exist in - the input json created by the monte carlo; - 3) Compute: Computes the supremum of the probability ratios, used in the - sample function. - - While these three tasks could be disentangled to get clearer - code, the implementation as done here only requires a single - read from disk. + """Loads input information from monte carlo in a SampleInformation + object """ - input_filename = f"{self.montecarlo_filepath}.inputs.txt" + input_filename = f"{self.monte_carlo_filepath}.inputs.txt" try: input_file = open(input_filename, "r+", encoding="utf-8") except FileNotFoundError as e: raise FileNotFoundError( - f"Input file from monte carlo {input_filename} " "not found!" + f"Input file from monte carlo {input_filename} not found!" ) from e - for line in input_file.readlines(): - try: - # loads data + try: + for line in input_file.readlines(): + sample_info = SampleInformation() line_json = json.loads(line) - self.input_list.append(line_json) - self.original_sample_size += 1 + sample_info.inputs_json = line_json + self.all_sample_list.append(sample_info) - prob_ratio = 1 - for parameter in self.distribution_dict.keys(): - # checks dictionary keys - if parameter not in line_json.keys(): + # sets and validates input variables names + if not self.input_variables_names: + self.input_variables_names = set(line_json.keys()) + else: + if self.input_variables_names != set(line_json.keys()): raise ValueError( - f"Parameter key {parameter} from 'distribution_dict' " - "not found in input file!" + "Input file from Monte Carlo contains different " + "variables for different lines" ) - parameter_value = line_json[parameter] - - prob_ratio *= self.__compute_probability_ratio( - parameter, parameter_value - ) - # updates the supremum of the ratio - self.sup_ratio = max(self.sup_ratio, prob_ratio) - except Exception as e: - raise ValueError( - "An error occurred while reading " - f"the monte carlo input file {input_filename}!" - ) from e + self.original_sample_size += 1 + except Exception as e: + raise ValueError( + "An error occurred while reading " + f"the monte carlo input file {input_filename}!" + ) from e - self.expected_sample_size = self.original_sample_size // self.sup_ratio input_file.close() def __load_output(self): - """Load data from monte carlo outputs.""" - output_filename = f"{self.montecarlo_filepath}.outputs.txt" + """Loads output information from monte carlo in a SampleInformation + object. + """ + output_filename = f"{self.monte_carlo_filepath}.outputs.txt" sample_size_output = 0 # sanity check try: @@ -130,91 +121,139 @@ def __load_output(self): f"Output file from monte carlo {output_filename} " "not found!" ) from e - for line in output_file.readlines(): - try: + try: + for line in output_file.readlines(): + if sample_size_output > self.original_sample_size: + raise ValueError( + "Monte carlo output has more lines than the input file!" + ) line_json = json.loads(line) - self.output_list.append(line_json) + self.all_sample_list[sample_size_output].outputs_json = line_json sample_size_output += 1 - except Exception as e: - raise ValueError( - "An error occurred while reading " - f"the monte carlo output file {output_filename}!" - ) from e - - if self.original_sample_size != sample_size_output: + except Exception as e: + raise ValueError( + "An error occurred while reading " + f"the monte carlo output file {output_filename}!" + ) from e + if self.original_sample_size > sample_size_output: raise ValueError( - "Monte carlo input and output files have a different " - "number of samples!" + "Monte carlo output file has fewer lines than the input file!" ) output_file.close() - def sample(self): + def __validate_distribution_dict(self, distribution_dict): + """Checks that the variables passed in the distribution dictionary were + in the input file. + + """ + input_variables_names = set(distribution_dict.keys()) + for variable in input_variables_names: + if variable not in self.input_variables_names: + raise ValueError( + f"Variable key {variable} from 'distribution_dict' " + "not found in input file!" + ) + + def sample(self, distribution_dict): """Performs rejection sampling and saves data + Parameters + ---------- + distribution : dict + Dictionary whose keys contain the name whose distribution changed. + The values are tuples or lists with two entries. The first entry is + a probability density (mass) function for the old distribution, + while the second entry is the probability density function for the + new distribution. + Returns ------- None """ + self.__validate_distribution_dict(distribution_dict) + mrs_input_file = open(f"{self.mrs_filepath}.inputs.txt", "w+", encoding="utf-8") mrs_output_file = open( f"{self.mrs_filepath}.outputs.txt", "w+", encoding="utf-8" ) mrs_error_file = open(f"{self.mrs_filepath}.errors.txt", "w+", encoding="utf-8") - # compute sup ratio - for line_input_json, line_output_json in zip(self.input_list, self.output_list): - acceptance_prob = 1 / self.sup_ratio # probability the sample is accepted - for parameter in self.distribution_dict.keys(): - parameter_value = line_input_json[parameter] - acceptance_prob *= self.__compute_probability_ratio( - parameter, - parameter_value, - ) - # sample is accepted, write output - if random() < acceptance_prob: - mrs_input_file.write( - json.dumps(line_input_json, cls=RocketPyEncoder) + "\n" - ) - mrs_output_file.write( - json.dumps(line_output_json, cls=RocketPyEncoder) + "\n" - ) + self.__setup_probabilities(distribution_dict) + + try: + for sample in self.all_sample_list: + if random() < sample.acceptance_probability: + mrs_input_file.write( + json.dumps(sample.inputs_json, cls=RocketPyEncoder) + "\n" + ) + mrs_output_file.write( + json.dumps(sample.outputs_json, cls=RocketPyEncoder) + "\n" + ) + except Exception as e: + raise ValueError( + "An error occurred while writing the selected sample to the " + "output files" + ) from e mrs_input_file.close() mrs_output_file.close() mrs_error_file.close() - def __compute_probability_ratio(self, parameter, parameter_value): - """Computes the ratio of the new probability to the old probability + def __setup_probabilities(self, distribution_dict): + """Computes the probability ratio, probability ratio supremum and acceptance + probability for each sample. Parameters ---------- - parameter : str - Name of the parameter to evaluate the probability. - parameter_value : any - Value of the parameter to be passed to the density functions. + distribution : dict + Dictionary whose keys contain the name whose distribution changed. The values + are tuples or lists with two entries. The first entry is a probability + density (mass) function for the old distribution, while the second entry + is the probability density function for the new distribution. + """ + self.sup_ratio = 1 + for sample in self.all_sample_list: + sample.probability_ratio = self.__compute_probability_ratio( + sample, distribution_dict + ) + self.sup_ratio = max(self.sup_ratio, sample.probability_ratio) - Returns - ------- - float - The ratio of the new probability density function (numerator) - to the old one (denominator). + for sample in self.all_sample_list: + sample.acceptance_probability = sample.probability_ratio / self.sup_ratio + self.expected_sample_size = self.original_sample_size // self.sup_ratio + + def __compute_probability_ratio(self, sample, distribution_dict): + """Computes the ratio of the new probability to the old probability + for the given sample + + Parameters + ---------- + sample: SampleInformation + Sample information used to extract the values to evaluate the + distributions pdf. + distribution : dict + Dictionary whose keys contain the name whose distribution changed. The values + are tuples or lists with two entries. The first entry is a probability + density (mass) function for the old distribution, while the second entry + is the probability density function for the new distribution. Raises ------ ValueError Raises exception if an error occurs when computing the ratio. """ + probability_ratio = 1 try: - old_pdf = self.distribution_dict[parameter][0] - new_pdf = self.distribution_dict[parameter][1] - probability_ratio = new_pdf(parameter_value) / old_pdf(parameter_value) + for variable in distribution_dict.keys(): + value = sample.inputs_json[variable] + old_pdf = distribution_dict[variable][0] + new_pdf = distribution_dict[variable][1] + probability_ratio *= new_pdf(value) / old_pdf(value) except Exception as e: raise ValueError( - "An error occurred while evaluating the " - "ratio for 'distribution_dict' probability " - f"parameter key {parameter}!" + "An error occurred while evaluating the probability ratio" ) from e return probability_ratio