diff --git a/.vscode/settings.json b/.vscode/settings.json index 0af75e918..cda3677b1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -285,6 +285,7 @@ "statsmodels", "STFT", "subintervals", + "supremum", "suptitle", "supxlabel", "supylabel", diff --git a/docs/notebooks/test_mrs.ipynb b/docs/notebooks/test_mrs.ipynb new file mode 100644 index 000000000..d25e7c9a2 --- /dev/null +++ b/docs/notebooks/test_mrs.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quick test notebook for MRS" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "# We import these lines for debugging purposes, only works on Jupyter Notebook\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "from rocketpy.simulation.multivariate_rejection_sampler import (\n", + " MultivariateRejectionSampler,\n", + ")\n", + "from rocketpy import MonteCarlo\n", + "from scipy.stats import norm\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "monte_carlo_filepath = (\n", + " \"monte_carlo_analysis/monte_carlo_analysis_outputs/monte_carlo_class_example\"\n", + ")\n", + "mrs_filepath = \"monte_carlo_analysis/monte_carlo_analysis_outputs/mrs\"\n", + "old_mass_pdf = norm(15.426, 0.5).pdf\n", + "new_mass_pdf = norm(15, 0.5).pdf\n", + "distribution_dict = {\n", + " \"mass\": (old_mass_pdf, new_mass_pdf),\n", + "}\n", + "mrs = MultivariateRejectionSampler(\n", + " monte_carlo_filepath=monte_carlo_filepath,\n", + " mrs_filepath=mrs_filepath,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "mrs.sample(distribution_dict=distribution_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The following input file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.inputs.txt\n", + "A total of 116 simulations results were loaded from the following output file: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.outputs.txt\n", + "\n", + "The following error file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.errors.txt\n" + ] + } + ], + "source": [ + "mrs_results = MonteCarlo(mrs_filepath, None, None, None)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A total of 116 simulations results were loaded from the following output file: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.outputs.txt\n", + "\n", + "The following input file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.inputs.txt\n", + "The following error file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.errors.txt\n" + ] + } + ], + "source": [ + "mrs_results.import_results()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MRS mass mean after resample: 15.041934326472004\n", + "MRS mass std after resample: 0.48924085702427966\n" + ] + } + ], + "source": [ + "mrs_mass_list = []\n", + "for single_input_dict in mrs_results.inputs_log:\n", + " mrs_mass_list.append(single_input_dict[\"mass\"])\n", + "\n", + "print(f\"MRS mass mean after resample: {np.mean(mrs_mass_list)}\")\n", + "print(f\"MRS mass std after resample: {np.std(mrs_mass_list)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "107.0" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mrs.expected_sample_size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/rocketpy/simulation/multivariate_rejection_sampler.py b/rocketpy/simulation/multivariate_rejection_sampler.py new file mode 100644 index 000000000..e1e63893e --- /dev/null +++ b/rocketpy/simulation/multivariate_rejection_sampler.py @@ -0,0 +1,259 @@ +""" +Multivariate Rejection Sampling Module for RocketPy + +Notes +----- +This module is still under active development, and some features or attributes may +change in future versions. Users are encouraged to check for updates and read the +latest documentation. +""" + +import json +from dataclasses import dataclass +from random import random + +from rocketpy._encoders import RocketPyEncoder + + +@dataclass +class SampleInformation: + """Sample information used in the MRS""" + + inputs_json: dict = None + outputs_json: dict = None + probability_ratio: float = None + acceptance_probability: float = None + + +class MultivariateRejectionSampler: + """Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo + results. The class currently assumes that all input variables are independent + when performing the + + Attributes + ---------- + """ + + def __init__( + self, + monte_carlo_filepath, + mrs_filepath, + ): + """Initializes Multivariate Rejection Sampler (MRS) class + + Parameters + ---------- + monte_carlo_filepath : str + Filepath prefixes to the files created from a MonteCarlo simulation + results and used as input for resampling. + mrs_filepath : str + Filepath prefix to MRS obtained samples. The files created follow the same + structure as those created by the MonteCarlo class but now containing the + selected sub-samples. + + Returns + ------- + None + """ + self.monte_carlo_filepath = monte_carlo_filepath + self.mrs_filepath = mrs_filepath + self.distribution_dict = None + self.original_sample_size = 0 + self.sup_ratio = 1 + self.expected_sample_size = None + self.final_sample_size = None + self.input_variables_names = set() + self.output_variables_names = set() + self.all_sample_list = [] + self.accepted_sample_list = [] + self.__setup_input() + self.__load_output() + + def __setup_input(self): + """Loads input information from monte carlo in a SampleInformation + object + """ + input_filename = f"{self.monte_carlo_filepath}.inputs.txt" + + try: + input_file = open(input_filename, "r+", encoding="utf-8") + except FileNotFoundError as e: + raise FileNotFoundError( + f"Input file from monte carlo {input_filename} not found!" + ) from e + + try: + for line in input_file.readlines(): + sample_info = SampleInformation() + line_json = json.loads(line) + sample_info.inputs_json = line_json + self.all_sample_list.append(sample_info) + + # sets and validates input variables names + if not self.input_variables_names: + self.input_variables_names = set(line_json.keys()) + else: + if self.input_variables_names != set(line_json.keys()): + raise ValueError( + "Input file from Monte Carlo contains different " + "variables for different lines" + ) + self.original_sample_size += 1 + except Exception as e: + raise ValueError( + "An error occurred while reading " + f"the monte carlo input file {input_filename}!" + ) from e + + input_file.close() + + def __load_output(self): + """Loads output information from monte carlo in a SampleInformation + object. + """ + output_filename = f"{self.monte_carlo_filepath}.outputs.txt" + sample_size_output = 0 # sanity check + + try: + output_file = open(output_filename, "r+", encoding="utf-8") + except FileNotFoundError as e: + raise FileNotFoundError( + f"Output file from monte carlo {output_filename} " "not found!" + ) from e + + try: + for line in output_file.readlines(): + if sample_size_output > self.original_sample_size: + raise ValueError( + "Monte carlo output has more lines than the input file!" + ) + line_json = json.loads(line) + self.all_sample_list[sample_size_output].outputs_json = line_json + sample_size_output += 1 + except Exception as e: + raise ValueError( + "An error occurred while reading " + f"the monte carlo output file {output_filename}!" + ) from e + if self.original_sample_size > sample_size_output: + raise ValueError( + "Monte carlo output file has fewer lines than the input file!" + ) + + output_file.close() + + def __validate_distribution_dict(self, distribution_dict): + """Checks that the variables passed in the distribution dictionary were + in the input file. + + """ + input_variables_names = set(distribution_dict.keys()) + for variable in input_variables_names: + if variable not in self.input_variables_names: + raise ValueError( + f"Variable key {variable} from 'distribution_dict' " + "not found in input file!" + ) + + def sample(self, distribution_dict): + """Performs rejection sampling and saves data + + Parameters + ---------- + distribution : dict + Dictionary whose keys contain the name whose distribution changed. + The values are tuples or lists with two entries. The first entry is + a probability density (mass) function for the old distribution, + while the second entry is the probability density function for the + new distribution. + + Returns + ------- + None + """ + + self.__validate_distribution_dict(distribution_dict) + + mrs_input_file = open(f"{self.mrs_filepath}.inputs.txt", "w+", encoding="utf-8") + mrs_output_file = open( + f"{self.mrs_filepath}.outputs.txt", "w+", encoding="utf-8" + ) + mrs_error_file = open(f"{self.mrs_filepath}.errors.txt", "w+", encoding="utf-8") + + self.__setup_probabilities(distribution_dict) + + try: + for sample in self.all_sample_list: + if random() < sample.acceptance_probability: + mrs_input_file.write( + json.dumps(sample.inputs_json, cls=RocketPyEncoder) + "\n" + ) + mrs_output_file.write( + json.dumps(sample.outputs_json, cls=RocketPyEncoder) + "\n" + ) + except Exception as e: + raise ValueError( + "An error occurred while writing the selected sample to the " + "output files" + ) from e + + mrs_input_file.close() + mrs_output_file.close() + mrs_error_file.close() + + def __setup_probabilities(self, distribution_dict): + """Computes the probability ratio, probability ratio supremum and acceptance + probability for each sample. + + Parameters + ---------- + distribution : dict + Dictionary whose keys contain the name whose distribution changed. The values + are tuples or lists with two entries. The first entry is a probability + density (mass) function for the old distribution, while the second entry + is the probability density function for the new distribution. + """ + self.sup_ratio = 1 + for sample in self.all_sample_list: + sample.probability_ratio = self.__compute_probability_ratio( + sample, distribution_dict + ) + self.sup_ratio = max(self.sup_ratio, sample.probability_ratio) + + for sample in self.all_sample_list: + sample.acceptance_probability = sample.probability_ratio / self.sup_ratio + self.expected_sample_size = self.original_sample_size // self.sup_ratio + + def __compute_probability_ratio(self, sample, distribution_dict): + """Computes the ratio of the new probability to the old probability + for the given sample + + Parameters + ---------- + sample: SampleInformation + Sample information used to extract the values to evaluate the + distributions pdf. + distribution : dict + Dictionary whose keys contain the name whose distribution changed. The values + are tuples or lists with two entries. The first entry is a probability + density (mass) function for the old distribution, while the second entry + is the probability density function for the new distribution. + + Raises + ------ + ValueError + Raises exception if an error occurs when computing the ratio. + """ + probability_ratio = 1 + try: + for variable in distribution_dict.keys(): + value = sample.inputs_json[variable] + old_pdf = distribution_dict[variable][0] + new_pdf = distribution_dict[variable][1] + probability_ratio *= new_pdf(value) / old_pdf(value) + except Exception as e: + raise ValueError( + "An error occurred while evaluating the probability ratio" + ) from e + + return probability_ratio