From 9e53703ed0c27952472f2d61f8142b4476b987bb Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Wed, 17 Jan 2024 17:06:46 +0100 Subject: [PATCH] restore randomized abstraction --- paynt/cli.py | 11 +++-- paynt/synthesizer/policy_tree.py | 69 ++++++++++++++++++++++++++------ paynt/synthesizer/statistic.py | 1 - 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/paynt/cli.py b/paynt/cli.py index 8b268d4aa..06f9face7 100644 --- a/paynt/cli.py +++ b/paynt/cli.py @@ -119,7 +119,11 @@ def setup_logger(log_path = None): @click.option("--mdp-split-wrt-mdp", is_flag=True, default=False, help="# if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used") @click.option("--mdp-discard-unreachable-actions", is_flag=True, default=False, - help="# if set, unreachable choices will be discarded from game abstraction scheduler") + help="# if set, unreachable choices will be discarded from the splitting scheduler") +@click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False, + help="# if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" + + " MDP abstraction scheduler will be used for splitting" +) @click.option( "--ce-generator", type=click.Choice(["dtmc", "mdp"]), default="dtmc", show_default=True, @@ -138,7 +142,7 @@ def paynt_run( use_storm_cutoffs, unfold_strategy_storm, export_fsc_storm, export_fsc_paynt, export_evaluation, all_in_one, - mdp_split_wrt_mdp, mdp_discard_unreachable_actions, + mdp_split_wrt_mdp, mdp_discard_unreachable_actions, mdp_use_randomized_abstraction, ce_generator, profiling ): @@ -157,7 +161,8 @@ def paynt_run( paynt.quotient.pomdp.PomdpQuotient.posterior_aware = posterior_aware paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp - paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_actions_in_game_scheduler = mdp_discard_unreachable_actions + paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_actions = mdp_discard_unreachable_actions + paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction storm_control = None if storm_pomdp: diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index aec52528b..19a9ef62f 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -484,8 +484,10 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer): double_check_policy_tree_leaves = False # if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used split_wrt_mdp_scheduler = False - # if True, unreachable choices will be discarded from game abstraction scheduler - discard_unreachable_actions_in_game_scheduler = False + # if True, unreachable choices will be discarded from the splitting scheduler + discard_unreachable_actions = False + # if True, randomized abstraction guess-and-verify will be used instead of game abstraction + use_randomized_abstraction = False @property def method_name(self): @@ -543,24 +545,57 @@ def solve_game_abstraction(self, family, prop, game_solver): game_policy = game_policy_fixed return game_policy,game_sat - def parse_game_scheduler(self, game_solver): - state_values = game_solver.solution_state_values - state_to_choice = game_solver.solution_state_to_quotient_choice.copy() + def try_randomized_abstraction(self, family, prop): + # build randomized abstraction + choice_to_action = [] + for choice in range(family.mdp.choices): + action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]] + choice_to_action.append(action) + state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action) + model,choice_to_action = payntbind.synthesis.randomize_action_variant(family.mdp.model, state_action_choices) + + # model check + result = stormpy.model_checking(model, prop.formula, extract_scheduler=True, environment=Property.environment) + self.stat.iteration(model) + value = result.at(model.initial_states[0]) + policy_sat = prop.satisfies_threshold(value) # does this value matter? + + # extract policy for the quotient + scheduler = result.scheduler + policy = self.quotient.empty_policy() + for state in range(model.nr_states): + state_choice = scheduler.get_choice(state).get_deterministic_choice() + choice = model.transition_matrix.get_row_group_start(state) + state_choice + action = choice_to_action[choice] + quotient_state = family.mdp.quotient_state_map[state] + policy[quotient_state] = action + + # apply policy and check if it is SAT for all MDPs in the family + policy_sat = self.verify_policy(family, prop, policy) + + return policy,policy_sat + + def state_to_choice_to_scheduler(self, state_to_choice): # uncomment this to use only reachable choices of the game scheduler - if SynthesizerPolicyTree.discard_unreachable_actions_in_game_scheduler: + if SynthesizerPolicyTree.discard_unreachable_actions: state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice) scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) hole_selection = self.quotient.coloring.collectHoleOptions(scheduler_choices) + return scheduler_choices,hole_selection + + def parse_game_scheduler(self, game_solver): + state_values = game_solver.solution_state_values + state_to_choice = game_solver.solution_state_to_quotient_choice.copy() + scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice) return scheduler_choices,state_values,hole_selection def parse_mdp_scheduler(self, family, mdp_result): state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, mdp_result.result.scheduler) - scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) + scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice) state_values = [0] * self.quotient.quotient_mdp.nr_states for state in range(family.mdp.states): quotient_state = family.mdp.quotient_state_map[state] state_values[quotient_state] = mdp_result.result.at(state) - hole_selection = self.quotient.coloring.collectHoleOptions(scheduler_choices) return scheduler_choices,state_values,hole_selection @@ -573,10 +608,18 @@ def verify_family(self, family, game_solver, prop): mdp_family_result.policy = self.solve_singleton(family,prop) return mdp_family_result - if family.candidate_policy is None: - game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver) + if not SynthesizerPolicyTree.use_randomized_abstraction: + if family.candidate_policy is None: + game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver) + else: + game_policy = family.candidate_policy + game_sat = False else: - game_policy = family.candidate_policy + randomization_policy,policy_sat = self.try_randomized_abstraction(family,prop) + if policy_sat: + mdp_family_result.policy = randomization_policy + return mdp_family_result + game_policy = None game_sat = False mdp_family_result.game_policy = game_policy @@ -594,7 +637,7 @@ def verify_family(self, family, game_solver, prop): return mdp_family_result # undecided: choose scheduler choices to be used for splitting - if not SynthesizerPolicyTree.split_wrt_mdp_scheduler: + if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler): scheduler_choices,state_values,hole_selection = self.parse_game_scheduler(game_solver) else: scheduler_choices,state_values,hole_selection = self.parse_mdp_scheduler(family, mdp_result) @@ -680,7 +723,7 @@ def split(self, family, prop, hole_selection, splitter, policy): subfamily.candidate_policy = None subfamilies.append(subfamily) - if not SynthesizerPolicyTree.split_wrt_mdp_scheduler and not SynthesizerPolicyTree.discard_unreachable_actions_in_game_scheduler: + if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_actions: self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy) return suboptions,subfamilies diff --git a/paynt/synthesizer/statistic.py b/paynt/synthesizer/statistic.py index 531326c67..8cec68a8b 100644 --- a/paynt/synthesizer/statistic.py +++ b/paynt/synthesizer/statistic.py @@ -256,7 +256,6 @@ def print_mdp_family_table_entries(self): sat_by_total_percentage = round(self.num_mdps_sat/self.num_mdps_total*100,2) print(sat_by_total_percentage) - headers = [ "time","nodes","nodes (merged)","leaves","leaves (merged)","leaves (merged) / MDPs %", "policies","policies (merged)","policies (merged) / SAT %","pp time","pp time %",