From 2b29aa388aefdbd2b32182a7b210435402d8e404 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Wed, 17 Jan 2024 17:39:38 +0100 Subject: [PATCH] fix keeping reachable choices for pessimistic splitting --- paynt/cli.py | 6 +++--- paynt/quotient/quotient.py | 8 ++++---- paynt/synthesizer/policy_tree.py | 30 +++++++++++++++--------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/paynt/cli.py b/paynt/cli.py index 06f9face7..8acd3475e 100644 --- a/paynt/cli.py +++ b/paynt/cli.py @@ -118,7 +118,7 @@ def setup_logger(log_path = None): @click.option("--mdp-split-wrt-mdp", is_flag=True, default=False, help="# if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used") -@click.option("--mdp-discard-unreachable-actions", is_flag=True, default=False, +@click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False, help="# if set, unreachable choices will be discarded from the splitting scheduler") @click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False, help="# if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" + @@ -142,7 +142,7 @@ def paynt_run( use_storm_cutoffs, unfold_strategy_storm, export_fsc_storm, export_fsc_paynt, export_evaluation, all_in_one, - mdp_split_wrt_mdp, mdp_discard_unreachable_actions, mdp_use_randomized_abstraction, + mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction, ce_generator, profiling ): @@ -161,7 +161,7 @@ def paynt_run( paynt.quotient.pomdp.PomdpQuotient.posterior_aware = posterior_aware paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp - paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_actions = mdp_discard_unreachable_actions + paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction storm_control = None diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 783e3dc80..d555e0989 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -127,7 +127,7 @@ def build_assignment(self, family): def empty_scheduler(self): return [None] * self.quotient_mdp.nr_states - def keep_reachable_choices_of_scheduler(self, state_to_choice): + def discard_unreachable_choices(self, state_to_choice): state_to_choice_reachable = self.empty_scheduler() state_visited = [False]*self.quotient_mdp.nr_states initial_state = list(self.quotient_mdp.initial_states)[0] @@ -143,15 +143,15 @@ def keep_reachable_choices_of_scheduler(self, state_to_choice): state_queue.append(dst) return state_to_choice_reachable - def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True): + def scheduler_to_state_to_choice(self, mdp, scheduler, discard_unreachable_choices=True): state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map) state_to_choice = self.empty_scheduler() for state in range(mdp.model.nr_states): quotient_choice = state_to_quotient_choice[state] quotient_state = mdp.quotient_state_map[state] state_to_choice[quotient_state] = quotient_choice - if keep_reachable_choices: - state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice) + if discard_unreachable_choices: + state_to_choice = self.discard_unreachable_choices(state_to_choice) return state_to_choice def state_to_choice_to_choices(self, state_to_choice): diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 19a9ef62f..73d2a2b5b 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -485,7 +485,7 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer): # if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used split_wrt_mdp_scheduler = False # if True, unreachable choices will be discarded from the splitting scheduler - discard_unreachable_actions = False + discard_unreachable_choices = False # if True, randomized abstraction guess-and-verify will be used instead of game abstraction use_randomized_abstraction = False @@ -572,31 +572,31 @@ def try_randomized_abstraction(self, family, prop): # apply policy and check if it is SAT for all MDPs in the family policy_sat = self.verify_policy(family, prop, policy) - return policy,policy_sat - def state_to_choice_to_scheduler(self, state_to_choice): - # uncomment this to use only reachable choices of the game scheduler - if SynthesizerPolicyTree.discard_unreachable_actions: - state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice) + def state_to_choice_to_hole_selection(self, state_to_choice): + if SynthesizerPolicyTree.discard_unreachable_choices: + state_to_choice = self.quotient.discard_unreachable_choices(state_to_choice) scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) hole_selection = self.quotient.coloring.collectHoleOptions(scheduler_choices) return scheduler_choices,hole_selection def parse_game_scheduler(self, game_solver): - state_values = game_solver.solution_state_values state_to_choice = game_solver.solution_state_to_quotient_choice.copy() - scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice) - return scheduler_choices,state_values,hole_selection + scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice) + state_values = game_solver.solution_state_values + return scheduler_choices,hole_selection,state_values def parse_mdp_scheduler(self, family, mdp_result): - state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, mdp_result.result.scheduler) - scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice) + state_to_choice = self.quotient.scheduler_to_state_to_choice( + family.mdp, mdp_result.result.scheduler, discard_unreachable_choices=False + ) + scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice) state_values = [0] * self.quotient.quotient_mdp.nr_states for state in range(family.mdp.states): quotient_state = family.mdp.quotient_state_map[state] state_values[quotient_state] = mdp_result.result.at(state) - return scheduler_choices,state_values,hole_selection + return scheduler_choices,hole_selection,state_values def verify_family(self, family, game_solver, prop): @@ -638,9 +638,9 @@ def verify_family(self, family, game_solver, prop): # undecided: choose scheduler choices to be used for splitting if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler): - scheduler_choices,state_values,hole_selection = self.parse_game_scheduler(game_solver) + scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver) else: - scheduler_choices,state_values,hole_selection = self.parse_mdp_scheduler(family, mdp_result) + scheduler_choices,hole_selection,state_values = self.parse_mdp_scheduler(family, mdp_result) splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection) mdp_family_result.splitter = splitter @@ -723,7 +723,7 @@ def split(self, family, prop, hole_selection, splitter, policy): subfamily.candidate_policy = None subfamilies.append(subfamily) - if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_actions: + if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_choices: self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy) return suboptions,subfamilies