diff --git a/paynt/quotient/models.py b/paynt/quotient/models.py index 1b05c2123..9558c7d75 100644 --- a/paynt/quotient/models.py +++ b/paynt/quotient/models.py @@ -47,22 +47,6 @@ def __init__(self, model, quotient_container, quotient_state_map, quotient_choic self.quotient_container = quotient_container self.quotient_choice_map = quotient_choice_map self.quotient_state_map = quotient_state_map - self.hole_is_simple = None - - - @property - def hole_simple(self): - if self.hole_is_simple is not None: - return self.hole_is_simple - num_holes = self.quotient_container.design_space.num_holes - hole_to_states = [0 for _ in range(num_holes)] - for state in range(self.states): - quotient_state = self.quotient_state_map[state] - for hole in self.quotient_container.state_to_holes[quotient_state]: - hole_to_states[hole] += 1 - self.hole_is_simple = [hole_to_states[hole] <= 1 for hole in range(num_holes)] - return self.hole_is_simple - @property def states(self): diff --git a/paynt/quotient/pomdp.py b/paynt/quotient/pomdp.py index a5541693b..1232535b3 100644 --- a/paynt/quotient/pomdp.py +++ b/paynt/quotient/pomdp.py @@ -119,7 +119,7 @@ def __init__(self, pomdp, specification, decpomdp_manager=None): self.observation_states[obs] += 1 # initialize POMDP manager - if not self.posterior_aware: + if not PomdpQuotient.posterior_aware: self.pomdp_manager = payntbind.synthesis.PomdpManager(self.pomdp) else: self.pomdp_manager = payntbind.synthesis.PomdpManagerAposteriori(self.pomdp) @@ -218,9 +218,11 @@ def set_memory_from_result_new(self, obs_memory_dict, obs_memory_dict_cutoff, me def create_coloring(self): logger.debug("creating coloring ...") + if PomdpQuotient.posterior_aware: + return self.create_coloring_aposteriori() # create holes - all_holes = paynt.family.family.Family() + family = paynt.family.family.Family() self.observation_action_holes = [] self.observation_memory_holes = [] self.is_action_hole = [] @@ -233,9 +235,9 @@ def create_coloring(self): if num_actions > 1: option_labels = self.action_labels_at_observation[obs] for mem in range(self.observation_memory_size[obs]): - hole_indices.append(all_holes.num_holes) + hole_indices.append(family.num_holes) name = self.create_hole_name(obs,mem,True) - all_holes.add_hole(name,option_labels) + family.add_hole(name,option_labels) self.is_action_hole.append(True) self.observation_action_holes.append(hole_indices) @@ -246,29 +248,30 @@ def create_coloring(self): option_labels = [str(x) for x in range(num_updates)] for mem in range(self.observation_memory_size[obs]): name = self.create_hole_name(obs,mem,False) - hole_indices.append(all_holes.num_holes) - all_holes.add_hole(name,option_labels) + hole_indices.append(family.num_holes) + family.add_hole(name,option_labels) self.is_action_hole.append(False) self.observation_memory_holes.append(hole_indices) # create the coloring - assert self.pomdp_manager.num_holes == all_holes.num_holes - row_action_hole = self.pomdp_manager.row_action_hole - row_memory_hole = self.pomdp_manager.row_memory_hole - row_action_option = self.pomdp_manager.row_action_option - row_memory_option = self.pomdp_manager.row_memory_option + assert self.pomdp_manager.num_holes == family.num_holes + num_holes = family.num_holes + choice_action_hole = self.pomdp_manager.row_action_hole + choice_memory_hole = self.pomdp_manager.row_memory_hole + choice_action_option = self.pomdp_manager.row_action_option + choice_memory_option = self.pomdp_manager.row_memory_option choice_to_hole_options = [] - for action in range(self.quotient_mdp.nr_choices): + for choice in range(self.quotient_mdp.nr_choices): hole_options = [] - hole = row_action_hole[action] - if hole != all_holes.num_holes: - hole_options.append( (hole,row_action_option[action]) ) - hole = row_memory_hole[action] - if hole != all_holes.num_holes: - hole_options.append( (hole,row_memory_option[action]) ) + hole = choice_action_hole[choice] + if hole != num_holes: + hole_options.append( (hole,choice_action_option[choice]) ) + hole = choice_memory_hole[choice] + if hole != num_holes: + hole_options.append( (hole,choice_memory_option[choice]) ) choice_to_hole_options.append(hole_options) - return all_holes, choice_to_hole_options + return family, choice_to_hole_options def create_coloring_aposteriori(self): # a posteriori unfolding @@ -300,14 +303,14 @@ def create_coloring_aposteriori(self): holes[index] = (name,option_labels) # filter out trivial holes - all_holes = paynt.family.family.Family() + family = paynt.family.family.Family() old_to_new_indices = [None] * len(holes) for index,name_labels in enumerate(holes): if name_labels is None: continue - old_to_new_indices[index] = all_holes.num_holes + old_to_new_indices[index] = family.num_holes name,option_labels = name_labels - all_holes.add_hole(name,option_labels) + family.add_hole(name,option_labels) choice_to_hole_options_new = [] for hole_options in choice_to_hole_options: @@ -323,7 +326,7 @@ def create_coloring_aposteriori(self): if new_index is not None: self.observation_action_holes[prior].append(new_index) - return all_holes, choice_to_hole_options + return family, choice_to_hole_options def unfold_memory(self): @@ -331,27 +334,20 @@ def unfold_memory(self): # reset attributes self.quotient_mdp = None self.coloring = None - self.state_to_holes = None self.hole_option_to_actions = None self.observation_action_holes = None self.observation_memory_holes = None self.is_action_hole = None - logger.debug( - "unfolding POMDP using the following memory allocation vector: {} ..." - .format(self.observation_memory_size)) + logger.debug("unfolding {}-FSC template into POMDP ...".format(max(self.observation_memory_size))) self.quotient_mdp = self.pomdp_manager.construct_mdp() self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(self.quotient_mdp) logger.debug(f"constructed quotient MDP having {self.quotient_mdp.nr_states} states and {self.quotient_mdp.nr_choices} actions.") - if not PomdpQuotient.posterior_aware: - family, choice_to_hole_options = self.create_coloring() - else: - family, choice_to_hole_options = self.create_coloring_aposteriori() + family, choice_to_hole_options = self.create_coloring() self.coloring = payntbind.synthesis.Coloring(family.family, self.quotient_mdp.nondeterministic_choice_indices, choice_to_hole_options) - self.state_to_holes = self.coloring.getStateToHoles().copy() # to each hole-option pair a list of actions colored by this combination self.hole_option_to_actions = [[] for hole in range(family.num_holes)] diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 0ff16851a..ace12f7e6 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -43,9 +43,6 @@ def __init__(self, quotient_mdp = None, family = None, coloring = None, specific if self.quotient_mdp is not None: self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(self.quotient_mdp) - if coloring is not None: - self.state_to_holes = coloring.getStateToHoles().copy() - # (optional) counter of discarded assignments self.discarded = 0 @@ -325,9 +322,16 @@ def discard(self, mdp, hole_assignments, core_suboptions, other_suboptions, inco # reduce simple holes ds_before = reduced_design_space.size + + hole_to_states = [0] * self.design_space.num_holes + state_to_holes = self.coloring.getStateToHoles().copy() + for state in range(mdp.model.nr_states): + quotient_state = mdp.quotient_state_map[state] + for hole in state_to_holes[quotient_state]: + hole_to_states[hole] += 1 + for hole in range(reduced_design_space.num_holes): - if mdp.hole_simple[hole]: - assert len(hole_assignments[hole]) == 1 + if hole_to_states[hole] <= 1: reduced_design_space.hole_set_options(hole, hole_assignments[hole]) ds_after = reduced_design_space.size self.discarded += ds_before - ds_after