diff --git a/paynt/quotient/storm_pomdp_control.py b/paynt/quotient/storm_pomdp_control.py index de620649..23845cf3 100644 --- a/paynt/quotient/storm_pomdp_control.py +++ b/paynt/quotient/storm_pomdp_control.py @@ -94,6 +94,13 @@ def get_storm_result(self): if self.s_queue is not None: self.s_queue.put((self.result_dict, self.storm_bounds)) + def store_storm_result(self, result): + self.latest_storm_result = result + if self.quotient.specification.optimality.minimizing: + self.storm_bounds = self.latest_storm_result.upper_bound + else: + self.storm_bounds = self.latest_storm_result.lower_bound + # run Storm POMDP analysis for given model and specification # TODO: discuss Storm options def run_storm_analysis(self): @@ -155,11 +162,7 @@ def run_storm_analysis(self): print(f'-----------Storm----------- \ \nValue = {value} | Time elapsed = {round(storm_timer.read(),1)}s | FSC size = {size}\nFSC (dot) = {result.induced_mc_from_scheduler.to_dot()}\n', flush=True) - self.latest_storm_result = result - if self.quotient.specification.optimality.minimizing: - self.storm_bounds = self.latest_storm_result.upper_bound - else: - self.storm_bounds = self.latest_storm_result.lower_bound + self.store_storm_result(result) # setup interactive Storm belief model checker def interactive_storm_setup(self): @@ -217,11 +220,7 @@ def interactive_run(self, belmc): print(result.induced_mc_from_scheduler.to_dot(), file=text_file) text_file.close() - self.latest_storm_result = result - if self.quotient.specification.optimality.minimizing: - self.storm_bounds = self.latest_storm_result.upper_bound - else: - self.storm_bounds = self.latest_storm_result.lower_bound + self.store_storm_result(result) self.parse_results(self.quotient) self.update_data() @@ -270,11 +269,7 @@ def interactive_control(self, belmc, start, storm_timeout): print(result.induced_mc_from_scheduler.to_dot(), file=text_file) text_file.close() - self.latest_storm_result = result - if self.quotient.specification.optimality.minimizing: - self.storm_bounds = self.latest_storm_result.upper_bound - else: - self.storm_bounds = self.latest_storm_result.lower_bound + self.store_storm_result(result) self.parse_results(self.quotient) self.update_data() @@ -380,7 +375,7 @@ def parse_storm_result(self, quotient): # to make the code cleaner get_choice_label = self.latest_storm_result.induced_mc_from_scheduler.choice_labeling.get_labels_of_choice - cutoff_epxloration = [x for x in range(len(self.latest_storm_result.cutoff_schedulers))] + cutoff_epxloration = list(range(len(self.latest_storm_result.cutoff_schedulers))) finite_mem = False result = {x:[] for x in range(quotient.observations)} @@ -394,46 +389,31 @@ def parse_storm_result(self, quotient): # parse non cut-off states if 'cutoff' not in state.labels and 'clipping' not in state.labels: for label in state.labels: - # observation based on prism observables + observation = None if '[' in label: + # observation based on prism observables observation = self.quotient.observation_labels.index(label) - - index = -1 - - choice_label = list(get_choice_label(state.id))[0] - for i in range(len(quotient.action_labels_at_observation[int(observation)])): - if choice_label == quotient.action_labels_at_observation[int(observation)][i]: - index = i - break - - if index >= 0 and index not in result[int(observation)]: - result[int(observation)].append(index) - - if index >= 0 and index not in result_no_cutoffs[int(observation)]: - result_no_cutoffs[int(observation)].append(index) - # explicit observation index elif 'obs_' in label: - _, observation = label.split('_') - - index = -1 + # explicit observation index + _,observation = label.split('_') + if observation is not None: + observation = int(observation) choice_label = list(get_choice_label(state.id))[0] - for i in range(len(quotient.action_labels_at_observation[int(observation)])): - if choice_label == quotient.action_labels_at_observation[int(observation)][i]: - index = i + for index,action_label in enumerate(quotient.action_labels_at_observation[observation]): + if choice_label == action_label: + if index not in result[observation]: + result[observation].append(index) + if index not in result_no_cutoffs[observation]: + result_no_cutoffs[observation].append(index) break - - if index >= 0 and index not in result[int(observation)]: - result[int(observation)].append(index) - - if index >= 0 and index not in result_no_cutoffs[int(observation)]: - result_no_cutoffs[int(observation)].append(index) + # parse cut-off states else: if 'finite_mem' in state.labels and not finite_mem: finite_mem = True self.parse_paynt_result(self.quotient) - for obs, actions in self.result_dict_paynt.items(): + for obs,actions in self.result_dict_paynt.items(): for action in actions: if action not in result_no_cutoffs[obs]: result_no_cutoffs[obs].append(action) @@ -462,7 +442,6 @@ def parse_storm_result(self, quotient): for action in actions: if action not in result[observation]: result[observation].append(action) - cutoff_epxloration.remove(int(scheduler_index)) # removing unrestricted observations @@ -484,19 +463,14 @@ def parse_choice_string(self, choice_string, probability_bound=0): chars = '}{][' for c in chars: choice_string = choice_string.replace(c, '') - choice_string = choice_string.strip(', ') - choices = choice_string.split(',') result = [] - for choice in choices: probability, action = choice.split(':') # probability bound - action = int(action.strip()) - result.append(action) return result @@ -593,14 +567,14 @@ def get_subfamilies(self, restrictions, family): subfamilies = [] - for i in range(len(restrictions)): + for i,restriction in enumerate(restrictions): restricted_family = family.copy() - actions = [action for action in family.hole_options(restrictions[i]["hole"]) if action not in restrictions[i]["restriction"]] + actions = [action for action in family.hole_options(restriction["hole"]) if action not in restriction["restriction"]] if len(actions) == 0: - actions = [family.hole_options(restrictions[i]["hole"])[0]] + actions = [family.hole_options(restriction["hole"])[0]] - restricted_family.hole_set_options(restrictions[i]['hole'],actions) + restricted_family.hole_set_options(restriction['hole'],actions) for j in range(i): restricted_family.hole_set_options(restrictions[j]['hole'],restrictions[j]["restriction"]) diff --git a/paynt/synthesizer/synthesizer_ar_storm.py b/paynt/synthesizer/synthesizer_ar_storm.py index a82be3ce..f2a08562 100644 --- a/paynt/synthesizer/synthesizer_ar_storm.py +++ b/paynt/synthesizer/synthesizer_ar_storm.py @@ -41,21 +41,16 @@ def storm_split(self, families): # split each family in the current buffer to main family and corresponding subfamilies for family in families: if self.storm_control.use_cutoffs: - main_p = self.storm_control.get_main_restricted_family(family, self.storm_control.result_dict) + result_dict = self.storm_control.result_dict else: - main_p = self.storm_control.get_main_restricted_family(family, self.storm_control.result_dict_no_cutoffs) - + result_dict = self.storm_control.result_dict_no_cutoffs + main_p = self.storm_control.get_main_restricted_family(family, result_dict) if main_p is None: subfamilies.append(family) continue main_families.append(main_p) - - if self.storm_control.use_cutoffs: - subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, self.storm_control.result_dict) - else: - subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, self.storm_control.result_dict_no_cutoffs) - + subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, result_dict) subfamilies_p = self.storm_control.get_subfamilies(subfamily_restrictions, family) subfamilies.extend(subfamilies_p) diff --git a/paynt/synthesizer/synthesizer_pomdp.py b/paynt/synthesizer/synthesizer_pomdp.py index 820866f9..9f0affab 100644 --- a/paynt/synthesizer/synthesizer_pomdp.py +++ b/paynt/synthesizer/synthesizer_pomdp.py @@ -12,9 +12,6 @@ import paynt.verification.property -import math -from collections import defaultdict - from threading import Thread from queue import Queue import time @@ -63,97 +60,89 @@ def synthesize(self, family=None, print_stats=True): self.total_iters += iters_mdp return assignment - # iterative strategy using Storm analysis to enhance the synthesis - def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): - ''' - @param unfold_imperfect_only if True, only imperfect observations will be unfolded - ''' - mem_size = paynt.quotient.pomdp.PomdpQuotient.initial_memory_size - - self.synthesizer.storm_control = self.storm_control + def unfold_and_synthesize(self, mem_size, unfold_storm): + paynt.quotient.pomdp.PomdpQuotient.current_family_index = mem_size - while True: - # for x in range(2): - - paynt.quotient.pomdp.PomdpQuotient.current_family_index = mem_size - - # unfold memory according to the best result - if unfold_storm: - if mem_size > 1: - obs_memory_dict = {} - if self.storm_control.is_storm_better: - # Storm's result is better and it needs memory - if self.storm_control.is_memory_needed(): - obs_memory_dict = self.storm_control.memory_vector - logger.info(f'Added memory nodes for observation based on Storm data') - else: - # consider the cut-off schedulers actions when updating memory - if self.storm_control.unfold_cutoff: - for obs in range(self.quotient.observations): - if obs in self.storm_control.result_dict: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 - else: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] - # only consider the induced DTMC without cut-off states - else: - for obs in range(self.quotient.observations): - if obs in self.storm_control.result_dict_no_cutoffs: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 - else: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] - logger.info(f'Added memory nodes for observation based on Storm data') - else: - for obs in range(self.quotient.observations): - if self.quotient.observation_states[obs]>1: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 - else: - obs_memory_dict[obs] = 1 - logger.info(f'Increase memory in all imperfect observation') - self.quotient.set_memory_from_dict(obs_memory_dict) + # unfold memory according to the best result + if not unfold_storm: + logger.info("Synthesizing optimal k={} controller ...".format(mem_size) ) + if unfold_imperfect_only: + self.quotient.set_imperfect_memory_size(mem_size) + else: + self.quotient.set_global_memory_size(mem_size) + return + + if mem_size <= 1: + return + obs_memory_dict = {} + if self.storm_control.is_storm_better: + # Storm's result is better and it needs memory + if self.storm_control.is_memory_needed(): + obs_memory_dict = self.storm_control.memory_vector + logger.info(f'Added memory nodes for observation based on Storm data') else: - logger.info("Synthesizing optimal k={} controller ...".format(mem_size) ) - if unfold_imperfect_only: - self.quotient.set_imperfect_memory_size(mem_size) + if self.storm_control.unfold_cutoff: + # consider the cut-off schedulers actions when updating memory + result_dict = self.storm_control.result_dict else: - self.quotient.set_global_memory_size(mem_size) - - family = self.quotient.family - - # if Storm's result is better, use it to obtain main family that considers only the important actions - if self.storm_control.is_storm_better: - # consider the cut-off schedulers actions - if self.storm_control.use_cutoffs: - main_family = self.storm_control.get_main_restricted_family(family, self.storm_control.result_dict) - if self.storm_control.incomplete_exploration == True: - subfamily_restrictions = [] + # only consider the induced DTMC without cut-off states + result_dict = self.storm_control.result_dict_no_cutoffs + for obs in range(self.quotient.observations): + if obs in result_dict: + obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 else: - subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, self.storm_control.result_dict) - # only consider the induced DTMC actions without cut-off states + obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + logger.info(f'Added memory nodes for observation based on Storm data') + else: + for obs in range(self.quotient.observations): + if self.quotient.observation_states[obs]>1: + obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 else: - main_family = self.storm_control.get_main_restricted_family(family, self.storm_control.result_dict_no_cutoffs) - if self.storm_control.incomplete_exploration == True: - subfamily_restrictions = [] - else: - subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, self.storm_control.result_dict_no_cutoffs) - - subfamilies = self.storm_control.get_subfamilies(subfamily_restrictions, family) - # if PAYNT is better continue normally + obs_memory_dict[obs] = 1 + logger.info(f'Increase memory in all imperfect observation') + self.quotient.set_memory_from_dict(obs_memory_dict) + family = self.quotient.family + + # if Storm's result is better, use it to obtain main family that considers only the important actions + if self.storm_control.is_storm_better: + if self.storm_control.use_cutoffs: + # consider the cut-off schedulers actions + result_dict = self.storm_control.result_dict else: - main_family = family - subfamilies = [] + # only consider the induced DTMC actions without cut-off states + result_dict =self.storm_control.result_dict_no_cutoffs + main_family = self.storm_control.get_main_restricted_family(family,result_dict) + subfamily_restrictions = [] + if not self.storm_control.incomplete_exploration: + subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, result_dict) + subfamilies = self.storm_control.get_subfamilies(subfamily_restrictions, family) + # if PAYNT is better continue normally + else: + main_family = family + subfamilies = [] - self.synthesizer.subfamilies_buffer = subfamilies - self.synthesizer.main_family = main_family + self.synthesizer.subfamilies_buffer = subfamilies + self.synthesizer.main_family = main_family - assignment = self.synthesize(family) + assignment = self.synthesize(family) + return assignment + # iterative strategy using Storm analysis to enhance the synthesis + def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): + ''' + @param unfold_imperfect_only if True, only imperfect observations will be unfolded + ''' + mem_size = paynt.quotient.pomdp.PomdpQuotient.initial_memory_size + self.synthesizer.storm_control = self.storm_control + + while True: + assignment = self.unfold_and_synthesize(mem_size,unfold_storm) if assignment is not None: self.storm_control.latest_paynt_result = assignment self.storm_control.paynt_export = self.quotient.extract_policy(assignment) self.storm_control.paynt_bounds = self.quotient.specification.optimality.optimum self.storm_control.paynt_fsc_size = self.quotient.policy_size(self.storm_control.latest_paynt_result) self.storm_control.latest_paynt_result_fsc = self.quotient.assignment_to_fsc(self.storm_control.latest_paynt_result) - self.storm_control.update_data() if self.synthesis_terminate: @@ -161,10 +150,22 @@ def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): mem_size += 1 - #break - # main SAYNT loop + def print_synthesized_controllers(self): + hline = "\n------------------------------------\n" + print(hline) + print("PAYNT results: ") + print(self.storm_control.paynt_bounds) + print("controller size: {}".format(self.storm_control.paynt_fsc_size)) + print() + print("Storm results: ") + print(self.storm_control.storm_bounds) + print("controller size: {}".format(self.storm_control.belief_controller_size)) + print(hline) + + def iterative_storm_loop(self, timeout, paynt_timeout, storm_timeout, iteration_limit=0): + ''' Main SAYNT loop. ''' self.interactive_queue = Queue() self.synthesizer.s_queue = self.interactive_queue self.storm_control.interactive_storm_setup() @@ -194,19 +195,10 @@ def iterative_storm_loop(self, timeout, paynt_timeout, storm_timeout, iteration_ self.storm_control.interactive_storm_resume(storm_timeout) # compute sizes of controllers + assert self.storm_control.latest_storm_result is not None self.storm_control.belief_controller_size = self.storm_control.get_belief_controller_size(self.storm_control.latest_storm_result, self.storm_control.paynt_fsc_size) - print("\n------------------------------------\n") - print("PAYNT results: ") - print(self.storm_control.paynt_bounds) - print("controller size: {}".format(self.storm_control.paynt_fsc_size)) - - print() - - print("Storm results: ") - print(self.storm_control.storm_bounds) - print("controller size: {}".format(self.storm_control.belief_controller_size)) - print("\n------------------------------------\n") + self.print_synthesized_controllers() if time.time() > iteration_timeout or iteration == iteration_limit: break @@ -247,98 +239,20 @@ def strategy_storm(self, unfold_imperfect_only, unfold_storm=True): @param unfold_imperfect_only if True, only imperfect observations will be unfolded ''' mem_size = paynt.quotient.pomdp.PomdpQuotient.initial_memory_size - self.synthesizer.storm_control = self.storm_control while True: - # for x in range(2): - if self.storm_control.is_storm_better == False: self.storm_control.parse_results(self.quotient) - - paynt.quotient.pomdp.PomdpQuotient.current_family_index = mem_size - - # unfold memory according to the best result - if unfold_storm: - if mem_size > 1: - obs_memory_dict = {} - if self.storm_control.is_storm_better: - if self.storm_control.is_memory_needed(): - obs_memory_dict = self.storm_control.memory_vector - logger.info(f'Added memory nodes for observation based on Storm data') - else: - # consider the cut-off schedulers actions when updating memory - if self.storm_control.unfold_cutoff: - for obs in range(self.quotient.observations): - if obs in self.storm_control.result_dict: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 - else: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] - # only consider the induced DTMC without cut-off states - else: - for obs in range(self.quotient.observations): - if obs in self.storm_control.result_dict_no_cutoffs: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 - else: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] - logger.info(f'Added memory nodes for observation based on Storm data') - else: - for obs in range(self.quotient.observations): - if self.quotient.observation_states[obs]>1: - obs_memory_dict[obs] = self.quotient.observation_memory_size[obs] + 1 - else: - obs_memory_dict[obs] = 1 - logger.info(f'Increase memory in all imperfect observation') - self.quotient.set_memory_from_dict(obs_memory_dict) - else: - logger.info("Synthesizing optimal k={} controller ...".format(mem_size) ) - if unfold_imperfect_only: - self.quotient.set_imperfect_memory_size(mem_size) - else: - self.quotient.set_global_memory_size(mem_size) - - family = self.quotient.family - - # if Storm's result is better, use it to obtain main family that considers only the important actions - if self.storm_control.is_storm_better: - # consider the cut-off schedulers actions - if self.storm_control.use_cutoffs: - main_family = self.storm_control.get_main_restricted_family(family, self.storm_control.result_dict) - if self.storm_control.incomplete_exploration == True: - subfamily_restrictions = [] - else: - subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, self.storm_control.result_dict) - # only consider the induced DTMC actions without cut-off states - else: - main_family = self.storm_control.get_main_restricted_family(family, self.storm_control.result_dict_no_cutoffs) - if self.storm_control.incomplete_exploration == True: - subfamily_restrictions = [] - else: - subfamily_restrictions = self.storm_control.get_subfamilies_restrictions(family, self.storm_control.result_dict_no_cutoffs) - - subfamilies = self.storm_control.get_subfamilies(subfamily_restrictions, family) - # if PAYNT is better continue normally - else: - main_family = family - subfamilies = [] - - self.synthesizer.subfamilies_buffer = subfamilies - self.synthesizer.main_family = main_family - - assignment = self.synthesize(family) - + assignment = self.unfold_and_synthesize(mem_size,unfold_storm) if assignment is not None: self.storm_control.latest_paynt_result = assignment self.storm_control.paynt_export = self.quotient.extract_policy(assignment) self.storm_control.paynt_bounds = self.quotient.specification.optimality.optimum - self.storm_control.update_data() - mem_size += 1 - #break - def strategy_iterative(self, unfold_imperfect_only): ''' @@ -366,7 +280,6 @@ def strategy_iterative(self, unfold_imperfect_only): #break - def run(self, optimum_threshold=None): # choose the synthesis strategy: if self.use_storm: @@ -391,26 +304,8 @@ def run(self, optimum_threshold=None): self.storm_control.get_storm_result() self.strategy_storm(unfold_imperfect_only=True, unfold_storm=self.storm_control.unfold_storm) - print("\n------------------------------------\n") - print("PAYNT results: ") - print(self.storm_control.paynt_bounds) - print("controller size: {}".format(self.storm_control.paynt_fsc_size)) - - print() - - print("Storm results: ") - print(self.storm_control.storm_bounds) - print("controller size: {}".format(self.storm_control.belief_controller_size)) - print("\n------------------------------------\n") + self.print_synthesized_controllers() # Pure PAYNT POMDP synthesis else: # self.strategy_iterative(unfold_imperfect_only=False) self.strategy_iterative(unfold_imperfect_only=True) - - - - - - - -