diff --git a/models/mdp/sketches/dodge/8-mod2-stagger/sketch.props b/models/mdp/sketches/dodge/8-mod2-stagger/sketch.props deleted file mode 100755 index 6387655d3..000000000 --- a/models/mdp/sketches/dodge/8-mod2-stagger/sketch.props +++ /dev/null @@ -1 +0,0 @@ -P>0.9 [F goal] \ No newline at end of file diff --git a/models/mdp/sketches/dodge/8-mod2-stagger/sketch.templ b/models/mdp/sketches/dodge/8-mod2-stagger/sketch.templ deleted file mode 100755 index d1f192cce..000000000 --- a/models/mdp/sketches/dodge/8-mod2-stagger/sketch.templ +++ /dev/null @@ -1,96 +0,0 @@ -mdp - -const int N=8; -const int xMIN = 1; -const int yMIN = 1; -const int xMAX = N; -const int yMAX = N; - -hole int x1_init in {1..8}; -hole int y1_init in {2..7}; - -hole int a1_x0_y0 in {0,1,2,3,4}; -hole int a1_x0_y1 in {0,1,2,3,4}; -hole int a1_x1_y0 in {0,1,2,3,4}; -hole int a1_x1_y1 in {0,1,2,3,4}; - -formula at1 = (x=x1 & y=y1); - - -formula crash = at1; -formula goal = (x=xMAX & y=yMAX); -formula done = goal | crash; - - -formula clk_next = mod(clk+1,2); -module clk - clk : [-1..1] init -1; - - [place] !done & clk=-1 -> (clk'=clk_next); - - [left] !done & clk=0 -> (clk'=clk_next); - [right] !done & clk=0 -> (clk'=clk_next); - [down] !done & clk=0 -> (clk'=clk_next); - [up] !done & clk=0 -> (clk'=clk_next); - [wait] !done & clk=0 -> (clk'=clk_next); - - [o] !done & clk=1 -> (clk'=clk_next); -endmodule - - -const double slip = 0.2; - -formula xright = min(x+1,xMAX); -formula xleft = max(x-1,xMIN); -formula yup = min(y+1,yMAX); -formula ydown = max(y-1,yMIN); - -module agent - x : [xMIN..xMAX] init xMIN; - y : [yMIN..yMAX] init yMIN; - - [left] true -> 1-slip : (x'=xleft) + slip : true; - [right] true -> 1-slip : (x'=xright) + slip : true; - [down] true -> 1-slip : (y'=ydown) + slip : true; - [up] true -> 1-slip : (y'=yup) + slip : true; - //[wait] true -> true; -endmodule - - -formula x1right = min(x1+1,x1_init_MAX); -formula x1left = max(x1-1,x1_init_MIN); -formula y1up = min(y1+1,y1_init_MAX); -formula y1down = max(y1-1,y1_init_MIN); - -const double oslip = 1/2; -module obstacle1 - x1 : [xMIN..xMAX] init xMAX; - y1 : [yMIN..yMAX] init yMAX; - - [place] true -> (x1'=x1_init) & (y1'=y1_init); - - [o] mod(x1,2)=0 & mod(y1,2)=0 & a1_x0_y0=0 -> 1-oslip : (x1'=x1right) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=0 & a1_x0_y0=1 -> 1-oslip : (x1'=x1left) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=0 & a1_x0_y0=2 -> 1-oslip : (y1'=y1up) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=0 & a1_x0_y0=3 -> 1-oslip : (y1'=y1down) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=0 & a1_x0_y0=4 -> 1: true; - - [o] mod(x1,2)=0 & mod(y1,2)=1 & a1_x0_y1=0 -> 1-oslip : (x1'=x1right) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=1 & a1_x0_y1=1 -> 1-oslip : (x1'=x1left) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=1 & a1_x0_y1=2 -> 1-oslip : (y1'=y1up) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=1 & a1_x0_y1=3 -> 1-oslip : (y1'=y1down) + oslip : true; - [o] mod(x1,2)=0 & mod(y1,2)=1 & a1_x0_y1=4 -> 1: true; - - [o] mod(x1,2)=1 & mod(y1,2)=0 & a1_x1_y0=0 -> 1-oslip : (x1'=x1right) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=0 & a1_x1_y0=1 -> 1-oslip : (x1'=x1left) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=0 & a1_x1_y0=2 -> 1-oslip : (y1'=y1up) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=0 & a1_x1_y0=3 -> 1-oslip : (y1'=y1down) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=0 & a1_x1_y0=4 -> 1: true; - - [o] mod(x1,2)=1 & mod(y1,2)=1 & a1_x1_y1=0 -> 1-oslip : (x1'=x1right) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=1 & a1_x1_y1=1 -> 1-oslip : (x1'=x1left) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=1 & a1_x1_y1=2 -> 1-oslip : (y1'=y1up) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=1 & a1_x1_y1=3 -> 1-oslip : (y1'=y1down) + oslip : true; - [o] mod(x1,2)=1 & mod(y1,2)=1 & a1_x1_y1=4 -> 1: true; - -endmodule diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index 6d15309ea..be7acfe80 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -99,6 +99,7 @@ def policy_to_state_valuation_actions(self, policy): Create a representation for a policy that associated action labels with state valuations. States with only one available action are omitted. ''' + policy,_ = policy sv = self.quotient_mdp.state_valuations state_valuation_to_action = [] for state,action in enumerate(policy): @@ -166,6 +167,8 @@ def fix_and_apply_policy_to_family(self, family, policy): for state in mdp.quotient_state_map: policy_fixed[state] = policy[state] + mask = [state for state,action in enumerate(policy_fixed) if action is not None] + policy_fixed = (policy_fixed,mask) return policy_fixed,mdp diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 8bc6b9611..6d24bc52d 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -23,58 +23,32 @@ logging.disable(logging.NOTSET) -class MdpFamilyResult: - def __init__(self): - # if None, then family is undediced - # if False, then all family is UNSAT - # otherwise, contains a satisfying policy for all MDPs in the family - self.policy = None - self.policy_source = "" - - self.hole_selection = None - self.splitter = None - - # policy search results - self.sat_mdps = None - self.sat_policies = None - self.unsat_mdps = None - - def __str__(self): - return str(self.sat) - - -def actions_are_compatible(a1, a2): - return a1 is None or a2 is None or a1==a2 - def policies_are_compatible(policy1, policy2): - for state,a1 in enumerate(policy1): + policy1,policy1_mask = policy1 + policy2,_ = policy2 + for state in policy1_mask: + a1 = policy1[state] a2 = policy2[state] - if not actions_are_compatible(a1,a2): + if a2 is not None and a1 != a2: return False return True -def merge_policies(policies): +def merge_policies(policy1, policy2): ''' Attempt to merge multiple policies into one. :returns one policy or None if some policies were incompatible ''' - policy = policies[0].copy() - for policy2 in policies[1:]: - for state,a1 in enumerate(policy): - a2 = policy2[state] - if not actions_are_compatible(a1,a2): - return None - policy[state] = a1 or a2 - return policy - + if not policies_are_compatible(policy1,policy2): + return None + policy1,_ = policy1 + policy2,_ = policy2 + policy = [a1 or policy2[state] for state,a1 in enumerate(policy1)] + mask = [state for state,action in enumerate(policy) if action is not None] + return (policy,mask) def merge_policies_exclusively(policy1, policy2): - - # num_states = len(policy1) - # agree_mask = stormpy.storage.BitVector(num_states,False) - # for state in range(num_states): - # agree_mask.set(policy1[state] == policy2[state],True) - + policy1,_ = policy1 + policy2,_ = policy2 policy12 = policy1.copy() policy21 = policy2.copy() for state,a1 in enumerate(policy1): @@ -86,55 +60,33 @@ def merge_policies_exclusively(policy1, policy2): return policy12,policy21 -def test_nodes(quotient, prop, node1, node2): - policy1 = node1.policy - policy2 = node2.policy - policy = merge_policies([policy1,policy2]) - if policy is not None: - return policy - - policy12,policy21 = merge_policies_exclusively(policy1,policy2) - - # try policy1 for family2 - policy,mdp = quotient.fix_and_apply_policy_to_family(node2.family, policy12) - policy_result = mdp.model_check_property(prop, alt=True) - if policy_result.sat: - return policy - - # try policy2 for family1 - policy,mdp = quotient.fix_and_apply_policy_to_family(node1.family, policy21) - policy_result = mdp.model_check_property(prop, alt=True) - if policy_result.sat: - return policy - - # neither fits - return None - - - class PolicyTreeNode: - merged = 0 - def __init__(self, family): self.family = family - - self.parent = None - self.policy = None self.splitter = None self.suboptions = [] self.child_nodes = [] - self.policy_number = None + self.sat = None + self.policy_index = None @property def is_leaf(self): - return len(self.child_nodes) == 0 + return self.sat is not None - @property - def solved(self): - return self.policy is not None and self.policy != False + def num_nodes(self): + num = 1 + for child in self.child_nodes: + num += child.num_nodes() + return num + + def num_leaves(self): + num = 1 if self.is_leaf else 0 + for child in self.child_nodes: + num += child.num_leaves() + return num def split(self, splitter, suboptions, subfamilies): self.splitter = splitter @@ -142,103 +94,138 @@ def split(self, splitter, suboptions, subfamilies): self.child_nodes = [] for subfamily in subfamilies: child_node = PolicyTreeNode(subfamily) - child_node.parent = self self.child_nodes.append(child_node) - - def double_check(self, quotient, prop): - assert self.policy is not None + def double_check(self, quotient, prop, policies): + assert self.sat is not None quotient.build(self.family) - if self.policy == False: + if self.sat is False: result = self.family.mdp.model_check_property(prop) assert not result.sat else: - SynthesizerPolicyTree.double_check_policy(quotient, self.family, prop, self.policy) + SynthesizerPolicyTree.double_check_policy(quotient, self.family, prop, policies[self.policy_index]) - - def merge_if_single_child(self): - if len(self.child_nodes) > 1: - return - self.policy = self.child_nodes[0].policy - self.splitter = None - self.suboptions = [] - self.child_nodes = [] def merge_children_indices(self, indices): - if len(indices) < 2: + if len(indices) <= 1: return target = indices[0] for j in reversed(indices[1:]): - PolicyTreeNode.merged += 1 self.suboptions[target] += self.suboptions[j] self.child_nodes[target].family.hole_set_options(self.splitter,self.suboptions[target]) self.suboptions.pop(j) self.child_nodes.pop(j) - self.merge_if_single_child() - def merge_children_solved(self): - if self.is_leaf: + if len(self.child_nodes) > 1: return - indices = [i for i,child in enumerate(self.child_nodes) if child.solved] + # a single child remains, can be merged into parent + child_node = self.child_nodes[0] + self.sat = child_node.sat + self.policy_index = child_node.policy_index + self.splitter = None + self.suboptions = [] + self.child_nodes = [] + + def merge_children_sat(self): + indices = [i for i,child in enumerate(self.child_nodes) if child.sat is True] self.merge_children_indices(indices) - - def merge_children_unsat(self): + + def merge_children_having_same_solution(self): if self.is_leaf: return - indices = [i for i,child in enumerate(self.child_nodes) if child.policy==False] + + # merge UNSAT children + indices = [i for i,child in enumerate(self.child_nodes) if child.sat is False] self.merge_children_indices(indices) + # merge children having the same policy + i = 0 + while i < len(self.child_nodes): + child1 = self.child_nodes[i] + if child1.sat is not True: + i += 1 + continue + + join_to_i = [i] + # collect other children to merge to i + for j in range(i+1,len(self.child_nodes)): + child2 = self.child_nodes[j] + if child2.policy_index == child1.policy_index: + join_to_i.append(j) + self.merge_children_indices(join_to_i) + i += 1 + + def make_policies_compatible(quotient, prop, node1, node2, policies): + policy1 = policies[node1.policy_index] + policy2 = policies[node2.policy_index] + policy = merge_policies(policy1,policy2) + if policy is not None: + return policy + + policy12,policy21 = merge_policies_exclusively(policy1,policy2) - def merge_children_compatible(self, quotient, prop): + # try policy1 for family2 + policy,mdp = quotient.fix_and_apply_policy_to_family(node2.family, policy12) + policy_result = mdp.model_check_property(prop, alt=True) + PolicyTreeNode.mdps_model_checked += 1 + if policy_result.sat: + return policy + + # try policy2 for family1 + policy,mdp = quotient.fix_and_apply_policy_to_family(node1.family, policy21) + policy_result = mdp.model_check_property(prop, alt=True) + PolicyTreeNode.mdps_model_checked += 2 + if policy_result.sat: + return policy + + # neither fits + return None + + def merge_children_having_compatible_policies(self, quotient, prop, policies): if self.is_leaf: return i = 0 while i < len(self.child_nodes): child1 = self.child_nodes[i] - - if not child1.solved: + if child1.sat is not True: i += 1 continue join_to_i = [i] - # collect other childs to merge to i + # collect other children to merge to i for j in range(i+1,len(self.child_nodes)): child2 = self.child_nodes[j] - if not child2.solved: + if child2.sat is not True: continue - policy = test_nodes(quotient,prop,child1,child2) + policy = PolicyTreeNode.make_policies_compatible(quotient,prop,child1,child2,policies) if policy is None: continue # nodes can be merged - child1.policy = policy + policies[child1.policy_index] = policy + policies[child2.policy_index] = None join_to_i.append(j) self.merge_children_indices(join_to_i) i += 1 - - if len(self.child_nodes)>1: + def skip_redundant_children(self): + ''' Adopt grandchildren of each child that uses the same splitter as self. ''' + if self.splitter is None: return - # only 1 child node that can be moved to this node - PolicyTreeNode.merged += 1 + suboptions = [] + child_nodes = [] + for child_index,child in enumerate(self.child_nodes): + if child.splitter != self.splitter: + suboptions.append(self.suboptions[child_index]) + child_nodes.append(self.child_nodes[child_index]) + else: + for grandchild_index,grandchild in enumerate(child.child_nodes): + suboptions.append(child.suboptions[grandchild_index]) + child_nodes.append(grandchild) + self.suboptions = suboptions + self.child_nodes = child_nodes - def assign_policy_number(self, num_policies): - if self.is_leaf: - if self.solved: - self.policy_number = num_policies - num_policies += 1 - return num_policies - for child in self.child_nodes: - num_policies = child.assign_policy_number(num_policies) - return num_policies - def extract_policies(self, quotient): - if self.solved: - return { f"p{self.policy_number}" : quotient.policy_to_state_valuation_actions(self.policy) } - policies = {} - for child in self.child_nodes: - policies.update(child.extract_policies(quotient)) - return policies @property def node_id(self): @@ -246,12 +233,11 @@ def node_id(self): def add_nodes_to_graphviz_tree(self, graphviz_tree): node_label = "" - if self.policy is not None: - if self.policy is False: - node_label = "X" - else: - # node_label = "✓" - node_label = f"p{self.policy_number}" + if self.sat is False: + node_label = "X" + elif self.sat is True: + # node_label = "✓" + node_label = f"p{self.policy_index}" graphviz_tree.node(self.node_id, label=node_label, shape="ellipse", width="0.15", height="0.15") for child in self.child_nodes: child.add_nodes_to_graphviz_tree(graphviz_tree) @@ -271,9 +257,13 @@ class PolicyTree: def __init__(self, family): self.root = PolicyTreeNode(family) + self.policies = [] - def __str__(self): - pass + def new_policy(self, policy): + policy_index = len(self.policies) + mask = [state for state,action in enumerate(policy) if action is not None] + self.policies.append( (policy,mask) ) + return policy_index def collect_all(self): node_queue = [self.root] @@ -295,16 +285,26 @@ def collect_leaves(self): node_queue = node_queue + node.child_nodes return leaves - def collect_solved(self): + def collect_nonleaves(self): node_queue = [self.root] - solved = [] + nonleaves = [] while node_queue: node = node_queue.pop(0) - if node.solved: - solved.append(node) + if not node.is_leaf: + nonleaves.append(node) + node_queue = node_queue + node.child_nodes + return nonleaves + + def collect_sat(self): + node_queue = [self.root] + sat = [] + while node_queue: + node = node_queue.pop(0) + if node.sat: + sat.append(node) else: node_queue += node.child_nodes - return solved + return sat def double_check(self, quotient, prop): @@ -321,42 +321,31 @@ def count_diversity(self): for node in self.collect_all(): if node.is_leaf: continue - with_policy = len([node for node in node.child_nodes if node.solved]) + with_policy = len([node for node in node.child_nodes if node.sat]) with_none = len([node for node in node.child_nodes if node.policy is None]) with_false = len([node for node in node.child_nodes if node.policy==False]) children_stats[(with_policy,with_none,with_false)] += 1 return children_stats - - def count_policy_sources(self): - from collections import defaultdict - sources = defaultdict(int) - for node in self.collect_leaves(): - if not node.solved: - continue - sources[node.policy_source] += 1 - return sources - - + def print_stats(self): members_total = self.root.family.size + num_policies = len(self.policies) + members_satisfied = 0 num_leaves_singleton = 0 - num_policies = 0 - leaves = self.collect_leaves() for node in leaves: if node.family.size==1: num_leaves_singleton += 1 - if node.policy is not None and node.policy != False: - num_policies += 1 + if node.sat: members_satisfied += node.family.size satisfied_percentage = round(members_satisfied/members_total*100,0) members_unsatisfied = members_total-members_satisfied num_nodes = len(self.collect_all()) num_leaves = len(leaves) - num_leaves_solvable = num_policies + num_leaves_solvable = len(self.collect_sat()) num_leaves_unsolvable = num_leaves-num_leaves_solvable if num_leaves_solvable > 0: leaf_solvable_avg = round(members_satisfied / num_leaves_solvable,1) @@ -370,62 +359,102 @@ def print_stats(self): print("--------------------") print("Policy tree summary:") - print("found {} satisfying policies for {}/{} family members ({}%)".format( - num_policies,members_satisfied,members_total,satisfied_percentage)) + print("found {} satisfying {} for {}/{} family members ({}%)".format( + num_policies, "policy" if num_policies==1 else "policies", members_satisfied,members_total,satisfied_percentage)) print("policy tree has {} nodes, {} of them are leaves:".format(num_nodes, num_leaves)) print("\t solvable leaves: {} (avg.size: {})".format(num_leaves_solvable,leaf_solvable_avg)) print("\tunsolvable leaves: {} (avg.size: {})".format(num_leaves_unsolvable,leaf_unsolvable_avg)) print("\t singleton leaves: {}".format(num_leaves_singleton)) - # print() - # print("(X, Y, Z) - number of internal nodes having yes/?/no children") - # for key,number in self.count_diversity().items(): - # print(key, " - ", number) - - # print() - # print("X - number of nodes solved with policy of type X") - # for key,number in self.count_policy_sources().items(): - # print(key, " - ", number) - # print("--------------------") + print("--------------------") + def discard_unused_policies(self): + policy_old_to_new = [None for _ in self.policies] + num_policies = 0 + for policy_index,policy in enumerate(self.policies): + if policy is not None: + policy_old_to_new[policy_index] = num_policies + num_policies += 1 + self.policies = [policy for policy in self.policies if policy is not None] + assert num_policies == len(self.policies) + for leaf in self.collect_sat(): + leaf.policy_index = policy_old_to_new[leaf.policy_index] + assert leaf.policy_index is not None + + def merge_compatible_policies(self, policy_indices): + policy_old_to_new_map = [policy_index for policy_index,_ in enumerate(self.policies)] + + for policy1_index_index,policy1_index in enumerate(policy_indices): + policy1 = self.policies[policy1_index] + if policy1 is None: + continue + for policy2_index in policy_indices[policy1_index_index+1:]: + policy2 = self.policies[policy2_index] + if policy2 is None: + continue + policy = merge_policies(policy1,policy2) + if policy is None: + continue + # store updated policy + self.policies[policy1_index] = policy + policy1 = policy + # discard irrelevant policy + policy_old_to_new_map[policy2_index] = policy1_index + self.policies[policy2_index] = None + + return policy_old_to_new_map - def postprocess(self, quotient, prop, stat): - - stat.num_mdps_total = quotient.design_space.size - stat.num_mdps_sat = sum([n.family.size for n in self.collect_solved()]) + def postprocess(self, quotient, prop): - stat.num_policies = len(self.collect_solved()) + import paynt.utils.profiler + postprocessing_time = paynt.utils.profiler.Timer() + postprocessing_time.start() logger.info("post-processing the policy tree...") - logger.info("merging unsat siblings...") - PolicyTreeNode.merged = 0 - all_nodes = self.collect_all() - for node in reversed(all_nodes): - node.merge_children_unsat() - logger.info("merged {} pairs".format(PolicyTreeNode.merged)) - # self.print_stats() - - logger.info("merging compatible siblings...") - PolicyTreeNode.merged = 0 - all_nodes = self.collect_all() - for node in reversed(all_nodes): - node.merge_children_compatible(quotient,prop) - logger.info("merged {} pairs".format(PolicyTreeNode.merged)) - stat.num_policies_merged = len(self.collect_solved()) - self.print_stats() - - # logger.info("merging solved siblings...") - # PolicyTreeNode.merged = 0 - # all_nodes = self.collect_all() - # for node in reversed(all_nodes): - # node.merge_children_solved() - # logger.info("merged {} pairs".format(PolicyTreeNode.merged)) - # stat.num_policies_yes = len(self.collect_solved()) - # self.print_stats() + logger.info("merging SAT siblings solved by non-exclusively compatible policies...") + PolicyTreeNode.mdps_model_checked = 0 + nodes_before = self.root.num_nodes() + for node in reversed(self.collect_all()): + node.merge_children_having_compatible_policies(quotient, prop, self.policies) + self.discard_unused_policies() + nodes_removed = nodes_before - self.root.num_nodes() + logger.info("additional {} MDPs were model checked".format(PolicyTreeNode.mdps_model_checked)) + logger.info("removed {} nodes".format(nodes_removed)) + + logger.info("merging all exclusively compatible policies...") + policies_before = len(self.policies) + policy_indices = [index for index,_ in enumerate(self.policies)] + policy_old_to_new_map = self.merge_compatible_policies(policy_indices) + for leaf in self.collect_sat(): + leaf.policy_index = policy_old_to_new_map[leaf.policy_index] + self.discard_unused_policies() + policies_removed = policies_before - len(self.policies) + logger.info("removed {} policies".format(policies_removed)) + + logger.info("reducing tree height...") + nodes_before = self.root.num_nodes() + for node in reversed(self.collect_nonleaves()): + node.skip_redundant_children() + nodes_removed = nodes_before - self.root.num_nodes() + logger.info("removed {} nodes".format(nodes_removed)) + + logger.info("merging siblings that have the same solution...") + nodes_before = self.root.num_nodes() + for node in reversed(self.collect_nonleaves()): + node.merge_children_having_same_solution() + nodes_removed = nodes_before - self.root.num_nodes() + logger.info("removed {} nodes".format(nodes_removed)) + + postprocessing_time.stop() + time = int(postprocessing_time.read()) + logger.debug(f"postprocessing took {time} s") + + def extract_policies(self, quotient): - self.root.assign_policy_number(0) - policies = self.root.extract_policies(quotient) - return policies + return { + f"p{policy_index}" : quotient.policy_to_state_valuation_actions(policy) + for policy_index,policy in enumerate(self.policies) + } def extract_policy_tree(self, quotient): logging.getLogger("graphviz").setLevel(logging.WARNING) @@ -437,6 +466,14 @@ def extract_policy_tree(self, quotient): +class MdpFamilyResult: + def __init__(self): + # if None, then family is undediced + # if False, then all family members are UNSAT + # otherwise, contains a satisfying policy for all MDPs in the family + self.policy = None + self.hole_selection = None + self.splitter = None class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer): @@ -524,17 +561,8 @@ def verify_family(self, family, game_solver, prop): self.quotient.build(family) mdp_family_result = MdpFamilyResult() - if family.size <= 8 and False: - policy_is_unique, unsat_mdps, sat_mdps, sat_policies = self.synthesize_policy_for_family(family, prop, iteration_limit=100) - if policy_is_unique: - policy = sat_policies - mdp_family_result.policy = policy - mdp_family_result.policy_source = "policy search" - return mdp_family_result - if family.size == 1: mdp_family_result.policy = self.solve_singleton(family,prop) - mdp_family_result.policy_source = "singleton" return mdp_family_result game_policy,game_value,game_sat = self.solve_game_abstraction(family,prop,game_solver) @@ -542,7 +570,6 @@ def verify_family(self, family, game_solver, prop): game_policy_sat = self.verify_policy(family,prop,game_policy) if game_policy_sat: mdp_family_result.policy = game_policy - mdp_family_result.policy_source = "game abstraction" return mdp_family_result else: logger.debug(f"game YES but nor forall family of size {family.size}") @@ -623,501 +650,59 @@ def split(self, family, prop, hole_selection, splitter): subfamilies.append(subfamily) return suboptions,subfamilies - - def create_action_coloring(self): - - quotient_mdp = self.quotient.quotient_mdp - family = paynt.family.family.Family() - choice_to_hole_options = [[] for choice in range(quotient_mdp.nr_choices)] - - for state in range(quotient_mdp.nr_states): - - state_actions = self.quotient.state_to_actions[state] - if len(state_actions) <= 1: - # hole is not needed - continue - - # create fresh hole - hole = family.num_holes - name = f'state_{state}' - option_labels = [self.quotient.action_labels[action] for action in state_actions] - family.add_hole(name,option_labels) - - for action_index,action in enumerate(state_actions): - color = [(hole,action_index)] - for choice in self.quotient.state_action_choices[state][action]: - choice_to_hole_options[choice] = color - - coloring = payntbind.synthesis.Coloring(family.family, quotient_mdp.nondeterministic_choice_indices, choice_to_hole_options) - self.action_coloring_family = family - self.action_coloring = coloring - return - - ############################### - #### POLICY SEARCH SECTION #### - - def update_scores(self, score_lists, selection): - for hole, score_list in score_lists.items(): - for choice in selection[hole]: - if choice not in score_list: - score_list.append(choice) - - def synthesize_policy_for_family(self, family, prop, all_sat=False, iteration_limit=0): - ''' - Synthesize one policy for family of MDPs (if such policy exists). - :param all_sat if True, it is assumed that all MDPs are SAT - :returns whether all SAT MDPs are solved using a single policy - :returns a list of UNSAT MDPs - :returns a list of SAT MDPs - :returns one policy if all SAT MDPs are solved using single policy or list containing a corresponding policy for each SAT MDP - ''' - sat_mdp_families = [] - sat_mdp_policies = [] - unsat_mdp_families = [] - - # create MDP subfamilies - for hole_assignment in family.all_combinations(): - subfamily = family.copy() - for hole_index, hole_option in enumerate(hole_assignment): - subfamily.hole_set_options(hole_index, [hole_option]) - - # find out which mdps are sat and unsat - if not all_sat: - self.quotient.build(subfamily) - primary_result = subfamily.mdp.model_check_property(prop) - assert primary_result.result.has_scheduler - self.stat.iteration(subfamily.mdp) - - if primary_result.sat == False: - unsat_mdp_families.append(subfamily) - continue - - sat_mdp_families.append(subfamily) - policy = self.quotient.scheduler_to_policy(primary_result.result.scheduler, subfamily.mdp) - sat_mdp_policies.append(policy) - else: - sat_mdp_families.append(subfamily) - - # no sat mdps - if len(sat_mdp_families) == 0: - return False, unsat_mdp_families, sat_mdp_families, sat_mdp_policies - - if len(sat_mdp_policies) == 0: - sat_mdp_policies = [None for _ in sat_mdp_families] - - action_family = paynt.family.family.DesignSpace(self.action_coloring_family) - action_family_stack = [action_family] - iter = 0 - - # AR for policies - while action_family_stack: - - if iteration_limit>0 and iter>iteration_limit: - break - - current_action_family = action_family_stack.pop(-1) - current_results = [] - - score_lists = {hole:[] for hole in range(current_action_family.num_holes) if current_action_family.hole_num_options(hole) > 1} - - # try to find controller inconsistency across the MDPs - # if the controllers are consistent, return True - for index, mdp_subfamily in enumerate(sat_mdp_families): - self.quotient.build_with_second_coloring(mdp_subfamily, self.action_coloring, current_action_family) # maybe copy to new family? - - primary_result = current_action_family.mdp.model_check_property(prop) - self.stat.iteration(current_action_family.mdp) - - # discard the family as soon as one MDP is unsat - if primary_result.sat == False: - current_results.append(False) - break - - # add policy if current mdp doesn't have one yet - # TODO maybe this can be done after some number of controllers are consistent? - if sat_mdp_policies[index] is None: - policy = self.quotient.scheduler_to_policy(primary_result.result.scheduler, mdp_subfamily.mdp) - sat_mdp_policies[index] = policy - - current_results.append(primary_result) - selection = self.quotient.scheduler_selection(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring) - self.update_scores(score_lists, selection) - - scores = {hole:len(score_list) for hole, score_list in score_lists.items()} - - splitters = self.quotient.holes_with_max_score(scores) - splitter = splitters[0] - - # refinement as soon as the first inconsistency is found - if scores[splitter] > 1: - break - else: - policy = self.quotient.empty_policy() - for index, (result, family) in enumerate(zip(current_results, sat_mdp_families)): - mdp_policy = self.quotient.scheduler_to_policy(result.result.scheduler, family.mdp) - policy = merge_policies([policy, mdp_policy]) - assert policy is not None - return True, unsat_mdp_families, sat_mdp_families, policy - - if False in current_results: - continue - - used_options = score_lists[splitter] - core_suboptions = [[option] for option in used_options] - other_suboptions = [option for option in current_action_family.hole_options(splitter) if option not in used_options] - if other_suboptions: - other_suboptions = [other_suboptions] - else: - other_suboptions = [] - suboptions = other_suboptions + core_suboptions # DFS solves core first - - subfamilies = [] - current_action_family.splitter = splitter - new_design_space = current_action_family.copy() - for suboption in suboptions: - subholes = new_design_space.subholes(splitter, suboption) - action_subfamily = paynt.family.family.DesignSpace(subholes) - action_subfamily.hole_set_options(splitter, suboption) - subfamilies.append(action_subfamily) - - action_family_stack = action_family_stack + subfamilies - - iter += 1 - - # compute policies for the sat mdps that were never analysed - mdps_without_policy = [index for index, policy in enumerate(sat_mdp_policies) if policy is None] - for mdp_index in mdps_without_policy: - self.quotient.build(sat_mdp_families[mdp_index]) - primary_result = sat_mdp_families[mdp_index].mdp.model_check_property(prop) - assert primary_result.result.has_scheduler - self.stat.iteration(sat_mdp_families[mdp_index].mdp) - policy = self.quotient.scheduler_to_policy(primary_result.result.scheduler, sat_mdp_families[mdp_index].mdp) - sat_mdp_policies[mdp_index] = policy - - return False, unsat_mdp_families, sat_mdp_families, sat_mdp_policies - - - def double_check_policy_synthesis(self, unsat_mdp_families, sat_mdp_families, sat_mdp_policies, sat_mdp_to_policy_map, prop): - - for unsat_family in unsat_mdp_families: - self.quotient.build(unsat_family) - result = unsat_family.mdp.model_check_property(prop) - assert not result.sat, "double check fail" - unsat_family.mdp = None - - for sat_index, sat_family in enumerate(sat_mdp_families): - self.quotient.build(sat_family) - sat_policy = sat_mdp_policies[sat_mdp_to_policy_map[sat_index]] - SynthesizerPolicyTree.double_check_policy(self.quotient, sat_family, prop, sat_policy) - sat_family.mdp = None - - - def synthesize_policy_for_family_linear(self, family, prop): - ''' - Synthesize policies for mdps in family in linear time with respect to family size - :returns a list of UNSAT MDP families - :returns a list of SAT MDP families - :returns list of policies for SAT MDP families - :returns list that maps each SAT MDP family to its policy - ''' - sat_mdp_families = [] - sat_mdp_policies = [] - sat_mdp_to_policy_map = [] - unsat_mdp_families = [] - - mdp_families = [] - - for hole_assignment in family.all_combinations(): - subfamily = family.copy() - for hole_index, hole_option in enumerate(hole_assignment): - subfamily.hole_set_options(hole_index, [hole_option]) - mdp_families.append(subfamily) - - for family in mdp_families: - self.quotient.build(family) - - result = family.mdp.model_check_property(prop) - self.stat.iteration_mdp(family.mdp.states) - self.explore(family) - if not result.sat: - family.mdp = None - unsat_mdp_families.append(family) - continue - - policy = self.quotient.scheduler_to_policy(result.result.scheduler, family.mdp) - - family.mdp = None - - for index, sat_policy in enumerate(sat_mdp_policies): - merged_policy = merge_policies([sat_policy, policy]) - if merged_policy is None: - continue - else: - sat_mdp_policies[index] = merged_policy - sat_mdp_families.append(family) - sat_mdp_to_policy_map.append(index) - break - else: - sat_mdp_families.append(family) - sat_mdp_to_policy_map.append(len(sat_mdp_policies)) - sat_mdp_policies.append(policy) - - return unsat_mdp_families, sat_mdp_families, sat_mdp_policies, sat_mdp_to_policy_map - - - def synthesize_policy_for_family_using_ceg(self, family, prop): - ''' - Synthesize policies for mdps in family using counter-example generalization - :returns a list of UNSAT MDP families - :returns a list of SAT MDP families - :returns list of policies for SAT MDP families - :returns list that maps each SAT MDP family to its policy - ''' - - sat_mdp_families = [] - sat_mdp_policies = [] - sat_mdp_to_policy_map = [] - unsat_mdp_families = [] - - self.quotient.build(family) - - smt_solver = paynt.family.smt.SmtSolver(family) - - unsat_conflict_generator = paynt.synthesizer.conflict_generator.mdp.ConflictGeneratorMdp(self.quotient) - unsat_conflict_generator.initialize() - - mdp_subfamily = smt_solver.pick_assignment(family) - - while mdp_subfamily is not None: - - self.quotient.build(mdp_subfamily) - - result = mdp_subfamily.mdp.model_check_property(prop) - self.stat.iteration(mdp_subfamily.mdp) - - if not result.sat: - # MDP CE - requests = [(0, self.quotient.specification.all_properties()[0], None)] - choices = self.quotient.coloring.selectCompatibleChoices(mdp_subfamily.family) - model,state_map,choice_map = self.quotient.restrict_quotient(choices) - model = paynt.quotient.models.MDP(model,self.quotient,state_map,choice_map,mdp_subfamily) - conflicts = unsat_conflict_generator.construct_conflicts(family, mdp_subfamily, model, requests) - - # conflicts = [list(range(family.num_holes))] # UNSAT without CE generalization - - pruned = smt_solver.exclude_conflicts(family, mdp_subfamily, conflicts) - self.explored += pruned - - # MDP CE - unsat_family = family.copy() - for hole_index in range(self.quotient.design_space.num_holes): - if hole_index in conflicts[0]: - unsat_family.hole_set_options(hole_index, mdp_subfamily.hole_options(hole_index)) - - mdp_subfamily.mdp = None - unsat_family.mdp = None - unsat_mdp_families.append(unsat_family) - else: - policy = self.quotient.scheduler_to_policy(result.result.scheduler, mdp_subfamily.mdp) - policy, policy_quotient_mdp = self.quotient.fix_and_apply_policy_to_family(family, policy) # DTMC CE - # policy_quotient_mdp = self.quotient.apply_policy_to_family(family, policy) # MDP SAT CE - quotient_assignment = self.quotient.coloring.getChoiceToAssignment() - choice_to_hole_options = [] - for choice in range(policy_quotient_mdp.choices): - quotient_choice = policy_quotient_mdp.quotient_choice_map[choice] - choice_to_hole_options.append(quotient_assignment[quotient_choice]) - - coloring = payntbind.synthesis.Coloring(family.family, policy_quotient_mdp.model.nondeterministic_choice_indices, choice_to_hole_options) - quotient_container = paynt.quotient.quotient.DtmcFamilyQuotient(policy_quotient_mdp.model, family, coloring, self.quotient.specification.negate()) - conflict_generator = paynt.synthesizer.conflict_generator.dtmc.ConflictGeneratorDtmc(quotient_container) # DTMC CE - # conflict_generator = paynt.synthesizer.conflict_generator.mdp.ConflictGeneratorMdp(quotient_container) # MDP SAT CE - conflict_generator.initialize() - mdp_subfamily.constraint_indices = family.constraint_indices - requests = [(0, quotient_container.specification.all_properties()[0], None)] - - model = quotient_container.build_assignment(mdp_subfamily) # DTMC CE - - # choices = coloring.selectCompatibleChoices(mdp_subfamily.family) # MDP SAT CE - # model,state_map,choice_map = quotient_container.restrict_quotient(choices) # MDP SAT CE - # model = paynt.quotient.models.MDP(model,quotient_container,state_map,choice_map,mdp_subfamily) # MDP SAT CE - - conflicts = conflict_generator.construct_conflicts(family, mdp_subfamily, model, requests) - pruned = smt_solver.exclude_conflicts(family, mdp_subfamily, conflicts) - self.explored += pruned - - sat_family = family.copy() - for hole_index in range(self.quotient.design_space.num_holes): - if hole_index in conflicts[0]: - sat_family.hole_set_options(hole_index, mdp_subfamily.hole_options(hole_index)) - - sat_family.mdp = None - sat_mdp_families.append(sat_family) - sat_mdp_to_policy_map.append(len(sat_mdp_policies)) - sat_mdp_policies.append(policy) - - mdp_subfamily = smt_solver.pick_assignment(family) - - return unsat_mdp_families, sat_mdp_families, sat_mdp_policies, sat_mdp_to_policy_map - - - def policy_search_ceg(self, family, prop): - - action_family = paynt.family.family.DesignSpace(self.action_coloring_family) - smt_solver = paynt.family.smt.SmtSolver(action_family) - - mdp_subfamilies = [] - - # create MDP subfamilies - for hole_assignment in family.all_combinations(): - subfamily = family.copy() - for hole_index, hole_option in enumerate(hole_assignment): - subfamily.hole_set_options(hole_index, [hole_option]) - mdp_subfamilies.append(subfamily) - - policy_family = smt_solver.pick_assignment(action_family) - pruned_overall = 0 - counter = 0 - - while policy_family is not None: - counter += 1 - - self.quotient.build_with_second_coloring(family, self.action_coloring, policy_family) # maybe copy to new family? - - # primary_result = policy_family.mdp.model_check_property(prop) - # self.stat.iteration(policy_family.mdp) - - # if not primary_result.sat: - # print("unsat") - - for mdp_subfamily in mdp_subfamilies: - - self.quotient.build_with_second_coloring(mdp_subfamily, self.action_coloring, policy_family) - self.quotient.build_with_second_coloring(mdp_subfamily, self.action_coloring, action_family) - - dtmc = self.quotient.mdp_to_dtmc(policy_family.mdp.model) - policy_dtmc = paynt.quotient.models.DTMC(dtmc,self.quotient,policy_family.mdp.quotient_state_map,policy_family.mdp.quotient_choice_map) - - dtmc_result = policy_dtmc.model_check_property(prop) - self.stat.iteration(policy_dtmc) - - if not dtmc_result.sat: - self.quotient.build(mdp_subfamily) - action_quotient_assignment = self.action_coloring.getChoiceToAssignment() - choice_to_hole_options = [] - for choice in range(mdp_subfamily.mdp.choices): - quotient_choice = mdp_subfamily.mdp.quotient_choice_map[choice] - choice_to_hole_options.append(action_quotient_assignment[quotient_choice]) - - coloring = payntbind.synthesis.Coloring(action_family.family, mdp_subfamily.mdp.model.nondeterministic_choice_indices, choice_to_hole_options) - quotient_container = paynt.quotient.quotient.DtmcFamilyQuotient(mdp_subfamily.mdp.model, action_family, coloring, self.quotient.specification) - - conflict_generator = paynt.synthesizer.conflict_generator.dtmc.ConflictGeneratorDtmc(quotient_container) - conflict_generator.initialize() - requests = [(0, quotient_container.specification.all_properties()[0], None)] - - # dtmc = self.quotient.mdp_to_dtmc(policy_family.mdp) - # policy_dtmc = paynt.quotient.models.DTMC(dtmc,self.quotient,policy_family.mdp.state_map,policy_family.mdp.choice_map) - - model = quotient_container.build_assignment(policy_family) - - conflicts = conflict_generator.construct_conflicts(action_family, policy_family, model, requests) - pruned = smt_solver.exclude_conflicts(action_family, policy_family, conflicts) - pruned_overall += pruned - # print(conflicts) - # print(pruned) - - break - else: - policy = self.quotient.scheduler_to_policy(dtmc_result.result.scheduler, mdp_subfamily.mdp) - return policy - - if counter % 100 == 0: - print(f'{round(pruned_overall/action_family.size*100,2)}%') - - policy_family = smt_solver.pick_assignment(action_family) - - - return False - - - - #### POLICY SEARCH SECTION END #### - ################################### - - - def evaluate_all(self, family, prop, keep_value_only=False): assert not prop.reward, "expecting reachability probability propery" game_solver = self.quotient.build_game_abstraction_solver(prop) policy_tree = PolicyTree(family) - ### POLICY SEARCH TESTING - # self.create_action_coloring() - # policy = self.policy_search_ceg(policy_tree.root.family, prop) - # print(policy) - - # choose policy search method - # unsat, sat, policies, policy_map = self.synthesize_policy_for_family_linear(policy_tree.root.family, prop) - # unsat, sat, policies, policy_map = self.synthesize_policy_for_family_using_ceg(policy_tree.root.family, prop) - - # self.stat.synthesis_timer.stop() - - # unsat_mdps_count = sum([s.size for s in unsat]) - # sat_mdps_count = sum([s.size for s in sat]) - - # print(f'unSAT MDPs: {unsat_mdps_count}\tunSAT families: {len(unsat)}\tavg. unSAT family size: {round(unsat_mdps_count/len(unsat),2) if len(unsat) != 0 else "N/A"}') - # print(f'SAT MDPs: {sat_mdps_count}\tSAT families: {len(sat)}\tavg. SAT family size: {round(sat_mdps_count/len(sat),2) if len(sat) != 0 else "N/A"}') - # print(f'policies: {len(policies)}\tpolicy per SAT MDP: {round(len(policies)/sat_mdps_count,2) if sat_mdps_count != 0 else "N/A"}') - # print(f'iterations: {self.stat.iterations_mdp}') - # print(f'time: {round(self.stat.synthesis_timer.time,2)}s') - - # self.double_check_policy_synthesis(unsat, sat, policies, policy_map, prop) - # exit() - ### - - if False: - self.quotient.build(policy_tree.root.family) - policy_exists,_,_,_ = self.synthesize_policy_for_family(policy_tree.root.family, prop, all_sat=True) - print("Policy exists: ", policy_exists) - return None - - policy_tree_leaves = [policy_tree.root] - while policy_tree_leaves: + undecided_leaves = [policy_tree.root] + while undecided_leaves: # gi = self.stat.iterations_game # if gi is not None and gi > 1000: # return None - policy_tree_node = policy_tree_leaves.pop(-1) + policy_tree_node = undecided_leaves.pop(-1) family = policy_tree_node.family result = self.verify_family(family,game_solver,prop) - policy_tree_node.policy = result.policy - policy_tree_node.policy_source = result.policy_source - if result.policy is not None: self.explore(family) if policy_tree_node != policy_tree.root: - family.mdp = None # memory optimization + family.mdp = None + if result.policy is False: + policy_tree_node.sat = False + else: + policy_tree_node.sat = True + policy_tree_node.policy_index = policy_tree.new_policy(result.policy) continue # refine suboptions,subfamilies = self.split(family, prop, result.hole_selection, result.splitter) if policy_tree_node != policy_tree.root: - family.mdp = None # memory optimization + family.mdp = None policy_tree_node.split(result.splitter,suboptions,subfamilies) - policy_tree_leaves = policy_tree_leaves + policy_tree_node.child_nodes + undecided_leaves += policy_tree_node.child_nodes if SynthesizerPolicyTree.double_check_policy_tree_leaves: policy_tree.double_check(self.quotient, prop) policy_tree.print_stats() - policy_tree.postprocess(self.quotient, prop, self.stat) + + self.stat.num_mdps_total = self.quotient.design_space.size + self.stat.num_mdps_sat = sum([n.family.size for n in policy_tree.collect_sat()]) + self.stat.num_nodes = len(policy_tree.collect_all()) + self.stat.num_policies = len(policy_tree.policies) + policy_tree.postprocess(self.quotient, prop) + policy_tree.print_stats() + self.stat.num_nodes_merged = len(policy_tree.collect_all()) + self.stat.num_policies_merged = len(policy_tree.policies) self.policy_tree = policy_tree # convert policy tree to family evaluation evaluations = [] for node in policy_tree.collect_leaves(): - evaluation = paynt.synthesizer.synthesizer.FamilyEvaluation(node.family,None,node.solved,policy=node.policy) + policy = policy_tree.policies[node.policy_index] if node.sat else None + evaluation = paynt.synthesizer.synthesizer.FamilyEvaluation(node.family,None,node.sat,policy=policy) evaluations.append(evaluation) return evaluations diff --git a/paynt/synthesizer/statistic.py b/paynt/synthesizer/statistic.py index 66a002429..ad5ed3075 100644 --- a/paynt/synthesizer/statistic.py +++ b/paynt/synthesizer/statistic.py @@ -52,9 +52,10 @@ def __init__(self, synthesizer): # MDP family self.num_mdps_total = None self.num_mdps_sat = None + self.num_tree_nodes = None + self.num_tree_nodes_merged = None self.num_policies = None self.num_policies_merged = None - self.num_policies_yes = None self.family_size = None self.synthesis_timer = paynt.utils.profiler.Timer() @@ -239,36 +240,39 @@ def get_summary(self): def print(self): print(self.get_summary(),end="") + # self.print_mdp_family_table_entries() def print_mdp_family_table_entries(self): model_info = "model info:\t" - model_info += "\t".join(["states","actions","MDPs","SAT MDPs","SAT %",]) + model_info += "\t".join(["states","choices","MDPs","states*MDPs","SAT MDPs","SAT %",]) print(model_info) print("\t\t",end="") print(self.quotient.quotient_mdp.nr_states,end="\t") print(self.quotient.quotient_mdp.nr_choices,end="\t") print(self.num_mdps_total,end="\t") + print(self.quotient.quotient_mdp.nr_states*self.num_mdps_total,end="\t") print(self.num_mdps_sat,end="\t") sat_by_total_percentage = round(self.num_mdps_sat/self.num_mdps_total*100,0) print(sat_by_total_percentage) synt_stats_header = "synt info:\t" - synt_stats_header += "\t".join(["policies","merged","merged/SAT %","yes nodes","time","game iters","MDP iters", "iters/MDPs"]) + synt_stats_header += "\t".join(["time","nodes","nodes (merged)","policies","policies (merged)","policies(merged) / SAT %","game iters","MDP iters", "iters/MDPs"]) print(synt_stats_header) print("\t\t",end="") + synthesis_timer = int(self.synthesis_timer.time) + print(synthesis_timer,end="\t") + print(self.num_nodes,end="\t") + print(self.num_nodes_merged,end="\t") print(self.num_policies,end="\t") print(self.num_policies_merged,end="\t") merged_by_sat_percentage = "N/A" if self.num_mdps_sat > 0: merged_by_sat_percentage = round(self.num_policies_merged/self.num_mdps_sat*100,1) print(merged_by_sat_percentage,end="\t") - print(self.num_policies_yes,end="\t") - synthesis_timer = round(self.synthesis_timer.time,0) - print(synthesis_timer,end="\t") print(self.iterations_game,end="\t") print(self.iterations_mdp,end="\t") - iters_by_mdp = round((self.iterations_game+self.iterations_mdp)/self.num_mdps_total,2) + iters_by_mdp = round((self.iterations_game+self.iterations_mdp)/self.num_mdps_total,1) print(iters_by_mdp) print()