model-free SMT coloring

randriu · Dec 5, 2024 · 66968d5 · 66968d5
1 parent a4dfd23
commit 66968d5
Show file tree

Hide file tree

Showing 7 changed files with 197 additions and 146 deletions.
diff --git a/paynt/quotient/mdp.py b/paynt/quotient/mdp.py
@@ -325,20 +325,23 @@ def scheduler_json_to_choices(self, scheduler_json):
         choices = self.state_to_choice_to_choices(state_to_choice)
         return choices
 
-    def reset_tree(self, depth, disable_counterexamples=False):
+    def reset_tree(self, depth, enable_harmonization=True):
         '''
         Rebuild the decision tree template, the design space and the coloring.
         '''
         logger.debug(f"building tree of depth {depth}")
         self.decision_tree = DecisionTree(self,self.variables,self.state_valuations)
         self.decision_tree.set_depth(depth)
 
-        # logger.debug("building coloring...")
         variables = self.decision_tree.variables
         variable_name = [v.name for v in variables]
         variable_domain = [v.domain for v in variables]
         tree_list = self.decision_tree.to_list()
-        self.coloring = payntbind.synthesis.ColoringSmt(self.quotient_mdp, variable_name, variable_domain, tree_list, disable_counterexamples)
+        self.coloring = payntbind.synthesis.ColoringSmt(
+            self.quotient_mdp.nondeterministic_choice_indices, self.choice_to_action, self.quotient_mdp.state_valuations,
+            variable_name, variable_domain, tree_list, enable_harmonization
+        )
+        self.coloring.enableStateExploration(self.quotient_mdp)
 
         # reconstruct the family
         hole_info = self.coloring.getFamilyInfo()
@@ -398,10 +401,7 @@ def build(self, family):
     def are_choices_consistent(self, choices, family):
         ''' Separate method for profiling purposes. '''
         return self.coloring.areChoicesConsistent(choices, family.family)
-        # if family.parent_info is None:
-        #     return self.coloring.areChoicesConsistent(choices, family.family)
-        # else:
-        #     return self.coloring.areChoicesConsistentUseHint(choices, family.family, family.parent_info.unsat_core_hint)
+
 
     def scheduler_is_consistent(self, mdp, prop, result):
         ''' Get hole options involved in the scheduler selection. '''
@@ -484,7 +484,7 @@ def split(self, family):
         parent_info = family.collect_parent_info(self.specification)
         parent_info.analysis_result = family.analysis_result
         parent_info.scheduler_choices = family.scheduler_choices
-        parent_info.unsat_core_hint = self.coloring.unsat_core.copy()
+        # parent_info.unsat_core_hint = self.coloring.unsat_core.copy()
         subfamilies = family.split(splitter,suboptions)
         for subfamily in subfamilies:
             subfamily.add_parent_info(parent_info)

diff --git a/paynt/synthesizer/decision_tree.py b/paynt/synthesizer/decision_tree.py
@@ -185,35 +185,20 @@ def synthesize_tree_sequence(self, opt_result_value):
             if self.resource_limit_reached():
                 break
 
-    def map_scheduler(self, scheduler_choices, opt_result_value):
-        # use counterexamples iff a dont' care action exists
-        disable_counterexamples = "__random__" not in self.quotient.action_labels
+    def map_scheduler(self, scheduler_choices):
         self.counters_reset()
         for depth in range(SynthesizerDecisionTree.tree_depth+1):
-            self.quotient.reset_tree(depth,disable_counterexamples=disable_counterexamples)
-            family = self.quotient.family
-            self.quotient.build(family)
+            self.quotient.reset_tree(depth,enable_harmonization=False)
+            family = self.quotient.family.copy()
             family.analysis_result = self.quotient.build_unsat_result()
-            best_assignment_old = self.best_assignment
-
+            self.quotient.build(family)
             consistent,hole_selection = self.quotient.are_choices_consistent(scheduler_choices, family)
             if consistent:
                 self.verify_hole_selection(family,hole_selection)
-            elif not disable_counterexamples:
-                harmonizing_hole = [hole for hole,options in enumerate(hole_selection) if len(options)>1][0]
-                selection_1 = hole_selection.copy(); selection_1[harmonizing_hole] = [selection_1[harmonizing_hole][0]]
-                selection_2 = hole_selection.copy(); selection_2[harmonizing_hole] = [selection_2[harmonizing_hole][1]]
-                for selection in [selection_1,selection_2]:
-                    self.verify_hole_selection(family,selection)
-
-            new_assignment_synthesized = self.best_assignment != best_assignment_old
-            if new_assignment_synthesized:
-                self.best_tree = self.quotient.decision_tree
-                self.best_tree.root.associate_assignment(self.best_assignment)
-                self.best_tree_value = self.best_assignment_value
-                if consistent:
-                    break
-                if not disable_counterexamples and abs( (self.best_assignment_value-opt_result_value)/opt_result_value ) < 1e-4:
+                if self.best_assignment is not None:
+                    self.best_tree = self.quotient.decision_tree
+                    self.best_tree.root.associate_assignment(self.best_assignment)
+                    self.best_tree_value = self.best_assignment_value
                     break
 
             if self.resource_limit_reached():
@@ -240,7 +225,7 @@ def run(self, optimum_threshold=None):
         self.best_assignment = self.best_assignment_value = None
         self.best_tree = self.best_tree_value = None
         if scheduler_choices is not None:
-            self.map_scheduler(scheduler_choices, opt_result_value)
+            self.map_scheduler(scheduler_choices)
         else:
             if self.quotient.specification.has_optimality:
                 epsilon = 1e-1

diff --git a/paynt/verification/property.py b/paynt/verification/property.py
@@ -149,7 +149,9 @@ def reward(self):
 
     @property
     def is_discounted_reward(self):
-        return self.formula.is_reward_operator and self.formula.subformula.is_discounted_total_reward_formula
+        # TODO add discounted reward as a type to Stormpy formula
+        # return self.formula.is_reward_operator and self.formula.subformula.is_discounted_total_reward_formula
+        return self.formula.is_reward_operator and "discount" in str(self.formula.subformula)
 
     @property
     def maximizing(self):