DT synthesis: map scheduler to a decision tree

randriu · Sep 27, 2024 · f4cd5f7 · f4cd5f7
1 parent bf13409
commit f4cd5f7
Show file tree

Hide file tree

Showing 9 changed files with 160 additions and 63 deletions.
diff --git a/.github/workflows/buildtest.yml b/.github/workflows/buildtest.yml
@@ -50,14 +50,14 @@ jobs:
     strategy:
       matrix:
         buildType:
-          - {name: "Release", imageName : "randriu/paynt", dockerTag: "mdp", setupArgs: ""}
+          - {name: "Release", imageName : "randriu/paynt", dockerTag: "mdp", baseImage : "randriu/stormpy-alex", setupArgs: "--disable-smg"}
       fail-fast: false
     if: github.ref == 'refs/heads/mdp'
     steps:
       - name: Git clone
         uses: actions/checkout@v4
       - name: Build paynt image from Dockerfile
-        run: docker build -t ${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} . --build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg setup_args_pycarl=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS}
+        run: docker build -t ${{ matrix.buildType.imageName }}:${{matrix.buildType.dockerTag}} . --build-arg base_image=${{matrix.buildType.baseImage}} --build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS}
       - name: Login into docker
         # Only login if using mdp on original repo (and not for pull requests or forks)
         if: github.repository_owner == 'randriu'

diff --git a/Dockerfile b/Dockerfile
@@ -1,8 +1,13 @@
-FROM movesrwth/stormpy:ci
+# usage: docker build -t yourusername/paynt .
+# command-line arguments can be specified e.g. as follows: --build-arg no_threads=8
 
-# Additional arguments for compiling payntbind
+# base image
+ARG base_image=movesrwth/stormpy:ci
+FROM $base_image
+
+# additional arguments for compiling payntbind
 ARG setup_args=""
-# Number of threads to use for parallel compilation
+# number of threads to use for parallel compilation
 ARG no_threads=2
 
 WORKDIR /opt/

diff --git a/paynt/cli.py b/paynt/cli.py
@@ -126,6 +126,10 @@ def setup_logger(log_path = None):
     help="decision tree synthesis: tree depth")
 @click.option("--tree-enumeration", is_flag=True, default=False,
     help="decision tree synthesis: if set, all trees of size at most tree_depth will be enumerated")
+@click.option("--tree-map-scheduler", type=click.Path(), default=None,
+    help="decision tree synthesis: path to a scheduler to be mapped to a decision tree")
+@click.option("--add-dont-care-action", is_flag=True, default=False,
+    help="decision tree synthesis: # if set, an explicit action executing a random choice of an available action will be added to each state")
 
 @click.option(
     "--constraint-bound", type=click.FLOAT, help="bound for creating constrained POMDP for Cassandra models",
@@ -148,7 +152,7 @@ def paynt_run(
     use_storm_cutoffs, unfold_strategy_storm,
     export_fsc_storm, export_fsc_paynt, export_synthesis,
     mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction,
-    tree_depth, tree_enumeration,
+    tree_depth, tree_enumeration, tree_map_scheduler, add_dont_care_action,
     constraint_bound,
     ce_generator,
     profiling
@@ -176,6 +180,8 @@ def paynt_run(
 
     paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_depth = tree_depth
     paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_enumeration = tree_enumeration
+    paynt.synthesizer.decision_tree.SynthesizerDecisionTree.scheduler_path = tree_map_scheduler
+    paynt.quotient.mdp.MdpQuotient.add_dont_care_action = add_dont_care_action
 
     storm_control = None
     if storm_pomdp:

diff --git a/paynt/quotient/mdp.py b/paynt/quotient/mdp.py
@@ -188,13 +188,10 @@ def to_graphviz(self, graphviz_tree, variables, action_labels):
 
 class DecisionTree:
 
-    def __init__(self, quotient, variable_name, state_valuations):
+    def __init__(self, quotient, variables, state_valuations):
         self.quotient = quotient
         self.state_valuations = state_valuations
-        variables = [Variable(var,var_name,state_valuations) for var,var_name in enumerate(variable_name)]
-        variables = [v for v in variables if len(v.domain) > 1]
         self.variables = variables
-        logger.debug(f"found the following {len(self.variables)} variables: {[str(v) for v in self.variables]}")
         self.reset()
 
     def reset(self):
@@ -254,18 +251,22 @@ def to_graphviz(self):
 
 class MdpQuotient(paynt.quotient.quotient.Quotient):
 
+    # if true, an explicit action executing a random choice of an available action will be added to each state
+    add_dont_care_action = False
+
     def __init__(self, mdp, specification):
         super().__init__(specification=specification)
         updated = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp)
         if updated is not None: mdp = updated
         action_labels,_ = payntbind.synthesis.extractActionLabels(mdp)
-        if "__random__" not in action_labels:
+        if "__random__" not in action_labels and MdpQuotient.add_dont_care_action:
             logger.debug("adding explicit don't-care action to every state...")
             mdp = payntbind.synthesis.addDontCareAction(mdp)
 
         self.quotient_mdp = mdp
         self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(mdp)
         self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp)
+        logger.info(f"MDP has {len(self.action_labels)} actions")
 
         assert mdp.has_state_valuations(), "model has no state valuations"
         sv = mdp.state_valuations
@@ -276,28 +277,67 @@ def __init__(self, mdp, specification):
             valuation = json.loads(str(sv.get_json(state)))
             valuation = [valuation[var_name] for var_name in variable_name]
             state_valuations.append(valuation)
-        self.decision_tree = DecisionTree(self,variable_name,state_valuations)
+        variables = [Variable(var,var_name,state_valuations) for var,var_name in enumerate(variable_name)]
+        variable_mask = [len(v.domain) > 1 for v in variables]
+        variables = [v for index,v in enumerate(variables) if variable_mask[index]]
+        for state,valuation in enumerate(state_valuations):
+            state_valuations[state] = [value for index,value in enumerate(valuation) if variable_mask[index]]
+        self.variables = variables
+        self.state_valuations = state_valuations
+        logger.debug(f"found the following {len(self.variables)} variables: {[str(v) for v in self.variables]}")
 
+        self.decision_tree = None
         self.coloring = None
         self.family = None
         self.splitter_count = None
 
-    def decide(self, node, var_name):
-        node.set_variable_by_name(var_name,self.decision_tree)
+    def state_valuation_to_state(self, valuation):
+        valuation = [valuation[v.name] for v in self.variables]
+        for state,state_valuation in enumerate(self.state_valuations):
+            if valuation == state_valuation:
+                return state
+        else:
+            assert False, "state valuation not found"
+
+    def scheduler_json_to_choices(self, scheduler_json):
+        ndi = self.quotient_mdp.nondeterministic_choice_indices.copy()
+        assert self.quotient_mdp.nr_states == len(scheduler_json)
+        state_to_choice = self.empty_scheduler()
+        for state_decision in scheduler_json:
+            state = self.state_valuation_to_state(state_decision["s"])
+            actions = state_decision["c"]
+            assert len(actions) == 1
+            action_labels = actions[0]["labels"]
+            assert len(action_labels) <= 1
+            if len(action_labels) == 0:
+                state_to_choice[state] = ndi[state]
+                continue
+            action = self.action_labels.index(action_labels[0])
+            # find a choice that executes this action
+            for choice in range(ndi[state],ndi[state+1]):
+                if self.choice_to_action[choice] == action:
+                    state_to_choice[state] = choice
+                    break
+            else:
+                assert False, "action is not available in the state"
+        state_to_choice = self.discard_unreachable_choices(state_to_choice)
+        choices = self.state_to_choice_to_choices(state_to_choice)
+        return choices
 
-    '''
-    Build the design space and coloring corresponding to the current decision tree.
-    '''
-    def set_depth(self, depth):
-        logger.debug(f"synthesizing tree of depth {depth}")
+    def reset_tree(self, depth, disable_counterexamples=False):
+        '''
+        Rebuild the decision tree template, the design space and the coloring.
+        '''
+        logger.debug(f"building tree of depth {depth}")
+        self.decision_tree = DecisionTree(self,self.variables,self.state_valuations)
         self.decision_tree.set_depth(depth)
 
         # logger.debug("building coloring...")
         variables = self.decision_tree.variables
         variable_name = [v.name for v in variables]
         variable_domain = [v.domain for v in variables]
         tree_list = self.decision_tree.to_list()
-        self.coloring = payntbind.synthesis.ColoringSmt(self.quotient_mdp, variable_name, variable_domain, tree_list, False)
+        self.coloring = payntbind.synthesis.ColoringSmt(self.quotient_mdp, variable_name, variable_domain, tree_list, disable_counterexamples)
 
         # reconstruct the family
         hole_info = self.coloring.getFamilyInfo()

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
@@ -116,12 +116,12 @@ def discard_unreachable_choices(self, state_to_choice):
                     state_queue.append(dst)
         return state_to_choice_reachable
 
-    def scheduler_to_state_to_choice(self, mdp, scheduler, discard_unreachable_choices=True):
-        state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map)
+    def scheduler_to_state_to_choice(self, submdp, scheduler, discard_unreachable_choices=True):
+        state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, submdp.model, submdp.quotient_choice_map)
         state_to_choice = self.empty_scheduler()
-        for state in range(mdp.model.nr_states):
+        for state in range(submdp.model.nr_states):
             quotient_choice = state_to_quotient_choice[state]
-            quotient_state = mdp.quotient_state_map[state]
+            quotient_state = submdp.quotient_state_map[state]
             state_to_choice[quotient_state] = quotient_choice
         if discard_unreachable_choices:
             state_to_choice = self.discard_unreachable_choices(state_to_choice)

diff --git a/paynt/synthesizer/decision_tree.py b/paynt/synthesizer/decision_tree.py
@@ -5,6 +5,8 @@
 import stormpy
 import payntbind
 
+import json
+
 import logging
 logger = logging.getLogger(__name__)
 
@@ -14,8 +16,8 @@ class SynthesizerDecisionTree(paynt.synthesizer.synthesizer_ar.SynthesizerAR):
     tree_depth = 0
     # if set, all trees of size at most tree_depth will be enumerated
     tree_enumeration = False
-    # if set, the optimal k-tree will be used to jumpstart the synthesis of the (k+1)-tree
-    use_tree_hint = True
+    # path to a scheduler to be mapped to a decision tree
+    scheduler_path = None
 
     def __init__(self, *args):
         super().__init__(*args)
@@ -118,7 +120,7 @@ def export_decision_tree(self, decision_tree, export_filename_base):
 
     def synthesize_tree(self, depth:int):
         self.counters_reset()
-        self.quotient.set_depth(depth)
+        self.quotient.reset_tree(depth)
         self.best_assignment = self.best_assignment_value = None
         self.synthesize(keep_optimum=True)
         if self.best_assignment is not None:
@@ -135,7 +137,8 @@ def synthesize_tree_sequence(self, opt_result_value):
         if global_timeout is None: global_timeout = 1800
         depth_timeout = global_timeout / 2 / SynthesizerDecisionTree.tree_depth
         for depth in range(SynthesizerDecisionTree.tree_depth+1):
-            self.quotient.set_depth(depth)
+            print()
+            self.quotient.reset_tree(depth)
             best_assignment_old = self.best_assignment
 
             family = self.quotient.family
@@ -148,9 +151,9 @@ def synthesize_tree_sequence(self, opt_result_value):
             self.synthesis_timer.start()
             families = [family]
 
-            if SynthesizerDecisionTree.use_tree_hint and self.best_tree is not None:
+            if self.best_tree is not None:
                 subfamily = family.copy()
-                self.quotient.decision_tree.root.apply_hint(subfamily,self.best_tree)
+                self.quotient.decision_tree.root.apply_hint(subfamily,self.best_tree.root)
                 families = [subfamily,family]
 
             for family in families:
@@ -170,51 +173,99 @@ def synthesize_tree_sequence(self, opt_result_value):
                     result = dtmc.check_specification(self.quotient.specification)
                     logger.info(f"double-checking specification satisfiability: {result}")
 
+                self.best_tree = self.quotient.decision_tree
+                self.best_tree.root.associate_assignment(self.best_assignment)
+                self.best_tree_value = self.best_assignment_value
+
                 if abs( (self.best_assignment_value-opt_result_value)/opt_result_value ) < 1e-3:
                     break
 
-                self.best_tree = self.quotient.decision_tree.root
-                self.best_tree.associate_assignment(self.best_assignment)
+            if self.resource_limit_reached():
+                break
+
+    def map_scheduler(self, scheduler_choices, opt_result_value):
+        # use counterexamples iff a dont' care action exists
+        disable_counterexamples = "__random__" not in self.quotient.action_labels
+        self.counters_reset()
+        for depth in range(SynthesizerDecisionTree.tree_depth+1):
+            self.quotient.reset_tree(depth,disable_counterexamples=disable_counterexamples)
+            family = self.quotient.family
+            self.quotient.build(family)
+            family.analysis_result = self.quotient.build_unsat_result()
+            best_assignment_old = self.best_assignment
+
+            consistent,hole_selection = self.quotient.are_choices_consistent(scheduler_choices, family)
+            if consistent:
+                self.verify_hole_selection(family,hole_selection)
+            elif not disable_counterexamples:
+                harmonizing_hole = [hole for hole,options in enumerate(hole_selection) if len(options)>1][0]
+                selection_1 = hole_selection.copy(); selection_1[harmonizing_hole] = [selection_1[harmonizing_hole][0]]
+                selection_2 = hole_selection.copy(); selection_2[harmonizing_hole] = [selection_2[harmonizing_hole][1]]
+                for selection in [selection_1,selection_2]:
+                    self.verify_hole_selection(family,selection)
+
+            new_assignment_synthesized = self.best_assignment != best_assignment_old
+            if new_assignment_synthesized:
+                self.best_tree = self.quotient.decision_tree
+                self.best_tree.root.associate_assignment(self.best_assignment)
                 self.best_tree_value = self.best_assignment_value
+                if consistent:
+                    break
+                if not disable_counterexamples and abs( (self.best_assignment_value-opt_result_value)/opt_result_value ) < 1e-4:
+                    break
 
             if self.resource_limit_reached():
                 break
 
+        # self.counters_print()
 
     def run(self, optimum_threshold=None):
-        paynt_mdp = paynt.models.models.Mdp(self.quotient.quotient_mdp)
-        mc_result = paynt_mdp.model_check_property(self.quotient.get_property())
+
+        scheduler_choices = None
+        if SynthesizerDecisionTree.scheduler_path is None:
+            paynt_mdp = paynt.models.models.Mdp(self.quotient.quotient_mdp)
+            mc_result = paynt_mdp.model_check_property(self.quotient.get_property())
+        else:
+            opt_result_value = None
+            with open(SynthesizerDecisionTree.scheduler_path, 'r') as f:
+                scheduler_json = json.load(f)
+            scheduler_choices = self.quotient.scheduler_json_to_choices(scheduler_json)
+            submdp = self.quotient.build_from_choice_mask(scheduler_choices)
+            mc_result = submdp.model_check_property(self.quotient.get_property())
         opt_result_value = mc_result.value
         logger.info(f"the optimal scheduler has value: {opt_result_value}")
 
-        if self.quotient.specification.has_optimality:
-            epsilon = 1e-1
-            mc_result_positive = opt_result_value > 0
-            if self.quotient.specification.optimality.maximizing == mc_result_positive:
-                epsilon *= -1
-            # optimum_threshold = opt_result_value * (1 + epsilon)
-        self.set_optimality_threshold(optimum_threshold)
-
-        self.best_tree = None
-        self.best_tree_value = None
-        if not SynthesizerDecisionTree.tree_enumeration:
-            self.synthesize_tree(SynthesizerDecisionTree.tree_depth)
+        self.best_assignment = self.best_assignment_value = None
+        self.best_tree = self.best_tree_value = None
+        if scheduler_choices is not None:
+            self.map_scheduler(scheduler_choices, opt_result_value)
         else:
-            self.synthesize_tree_sequence(opt_result_value)
+            if self.quotient.specification.has_optimality:
+                epsilon = 1e-1
+                mc_result_positive = opt_result_value > 0
+                if self.quotient.specification.optimality.maximizing == mc_result_positive:
+                    epsilon *= -1
+                # optimum_threshold = opt_result_value * (1 + epsilon)
+            self.set_optimality_threshold(optimum_threshold)
+
+            if not SynthesizerDecisionTree.tree_enumeration:
+                self.synthesize_tree(SynthesizerDecisionTree.tree_depth)
+            else:
+                self.synthesize_tree_sequence(opt_result_value)
 
         logger.info(f"the optimal scheduler has value: {opt_result_value}")
         if self.best_tree is None:
             logger.info("no admissible tree found")
         else:
             self.best_tree.simplify()
-            logger.info(f"printing synthesized tree below:")
+            logger.info(f"printing the synthesized tree below:")
             print(self.best_tree.to_string())
 
             depth = self.best_tree.get_depth()
             if self.quotient.specification.has_optimality:
-                logger.info(f"synthesized tree has value {self.best_tree_value}")
+                logger.info(f"the synthesized tree has value {self.best_tree_value}")
             num_nodes = len(self.best_tree.collect_nonterminals())
-            logger.info(f"synthesized tree is of depth {depth} and has {num_nodes} decision nodes")
+            logger.info(f"the synthesized tree is of depth {depth} and has {num_nodes} decision nodes")
             if self.export_synthesis_filename_base is not None:
                 self.export_decision_tree(self.best_tree, self.export_synthesis_filename_base)
         time_total = paynt.utils.timer.GlobalTimer.read()

diff --git a/paynt/verification/property.py b/paynt/verification/property.py
@@ -50,10 +50,10 @@ def initialize(cls):
         # se.set_linear_equation_solver_type(stormpy.EquationSolverType.eigen)
 
         # se.minmax_solver_environment.method = stormpy.MinMaxMethod.policy_iteration
-        se.minmax_solver_environment.method = stormpy.MinMaxMethod.value_iteration
+        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.value_iteration
         # se.minmax_solver_environment.method = stormpy.MinMaxMethod.sound_value_iteration
         # se.minmax_solver_environment.method = stormpy.MinMaxMethod.interval_iteration
-        # se.minmax_solver_environment.method = stormpy.MinMaxMethod.optimistic_value_iteration
+        se.minmax_solver_environment.method = stormpy.MinMaxMethod.optimistic_value_iteration
         # se.minmax_solver_environment.method = stormpy.MinMaxMethod.topological
 
     @classmethod