From f4cd5f75630e8e1a8b6ed909a512194dfbc3279c Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 27 Sep 2024 17:22:48 +0200 Subject: [PATCH] DT synthesis: map scheduler to a decision tree --- .github/workflows/buildtest.yml | 4 +- Dockerfile | 11 +- paynt/cli.py | 8 +- paynt/quotient/mdp.py | 68 +++++++++--- paynt/quotient/quotient.py | 8 +- paynt/synthesizer/decision_tree.py | 103 +++++++++++++----- paynt/verification/property.py | 4 +- .../src/synthesis/quotient/ColoringSmt.cpp | 13 +-- .../src/synthesis/quotient/ColoringSmt.h | 4 +- 9 files changed, 160 insertions(+), 63 deletions(-) diff --git a/.github/workflows/buildtest.yml b/.github/workflows/buildtest.yml index c8eae798..9e3f2d9c 100644 --- a/.github/workflows/buildtest.yml +++ b/.github/workflows/buildtest.yml @@ -50,14 +50,14 @@ jobs: strategy: matrix: buildType: - - {name: "Release", imageName : "randriu/paynt", dockerTag: "mdp", setupArgs: ""} + - {name: "Release", imageName : "randriu/paynt", dockerTag: "mdp", baseImage : "randriu/stormpy-alex", setupArgs: "--disable-smg"} fail-fast: false if: github.ref == 'refs/heads/mdp' steps: - name: Git clone uses: actions/checkout@v4 - name: Build paynt image from Dockerfile - run: docker build -t ${{ matrix.buildType.imageName }}:${{ matrix.buildType.dockerTag }} . --build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg setup_args_pycarl=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS} + run: docker build -t ${{ matrix.buildType.imageName }}:${{matrix.buildType.dockerTag}} . --build-arg base_image=${{matrix.buildType.baseImage}} --build-arg setup_args=${{ matrix.buildType.setupArgs }} --build-arg no_threads=${NR_JOBS} - name: Login into docker # Only login if using mdp on original repo (and not for pull requests or forks) if: github.repository_owner == 'randriu' diff --git a/Dockerfile b/Dockerfile index 8745bb72..548a32e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,13 @@ -FROM movesrwth/stormpy:ci +# usage: docker build -t yourusername/paynt . +# command-line arguments can be specified e.g. as follows: --build-arg no_threads=8 -# Additional arguments for compiling payntbind +# base image +ARG base_image=movesrwth/stormpy:ci +FROM $base_image + +# additional arguments for compiling payntbind ARG setup_args="" -# Number of threads to use for parallel compilation +# number of threads to use for parallel compilation ARG no_threads=2 WORKDIR /opt/ diff --git a/paynt/cli.py b/paynt/cli.py index 9711cda1..75ddc79d 100644 --- a/paynt/cli.py +++ b/paynt/cli.py @@ -126,6 +126,10 @@ def setup_logger(log_path = None): help="decision tree synthesis: tree depth") @click.option("--tree-enumeration", is_flag=True, default=False, help="decision tree synthesis: if set, all trees of size at most tree_depth will be enumerated") +@click.option("--tree-map-scheduler", type=click.Path(), default=None, + help="decision tree synthesis: path to a scheduler to be mapped to a decision tree") +@click.option("--add-dont-care-action", is_flag=True, default=False, + help="decision tree synthesis: # if set, an explicit action executing a random choice of an available action will be added to each state") @click.option( "--constraint-bound", type=click.FLOAT, help="bound for creating constrained POMDP for Cassandra models", @@ -148,7 +152,7 @@ def paynt_run( use_storm_cutoffs, unfold_strategy_storm, export_fsc_storm, export_fsc_paynt, export_synthesis, mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction, - tree_depth, tree_enumeration, + tree_depth, tree_enumeration, tree_map_scheduler, add_dont_care_action, constraint_bound, ce_generator, profiling @@ -176,6 +180,8 @@ def paynt_run( paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_depth = tree_depth paynt.synthesizer.decision_tree.SynthesizerDecisionTree.tree_enumeration = tree_enumeration + paynt.synthesizer.decision_tree.SynthesizerDecisionTree.scheduler_path = tree_map_scheduler + paynt.quotient.mdp.MdpQuotient.add_dont_care_action = add_dont_care_action storm_control = None if storm_pomdp: diff --git a/paynt/quotient/mdp.py b/paynt/quotient/mdp.py index 13de30bf..5b58f04d 100644 --- a/paynt/quotient/mdp.py +++ b/paynt/quotient/mdp.py @@ -188,13 +188,10 @@ def to_graphviz(self, graphviz_tree, variables, action_labels): class DecisionTree: - def __init__(self, quotient, variable_name, state_valuations): + def __init__(self, quotient, variables, state_valuations): self.quotient = quotient self.state_valuations = state_valuations - variables = [Variable(var,var_name,state_valuations) for var,var_name in enumerate(variable_name)] - variables = [v for v in variables if len(v.domain) > 1] self.variables = variables - logger.debug(f"found the following {len(self.variables)} variables: {[str(v) for v in self.variables]}") self.reset() def reset(self): @@ -254,18 +251,22 @@ def to_graphviz(self): class MdpQuotient(paynt.quotient.quotient.Quotient): + # if true, an explicit action executing a random choice of an available action will be added to each state + add_dont_care_action = False + def __init__(self, mdp, specification): super().__init__(specification=specification) updated = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp) if updated is not None: mdp = updated action_labels,_ = payntbind.synthesis.extractActionLabels(mdp) - if "__random__" not in action_labels: + if "__random__" not in action_labels and MdpQuotient.add_dont_care_action: logger.debug("adding explicit don't-care action to every state...") mdp = payntbind.synthesis.addDontCareAction(mdp) self.quotient_mdp = mdp self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(mdp) self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp) + logger.info(f"MDP has {len(self.action_labels)} actions") assert mdp.has_state_valuations(), "model has no state valuations" sv = mdp.state_valuations @@ -276,20 +277,59 @@ def __init__(self, mdp, specification): valuation = json.loads(str(sv.get_json(state))) valuation = [valuation[var_name] for var_name in variable_name] state_valuations.append(valuation) - self.decision_tree = DecisionTree(self,variable_name,state_valuations) + variables = [Variable(var,var_name,state_valuations) for var,var_name in enumerate(variable_name)] + variable_mask = [len(v.domain) > 1 for v in variables] + variables = [v for index,v in enumerate(variables) if variable_mask[index]] + for state,valuation in enumerate(state_valuations): + state_valuations[state] = [value for index,value in enumerate(valuation) if variable_mask[index]] + self.variables = variables + self.state_valuations = state_valuations + logger.debug(f"found the following {len(self.variables)} variables: {[str(v) for v in self.variables]}") + self.decision_tree = None self.coloring = None self.family = None self.splitter_count = None - def decide(self, node, var_name): - node.set_variable_by_name(var_name,self.decision_tree) + def state_valuation_to_state(self, valuation): + valuation = [valuation[v.name] for v in self.variables] + for state,state_valuation in enumerate(self.state_valuations): + if valuation == state_valuation: + return state + else: + assert False, "state valuation not found" + + def scheduler_json_to_choices(self, scheduler_json): + ndi = self.quotient_mdp.nondeterministic_choice_indices.copy() + assert self.quotient_mdp.nr_states == len(scheduler_json) + state_to_choice = self.empty_scheduler() + for state_decision in scheduler_json: + state = self.state_valuation_to_state(state_decision["s"]) + actions = state_decision["c"] + assert len(actions) == 1 + action_labels = actions[0]["labels"] + assert len(action_labels) <= 1 + if len(action_labels) == 0: + state_to_choice[state] = ndi[state] + continue + action = self.action_labels.index(action_labels[0]) + # find a choice that executes this action + for choice in range(ndi[state],ndi[state+1]): + if self.choice_to_action[choice] == action: + state_to_choice[state] = choice + break + else: + assert False, "action is not available in the state" + state_to_choice = self.discard_unreachable_choices(state_to_choice) + choices = self.state_to_choice_to_choices(state_to_choice) + return choices - ''' - Build the design space and coloring corresponding to the current decision tree. - ''' - def set_depth(self, depth): - logger.debug(f"synthesizing tree of depth {depth}") + def reset_tree(self, depth, disable_counterexamples=False): + ''' + Rebuild the decision tree template, the design space and the coloring. + ''' + logger.debug(f"building tree of depth {depth}") + self.decision_tree = DecisionTree(self,self.variables,self.state_valuations) self.decision_tree.set_depth(depth) # logger.debug("building coloring...") @@ -297,7 +337,7 @@ def set_depth(self, depth): variable_name = [v.name for v in variables] variable_domain = [v.domain for v in variables] tree_list = self.decision_tree.to_list() - self.coloring = payntbind.synthesis.ColoringSmt(self.quotient_mdp, variable_name, variable_domain, tree_list, False) + self.coloring = payntbind.synthesis.ColoringSmt(self.quotient_mdp, variable_name, variable_domain, tree_list, disable_counterexamples) # reconstruct the family hole_info = self.coloring.getFamilyInfo() diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 37032286..8f22931f 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -116,12 +116,12 @@ def discard_unreachable_choices(self, state_to_choice): state_queue.append(dst) return state_to_choice_reachable - def scheduler_to_state_to_choice(self, mdp, scheduler, discard_unreachable_choices=True): - state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map) + def scheduler_to_state_to_choice(self, submdp, scheduler, discard_unreachable_choices=True): + state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, submdp.model, submdp.quotient_choice_map) state_to_choice = self.empty_scheduler() - for state in range(mdp.model.nr_states): + for state in range(submdp.model.nr_states): quotient_choice = state_to_quotient_choice[state] - quotient_state = mdp.quotient_state_map[state] + quotient_state = submdp.quotient_state_map[state] state_to_choice[quotient_state] = quotient_choice if discard_unreachable_choices: state_to_choice = self.discard_unreachable_choices(state_to_choice) diff --git a/paynt/synthesizer/decision_tree.py b/paynt/synthesizer/decision_tree.py index 55fee6df..03c48357 100644 --- a/paynt/synthesizer/decision_tree.py +++ b/paynt/synthesizer/decision_tree.py @@ -5,6 +5,8 @@ import stormpy import payntbind +import json + import logging logger = logging.getLogger(__name__) @@ -14,8 +16,8 @@ class SynthesizerDecisionTree(paynt.synthesizer.synthesizer_ar.SynthesizerAR): tree_depth = 0 # if set, all trees of size at most tree_depth will be enumerated tree_enumeration = False - # if set, the optimal k-tree will be used to jumpstart the synthesis of the (k+1)-tree - use_tree_hint = True + # path to a scheduler to be mapped to a decision tree + scheduler_path = None def __init__(self, *args): super().__init__(*args) @@ -118,7 +120,7 @@ def export_decision_tree(self, decision_tree, export_filename_base): def synthesize_tree(self, depth:int): self.counters_reset() - self.quotient.set_depth(depth) + self.quotient.reset_tree(depth) self.best_assignment = self.best_assignment_value = None self.synthesize(keep_optimum=True) if self.best_assignment is not None: @@ -135,7 +137,8 @@ def synthesize_tree_sequence(self, opt_result_value): if global_timeout is None: global_timeout = 1800 depth_timeout = global_timeout / 2 / SynthesizerDecisionTree.tree_depth for depth in range(SynthesizerDecisionTree.tree_depth+1): - self.quotient.set_depth(depth) + print() + self.quotient.reset_tree(depth) best_assignment_old = self.best_assignment family = self.quotient.family @@ -148,9 +151,9 @@ def synthesize_tree_sequence(self, opt_result_value): self.synthesis_timer.start() families = [family] - if SynthesizerDecisionTree.use_tree_hint and self.best_tree is not None: + if self.best_tree is not None: subfamily = family.copy() - self.quotient.decision_tree.root.apply_hint(subfamily,self.best_tree) + self.quotient.decision_tree.root.apply_hint(subfamily,self.best_tree.root) families = [subfamily,family] for family in families: @@ -170,51 +173,99 @@ def synthesize_tree_sequence(self, opt_result_value): result = dtmc.check_specification(self.quotient.specification) logger.info(f"double-checking specification satisfiability: {result}") + self.best_tree = self.quotient.decision_tree + self.best_tree.root.associate_assignment(self.best_assignment) + self.best_tree_value = self.best_assignment_value + if abs( (self.best_assignment_value-opt_result_value)/opt_result_value ) < 1e-3: break - self.best_tree = self.quotient.decision_tree.root - self.best_tree.associate_assignment(self.best_assignment) + if self.resource_limit_reached(): + break + + def map_scheduler(self, scheduler_choices, opt_result_value): + # use counterexamples iff a dont' care action exists + disable_counterexamples = "__random__" not in self.quotient.action_labels + self.counters_reset() + for depth in range(SynthesizerDecisionTree.tree_depth+1): + self.quotient.reset_tree(depth,disable_counterexamples=disable_counterexamples) + family = self.quotient.family + self.quotient.build(family) + family.analysis_result = self.quotient.build_unsat_result() + best_assignment_old = self.best_assignment + + consistent,hole_selection = self.quotient.are_choices_consistent(scheduler_choices, family) + if consistent: + self.verify_hole_selection(family,hole_selection) + elif not disable_counterexamples: + harmonizing_hole = [hole for hole,options in enumerate(hole_selection) if len(options)>1][0] + selection_1 = hole_selection.copy(); selection_1[harmonizing_hole] = [selection_1[harmonizing_hole][0]] + selection_2 = hole_selection.copy(); selection_2[harmonizing_hole] = [selection_2[harmonizing_hole][1]] + for selection in [selection_1,selection_2]: + self.verify_hole_selection(family,selection) + + new_assignment_synthesized = self.best_assignment != best_assignment_old + if new_assignment_synthesized: + self.best_tree = self.quotient.decision_tree + self.best_tree.root.associate_assignment(self.best_assignment) self.best_tree_value = self.best_assignment_value + if consistent: + break + if not disable_counterexamples and abs( (self.best_assignment_value-opt_result_value)/opt_result_value ) < 1e-4: + break if self.resource_limit_reached(): break + # self.counters_print() def run(self, optimum_threshold=None): - paynt_mdp = paynt.models.models.Mdp(self.quotient.quotient_mdp) - mc_result = paynt_mdp.model_check_property(self.quotient.get_property()) + + scheduler_choices = None + if SynthesizerDecisionTree.scheduler_path is None: + paynt_mdp = paynt.models.models.Mdp(self.quotient.quotient_mdp) + mc_result = paynt_mdp.model_check_property(self.quotient.get_property()) + else: + opt_result_value = None + with open(SynthesizerDecisionTree.scheduler_path, 'r') as f: + scheduler_json = json.load(f) + scheduler_choices = self.quotient.scheduler_json_to_choices(scheduler_json) + submdp = self.quotient.build_from_choice_mask(scheduler_choices) + mc_result = submdp.model_check_property(self.quotient.get_property()) opt_result_value = mc_result.value logger.info(f"the optimal scheduler has value: {opt_result_value}") - if self.quotient.specification.has_optimality: - epsilon = 1e-1 - mc_result_positive = opt_result_value > 0 - if self.quotient.specification.optimality.maximizing == mc_result_positive: - epsilon *= -1 - # optimum_threshold = opt_result_value * (1 + epsilon) - self.set_optimality_threshold(optimum_threshold) - - self.best_tree = None - self.best_tree_value = None - if not SynthesizerDecisionTree.tree_enumeration: - self.synthesize_tree(SynthesizerDecisionTree.tree_depth) + self.best_assignment = self.best_assignment_value = None + self.best_tree = self.best_tree_value = None + if scheduler_choices is not None: + self.map_scheduler(scheduler_choices, opt_result_value) else: - self.synthesize_tree_sequence(opt_result_value) + if self.quotient.specification.has_optimality: + epsilon = 1e-1 + mc_result_positive = opt_result_value > 0 + if self.quotient.specification.optimality.maximizing == mc_result_positive: + epsilon *= -1 + # optimum_threshold = opt_result_value * (1 + epsilon) + self.set_optimality_threshold(optimum_threshold) + + if not SynthesizerDecisionTree.tree_enumeration: + self.synthesize_tree(SynthesizerDecisionTree.tree_depth) + else: + self.synthesize_tree_sequence(opt_result_value) logger.info(f"the optimal scheduler has value: {opt_result_value}") if self.best_tree is None: logger.info("no admissible tree found") else: self.best_tree.simplify() - logger.info(f"printing synthesized tree below:") + logger.info(f"printing the synthesized tree below:") print(self.best_tree.to_string()) depth = self.best_tree.get_depth() if self.quotient.specification.has_optimality: - logger.info(f"synthesized tree has value {self.best_tree_value}") + logger.info(f"the synthesized tree has value {self.best_tree_value}") num_nodes = len(self.best_tree.collect_nonterminals()) - logger.info(f"synthesized tree is of depth {depth} and has {num_nodes} decision nodes") + logger.info(f"the synthesized tree is of depth {depth} and has {num_nodes} decision nodes") if self.export_synthesis_filename_base is not None: self.export_decision_tree(self.best_tree, self.export_synthesis_filename_base) time_total = paynt.utils.timer.GlobalTimer.read() diff --git a/paynt/verification/property.py b/paynt/verification/property.py index 4b83bcb4..a0523269 100644 --- a/paynt/verification/property.py +++ b/paynt/verification/property.py @@ -50,10 +50,10 @@ def initialize(cls): # se.set_linear_equation_solver_type(stormpy.EquationSolverType.eigen) # se.minmax_solver_environment.method = stormpy.MinMaxMethod.policy_iteration - se.minmax_solver_environment.method = stormpy.MinMaxMethod.value_iteration + # se.minmax_solver_environment.method = stormpy.MinMaxMethod.value_iteration # se.minmax_solver_environment.method = stormpy.MinMaxMethod.sound_value_iteration # se.minmax_solver_environment.method = stormpy.MinMaxMethod.interval_iteration - # se.minmax_solver_environment.method = stormpy.MinMaxMethod.optimistic_value_iteration + se.minmax_solver_environment.method = stormpy.MinMaxMethod.optimistic_value_iteration # se.minmax_solver_environment.method = stormpy.MinMaxMethod.topological @classmethod diff --git a/payntbind/src/synthesis/quotient/ColoringSmt.cpp b/payntbind/src/synthesis/quotient/ColoringSmt.cpp index 29bb0441..d64d9ec1 100644 --- a/payntbind/src/synthesis/quotient/ColoringSmt.cpp +++ b/payntbind/src/synthesis/quotient/ColoringSmt.cpp @@ -16,13 +16,13 @@ ColoringSmt::ColoringSmt( std::vector const& variable_name, std::vector> const& variable_domain, std::vector> const& tree_list, - bool one_consistency_check + bool disable_counterexamples ) : initial_state(*model.getInitialStates().begin()), row_groups(model.getNondeterministicChoiceIndices()), choice_destinations(synthesis::computeChoiceDestinations(model)), choice_to_action(synthesis::extractActionLabels(model).second), variable_name(variable_name), variable_domain(variable_domain), - solver(ctx), harmonizing_variable(ctx), one_consistency_check(one_consistency_check) { + solver(ctx), harmonizing_variable(ctx), disable_counterexamples(disable_counterexamples) { timers[__FUNCTION__].start(); @@ -165,7 +165,7 @@ ColoringSmt::ColoringSmt( } timers["ColoringSmt:: create choice colors"].stop(); - if(one_consistency_check) { + if(disable_counterexamples) { timers[__FUNCTION__].stop(); return; } @@ -438,7 +438,7 @@ std::pair>> ColoringSmt::areCh return std::make_pair(true,hole_options_vector); } - if(one_consistency_check) { + if(disable_counterexamples) { solver.pop(); solver.pop(); timers[__FUNCTION__].stop(); @@ -454,7 +454,6 @@ std::pair>> ColoringSmt::areCh BitVector state_reached(numStates(),false); state_reached.set(initial_state,true); consistent = true; - uint64_t num_choices_added = 0; while(consistent) { STORM_LOG_THROW(not unexplored_states.empty(), storm::exceptions::UnexpectedException, "all states explored but UNSAT core not found"); uint64_t state = unexplored_states.front(); unexplored_states.pop(); @@ -466,7 +465,6 @@ std::pair>> ColoringSmt::areCh const char *label = choice_path_label[choice][path].c_str(); solver.add(choice_path_expresssion[choice][path], label); } - // std::cout << "(2) adding choice " << (++num_choices_added) << std::endl; consistent = check(); if(not consistent) { break; @@ -539,9 +537,6 @@ std::pair>> ColoringSmt::areCh } - - - template class ColoringSmt<>; } \ No newline at end of file diff --git a/payntbind/src/synthesis/quotient/ColoringSmt.h b/payntbind/src/synthesis/quotient/ColoringSmt.h index 73e7b8d2..88653b94 100644 --- a/payntbind/src/synthesis/quotient/ColoringSmt.h +++ b/payntbind/src/synthesis/quotient/ColoringSmt.h @@ -26,7 +26,7 @@ class ColoringSmt { std::vector const& variable_name, std::vector> const& variable_domain, std::vector> const& tree_list, - bool one_consistency_check = false + bool disable_counterexamples = false ); /** For each hole, get a list of name-type pairs. */ @@ -134,7 +134,7 @@ class ColoringSmt { void loadUnsatCore(z3::expr_vector const& unsat_core_expr, Family const& subfamily); /** If true, the object will be setup for one consistency check. */ - bool one_consistency_check; + bool disable_counterexamples; };