more choice canonicity operations
Roman Andriushchenko committed Jul 25, 2024
1 parent 970ade6 commit ad69089
Showing 12 changed files with 667 additions and 680 deletions.
8 changes: 0 additions & 8 deletions learning/paynt-learner.dockerfile

This file was deleted.

127 changes: 0 additions & 127 deletions learning/paynt_pomdp_sketch.py

This file was deleted.

112 changes: 47 additions & 65 deletions models/mdp/obstacles/sketch.templ
@@ -1,65 +1,47 @@
-mdp
-
-const int N = 10;
-const int gMIN = 1;
-const int gMAX = N;
-
-const int o1x = 2;
-const int o1y = 2;
-
-const int o2x = 2;
-const int o2y = 6;
-
-const int o3x = 4;
-const int o3y = 3;
-
-const int o4x = 3;
-const int o4y = 6;
-
-const int o5x = 5;
-const int o5y = 5;
-
-const int o6x = 8;
-const int o6y = 8;
-
-formula at1 = (x = o1x & y = o1y);
-formula at2 = (x = o2x & y = o2y);
-formula at3 = (x = o3x & y = o3y);
-formula at4 = (x = o4x & y = o4y);
-formula at5 = (x = o5x & y = o5y);
-formula at6 = (x = o6x & y = o6y);
-
-formula crash = at1 | at2 | at3 | at4 | at5 | at6;
-formula goal = (x=gMAX & y=gMAX);
-
-label "notbad" = !crash;
-label "goal" = goal;
-
-
-const double slip = 0.2;
-
-formula al = min(max(x-1,gMIN),gMAX);
-formula all = min(max(x-2,gMIN),gMAX);
-formula ar = min(max(x+1,gMIN),gMAX);
-formula arr = min(max(x+2,gMIN),gMAX);
-formula au = min(max(y-1,gMIN),gMAX);
-formula auu = min(max(y-2,gMIN),gMAX);
-formula ad = min(max(y+1,gMIN),gMAX);
-formula add = min(max(y+2,gMIN),gMAX);
-
-module agent
-    x : [gMIN..gMAX] init gMIN;
-    y : [gMIN..gMAX] init gMIN;
-
-    [le] !crash -> 1-slip : (x'=al) + slip : (x'=all);
-    [ri] !crash -> 1-slip : (x'=ar) + slip : (x'=arr);
-    [up] !crash -> 1-slip : (y'=au) + slip : (y'=auu);
-    [do] !crash -> 1-slip : (y'=ad) + slip : (y'=add);
-endmodule
-
-rewards "steps"
-    [le] true: 1;
-    [ri] true: 1;
-    [up] true: 1;
-    [do] true: 1;
-endrewards
+mdp
+
+const int N = 10;
+const int gMIN = 1;
+const int gMAX = N;
+
+formula at1 = (x = 1 & y = 5);
+formula at2 = (x = 5 & y = 1);
+formula at3 = (x = 2 & y = 2);
+formula at4 = (x = 9 & y = 10);
+formula at5 = (x = 9 & y = 9);
+formula at6 = (x = 2 & y = 5);
+
+formula crash = at1 | at2 | at3 | at4 | at5 | at6;
+formula goal = (x=gMAX & y=gMAX);
+
+label "notbad" = !crash;
+label "goal" = goal;
+
+
+const double slip = 0.2;
+
+formula al = min(max(x-1,gMIN),gMAX);
+formula all = min(max(x-2,gMIN),gMAX);
+formula ar = min(max(x+1,gMIN),gMAX);
+formula arr = min(max(x+2,gMIN),gMAX);
+formula au = min(max(y-1,gMIN),gMAX);
+formula auu = min(max(y-2,gMIN),gMAX);
+formula ad = min(max(y+1,gMIN),gMAX);
+formula add = min(max(y+2,gMIN),gMAX);
+
+module agent
+    x : [gMIN..gMAX] init gMIN;
+    y : [gMIN..gMAX] init gMIN;
+
+    [le] !crash -> 1-slip : (x'=al) + slip : (x'=all);
+    [ri] !crash -> 1-slip : (x'=ar) + slip : (x'=arr);
+    [up] !crash -> 1-slip : (y'=au) + slip : (y'=auu);
+    [do] !crash -> 1-slip : (y'=ad) + slip : (y'=add);
+endmodule
+
+rewards "steps"
+    [le] true: 1;
+    [ri] true: 1;
+    [up] true: 1;
+    [do] true: 1;
+endrewards
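
The rewritten sketch above keeps the slippery 10x10 gridworld dynamics and only replaces the named obstacle constants with six hardcoded obstacle cells. Below is a minimal Python simulation of the step semantics encoded by the formulas al/all/ar/arr/au/auu/ad/add; the function names and the sampling loop are illustrative and not part of the repository.

```python
import random

GMIN, GMAX = 1, 10   # grid bounds, as in the model (N = 10)
SLIP = 0.2           # probability of overshooting by one extra cell

def clamp(v):
    # mirrors min(max(v, gMIN), gMAX) from the PRISM formulas
    return min(max(v, GMIN), GMAX)

def step(x, y, action):
    """One action with the slip semantics of the model: move one cell
    with probability 1-slip, two cells with probability slip."""
    dist = 2 if random.random() < SLIP else 1
    if action == "le":
        x = clamp(x - dist)
    elif action == "ri":
        x = clamp(x + dist)
    elif action == "up":
        y = clamp(y - dist)   # [up] decreases y in the model
    elif action == "do":
        y = clamp(y + dist)
    return x, y

# one random trajectory from the initial cell (1,1)
x, y = GMIN, GMIN
for _ in range(5):
    x, y = step(x, y, random.choice(["le", "ri", "up", "do"]))
print(x, y)
```

Each action also collects 1 in the "steps" reward structure, regardless of whether the move slips.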
6 changes: 5 additions & 1 deletion paynt/parser/sketch.py
@@ -117,7 +117,11 @@ def load_sketch(cls, sketch_path, properties_path,
         logger.info("sketch parsing OK")
 
         paynt.verification.property.Property.initialize()
-
+
+        updated = payntbind.synthesis.addMissingChoiceLabels(explicit_quotient)
+        if updated is not None: explicit_quotient = updated
+        payntbind.synthesis.assertChoiceLabelingIsCanonic(explicit_quotient.choice_labeling)
+
         make_rewards_action_based(explicit_quotient)
         logger.debug("constructed explicit quotient having {} states and {} actions".format(
             explicit_quotient.nr_states, explicit_quotient.nr_choices))
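The parser change above wires the payntbind canonicity helpers (addMissingChoiceLabels, assertChoiceLabelingIsCanonic) into load_sketch, using the convention that a transformation returns either an updated model or None when nothing changed. A small helper capturing that convention; apply_if_updated is hypothetical and not part of PAYNT:

```python
def apply_if_updated(model, transform):
    """Apply a transformation that returns either an updated model or
    None when no change was necessary (hypothetical helper, not in PAYNT)."""
    updated = transform(model)
    return model if updated is None else updated

# The calls in load_sketch follow this shape (payntbind assumed importable):
#   explicit_quotient = apply_if_updated(
#       explicit_quotient, payntbind.synthesis.addMissingChoiceLabels)
#   payntbind.synthesis.assertChoiceLabelingIsCanonic(explicit_quotient.choice_labeling)
```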
16 changes: 9 additions & 7 deletions paynt/quotient/mdp.py
@@ -190,12 +190,12 @@ def custom_decision_tree(mdp):
 
     if model == "obstacles":
         decide(dt.root, "x")
-        # decide(dt.root.child_true, "x")
-        # decide(dt.root.child_true.child_true, "y")
-        # decide(dt.root.child_true.child_false, "y")
+        decide(dt.root.child_true, "x")
+        decide(dt.root.child_true.child_true, "y")
+        decide(dt.root.child_true.child_false, "y")
         decide(dt.root.child_false, "x")
-        # decide(dt.root.child_false.child_true, "y")
-        # decide(dt.root.child_false.child_false, "y")
+        decide(dt.root.child_false.child_true, "y")
+        decide(dt.root.child_false.child_false, "y")
 
     return dt
 
@@ -206,11 +206,13 @@ class MdpQuotient(paynt.quotient.quotient.Quotient):
     def __init__(self, mdp, specification):
         super().__init__(specification=specification)
 
-        mdp = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp)
+        updated = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp)
+        if updated is not None: mdp = updated
 
         self.quotient_mdp = mdp
         paynt_mdp = paynt.models.models.Mdp(mdp)
         logger.info(f"optimal scheduler has value: {paynt_mdp.model_check_property(self.get_property())}")
-        self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(self.quotient_mdp)
+        self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(mdp)
+        self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp)
 
         decision_tree = custom_decision_tree(mdp)
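With the commented lines re-enabled, custom_decision_tree builds the full depth-3 template for the obstacles model: split on x at the first two levels, then on y. A toy sketch of that node structure; the Node class and decide helper below are illustrative and may differ from the real decision-tree classes in paynt.quotient.mdp:

```python
class Node:
    """Toy binary decision-tree node: an inner node records the state
    variable it splits on, a leaf keeps both children as None."""
    def __init__(self):
        self.variable = None
        self.child_true = None
        self.child_false = None

def decide(node, variable):
    # turn a leaf into an inner node splitting on the given state variable
    node.variable = variable
    node.child_true = Node()
    node.child_false = Node()

root = Node()
decide(root, "x")
decide(root.child_true, "x")
decide(root.child_true.child_true, "y")
decide(root.child_true.child_false, "y")
decide(root.child_false, "x")
decide(root.child_false.child_true, "y")
decide(root.child_false.child_false, "y")
# 7 inner nodes (x tested twice before y) and 8 leaves, matching the calls above
```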
2 changes: 1 addition & 1 deletion paynt/quotient/mdp_family.py
@@ -53,7 +53,7 @@ def __init__(self, quotient_mdp, family, coloring, specification):
         # for each state of the quotient, a list of available actions
         self.state_to_actions = None
 
-        self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp)
+        self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(quotient_mdp)
         self.num_actions = len(self.action_labels)
         self.state_action_choices = MdpFamilyQuotient.map_state_action_to_choices(
             self.quotient_mdp, self.num_actions, self.choice_to_action)
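The one-line fix above passes quotient_mdp, the name that actually exists in this constructor's scope. Judging from the unpacking, extractActionLabels yields the list of distinct action labels together with a per-choice action index; the toy data below only illustrates those shapes and is not produced by the real binding:

```python
# Illustrative shapes only; payntbind.synthesis.extractActionLabels
# operates on a Storm sparse MDP.
action_labels = ["le", "ri", "up", "do"]   # one entry per distinct action label
choice_to_action = [0, 1, 2, 3, 0, 1]      # for each choice (row), an index into action_labels
num_actions = len(action_labels)

# recover the label of a given choice
choice = 4
print(action_labels[choice_to_action[choice]])   # -> "le"
```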
38 changes: 5 additions & 33 deletions payntbind/src/synthesis/quotient/JaniChoices.cpp
@@ -1,4 +1,5 @@
#include "JaniChoices.h"
#include "src/synthesis/translation/choiceTransformation.h"

#include <storm/models/sparse/Mdp.h>
#include <storm/storage/jani/Model.h>
@@ -27,37 +28,9 @@ namespace synthesis {
                 choice_labeling.addLabelToChoice(jani.getAction(action_index).getName(), choice);
             }
         }
 
-        return choice_labeling;
-    }
-
-    /**
-     * Given model and its choice labeling, remove unused labels and make sure that all choices have at most 1 label.
-     * If the choice does not have a label, label it with the label derived from the provided prefix.
-     * Make sure that for each state of the MDP, either all its rows have no labels or all its rows have exactly one
-     */
-    template<typename ValueType>
-    void makeChoiceLabelingCanonic(
-        storm::models::sparse::Model<ValueType> const& model,
-        storm::models::sparse::ChoiceLabeling& choice_labeling,
-        std::string const& no_label_prefix
-    ) {
-        for(auto const& label: choice_labeling.getLabels()) {
-            if(choice_labeling.getChoices(label).empty()) {
-                choice_labeling.removeLabel(label);
-            }
-        }
-        storm::storage::BitVector no_label_labeling(model.getNumberOfChoices());
-        for(uint64_t choice = 0; choice < model.getNumberOfChoices(); choice++) {
-            uint64_t choice_num_labels = choice_labeling.getLabelsOfChoice(choice).size();
-            if(choice_num_labels > 1) {
-                throw std::invalid_argument("A choice of the model contains multiple labels.");
-            }
-            if(choice_num_labels == 0) {
-                no_label_labeling.set(choice,true);
-            }
-        }
-        std::string empty_label = choice_labeling.addUniqueLabel(no_label_prefix, no_label_labeling);
+        removeUnusedLabels(choice_labeling);
+        return choice_labeling;
     }
 
     template<typename ValueType>
@@ -68,9 +41,8 @@ namespace synthesis {
         if(model.hasStateValuations()) {
             components.stateValuations = model.getStateValuations();
         }
-        storm::models::sparse::ChoiceLabeling choice_labeling = reconstructChoiceLabelsFromJani(model);
-        makeChoiceLabelingCanonic(model,choice_labeling,"empty_label");
-        components.choiceLabeling = choice_labeling;
+        components.choiceLabeling = reconstructChoiceLabelsFromJani(model);
+        addMissingChoiceLabelsLabeling(model,components.choiceLabeling.value());
         components.rewardModels = model.getRewardModels();
         return std::make_shared<storm::models::sparse::Mdp<ValueType>>(std::move(components));
     }
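On the C++ side, the local makeChoiceLabelingCanonic is dropped in favour of the shared helpers from src/synthesis/translation/choiceTransformation.h (addMissingChoiceLabelsLabeling, removeUnusedLabels). The invariant the removed function enforced can be sketched in plain Python as follows; dictionaries stand in for Storm's ChoiceLabeling, and the unique-label derivation is simplified to reusing the prefix directly:

```python
def make_choice_labeling_canonic(num_choices, labels_to_choices, no_label_prefix="empty_label"):
    """Sketch of the invariant enforced by the removed C++ helper:
    drop labels that mark no choice, reject choices carrying more than one
    label, and give every unlabeled choice a label built from the prefix."""
    # remove unused labels
    labels_to_choices = {label: choices for label, choices in labels_to_choices.items() if choices}

    # invert to a per-choice view
    choice_labels = {choice: [] for choice in range(num_choices)}
    for label, choices in labels_to_choices.items():
        for choice in choices:
            choice_labels[choice].append(label)

    unlabeled = []
    for choice, labels in choice_labels.items():
        if len(labels) > 1:
            raise ValueError("a choice of the model contains multiple labels")
        if not labels:
            unlabeled.append(choice)
    if unlabeled:
        labels_to_choices[no_label_prefix] = set(unlabeled)
    return labels_to_choices

print(make_choice_labeling_canonic(3, {"a": {0}, "b": set()}))
# -> {'a': {0}, 'empty_label': {1, 2}}
```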