diff --git a/paynt/quotient/pomdp_family.py b/paynt/quotient/pomdp_family.py index ece5dbe93..941acab7e 100644 --- a/paynt/quotient/pomdp_family.py +++ b/paynt/quotient/pomdp_family.py @@ -3,12 +3,11 @@ import stormpy.synthesis import paynt.quotient.holes +import paynt.quotient.models import paynt.quotient.quotient import paynt.quotient.coloring import paynt.quotient.mdp_family -import paynt.synthesizer.conflict_generator.storm - import json import logging @@ -222,6 +221,40 @@ def build_dtmc_sketch(self, fsc): dtmc_sketch = paynt.quotient.quotient.DtmcQuotientContainer(product, product_coloring, product_specification) return dtmc_sketch + + def compute_qvalues_for_fsc(self, dtmc_sketch): + ''' + Given a quotient MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable) + memory node n, and action a, the Q-value Q((s,n),a). + :note it is assumed that a randomized FSC was used + :note it is assumed the provided DTMC sketch is the one obtained after the last unfolding, i.e. no other DTMC + sketch was constructed afterwards + :return a dictionary mapping (s,n,a) to Q((s,n),a) + ''' + assert isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpRandomizedFsc), \ + "to compute Q-values, unfolder for randomized FSC must have been used" + + # model check + product = dtmc_sketch.quotient_mdp + formula = dtmc_sketch.get_property().formula + result = stormpy.model_checking(product, formula, environment=paynt.quotient.models.MarkovChain.environment) + product_state_to_value = result.get_values() + + invalid_action = self.num_actions + + # prepare the resulting map + state_memory_action_to_value = {} + # map state values to the resulting map + # print(len(product_state_to_state_memory_action)) + for product_state in range(product.nr_states): + state,memory_action = self.product_pomdp_fsc.product_state_to_state_memory_action[product_state] + memory,action = memory_action + if action == invalid_action: + continue + value = product_state_to_value[product_state] + state_memory_action_to_value[(state,memory,action)] = value + return state_memory_action_to_value + def translate_path_to_trace(self, dtmc_sketch, dtmc, path): invalid_choice = self.quotient_mdp.nr_choices diff --git a/paynt_pomdp_sketch.py b/paynt_pomdp_sketch.py index 4fed0906a..2e3ebb19b 100644 --- a/paynt_pomdp_sketch.py +++ b/paynt_pomdp_sketch.py @@ -28,7 +28,7 @@ def investigate_hole_assignment(pomdp_sketch, hole_assignment): pomdp = pomdp_sketch.build_pomdp(hole_assignment) # create a random k-FSC - fsc_is_deterministic = True + fsc_is_deterministic = False num_nodes = 3 fsc = paynt.quotient.pomdp_family.FSC(num_nodes, pomdp_sketch.num_observations, fsc_is_deterministic) fsc.fill_trivial_actions(pomdp_sketch.observation_to_actions) @@ -75,6 +75,7 @@ def investigate_hole_assignment(pomdp_sketch, hole_assignment): # apply this FSC to a POMDP sketch, obtaining a DTMC sketch dtmc_sketch = pomdp_sketch.build_dtmc_sketch(fsc) +# qvalues = pomdp_sketch.compute_qvalues_for_fsc(dtmc_sketch) # to each (sub-family of) environment(s), assign a value corresponding to the minimum specification satisfiability synthesizer = paynt.synthesizer.synthesizer_onebyone.SynthesizerOneByOne(dtmc_sketch)