Skip to content

Commit

Permalink
compute Q-values corresponding to the given FSC
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Dec 6, 2023
1 parent 8af4e6b commit e01cd2d
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
37 changes: 35 additions & 2 deletions paynt/quotient/pomdp_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
import stormpy.synthesis

import paynt.quotient.holes
import paynt.quotient.models
import paynt.quotient.quotient
import paynt.quotient.coloring
import paynt.quotient.mdp_family

import paynt.synthesizer.conflict_generator.storm

import json

import logging
Expand Down Expand Up @@ -222,6 +221,40 @@ def build_dtmc_sketch(self, fsc):
dtmc_sketch = paynt.quotient.quotient.DtmcQuotientContainer(product, product_coloring, product_specification)
return dtmc_sketch


def compute_qvalues_for_fsc(self, dtmc_sketch):
'''
Given a quotient MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable)
memory node n, and action a, the Q-value Q((s,n),a).
:note it is assumed that a randomized FSC was used
:note it is assumed the provided DTMC sketch is the one obtained after the last unfolding, i.e. no other DTMC
sketch was constructed afterwards
:return a dictionary mapping (s,n,a) to Q((s,n),a)
'''
assert isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpRandomizedFsc), \
"to compute Q-values, unfolder for randomized FSC must have been used"

# model check
product = dtmc_sketch.quotient_mdp
formula = dtmc_sketch.get_property().formula
result = stormpy.model_checking(product, formula, environment=paynt.quotient.models.MarkovChain.environment)
product_state_to_value = result.get_values()

invalid_action = self.num_actions

# prepare the resulting map
state_memory_action_to_value = {}
# map state values to the resulting map
# print(len(product_state_to_state_memory_action))
for product_state in range(product.nr_states):
state,memory_action = self.product_pomdp_fsc.product_state_to_state_memory_action[product_state]
memory,action = memory_action
if action == invalid_action:
continue
value = product_state_to_value[product_state]
state_memory_action_to_value[(state,memory,action)] = value
return state_memory_action_to_value


def translate_path_to_trace(self, dtmc_sketch, dtmc, path):
invalid_choice = self.quotient_mdp.nr_choices
Expand Down
3 changes: 2 additions & 1 deletion paynt_pomdp_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def investigate_hole_assignment(pomdp_sketch, hole_assignment):
pomdp = pomdp_sketch.build_pomdp(hole_assignment)

# create a random k-FSC
fsc_is_deterministic = True
fsc_is_deterministic = False
num_nodes = 3
fsc = paynt.quotient.pomdp_family.FSC(num_nodes, pomdp_sketch.num_observations, fsc_is_deterministic)
fsc.fill_trivial_actions(pomdp_sketch.observation_to_actions)
Expand Down Expand Up @@ -75,6 +75,7 @@ def investigate_hole_assignment(pomdp_sketch, hole_assignment):

# apply this FSC to a POMDP sketch, obtaining a DTMC sketch
dtmc_sketch = pomdp_sketch.build_dtmc_sketch(fsc)
# qvalues = pomdp_sketch.compute_qvalues_for_fsc(dtmc_sketch)

# to each (sub-family of) environment(s), assign a value corresponding to the minimum specification satisfiability
synthesizer = paynt.synthesizer.synthesizer_onebyone.SynthesizerOneByOne(dtmc_sketch)
Expand Down

0 comments on commit e01cd2d

Please sign in to comment.