Merge with Pietro's fixes and add manual parallel_branches for prom net

savoiadiego · Jun 22, 2022 · 8e7b911 · 8e7b911
1 parent 9bac346
commit 8e7b911
Show file tree

Hide file tree

Showing 10 changed files with 1,946 additions and 1,419 deletions.
diff --git a/DecisionTree.py b/DecisionTree.py
@@ -158,6 +158,7 @@ def get_split(self, data_in) -> Union[float, float, str]:
                     tests_examined['threshold'].append(threshold)
                     tests_examined['attribute'].append(column)
                     tests_examined['not_near_trivial_subset'].append(are_there_at_least_two)
+            # breakpoint()
             # select the best split
             tests_examined = pd.DataFrame.from_dict(tests_examined)
             mean_info_gain = tests_examined['info_gain'].mean()
@@ -193,17 +194,24 @@ def split_node(self, node, data_in, data_total) -> None:
             data_in = data_in.copy(deep=True) 
             data_in = data_in.drop(columns=[node.get_label().split()[0]])
         max_gain_ratio, local_threshold, split_attribute = self.get_split(data_in)
+        # breakpoint()
         # compute error predicting the most frequent class without splitting
         node_errors = data_in['target'].value_counts().sum() - data_in['target'].value_counts().max()
+        # compute percentage
+        # TODO directly compute percentage
+        node_errors = node_errors / len(data_in)
         # if the split attribute does not exist then the node is a leaf
         if split_attribute is not None and node.get_level() < self.max_depth:
             child_errors = self.compute_split_error(data_in[[split_attribute, 'target']], local_threshold)
+            # compute percentage
+            # TODO directly compute percentage
+            child_errors = child_errors / len(data_in)
             # if the child errors are greater than or equal to the node's own error, then the split is useless
             if child_errors >= node_errors:
                 # the node (default type "DecisionNode") is "transformed" into a leaf node ("LeafNode" type)
                 parent_node = node.get_parent_node()
                 if parent_node is None and node.get_label() == 'root':
-                    print("The data are not feasible for fitting a tree. Can't find a suitable split of the root node.")
+                    print("Childs error percentage is higher than the root one. Can't find a suitable split of the root node.")
                 elif parent_node is None:
                     raise Exception("Can't transform DecisionNode {} in LeafNode: no parent found".format(node.get_label()))
                 else:

diff --git a/create-dpn-running-example-paper.py b/create-dpn-running-example-paper.py
@@ -118,9 +118,9 @@
 t_D.properties[petri_properties.READ_VARIABLE] = ['doc_is_updated', 'skip_everything']
 t_D.properties[petri_properties.WRITE_VARIABLE] = []
 
-t_G.properties[petri_properties.TRANS_GUARD] = 'skip_everything == True || doc_is_update == True'
-t_G.properties[petri_properties.READ_VARIABLE] = ['doc_is_updated', 'skip_everything']
-t_G.properties[petri_properties.WRITE_VARIABLE] = []
+skip_3.properties[petri_properties.TRANS_GUARD] = 'skip_everything == True || doc_is_updated == True'
+skip_3.properties[petri_properties.READ_VARIABLE] = ['doc_is_updated', 'skip_everything']
+skip_3.properties[petri_properties.WRITE_VARIABLE] = []
 
 t_F.properties[petri_properties.TRANS_GUARD] = 'loan_accepted == "yes"'
 t_F.properties[petri_properties.READ_VARIABLE] = ['loan_accepted']

diff --git a/create-ds-conifg.py b/create-ds-conifg.py
@@ -68,7 +68,8 @@ def create_dict(st_session_state):
     #breakpoint()
     for name in st_session_state:
         if ('e_' in name or 't_' in name) and name.split('_')[1] not in ['trace', 'event']:
-            dict_conf[name.split('_')[1]] = st.session_state[name]
+            # remove initial 'e_' or 't_' from the name
+            dict_conf["_".join(name.split('_')[1:])] = st.session_state[name]
     return dict_conf
 
 if uploaded_file is not None:

diff --git a/create-synthetic-log.py b/create-synthetic-log.py
@@ -118,6 +118,7 @@ def get_ex_cont(model_name):
     while dm != final_marking and len(visited_elements) < max_trace_length and len(all_enabled_trans) > 0:
         verboseprint(dm)
         all_enabled_trans = dpn_semantics.enabled_transitions(net, dm, ex_cont_total)
+        # breakpoint()
         for enabled in list(all_enabled_trans):
             if "guard" in enabled.properties:
                 if not dpn_semantics.evaluate_guard(enabled.properties["guard"], enabled.properties["readVariable"], ex_cont_total):
@@ -140,6 +141,7 @@ def get_ex_cont(model_name):
                 if trans.name in ["trans_A"]:
                     ex_cont["amount"] = copy.copy(ex_cont_total["amount"])
                     ex_cont["skip_everything"] = copy.copy(ex_cont_total["skip_everything"])
+                    ex_cont["doc_is_updated"] = copy.copy(ex_cont_total["doc_is_updated"])
             visited_elements.append(tuple([trans, ex_cont]))
         if 'loopB' in net_name:
             if trans.name in ["trans_S"]: