Fix in create-dpn-road-traffic and add shortening of continuous 'noand' atoms as well in shorten_rules_manually
savoiadiego · Jun 22, 2022 · 430a756 · 430a756
1 parent 0569bbd
commit 430a756
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 23 deletions.
diff --git a/Road_Traffic_Fine_Management_Process.svg b/Road_Traffic_Fine_Management_Process.svg
diff --git a/create-dpn-road-traffic.py b/create-dpn-road-traffic.py
@@ -1,18 +1,7 @@
-import pm4py
-import copy
-import numpy as np
-from tqdm import tqdm
-from random import choice
 from pm4py.objects.petri_net.obj import PetriNet, Marking
 from pm4py.objects.petri_net.utils import petri_utils
 from pm4py.objects.petri_net.exporter import exporter as pnml_exporter
 from pm4py.visualization.petri_net import visualizer as pn_visualizer
-from pm4py.objects.petri_net.importer import importer as pnml_importer
-from pm4py.objects.log.exporter.xes import exporter as xes_exporter
-from pm4py.objects.petri_net.data_petri_nets import semantics as dpn_semantics
-from pm4py.objects.petri_net import properties as petri_properties
-from pm4py.objects.petri_net.data_petri_nets.data_marking import DataMarking
-
 # create empty petri net
 net_name = "Road_Traffic_Fine_Management_Process"
 net = PetriNet(net_name)
@@ -77,17 +66,17 @@
 net.places.add(p_26)
 net.places.add(p_27)
 
-# create and add trasitions
+# create and add transitions
 t_A = PetriNet.Transition("trans_A", "Create Fine")
 t_B = PetriNet.Transition("trans_B", "Send Appeal to Prefecture")
 t_C = PetriNet.Transition("trans_C", "Insert Fine Notification")
 t_D = PetriNet.Transition("trans_D", "Send Fine")
 t_E = PetriNet.Transition("trans_E", "Insert Date Appeal to Prefecture")
 t_F = PetriNet.Transition("trans_F", "Payment")
 t_G = PetriNet.Transition("trans_G", "Appeal to Judge")
-t_H = PetriNet.Transition("trans_H", "Receive Result Appeal")
+t_H = PetriNet.Transition("trans_H", "Receive Result Appeal from Prefecture")
 t_I = PetriNet.Transition("trans_I", "Notify Result Appeal to Offender")
-t_L = PetriNet.Transition("trans_L", "Add Penalty")
+t_L = PetriNet.Transition("trans_L", "Add penalty")
 t_M = PetriNet.Transition("trans_M", "Send for Credit Collection")
 skip_1 = PetriNet.Transition("skip_1", None)
 skip_2 = PetriNet.Transition("skip_2", None)

diff --git a/discover-dpn-variants.py b/discover-dpn-variants.py
@@ -255,11 +255,14 @@ def ModelCompleter(**kwargs):
                 y_pred = dt.predict(dataset.drop(columns=['target']))
                 print("Train accuracy: {}".format(metrics.accuracy_score(dataset['target'], y_pred)))
 
-                rules = dt.extract_rules()
+                # Rule extraction without pruning
+                # rules = dt.extract_rules()
 
-                # Rule extraction with pruning and overlapping rules discovery
-                # rules = dt.extract_rules_with_pruning(dataset)
-                # rules = discover_overlapping_rules(dt, dataset, attributes_map, rules)
+                # Rule extraction with pruning
+                rules = dt.extract_rules_with_pruning(dataset)
+
+                # Overlapping rules discovery
+                rules = discover_overlapping_rules(dt, dataset, attributes_map, rules)
 
                 rules = shorten_rules_manually(rules, attributes_map)
                 rules = {k: rules[k].replace('_', ':') for k in rules}

diff --git a/utils.py b/utils.py
@@ -843,7 +843,7 @@ def discover_overlapping_rules(base_tree, dataset, attributes_map, original_rule
         leaf_instances = dataset.query(vertical_rules_query)
         # TODO not considering missing values for now, so wrong_instances could be empty
         # This happens because all the wrongly classified instances have missing values for the query attribute(s)
-        wrong_instances = leaf_instances[leaf_instances['target'] != leaf_node._label_class]
+        wrong_instances = (leaf_instances[leaf_instances['target'] != leaf_node._label_class]).copy()
 
         sub_tree = DecisionTree(attributes_map)
         sub_tree.fit(wrong_instances)
@@ -881,7 +881,8 @@ def shorten_rules_manually(original_rules, attributes_map):
     one of [10, 68, 144]".
     The series "paymentAmount > 21.0 && paymentAmount <= 37.0 && paymentAmount <= 200.0 && amount > 84.0 && amount <=
     138.0 && amount > 39.35" is rewritten as "paymentAmount > 21.0 && paymentAmount <= 37.0 && amount <= 138.0 && amount
-    > 84.0"
+    > 84.0".
+    The same reasoning is applied to or-atoms that contain no '&&'.
     """
 
     rules = copy.deepcopy(original_rules)
@@ -890,6 +891,8 @@ def shorten_rules_manually(original_rules, attributes_map):
         or_atoms = rules[target_class].split(' || ')
         new_target_rule = list()
         cat_atoms_same_attr_noand = dict()
+        cont_atoms_same_attr_less_noand, cont_atoms_same_attr_greater_noand = dict(), dict()
+        cont_comp_less_equal_noand, cont_comp_greater_equal_noand = dict(), dict()
 
         for or_atom in or_atoms:
             if ' && ' in or_atom:
@@ -941,17 +944,25 @@ def shorten_rules_manually(original_rules, attributes_map):
                 # Or-atom analyzed: putting its new and-atoms in conjunction
                 new_target_rule.append(' && ' .join(new_or_atom))
 
-            # If the or_atom does not have &&s inside (single atom), just simplify categorical attributes.
+            # If the or_atom does not have &&s inside (single atom), just simplify attributes.
             # For example, the series "org:resource = 10 || org:resource = 144 || org:resource = 68" is rewritten as
-            # "org:resource one of [10, 68, 144]".
-            # Here the values for each attribute are added to the 'cat_atoms_same_attr_noand' dictionary.
+            # "org:resource one of [10, 68, 144]". Continuous attributes follow the same reasoning as before.
             else:
                 a_attr, a_comp, a_value = or_atom.split(' ')
                 # Storing information for many-values categorical attributes equalities
                 if attributes_map[a_attr] == 'categorical' and a_comp == '=':
                     if a_attr not in cat_atoms_same_attr_noand.keys():
                         cat_atoms_same_attr_noand[a_attr] = list()
                     cat_atoms_same_attr_noand[a_attr].append(a_value)
+                elif attributes_map[a_attr] == 'continuous':
+                    if a_comp in ['<', '<=']:
+                        if a_attr not in cont_atoms_same_attr_less_noand.keys() or float(a_value) <= float(cont_atoms_same_attr_less_noand[a_attr]):
+                            cont_atoms_same_attr_less_noand[a_attr] = a_value
+                            cont_comp_less_equal_noand[a_attr] = True if a_comp == '<=' else False
+                    elif a_comp in ['>', '>=']:
+                        if a_attr not in cont_atoms_same_attr_greater_noand.keys() or float(a_value) >= float(cont_atoms_same_attr_greater_noand[a_attr]):
+                            cont_atoms_same_attr_greater_noand[a_attr] = a_value
+                            cont_comp_greater_equal_noand[a_attr] = True if a_comp == '>=' else False
                 else:
                     new_target_rule.append(or_atom)
 
@@ -962,6 +973,17 @@ def shorten_rules_manually(original_rules, attributes_map):
             else:
                 new_target_rule.append(attr + ' = ' + cat_atoms_same_attr_noand[attr][0])
 
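+        # Compress continuous-attribute inequalities (< / <= and then > / >=) for the 'no &&s' case. NOTE(review): these atoms are joined by '||', so keeping the smallest upper bound / largest lower bound narrows the rule - confirm this is intended.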
+        for attr in cont_atoms_same_attr_less_noand.keys():
+            min_value = cont_atoms_same_attr_less_noand[attr]
+            comp = ' <= ' if cont_comp_less_equal_noand[attr] else ' < '
+            new_target_rule.append(attr + comp + min_value)
+
+        for attr in cont_atoms_same_attr_greater_noand.keys():
+            max_value = cont_atoms_same_attr_greater_noand[attr]
+            comp = ' >= ' if cont_comp_greater_equal_noand[attr] else ' > '
+            new_target_rule.append(attr + comp + max_value)
+
         # Rule for a target class analyzed: putting its new or-atoms in disjunction
         rules[target_class] = ' || '.join(new_target_rule)