-
Notifications
You must be signed in to change notification settings - Fork 64
/
log_parser.py
146 lines (135 loc) · 6.72 KB
/
log_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#==============================================================================
# Copyright 2019-2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
import re
def _split_key_value_pairs(text):
    """Parse ``"k1: v1, k2: v2"`` into ``{"k1": "v1", "k2": "v2"}``.

    Items are separated by commas and every item must split on ``:`` into
    exactly two parts; keys and values are stripped of surrounding whitespace
    and the values are kept as strings. ``dict()`` raises ``ValueError`` for
    an item that does not yield exactly two parts.
    """
    return dict(
        [part.strip() for part in item.split(":")] for item in text.split(","))


def parse_logs(log_lines, verbose=False):
    """
    Returns ngraph metrics parsed out of the specified log output.

    Only lines starting with 'NGTF_SUMMARY' are parsed. A line containing
    'NGTF_SUMMARY: Op_not_supported:' marks the start of a new sub-graph; the
    metrics collected so far (if any) are stored and a new section begins.

    Regular log parsing will return:
        - Number of nodes in the graph
        - Number of nodes marked for clustering
        - Number of ngraph clusters
    Verbose log parsing will return all of the above, in addition to:
        - Percentage of nodes clustered
        - Has deadness issues
        - Has static input issues
        - Reasons why edge connected clusters did not merge
        - Reasons why edge connected encapsulates did not merge
        - Nodes per cluster
        - Types of edges
        - Op not supported
        - Op failed type constraint

    Args:
        log_lines: either one string (split on newlines here) or a list of
            strings, none of which may contain a newline.
        verbose: when True, also collect the verbose-only metrics above.

    Returns:
        A dict mapping the sub-graph counter as a string ('0', '1', ...) to a
        dict of the metrics found for that sub-graph. The section being
        collected when the input ends is always stored, even if it is empty.

    Raises:
        AssertionError: if log_lines is neither a string nor a list, or if an
            element of the list is not a string or contains a newline.
        ValueError: if a numeric field cannot be converted, or a key/value
            summary line is malformed.
    """
    if isinstance(log_lines, str):
        log_lines = log_lines.split('\n')
    else:
        # str() is needed: concatenating the type object itself would raise
        # TypeError and hide the real problem from the caller.
        assert isinstance(log_lines, list), (
            "If log_lines is not a string, it should have been a list, "
            "but instead it is a " + str(type(log_lines)))
    assert all(
        isinstance(i, str) and '\n' not in i for i in log_lines
    ), 'Each element of the list should be a string and not contain new lines'

    all_results = {}
    curr_result = {}
    ctr = 0
    prev_line = ""
    for line in log_lines:
        start_of_subgraph = "NGTF_SUMMARY: Op_not_supported:" in line
        # If logs of a new sub-graph are starting, save the old one
        if start_of_subgraph and curr_result:
            all_results[str(ctr)] = curr_result
            curr_result = {}
            ctr += 1

        # keep collecting information in curr_result
        if line.startswith('NGTF_SUMMARY'):
            # First chain: the counters reported in both modes.
            if 'Number of nodes in the graph' in line:
                curr_result['num_nodes_in_graph'] = int(
                    line.split(':')[-1].strip())
            elif 'Number of nodes marked for clustering' in line:
                # The count is the first token after the last colon; the
                # rest of the line carries the percentage.
                curr_result['num_nodes_marked_for_clustering'] = int(
                    line.split(':')[-1].strip().split(' ')[0].strip())
                if verbose:
                    # get percentage of total nodes
                    match = re.search(r"(\d+(\.\d+)?%)", line)
                    curr_result["percentage_nodes_clustered"] = (
                        match.group(0) if match else "")
            elif 'Number of ngraph clusters' in line:
                curr_result['num_ng_clusters'] = int(
                    line.split(':')[-1].strip())

            # Second chain: verbose-only fields.
            if verbose and ('DEADNESS' in line and 'STATICINPUT' in line):
                reasons = _split_key_value_pairs(line[len("NGTF_SUMMARY:"):])
                # The preceding line says which kind of merge failure this
                # breakdown belongs to.
                if "reasons why a pair of edge connected encapsulates did not merge" in prev_line:
                    curr_result[
                        'why_edge_connected_encapsulates_did_not_merge'] = reasons
                elif "reasons why a pair of edge connected clusters did not merge" in prev_line:
                    curr_result[
                        'why_edge_connected_clusters_did_not_merge'] = reasons
                # default has_deadness_issues and has_static_input_issues to 'No'
                curr_result.setdefault('has_deadness_issues', "No")
                curr_result.setdefault('has_static_input_issues', "No")
                # set has deadness/static input issues to 'Yes' if the value is > 0
                if int(reasons['DEADNESS']) > 0:
                    curr_result['has_deadness_issues'] = "Yes"
                if int(reasons['STATICINPUT']) > 0:
                    curr_result['has_static_input_issues'] = "Yes"
            elif verbose and 'Nodes per cluster' in line:
                curr_result['nodes_per_cluster'] = float(
                    line.split(':')[-1].strip())
            elif verbose and 'Types of edges::' in line:
                curr_result["types_of_edges"] = _split_key_value_pairs(
                    line[len("NGTF_SUMMARY: Types of edges:: "):])
            elif verbose and 'Op_not_supported' in line:
                curr_result["op_not_supported"] = [
                    i.strip()
                    for i in line[len("NGTF_SUMMARY: Op_not_supported: "):]
                    .split(",")
                ]
            elif verbose and 'Op_failed_type_constraint' in line:
                curr_result["op_failed_type_constraint"] = [
                    i.strip() for i in
                    line[len("NGTF_SUMMARY: Op_failed_type_constraint: "):]
                    .split(",")
                ]
        prev_line = line

    # add the last section to the results
    all_results[str(ctr)] = curr_result
    return all_results
def compare_parsed_values(parsed_vals, expected_vals):
    """Check that every expected graph entry is satisfied by a parsed one.

    Both inputs are dictionaries (representing jsons) that map a graph id to
    a dictionary of metrics. The constraints in expected_vals are a subset of
    parsed_vals: parsed_vals should hold everything the parser can emit,
    while expected_vals may be relaxed (even empty) and verify only certain
    fields. An expected entry matches a parsed entry when every key it lists
    is present in the parsed entry with an equal value.

    Graph ids are not compared, since the ordering may differ between the
    two inputs. Each parsed entry can satisfy at most one expected entry.

    Args:
        parsed_vals: dict of graph id -> dict of parsed metrics. It is not
            modified.
        expected_vals: dict of graph id -> dict of metrics that must match.

    Returns:
        A tuple ``(True, '')`` when every expected entry found a match,
        otherwise ``(False, message)`` where message names the first expected
        entry that could not be matched and lists the parsed entries that
        were still unclaimed at that point.
    """

    def satisfies(parsed, expected):
        # A key missing from the parsed entry is a mismatch, not an error.
        return all(
            key in parsed and parsed[key] == value
            for key, value in expected.items())

    # Work on a shallow copy so the caller's dictionary is left intact while
    # matched entries are removed from further consideration.
    remaining = dict(parsed_vals)
    for expected_id, expected in expected_vals.items():
        # The ordering is not important and could be different, hence search
        # through all remaining elements of parsed_vals
        matching_id = None
        for parsed_id, parsed in remaining.items():
            if satisfies(parsed, expected):
                matching_id = parsed_id
                break
        if matching_id is None:
            return False, 'Failed to match expected graph info ' + str(
                expected_id) + " which was: " + str(
                    expected
                ) + "\n. Got the following parsed results: " + str(remaining)
        remaining.pop(matching_id)
    return True, ''