-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreorder.py
108 lines (87 loc) · 4.6 KB
/
reorder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import argparse
import os
import pandas as pd
# Column names given to the 11 comma-separated fields of a reorder verbose line.
header = [
    "dnnl_verbose",
    "action",
    "eng",
    "name",
    "impl",
    "prop",
    "format",
    "blank1",
    "blank2",
    "shape",
    "time",
]

# Known prefixes of the OP lines that can precede a reorder in the log.
# TODO: add every prefix list below to op_prefix_list.
aten_ipex_prefix = [
    "AtenIpexCPUDefault",
    "AtenIpexCPUDev",
    "AtenIpexJITDev",
    "AtenIpexCPUSparse",
]
custom_ops_prefix = ["IPEX"]
extend_ops_prefix = [
    "packed_add_",
    "_interaction",
    "_embedding_bag",
]
mlp_ops_prefix = ["ipex_mm"]

# Prefixes currently offered as choices for --op_to_include (same list object
# as aten_ipex_prefix).
op_prefix_list = aten_ipex_prefix
def group_and_sort(df, group_by, top_k, first_line, format_to_exclude, output_dir):
    """Print (and optionally save) the reorder time aggregated per OP.

    Two tables are produced:

    * "OP LEVEL": ``time`` summed over the ``group_by`` columns, largest
      first, with the summed column renamed to ``total_time``.
    * "OP AND SHAPE LEVEL": ``time`` summed per (``name``, ``shape``) pair,
      plus the per-``name`` ``total_time``; rows are ordered by
      ``total_time`` and then ``time``, both descending.

    Args:
        df: DataFrame with at least the ``name``, ``format``, ``shape`` and
            ``time`` columns; ``time`` must be convertible to float. The
            frame passed in is not modified.
        group_by: column name(s) to group by for the first table.
        top_k: number of rows kept per ``name`` in the second table;
            ``-1`` keeps all of them.
        first_line: number of leading rows printed for each table; ``-1``
            prints the whole table.
        format_to_exclude: iterable of prefixes; rows whose ``format`` starts
            with any of them are dropped. Falsy disables the filter.
        output_dir: directory receiving ``op_level.csv`` and
            ``op_shape_level.csv`` (created if missing). Falsy disables
            saving.

    Returns:
        None. Results are printed and, optionally, written to disk.
    """
    if len(df) == 0:
        print("cannot find any OP that causes the reorder, you could try to add #define _DEBUG in DevOPs.cpp")
        return

    # Convert on a copy so the caller's frame keeps its original column.
    df = df.assign(time=df["time"].astype(float))

    # remove format that starts with format_to_exclude
    if format_to_exclude:
        df = df[~df["format"].str.startswith(tuple(format_to_exclude))]

    print("*" * 70)
    if format_to_exclude:
        print("Excluded format that starts with: %s" % (", ".join(format_to_exclude)))
    if first_line != -1:
        print("Only print the first %d lines in each table " % first_line)
    if top_k != -1:
        print("For the second table, only calculate the top %d result in each group" % top_k)
    print("*" * 70)

    # The filter may have removed every row; there is nothing left to
    # aggregate in that case.
    if df.empty:
        print("every reorder was removed by the format filter, nothing to report")
        return

    # op level time
    print("*" * 70)
    print("OP LEVEL")
    # numeric_only=True keeps the table limited to the summed time; without
    # it recent pandas versions concatenate the string columns as well.
    df_groupby_name = (
        df.groupby(group_by)
        .sum(numeric_only=True)
        .sort_values(by="time", ascending=False)
        .rename(columns={"time": "total_time"})
    )
    print(df_groupby_name.head(first_line) if first_line != -1 else df_groupby_name)
    print()

    # op and shape level time
    print("*" * 70)
    print("OP AND SHAPE LEVEL")
    pt = pd.pivot_table(df, index=["name", "shape"], values="time", aggfunc="sum")
    pt["total_time"] = pt.groupby(level="name")["time"].transform("sum")
    pt = pt.sort_values(["total_time", "time"], ascending=[False, False])
    if top_k != -1:
        # head() keeps the sorted order, so this is the top_k slowest shapes
        # of every OP.
        pt = pt.groupby(level="name").head(top_k)
    print(pt.head(first_line) if first_line != -1 else pt)

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        op_level_csv = os.path.join(output_dir, "op_level.csv")
        op_shape_level_csv = os.path.join(output_dir, "op_shape_level.csv")
        df_groupby_name.to_csv(op_level_csv)
        pt.to_csv(op_shape_level_csv)
        print("*" * 70)
        print("Result has been saved at: ")
        print(str(op_level_csv))
        print(str(op_shape_level_csv))
def preprocess(file, op_to_include):
    """Collect the reorder lines of a verbose log into a DataFrame.

    A line is kept when it starts with ``dnnl_verbose``, has exactly 11
    comma-separated fields, its 4th field is ``reorder`` and the line right
    before it starts with one of the prefixes in ``op_to_include``. For every
    kept line the ``reorder`` field is replaced by that preceding line, so
    the ``name`` column holds the OP line logged just before the reorder.

    Only the 4th field is overwritten; other fields that happen to contain
    the text "reorder" are left untouched, and a preceding line that
    contains commas no longer breaks the record.

    Args:
        file: path to the log file.
        op_to_include: a single prefix string, or an iterable of prefix
            strings, that the preceding OP line must start with.

    Returns:
        DataFrame with the columns listed in ``header``, one row per kept
        reorder line; all values are strings.
    """
    # tuple() on a bare string would split it into single characters, which
    # would then match almost any line.
    if isinstance(op_to_include, str):
        prefixes = (op_to_include,)
    else:
        prefixes = tuple(op_to_include)

    with open(file) as f:
        content = f.read().splitlines()

    reorders = []
    # Pair each line with its predecessor; the first line has none and is
    # therefore never considered.
    for previous, line in zip(content, content[1:]):
        if not line.startswith("dnnl_verbose"):
            continue
        fields = line.split(",")
        if len(fields) != 11 or fields[3] != "reorder":
            continue
        if not previous.startswith(prefixes):
            continue
        fields[3] = previous
        reorders.append(fields)

    return pd.DataFrame(reorders, columns=header)
if __name__ == "__main__":
    # Command-line entry point: parse the options, extract the reorder
    # records from the log, then print/save the aggregated timing tables.
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file_name", type=str, required=True, help="path to the input onednn log file")
    parser.add_argument("-o", "--output_dir", default=None, type=str, help="directory to save the output csv file")
    parser.add_argument("-g", "--group_by", nargs='+', choices=header, required=True, help="column names to groupby to calculate the total time")
    # nargs='+' produces a list, so the default has to be a list too. The
    # option is no longer required, which makes the default reachable; every
    # command line that passed -p before keeps working unchanged.
    parser.add_argument("-p", "--op_to_include", nargs='+', choices=op_prefix_list, default=["AtenIpexCPUDefault"], help="OP to include")
    parser.add_argument("-e", "--exclude", nargs='+', help="format starts with the given strings will be excluded")
    parser.add_argument("-t", "--top_k", type=int, default=-1, help="only show the top k result within each group, if -1, show all the result")
    parser.add_argument("-l", "--first_line", type=int, default=-1, help="only print the first several lines in each table")
    args = parser.parse_args()

    df = preprocess(args.file_name, args.op_to_include)
    group_and_sort(df, args.group_by, args.top_k, args.first_line, args.exclude, args.output_dir)
# Example usage:
#   1. Add `#define _DEBUG` to DevOPs.cpp.
#   2. python reorder.py -f ../transformer/log/transFB.log -o ../transformer/log -g name -t 10 -e src_f32 -l 10 -p AtenIpexCPUDefault