Skip to content

Commit

Permalink
[micro_perf] print report info during test.
Browse files Browse the repository at this point in the history
  • Loading branch information
suisiyuan committed Oct 1, 2024
1 parent c4b3f70 commit 10df700
Showing 1 changed file with 20 additions and 5 deletions.
25 changes: 20 additions & 5 deletions byte_micro_perf/core/perf_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def parse_workload(workload):



# NOTE(review): this is a rendered diff with the +/- markers stripped; the
# first ConfigInstance line appears to be the removed (pre-commit) definition
# and the second the added (post-commit) one -- confirm against the repository.
# Pre-commit form: one test case = dtype, tensor shapes and its case index.
ConfigInstance = namedtuple("ConfigInstance", ["dtype", "tensor_shapes", "index"])
# Post-commit form: adds "total"; start_engine fills it with
# len(dtype_list) * len(shape_list) and passes case_index + 1 as "index".
ConfigInstance = namedtuple("ConfigInstance", ["dtype", "tensor_shapes", "index", "total"])
# Pairs a test case (ConfigInstance) with the report produced for it.
ResultItem = namedtuple("ResultItem", ["config", "report"])


Expand Down Expand Up @@ -261,7 +261,7 @@ def start_engine(self) -> None:
case_index = 0
for dtype in dtype_list:
for shape in shape_list:
test_list.append(ConfigInstance(dtype, shape, case_index))
test_list.append(ConfigInstance(dtype, shape, case_index + 1, len(dtype_list) * len(shape_list)))
case_index = case_index + 1

try:
Expand Down Expand Up @@ -379,7 +379,6 @@ def perf_func(self, rank: int, *args):

test_dtype = test_instance.dtype
test_shape = test_instance.tensor_shapes
print(f"rank {rank}, {test_instance}")

"""
input_shape could be:
Expand All @@ -399,6 +398,15 @@ def perf_func(self, rank: int, *args):
if reports and "Error" not in reports:
result_list.append(ResultItem(test_instance, reports))

latency = reports.get("Avg latency(us)", 0)
kernel_bw = reports.get("Kernel bandwidth(GB/s)", 0)
bus_bw = reports.get("Bus bandwidth(GB/s)", 0)

print(f"rank {rank}, {test_instance}, latency: {latency}\nkernel_bw: {kernel_bw}, bus_bw: {bus_bw}")
else:
print(f"rank {rank}, {test_instance}, error")


output_result_list = []
if world_size > 1:
all_result_list = backend_instance.all_gather_object(result_list)
Expand All @@ -411,8 +419,6 @@ def perf_func(self, rank: int, *args):
for test_instance in test_list:
test_dtype = test_instance.dtype
test_shape = test_instance.tensor_shapes
if rank == 0:
print(f"rank {rank}, {test_instance}")

"""
input_shape could be:
Expand All @@ -432,6 +438,15 @@ def perf_func(self, rank: int, *args):
if reports and "Error" not in reports:
result_list.append(ResultItem(test_instance, reports))

latency = reports.get("Avg latency(us)", 0)
kernel_bw = reports.get("Kernel bandwidth(GB/s)", 0)
bus_bw = reports.get("Bus bandwidth(GB/s)", 0)
if rank == 0:
print(f"rank {rank}, {test_instance}, latency: {latency}\nkernel_bw: {kernel_bw}, bus_bw: {bus_bw}")
else:
if rank == 0:
print(f"rank {rank}, {test_instance}, error")


# destroy dist
if world_size > 1:
Expand Down

0 comments on commit 10df700

Please sign in to comment.