Skip to content

Commit

Permalink
Merge pull request bytedance#65 from bytedance/optimize_micro_report
Browse files (browse the repository at this point in the history)
[micro_perf] fix host2device process_inputs bug; add tensor shape to micro_perf report.
  • Loading branch information
YJessicaGao authored Apr 16, 2024
2 parents 713a0d5 + 772ca20 commit 041c295
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 7 deletions.
15 changes: 10 additions & 5 deletions byte_micro_perf/backends/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,17 +131,22 @@ def perf(self, input_shapes: List[List[int]], dtype):
]

# warmup
for _ in range(10):
num_warm_up = 10
for _ in range(num_warm_up):
self._run_operation(self.op, inputs_list[0])
self.device_synchronize()

start_time = time.time()
# perf
self.device_synchronize()
start_time = time.perf_counter_ns()
for i in range(self.iterations):
result = self._run_operation(self.op, inputs_list[input_index_list[i]])
self.device_synchronize()
execution_time = time.time() - start_time
end_time = time.perf_counter_ns()

# time in us
exec_time = (end_time - start_time) / 1e3
latency = round(exec_time / self.iterations, 2)

latency = round(execution_time * 1e6 / self.iterations, 2)
if self.op_name in ["allreduce", "allgather", "reducescatter", "alltoall", "broadcast"]:
report = dump_communication_ops_report(
self.op_name,
Expand Down
2 changes: 1 addition & 1 deletion byte_micro_perf/backends/module_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def __init__(self, xpu_device):

def process_inputs(self, input_tensors):
new_inputs = input_tensors.cpu()
- return new_inputs
+ return [new_inputs]

def forward(self, input_tensors):
assert input_tensors.device.type == "cpu"
Expand Down
2 changes: 2 additions & 0 deletions byte_micro_perf/backends/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def dump_communication_ops_report(

report = {
"Dtype": dtype,
+ "Tensor Shapes": input_shapes,
"Memory Size(MB)": round(mb, 2),
"Group": group_size,
"Kernel bandwidth(GB/s)": round(algo_bw, 2),
Expand Down Expand Up @@ -99,6 +100,7 @@ def dump_computation_ops_report(

report = {
"Dtype": dtype,
+ "Tensor Shapes": input_shapes,
"Memory Size(MB)": round(mb, 2),
"Kernel bandwidth(GB/s)": round(algo_bw, 2),
"Bandwidth Utilization(%)": bandwidth_utils,
Expand Down
2 changes: 1 addition & 1 deletion byte_micro_perf/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
matplotlib
pandas
- virtualenv
+ virtualenv==16.7.12
scikit-learn
prompt_toolkit
tqdm
Expand Down

0 comments on commit 041c295

Please sign in to comment.