From 71234f5825a0a6d711befe68b5699e8d5e64a808 Mon Sep 17 00:00:00 2001
From: jiangzishan
Date: Tue, 27 Aug 2024 20:48:28 +0800
Subject: [PATCH] provide reports convert script.

---
 byte_micro_perf/scripts/convert.py | 386 +++++++++++++++++++++++++++++
 1 file changed, 386 insertions(+)
 create mode 100644 byte_micro_perf/scripts/convert.py

diff --git a/byte_micro_perf/scripts/convert.py b/byte_micro_perf/scripts/convert.py
new file mode 100644
index 00000000..f9e2c4c4
--- /dev/null
+++ b/byte_micro_perf/scripts/convert.py
@@ -0,0 +1,386 @@
+import sys
+import csv
+import json
+import pathlib
+import argparse
+import logging
+
+
+CUR_DIR = pathlib.Path(__file__).parent.absolute()
+PRJ_ROOT_DIR = CUR_DIR.parent
+
+sys.path.insert(0, str(PRJ_ROOT_DIR))
+
+
+# attributes shared by every op's CSV schema
+unique_attrs = [
+    "op_name",
+    "sku_name",
+    "owner",
+    "perf_mode"
+]
+
+
+def get_unique_key(
+    op_name,
+    sku_name,
+    owner,
+    perf_mode,
+    *args,
+    **kwargs
+):
+    return ".".join([
+        sku_name,
+        owner,
+        op_name,
+        perf_mode
+    ]).replace(" ", "_")
+
+
+# per-op argument columns; shape comments describe input --> output
+arguments_map = {
+    # unary ops
+    # [batch, len] --> [batch, len]
+    "sin": ["dtype", "batch", "len"],
+    "cos": ["dtype", "batch", "len"],
+    "exp": ["dtype", "batch", "len"],
+    "exponential": ["dtype", "batch", "len"],
+    "silu": ["dtype", "batch", "len"],
+    "gelu": ["dtype", "batch", "len"],
+    "swiglu": ["dtype", "batch", "len"],
+    # float32: float32 --> float16/bfloat16
+    # float16: float16 --> float32
+    # bfloat16: bfloat16 --> float32
+    "cast": ["dtype", "batch", "len"],
+
+    # binary ops
+    # [batch, len] (op) [batch, len] --> [batch, len]
+    "add": ["dtype", "batch", "len"],
+    "mul": ["dtype", "batch", "len"],
+    "sub": ["dtype", "batch", "len"],
+    "div": ["dtype", "batch", "len"],
+
+    # reduction ops
+    # [batch, len] --> [batch, len]
+    "layernorm": ["dtype", "batch", "len"],
+    "softmax": ["dtype", "batch", "len"],
+    # [batch, len] --> [batch, 1]
+    "reduce_sum": ["dtype", "batch", "len"],
+    "reduce_min": ["dtype", "batch", "len"],
+    "reduce_max": ["dtype", "batch", "len"],
+
+    # index ops
+    # [batch, len] (op) [batch] --> [batch, len]
+    "index_add": ["dtype", "batch", "len"],
+    # [batch, len] --> [batch, len]
+    "sort": ["dtype", "batch", "len"],
+    "unique": ["dtype", "batch", "len"],
+    "gather": ["dtype", "batch", "len"],
+    "scatter": ["dtype", "batch", "len"],
+
+    # matrix ops
+    # [M, K] * [K, N] --> [M, N]
+    "gemm": ["dtype", "M", "N", "K"],
+    # [batch, M, K] * [batch, K, N] --> [batch, M, N]
+    "batch_gemm": ["dtype", "batch", "M", "N", "K"],
+    # group * ([M, K] * [K, N] --> [M, N])
+    "group_gemm": ["dtype", "batch", "group", "M_str", "N", "K"],
+
+    # communication ops
+    # [batch, len] --> [batch, len]
+    # tp_size split over batch
+    "broadcast": ["dtype", "tp_size", "batch", "len"],
+    "allreduce": ["dtype", "tp_size", "batch", "len"],
+    "allgather": ["dtype", "tp_size", "batch", "len"],
+    "alltoall": ["dtype", "tp_size", "batch", "len"],
+    "reducescatter": ["dtype", "tp_size", "batch", "len"],
+    "p2p": ["dtype", "tp_size", "batch", "len"],
+
+    "device2host": ["dtype", "batch", "len"],
+    "host2device": ["dtype", "batch", "len"]
+}
+
+
+target_attrs = [
+    # latency in us
+    "latency"
+]
+
+
+def get_csv_headers(op_name):
+    return unique_attrs + arguments_map.get(op_name, []) + target_attrs
+
+
+logger = logging.getLogger("bytemlperf_aeolus")
+
+
+def setup_logger(loglevel: str):
+    fmt = logging.Formatter(
+        fmt="%(asctime)s.%(msecs)03d %(filename)s:%(lineno)d [%(levelname)s]: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    handler = logging.StreamHandler(stream=sys.stdout)
+    handler.setFormatter(fmt)
+    logger.addHandler(handler)
+    logger.setLevel(loglevel.upper())
+    logger.propagate = False
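+
+
+# A hedged sketch of the per-op report JSON this converter consumes, inferred
+# from the fields read by the post-processing functions below ("Device Info",
+# "Performance", "Dtype", "Tensor Shapes", "Group", "Avg latency(us)").
+# Values are illustrative only; actual reports come from byte_micro_perf and
+# may carry additional fields:
+#
+# {
+#     "Device Info": "NVIDIA A800-SXM4-80GB",
+#     "Performance": [
+#         {
+#             "Dtype": "float32",
+#             "Tensor Shapes": [[4, 1024], [4, 1024]],
+#             "Group": 8,  # read only for communication ops
+#             "Avg latency(us)": 12.3
+#         }
+#     ]
+# }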
+
+
+sku_name_mapping = {
+    "MLU590-M9": "MLU590 M9",
+    "MLU590-M9D": "MLU590 M9D",
+    "MLU590-M9DK": "MLU590 M9D",
+    "Iluvatar BI-V150": "BI-V150",
+    "NVIDIA A800-SXM4-80GB": "A800 80GB SXM",
+    "NVIDIA H800": "H800 80GB SXM",
+    "NVIDIA H20": "H20 96GB SXM",
+    "Ascend910B2C": "Ascend910B2"
+}
+
+dtype_map = {
+    "float": "float32",
+    "half": "float16",
+    "int": "int32"
+}
+
+
+def normal_ops_func(op, sku_name, frame, perf_mode, json_data):
+    if not json_data or "Error" in json_data:
+        return
+    dtype = json_data["Dtype"]
+    if dtype in dtype_map:
+        dtype = dtype_map[dtype]
+
+    batch = json_data["Tensor Shapes"][0][0]
+    length = json_data["Tensor Shapes"][0][1]
+    latency = json_data["Avg latency(us)"]
+
+    return [op, sku_name, frame, perf_mode, dtype, batch, length, latency]
+
+
+def gemm_func(op, sku_name, frame, perf_mode, json_data):
+    if not json_data or "Error" in json_data:
+        return
+    dtype = json_data["Dtype"]
+    if dtype in dtype_map:
+        dtype = dtype_map[dtype]
+
+    M = json_data["Tensor Shapes"][0][0]
+    K = json_data["Tensor Shapes"][0][1]
+    N = json_data["Tensor Shapes"][1][1]
+    latency = json_data["Avg latency(us)"]
+
+    return [op, sku_name, frame, perf_mode, dtype, M, N, K, latency]
+
+
+def batch_gemm_func(op, sku_name, frame, perf_mode, json_data):
+    if not json_data or "Error" in json_data:
+        return
+    dtype = json_data["Dtype"]
+    if dtype in dtype_map:
+        dtype = dtype_map[dtype]
+
+    batch_size = json_data["Tensor Shapes"][0][0]
+    M = json_data["Tensor Shapes"][0][1]
+    K = json_data["Tensor Shapes"][0][2]
+    N = json_data["Tensor Shapes"][1][2]
+    latency = json_data["Avg latency(us)"]
+
+    return [op, sku_name, frame, perf_mode, dtype, batch_size, M, N, K, latency]
+
+
+def group_gemm_func(op, sku_name, frame, perf_mode, json_data):
+    if not json_data or "Error" in json_data:
+        return
+    dtype = json_data["Dtype"]
+    if dtype in dtype_map:
+        dtype = dtype_map[dtype]
+
+    batch_size = json_data["Tensor Shapes"][0][0][0]
+    group = len(json_data["Tensor Shapes"])
+
+    M_list = [int(json_data["Tensor Shapes"][i][0][0]) // batch_size for i in range(group)]
+    M_list_str = "/".join([str(m) for m in M_list])
+    K = json_data["Tensor Shapes"][0][0][1]
+    N = json_data["Tensor Shapes"][0][1][1]
+    latency = json_data["Avg latency(us)"]
+
+    return [op, sku_name, frame, perf_mode, dtype, batch_size, group, M_list_str, N, K, latency]
+
+
+def ccl_ops_func(op, sku_name, frame, perf_mode, json_data):
+    if not json_data or "Error" in json_data:
+        return
+    dtype = json_data["Dtype"]
+    if dtype in dtype_map:
+        dtype = dtype_map[dtype]
+
+    tp_size = json_data["Group"]
+    batch = json_data["Tensor Shapes"][0][0]
+    length = json_data["Tensor Shapes"][0][1]
+    latency = json_data["Avg latency(us)"]
+
+    return [op, sku_name, frame, perf_mode, dtype, tp_size, batch, length, latency]
+
+
+def d2h_h2d_func(op, sku_name, frame, perf_mode, json_data):
+    if not json_data or "Error" in json_data:
+        return
+    dtype = json_data["Dtype"]
+    if dtype in dtype_map:
+        dtype = dtype_map[dtype]
+
+    batch = json_data["Tensor Shapes"][0][0]
+    length = json_data["Tensor Shapes"][0][1]
+    latency = json_data["Avg latency(us)"]
+
+    return [op, sku_name, frame, perf_mode, dtype, batch, length, latency]
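+
+
+# For illustration (values hypothetical), a "gemm" record such as
+#     {"Dtype": "float", "Tensor Shapes": [[1024, 512], [512, 2048]],
+#      "Avg latency(us)": 42.0}
+# measured on an A800 yields, under get_csv_headers("gemm"), the CSV row:
+#     op_name,sku_name,owner,perf_mode,dtype,M,N,K,latency
+#     gemm,A800 80GB SXM,torch,host,float32,1024,2048,512,42.0
+# ("float" is normalized to "float32" via dtype_map; M/K come from the first
+# shape and N from the second.)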
+
+
+post_func_map = {
+    "sin": normal_ops_func,
+    "cos": normal_ops_func,
+    "exp": normal_ops_func,
+    "exponential": normal_ops_func,
+    "silu": normal_ops_func,
+    "gelu": normal_ops_func,
+    "swiglu": normal_ops_func,
+    "cast": normal_ops_func,
+
+    "add": normal_ops_func,
+    "mul": normal_ops_func,
+    "sub": normal_ops_func,
+    "div": normal_ops_func,
+
+    "layernorm": normal_ops_func,
+    "softmax": normal_ops_func,
+    "reduce_sum": normal_ops_func,
+    "reduce_min": normal_ops_func,
+    "reduce_max": normal_ops_func,
+
+    "index_add": normal_ops_func,
+    "sort": normal_ops_func,
+    "unique": normal_ops_func,
+    "gather": normal_ops_func,
+    "scatter": normal_ops_func,
+
+    "gemm": gemm_func,
+    "batch_gemm": batch_gemm_func,
+    "group_gemm": group_gemm_func,
+
+    "broadcast": ccl_ops_func,
+    "allreduce": ccl_ops_func,
+    "allgather": ccl_ops_func,
+    "alltoall": ccl_ops_func,
+    "reducescatter": ccl_ops_func,
+    "p2p": ccl_ops_func,
+
+    "device2host": d2h_h2d_func,
+    "host2device": d2h_h2d_func
+}
+
+
+def postprocess(op, file_list, dst_dir):
+    json_data_list = []
+    for file in file_list:
+        with open(file) as f:
+            json_data_list.append(json.load(f))
+    if not json_data_list:
+        logger.error(f"no data found in {file_list}")
+        return
+
+    sku_name = json_data_list[0]["Device Info"]
+    sku_name = sku_name_mapping.get(sku_name, sku_name)
+
+    perf_datas = []
+    for json_data in json_data_list:
+        if "Performance" not in json_data:
+            logger.error("no performance data")
+            continue
+        perf_datas.extend(json_data["Performance"])
+
+    unique_name = get_unique_key(op, sku_name, "torch", "host")
+    unique_csv_path = dst_dir / f"{unique_name}.csv"
+
+    with open(unique_csv_path, "w", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow(get_csv_headers(op))
+
+        for perf_data in perf_datas:
+            if op in post_func_map:
+                row = post_func_map[op](op, sku_name, "torch", "host", perf_data)
+                if row:
+                    writer.writerow(row)
+
+
+def convert_src(src, dst):
+    logger.info(f"src: {src}")
+    logger.info(f"dst: {dst}")
+
+    op_data_map = {}
+    for file in src.rglob("*.json"):
+        # gemv reports are converted with the gemm schema
+        dir_name = file.parent.name
+        if dir_name == "gemv":
+            dir_name = "gemm"
+        if dir_name not in op_data_map:
+            op_data_map[dir_name] = []
+        op_data_map[dir_name].append(file)
+
+    for op, files in op_data_map.items():
+        logger.info(f"op: {op}")
+        if op not in arguments_map:
+            logger.error(f"invalid op: {op}")
+            continue
+        postprocess(op, files, dst)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--src", type=str, required=True)
+    parser.add_argument("--output_dir", type=str, default="./temp")
+    parser.add_argument("--log_level", type=str, default="INFO")
+    args = parser.parse_args()
+    setup_logger(args.log_level)
+
+    src_dir = pathlib.Path(args.src).absolute()
+    if not src_dir.exists():
+        logger.error(f"{args.src} does not exist")
+        sys.exit(1)
+    elif not src_dir.is_dir():
+        logger.error(f"{args.src} is not a directory")
+        sys.exit(1)
+
+    output_dir = pathlib.Path(args.output_dir).absolute()
+    if not output_dir.exists():
+        output_dir.mkdir(parents=True, exist_ok=True)
+    elif not output_dir.is_dir():
+        logger.error(f"{args.output_dir} is not a directory")
+        sys.exit(1)
+
+    convert_src(src_dir, output_dir)
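+
+
+# Example invocation (paths are illustrative; point --src at wherever the
+# byte_micro_perf reports were written):
+#   python3 byte_micro_perf/scripts/convert.py --src ./reports --output_dir ./csv_reports
+# One CSV per op is written to --output_dir, named
+# "<sku_name>.<owner>.<op_name>.<perf_mode>.csv" with spaces replaced by
+# underscores, e.g. "A800_80GB_SXM.torch.gemm.host.csv".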