forked from bytedance/ByteMLPerf
Showing 1 changed file with 386 additions and 0 deletions.
import sys
import csv
import json
import pathlib
import argparse
import logging


CUR_DIR = pathlib.Path(__file__).parent.absolute()
PRJ_ROOT_DIR = CUR_DIR.parent

sys.path.insert(0, str(PRJ_ROOT_DIR))


# attributes shared by every benchmark record
unique_attrs = [
    "op_name",
    "sku_name",
    "owner",
    "perf_mode",
]


def get_unique_key(
    op_name,
    sku_name,
    owner,
    perf_mode,
    *args,
    **kwargs
):
    # build a filesystem-friendly key: "<sku>.<owner>.<op>.<mode>"
    return ".".join([
        sku_name,
        owner,
        op_name,
        perf_mode
    ]).replace(" ", "_")
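
# A quick illustration (values assumed for the example): for a GEMM result
# collected on an A800 with the torch backend and host-side timing,
#   get_unique_key("gemm", "A800 80GB SXM", "torch", "host")
# returns "A800_80GB_SXM.torch.gemm.host".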


arguments_map = {
    # unary ops
    # [batch, len] --> [batch, len]
    "sin": ["dtype", "batch", "len"],
    "cos": ["dtype", "batch", "len"],
    "exp": ["dtype", "batch", "len"],
    "exponential": ["dtype", "batch", "len"],
    "silu": ["dtype", "batch", "len"],
    "gelu": ["dtype", "batch", "len"],
    "swiglu": ["dtype", "batch", "len"],
    # float32: float32 --> float16/bfloat16
    # float16: float16 --> float32
    # bfloat16: bfloat16 --> float32
    "cast": ["dtype", "batch", "len"],

    # binary ops
    # [batch, len] (op) [batch, len] --> [batch, len]
    "add": ["dtype", "batch", "len"],
    "mul": ["dtype", "batch", "len"],
    "sub": ["dtype", "batch", "len"],
    "div": ["dtype", "batch", "len"],

    # reduction ops
    # [batch, len] --> [batch, len]
    "layernorm": ["dtype", "batch", "len"],
    "softmax": ["dtype", "batch", "len"],
    # [batch, len] --> [batch, 1]
    "reduce_sum": ["dtype", "batch", "len"],
    "reduce_min": ["dtype", "batch", "len"],
    "reduce_max": ["dtype", "batch", "len"],

    # indexing ops
    # [batch, len] (op) [batch] --> [batch, len]
    "index_add": ["dtype", "batch", "len"],
    # [batch, len] --> [batch, len]
    "sort": ["dtype", "batch", "len"],
    "unique": ["dtype", "batch", "len"],
    "gather": ["dtype", "batch", "len"],
    "scatter": ["dtype", "batch", "len"],

    # matrix ops
    # [M, K] * [K, N] --> [M, N]
    "gemm": ["dtype", "M", "N", "K"],
    # [batch, M, K] * [batch, K, N] --> [batch, M, N]
    "batch_gemm": ["dtype", "batch", "M", "N", "K"],
    # group * ([M, K] * [K, N] --> [M, N])
    "group_gemm": ["dtype", "batch", "group", "M_str", "N", "K"],

    # communication ops
    # [batch, len] --> [batch, len]
    # tp_size split over batch
    "broadcast": ["dtype", "tp_size", "batch", "len"],
    "allreduce": ["dtype", "tp_size", "batch", "len"],
    "allgather": ["dtype", "tp_size", "batch", "len"],
    "alltoall": ["dtype", "tp_size", "batch", "len"],
    "reducescatter": ["dtype", "tp_size", "batch", "len"],
    "p2p": ["dtype", "tp_size", "batch", "len"],

    # host/device memcpy ops
    "device2host": ["dtype", "batch", "len"],
    "host2device": ["dtype", "batch", "len"]
}


target_attrs = [
    # latency in us
    "latency"
]


def get_csv_headers(op_name):
    return unique_attrs + arguments_map.get(op_name, []) + target_attrs
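
# For example (derived from the tables above):
#   get_csv_headers("gemm")
# yields ["op_name", "sku_name", "owner", "perf_mode",
#         "dtype", "M", "N", "K", "latency"].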


logger = logging.getLogger("bytemlperf_aeolus")


def setup_logger(loglevel: str):
    fmt = logging.Formatter(
        fmt="%(asctime)s.%(msecs)03d %(filename)s:%(lineno)d [%(levelname)s]: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setFormatter(fmt)
    logger.addHandler(handler)
    logger.setLevel(loglevel.upper())
    logger.propagate = False
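
# With this format, a record emitted at INFO level renders roughly as
# (timestamp and source location are illustrative):
#   2025-01-01 12:00:00.123 convert.py:42 [INFO]: op: gemm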


# normalize vendor-reported device names to canonical SKU names
sku_name_mapping = {
    "MLU590-M9": "MLU590 M9",
    "MLU590-M9D": "MLU590 M9D",
    "MLU590-M9DK": "MLU590 M9D",
    "Iluvatar BI-V150": "BI-V150",
    "NVIDIA A800-SXM4-80GB": "A800 80GB SXM",
    "NVIDIA H800": "H800 80GB SXM",
    "NVIDIA H20": "H20 96GB SXM",
    "Ascend910B2C": "Ascend910B2"
}

# normalize short dtype aliases to canonical names
dtype_map = {
    "float": "float32",
    "half": "float16",
    "int": "int32"
}


def normal_ops_func(op, sku_name, frame, perf_mode, json_data):
    if not json_data or "Error" in json_data:
        return
    dtype = json_data["Dtype"]
    if dtype in dtype_map:
        dtype = dtype_map[dtype]

    batch = json_data["Tensor Shapes"][0][0]
    seq_len = json_data["Tensor Shapes"][0][1]
    latency = json_data["Avg latency(us)"]

    return [op, sku_name, frame, perf_mode, dtype, batch, seq_len, latency]
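
# Illustrative input/output (record shape assumed from the keys read above):
#   normal_ops_func("add", "A800 80GB SXM", "torch", "host",
#                   {"Dtype": "half", "Tensor Shapes": [[16, 4096]],
#                    "Avg latency(us)": 12.3})
# -> ["add", "A800 80GB SXM", "torch", "host", "float16", 16, 4096, 12.3]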


def gemm_func(op, sku_name, frame, perf_mode, json_data):
    if not json_data or "Error" in json_data:
        return
    dtype = json_data["Dtype"]
    if dtype in dtype_map:
        dtype = dtype_map[dtype]

    # shapes are [[M, K], [K, N]]
    M = json_data["Tensor Shapes"][0][0]
    K = json_data["Tensor Shapes"][0][1]
    N = json_data["Tensor Shapes"][1][1]
    latency = json_data["Avg latency(us)"]

    return [op, sku_name, frame, perf_mode, dtype, M, N, K, latency]
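
# Illustrative: a [4096, 8192] x [8192, 1024] GEMM record (shapes assumed)
#   {"Dtype": "half", "Tensor Shapes": [[4096, 8192], [8192, 1024]],
#    "Avg latency(us)": 85.0}
# maps to M=4096, K=8192, N=1024, matching the "gemm" header order
# [..., "M", "N", "K", "latency"].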


def batch_gemm_func(op, sku_name, frame, perf_mode, json_data):
    if not json_data or "Error" in json_data:
        return
    dtype = json_data["Dtype"]
    if dtype in dtype_map:
        dtype = dtype_map[dtype]

    # shapes are [[batch, M, K], [batch, K, N]]
    batch_size = json_data["Tensor Shapes"][0][0]
    M = json_data["Tensor Shapes"][0][1]
    K = json_data["Tensor Shapes"][0][2]
    N = json_data["Tensor Shapes"][1][2]
    latency = json_data["Avg latency(us)"]

    return [op, sku_name, frame, perf_mode, dtype, batch_size, M, N, K, latency]
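
# Illustrative (layout assumed): "Tensor Shapes": [[8, 512, 1024], [8, 1024, 256]]
# yields batch_size=8, M=512, K=1024, N=256.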

def group_gemm_func(op, sku_name, frame, perf_mode, json_data):
    if not json_data or "Error" in json_data:
        return
    dtype = json_data["Dtype"]
    if dtype in dtype_map:
        dtype = dtype_map[dtype]

    # the first group's leading dim is taken as the batch size; each group's
    # M is then recovered by dividing that group's leading dim by it
    batch_size = json_data["Tensor Shapes"][0][0][0]
    group = len(json_data["Tensor Shapes"])

    M_list = [int(json_data["Tensor Shapes"][i][0][0]) // batch_size for i in range(group)]
    M_list_str = "/".join([str(m) for m in M_list])
    K = json_data["Tensor Shapes"][0][0][1]
    N = json_data["Tensor Shapes"][0][1][1]
    latency = json_data["Avg latency(us)"]

    return [op, sku_name, frame, perf_mode, dtype, batch_size, group, M_list_str, N, K, latency]
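
# Illustrative (nested-shape layout assumed): with
#   "Tensor Shapes": [[[16, 1024], [1024, 512]], [[32, 1024], [1024, 512]]]
# batch_size=16, group=2, M_list=[1, 2], M_list_str="1/2", K=1024, N=512.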


def ccl_ops_func(op, sku_name, frame, perf_mode, json_data):
    if not json_data or "Error" in json_data:
        return
    dtype = json_data["Dtype"]
    if dtype in dtype_map:
        dtype = dtype_map[dtype]

    tp_size = json_data["Group"]
    batch = json_data["Tensor Shapes"][0][0]
    seq_len = json_data["Tensor Shapes"][0][1]
    latency = json_data["Avg latency(us)"]

    return [op, sku_name, frame, perf_mode, dtype, tp_size, batch, seq_len, latency]
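
# Illustrative (field values assumed): an allreduce record such as
#   {"Dtype": "float", "Group": 8, "Tensor Shapes": [[16, 4096]],
#    "Avg latency(us)": 240.0}
# -> ["allreduce", <sku>, "torch", "host", "float32", 8, 16, 4096, 240.0]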

def d2h_h2d_func(op, sku_name, frame, perf_mode, json_data):
    # same record layout as normal_ops_func: [batch, len] plus latency
    if not json_data or "Error" in json_data:
        return
    dtype = json_data["Dtype"]
    if dtype in dtype_map:
        dtype = dtype_map[dtype]

    batch = json_data["Tensor Shapes"][0][0]
    seq_len = json_data["Tensor Shapes"][0][1]
    latency = json_data["Avg latency(us)"]

    return [op, sku_name, frame, perf_mode, dtype, batch, seq_len, latency]


post_func_map = {
    "sin": normal_ops_func,
    "cos": normal_ops_func,
    "exp": normal_ops_func,
    "exponential": normal_ops_func,
    "silu": normal_ops_func,
    "gelu": normal_ops_func,
    "swiglu": normal_ops_func,
    "cast": normal_ops_func,

    "add": normal_ops_func,
    "mul": normal_ops_func,
    "sub": normal_ops_func,
    "div": normal_ops_func,

    "layernorm": normal_ops_func,
    "softmax": normal_ops_func,
    "reduce_sum": normal_ops_func,
    "reduce_min": normal_ops_func,
    "reduce_max": normal_ops_func,

    "index_add": normal_ops_func,
    "sort": normal_ops_func,
    "unique": normal_ops_func,
    "gather": normal_ops_func,
    "scatter": normal_ops_func,

    "gemm": gemm_func,
    "batch_gemm": batch_gemm_func,
    "group_gemm": group_gemm_func,

    "broadcast": ccl_ops_func,
    "allreduce": ccl_ops_func,
    "allgather": ccl_ops_func,
    "alltoall": ccl_ops_func,
    "reducescatter": ccl_ops_func,
    "p2p": ccl_ops_func,

    "device2host": d2h_h2d_func,
    "host2device": d2h_h2d_func
}


def postprocess(op, file_list, dst_dir):
    json_data_list = []
    for file in file_list:
        # close each file deterministically instead of relying on GC
        with open(file) as f:
            json_data_list.append(json.load(f))
    if not json_data_list:
        logger.error(f"no data found in {file_list}")
        return

    sku_name = json_data_list[0]["Device Info"]
    sku_name = sku_name_mapping.get(sku_name, sku_name)
    perf_datas = []
    for json_data in json_data_list:
        if "Performance" not in json_data:
            logger.error("no performance data")
            continue
        perf_datas.extend(json_data["Performance"])

    unique_name = get_unique_key(op, sku_name, "torch", "host")
    unique_csv_path = dst_dir / f"{unique_name}.csv"

    # newline="" avoids blank rows on Windows, per the csv module docs
    with open(unique_csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(get_csv_headers(op))

        for perf_data in perf_datas:
            if op in post_func_map:
                row = post_func_map[op](op, sku_name, "torch", "host", perf_data)
                if row:
                    writer.writerow(row)
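
# End-to-end sketch (file contents assumed): three gemm JSON reports from an
# A800 produce a single CSV named
#   A800_80GB_SXM.torch.gemm.host.csv
# with one row per entry in each report's "Performance" list.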


def convert_src(src, dst):
    logger.info(f"src: {src}")
    logger.info(f"dst: {dst}")

    # group result files by the op-named directory containing them;
    # gemv results are folded into gemm
    op_data_map = {}
    for file in src.rglob("*.json"):
        dir_name = file.parent.name
        if dir_name == "gemv":
            dir_name = "gemm"
        op_data_map.setdefault(dir_name, []).append(file)

    for op, files in op_data_map.items():
        logger.info(f"op: {op}")
        if op not in arguments_map:
            logger.error(f"invalid op: {op}")
            continue
        postprocess(op, files, dst)
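
# Expected input layout (assumed from the directory-name lookup above):
#   <src>/
#     gemm/result-0.json
#     allreduce/result-0.json
#     ...
# Each leaf directory is named after the op whose results it holds.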


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--src", type=str, required=True)
    parser.add_argument("--output_dir", type=str, default="./temp")
    parser.add_argument("--log_level", type=str, default="INFO")
    args = parser.parse_args()
    setup_logger(args.log_level)

    src_dir = pathlib.Path(args.src).absolute()
    if not src_dir.exists():
        logger.error(f"{args.src} does not exist")
        sys.exit(1)
    elif not src_dir.is_dir():
        logger.error(f"{args.src} is not a directory")
        sys.exit(1)

    output_dir = pathlib.Path(args.output_dir).absolute()
    if not output_dir.exists():
        output_dir.mkdir(parents=True, exist_ok=True)
    elif not output_dir.is_dir():
        logger.error(f"{args.output_dir} is not a directory")
        sys.exit(1)

    convert_src(src_dir, output_dir)
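
# Invocation sketch (script name illustrative):
#   python convert_to_csv.py --src ./reports/torch --output_dir ./temp --log_level INFO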