diff --git a/scripts/build_scripts/build_with_profiler_opt.sh b/scripts/build_scripts/build_with_profiler_opt.sh index 0b709ea0eb8..2cb9e1d94a6 100755 --- a/scripts/build_scripts/build_with_profiler_opt.sh +++ b/scripts/build_scripts/build_with_profiler_opt.sh @@ -11,10 +11,15 @@ if [[ -z "$ARCH_NAME" ]]; then exit 1 fi -remove_default_log_locations - ENABLE_TRACY=1 ENABLE_PROFILER=1 cmake -B build -G Ninja -DCMAKE_CXX_COMPILER=clang++-17 -cmake --build build --target clean + +if [[ $1 == "NO_CLEAN" ]]; then + cmake --build build +else + remove_default_log_locations + cmake --build build --target clean +fi + cmake --build build --target install cmake --build build --target programming_examples PYTHON_ENV_DIR=$(pwd)/build/python_env ./create_venv.sh diff --git a/tt_eager/tracy.py b/tt_eager/tracy.py index 1e441e51908..d92a5c05e83 100644 --- a/tt_eager/tracy.py +++ b/tt_eager/tracy.py @@ -29,6 +29,8 @@ import tracy_state +DEFAULT_CHILD_CALLS = ["CompileProgram", "HWCommandQueue_write_buffer"] + def signpost(header, message=None): from tracy_tt_lib import tracy_message @@ -118,7 +120,7 @@ def run_report_setup(verbose, port): return toolsReady, captureProcess -def generate_report(outFolder, nameAppend): +def generate_report(outFolder, nameAppend, childCalls): tracyOutFile = PROFILER_LOGS_DIR / TRACY_FILE_NAME timeOut = 15 timeCount = 0 @@ -134,8 +136,14 @@ def generate_report(outFolder, nameAppend): timeCount += 1 time.sleep(1) with open(PROFILER_LOGS_DIR / TRACY_OPS_TIMES_FILE_NAME, "w") as csvFile: + childCallStr = "" + childCallsList = DEFAULT_CHILD_CALLS + if childCalls: + childCallsList = list(set(childCalls + DEFAULT_CHILD_CALLS)) + if childCallsList: + childCallStr = f"-x {','.join(childCallsList)}" subprocess.run( - f"{PROFILER_BIN_DIR / TRACY_CSVEXPROT_TOOL} -u -f TT_DNN {PROFILER_LOGS_DIR / TRACY_FILE_NAME}", + f"{PROFILER_BIN_DIR / TRACY_CSVEXPROT_TOOL} -u -p TT_DNN {childCallStr} {PROFILER_LOGS_DIR / TRACY_FILE_NAME}", shell=True, check=True, 
stdout=csvFile, @@ -173,6 +181,10 @@ def get_available_port(): return None +def split_comma_list(option, opt, value, parser): + setattr(parser.values, option.dest, value.split(",")) + + def main(): from optparse import OptionParser @@ -208,6 +220,13 @@ help="Show full op info for cached ops as well", default=True, ) + parser.add_option( + "--child-functions", + type="string", + help="Comma separated list of child functions to have their duration included for parent OPs", + action="callback", + callback=split_comma_list, + ) if not sys.argv[1:]: parser.print_usage() @@ -306,7 +325,7 @@ def signal_handler(sig, frame): try: captureProcess.communicate(timeout=15) - generate_report(options.output_folder, options.name_append) + generate_report(options.output_folder, options.name_append, options.child_functions) except subprocess.TimeoutExpired as e: captureProcess.terminate() captureProcess.communicate() diff --git a/tt_metal/third_party/tracy b/tt_metal/third_party/tracy index a1fe17124dd..555d4cb56cf 160000 --- a/tt_metal/third_party/tracy +++ b/tt_metal/third_party/tracy @@ -1 +1 @@ -Subproject commit a1fe17124ddf8f0ade677a9cca88b4f6625dcf36 +Subproject commit 555d4cb56cf07c1cb651da16e2addb0d6304a5b1 diff --git a/tt_metal/tools/profiler/process_ops_logs.py b/tt_metal/tools/profiler/process_ops_logs.py index 0f14db45b1c..231be21a787 100755 --- a/tt_metal/tools/profiler/process_ops_logs.py +++ b/tt_metal/tools/profiler/process_ops_logs.py @@ -126,9 +126,24 @@ def import_tracy_op_logs(): with open(tracyOpTimesLog, "r") as csvFile: csvReader = csv.DictReader(csvFile) for op in csvReader: - opID = int(op["zone_text"].split(":")[-1]) - assert opID in ops.keys(), f"Op time for op {opID} must present" - ops[opID]["host_time"] = op + if "TT_DNN" in op["name"]: + opID = int(op["zone_text"].split(":")[-1]) + assert opID in ops.keys(), f"Op time for op {opID} must be present" + ops[opID]["host_time"] = op + + with open(tracyOpTimesLog, "r") as csvFile: + csvReader = 
csv.DictReader(csvFile) + for op in csvReader: + if op["special_parent_text"] and "id:" in op["special_parent_text"]: + parentOpID = int(op["special_parent_text"].split(":")[-1]) + + if "child_calls" in ops[parentOpID].keys(): + if op["name"] in ops[parentOpID]["child_calls"].keys(): + ops[parentOpID]["child_calls"][op["name"]] += int(op["exec_time_ns"]) + else: + ops[parentOpID]["child_calls"][op["name"]] = int(op["exec_time_ns"]) + else: + ops[parentOpID]["child_calls"] = {op["name"]: int(op["exec_time_ns"])} return ops, signposts @@ -300,6 +315,13 @@ def row_compare(row): rowKeys = list(ops.keys()) + list(signposts.keys()) rowKeys.sort(key=row_compare) + childCallKeys = set() + for row in rowKeys: + if type(row) is int: + op = ops[row] + if "child_calls" in op.keys(): + for childCall in op["child_calls"]: + childCallKeys.add(f"{childCall}_TT_HOST_FUNC [ns]") for row in rowKeys: rowDict = {} if type(row) is str and "sp" in row: @@ -360,6 +382,11 @@ def row_compare(row): rowDict["OP TO OP LATENCY [ns]"] = 0 devicePreOpTime[deviceID] = analysisData[0]["end_cycle"] + if "child_calls" in opData.keys(): + for childCall, duration in opData["child_calls"].items(): + headerField = f"{childCall}_TT_HOST_FUNC [ns]" + rowDict[headerField] = f"{duration:.0f}" + assert "input_tensors" in opData.keys(), "Ops must have input tensors" if "optional_input_tensors" in opData.keys(): add_io_data(opData["input_tensors"] + opData["optional_input_tensors"], "INPUT") @@ -384,6 +411,7 @@ def row_compare(row): + tensorCSVData["INPUT"]["headers"] + tensorCSVData["OUTPUT"]["headers"] + OPS_CSV_HEADER[ioHeaderIndex + 2 :] + + sorted(list(childCallKeys)) ) writer = csv.DictWriter(allOpsCSV, fieldnames=allHeaders) writer.writeheader()