From a0c2611645cdb1a721ea79582d802ac6587403ba Mon Sep 17 00:00:00 2001 From: Mo Date: Fri, 6 Sep 2024 19:08:12 +0000 Subject: [PATCH] #9956: Support perf report for tracing runs --- tt_metal/impl/device/device.cpp | 9 + tt_metal/tools/profiler/process_ops_logs.py | 240 ++++++++++++++------ tt_metal/tools/profiler/tt_metal_tracy.hpp | 29 +++ ttnn/cpp/ttnn/operations/core/core.cpp | 7 + ttnn/tracy/__init__.py | 2 +- 5 files changed, 217 insertions(+), 70 deletions(-) create mode 100644 tt_metal/tools/profiler/tt_metal_tracy.hpp diff --git a/tt_metal/impl/device/device.cpp b/tt_metal/impl/device/device.cpp index 005189eeb80..286d6b670ea 100644 --- a/tt_metal/impl/device/device.cpp +++ b/tt_metal/impl/device/device.cpp @@ -21,6 +21,7 @@ #include "noc/noc_parameters.h" #include "tt_metal/impl/device/device_pool.hpp" #include "tt_metal/detail/persistent_kernel_cache.hpp" +#include "tt_metal/tools/profiler/tt_metal_tracy.hpp" #include "llrt/hal.hpp" namespace tt { @@ -3299,6 +3300,8 @@ bool Device::using_slow_dispatch() const { } void Device::begin_trace(const uint8_t cq_id, const uint32_t tid) { + ZoneScoped; + TracyTTMetalBeginTrace(this->id(), tid); TT_FATAL(this->trace_buffer_pool_.count(tid) == 0, "Trace already exists for tid {} on device", tid); TT_FATAL(!this->hw_command_queues_[cq_id]->tid.has_value(), "CQ {} is already being used for tracing tid {}", (uint32_t)cq_id, tid); this->MarkAllocationsSafe(); @@ -3308,6 +3311,8 @@ void Device::begin_trace(const uint8_t cq_id, const uint32_t tid) { } void Device::end_trace(const uint8_t cq_id, const uint32_t tid) { + ZoneScoped; + TracyTTMetalEndTrace(this->id(), tid); TT_FATAL(this->hw_command_queues_[cq_id]->tid == tid, "CQ {} is not being used for tracing tid {}", (uint32_t)cq_id, tid); TT_FATAL(this->trace_buffer_pool_.count(tid) > 0, "Trace instance {} must exist on device", tid); this->hw_command_queues_[cq_id]->record_end(); @@ -3324,6 +3329,8 @@ void Device::end_trace(const uint8_t cq_id, const uint32_t tid) { } 
void Device::replay_trace(const uint8_t cq_id, const uint32_t tid, const bool blocking) { + ZoneScoped; + TracyTTMetalReplayTrace(this->id(), tid); constexpr bool check = false; TT_FATAL(this->trace_buffer_pool_.count(tid) > 0, "Trace instance {} must exist on device" , tid); if constexpr (check) { @@ -3337,6 +3344,8 @@ void Device::replay_trace(const uint8_t cq_id, const uint32_t tid, const bool bl } void Device::release_trace(const uint32_t tid) { + ZoneScoped; + TracyTTMetalReleaseTrace(this->id(), tid); uint32_t erased = this->trace_buffer_pool_.erase(tid); // Only enable allocations once all captured traces are released if (this->trace_buffer_pool_.empty()) { diff --git a/tt_metal/tools/profiler/process_ops_logs.py b/tt_metal/tools/profiler/process_ops_logs.py index 37f1f0036be..ced75eadc8d 100755 --- a/tt_metal/tools/profiler/process_ops_logs.py +++ b/tt_metal/tools/profiler/process_ops_logs.py @@ -13,6 +13,7 @@ import json import yaml from datetime import datetime +import copy import click from loguru import logger @@ -62,6 +63,9 @@ "DEVICE COMPUTE CB RESERVE BACK [ns]", "INPUTS", "OUTPUTS", + "TRACE ID", + "TRACE RUNTIME ID", + "COMPUTE KERNEL PATH", "COMPUTE KERNEL SOURCE", "COMPUTE KERNEL HASH", "DATA MOVEMENT KERNEL SOURCE", @@ -94,37 +98,72 @@ def import_tracy_op_logs(logFolder): with open(tracyOpDataLog, "r", newline="") as csvFile: opDataDicts = csv.DictReader(csvFile, delimiter=";", quotechar="`") opsData = [] + traceIDs = {} + traceReplays = {} for opDataDict in opDataDicts: opDataStr = opDataDict["MessageName"] opDataTime = opDataDict["total_ns"] - if "TT_DNN" in opDataStr: - tmpStrs = opDataStr.split(" ->\n", 1) - opData = {} - if len(tmpStrs) > 1: # uncached device op, host op, or fallback op - jsonStr = tmpStrs[-1] - opData = json.loads(jsonStr) - if "op_hash" in opData.keys(): - assert "device_id" in opData.keys() - deviceID = int(opData["device_id"]) - opHash = int(opData["op_hash"]) - if deviceID in cached_ops.keys(): - 
cached_ops[deviceID][opHash] = opData.copy() + if "TT_DNN" in opDataStr or "TT_METAL" in opDataStr: + if "OP" in opDataStr: + tmpStrs = opDataStr.split(" ->\n", 1) + opData = {} + if len(tmpStrs) > 1: # uncached device op, host op, or fallback op + jsonStr = tmpStrs[-1] + opData = json.loads(jsonStr) + if "op_hash" in opData.keys(): + assert "device_id" in opData.keys() + deviceID = int(opData["device_id"]) + opHash = int(opData["op_hash"]) + if deviceID in cached_ops.keys(): + cached_ops[deviceID][opHash] = opData.copy() + else: + cached_ops[deviceID] = {opHash: opData.copy()} + del cached_ops[deviceID][opHash]["global_call_count"] + opData["trace_id"] = None + if deviceID in traceIDs: + opData["trace_id"] = traceIDs[deviceID] + else: # cached device op + opDataList = opDataStr.split(":", 1)[-1].split(",") + assert len(opDataList) > 3, "Wrong cached op info format" + opCode = opDataList[0].strip() + opHash = int(opDataList[1]) + deviceID = int(opDataList[2]) + opID = int(opDataList[3]) + assert deviceID in cached_ops.keys(), "Expected hashed op info is not found" + assert opHash in cached_ops[deviceID].keys(), "Expected hashed op info is not found" + opData = cached_ops[deviceID][opHash].copy() + opData["global_call_count"] = opID + opData["trace_id"] = None + if deviceID in traceIDs: + opData["trace_id"] = traceIDs[deviceID] + opData["tracy_time"] = opDataTime + opsData.append(opData) + elif "TRACE" in opDataStr: + IDs = opDataStr.split(":")[-1].strip().split(",") + assert len(IDs) == 2, ( + "Wrong number of IDs is provided in trace message. " + "Device and trace are the two IDs that should be provided. 
" + f"But IDs {IDs} were provided" + ) + deviceID = int(IDs[0].strip()) + traceID = int(IDs[1].strip()) + if "BEGIN" in opDataStr: + traceIDs[deviceID] = traceID + elif "END" in opDataStr: + assert traceIDs[deviceID] == traceID, ( + f"Wrong trace ID, device {deviceID} should finish on trace ID " + f"{traceIDs[deviceID]} but it is finishing on trace ID {traceID}" + ) + traceIDs[deviceID] = None + elif "REPLAY" in opDataStr: + replayIDTime = opDataTime + if deviceID in traceReplays: + if traceID in traceReplays[deviceID]: + traceReplays[deviceID][traceID].append(replayIDTime) + else: + traceReplays[deviceID][traceID] = [replayIDTime] else: - cached_ops[deviceID] = {opHash: opData.copy()} - del cached_ops[deviceID][opHash]["global_call_count"] - else: # cached device op - opDataList = opDataStr.split(":", 1)[-1].split(",") - assert len(opDataList) > 3, "Wrong cached op info format" - opCode = opDataList[0].strip() - opHash = int(opDataList[1]) - deviceID = int(opDataList[2]) - opID = int(opDataList[3]) - assert deviceID in cached_ops.keys(), "Expected hashed op info is not found" - assert opHash in cached_ops[deviceID].keys(), "Expected hashed op info is not found" - opData = cached_ops[deviceID][opHash].copy() - opData["global_call_count"] = opID - opData["tracy_time"] = opDataTime - opsData.append(opData) + traceReplays[deviceID] = {traceID: [replayIDTime]} if "TT_SIGNPOST" in opDataStr: signpostsCount += 1 @@ -135,7 +174,7 @@ def import_tracy_op_logs(logFolder): with open(tracyOpTimesLog, "r") as csvFile: csvReader = csv.DictReader(csvFile) for op in csvReader: - if "TT_DNN" in op["name"]: + if "TT_DNN" in op["name"] or "TT_METAL" in op["name"]: opID = int(op["zone_text"].split(":")[-1]) assert opID in ops.keys(), f"Op time for op {opID} must present" ops[opID]["host_time"] = op @@ -154,13 +193,14 @@ def import_tracy_op_logs(logFolder): else: ops[parentOpID]["child_calls"] = {op["name"]: int(op["exec_time_ns"])} - return ops, signposts + return ops, signposts, 
traceReplays # Generate a map of OP reference list per device. def get_device_op_data(ops): logger.info(f"Getting device ops") deviceOps = {} + hasTraceRuns = False for opID, opData in ops.items(): if "device_id" in opData.keys(): deviceID = opData["device_id"] @@ -168,6 +208,8 @@ def get_device_op_data(ops): deviceOps[deviceID] = [opData] else: deviceOps[deviceID].append(opData) + if "trace_id" in opData.keys() and opData["trace_id"] is not None: + hasTraceRuns = True def device_ops_compare(op): return int(op["global_call_count"]) @@ -175,24 +217,24 @@ def device_ops_compare(op): for deviceID in deviceOps: deviceOps[deviceID].sort(key=device_ops_compare) - return deviceOps + return deviceOps, hasTraceRuns def device_log_ops_compare(op): - if ( - "timeseries" in op - and len(op["timeseries"]) > 0 - and len(op["timeseries"][0]) > 0 - and "run_host_id" in op["timeseries"][0][0] - ): - return int(op["timeseries"][0][0]["run_host_id"]) + if "timeseries" in op and len(op["timeseries"]) > 0 and len(op["timeseries"][0]) > 1: + return int(op["timeseries"][0][1]) else: return 0 # Append device data to device ops and return the list of mapped device op ref list -def append_device_data(ops, logFolder): - deviceOps = get_device_op_data(ops) +def append_device_data(ops, traceReplays, logFolder): + traceReplayCounts = {} + for deviceID in traceReplays: + traceReplayCounts[deviceID] = {} + for traceID in traceReplays[deviceID]: + traceReplayCounts[deviceID][traceID] = len(traceReplays[deviceID][traceID]) + devicesOps, hasTraceRuns = get_device_op_data(ops) logger.info(f"Appending device data") deviceTimesLog = os.path.join(logFolder, PROFILER_DEVICE_SIDE_LOG) if os.path.isfile(deviceTimesLog): @@ -200,14 +242,56 @@ def append_device_data(ops, logFolder): setup.deviceInputLog = deviceTimesLog deviceData = import_log_run_stats(setup) freq = deviceData["deviceInfo"]["freq"] - for device in deviceOps: + for device in devicesOps: assert device in deviceData["devices"].keys() 
deviceOpsTime = deviceData["devices"][device]["cores"]["DEVICE"]["riscs"]["TENSIX"]["ops"] deviceOpsTime.sort(key=device_log_ops_compare) - if len(deviceOps[device]) != len(deviceOpsTime): + if hasTraceRuns: + generatedHostData = [] + opIDHostDataDict = {} + for deviceOp in devicesOps[device]: + opID = deviceOp["global_call_count"] + assert ( + opID not in opIDHostDataDict + ), f"Host op ID cannot be repeated: op ID {opID} was reported twice by the host" + opIDHostDataDict[opID] = copy.deepcopy(deviceOp) + + traceOps = {} + for deviceOpTime in deviceOpsTime: + if len(deviceOpTime["timeseries"]) > 0: + timeID, ts, statData, risc, core = deviceOpTime["timeseries"][0] + assert "run_host_id" in timeID.keys(), "Device op ID missing: Device data must provide op ID" + deviceOpID = timeID["run_host_id"] + assert ( + deviceOpID in opIDHostDataDict + ), f"Device op ID not present: Device op ID {deviceOpID} not present in host data" + traceID = opIDHostDataDict[deviceOpID]["trace_id"] + if traceID is not None: + if device in traceOps: + if traceID in traceOps[device]: + if deviceOpID in traceOps[device][traceID]: + traceReplays[device][traceID].pop(0) + traceOps[device][traceID] = set([deviceOpID]) + else: + traceOps[device][traceID].add(deviceOpID) + else: + traceOps[device][traceID] = set([deviceOpID]) + else: + traceOps[device] = {traceID: set([deviceOpID])} + assert ( + len(traceReplays[device][traceID]) > 0 + ), "Wrong trace replay count: Device has more ops than trace replay issued commands" + opIDHostDataDict[deviceOpID]["tracy_time"] = traceReplays[device][traceID][0] + opIDHostDataDict[deviceOpID]["trace_runtime_id"] = ( + traceReplayCounts[device][traceID] - len(traceReplays[device][traceID]) + 1 + ) + generatedHostData.append(copy.deepcopy(opIDHostDataDict[deviceOpID])) + devicesOps[device] = generatedHostData + + if len(devicesOps[device]) != len(deviceOpsTime): deviceOPId = None hostOPId = None - for deviceOp, deviceOpTime in zip(deviceOps[device], 
deviceOpsTime): + for deviceOp, deviceOpTime in zip(devicesOps[device], deviceOpsTime): if len(deviceOpTime["timeseries"]) > 0: timeID, ts, statData, risc, core = deviceOpTime["timeseries"][0] if "zone_name" in timeID.keys() and "FW" in timeID["zone_name"]: @@ -218,14 +302,16 @@ def append_device_data(ops, logFolder): break if deviceOPId and hostOPId: - assert ( - False - ), f"Device data mismatch: Expected {len(deviceOps[device])} but received {len(deviceOpsTime)} ops on device {device}. Device is showing op ID {deviceOPId} when host is showing op ID {hostOPId}" + assert False, ( + f"Device data mismatch: Expected {len(devicesOps[device])} " + f"but received {len(deviceOpsTime)} ops on device {device}. " + f"Device is showing op ID {deviceOPId} when host is showing op ID {hostOPId}" + ) else: assert ( False ), f"Device data mismatch: Expected {len(deviceOps[device])} but received {len(deviceOpsTime)} ops on device {device}" - for deviceOp, deviceOpTime in zip(deviceOps[device], deviceOpsTime): + for deviceOp, deviceOpTime in zip(devicesOps[device], deviceOpsTime): cores = set() for timeID, ts, statData, risc, core in deviceOpTime["timeseries"]: if "zone_name" in timeID.keys() and "FW" in timeID["zone_name"]: @@ -242,7 +328,20 @@ def append_device_data(ops, logFolder): for analysis, data in deviceOp["device_time"].items(): for sample in data: sample["duration_ns"] = sample["duration_cycles"] * 1000 / freq - return deviceOps + traceOps = {} + + # Tag trace ops with a UID + for device in devicesOps: + for deviceOp in devicesOps[device]: + if "trace_runtime_id" in deviceOp.keys(): + deviceOp["global_call_count"] = ( + deviceOp["global_call_count"] | deviceOp["trace_runtime_id"] << 16 + ) + traceOps[deviceOp["global_call_count"]] = deviceOp + else: + # Update host reported device op with device populated version + ops[deviceOp["global_call_count"]] = deviceOp + return devicesOps, traceOps def get_device_data_generate_report( @@ -347,7 +446,7 @@ def 
get_device_data_generate_report( return rowDicts -def generate_reports(ops, deviceOps, signposts, logFolder, outputFolder, date, nameAppend): +def generate_reports(ops, deviceOps, traceOps, signposts, logFolder, outputFolder, date, nameAppend): logger.info(f"OPs' perf analysis is finished! Generating reports ...") outFolder = PROFILER_OUTPUT_DIR if outputFolder: @@ -373,18 +472,6 @@ def generate_reports(ops, deviceOps, signposts, logFolder, outputFolder, date, n if os.path.isfile(f"{logFolder / PROFILER_DEVICE_SIDE_LOG}"): os.system(f"cp {logFolder / PROFILER_DEVICE_SIDE_LOG} {outFolder}") - # logger.info(f"Generating OPs yaml") - # allOpsYAMLPath = os.path.join(outFolder, f"{name}_all_ops.yaml") - # with open(allOpsYAMLPath, "w") as allOpsYAML: - # yaml.safe_dump(ops, allOpsYAML, default_flow_style=False) - # logger.info(f"OPs yaml generated at: {allOpsYAMLPath}") - - # logger.info(f"Generating Device OPs yaml") - # deviceOpsYAMLPath = os.path.join(outFolder, f"{name}_devices_ops.yaml") - # with open(deviceOpsYAMLPath, "w") as deviceOpsYAML: - # yaml.safe_dump(deviceOps, deviceOpsYAML, default_flow_style=False) - # logger.info(f"Device OPs yaml generated at: {deviceOpsYAMLPath}") - logger.info(f"Generating OPs CSV") allOpsCSVPath = os.path.join(outFolder, f"{name}.csv") with open(allOpsCSVPath, "w") as allOpsCSV: @@ -453,19 +540,26 @@ def row_compare(row): if type(row) is str and "sp" in row: ret = signposts[row]["tracy_time"] elif type(row) is int: - ret = ops[row]["tracy_time"] + if row > ((1 << 16) - 1): + ret = traceOps[row]["tracy_time"] + else: + ret = ops[row]["host_time"]["ns_since_start"] ret = int(ret) return ret - rowKeys = list(ops.keys()) + list(signposts.keys()) + rowKeys = list(ops.keys()) + list(traceOps.keys()) + list(signposts.keys()) rowKeys.sort(key=row_compare) childCallKeys = set() for row in rowKeys: if type(row) is int: - op = ops[row] - if "child_calls" in op.keys(): - for childCall in op["child_calls"]: + if row > ((1 << 16) - 1): + 
opData = traceOps[row] + else: + opData = ops[row] + if "child_calls" in opData.keys(): + for childCall in opData["child_calls"]: childCallKeys.add(f"{childCall}_TT_HOST_FUNC [ns]") + for row in rowKeys: rowDict = {} if type(row) is str and "sp" in row: @@ -477,7 +571,15 @@ def row_compare(row): rowDict["HOST START TS"] = int(signposts[row]["tracy_time"]) elif type(row) is int: op = row - opData = ops[op] + if op > ((1 << 16) - 1): + opData = traceOps[op] + opData["global_call_count"] = ((1 << 16) - 1) & op + else: + opData = ops[op] + opData["trace_runtime_id"] = "" + if "trace_id" not in opData.keys() or opData["trace_id"] is None: + opData["trace_id"] = "" + for field, fieldData in opData.items(): headerField = csv_header_format(field) if headerField in OPS_CSV_HEADER: @@ -572,11 +674,11 @@ def process_ops(output_folder, name_append, date): logFolder = generate_logs_folder(output_folder) reportFolder = generate_reports_folder(output_folder) - ops, signposts = import_tracy_op_logs(logFolder) + ops, signposts, traceReplays = import_tracy_op_logs(logFolder) if ops: - deviceOps = append_device_data(ops, logFolder) - generate_reports(ops, deviceOps, signposts, logFolder, reportFolder, date, name_append) + deviceOps, traceOps = append_device_data(ops, traceReplays, logFolder) + generate_reports(ops, deviceOps, traceOps, signposts, logFolder, reportFolder, date, name_append) else: deviceOps = get_device_data_generate_report(logFolder, reportFolder, date, name_append) diff --git a/tt_metal/tools/profiler/tt_metal_tracy.hpp b/tt_metal/tools/profiler/tt_metal_tracy.hpp new file mode 100644 index 00000000000..0b0f94d19ac --- /dev/null +++ b/tt_metal/tools/profiler/tt_metal_tracy.hpp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#if defined(TRACY_ENABLE) + +#define TracyTTMetalBeginTrace( device_id, trace_id ) \ + std::string trace_message = fmt::format("`TT_METAL_TRACE_BEGIN: {}, {}`", device_id, trace_id); \ + TracyMessage(trace_message.c_str(), trace_message.size()); + +#define TracyTTMetalEndTrace( device_id, trace_id ) \ + std::string trace_message = fmt::format("`TT_METAL_TRACE_END: {}, {}`", device_id, trace_id); \ + TracyMessage(trace_message.c_str(), trace_message.size()); + +#define TracyTTMetalReplayTrace( device_id, trace_id ) \ + std::string trace_message = fmt::format("`TT_METAL_TRACE_REPLAY: {}, {}`", device_id, trace_id); \ + TracyMessage(trace_message.c_str(), trace_message.size()); + +#define TracyTTMetalReleaseTrace( device_id, trace_id ) \ + std::string trace_message = fmt::format("`TT_METAL_TRACE_RELEASE: {}, {}`", device_id, trace_id); \ + TracyMessage(trace_message.c_str(), trace_message.size()); +#else + +#define TracyTTMetalBeginTrace( device_id, trace_id ) +#define TracyTTMetalEndTrace( device_id, trace_id ) +#define TracyTTMetalReplayTrace( device_id, trace_id ) +#define TracyTTMetalReleaseTrace( device_id, trace_id ) + +#endif diff --git a/ttnn/cpp/ttnn/operations/core/core.cpp b/ttnn/cpp/ttnn/operations/core/core.cpp index b0523f77339..dba2edf328b 100644 --- a/ttnn/cpp/ttnn/operations/core/core.cpp +++ b/ttnn/cpp/ttnn/operations/core/core.cpp @@ -96,16 +96,19 @@ Tensor reallocate(const Tensor& input_tensor, const std::optional& // Trace APIs - Single Device uint32_t begin_trace_capture(Device* device, const uint8_t cq_id) { + ZoneScoped; uint32_t tid = Trace::next_id(); device->push_work([device, cq_id, tid]() mutable { device->begin_trace(cq_id, tid); }); return tid; } void end_trace_capture(Device* device, const uint32_t tid, const uint8_t cq_id) { + ZoneScoped; device->push_work([device, cq_id, tid]() mutable { device->end_trace(cq_id, tid); }); } void execute_trace(Device* device, const uint32_t tid, const 
uint8_t cq_id, bool blocking) { + ZoneScoped; // If blocking, ensure that worker thread blocks until trace is completed device->push_work([device, cq_id, tid, blocking]() mutable { device->replay_trace(cq_id, tid, blocking); }); // If blocking, wait until worker threads have completed @@ -120,6 +123,7 @@ void release_trace(Device* device, const uint32_t tid) { // Trace APIs - Multi Device uint32_t begin_trace_capture(MeshDevice* device, const uint8_t cq_id) { + ZoneScoped; auto workers = device->get_devices(); uint32_t tid = Trace::next_id(); for (auto& worker : workers) { @@ -129,6 +133,7 @@ uint32_t begin_trace_capture(MeshDevice* device, const uint8_t cq_id) { } void end_trace_capture(MeshDevice* device, const uint32_t tid, const uint8_t cq_id) { + ZoneScoped; auto workers = device->get_devices(); for (auto& worker : workers) { worker->push_work([worker, cq_id, tid]() mutable { worker->end_trace(cq_id, tid); }); @@ -136,6 +141,7 @@ void end_trace_capture(MeshDevice* device, const uint32_t tid, const uint8_t cq_ } void execute_trace(MeshDevice* device, const uint32_t tid, const uint8_t cq_id, bool blocking) { + ZoneScoped; auto workers = device->get_devices(); // If blocking, ensure that each worker thread blocks until device-local trace is completed for (auto& worker : workers) { @@ -150,6 +156,7 @@ void execute_trace(MeshDevice* device, const uint32_t tid, const uint8_t cq_id, } void release_trace(MeshDevice* device, const uint32_t tid) { + ZoneScoped; auto workers = device->get_devices(); for (auto& worker : workers) { worker->push_work([worker, tid]() mutable { worker->release_trace(tid); }); diff --git a/ttnn/tracy/__init__.py b/ttnn/tracy/__init__.py index 4c97dbe866a..5e6315139f1 100644 --- a/ttnn/tracy/__init__.py +++ b/ttnn/tracy/__init__.py @@ -144,7 +144,7 @@ def generate_report(outputFolder, nameAppend, childCalls): if childCallsList: childCallStr = f"-x {','.join(childCallsList)}" subprocess.run( - f"{PROFILER_BIN_DIR / TRACY_CSVEXPROT_TOOL} -u -p 
TT_DNN {childCallStr} {logsFolder / TRACY_FILE_NAME}", + f"{PROFILER_BIN_DIR / TRACY_CSVEXPROT_TOOL} -u -p TT_ {childCallStr} {logsFolder / TRACY_FILE_NAME}", shell=True, check=True, stdout=csvFile,