diff --git a/scripts/build_scripts/build_with_profiler_opt.sh b/scripts/build_scripts/build_with_profiler_opt.sh index 0b709ea0eb8..2cb9e1d94a6 100755 --- a/scripts/build_scripts/build_with_profiler_opt.sh +++ b/scripts/build_scripts/build_with_profiler_opt.sh @@ -11,10 +11,15 @@ if [[ -z "$ARCH_NAME" ]]; then exit 1 fi -remove_default_log_locations - ENABLE_TRACY=1 ENABLE_PROFILER=1 cmake -B build -G Ninja -DCMAKE_CXX_COMPILER=clang++-17 -cmake --build build --target clean + +if [[ $1 == "NO_CLEAN" ]]; then + cmake --build build +else + remove_default_log_locations + cmake --build build --target clean +fi + cmake --build build --target install cmake --build build --target programming_examples PYTHON_ENV_DIR=$(pwd)/build/python_env ./create_venv.sh diff --git a/tt_eager/tracy.py b/tt_eager/tracy.py index 1e441e51908..d92a5c05e83 100644 --- a/tt_eager/tracy.py +++ b/tt_eager/tracy.py @@ -29,6 +29,8 @@ import tracy_state +DEFAULT_CHILD_CALLS = ["CompileProgram", "HWCommandQueue_write_buffer"] + def signpost(header, message=None): from tracy_tt_lib import tracy_message @@ -118,7 +120,7 @@ def run_report_setup(verbose, port): return toolsReady, captureProcess -def generate_report(outFolder, nameAppend): +def generate_report(outFolder, nameAppend, childCalls): tracyOutFile = PROFILER_LOGS_DIR / TRACY_FILE_NAME timeOut = 15 timeCount = 0 @@ -134,8 +136,14 @@ def generate_report(outFolder, nameAppend): timeCount += 1 time.sleep(1) with open(PROFILER_LOGS_DIR / TRACY_OPS_TIMES_FILE_NAME, "w") as csvFile: + childCallStr = "" + childCallsList = DEFAULT_CHILD_CALLS + if childCalls: + childCallsList = list(set(childCalls + DEFAULT_CHILD_CALLS)) + if childCallsList: + childCallStr = f"-x {','.join(childCallsList)}" subprocess.run( - f"{PROFILER_BIN_DIR / TRACY_CSVEXPROT_TOOL} -u -f TT_DNN {PROFILER_LOGS_DIR / TRACY_FILE_NAME}", + f"{PROFILER_BIN_DIR / TRACY_CSVEXPROT_TOOL} -u -p TT_DNN {childCallStr} {PROFILER_LOGS_DIR / TRACY_FILE_NAME}", shell=True, check=True, 
stdout=csvFile, @@ -173,6 +181,10 @@ def get_available_port(): return None +def split_comma_list(option, opt, value, parser): + setattr(parser.values, option.dest, value.split(",")) + + def main(): from optparse import OptionParser @@ -208,6 +220,13 @@ help="Show full op info for cached ops as well", default=True, ) + parser.add_option( + "--child-functions", + type="string", + help="Comma separated list of child functions to have their duration included for parent OPs", + action="callback", + callback=split_comma_list, + ) if not sys.argv[1:]: parser.print_usage() @@ -306,7 +325,7 @@ def signal_handler(sig, frame): try: captureProcess.communicate(timeout=15) - generate_report(options.output_folder, options.name_append) + generate_report(options.output_folder, options.name_append, options.child_functions) except subprocess.TimeoutExpired as e: captureProcess.terminate() captureProcess.communicate() diff --git a/tt_metal/third_party/tracy b/tt_metal/third_party/tracy index a1fe17124dd..555d4cb56cf 160000 --- a/tt_metal/third_party/tracy +++ b/tt_metal/third_party/tracy @@ -1 +1 @@ -Subproject commit a1fe17124ddf8f0ade677a9cca88b4f6625dcf36 +Subproject commit 555d4cb56cf07c1cb651da16e2addb0d6304a5b1 diff --git a/tt_metal/tools/profiler/process_ops_logs.py b/tt_metal/tools/profiler/process_ops_logs.py index 0f14db45b1c..231be21a787 100755 --- a/tt_metal/tools/profiler/process_ops_logs.py +++ b/tt_metal/tools/profiler/process_ops_logs.py @@ -126,9 +126,24 @@ def import_tracy_op_logs(): with open(tracyOpTimesLog, "r") as csvFile: csvReader = csv.DictReader(csvFile) for op in csvReader: - opID = int(op["zone_text"].split(":")[-1]) - assert opID in ops.keys(), f"Op time for op {opID} must present" - ops[opID]["host_time"] = op + if "TT_DNN" in op["name"]: + opID = int(op["zone_text"].split(":")[-1]) + assert opID in ops.keys(), f"Op time for op {opID} must be present" + ops[opID]["host_time"] = op + + with open(tracyOpTimesLog, "r") as csvFile: + csvReader = 
csv.DictReader(csvFile) + for op in csvReader: + if op["special_parent_text"] and "id:" in op["special_parent_text"]: + parentOpID = int(op["special_parent_text"].split(":")[-1]) + + if "child_calls" in ops[parentOpID].keys(): + if op["name"] in ops[parentOpID]["child_calls"].keys(): + ops[parentOpID]["child_calls"][op["name"]] += int(op["exec_time_ns"]) + else: + ops[parentOpID]["child_calls"][op["name"]] = int(op["exec_time_ns"]) + else: + ops[parentOpID]["child_calls"] = {op["name"]: int(op["exec_time_ns"])} return ops, signposts @@ -300,6 +315,13 @@ def row_compare(row): rowKeys = list(ops.keys()) + list(signposts.keys()) rowKeys.sort(key=row_compare) + childCallKeys = set() + for row in rowKeys: + if type(row) is int: + op = ops[row] + if "child_calls" in op.keys(): + for childCall in op["child_calls"]: + childCallKeys.add(f"{childCall}_TT_HOST_FUNC [ns]") for row in rowKeys: rowDict = {} if type(row) is str and "sp" in row: @@ -360,6 +382,11 @@ def row_compare(row): rowDict["OP TO OP LATENCY [ns]"] = 0 devicePreOpTime[deviceID] = analysisData[0]["end_cycle"] + if "child_calls" in opData.keys(): + for childCall, duration in opData["child_calls"].items(): + headerField = f"{childCall}_TT_HOST_FUNC [ns]" + rowDict[headerField] = f"{duration:.0f}" + assert "input_tensors" in opData.keys(), "Ops must have input tensors" if "optional_input_tensors" in opData.keys(): add_io_data(opData["input_tensors"] + opData["optional_input_tensors"], "INPUT") @@ -384,6 +411,7 @@ def row_compare(row): + tensorCSVData["INPUT"]["headers"] + tensorCSVData["OUTPUT"]["headers"] + OPS_CSV_HEADER[ioHeaderIndex + 2 :] + + sorted(list(childCallKeys)) ) writer = csv.DictWriter(allOpsCSV, fieldnames=allHeaders) writer.writeheader()