Skip to content

Commit

Permalink
#9956: Trace profiling smoke test
Browse files Browse the repository at this point in the history
  • Loading branch information
mo-tenstorrent committed Nov 5, 2024
1 parent a0c2611 commit 52fc4cc
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 20 deletions.
56 changes: 40 additions & 16 deletions tests/scripts/run_profiler_regressions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,69 @@ source scripts/tools_setup_common.sh

set -eo pipefail

run_additional_T3000_test(){
remove_default_log_locations
mkdir -p $PROFILER_ARTIFACTS_DIR

./tt_metal/tools/profiler/profile_this.py -c "'pytest tests/ttnn/unit_tests/operations/ccl/test_all_gather.py::test_all_gather_on_t3000_post_commit_for_profiler_regression'" | tee $PROFILER_ARTIFACTS_DIR/test_out.log
run_async_mode_T3000_test(){
#Some tests here do not skip grayskull
if [ "$ARCH_NAME" == "wormhole_b0" ]; then
remove_default_log_locations
mkdir -p $PROFILER_ARTIFACTS_DIR

if cat $PROFILER_ARTIFACTS_DIR/test_out.log | grep "SKIPPED"
then
echo "No verification as test was skipped"
else
echo "Verifying test results"
runDate=$(ls $PROFILER_OUTPUT_DIR/)
LINE_COUNT=9 #1 header + 8 devices
res=$(verify_perf_line_count "$PROFILER_OUTPUT_DIR/$runDate/ops_perf_results_$runDate.csv" "$LINE_COUNT")
echo $res
./tt_metal/tools/profiler/profile_this.py -c "pytest -svv models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_causallm.py::test_falcon_causal_lm[wormhole_b0-True-True-20-2-BFLOAT16-L1-falcon_7b-layers_2-decode_batch32]" | tee $PROFILER_ARTIFACTS_DIR/test_out.log

if cat $PROFILER_ARTIFACTS_DIR/test_out.log | grep "SKIPPED"
then
echo "No verification as test was skipped"
else
echo "Verifying test results"
runDate=$(ls $PROFILER_OUTPUT_DIR/)
LINE_COUNT=1000 # Smoke test to see at least 1000 ops are reported
res=$(verify_perf_line_count_floor "$PROFILER_OUTPUT_DIR/$runDate/ops_perf_results_$runDate.csv" "$LINE_COUNT")
echo $res
fi
fi
}

run_async_mode_T3000_test(){
run_tracing_async_mode_T3000_test(){
#Some tests here do not skip grayskull
if [ "$ARCH_NAME" == "wormhole_b0" ]; then
remove_default_log_locations
mkdir -p $PROFILER_ARTIFACTS_DIR

./tt_metal/tools/profiler/profile_this.py -c "pytest -svv models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_causallm.py::test_falcon_causal_lm[wormhole_b0-True-True-20-2-BFLOAT16-L1-falcon_7b-layers_2-decode_batch32]" | tee $PROFILER_ARTIFACTS_DIR/test_out.log
env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml ./tt_metal/tools/profiler/profile_this.py -c "pytest models/demos/t3000/resnet50/tests/test_resnet50_performant.py::test_run_resnet50_trace_2cqs_inference[wormhole_b0-True-True-16-act_dtype0-weight_dtype0-math_fidelity0-device_params0]" | tee $PROFILER_ARTIFACTS_DIR/test_out.log

if cat $PROFILER_ARTIFACTS_DIR/test_out.log | grep "SKIPPED"
then
echo "No verification as test was skipped"
else
echo "Verifying test results"
runDate=$(ls $PROFILER_OUTPUT_DIR/)
LINE_COUNT=1000 # Smoke test to see at least 1000 ops are reported
LINE_COUNT=4100 # Smoke test to see at least 4100 ops are reported
res=$(verify_perf_line_count_floor "$PROFILER_OUTPUT_DIR/$runDate/ops_perf_results_$runDate.csv" "$LINE_COUNT")
echo $res
fi
fi
}

# Profile the T3000 all-gather regression test and check the resulting report.
#
# Steps:
#   1. Clear the default log locations and make sure the artifacts dir exists.
#   2. Run the pytest target through profile_this.py, mirroring its output to
#      $PROFILER_ARTIFACTS_DIR/test_out.log.
#   3. If that log contains "SKIPPED", stop without verifying anything.
#      Otherwise hand the ops CSV to verify_perf_line_count and then run
#      run_tracing_async_mode_T3000_test.
#
# Globals read:    PROFILER_ARTIFACTS_DIR, PROFILER_OUTPUT_DIR
# Globals written: runDate, LINE_COUNT, res (left non-local, as before)
# Depends on:      remove_default_log_locations, verify_perf_line_count and
#                  run_tracing_async_mode_T3000_test, none of which are
#                  defined in this function.
run_additional_T3000_test(){
    remove_default_log_locations
    mkdir -p "$PROFILER_ARTIFACTS_DIR"

    # The inner single quotes are part of the argument passed to -c.
    ./tt_metal/tools/profiler/profile_this.py -c "'pytest tests/ttnn/unit_tests/operations/ccl/test_all_gather.py::test_all_gather_on_t3000_post_commit_for_profiler_regression'" | tee "$PROFILER_ARTIFACTS_DIR/test_out.log"

    # grep is deliberately not silenced: matching lines are echoed to stdout.
    if grep "SKIPPED" "$PROFILER_ARTIFACTS_DIR/test_out.log"
    then
        echo "No verification as test was skipped"
    else
        echo "Verifying test results"
        # NOTE(review): assumes the output dir holds exactly one entry (the
        # run-date folder); with several entries this path is wrong - confirm.
        runDate=$(ls "$PROFILER_OUTPUT_DIR/")
        LINE_COUNT=9 #1 header + 8 devices
        res=$(verify_perf_line_count "$PROFILER_OUTPUT_DIR/$runDate/ops_perf_results_$runDate.csv" "$LINE_COUNT")
        # Quoted so the verifier's message is printed verbatim (no word
        # splitting or glob expansion).
        echo "$res"

        run_tracing_async_mode_T3000_test
    fi
}

run_profiling_test(){
if [[ -z "$ARCH_NAME" ]]; then
echo "Must provide ARCH_NAME in environment" 1>&2
Expand Down
6 changes: 3 additions & 3 deletions tt_metal/tools/profiler/process_ops_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def import_tracy_op_logs(logFolder):
if len(tmpStrs) > 1: # uncached device op, host op, or fallback op
jsonStr = tmpStrs[-1]
opData = json.loads(jsonStr)
opData["trace_id"] = None
if "op_hash" in opData.keys():
assert "device_id" in opData.keys()
deviceID = int(opData["device_id"])
Expand All @@ -119,9 +120,8 @@ def import_tracy_op_logs(logFolder):
else:
cached_ops[deviceID] = {opHash: opData.copy()}
del cached_ops[deviceID][opHash]["global_call_count"]
opData["trace_id"] = None
if deviceID in traceIDs:
opData["trace_id"] = traceIDs[deviceID]
if deviceID in traceIDs:
opData["trace_id"] = traceIDs[deviceID]
else: # cached device op
opDataList = opDataStr.split(":", 1)[-1].split(",")
assert len(opDataList) > 3, "Wrong cached op info format"
Expand Down
3 changes: 2 additions & 1 deletion tt_metal/tools/profiler/tt_metal_tracy.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0
#pragma once

#if defined(TRACY_ENABLE)

Expand Down

0 comments on commit 52fc4cc

Please sign in to comment.