From fbd0602c01c2bda3ea8730ed6f72d3f4088cea01 Mon Sep 17 00:00:00 2001 From: Ivan Hamer <153605438+ihamer-tt@users.noreply.github.com> Date: Fri, 31 May 2024 09:46:42 +0200 Subject: [PATCH] Ihamer/7468 inject noc delays (#8889) * #7468: add support for delays * #7468: Bug fixes. Improved hashing. * #7468: Add atomic transaction category * #7468: documentation for debug delay * #7468: Spelling, wording * #7468: merge with main * #7468: fix typo * #7468: fix rst formatting * #7468: Break core selection for each of transaction types * #7468: test delay feature * #7468: Fix to use hash function Instead of get_feature_enabled, use get_feature_hash_string for hash computation. Comments and documentation wording as well. * #7468: Update tracy to match main * #7468: Save/restore target selection in test fixture * Update docs/source/tt-metalium/tools/watcher.rst Co-authored-by: David Ma <149437891+tt-dma@users.noreply.github.com> * #7468: Fix invocation example * #7468: Use riscv_wait instead of busy loop. * #7468: Print warning on wrong core coordinates * #7468: Fix end of lines * #7468: Update the docs for the change from loop iterations to clk cycles --------- Co-authored-by: David Ma <149437891+tt-dma@users.noreply.github.com> --- docs/source/tt-metalium/tools/watcher.rst | 21 + .../tt_metal/unit_tests_common/CMakeLists.txt | 1 + .../common/dprint_fixture.hpp | 28 +- .../common/watcher_fixture.hpp | 38 ++ .../dprint/test_invalid_print_core.cpp | 6 +- .../watcher/test_noc_sanitize_delays.cpp | 167 +++++ tt_metal/hw/inc/dataflow_api.h | 2 + tt_metal/hw/inc/debug/sanitize_noc.h | 196 +++--- tt_metal/hw/inc/dev_msgs.h | 67 +- tt_metal/impl/debug/dprint_server.cpp | 44 +- tt_metal/impl/debug/watcher_server.cpp | 633 ++++++++++-------- tt_metal/impl/program/program.cpp | 10 +- tt_metal/jit_build/build.cpp | 6 +- tt_metal/llrt/rtoptions.cpp | 116 ++-- tt_metal/llrt/rtoptions.hpp | 197 ++++-- 15 files changed, 990 insertions(+), 542 deletions(-) create mode 100644 tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize_delays.cpp diff --git a/docs/source/tt-metalium/tools/watcher.rst b/docs/source/tt-metalium/tools/watcher.rst index 82b8ad9a630..4392079f362 100644 --- a/docs/source/tt-metalium/tools/watcher.rst +++ b/docs/source/tt-metalium/tools/watcher.rst @@ -201,3 +201,24 @@ watcher log: 0x00000020,0x0000001f,0x0000001e,0x0000001d,0x0000001c,0x0000001b,0x0000001a,0x00000019, 0x00000018,0x00000017,0x00000016,0x00000015,0x00000014,0x00000013,0x00000012,0x00000011, 0x00000010,0x0000000f,0x0000000e,0x0000000d,0x0000000c,0x0000000b,0x0000000a] + + +Debug Delays +------------ +Watcher can insert NOC transaction delays for debugging purposes. These delays can be specified by +transaction type and location. Environment variable `TT_METAL_WATCHER_DELAY` specifies the number +of clock cycles to wait for. Similarly to DPRINT, the delay can be set for all cores, or a +or a subset by setting environment variable `TT_METAL_*_DEBUG_DELAY_CORES`: x,y OR (x1,y1),(x2,y2),(x3,y3) OR (x1,y1)-(x2,y2) OR all. +The * can be one of: READ, WRITE or ATOMIC indicating whether the delays will be inserted before read, write or atomic NOC +transactions. Finally, the delay can be set for a specific RISCs (BRISC, NCRISC, TRISC0, TRISC1, TRISC2) through the +environment variable `TT_METAL_*_DEBUG_DELAY_RISCVS`: (one of: BR,NC,TR0,TR1,TR2); if not set, the delay +is applied to all RISCs. +Note that `TT_METAL_WATCHER` must be set and `TT_METAL_WATCHER_DISABLE_NOC_SANITIZE` must not be +set for the delays to be applied. + +For example, the following command will run test_eltwise_binary with a delay of 10 iterations added to both READ and WRITE +transactions on BRISC core at location 0,0: + +.. code-block:: + + TT_METAL_WATCHER=1 TT_METAL_WATCHER_DEBUG_DELAY=10 TT_METAL_READ_DEBUG_DELAY_CORES=0,0 TT_METAL_WRITE_DEBUG_DELAY_CORES=0,0 TT_METAL_READ_DEBUG_DELAY_RISCVS=BR TT_METAL_WRITE_DEBUG_DELAY_RISCVS=BR ./build/test/tt_metal/test_eltwise_binary diff --git a/tests/tt_metal/tt_metal/unit_tests_common/CMakeLists.txt b/tests/tt_metal/tt_metal/unit_tests_common/CMakeLists.txt index 019b23ef9e0..03c6e0a6f28 100644 --- a/tests/tt_metal/tt_metal/unit_tests_common/CMakeLists.txt +++ b/tests/tt_metal/tt_metal/unit_tests_common/CMakeLists.txt @@ -21,6 +21,7 @@ set(UNIT_TESTS_COMMON_SRC ${CMAKE_CURRENT_SOURCE_DIR}/dram/test_dram.cpp ${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_assert.cpp ${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_noc_sanitize.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_noc_sanitize_delays.cpp ${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_pause.cpp ${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_ringbuf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/watcher/test_waypoint.cpp diff --git a/tests/tt_metal/tt_metal/unit_tests_common/common/dprint_fixture.hpp b/tests/tt_metal/tt_metal/unit_tests_common/common/dprint_fixture.hpp index 8a92e65802a..9c9f36122a0 100644 --- a/tests/tt_metal/tt_metal/unit_tests_common/common/dprint_fixture.hpp +++ b/tests/tt_metal/tt_metal/unit_tests_common/common/dprint_fixture.hpp @@ -27,12 +27,12 @@ class DPrintFixture: public CommonFixture { // The core range (physical) needs to be set >= the set of all cores // used by all tests using this fixture, so set dprint enabled for // all cores and all devices - tt::llrt::OptionsG.set_dprint_enabled(true); - tt::llrt::OptionsG.set_dprint_all_cores(CoreType::WORKER, true); - tt::llrt::OptionsG.set_dprint_all_cores(CoreType::ETH, true); - tt::llrt::OptionsG.set_dprint_all_chips(true); + tt::llrt::OptionsG.set_feature_enabled(tt::llrt::RunTimeDebugFeatureDprint, true); + tt::llrt::OptionsG.set_feature_all_cores(tt::llrt::RunTimeDebugFeatureDprint, CoreType::WORKER, true); + tt::llrt::OptionsG.set_feature_all_cores(tt::llrt::RunTimeDebugFeatureDprint, CoreType::ETH, true); + tt::llrt::OptionsG.set_feature_all_chips(tt::llrt::RunTimeDebugFeatureDprint, true); // Send output to a file so the test can check after program is run. - tt::llrt::OptionsG.set_dprint_file_name(dprint_file_name); + tt::llrt::OptionsG.set_feature_file_name(tt::llrt::RunTimeDebugFeatureDprint, dprint_file_name); tt::llrt::OptionsG.set_test_mode_enabled(true); watcher_previous_enabled = tt::llrt::OptionsG.get_watcher_enabled(); tt::llrt::OptionsG.set_watcher_enabled(false); @@ -49,7 +49,7 @@ class DPrintFixture: public CommonFixture { disabled[core_desc.dispatch_core_type].insert(core); } } - tt::llrt::OptionsG.set_dprint_disabled_cores(disabled); + tt::llrt::OptionsG.set_feature_disabled_cores(tt::llrt::RunTimeDebugFeatureDprint, disabled); ExtraSetUp(); @@ -65,12 +65,12 @@ class DPrintFixture: public CommonFixture { std::remove(dprint_file_name.c_str()); // Reset DPrint settings - tt::llrt::OptionsG.set_dprint_cores({}); - tt::llrt::OptionsG.set_dprint_enabled(false); - tt::llrt::OptionsG.set_dprint_all_cores(CoreType::WORKER, false); - tt::llrt::OptionsG.set_dprint_all_cores(CoreType::ETH, false); - tt::llrt::OptionsG.set_dprint_all_chips(false); - tt::llrt::OptionsG.set_dprint_file_name(""); + tt::llrt::OptionsG.set_feature_cores(tt::llrt::RunTimeDebugFeatureDprint, {}); + tt::llrt::OptionsG.set_feature_enabled(tt::llrt::RunTimeDebugFeatureDprint, false); + tt::llrt::OptionsG.set_feature_all_cores(tt::llrt::RunTimeDebugFeatureDprint, CoreType::WORKER, false); + tt::llrt::OptionsG.set_feature_all_cores(tt::llrt::RunTimeDebugFeatureDprint, CoreType::ETH, false); + tt::llrt::OptionsG.set_feature_all_chips(tt::llrt::RunTimeDebugFeatureDprint, false); + tt::llrt::OptionsG.set_feature_file_name(tt::llrt::RunTimeDebugFeatureDprint, ""); tt::llrt::OptionsG.set_test_mode_enabled(false); tt::llrt::OptionsG.set_watcher_enabled(watcher_previous_enabled); } @@ -97,7 +97,7 @@ class DPrintFixtureDisableDevices: public DPrintFixture { protected: void ExtraSetUp() override { // For this test, mute each devices using the environment variable - tt::llrt::OptionsG.set_dprint_all_chips(false); - tt::llrt::OptionsG.set_dprint_chip_ids({}); + tt::llrt::OptionsG.set_feature_all_chips(tt::llrt::RunTimeDebugFeatureDprint, false); + tt::llrt::OptionsG.set_feature_chip_ids(tt::llrt::RunTimeDebugFeatureDprint, {}); } }; diff --git a/tests/tt_metal/tt_metal/unit_tests_common/common/watcher_fixture.hpp b/tests/tt_metal/tt_metal/unit_tests_common/common/watcher_fixture.hpp index 11d6b7ea942..ecbfbdf3552 100644 --- a/tests/tt_metal/tt_metal/unit_tests_common/common/watcher_fixture.hpp +++ b/tests/tt_metal/tt_metal/unit_tests_common/common/watcher_fixture.hpp @@ -6,6 +6,7 @@ #include #include "common_fixture.hpp" #include "impl/debug/watcher_server.hpp" +#include "llrt/rtoptions.hpp" // A version of CommonFixture with watcher enabled class WatcherFixture: public CommonFixture { @@ -84,3 +85,40 @@ class WatcherFixture: public CommonFixture { tt::watcher_clear_log(); } }; + +// A version of WatcherFixture with read and write debug delays enabled +class WatcherDelayFixture : public WatcherFixture { +public: + tt::llrt::TargetSelection saved_target_selection[tt::llrt::RunTimeDebugFeatureCount]; + + std::map> delayed_cores; + + void SetUp() override { + tt::llrt::OptionsG.set_watcher_debug_delay(5000000); + delayed_cores[CoreType::WORKER] = {{0, 0}, {1, 1}}; + + // Store the previous state of the watcher features + saved_target_selection[tt::llrt::RunTimeDebugFeatureReadDebugDelay] = tt::llrt::OptionsG.get_feature_targets(tt::llrt::RunTimeDebugFeatureReadDebugDelay); + saved_target_selection[tt::llrt::RunTimeDebugFeatureWriteDebugDelay] = tt::llrt::OptionsG.get_feature_targets(tt::llrt::RunTimeDebugFeatureWriteDebugDelay); + saved_target_selection[tt::llrt::RunTimeDebugFeatureAtomicDebugDelay] = tt::llrt::OptionsG.get_feature_targets(tt::llrt::RunTimeDebugFeatureAtomicDebugDelay); + + // Enable read and write debug delay for the test core + tt::llrt::OptionsG.set_feature_enabled(tt::llrt::RunTimeDebugFeatureReadDebugDelay, true); + tt::llrt::OptionsG.set_feature_cores(tt::llrt::RunTimeDebugFeatureReadDebugDelay, delayed_cores); + tt::llrt::OptionsG.set_feature_enabled(tt::llrt::RunTimeDebugFeatureWriteDebugDelay, true); + tt::llrt::OptionsG.set_feature_cores(tt::llrt::RunTimeDebugFeatureWriteDebugDelay, delayed_cores); + + // Call parent + WatcherFixture::SetUp(); + } + + void TearDown() override { + // Call parent + WatcherFixture::TearDown(); + + // Restore + tt::llrt::OptionsG.set_feature_targets(tt::llrt::RunTimeDebugFeatureReadDebugDelay, saved_target_selection[tt::llrt::RunTimeDebugFeatureReadDebugDelay]); + tt::llrt::OptionsG.set_feature_targets(tt::llrt::RunTimeDebugFeatureWriteDebugDelay, saved_target_selection[tt::llrt::RunTimeDebugFeatureWriteDebugDelay]); + tt::llrt::OptionsG.set_feature_targets(tt::llrt::RunTimeDebugFeatureAtomicDebugDelay, saved_target_selection[tt::llrt::RunTimeDebugFeatureAtomicDebugDelay]); + } +}; diff --git a/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_invalid_print_core.cpp b/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_invalid_print_core.cpp index eb6a9c7e52b..671b18a0ec8 100644 --- a/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_invalid_print_core.cpp +++ b/tests/tt_metal/tt_metal/unit_tests_common/dprint/test_invalid_print_core.cpp @@ -17,8 +17,8 @@ TEST(DPrintErrorChecking, TestPrintInvalidCore) { // device setup, but not the print server should simply ignore the invalid cores. std::map> dprint_cores; dprint_cores[CoreType::WORKER] = {{0, 0}, {1, 1}, {100, 100}}; - tt::llrt::OptionsG.set_dprint_cores(dprint_cores); // Only (100, 100) is invalid. - tt::llrt::OptionsG.set_dprint_enabled(true); + tt::llrt::OptionsG.set_feature_cores(tt::llrt::RunTimeDebugFeatureDprint, dprint_cores); + tt::llrt::OptionsG.set_feature_enabled(tt::llrt::RunTimeDebugFeatureDprint, true); const int device_id = 0; Device* device = nullptr; @@ -27,6 +27,6 @@ TEST(DPrintErrorChecking, TestPrintInvalidCore) { // We expect that even though illegal worker cores were requested, device setup did not hang. // So just make sure that device setup worked and then close the device. EXPECT_TRUE(device != nullptr); - tt::llrt::OptionsG.set_dprint_enabled(false); + tt::llrt::OptionsG.set_feature_enabled(tt::llrt::RunTimeDebugFeatureDprint, false); tt::tt_metal::CloseDevice(device); } diff --git a/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize_delays.cpp b/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize_delays.cpp new file mode 100644 index 00000000000..e981d1b61c4 --- /dev/null +++ b/tests/tt_metal/tt_metal/unit_tests_common/watcher/test_noc_sanitize_delays.cpp @@ -0,0 +1,167 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "llrt/rtoptions.hpp" +#include "watcher_fixture.hpp" +#include "test_utils.hpp" +#include "llrt/llrt.hpp" +#include "tt_metal/detail/tt_metal.hpp" +#include "tt_metal/host_api.hpp" +#include "common/bfloat16.hpp" +#include "hostdevcommon/common_runtime_address_map.h" + +////////////////////////////////////////////////////////////////////////////////////////// +// A test for checking watcher NOC sanitization. +////////////////////////////////////////////////////////////////////////////////////////// +using namespace tt; +using namespace tt::tt_metal; + +// Incrementally populate a vector with bfloat16 values starting from a given float value +// and incrementing by 1.0f for each element. +void inc_populate(std::vector& vec, float start_from) { + float val = start_from; + for (std::uint32_t i = 0; i < vec.size(); i++) { + bfloat16 num_1_bfloat16 = bfloat16(val); + val = val + 1.0f; + bfloat16 num_2_bfloat16 = bfloat16(val); + val = val + 1.0f; + vec.at(i) = pack_two_bfloat16_into_uint32(std::pair(num_1_bfloat16, num_2_bfloat16)); + } +} + +void RunDelayTestOnCore(WatcherDelayFixture* fixture, Device* device, CoreCoord &core) { + tt_metal::Program program = tt_metal::CreateProgram(); + + const uint32_t SINGLE_TILE_SIZE = 2 * 1024; + const uint32_t NUM_TILES = 4; + const uint32_t DRAM_BUFFER_SIZE = SINGLE_TILE_SIZE * NUM_TILES; // NUM_TILES of FP16_B, hard-coded in the reader/writer kernels + const uint32_t PAGE_SIZE = DRAM_BUFFER_SIZE; + + tt_metal::InterleavedBufferConfig dram_config{ + .device=device, + .size = DRAM_BUFFER_SIZE, + .page_size = PAGE_SIZE, + .buffer_type = tt_metal::BufferType::DRAM + }; + + auto src0_dram_buffer = CreateBuffer(dram_config); + uint32_t dram_buffer_src0_addr = src0_dram_buffer->address(); + auto src1_dram_buffer = CreateBuffer(dram_config); + uint32_t dram_buffer_src1_addr = src1_dram_buffer->address(); + auto dst_dram_buffer = CreateBuffer(dram_config); + uint32_t dram_buffer_dst_addr = dst_dram_buffer->address(); + + auto dram_src0_noc_xy = src0_dram_buffer->noc_coordinates(); + auto dram_src1_noc_xy = src1_dram_buffer->noc_coordinates(); + auto dram_dst_noc_xy = dst_dram_buffer->noc_coordinates(); + + uint32_t src0_cb_index = 0; + uint32_t num_input_tiles = 2; + tt_metal::CircularBufferConfig cb_src0_config = tt_metal::CircularBufferConfig(num_input_tiles * SINGLE_TILE_SIZE, {{src0_cb_index, tt::DataFormat::Float16_b}}) + .set_page_size(src0_cb_index, SINGLE_TILE_SIZE); + auto cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config); + + uint32_t src1_cb_index = 1; + tt_metal::CircularBufferConfig cb_src1_config = tt_metal::CircularBufferConfig(num_input_tiles * SINGLE_TILE_SIZE, {{src1_cb_index, tt::DataFormat::Float16_b}}) + .set_page_size(src1_cb_index, SINGLE_TILE_SIZE); + auto cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config); + + uint32_t ouput_cb_index = 16; // output operands start at index 16 + uint32_t num_output_tiles = 2; + tt_metal::CircularBufferConfig cb_output_config = tt_metal::CircularBufferConfig(num_output_tiles * SINGLE_TILE_SIZE, {{ouput_cb_index, tt::DataFormat::Float16_b}}) + .set_page_size(ouput_cb_index, SINGLE_TILE_SIZE); + auto cb_output = tt_metal::CreateCircularBuffer(program, core, cb_output_config); + + auto binary_reader_kernel = tt_metal::CreateKernel( + program, + "tt_metal/kernels/dataflow/reader_binary_diff_lengths.cpp", + core, + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_1, .noc = tt_metal::NOC::RISCV_1_default}); + + auto unary_writer_kernel = tt_metal::CreateKernel( + program, + "tt_metal/kernels/dataflow/writer_unary.cpp", + core, + tt_metal::DataMovementConfig{.processor = tt_metal::DataMovementProcessor::RISCV_0, .noc = tt_metal::NOC::RISCV_0_default}); + + vector compute_kernel_args = { }; + + std::map binary_defines = { + { "ELTWISE_OP", "add_tiles" }, + { "ELTWISE_OP_CODE", "0" } + }; + auto eltwise_binary_kernel = tt_metal::CreateKernel( + program, + "tt_metal/kernels/compute/eltwise_binary.cpp", + core, + tt_metal::ComputeConfig{.compile_args = compute_kernel_args, .defines = binary_defines}); + + SetRuntimeArgs( + program, + eltwise_binary_kernel, + core, + {NUM_TILES, 1} + ); + + float constant = 0.0f; + float start_from = 0.0f; + std::vector src0_vec = create_constant_vector_of_bfloat16(DRAM_BUFFER_SIZE, constant); + std::vector src1_vec = create_constant_vector_of_bfloat16(DRAM_BUFFER_SIZE, 0.0f); + inc_populate(src1_vec, start_from); + std::vector expected_vec = create_constant_vector_of_bfloat16(DRAM_BUFFER_SIZE, 0.0f); + inc_populate(expected_vec, start_from + constant); + + CommandQueue& cq = device->command_queue(); + + EnqueueWriteBuffer(cq, std::ref(src0_dram_buffer), src0_vec, false); + EnqueueWriteBuffer(cq, std::ref(src1_dram_buffer), src1_vec, false); + + vector reader_args = { + dram_buffer_src0_addr, + (std::uint32_t)dram_src0_noc_xy.x, + (std::uint32_t)dram_src0_noc_xy.y, + NUM_TILES, + dram_buffer_src1_addr, + (std::uint32_t)dram_src1_noc_xy.x, + (std::uint32_t)dram_src1_noc_xy.y, + NUM_TILES, + 0}; + + vector writer_args = { + dram_buffer_dst_addr, (std::uint32_t)dram_dst_noc_xy.x, (std::uint32_t)dram_dst_noc_xy.y, NUM_TILES + }; + + SetRuntimeArgs(program, unary_writer_kernel, core, writer_args); + SetRuntimeArgs(program, binary_reader_kernel, core, reader_args); + + EnqueueProgram(cq, program, false); + std::vector result_vec; + EnqueueReadBuffer(cq, dst_dram_buffer, result_vec, true); + + // Print the feedback generated by debug_delay functionality + std::vector read_vec; + + CoreCoord worker_core = fixture->delayed_cores[CoreType::WORKER][0]; // Just check that the first delayed core has the feedback set + CoreCoord phys_core = device->physical_core_from_logical_core({0,0}, CoreType::WORKER); + read_vec = tt::llrt::read_hex_vec_from_core ( + device->id(), + phys_core, + GET_MAILBOX_ADDRESS_HOST(debug_insert_delays), sizeof(debug_insert_delays_msg_t)); + + log_info(tt::LogTest, "Read back debug_insert_delays: 0x{:x}", read_vec[0]); + EXPECT_TRUE((read_vec[0] >> 24) == 0x3); +} + +TEST_F(WatcherDelayFixture, TestWatcherSanitizeInsertDelays) { + if (this->slow_dispatch_) + GTEST_SKIP(); + + this->RunTestOnDevice( + [](WatcherFixture *fixture, Device *device){ + CoreCoord core{0, 0}; + RunDelayTestOnCore(dynamic_cast(fixture), device, core); + }, + this->devices_[0] + ); +} diff --git a/tt_metal/hw/inc/dataflow_api.h b/tt_metal/hw/inc/dataflow_api.h index e985e0f521c..91b1a26f8f3 100644 --- a/tt_metal/hw/inc/dataflow_api.h +++ b/tt_metal/hw/inc/dataflow_api.h @@ -24,6 +24,7 @@ #include "risc_attribs.h" #include "third_party/umd/device/tt_silicon_driver_common.hpp" #include "debug/assert.h" +#include "dev_msgs.h" extern uint8_t noc_index; @@ -1602,6 +1603,7 @@ void noc_semaphore_inc(uint64_t addr, uint32_t incr) { */ DEBUG_STATUS("NSIW"); DEBUG_SANITIZE_NOC_ADDR(addr, 4); + DEBUG_INSERT_DELAY(TransactionAtomic); noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, addr, NOC_UNICAST_WRITE_VC, incr, 31 /*wrap*/, false /*linked*/, false /*posted*/); DEBUG_STATUS("NSID"); } diff --git a/tt_metal/hw/inc/debug/sanitize_noc.h b/tt_metal/hw/inc/debug/sanitize_noc.h index 4f3d0db1877..b8600e514e7 100644 --- a/tt_metal/hw/inc/debug/sanitize_noc.h +++ b/tt_metal/hw/inc/debug/sanitize_noc.h @@ -28,19 +28,20 @@ #define LOG_WRITE_LEN_FROM_STATE() #endif -#if (defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || defined(COMPILE_FOR_ERISC) || defined(COMPILE_FOR_IDLE_ERISC)) && \ - (defined(WATCHER_ENABLED)) && \ - (!defined(WATCHER_DISABLE_NOC_SANITIZE)) +#if ( \ + defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || defined(COMPILE_FOR_ERISC) || \ + defined(COMPILE_FOR_IDLE_ERISC)) && \ + (defined(WATCHER_ENABLED)) && (!defined(WATCHER_DISABLE_NOC_SANITIZE)) -#include "watcher_common.h" #include "generated_bank_to_noc_coord_mapping.h" +#include "watcher_common.h" extern uint8_t noc_index; +#include "dev_msgs.h" #include "noc_addr_ranges_gen.h" -#include "noc_parameters.h" #include "noc_overlay_parameters.h" - +#include "noc_parameters.h" // A couple defines for specifying read/write and multi/unicast #define DEBUG_SANITIZE_NOC_READ true @@ -51,43 +52,24 @@ typedef bool debug_sanitize_noc_dir_t; typedef bool debug_sanitize_noc_cast_t; // TODO(PGK): remove soft reset when fw is downloaded at init -#define DEBUG_VALID_REG_ADDR(a, l) \ - ( \ - ( \ - ( \ - ((a) >= NOC_OVERLAY_START_ADDR) && \ - ((a) < NOC_OVERLAY_START_ADDR + NOC_STREAM_REG_SPACE_SIZE * NOC_NUM_STREAMS) \ - ) || \ - ((a) == RISCV_DEBUG_REG_SOFT_RESET_0) \ - ) \ - && (l) == 4 \ - ) -#define DEBUG_VALID_WORKER_ADDR(a, l) \ - ( \ - (a >= MEM_L1_BASE) && \ - (a + l <= MEM_L1_BASE + MEM_L1_SIZE) && \ - ((a) + (l) > (a)) \ - ) -#define DEBUG_VALID_PCIE_ADDR(a, l) (((a) >= NOC_PCIE_ADDR_BASE) && \ - ((a) + (l) <= NOC_PCIE_ADDR_END) && \ - ((a) + (l) > (a))) -#define DEBUG_VALID_DRAM_ADDR(a, l) (((a) >= NOC_DRAM_ADDR_BASE) && \ - ((a) + (l) <= NOC_DRAM_ADDR_END) && \ - ((a) + (l) > (a))) - -#define DEBUG_VALID_ETH_ADDR(a, l) (((a) >= MEM_ETH_BASE) && \ - ((a) + (l) <= MEM_ETH_BASE + MEM_ETH_SIZE)) +#define DEBUG_VALID_REG_ADDR(a, l) \ + (((((a) >= NOC_OVERLAY_START_ADDR) && \ + ((a) < NOC_OVERLAY_START_ADDR + NOC_STREAM_REG_SPACE_SIZE * NOC_NUM_STREAMS)) || \ + ((a) == RISCV_DEBUG_REG_SOFT_RESET_0)) && \ + (l) == 4) +#define DEBUG_VALID_WORKER_ADDR(a, l) ((a >= MEM_L1_BASE) && (a + l <= MEM_L1_BASE + MEM_L1_SIZE) && ((a) + (l) > (a))) +#define DEBUG_VALID_PCIE_ADDR(a, l) \ + (((a) >= NOC_PCIE_ADDR_BASE) && ((a) + (l) <= NOC_PCIE_ADDR_END) && ((a) + (l) > (a))) +#define DEBUG_VALID_DRAM_ADDR(a, l) \ + (((a) >= NOC_DRAM_ADDR_BASE) && ((a) + (l) <= NOC_DRAM_ADDR_END) && ((a) + (l) > (a))) + +#define DEBUG_VALID_ETH_ADDR(a, l) (((a) >= MEM_ETH_BASE) && ((a) + (l) <= MEM_ETH_BASE + MEM_ETH_SIZE)) // Note: // - this isn't racy w/ the host so long as invalid is written last // - this isn't racy between riscvs so long as each gets their own noc_index inline void debug_sanitize_post_noc_addr_and_hang( - uint64_t noc_addr, - uint32_t l1_addr, - uint32_t len, - debug_sanitize_noc_cast_t multicast, - uint16_t invalid -) { + uint64_t noc_addr, uint32_t l1_addr, uint32_t len, debug_sanitize_noc_cast_t multicast, uint16_t invalid) { debug_sanitize_noc_addr_msg_t tt_l1_ptr *v = *GET_MAILBOX_ADDRESS_DEV(sanitize_noc); if (v[noc_index].invalid == DebugSanitizeNocInvalidOK) { @@ -106,7 +88,7 @@ inline void debug_sanitize_post_noc_addr_and_hang( erisc_early_exit(eth_l1_mem::address_map::ERISC_MEM_MAILBOX_STACK_SAVE); #endif - while(1) { + while (1) { #if defined(COMPILE_FOR_ERISC) internal_::risc_context_switch(); #endif @@ -139,20 +121,16 @@ uint32_t debug_sanitize_noc_addr( uint32_t x_end = NOC_MCAST_ADDR_END_X(noc_addr); uint32_t y_end = NOC_MCAST_ADDR_END_Y(noc_addr); - if (!NOC_WORKER_XY_P(x, y) || - !NOC_WORKER_XY_P(x_end, y_end) || - (x > x_end || y > y_end)) { + if (!NOC_WORKER_XY_P(x, y) || !NOC_WORKER_XY_P(x_end, y_end) || (x > x_end || y > y_end)) { debug_sanitize_post_noc_addr_and_hang( - noc_addr, l1_addr, noc_len, - multicast, DebugSanitizeNocInvalidMulticast - ); + noc_addr, l1_addr, noc_len, multicast, DebugSanitizeNocInvalidMulticast); } } // Check noc addr, we save the alignment requirement from the noc src/dst because the L1 address // needs to match alignment. - uint32_t alignment_mask = NOC_L1_ALIGNMENT_BYTES-1; // Default alignment, only override in ceratin cases. - uint32_t invalid = multicast? DebugSanitizeNocInvalidMulticast : DebugSanitizeNocInvalidUnicast; + uint32_t alignment_mask = NOC_L1_ALIGNMENT_BYTES - 1; // Default alignment, only override in ceratin cases. + uint32_t invalid = multicast ? DebugSanitizeNocInvalidMulticast : DebugSanitizeNocInvalidUnicast; if (NOC_PCIE_XY_P(x, y)) { // Additional alignment restriction only applies to reads if (dir == DEBUG_SANITIZE_NOC_READ) @@ -198,8 +176,7 @@ void debug_sanitize_noc_and_worker_addr( uint32_t worker_addr, uint32_t len, debug_sanitize_noc_cast_t multicast, - debug_sanitize_noc_dir_t dir -) { + debug_sanitize_noc_dir_t dir) { // Check noc addr, get any extra alignment req for worker. uint32_t alignment_mask = debug_sanitize_noc_addr(noc_addr, worker_addr, len, multicast, dir); @@ -209,51 +186,89 @@ void debug_sanitize_noc_and_worker_addr( // Check alignment, but not for reg addresses. if (!DEBUG_VALID_REG_ADDR(worker_addr, len)) { if ((worker_addr & alignment_mask) != (noc_addr & alignment_mask)) { - debug_sanitize_post_noc_addr_and_hang(noc_addr, worker_addr, len, multicast, DebugSanitizeNocInvalidAlignment); + debug_sanitize_post_noc_addr_and_hang( + noc_addr, worker_addr, len, multicast, DebugSanitizeNocInvalidAlignment); } } } // TODO: Clean these up with #7453 -#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_FROM_STATE(noc_id) \ - DEBUG_SANITIZE_NOC_READ_TRANSACTION( \ - ((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | ((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO) << 32), \ - NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO), \ - NOC_CMD_BUF_READ_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE)); -#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_FROM_STATE(noc_id) \ - DEBUG_SANITIZE_NOC_WRITE_TRANSACTION( \ - ((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID) << 32) | ((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO)), \ - NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO), \ - NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE)); - -#define DEBUG_SANITIZE_NOC_ADDR(a, l) \ - debug_sanitize_noc_addr(a, 0, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ); LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_TRANSACTION(noc_a, worker_a, l, multicast, dir) \ - debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, multicast, dir); LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_READ_TRANSACTION(noc_a, worker_a, l) \ - debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ); LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_MULTI_READ_TRANSACTION(noc_a, worker_a, l) \ - debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_READ); LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(noc_a, worker_a, l) \ - debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_WRITE); LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_MULTI_WRITE_TRANSACTION(noc_a, worker_a, l) \ - debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_WRITE); LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \ - DEBUG_SANITIZE_NOC_READ_TRANSACTION( \ - ((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \ - worker_a, \ +#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_FROM_STATE(noc_id) \ + DEBUG_SANITIZE_NOC_READ_TRANSACTION( \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO) << 32), \ + NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO), \ + NOC_CMD_BUF_READ_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE)); \ + debug_insert_delay((uint8_t)TransactionRead); + +#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_FROM_STATE(noc_id) \ + DEBUG_SANITIZE_NOC_WRITE_TRANSACTION( \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID) << 32) | \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO)), \ + NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO), \ + NOC_CMD_BUF_READ_REG(noc_id, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE)); \ + debug_insert_delay((uint8_t)TransactionWrite); +#define DEBUG_SANITIZE_NOC_ADDR(a, l) \ + debug_sanitize_noc_addr(a, 0, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ); \ + LOG_LEN(l) +#define DEBUG_SANITIZE_NOC_TRANSACTION(noc_a, worker_a, l, multicast, dir) \ + debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, multicast, dir); \ + LOG_LEN(l) +#define DEBUG_SANITIZE_NOC_READ_TRANSACTION(noc_a, worker_a, l) \ + debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ); \ + LOG_LEN(l); \ + debug_insert_delay((uint8_t)TransactionRead); +#define DEBUG_SANITIZE_NOC_MULTI_READ_TRANSACTION(noc_a, worker_a, l) \ + debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_READ); \ + LOG_LEN(l); \ + debug_insert_delay((uint8_t)TransactionRead); +#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(noc_a, worker_a, l) \ + debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_WRITE); \ + LOG_LEN(l); \ + debug_insert_delay((uint8_t)TransactionWrite) +#define DEBUG_SANITIZE_NOC_MULTI_WRITE_TRANSACTION(noc_a, worker_a, l) \ + debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_WRITE); \ + LOG_LEN(l); \ + debug_insert_delay((uint8_t)TransactionWrite); + +#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \ + DEBUG_SANITIZE_NOC_READ_TRANSACTION( \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \ + worker_a, \ NOC_CMD_BUF_READ_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE)); -#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_STATE(noc_id, noc_a_lower, worker_a, l) \ - DEBUG_SANITIZE_NOC_READ_TRANSACTION( \ - ((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \ - worker_a, l); -#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \ - DEBUG_SANITIZE_NOC_WRITE_TRANSACTION( \ - ((uint64_t) NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \ - worker_a, \ +#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_STATE(noc_id, noc_a_lower, worker_a, l) \ + DEBUG_SANITIZE_NOC_READ_TRANSACTION( \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \ + worker_a, \ + l); +#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \ + DEBUG_SANITIZE_NOC_WRITE_TRANSACTION( \ + ((uint64_t)NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \ + worker_a, \ NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE)); +#define DEBUG_INSERT_DELAY(transaction_type) debug_insert_delay(transaction_type) + +// Delay for debugging purposes +inline void debug_insert_delay(uint8_t transaction_type) { +#if defined(WATCHER_DEBUG_DELAY) + debug_insert_delays_msg_t tt_l1_ptr *v = GET_MAILBOX_ADDRESS_DEV(debug_insert_delays); -#else // !WATCHER_ENABLED + bool delay = false; + switch (transaction_type) { + case TransactionRead: delay = (v[0].read_delay_riscv_mask & (1 << debug_get_which_riscv())) != 0; break; + case TransactionWrite: delay = (v[0].write_delay_riscv_mask & (1 << debug_get_which_riscv())) != 0; break; + case TransactionAtomic: delay = (v[0].atomic_delay_riscv_mask & (1 << debug_get_which_riscv())) != 0; break; + default: break; + } + if (delay) { + // WATCHER_DEBUG_DELAY is a compile time constant passed with -D + riscv_wait (WATCHER_DEBUG_DELAY); + v[0].feedback |= (1 << transaction_type); // Mark that we have delayed on this transaction type + } +#endif // WATCHER_DEBUG_DELAY +} + +#else // !WATCHER_ENABLED #define DEBUG_SANITIZE_NOC_ADDR(a, l) LOG_LEN(l) #define DEBUG_SANITIZE_NOC_TRANSACTION(noc_a, worker_a, l, multicast, dir) LOG_LEN(l) @@ -261,9 +276,12 @@ void debug_sanitize_noc_and_worker_addr( #define DEBUG_SANITIZE_NOC_MULTI_READ_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l) #define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l) #define DEBUG_SANITIZE_NOC_MULTI_WRITE_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) LOG_READ_LEN_FROM_STATE() +#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \ + LOG_READ_LEN_FROM_STATE() #define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_STATE(noc_id, noc_a_lower, worker_a, l) LOG_LEN(l) -#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) LOG_WRITE_LEN_FROM_STATE() +#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \ + LOG_WRITE_LEN_FROM_STATE() #define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_FROM_STATE(noc_id) +#define DEBUG_INSERT_DELAY(transaction_type) -#endif // WATCHER_ENABLED && not TRISC +#endif // WATCHER_ENABLED diff --git a/tt_metal/hw/inc/dev_msgs.h b/tt_metal/hw/inc/dev_msgs.h index affcdce86e5..f7443483b81 100644 --- a/tt_metal/hw/inc/dev_msgs.h +++ b/tt_metal/hw/inc/dev_msgs.h @@ -12,8 +12,9 @@ #include "noc/noc_parameters.h" -#define GET_ETH_MAILBOX_ADDRESS_HOST(x) ((uint64_t)&(((mailboxes_t *)eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE)->x)) -#define GET_IERISC_MAILBOX_ADDRESS_HOST(x) ((uint64_t)&(((mailboxes_t *)MEM_IERISC_MAILBOX_BASE)->x)) +#define GET_ETH_MAILBOX_ADDRESS_HOST(x) \ + ((uint64_t) & (((mailboxes_t *)eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE)->x)) +#define GET_IERISC_MAILBOX_ADDRESS_HOST(x) ((uint64_t) & (((mailboxes_t *)MEM_IERISC_MAILBOX_BASE)->x)) #if defined(COMPILE_FOR_ERISC) #define GET_MAILBOX_ADDRESS_HOST(x) GET_ETH_MAILBOX_ADDRESS_HOST(x) #define GET_MAILBOX_ADDRESS_DEV(x) (&(((mailboxes_t tt_l1_ptr *)eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE)->x)) @@ -21,18 +22,18 @@ #define GET_MAILBOX_ADDRESS_HOST(x) GET_IERISC_MAILBOX_ADDRESS_HOST(x) #define GET_MAILBOX_ADDRESS_DEV(x) (&(((mailboxes_t tt_l1_ptr *)MEM_IERISC_MAILBOX_BASE)->x)) #else -#define GET_MAILBOX_ADDRESS_HOST(x) ((uint64_t)&(((mailboxes_t *)MEM_MAILBOX_BASE)->x)) +#define GET_MAILBOX_ADDRESS_HOST(x) ((uint64_t) & (((mailboxes_t *)MEM_MAILBOX_BASE)->x)) #define GET_MAILBOX_ADDRESS_DEV(x) (&(((mailboxes_t tt_l1_ptr *)MEM_MAILBOX_BASE)->x)) #endif // Messages for host to tell brisc to go constexpr uint32_t RUN_MSG_INIT = 0x40; -constexpr uint32_t RUN_MSG_GO = 0x80; +constexpr uint32_t RUN_MSG_GO = 0x80; constexpr uint32_t RUN_MSG_DONE = 0; // 0x80808000 is a micro-optimization, calculated with 1 riscv insn constexpr uint32_t RUN_SYNC_MSG_INIT = 0x40; -constexpr uint32_t RUN_SYNC_MSG_GO = 0x80; +constexpr uint32_t RUN_SYNC_MSG_GO = 0x80; constexpr uint32_t RUN_SYNC_MSG_DONE = 0; constexpr uint32_t RUN_SYNC_MSG_ALL_TRISCS_GO = 0x80808000; constexpr uint32_t RUN_SYNC_MSG_ALL_GO = 0x80808080; @@ -52,26 +53,26 @@ struct launch_msg_t { // must be cacheline aligned volatile uint16_t brisc_watcher_kernel_id; volatile uint16_t ncrisc_watcher_kernel_id; volatile uint16_t triscs_watcher_kernel_id; - volatile uint16_t ncrisc_kernel_size16; // size in 16 byte units + volatile uint16_t ncrisc_kernel_size16; // size in 16 byte units // TODO(agrebenisan): This must be added in to launch_msg_t // volatile uint16_t dispatch_core_x; // volatile uint16_t dispatch_core_y; - volatile uint8_t mode; - volatile uint8_t brisc_noc_id; - volatile uint8_t enable_brisc; - volatile uint8_t enable_ncrisc; - volatile uint8_t enable_triscs; - volatile uint8_t max_cb_index; - volatile uint8_t enable_erisc; - volatile uint8_t run; // must be in last cacheline of this msg + volatile uint8_t mode; + volatile uint8_t brisc_noc_id; + volatile uint8_t enable_brisc; + volatile uint8_t enable_ncrisc; + volatile uint8_t enable_triscs; + volatile uint8_t max_cb_index; + volatile uint8_t enable_erisc; + volatile uint8_t run; // must be in last cacheline of this msg }; struct slave_sync_msg_t { union { volatile uint32_t all; struct { - volatile uint8_t ncrisc; // ncrisc must come first, see ncrisc-halt.S + volatile uint8_t ncrisc; // ncrisc must come first, see ncrisc-halt.S volatile uint8_t trisc0; volatile uint8_t trisc1; volatile uint8_t trisc2; @@ -85,6 +86,7 @@ struct debug_status_msg_t { }; // TODO: Clean up this struct with #6738 +// This structure is populated by the device and read by the host struct debug_sanitize_noc_addr_msg_t { volatile uint64_t noc_addr; volatile uint32_t l1_addr; @@ -95,13 +97,21 @@ struct debug_sanitize_noc_addr_msg_t { volatile uint16_t pad; }; +// Host -> device. Populated with the information on where we want to insert delays. +struct debug_insert_delays_msg_t { + volatile uint8_t read_delay_riscv_mask = 0; // Which Riscs will delay their reads + volatile uint8_t write_delay_riscv_mask = 0; // Which Riscs will delay their writes + volatile uint8_t atomic_delay_riscv_mask = 0; // Which Riscs will delay their atomics + volatile uint8_t feedback = 0; // Stores the feedback about delays (used for testing) +}; + enum debug_sanitize_noc_invalid_enum { // 0 and 1 are a common stray values to write, so don't use those - DebugSanitizeNocInvalidOK = 2, - DebugSanitizeNocInvalidL1 = 3, - DebugSanitizeNocInvalidUnicast = 4, - DebugSanitizeNocInvalidMulticast = 5, - DebugSanitizeNocInvalidAlignment = 6, + DebugSanitizeNocInvalidOK = 2, + DebugSanitizeNocInvalidL1 = 3, + DebugSanitizeNocInvalidUnicast = 4, + DebugSanitizeNocInvalidMulticast = 5, + DebugSanitizeNocInvalidAlignment = 6, }; struct debug_assert_msg_t { @@ -111,13 +121,13 @@ struct debug_assert_msg_t { }; enum debug_assert_tripped_enum { - DebugAssertOK = 2, + DebugAssertOK = 2, DebugAssertTripped = 3, }; // XXXX TODO(PGK): why why why do we not have this standardized typedef enum debug_sanitize_which_riscv { - DebugBrisc = 0, + DebugBrisc = 0, DebugNCrisc = 1, DebugTrisc0 = 2, DebugTrisc1 = 3, @@ -127,6 +137,13 @@ typedef enum debug_sanitize_which_riscv { DebugNumUniqueRiscs } riscv_id_t; +typedef enum debug_transaction_type { + TransactionRead = 0, + TransactionWrite = 1, + TransactionAtomic = 2, + TransactionNumTypes +} debug_transaction_type_t; + struct debug_pause_msg_t { volatile uint8_t flags[DebugNumUniqueRiscs]; volatile uint8_t pad[8 - DebugNumUniqueRiscs]; @@ -148,6 +165,7 @@ struct mailboxes_t { struct debug_sanitize_noc_addr_msg_t sanitize_noc[NUM_NOCS]; struct debug_assert_msg_t assert_status; struct debug_pause_msg_t pause_status; + struct debug_insert_delays_msg_t debug_insert_delays; }; #ifndef TENSIX_FIRMWARE @@ -155,7 +173,8 @@ struct mailboxes_t { static_assert((MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % 32 == 0); static_assert((eth_l1_mem::address_map::ERISC_MEM_MAILBOX_BASE + offsetof(mailboxes_t, launch)) % 32 == 0); static_assert(MEM_MAILBOX_BASE + offsetof(mailboxes_t, slave_sync.ncrisc) == MEM_SLAVE_RUN_MAILBOX_ADDRESS); -static_assert(MEM_MAILBOX_BASE + offsetof(mailboxes_t, ncrisc_halt.stack_save) == MEM_NCRISC_HALT_STACK_MAILBOX_ADDRESS); +static_assert( + MEM_MAILBOX_BASE + offsetof(mailboxes_t, ncrisc_halt.stack_save) == MEM_NCRISC_HALT_STACK_MAILBOX_ADDRESS); static_assert(MEM_MAILBOX_BASE + sizeof(mailboxes_t) < MEM_MAILBOX_END); #endif @@ -166,7 +185,7 @@ struct eth_word_t { uint32_t reserved_1; }; -enum class SyncCBConfigRegion: uint8_t { +enum class SyncCBConfigRegion : uint8_t { DB_TENSIX = 0, TENSIX = 1, ROUTER_ISSUE = 2, diff --git a/tt_metal/impl/debug/dprint_server.cpp b/tt_metal/impl/debug/dprint_server.cpp index 23ba4b46b4d..958a90faec2 100644 --- a/tt_metal/impl/debug/dprint_server.cpp +++ b/tt_metal/impl/debug/dprint_server.cpp @@ -346,8 +346,8 @@ DebugPrintServerContext::DebugPrintServerContext() { inst = this; // Read hart mask + log file from rtoptions - uint32_t hart_mask = tt::llrt::OptionsG.get_dprint_riscv_mask(); - string file_name = tt::llrt::OptionsG.get_dprint_file_name(); + uint32_t hart_mask = tt::llrt::OptionsG.get_feature_riscv_mask(tt::llrt::RunTimeDebugFeatureDprint); + string file_name = tt::llrt::OptionsG.get_feature_file_name(tt::llrt::RunTimeDebugFeatureDprint); // Set the output stream according to RTOptions, either a file name or stdout if none specified. if (file_name != "") { @@ -434,26 +434,14 @@ void DebugPrintServerContext::AttachDevice(Device* device) { // If RTOptions doesn't enable DPRINT on this device, return here and don't actually attach it // to the server. - vector chip_ids = tt::llrt::OptionsG.get_dprint_chip_ids(); - if (!tt::llrt::OptionsG.get_dprint_all_chips()) + vector chip_ids = tt::llrt::OptionsG.get_feature_chip_ids(tt::llrt::RunTimeDebugFeatureDprint); + if (!tt::llrt::OptionsG.get_feature_all_chips(tt::llrt::RunTimeDebugFeatureDprint)) if (std::find(chip_ids.begin(), chip_ids.end(), device->id()) == chip_ids.end()) return; - // Helper lambda to convert CoreType to string for printing purposes. - auto core_type_to_str = [](CoreType core_type){ - switch(core_type) { - case CoreType::WORKER: - return "worker"; - case CoreType::ETH: - return "ethernet"; - default: - TT_THROW("DPRINT server unrecognized CoreType"); - } - }; - // Handle specifically disabled cores std::unordered_set disabled_phys_cores; - for (auto &type_and_cores : tt::llrt::OptionsG.get_dprint_disabled_cores()) { + for (auto &type_and_cores : tt::llrt::OptionsG.get_feature_disabled_cores(tt::llrt::RunTimeDebugFeatureDprint)) { for (auto &core : type_and_cores.second) { CoreCoord physical_core = device->physical_core_from_logical_core(core, type_and_cores.first); disabled_phys_cores.insert(physical_core); @@ -463,7 +451,7 @@ void DebugPrintServerContext::AttachDevice(Device* device) { // Core range depends on whether dprint_all_cores flag is set. vector print_cores_sanitized; for (CoreType core_type : {CoreType::WORKER, CoreType::ETH}) { - if (tt::llrt::OptionsG.get_dprint_all_cores(core_type)) { + if (tt::llrt::OptionsG.get_feature_all_cores(tt::llrt::RunTimeDebugFeatureDprint, core_type)) { // Print from all cores of the given type, cores returned here are guaranteed to be valid. for (CoreCoord phys_core: all_physical_printable_cores[core_type]) { // Don't print on specifically disabled cores. @@ -475,11 +463,11 @@ void DebugPrintServerContext::AttachDevice(Device* device) { tt::LogMetal, "DPRINT enabled on device {}, all {} cores.", device->id(), - core_type_to_str(core_type) + tt::llrt::get_core_type_name(core_type) ); } else { // Only print from the cores specified by the user - vector print_cores = tt::llrt::OptionsG.get_dprint_cores()[core_type]; + vector print_cores = tt::llrt::OptionsG.get_feature_cores(tt::llrt::RunTimeDebugFeatureDprint)[core_type]; // We should also validate that the cores the user specified are valid worker cores. for (auto logical_core : print_cores) { @@ -501,7 +489,7 @@ void DebugPrintServerContext::AttachDevice(Device* device) { tt::LogMetal, "DPRINT enabled on device {}, {} core {} (physical {}).", device->id(), - core_type_to_str(core_type), + tt::llrt::get_core_type_name(core_type), logical_core.str(), phys_core.str() ); @@ -509,7 +497,7 @@ void DebugPrintServerContext::AttachDevice(Device* device) { log_warning( tt::LogMetal, "TT_METAL_DPRINT_CORES included {} core with logical coordinates {} (physical coordinates {}), which is not a valid core on device {}. This coordinate will be ignored by the dprint server.", - core_type_to_str(core_type), + tt::llrt::get_core_type_name(core_type), logical_core.str(), valid_logical_core? phys_core.str() : "INVALID", device->id() @@ -520,7 +508,7 @@ void DebugPrintServerContext::AttachDevice(Device* device) { } // Write print enable magic for the cores the user specified. - uint32_t hart_mask = tt::llrt::OptionsG.get_dprint_riscv_mask(); + uint32_t hart_mask = tt::llrt::OptionsG.get_feature_riscv_mask(tt::llrt::RunTimeDebugFeatureDprint); for (auto &core : print_cores_sanitized) { int hart_count = GetNumRiscs(device_id, core); for (int hart_index = 0; hart_index < hart_count; hart_index++) { @@ -540,14 +528,14 @@ void DebugPrintServerContext::AttachDevice(Device* device) { void DebugPrintServerContext::DetachDevice(Device* device) { // Don't detach the device if it's disabled by env vars - in this case it wasn't attached. - vector chip_ids = tt::llrt::OptionsG.get_dprint_chip_ids(); - if (!tt::llrt::OptionsG.get_dprint_all_chips()) + vector chip_ids = tt::llrt::OptionsG.get_feature_chip_ids(tt::llrt::RunTimeDebugFeatureDprint); + if (!tt::llrt::OptionsG.get_feature_all_chips(tt::llrt::RunTimeDebugFeatureDprint)) if (std::find(chip_ids.begin(), chip_ids.end(), device->id()) == chip_ids.end()) return; // When we detach a device, we should poll to make sure there's no outstanding prints. chip_id_t chip_id = device->id(); - uint32_t risc_mask = tt::llrt::OptionsG.get_dprint_riscv_mask(); + uint32_t risc_mask = tt::llrt::OptionsG.get_feature_riscv_mask(tt::llrt::RunTimeDebugFeatureDprint); bool outstanding_prints = true; while (outstanding_prints && !server_killed_due_to_hang_) { // Polling interval of 1ms @@ -604,7 +592,7 @@ void DebugPrintServerContext::ClearLogFile() { outfile_->close(); delete outfile_; - string file_name = tt::llrt::OptionsG.get_dprint_file_name(); + string file_name = tt::llrt::OptionsG.get_feature_file_name(tt::llrt::RunTimeDebugFeatureDprint); outfile_ = new std::ofstream(file_name); stream_ = outfile_ ? outfile_ : &cout; } @@ -953,7 +941,7 @@ namespace tt { void DprintServerAttach(Device* device) { // Skip if DPRINT not enabled, and make sure profiler is not running. - if (!tt::llrt::OptionsG.get_dprint_enabled()) + if (!tt::llrt::OptionsG.get_feature_enabled(tt::llrt::RunTimeDebugFeatureDprint)) return; TT_FATAL( DebugPrintServerContext::ProfilerIsRunning == false, diff --git a/tt_metal/impl/debug/watcher_server.cpp b/tt_metal/impl/debug/watcher_server.cpp index 156928e5852..82fd5f377da 100644 --- a/tt_metal/impl/debug/watcher_server.cpp +++ b/tt_metal/impl/debug/watcher_server.cpp @@ -2,26 +2,26 @@ // // SPDX-License-Identifier: Apache-2.0 -#include +#include "watcher_server.hpp" + #include + #include #include -#include +#include #include #include -#include +#include +#include -#include "llrt/llrt.hpp" -#include "watcher_server.hpp" -#include "llrt/rtoptions.hpp" #include "dev_mem_map.h" #include "dev_msgs.h" - -#include "noc/noc_parameters.h" -#include "noc/noc_overlay_parameters.h" - #include "hostdevcommon/common_runtime_address_map.h" #include "hostdevcommon/debug_ring_buffer_common.h" +#include "llrt/llrt.hpp" +#include "llrt/rtoptions.hpp" +#include "noc/noc_overlay_parameters.h" +#include "noc/noc_parameters.h" namespace tt { namespace watcher { @@ -29,7 +29,7 @@ namespace watcher { constexpr uint64_t DEBUG_SANITIZE_NOC_SENTINEL_OK_64 = 0xbadabadabadabada; constexpr uint32_t DEBUG_SANITIZE_NOC_SENTINEL_OK_32 = 0xbadabada; constexpr uint16_t DEBUG_SANITIZE_NOC_SENTINEL_OK_16 = 0xbada; -constexpr uint16_t DEBUG_SANITIZE_NOC_SENTINEL_OK_8 = 0xda; +constexpr uint8_t DEBUG_SANITIZE_NOC_SENTINEL_OK_8 = 0xda; static std::atomic enabled = false; static std::atomic server_running = false; @@ -58,10 +58,9 @@ static double get_elapsed_secs() { } void create_log_file() { - FILE *f; - const char *fmode = tt::llrt::OptionsG.get_watcher_append()? "a" : "w"; + const char *fmode = tt::llrt::OptionsG.get_watcher_append() ? "a" : "w"; std::filesystem::path output_dir(tt::llrt::OptionsG.get_root_dir() + watcher::logfile_path); std::filesystem::create_directories(output_dir); string fname = output_dir.string() + watcher::logfile_name; @@ -85,7 +84,10 @@ void create_log_file() { fprintf(f, "\tnoc:{a, l}=an L1 address used by NOC by (eg, local src address)\n"); fprintf(f, "\tnoc:{(x,y), a, l}=NOC unicast address used by \n"); fprintf(f, "\tnoc:{(x1,y1)-(x2,y2), a, l}=NOC multicast address used by \n"); - fprintf(f, "\trmsg:=brisc host run message, D/H device/host dispatch; brisc NOC ID; I/G/D init/go/done; | separator; B/b enable/disable brisc; N/n enable/disable ncrisc; T/t enable/disable TRISC\n"); + fprintf( + f, + "\trmsg:=brisc host run message, D/H device/host dispatch; brisc NOC ID; I/G/D init/go/done; | separator; " + "B/b enable/disable brisc; N/n enable/disable ncrisc; T/t enable/disable TRISC\n"); fprintf(f, "\tsmsg:=slave run message, I/G/D for NCRISC, TRISC0, TRISC1, TRISC2\n"); fprintf(f, "\tk_ids:|| (ID map to file at end of section)\n"); fprintf(f, "\n"); @@ -96,7 +98,7 @@ void create_log_file() { void create_kernel_file() { FILE *f; - const char *fmode = tt::llrt::OptionsG.get_watcher_append()? "a" : "w"; + const char *fmode = tt::llrt::OptionsG.get_watcher_append() ? "a" : "w"; std::filesystem::path output_dir(tt::llrt::OptionsG.get_root_dir() + watcher::logfile_path); std::filesystem::create_directories(output_dir); string fname = output_dir.string() + watcher::kernel_file_name; @@ -119,7 +121,6 @@ static void log_running_kernels(const launch_msg_t *launch_msg) { } static void dump_l1_status(FILE *f, Device *device, CoreCoord core, const launch_msg_t *launch_msg) { - // Read L1 address 0, looking for memory corruption std::vector data; data = tt::llrt::read_hex_vec_from_core(device->id(), core, MEM_L1_BASE, sizeof(uint32_t)); @@ -130,42 +131,29 @@ static void dump_l1_status(FILE *f, Device *device, CoreCoord core, const launch } } -static const char * get_riscv_name(CoreCoord core, uint32_t type) -{ +static const char *get_riscv_name(CoreCoord core, uint32_t type) { switch (type) { - case DebugBrisc: - return "brisc"; - case DebugNCrisc: - return "ncrisc"; - case DebugErisc: - return "erisc"; - case DebugIErisc: - return "ierisc"; - case DebugTrisc0: - return "trisc0"; - case DebugTrisc1: - return "trisc1"; - case DebugTrisc2: - return "trisc2"; - default: - TT_THROW("Watcher data corrupted, unexpected riscv type on core {}: {}", core.str(), type); + case DebugBrisc: return "brisc"; + case DebugNCrisc: return "ncrisc"; + case DebugErisc: return "erisc"; + case DebugIErisc: return "ierisc"; + case DebugTrisc0: return "trisc0"; + case DebugTrisc1: return "trisc1"; + case DebugTrisc2: return "trisc2"; + default: TT_THROW("Watcher data corrupted, unexpected riscv type on core {}: {}", core.str(), type); } return nullptr; } -static string get_kernel_name(CoreCoord core, const launch_msg_t *launch_msg, uint32_t type) -{ +static string get_kernel_name(CoreCoord core, const launch_msg_t *launch_msg, uint32_t type) { switch (type) { case DebugBrisc: case DebugErisc: - case DebugIErisc: - return kernel_names[launch_msg->brisc_watcher_kernel_id]; - case DebugNCrisc: - return kernel_names[launch_msg->ncrisc_watcher_kernel_id]; + case DebugIErisc: return kernel_names[launch_msg->brisc_watcher_kernel_id]; + case DebugNCrisc: return kernel_names[launch_msg->ncrisc_watcher_kernel_id]; case DebugTrisc0: case DebugTrisc1: - case DebugTrisc2: - return kernel_names[launch_msg->triscs_watcher_kernel_id]; + case DebugTrisc2: return kernel_names[launch_msg->triscs_watcher_kernel_id]; default: log_running_kernels(launch_msg); TT_THROW("Watcher data corrupted, unexpected riscv type on core {}: {}", core.str(), type); @@ -174,26 +162,29 @@ static string get_kernel_name(CoreCoord core, const launch_msg_t *launch_msg, ui } static string get_debug_status(CoreCoord core, const launch_msg_t *launch_msg, const debug_status_msg_t *debug_status) { - string out; for (int cpu = 0; cpu < num_riscv_per_core; cpu++) { string risc_status; for (int byte = 0; byte < num_status_bytes_per_riscv; byte++) { char v = ((char *)&debug_status[cpu])[byte]; - if (v == 0) break; + if (v == 0) + break; if (isprint(v)) { risc_status += v; } else { log_running_kernels(launch_msg); - TT_THROW("Watcher data corrupted, unexpected debug status on core {}, unprintable character {}", - core.str(), (int)v); + TT_THROW( + "Watcher data corrupted, unexpected debug status on core {}, unprintable character {}", + core.str(), + (int)v); } } // Pad risc status to 4 chars for alignment string pad(4 - risc_status.length(), ' '); out += (pad + risc_status); - if (cpu != num_riscv_per_core - 1) out += ','; + if (cpu != num_riscv_per_core - 1) + out += ','; } out += " "; @@ -215,12 +206,7 @@ static string get_ring_buffer(Device *device, CoreCoord phys_core) { buf_addr = eth_l1_mem::address_map::ERISC_RING_BUFFER_ADDR; } } - auto from_dev = tt::llrt::read_hex_vec_from_core( - device->id(), - phys_core, - buf_addr, - RING_BUFFER_SIZE - ); + auto from_dev = tt::llrt::read_hex_vec_from_core(device->id(), phys_core, buf_addr, RING_BUFFER_SIZE); DebugRingBufMemLayout *ring_buf_data = reinterpret_cast(&(from_dev[0])); if (ring_buf_data->current_ptr == DEBUG_RING_BUFFER_STARTING_INDEX) return ""; @@ -235,9 +221,9 @@ static string get_ring_buffer(Device *device, CoreCoord phys_core) { } if (curr_idx == 0) { if (ring_buf_data->wrapped == 0) - break; // No wrapping, so no extra data available + break; // No wrapping, so no extra data available else - curr_idx = RING_BUFFER_ELEMENTS-1; // Loop + curr_idx = RING_BUFFER_ELEMENTS - 1; // Loop } else { curr_idx--; } @@ -258,42 +244,30 @@ static void log_ring_buffer(Device *device, CoreCoord core) { static std::pair get_core_and_mem_type(Device *device, CoreCoord &noc_coord, int noc) { // Get the physical coord from the noc coord - const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(device->id()); - CoreCoord phys_core = { - NOC_0_X(noc, soc_d.grid_size.x, noc_coord.x), - NOC_0_Y(noc, soc_d.grid_size.y, noc_coord.y) - }; + const metal_SocDescriptor &soc_d = tt::Cluster::instance().get_soc_desc(device->id()); + CoreCoord phys_core = {NOC_0_X(noc, soc_d.grid_size.x, noc_coord.x), NOC_0_Y(noc, soc_d.grid_size.y, noc_coord.y)}; CoreType core_type; try { core_type = device->core_type_from_physical_core(phys_core); - } catch (std::runtime_error& e) { + } catch (std::runtime_error &e) { // We may not be able to get a core type if the physical coords are bad. return {"Unknown", ""}; } - switch(core_type) { - case CoreType::DRAM: - return {"DRAM", "DRAM"}; - case CoreType::ETH: - return {"Ethernet", "L1"}; - case CoreType::PCIE: - return {"PCIe", "PCIE"}; - case CoreType::WORKER: - return {"Tensix", "L1"}; - default: - return {"Unknown", ""}; + switch (core_type) { + case CoreType::DRAM: return {"DRAM", "DRAM"}; + case CoreType::ETH: return {"Ethernet", "L1"}; + case CoreType::PCIE: return {"PCIe", "PCIE"}; + case CoreType::WORKER: return {"Tensix", "L1"}; + default: return {"Unknown", ""}; } } -static string get_noc_target_str( - Device *device, - CoreCoord &core, - int noc, - const debug_sanitize_noc_addr_msg_t* san -) { +static string get_noc_target_str(Device *device, CoreCoord &core, int noc, const debug_sanitize_noc_addr_msg_t *san) { string out = fmt::format("{} using noc{} tried to access ", get_riscv_name(core, san->which), noc); if (san->multicast) { - CoreCoord target_phys_noc_core_start = {NOC_MCAST_ADDR_START_X(san->noc_addr), NOC_MCAST_ADDR_START_Y(san->noc_addr)}; + CoreCoord target_phys_noc_core_start = { + NOC_MCAST_ADDR_START_X(san->noc_addr), NOC_MCAST_ADDR_START_Y(san->noc_addr)}; CoreCoord target_phys_noc_core_end = {NOC_MCAST_ADDR_END_X(san->noc_addr), NOC_MCAST_ADDR_END_Y(san->noc_addr)}; auto type_and_mem = get_core_and_mem_type(device, target_phys_noc_core_start, noc); out += fmt::format( @@ -301,79 +275,73 @@ static string get_noc_target_str( type_and_mem.first, target_phys_noc_core_start.str(), target_phys_noc_core_end.str(), - type_and_mem.second - ); + type_and_mem.second); } else { CoreCoord target_phys_noc_core = {NOC_UNICAST_ADDR_X(san->noc_addr), NOC_UNICAST_ADDR_Y(san->noc_addr)}; auto type_and_mem = get_core_and_mem_type(device, target_phys_noc_core, noc); out += fmt::format( - "{} core w/ physical coords {} {}", - type_and_mem.first, - target_phys_noc_core.str(), - type_and_mem.second - ); + "{} core w/ physical coords {} {}", type_and_mem.first, target_phys_noc_core.str(), type_and_mem.second); } out += fmt::format("[addr=0x{:08x},len={}]", NOC_LOCAL_ADDR_OFFSET(san->noc_addr), san->len); return out; } -static void dump_noc_sanity_status(FILE *f, - Device *device, - CoreCoord core, - const string &core_str, - const launch_msg_t *launch_msg, - int noc, - const debug_sanitize_noc_addr_msg_t* san, - const debug_status_msg_t *debug_status) { - +static void dump_noc_sanity_status( + FILE *f, + Device *device, + CoreCoord core, + const string &core_str, + const launch_msg_t *launch_msg, + int noc, + const debug_sanitize_noc_addr_msg_t *san, + const debug_status_msg_t *debug_status) { string error_msg; string error_reason = "Watcher detected NOC error and stopped device: "; switch (san->invalid) { - case DebugSanitizeNocInvalidOK: - if (san->noc_addr != DEBUG_SANITIZE_NOC_SENTINEL_OK_64 || - san->l1_addr != DEBUG_SANITIZE_NOC_SENTINEL_OK_32 || - san->len != DEBUG_SANITIZE_NOC_SENTINEL_OK_32 || - san->multicast != DEBUG_SANITIZE_NOC_SENTINEL_OK_16 || - san->which != DEBUG_SANITIZE_NOC_SENTINEL_OK_16 - ) { + case DebugSanitizeNocInvalidOK: + if (san->noc_addr != DEBUG_SANITIZE_NOC_SENTINEL_OK_64 || + san->l1_addr != DEBUG_SANITIZE_NOC_SENTINEL_OK_32 || san->len != DEBUG_SANITIZE_NOC_SENTINEL_OK_32 || + san->multicast != DEBUG_SANITIZE_NOC_SENTINEL_OK_16 || + san->which != DEBUG_SANITIZE_NOC_SENTINEL_OK_16) { + error_msg = fmt::format( + "Watcher unexpected noc debug state on core {}, reported valid got noc{}{{0x{:08x}, {} }}", + core.str().c_str(), + san->which, + san->noc_addr, + san->len); + error_reason += "corrupted noc sanitization state - sanitization memory overwritten."; + } + break; + case DebugSanitizeNocInvalidL1: error_msg = fmt::format( - "Watcher unexpected noc debug state on core {}, reported valid got noc{}{{0x{:08x}, {} }}", - core.str().c_str(), san->which, san->noc_addr, san->len - ); - error_reason += "corrupted noc sanitization state - sanitization memory overwritten."; - } - break; - case DebugSanitizeNocInvalidL1: - error_msg = fmt::format( - "{} using noc{} accesses local L1[addr=0x{:08x},len={}]", - get_riscv_name(core, san->which), noc, san->l1_addr, san->len - ); - error_reason += "bad NOC L1/reg address."; - break; - case DebugSanitizeNocInvalidUnicast: - error_msg = get_noc_target_str(device, core, noc, san); - error_reason += "bad NOC unicast transaction."; - break; - case DebugSanitizeNocInvalidMulticast: - error_msg = get_noc_target_str(device, core, noc, san); - error_reason += "bad NOC multicast transaction."; - break; - case DebugSanitizeNocInvalidAlignment: - error_msg = get_noc_target_str(device, core, noc, san); - error_msg += fmt::format( - ", misaligned with local L1[addr=0x{:08x}]", - san->l1_addr - ); - error_reason += "bad alignment in NOC transaction."; - break; - default: - error_msg = fmt::format( - "Watcher unexpected data corruption, noc debug state on core {}, unknown failure code: {}", - core.str(), san->invalid - ); - error_reason += "corrupted noc sanitization state - unknown failure code."; + "{} using noc{} accesses local L1[addr=0x{:08x},len={}]", + get_riscv_name(core, san->which), + noc, + san->l1_addr, + san->len); + error_reason += "bad NOC L1/reg address."; + break; + case DebugSanitizeNocInvalidUnicast: + error_msg = get_noc_target_str(device, core, noc, san); + error_reason += "bad NOC unicast transaction."; + break; + case DebugSanitizeNocInvalidMulticast: + error_msg = get_noc_target_str(device, core, noc, san); + error_reason += "bad NOC multicast transaction."; + break; + case DebugSanitizeNocInvalidAlignment: + error_msg = get_noc_target_str(device, core, noc, san); + error_msg += fmt::format(", misaligned with local L1[addr=0x{:08x}]", san->l1_addr); + error_reason += "bad alignment in NOC transaction."; + break; + default: + error_msg = fmt::format( + "Watcher unexpected data corruption, noc debug state on core {}, unknown failure code: {}", + core.str(), + san->invalid); + error_reason += "corrupted noc sanitization state - unknown failure code."; } // If we logged an error, print to stdout and throw. @@ -396,20 +364,20 @@ static void dump_assert_status( const string &core_str, const launch_msg_t *launch_msg, const debug_assert_msg_t *assert_status, - const debug_status_msg_t *debug_status -) { + const debug_status_msg_t *debug_status) { switch (assert_status->tripped) { case DebugAssertTripped: { // TODO: Get rid of this once #6098 is implemented. - std::string line_num_warning = "Note that file name reporting is not yet implemented, and the reported line number for the assert may be from a different file."; + std::string line_num_warning = + "Note that file name reporting is not yet implemented, and the reported line number for the assert may " + "be from a different file."; string error_msg = fmt::format( "{}: {} tripped an assert on line {}. Current kernel: {}. {}", core_str, get_riscv_name(core, assert_status->which), assert_status->line_num, get_kernel_name(core, launch_msg, assert_status->which).c_str(), - line_num_warning.c_str() - ); + line_num_warning.c_str()); log_warning("Watcher stopped the device due to tripped assert, see watcher log for more details"); log_warning(error_msg.c_str()); log_waypoint(core, launch_msg, debug_status); @@ -425,8 +393,7 @@ static void dump_assert_status( TT_THROW( "Watcher unexpected assert state on core {}, reported OK but got risc {}, line {}.", assert_status->which, - assert_status->line_num - ); + assert_status->line_num); } break; default: @@ -434,27 +401,19 @@ static void dump_assert_status( TT_THROW( "Watcher data corruption, noc assert state on core {} unknown failure code: {}.\n", core.str(), - assert_status->tripped - ); + assert_status->tripped); } } static void dump_pause_status( - CoreCoord core, - const debug_pause_msg_t *pause_status, - std::set> &paused_cores -) { + CoreCoord core, const debug_pause_msg_t *pause_status, std::set> &paused_cores) { // Just record which cores are paused, printing handled at the end. for (int risc_id = 0; risc_id < DebugNumUniqueRiscs; risc_id++) { auto pause = pause_status->flags[risc_id]; if (pause == 1) { paused_cores.insert({core, static_cast(risc_id)}); } else if (pause > 1) { - TT_THROW( - "Watcher data corruption, pause state on core {} unknown code: {}.\n", - core.str(), - pause - ); + TT_THROW("Watcher data corruption, pause state on core {} unknown code: {}.\n", core.str(), pause); } } } @@ -466,23 +425,28 @@ static void dump_ring_buffer(FILE *f, Device *device, CoreCoord core) { static void dump_run_state(FILE *f, CoreCoord core, const launch_msg_t *launch_msg, uint32_t state) { char code = 'U'; - if (state == RUN_MSG_INIT) code = 'I'; - else if (state == RUN_MSG_GO) code = 'G'; - else if (state == RUN_MSG_DONE) code = 'D'; + if (state == RUN_MSG_INIT) + code = 'I'; + else if (state == RUN_MSG_GO) + code = 'G'; + else if (state == RUN_MSG_DONE) + code = 'D'; if (code == 'U') { log_running_kernels(launch_msg); - TT_THROW("Watcher data corruption, unexpected run state on core{}: {} (expected {} or {} or {})", - core.str(), state, RUN_MSG_INIT, RUN_MSG_GO, RUN_MSG_DONE); + TT_THROW( + "Watcher data corruption, unexpected run state on core{}: {} (expected {} or {} or {})", + core.str(), + state, + RUN_MSG_INIT, + RUN_MSG_GO, + RUN_MSG_DONE); } else { fprintf(f, "%c", code); } } -static void dump_run_mailboxes(FILE *f, - CoreCoord core, - const launch_msg_t *launch_msg, - const slave_sync_msg_t *slave_sync) { - +static void dump_run_mailboxes( + FILE *f, CoreCoord core, const launch_msg_t *launch_msg, const slave_sync_msg_t *slave_sync) { fprintf(f, "rmsg:"); if (launch_msg->mode == DISPATCH_MODE_DEV) { @@ -491,16 +455,22 @@ static void dump_run_mailboxes(FILE *f, fprintf(f, "H"); } else { log_running_kernels(launch_msg); - TT_THROW("Watcher data corruption, unexpected launch mode on core {}: {} (expected {} or {})", - core.str(), launch_msg->mode, DISPATCH_MODE_DEV, DISPATCH_MODE_HOST); + TT_THROW( + "Watcher data corruption, unexpected launch mode on core {}: {} (expected {} or {})", + core.str(), + launch_msg->mode, + DISPATCH_MODE_DEV, + DISPATCH_MODE_HOST); } if (launch_msg->brisc_noc_id == 0 || launch_msg->brisc_noc_id == 1) { fprintf(f, "%d", launch_msg->brisc_noc_id); } else { log_running_kernels(launch_msg); - TT_THROW("Watcher data corruption, unexpected brisc noc_id on core {}: {} (expected 0 or 1)", - core.str(), launch_msg->brisc_noc_id); + TT_THROW( + "Watcher data corruption, unexpected brisc noc_id on core {}: {} (expected 0 or 1)", + core.str(), + launch_msg->brisc_noc_id); } dump_run_state(f, core, launch_msg, launch_msg->run); @@ -513,9 +483,10 @@ static void dump_run_mailboxes(FILE *f, fprintf(f, "b"); } else { log_running_kernels(launch_msg); - TT_THROW("Watcher data corruption, unexpected brisc enable on core {}: {} (expected 0 or 1)", - core.str(), - launch_msg->enable_brisc); + TT_THROW( + "Watcher data corruption, unexpected brisc enable on core {}: {} (expected 0 or 1)", + core.str(), + launch_msg->enable_brisc); } if (launch_msg->enable_ncrisc == 1) { @@ -524,9 +495,10 @@ static void dump_run_mailboxes(FILE *f, fprintf(f, "n"); } else { log_running_kernels(launch_msg); - TT_THROW("Watcher data corruption, unexpected ncrisc enable on core {}: {} (expected 0 or 1)", - core.str(), - launch_msg->enable_ncrisc); + TT_THROW( + "Watcher data corruption, unexpected ncrisc enable on core {}: {} (expected 0 or 1)", + core.str(), + launch_msg->enable_ncrisc); } if (launch_msg->enable_triscs == 1) { @@ -535,9 +507,10 @@ static void dump_run_mailboxes(FILE *f, fprintf(f, "t"); } else { log_running_kernels(launch_msg); - TT_THROW("Watcher data corruption, unexpected trisc enable on core {}: {} (expected 0 or 1)", - core.str(), - launch_msg->enable_triscs); + TT_THROW( + "Watcher data corruption, unexpected trisc enable on core {}: {} (expected 0 or 1)", + core.str(), + launch_msg->enable_triscs); } fprintf(f, " "); @@ -551,13 +524,13 @@ static void dump_run_mailboxes(FILE *f, fprintf(f, " "); } -static void dump_debug_status(FILE *f, CoreCoord core, const launch_msg_t *launch_msg, const debug_status_msg_t *debug_status) { +static void dump_debug_status( + FILE *f, CoreCoord core, const launch_msg_t *launch_msg, const debug_status_msg_t *debug_status) { string out = get_debug_status(core, launch_msg, debug_status); fprintf(f, "%s ", out.c_str()); } static void dump_sync_regs(FILE *f, Device *device, CoreCoord core) { - // Read back all of the stream state, most of it is unused std::vector data; for (uint32_t operand = 0; operand < NUM_CIRCULAR_BUFFERS; operand++) { @@ -579,40 +552,46 @@ static void dump_sync_regs(FILE *f, Device *device, CoreCoord core) { } } -static void validate_kernel_ids(FILE *f, - std::map& used_kernel_names, - chip_id_t device_id, - CoreCoord core, - const launch_msg_t *launch) { - +static void validate_kernel_ids( + FILE *f, std::map &used_kernel_names, chip_id_t device_id, CoreCoord core, const launch_msg_t *launch) { if (launch->brisc_watcher_kernel_id >= kernel_names.size()) { - TT_THROW("Watcher data corruption, unexpected brisc kernel id on Device {} core {}: {} (last valid {})", - device_id, core.str(), launch->brisc_watcher_kernel_id, kernel_names.size()); - + TT_THROW( + "Watcher data corruption, unexpected brisc kernel id on Device {} core {}: {} (last valid {})", + device_id, + core.str(), + launch->brisc_watcher_kernel_id, + kernel_names.size()); } used_kernel_names[launch->brisc_watcher_kernel_id] = true; if (launch->ncrisc_watcher_kernel_id >= kernel_names.size()) { - TT_THROW("Watcher data corruption, unexpected ncrisc kernel id on Device {} core {}: {} (last valid {})", - device_id, core.str(), launch->ncrisc_watcher_kernel_id, kernel_names.size()); + TT_THROW( + "Watcher data corruption, unexpected ncrisc kernel id on Device {} core {}: {} (last valid {})", + device_id, + core.str(), + launch->ncrisc_watcher_kernel_id, + kernel_names.size()); } used_kernel_names[launch->ncrisc_watcher_kernel_id] = true; if (launch->triscs_watcher_kernel_id >= kernel_names.size()) { - TT_THROW("Watcher data corruption, unexpected trisc kernel id on Device {} core {}: {} (last valid {})", - device_id, core.str(), launch->triscs_watcher_kernel_id, kernel_names.size()); + TT_THROW( + "Watcher data corruption, unexpected trisc kernel id on Device {} core {}: {} (last valid {})", + device_id, + core.str(), + launch->triscs_watcher_kernel_id, + kernel_names.size()); } used_kernel_names[launch->triscs_watcher_kernel_id] = true; } static void dump_core( FILE *f, - std::map& used_kernel_names, + std::map &used_kernel_names, Device *device, CoreDescriptor logical_core, bool is_active_eth_core, - std::set> &paused_cores -) { + std::set> &paused_cores) { // Watcher only treats ethernet + worker cores. bool is_eth_core = (logical_core.type == CoreType::ETH); CoreCoord core = device->physical_core_from_logical_core(logical_core.coord, logical_core.type); @@ -621,8 +600,12 @@ static void dump_core( string core_type = is_eth_core ? "ethnet" : "worker"; string core_str = fmt::format( "Device {} {} core(x={:2},y={:2}) phys(x={:2},y={:2})", - device->id(), core_type, logical_core.coord.x, logical_core.coord.y, core.x, core.y - ); + device->id(), + core_type, + logical_core.coord.x, + logical_core.coord.y, + core.x, + core.y); fprintf(f, "%s: ", core_str.c_str()); // Ethernet cores have a different mailbox base addr @@ -651,14 +634,23 @@ static void dump_core( // Ethernet cores have firmware that starts at address 0, so no need to check it for a // magic value. if (!is_eth_core) - dump_l1_status(f, device, core, &mbox_data->launch); + dump_l1_status(f, device, core, &mbox_data->launch); if (!tt::llrt::OptionsG.watcher_noc_sanitize_disabled()) { for (uint32_t noc = 0; noc < NUM_NOCS; noc++) { - dump_noc_sanity_status(f, device, core, core_str, &mbox_data->launch, noc, &mbox_data->sanitize_noc[noc], mbox_data->debug_status); + dump_noc_sanity_status( + f, + device, + core, + core_str, + &mbox_data->launch, + noc, + &mbox_data->sanitize_noc[noc], + mbox_data->debug_status); } } if (!tt::llrt::OptionsG.watcher_assert_disabled()) - dump_assert_status(f, device, core, core_str, &mbox_data->launch, &mbox_data->assert_status, mbox_data->debug_status); + dump_assert_status( + f, device, core, core_str, &mbox_data->launch, &mbox_data->assert_status, mbox_data->debug_status); if (!tt::llrt::OptionsG.watcher_pause_disabled()) dump_pause_status(core, &mbox_data->pause_status, paused_cores); } @@ -678,7 +670,9 @@ static void dump_core( if (is_eth_core) { fprintf(f, "k_id:%d", mbox_data->launch.brisc_watcher_kernel_id); } else { - fprintf(f, "k_ids:%d|%d|%d", + fprintf( + f, + "k_ids:%d|%d|%d", mbox_data->launch.brisc_watcher_kernel_id, mbox_data->launch.ncrisc_watcher_kernel_id, mbox_data->launch.triscs_watcher_kernel_id); @@ -696,8 +690,8 @@ static void dump_core( } // noinline so that this fn exists to be called from dgb -static void __attribute__((noinline)) dump(FILE *f) { - for (Device* device : devices) { +static void __attribute__((noinline)) dump(FILE *f) { + for (Device *device : devices) { if (f != stdout && f != stderr) { log_info(LogLLRuntime, "Watcher checking device {}", device->id()); } @@ -736,16 +730,15 @@ static void __attribute__((noinline)) dump(FILE *f) { string paused_cores_str = "Paused cores: "; for (auto &core_and_risc : paused_cores) { paused_cores_str += fmt::format( - "{}:{}, ", - core_and_risc.first.str(), - get_riscv_name(core_and_risc.first, core_and_risc.second) - ); + "{}:{}, ", core_and_risc.first.str(), get_riscv_name(core_and_risc.first, core_and_risc.second)); } paused_cores_str += "\n"; fprintf(f, "%s", paused_cores_str.c_str()); log_info(LogLLRuntime, "{}Press ENTER to unpause core(s) and continue...", paused_cores_str); if (!tt::llrt::OptionsG.get_watcher_auto_unpause()) { - while (std::cin.get() != '\n') { ; } + while (std::cin.get() != '\n') { + ; + } } // Clear all pause flags @@ -764,12 +757,8 @@ static void __attribute__((noinline)) dump(FILE *f) { } // Clear only the one flag that we saved, in case another one was raised on device - auto pause_data = tt::llrt::read_hex_vec_from_core( - device->id(), - phys_core, - addr, - sizeof(debug_pause_msg_t) - ); + auto pause_data = + tt::llrt::read_hex_vec_from_core(device->id(), phys_core, addr, sizeof(debug_pause_msg_t)); auto pause_msg = reinterpret_cast(&(pause_data[0])); pause_msg->flags[risc_id] = 0; tt::llrt::write_hex_vec_to_core(device->id(), phys_core, pause_data, addr); @@ -799,7 +788,7 @@ static void watcher_loop(int sleep_usecs) { double last_elapsed_time = watcher::get_elapsed_secs(); while (true) { // Delay an amount such that we wait a minimum of the set sleep_usecs between polls. - while ((watcher::get_elapsed_secs() - last_elapsed_time) < ((double) sleep_usecs) / 1000000.) { + while ((watcher::get_elapsed_secs() - last_elapsed_time) < ((double)sleep_usecs) / 1000000.) { // Odds are this thread will be killed during the usleep, the kill signal is // watcher::enabled = false from the main thread. if (!watcher::enabled) @@ -825,7 +814,7 @@ static void watcher_loop(int sleep_usecs) { try { dump(logfile); - } catch (std::runtime_error& e) { + } catch (std::runtime_error &e) { // Depending on whether test mode is enabled, catch and stop server, or re-throw. if (tt::llrt::OptionsG.get_test_mode_enabled()) { watcher::watcher_killed_due_to_error = true; @@ -846,18 +835,18 @@ static void watcher_loop(int sleep_usecs) { watcher::server_running = false; } -} // namespace watcher +} // namespace watcher void watcher_init(Device *device) { - // Initialize debug status values to "unknown" - std::vector debug_status_init_val = { 'X', 'X', 'X', 'X', 'X' }; + std::vector debug_status_init_val = {'X', 'X', 'X', 'X', 'X'}; // Initialize debug sanity L1/NOC addresses to sentinel "all ok" std::vector debug_sanity_init_val; debug_sanity_init_val.resize(NUM_NOCS * sizeof(debug_sanitize_noc_addr_msg_t) / sizeof(uint32_t)); static_assert(sizeof(debug_sanitize_noc_addr_msg_t) % sizeof(uint32_t) == 0); - debug_sanitize_noc_addr_msg_t *data = reinterpret_cast(&(debug_sanity_init_val[0])); + debug_sanitize_noc_addr_msg_t *data = + reinterpret_cast(&(debug_sanity_init_val[0])); for (int i = 0; i < NUM_NOCS; i++) { data[i].noc_addr = watcher::DEBUG_SANITIZE_NOC_SENTINEL_OK_64; data[i].l1_addr = watcher::DEBUG_SANITIZE_NOC_SENTINEL_OK_32; @@ -899,17 +888,119 @@ void watcher_init(Device *device) { else watcher_enable_init_val.push_back(WatcherDisabled); - // Initialize worker cores debug values CoreCoord grid_size = device->logical_grid_size(); + + // Initialize Debug Delay feature + std::map debug_delays_val; + for (tt::llrt::RunTimeDebugFeatures delay_feature = tt::llrt::RunTimeDebugFeatureReadDebugDelay; + (int)delay_feature <= tt::llrt::RunTimeDebugFeatureAtomicDebugDelay; + delay_feature = (tt::llrt::RunTimeDebugFeatures)((int)delay_feature + 1)) { + vector chip_ids = tt::llrt::OptionsG.get_feature_chip_ids(delay_feature); + bool this_chip_enabled = tt::llrt::OptionsG.get_feature_all_chips(delay_feature) || + std::find(chip_ids.begin(), chip_ids.end(), device->id()) != chip_ids.end(); + if (this_chip_enabled) { + static_assert(sizeof(debug_sanitize_noc_addr_msg_t) % sizeof(uint32_t) == 0); + debug_insert_delays_msg_t delay_setup; + + // Create the mask based on the feature + uint32_t hart_mask = tt::llrt::OptionsG.get_feature_riscv_mask(delay_feature); + switch (delay_feature) { + case tt::llrt::RunTimeDebugFeatureReadDebugDelay: delay_setup.read_delay_riscv_mask = hart_mask; break; + case tt::llrt::RunTimeDebugFeatureWriteDebugDelay: + delay_setup.write_delay_riscv_mask = hart_mask; + break; + case tt::llrt::RunTimeDebugFeatureAtomicDebugDelay: + delay_setup.atomic_delay_riscv_mask = hart_mask; + break; + default: break; + } + + for (CoreType core_type : {CoreType::WORKER, CoreType::ETH}) { + vector delayed_cores = tt::llrt::OptionsG.get_feature_cores(delay_feature)[core_type]; + for (tt_xy_pair logical_core : delayed_cores) { + CoreCoord phys_core; + bool valid_logical_core = true; + try { + phys_core = device->physical_core_from_logical_core(logical_core, core_type); + } catch (std::runtime_error &error) { + valid_logical_core = false; + } + if (valid_logical_core) { + // Update the masks for the core + if (debug_delays_val.find(phys_core) != debug_delays_val.end()) { + debug_delays_val[phys_core].read_delay_riscv_mask |= delay_setup.read_delay_riscv_mask; + debug_delays_val[phys_core].write_delay_riscv_mask |= delay_setup.write_delay_riscv_mask; + debug_delays_val[phys_core].atomic_delay_riscv_mask |= delay_setup.atomic_delay_riscv_mask; + } else { + debug_delays_val.insert({phys_core, delay_setup}); + } + } else { + log_warning( + tt::LogMetal, + "TT_METAL_{}_CORES included {} core with logical coordinates {} (physical coordinates {}), which is not a valid core on device {}. This coordinate will be ignored by {} feature.", + tt::llrt::RunTimeDebugFeatureNames[delay_feature], + tt::llrt::get_core_type_name(core_type), + logical_core.str(), + valid_logical_core? phys_core.str() : "INVALID", + device->id(), + tt::llrt::RunTimeDebugFeatureNames[delay_feature] + ); + } + } + } + } + } + + // Iterate over debug_delays_val and print what got configured where + for (auto &delay : debug_delays_val) { + log_info( + tt::LogMetal, + "Configured Watcher debug delays for device {}, core {}: read_delay_cores_mask=0x{:x}, " + "write_delay_cores_mask=0x{:x}, atomic_delay_cores_mask=0x{:x}. Delay cycles: {}", + device->id(), + delay.first.str().c_str(), + delay.second.read_delay_riscv_mask, + delay.second.write_delay_riscv_mask, + delay.second.atomic_delay_riscv_mask, + tt::llrt::OptionsG.get_watcher_debug_delay() + ); + } + + std::vector + debug_insert_delays_init_val_zero; // Cores that are not involved get this value (0 for both masks) + debug_insert_delays_init_val_zero.resize(sizeof(debug_insert_delays_msg_t) / sizeof(uint32_t)); + std::vector debug_insert_delays_init_val; + debug_insert_delays_init_val.resize(sizeof(debug_insert_delays_msg_t) / sizeof(uint32_t)); + + // Initialize worker cores debug values for (uint32_t y = 0; y < grid_size.y; y++) { for (uint32_t x = 0; x < grid_size.x; x++) { CoreCoord logical_core(x, y); CoreCoord worker_core = device->worker_core_from_logical_core(logical_core); - tt::llrt::write_hex_vec_to_core(device->id(), worker_core, watcher_enable_init_val, GET_MAILBOX_ADDRESS_HOST(watcher_enable)); - tt::llrt::write_hex_vec_to_core(device->id(), worker_core, debug_status_init_val, GET_MAILBOX_ADDRESS_HOST(debug_status)); - tt::llrt::write_hex_vec_to_core(device->id(), worker_core, debug_sanity_init_val, GET_MAILBOX_ADDRESS_HOST(sanitize_noc)); - tt::llrt::write_hex_vec_to_core(device->id(), worker_core, debug_assert_init_val, GET_MAILBOX_ADDRESS_HOST(assert_status)); - tt::llrt::write_hex_vec_to_core(device->id(), worker_core, debug_pause_init_val, GET_MAILBOX_ADDRESS_HOST(pause_status)); + tt::llrt::write_hex_vec_to_core( + device->id(), worker_core, watcher_enable_init_val, GET_MAILBOX_ADDRESS_HOST(watcher_enable)); + tt::llrt::write_hex_vec_to_core( + device->id(), worker_core, debug_status_init_val, GET_MAILBOX_ADDRESS_HOST(debug_status)); + tt::llrt::write_hex_vec_to_core( + device->id(), worker_core, debug_sanity_init_val, GET_MAILBOX_ADDRESS_HOST(sanitize_noc)); + tt::llrt::write_hex_vec_to_core( + device->id(), worker_core, debug_assert_init_val, GET_MAILBOX_ADDRESS_HOST(assert_status)); + tt::llrt::write_hex_vec_to_core( + device->id(), worker_core, debug_pause_init_val, GET_MAILBOX_ADDRESS_HOST(pause_status)); + if (debug_delays_val.find(worker_core) != debug_delays_val.end()) { + debug_insert_delays_init_val[0] = *((uint32_t *)(&debug_delays_val[worker_core])); + tt::llrt::write_hex_vec_to_core( + device->id(), + worker_core, + debug_insert_delays_init_val, + GET_MAILBOX_ADDRESS_HOST(debug_insert_delays)); + } else { + tt::llrt::write_hex_vec_to_core( + device->id(), + worker_core, + debug_insert_delays_init_val_zero, + GET_MAILBOX_ADDRESS_HOST(debug_insert_delays)); + } tt::llrt::write_hex_vec_to_core(device->id(), worker_core, debug_ring_buf_init_val, RING_BUFFER_ADDR); } } @@ -930,61 +1021,62 @@ void watcher_init(Device *device) { device->id(), physical_core, watcher_enable_init_val, - is_active_eth_core ? - GET_ETH_MAILBOX_ADDRESS_HOST(watcher_enable) : - GET_IERISC_MAILBOX_ADDRESS_HOST(watcher_enable) - ); + is_active_eth_core ? GET_ETH_MAILBOX_ADDRESS_HOST(watcher_enable) + : GET_IERISC_MAILBOX_ADDRESS_HOST(watcher_enable)); tt::llrt::write_hex_vec_to_core( device->id(), physical_core, debug_status_init_val, - is_active_eth_core ? - GET_ETH_MAILBOX_ADDRESS_HOST(debug_status) : - GET_IERISC_MAILBOX_ADDRESS_HOST(debug_status) - ); + is_active_eth_core ? GET_ETH_MAILBOX_ADDRESS_HOST(debug_status) + : GET_IERISC_MAILBOX_ADDRESS_HOST(debug_status)); tt::llrt::write_hex_vec_to_core( device->id(), physical_core, debug_sanity_init_val, - is_active_eth_core ? - GET_ETH_MAILBOX_ADDRESS_HOST(sanitize_noc) : - GET_IERISC_MAILBOX_ADDRESS_HOST(sanitize_noc) - ); + is_active_eth_core ? GET_ETH_MAILBOX_ADDRESS_HOST(sanitize_noc) + : GET_IERISC_MAILBOX_ADDRESS_HOST(sanitize_noc)); tt::llrt::write_hex_vec_to_core( device->id(), physical_core, debug_assert_init_val, - is_active_eth_core ? - GET_ETH_MAILBOX_ADDRESS_HOST(assert_status) : - GET_IERISC_MAILBOX_ADDRESS_HOST(assert_status) - ); + is_active_eth_core ? GET_ETH_MAILBOX_ADDRESS_HOST(assert_status) + : GET_IERISC_MAILBOX_ADDRESS_HOST(assert_status)); tt::llrt::write_hex_vec_to_core( device->id(), physical_core, debug_pause_init_val, - is_active_eth_core ? - GET_ETH_MAILBOX_ADDRESS_HOST(pause_status) : - GET_IERISC_MAILBOX_ADDRESS_HOST(pause_status) - ); + is_active_eth_core ? GET_ETH_MAILBOX_ADDRESS_HOST(pause_status) + : GET_IERISC_MAILBOX_ADDRESS_HOST(pause_status)); + + if (debug_delays_val.find(physical_core) != debug_delays_val.end()) { + debug_insert_delays_init_val[0] = *((uint32_t *)(&debug_delays_val[physical_core])); + tt::llrt::write_hex_vec_to_core( + device->id(), + physical_core, + debug_insert_delays_init_val, + GET_MAILBOX_ADDRESS_HOST(debug_insert_delays)); + } else { + tt::llrt::write_hex_vec_to_core( + device->id(), + physical_core, + debug_insert_delays_init_val_zero, + GET_MAILBOX_ADDRESS_HOST(debug_insert_delays)); + } + tt::llrt::write_hex_vec_to_core( device->id(), physical_core, debug_ring_buf_init_val, - is_active_eth_core ? - eth_l1_mem::address_map::ERISC_RING_BUFFER_ADDR : - RING_BUFFER_ADDR - ); + is_active_eth_core ? eth_l1_mem::address_map::ERISC_RING_BUFFER_ADDR : RING_BUFFER_ADDR); } log_debug(LogLLRuntime, "Watcher initialized device {}", device->id()); } void watcher_attach(Device *device) { - const std::lock_guard lock(watcher::watch_mutex); if (!watcher::enabled && tt::llrt::OptionsG.get_watcher_enabled()) { - watcher::create_log_file(); if (!watcher::kernel_file) { watcher::create_kernel_file(); @@ -1013,7 +1105,6 @@ void watcher_attach(Device *device) { } void watcher_detach(Device *old) { - { const std::lock_guard lock(watcher::watch_mutex); @@ -1040,10 +1131,12 @@ void watcher_detach(Device *old) { // If we shut down the watcher server, wait until it finishes up. Do this without holding the // lock because the watcher server may be waiting on it before it does its exit check. if (watcher::devices.empty()) - while (watcher::server_running) { ; } + while (watcher::server_running) { + ; + } } -int watcher_register_kernel(const string& name) { +int watcher_register_kernel(const string &name) { const std::lock_guard lock(watcher::watch_mutex); if (!watcher::kernel_file) @@ -1056,29 +1149,19 @@ int watcher_register_kernel(const string& name) { return k_id; } -bool watcher_server_killed_due_to_error() { - return watcher::watcher_killed_due_to_error; -} +bool watcher_server_killed_due_to_error() { return watcher::watcher_killed_due_to_error; } -void watcher_server_set_error_flag(bool val) { - watcher::watcher_killed_due_to_error = val; -} +void watcher_server_set_error_flag(bool val) { watcher::watcher_killed_due_to_error = val; } -string watcher_server_get_exception_message() { - return watcher::watcher_exception_message; -} +string watcher_server_get_exception_message() { return watcher::watcher_exception_message; } -void watcher_clear_log() { - watcher::create_log_file(); -} +void watcher_clear_log() { watcher::create_log_file(); } string watcher_get_log_file_name() { return tt::llrt::OptionsG.get_root_dir() + watcher::logfile_path + watcher::logfile_name; } -int watcher_get_dump_count() { - return watcher::dump_count; -} +int watcher_get_dump_count() { return watcher::dump_count; } void watcher_dump() { if (!watcher::logfile) @@ -1098,10 +1181,10 @@ void watcher_read_kernel_ids_from_file() { size_t len; while (getline(&line, &len, f) != -1) { string s(line); - s = s.substr(0, s.length()-1); // Strip newline - int k_id = stoi(s.substr(0, s.find(":"))); // Format is {k_id}: {kernel} - watcher::kernel_names.push_back(s.substr(s.find(":")+2)); + s = s.substr(0, s.length() - 1); // Strip newline + int k_id = stoi(s.substr(0, s.find(":"))); // Format is {k_id}: {kernel} + watcher::kernel_names.push_back(s.substr(s.find(":") + 2)); } } -} // namespace tt +} // namespace tt diff --git a/tt_metal/impl/program/program.cpp b/tt_metal/impl/program/program.cpp index 4e970d4ffb5..1edcca12168 100644 --- a/tt_metal/impl/program/program.cpp +++ b/tt_metal/impl/program/program.cpp @@ -47,12 +47,16 @@ size_t KernelCompileHash(const std::shared_ptr kernel, JitBuildOptions & // Also account for watcher/dprint enabled in hash because they enable additional code to // be compiled into the kernel. string compile_hash_str = fmt::format( - "{}_{}_{}_{}_{}", + "{}_{}_{}_{}", build_key, std::to_string(std::hash{}(build_options.hlk_desc)), kernel->compute_hash(), - tt::llrt::OptionsG.get_watcher_enabled(), - tt::llrt::OptionsG.get_dprint_enabled()); + tt::llrt::OptionsG.get_watcher_enabled()); + + for (int i = 0; i < llrt::RunTimeDebugFeatureCount; i++) { + compile_hash_str += "_"; + compile_hash_str += tt::llrt::OptionsG.get_feature_hash_string((llrt::RunTimeDebugFeatures)i); + } size_t compile_hash = std::hash{}(compile_hash_str); #ifdef GENERATE_HASH_LOG diff --git a/tt_metal/jit_build/build.cpp b/tt_metal/jit_build/build.cpp index e0a18f4e38e..ea931df4b93 100644 --- a/tt_metal/jit_build/build.cpp +++ b/tt_metal/jit_build/build.cpp @@ -102,7 +102,7 @@ void JitBuildEnv::init(uint32_t build_key, tt::ARCH arch) { this->defines_ += "-DWATCHER_DISABLE_" + feature + " "; } - if (tt::llrt::OptionsG.get_dprint_enabled()) { + if (tt::llrt::OptionsG.get_feature_enabled(tt::llrt::RunTimeDebugFeatureDprint)) { this->defines_ += "-DDEBUG_PRINT_ENABLED "; } @@ -110,6 +110,10 @@ void JitBuildEnv::init(uint32_t build_key, tt::ARCH arch) { this->defines_ += "-DDEBUG_NULL_KERNELS "; } + if (tt::llrt::OptionsG.get_watcher_debug_delay()) { + this->defines_ += "-DWATCHER_DEBUG_DELAY=" + to_string(tt::llrt::OptionsG.get_watcher_debug_delay()) + " "; + } + // Includes // TODO(pgk) this list is insane this->includes_ = string("") + "-I. " + "-I.. " + "-I" + this->root_ + " " + "-I" + this->root_ + "tt_metal " + diff --git a/tt_metal/llrt/rtoptions.cpp b/tt_metal/llrt/rtoptions.cpp index aaf6250b4a4..6e36559ca88 100644 --- a/tt_metal/llrt/rtoptions.cpp +++ b/tt_metal/llrt/rtoptions.cpp @@ -2,11 +2,13 @@ // // SPDX-License-Identifier: Apache-2.0 -#include +#include "rtoptions.hpp" + #include +#include + #include -#include "rtoptions.hpp" #include "impl/debug/dprint_server.hpp" #include "tools/profiler/profiler_state.hpp" @@ -16,20 +18,30 @@ namespace tt { namespace llrt { +const char *RunTimeDebugFeatureNames[RunTimeDebugFeatureCount] = { + "DPRINT", + "READ_DEBUG_DELAY", + "WRITE_DEBUG_DELAY", + "ATOMIC_DEBUG_DELAY", +}; + // Note: global initialization order is non-deterministic // This is ok so long as this gets initialized before decisions are based on // env state RunTimeOptions OptionsG; RunTimeOptions::RunTimeOptions() { - if (const char* root_dir_ptr = std::getenv("TT_METAL_HOME")) { + if (const char *root_dir_ptr = std::getenv("TT_METAL_HOME")) { root_dir = string(root_dir_ptr) + "/"; } build_map_enabled = (getenv("TT_METAL_KERNEL_MAP") != nullptr); ParseWatcherEnv(); - ParseDPrintEnv(); + + for (int i = 0; i < RunTimeDebugFeatureCount; i++) { + ParseFeatureEnv((RunTimeDebugFeatures)i); + } // Test mode has no env var, default is disabled test_mode_enabled = false; @@ -41,22 +53,26 @@ RunTimeOptions::RunTimeOptions() { profiler_enabled = true; } #endif - TT_FATAL(!(get_dprint_enabled() && get_profiler_enabled()), "Cannot enable both debug printing and profiling"); + TT_FATAL( + !(get_feature_enabled(RunTimeDebugFeatureDprint) && get_profiler_enabled()), + "Cannot enable both debug printing and profiling"); null_kernels = (std::getenv("TT_METAL_NULL_KERNELS") != nullptr); clear_l1 = true; const char *clear_l1_enabled_str = std::getenv("TT_METAL_CLEAR_L1"); if (clear_l1_enabled_str != nullptr) { - if (clear_l1_enabled_str[0] == '0') clear_l1 = false; - if (clear_l1_enabled_str[0] == '1') clear_l1 = true; + if (clear_l1_enabled_str[0] == '0') + clear_l1 = false; + if (clear_l1_enabled_str[0] == '1') + clear_l1 = true; } const char *riscv_debug_info_enabled_str = std::getenv("TT_METAL_RISCV_DEBUG_INFO"); set_riscv_debug_info_enabled(riscv_debug_info_enabled_str != nullptr); } -const std::string& RunTimeOptions::get_root_dir() { +const std::string &RunTimeOptions::get_root_dir() { if (root_dir == "") { TT_THROW("Env var " + std::string("TT_METAL_HOME") + " is not set."); } @@ -91,12 +107,7 @@ void RunTimeOptions::ParseWatcherEnv() { // Any watcher features to disabled based on env var. std::set all_features = { - watcher_status_str, - watcher_noc_sanitize_str, - watcher_assert_str, - watcher_pause_str, - watcher_ring_buffer_str - }; + watcher_status_str, watcher_noc_sanitize_str, watcher_assert_str, watcher_pause_str, watcher_ring_buffer_str}; for (std::string feature : all_features) { std::string env_var("TT_METAL_WATCHER_DISABLE_"); env_var += feature; @@ -104,36 +115,51 @@ void RunTimeOptions::ParseWatcherEnv() { watcher_disabled_features.insert(feature); } } + + const char *watcher_debug_delay_str = getenv("TT_METAL_WATCHER_DEBUG_DELAY"); + if (watcher_debug_delay_str != nullptr) { + sscanf(watcher_debug_delay_str, "%u", &watcher_debug_delay); + // Assert watcher is also enabled (TT_METAL_WATCHER=1) + TT_ASSERT(watcher_enabled, "TT_METAL_WATCHER_DEBUG_DELAY requires TT_METAL_WATCHER"); + // Assert TT_METAL_WATCHER_DISABLE_NOC_SANITIZE is either not set or set to 0 + TT_ASSERT( + watcher_disabled_features.find(watcher_noc_sanitize_str) == watcher_disabled_features.end(), + "TT_METAL_WATCHER_DEBUG_DELAY requires TT_METAL_WATCHER_DISABLE_NOC_SANITIZE=0"); + } } -void RunTimeOptions::ParseDPrintEnv() { - ParseDPrintCoreRange("TT_METAL_DPRINT_CORES", CoreType::WORKER); - ParseDPrintCoreRange("TT_METAL_DPRINT_ETH_CORES", CoreType::ETH); - ParseDPrintChipIds("TT_METAL_DPRINT_CHIPS"); - ParseDPrintRiscvMask("TT_METAL_DPRINT_RISCVS"); - ParseDPrintFileName("TT_METAL_DPRINT_FILE"); +void RunTimeOptions::ParseFeatureEnv(RunTimeDebugFeatures feature) { + std::string feature_env_prefix("TT_METAL_"); + feature_env_prefix += RunTimeDebugFeatureNames[feature]; + + ParseFeatureCoreRange(feature, feature_env_prefix + "_CORES", CoreType::WORKER); + ParseFeatureCoreRange(feature, feature_env_prefix + "_ETH_CORES", CoreType::ETH); + ParseFeatureChipIds(feature, feature_env_prefix + "_CHIPS"); + ParseFeatureRiscvMask(feature, feature_env_prefix + "_RISCVS"); + ParseFeatureFileName(feature, feature_env_prefix + "_FILE"); - // Set dprint enabled if the user asked for any dprint cores - dprint_enabled = false; - for (auto &core_type_and_all_flag : dprint_all_cores) + // Set feature enabled if the user asked for any feature cores + feature_targets[feature].enabled = false; + for (auto &core_type_and_all_flag : feature_targets[feature].all_cores) if (core_type_and_all_flag.second) - dprint_enabled = true; - for (auto &core_type_and_cores : dprint_cores) + feature_targets[feature].enabled = true; + for (auto &core_type_and_cores : feature_targets[feature].cores) if (core_type_and_cores.second.size() > 0) - dprint_enabled = true; + feature_targets[feature].enabled = true; const char *print_noc_xfers = std::getenv("TT_METAL_DPRINT_NOC_TRANSFER_DATA"); if (print_noc_xfers != nullptr) dprint_noc_transfer_data = true; }; -void RunTimeOptions::ParseDPrintCoreRange(const char* env_var, CoreType core_type) { - char *str = std::getenv(env_var); +void RunTimeOptions::ParseFeatureCoreRange( + RunTimeDebugFeatures feature, const std::string &env_var, CoreType core_type) { + char *str = std::getenv(env_var.c_str()); vector cores; // Check if "all" is specified, rather than a range of cores. if (str != nullptr && strcmp(str, "all") == 0) { - dprint_all_cores[core_type] = true; + feature_targets[feature].all_cores[core_type] = true; return; } if (str != nullptr) { @@ -169,8 +195,9 @@ void RunTimeOptions::ParseDPrintCoreRange(const char* env_var, CoreType core_typ } cores.push_back({x, y}); str = strchr(str, ','); - str = strchr(str+1, ','); - if (str != nullptr) str++; + str = strchr(str + 1, ','); + if (str != nullptr) + str++; } } } else { @@ -179,12 +206,12 @@ void RunTimeOptions::ParseDPrintCoreRange(const char* env_var, CoreType core_typ } // Set the core range - dprint_cores[core_type] = cores; + feature_targets[feature].cores[core_type] = cores; } -void RunTimeOptions::ParseDPrintChipIds(const char* env_var) { +void RunTimeOptions::ParseFeatureChipIds(RunTimeDebugFeatures feature, const std::string &env_var) { vector chips; - char *env_var_str = std::getenv(env_var); + char *env_var_str = std::getenv(env_var.c_str()); // If the environment variable is not empty, parse it. while (env_var_str != nullptr) { @@ -194,19 +221,20 @@ void RunTimeOptions::ParseDPrintChipIds(const char* env_var) { } chips.push_back(chip); env_var_str = strchr(env_var_str, ','); - if (env_var_str != nullptr) env_var_str++; + if (env_var_str != nullptr) + env_var_str++; } // Default is no chips are specified is chip 0. if (chips.size() == 0) chips.push_back(0); - dprint_chip_ids = chips; + feature_targets[feature].chip_ids = chips; } -void RunTimeOptions::ParseDPrintRiscvMask(const char* env_var) { +void RunTimeOptions::ParseFeatureRiscvMask(RunTimeDebugFeatures feature, const std::string &env_var) { // Default is all RISCVs enabled for printing. uint32_t riscv_mask = DPRINT_RISCV_BR | DPRINT_RISCV_TR0 | DPRINT_RISCV_TR1 | DPRINT_RISCV_TR2 | DPRINT_RISCV_NC; - char *env_var_str = std::getenv(env_var); + char *env_var_str = std::getenv(env_var.c_str()); if (env_var_str != nullptr) { if (strcmp(env_var_str, "BR") == 0) { riscv_mask = DPRINT_RISCV_BR; @@ -222,14 +250,14 @@ void RunTimeOptions::ParseDPrintRiscvMask(const char* env_var) { TT_THROW("Invalid TT_DEBUG_PRINT_RISCV"); } } - dprint_riscv_mask = riscv_mask; + feature_targets[feature].riscv_mask = riscv_mask; } -void RunTimeOptions::ParseDPrintFileName(const char* env_var) { - char *env_var_str = std::getenv(env_var); - dprint_file_name = (env_var_str != nullptr)? std::string(env_var_str) : ""; +void RunTimeOptions::ParseFeatureFileName(RunTimeDebugFeatures feature, const std::string &env_var) { + char *env_var_str = std::getenv(env_var.c_str()); + feature_targets[feature].file_name = (env_var_str != nullptr) ? std::string(env_var_str) : ""; } -} // namespace llrt +} // namespace llrt -} // namespace tt +} // namespace tt diff --git a/tt_metal/llrt/rtoptions.hpp b/tt_metal/llrt/rtoptions.hpp index 79e132550f8..2004949da94 100644 --- a/tt_metal/llrt/rtoptions.hpp +++ b/tt_metal/llrt/rtoptions.hpp @@ -10,16 +10,64 @@ #pragma once -#include -#include #include +#include +#include + #include "tt_metal/common/core_coord.h" -#include "tt_metal/third_party/umd/device/tt_soc_descriptor.h" // For CoreType +#include "tt_metal/third_party/umd/device/tt_soc_descriptor.h" // For CoreType namespace tt { namespace llrt { +static inline const char *get_core_type_name(CoreType ct) { + switch (ct) { + case CoreType::ARC: + return "ARC"; + case CoreType::DRAM: + return "DRAM"; + case CoreType::ETH: + return "ethernet"; + case CoreType::PCIE: + return "PCIE"; + case CoreType::WORKER: + return "worker"; + case CoreType::HARVESTED: + return "harvested"; + case CoreType::ROUTER_ONLY: + return "router_only"; + default: + return "UNKNOWN"; + } +} + +// Enumerates the debug features that can be enabled at runtime. These features allow for +// fine-grained control over targeted cores, chips, harts, etc. +enum RunTimeDebugFeatures { + RunTimeDebugFeatureDprint, + RunTimeDebugFeatureReadDebugDelay, + RunTimeDebugFeatureWriteDebugDelay, + RunTimeDebugFeatureAtomicDebugDelay, + // NOTE: Update RunTimeDebugFeatureNames if adding new features + RunTimeDebugFeatureCount +}; + +extern const char *RunTimeDebugFeatureNames[RunTimeDebugFeatureCount]; + +// TargetSelection stores the targets for a given debug feature. I.e. for which chips, cores, harts +// to enable the feature. +struct TargetSelection { + std::map> cores; + std::map all_cores; + std::map> disabled_cores; + bool enabled; + std::vector chip_ids; + bool all_chips = false; + uint32_t riscv_mask = 0; + std::string file_name; // File name to write output to. +}; + class RunTimeOptions { std::string root_dir; @@ -31,17 +79,10 @@ class RunTimeOptions { bool watcher_append = false; bool watcher_auto_unpause = false; bool watcher_noinline = false; - - std::map> dprint_cores; - std::map dprint_all_cores; - std::map> dprint_disabled_cores; - bool dprint_enabled; - std::vector dprint_chip_ids; - bool dprint_all_chips = false; - uint32_t dprint_riscv_mask = 0; - std::string dprint_file_name; bool dprint_noc_transfer_data = false; + TargetSelection feature_targets[RunTimeDebugFeatureCount]; + bool test_mode_enabled = false; bool profiler_enabled = false; @@ -51,29 +92,30 @@ class RunTimeOptions { bool clear_l1 = false; bool riscv_debug_info_enabled = false; + uint32_t watcher_debug_delay = 0; public: RunTimeOptions(); - const std::string& get_root_dir(); + const std::string &get_root_dir(); inline bool get_build_map_enabled() { return build_map_enabled; } // Info from watcher environment variables, setters included so that user // can override with a SW call. - inline bool get_watcher_enabled() { return watcher_enabled; } - inline void set_watcher_enabled(bool enabled) { watcher_enabled = enabled; } - inline int get_watcher_interval() { return watcher_interval_ms; } + inline bool get_watcher_enabled() { return watcher_enabled; } + inline void set_watcher_enabled(bool enabled) { watcher_enabled = enabled; } + inline int get_watcher_interval() { return watcher_interval_ms; } inline void set_watcher_interval(int interval_ms) { watcher_interval_ms = interval_ms; } - inline int get_watcher_dump_all() { return watcher_dump_all; } - inline void set_watcher_dump_all(bool dump_all) { watcher_dump_all = dump_all; } - inline int get_watcher_append() { return watcher_append; } - inline void set_watcher_append(bool append) { watcher_append = append; } - inline int get_watcher_auto_unpause() { return watcher_auto_unpause; } + inline int get_watcher_dump_all() { return watcher_dump_all; } + inline void set_watcher_dump_all(bool dump_all) { watcher_dump_all = dump_all; } + inline int get_watcher_append() { return watcher_append; } + inline void set_watcher_append(bool append) { watcher_append = append; } + inline int get_watcher_auto_unpause() { return watcher_auto_unpause; } inline void set_watcher_auto_unpause(bool auto_unpause) { watcher_auto_unpause = auto_unpause; } - inline int get_watcher_noinline() { return watcher_noinline; } + inline int get_watcher_noinline() { return watcher_noinline; } inline void set_watcher_noinline(bool noinline) { watcher_noinline = noinline; } - inline std::set& get_watcher_disabled_features() { return watcher_disabled_features; } + inline std::set &get_watcher_disabled_features() { return watcher_disabled_features; } inline bool watcher_status_disabled() { return watcher_feature_disabled(watcher_status_str); } inline bool watcher_noc_sanitize_disabled() { return watcher_feature_disabled(watcher_noc_sanitize_str); } inline bool watcher_assert_disabled() { return watcher_feature_disabled(watcher_assert_str); } @@ -82,55 +124,86 @@ class RunTimeOptions { // Info from DPrint environment variables, setters included so that user can // override with a SW call. - inline bool get_dprint_enabled() { return dprint_enabled; } - inline void set_dprint_enabled(bool enable) { dprint_enabled = enable; } + inline bool get_feature_enabled(RunTimeDebugFeatures feature) { return feature_targets[feature].enabled; } + inline void set_feature_enabled(RunTimeDebugFeatures feature, bool enabled) { + feature_targets[feature].enabled = enabled; + } // Note: dprint cores are logical - inline std::map>& get_dprint_cores() { - return dprint_cores; + inline std::map> &get_feature_cores(RunTimeDebugFeatures feature) { + return feature_targets[feature].cores; } - inline void set_dprint_cores(std::map> cores) { - dprint_cores = cores; + inline void set_feature_cores(RunTimeDebugFeatures feature, std::map> cores) { + feature_targets[feature].cores = cores; } - inline std::map>& get_dprint_disabled_cores() { - return dprint_disabled_cores; + inline std::map> &get_feature_disabled_cores(RunTimeDebugFeatures feature) { + return feature_targets[feature].disabled_cores; } - inline void set_dprint_disabled_cores(std::map> disabled_cores) { - dprint_disabled_cores = disabled_cores; + inline void set_feature_disabled_cores( + RunTimeDebugFeatures feature, std::map> disabled_cores) { + feature_targets[feature].disabled_cores = disabled_cores; } // An alternative to setting cores by range, a flag to enable all. - inline void set_dprint_all_cores(CoreType core_type, bool all_cores) { - dprint_all_cores[core_type] = all_cores; + inline void set_feature_all_cores(RunTimeDebugFeatures feature, CoreType core_type, bool all_cores) { + feature_targets[feature].all_cores[core_type] = all_cores; + } + inline bool get_feature_all_cores(RunTimeDebugFeatures feature, CoreType core_type) { + return feature_targets[feature].all_cores[core_type]; } - inline bool get_dprint_all_cores(CoreType core_type) { return dprint_all_cores[core_type]; } // Note: core range is inclusive - inline void set_dprint_core_range(CoreCoord start, CoreCoord end, CoreType core_type) { - dprint_cores[core_type] = std::vector(); + inline void set_feature_core_range( + RunTimeDebugFeatures feature, CoreCoord start, CoreCoord end, CoreType core_type) { + feature_targets[feature].cores[core_type] = std::vector(); for (uint32_t x = start.x; x <= end.x; x++) { for (uint32_t y = start.y; y <= end.y; y++) { - dprint_cores[core_type].push_back({x, y}); + feature_targets[feature].cores[core_type].push_back({x, y}); } } } - inline std::vector& get_dprint_chip_ids() { return dprint_chip_ids; } - inline void set_dprint_chip_ids(std::vector chip_ids) { - dprint_chip_ids = chip_ids; + inline std::vector &get_feature_chip_ids(RunTimeDebugFeatures feature) { + return feature_targets[feature].chip_ids; + } + inline void set_feature_chip_ids(RunTimeDebugFeatures feature, std::vector chip_ids) { + feature_targets[feature].chip_ids = chip_ids; } // An alternative to setting cores by range, a flag to enable all. - inline void set_dprint_all_chips(bool all_chips) { - dprint_all_chips = all_chips; + inline void set_feature_all_chips(RunTimeDebugFeatures feature, bool all_chips) { + feature_targets[feature].all_chips = all_chips; + } + inline bool get_feature_all_chips(RunTimeDebugFeatures feature) { return feature_targets[feature].all_chips; } + inline uint32_t get_feature_riscv_mask(RunTimeDebugFeatures feature) { return feature_targets[feature].riscv_mask; } + inline void set_feature_riscv_mask(RunTimeDebugFeatures feature, uint32_t riscv_mask) { + feature_targets[feature].riscv_mask = riscv_mask; + } + inline std::string get_feature_file_name(RunTimeDebugFeatures feature) { + return feature_targets[feature].file_name; } - inline bool get_dprint_all_chips() { return dprint_all_chips; } - inline uint32_t get_dprint_riscv_mask() { return dprint_riscv_mask; } - inline void set_dprint_riscv_mask(uint32_t riscv_mask) { - dprint_riscv_mask = riscv_mask; + inline void set_feature_file_name(RunTimeDebugFeatures feature, std::string file_name) { + feature_targets[feature].file_name = file_name; } - inline std::string get_dprint_file_name() { return dprint_file_name; } - inline void set_dprint_file_name(std::string file_name) { - dprint_file_name = file_name; + inline TargetSelection get_feature_targets(RunTimeDebugFeatures feature) { return feature_targets[feature]; } + inline void set_feature_targets(RunTimeDebugFeatures feature, TargetSelection targets) { + feature_targets[feature] = targets; } + inline bool get_dprint_noc_transfers() { return dprint_noc_transfer_data; } inline void set_dprint_noc_transfers(bool val) { dprint_noc_transfer_data = val; } + // Returns the string representation for hash computation. + inline std::string get_feature_hash_string(RunTimeDebugFeatures feature) { + switch (feature) { + case RunTimeDebugFeatureDprint: return std::to_string(get_feature_enabled(feature)); + case RunTimeDebugFeatureReadDebugDelay: + case RunTimeDebugFeatureWriteDebugDelay: + case RunTimeDebugFeatureAtomicDebugDelay: + if (get_feature_enabled(feature)) { + return std::to_string(get_watcher_debug_delay()); + } else { + return "false"; + } + default: return ""; + } + } + // Used for both watcher and dprint servers, this dev option (no corresponding env var) sets // whether to catch exceptions (test mode = true) coming from debug servers or to throw them // (test mode = false). We need to catch for gtesting, since an unhandled exception will kill @@ -151,13 +224,16 @@ class RunTimeOptions { inline bool get_riscv_debug_info_enabled() { return riscv_debug_info_enabled; } inline void set_riscv_debug_info_enabled(bool enable) { riscv_debug_info_enabled = enable; } -private: - // Helper functions to parse DPrint-specific environment vaiables. - void ParseDPrintEnv(); - void ParseDPrintCoreRange(const char* env_var, CoreType core_type); - void ParseDPrintChipIds(const char* env_var); - void ParseDPrintRiscvMask(const char* env_var); - void ParseDPrintFileName(const char* env_var); + inline uint32_t get_watcher_debug_delay() { return watcher_debug_delay; } + void set_watcher_debug_delay(uint32_t delay) { watcher_debug_delay = delay; } + + private: + // Helper functions to parse feature-specific environment vaiables. + void ParseFeatureEnv(RunTimeDebugFeatures feature); + void ParseFeatureCoreRange(RunTimeDebugFeatures feature, const std::string &env_var, CoreType core_type); + void ParseFeatureChipIds(RunTimeDebugFeatures feature, const std::string &env_var); + void ParseFeatureRiscvMask(RunTimeDebugFeatures feature, const std::string &env_var); + void ParseFeatureFileName(RunTimeDebugFeatures feature, const std::string &env_var); // Helper function to parse watcher-specific environment variables. void ParseWatcherEnv(); @@ -175,9 +251,8 @@ class RunTimeOptions { } }; - extern RunTimeOptions OptionsG; -} // namespace llrt +} // namespace llrt -} // namespace tt +} // namespace tt