From 953bd0b1332dca4fa3c850097c69e22d7aefdc19 Mon Sep 17 00:00:00 2001 From: Samarth Agarwal Date: Thu, 14 Nov 2024 18:21:31 +0000 Subject: [PATCH] More work --- .../tt_metal/common/command_queue_fixture.hpp | 1 - .../tt_metal/common/device_fixture.hpp | 32 +++++ .../tt_metal/common/trace_fixture.hpp | 42 ------- .../dispatch/dispatch_program/CMakeLists.txt | 4 - .../dispatch_program/test_sub_device.cpp | 116 +---------------- .../dispatch_trace/test_sub_device.cpp | 116 +---------------- .../dispatch/sub_device_test_utils.hpp | 118 ++++++++++++++++++ 7 files changed, 154 insertions(+), 275 deletions(-) delete mode 100644 tests/tt_metal/tt_metal/common/trace_fixture.hpp rename tests/tt_metal/tt_metal/{ => dispatch}/dispatch_program/test_sub_device.cpp (62%) rename tests/tt_metal/tt_metal/{ => dispatch}/dispatch_trace/test_sub_device.cpp (67%) create mode 100644 tests/tt_metal/tt_metal/dispatch/sub_device_test_utils.hpp diff --git a/tests/tt_metal/tt_metal/common/command_queue_fixture.hpp b/tests/tt_metal/tt_metal/common/command_queue_fixture.hpp index 7a6f0ee7a42..c8d94c706bd 100644 --- a/tests/tt_metal/tt_metal/common/command_queue_fixture.hpp +++ b/tests/tt_metal/tt_metal/common/command_queue_fixture.hpp @@ -8,7 +8,6 @@ #include "common/env_lib.hpp" #include "gtest/gtest.h" #include "dispatch_fixture.hpp" -#include "trace_fixture.hpp" #include "hostdevcommon/common_runtime_address_map.h" #include "hostdevcommon/common_values.hpp" #include "impl/buffers/circular_buffer_types.hpp" diff --git a/tests/tt_metal/tt_metal/common/device_fixture.hpp b/tests/tt_metal/tt_metal/common/device_fixture.hpp index 4977795cdbf..5d419a86d12 100644 --- a/tests/tt_metal/tt_metal/common/device_fixture.hpp +++ b/tests/tt_metal/tt_metal/common/device_fixture.hpp @@ -85,6 +85,38 @@ class DeviceSingleCardFixture : public ::testing::Test { class DeviceSingleCardBufferFixture : public DeviceSingleCardFixture {}; +class SingleDeviceTraceFixture : public ::testing::Test { + protected: + Device* device_; + tt::ARCH arch_; + + void Setup(const size_t buffer_size, const uint8_t num_hw_cqs = 1) { + auto slow_dispatch = getenv("TT_METAL_SLOW_DISPATCH_MODE"); + if (slow_dispatch) { + tt::log_info( + tt::LogTest, "This suite can only be run with fast dispatch or TT_METAL_SLOW_DISPATCH_MODE unset"); + GTEST_SKIP(); + } + if (num_hw_cqs > 1) { + // Running multi-CQ test. User must set this explicitly. + auto num_cqs = getenv("TT_METAL_GTEST_NUM_HW_CQS"); + if (num_cqs == nullptr or strcmp(num_cqs, "2")) { + TT_THROW("This suite must be run with TT_METAL_GTEST_NUM_HW_CQS=2"); + GTEST_SKIP(); + } + } + this->arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name()); + const int device_id = 0; + this->device_ = tt::tt_metal::CreateDevice(device_id, num_hw_cqs, 0, buffer_size); + } + + void TearDown() override { + if (!getenv("TT_METAL_SLOW_DISPATCH_MODE")) { + tt::tt_metal::CloseDevice(this->device_); + } + } +}; + class BlackholeSingleCardFixture : public DeviceSingleCardFixture { protected: void SetUp() override { diff --git a/tests/tt_metal/tt_metal/common/trace_fixture.hpp b/tests/tt_metal/tt_metal/common/trace_fixture.hpp deleted file mode 100644 index 99375c436b0..00000000000 --- a/tests/tt_metal/tt_metal/common/trace_fixture.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "gtest/gtest.h" -#include "tt_metal/host_api.hpp" -#include "tt_metal/test_utils/env_vars.hpp" -#include "impl/kernels/kernel.hpp" - -class SingleDeviceTraceFixture : public ::testing::Test { - protected: - Device* device_; - tt::ARCH arch_; - - void Setup(const size_t buffer_size, const uint8_t num_hw_cqs = 1) { - auto slow_dispatch = getenv("TT_METAL_SLOW_DISPATCH_MODE"); - if (slow_dispatch) { - tt::log_info( - tt::LogTest, "This suite can only be run with fast dispatch or TT_METAL_SLOW_DISPATCH_MODE unset"); - GTEST_SKIP(); - } - if (num_hw_cqs > 1) { - // Running multi-CQ test. User must set this explicitly. - auto num_cqs = getenv("TT_METAL_GTEST_NUM_HW_CQS"); - if (num_cqs == nullptr or strcmp(num_cqs, "2")) { - TT_THROW("This suite must be run with TT_METAL_GTEST_NUM_HW_CQS=2"); - GTEST_SKIP(); - } - } - this->arch_ = tt::get_arch_from_string(tt::test_utils::get_umd_arch_name()); - const int device_id = 0; - this->device_ = tt::tt_metal::CreateDevice(device_id, num_hw_cqs, 0, buffer_size); - } - - void TearDown() override { - if (!getenv("TT_METAL_SLOW_DISPATCH_MODE")) { - tt::tt_metal::CloseDevice(this->device_); - } - } -}; diff --git a/tests/tt_metal/tt_metal/dispatch/dispatch_program/CMakeLists.txt b/tests/tt_metal/tt_metal/dispatch/dispatch_program/CMakeLists.txt index e43627bd549..4d105a41601 100644 --- a/tests/tt_metal/tt_metal/dispatch/dispatch_program/CMakeLists.txt +++ b/tests/tt_metal/tt_metal/dispatch/dispatch_program/CMakeLists.txt @@ -1,13 +1,9 @@ set(UNIT_TESTS_DISPATCH_PROGRAM_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/test_dispatch_program_with_kernel_created_from_string.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_dispatch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_EnqueueProgram.cpp -<<<<<<< HEAD:tests/tt_metal/tt_metal/dispatch_program/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/test_sub_device.cpp -======= CACHE INTERNAL "" ->>>>>>> #0: More test structuring:tests/tt_metal/tt_metal/dispatch/dispatch_program/CMakeLists.txt ) add_executable(unit_tests_dispatch_program ${UNIT_TESTS_DISPATCH_PROGRAM_SRC}) diff --git a/tests/tt_metal/tt_metal/dispatch_program/test_sub_device.cpp b/tests/tt_metal/tt_metal/dispatch/dispatch_program/test_sub_device.cpp similarity index 62% rename from tests/tt_metal/tt_metal/dispatch_program/test_sub_device.cpp rename to tests/tt_metal/tt_metal/dispatch/dispatch_program/test_sub_device.cpp index f92bc4df06e..7343f25e022 100644 --- a/tests/tt_metal/tt_metal/dispatch_program/test_sub_device.cpp +++ b/tests/tt_metal/tt_metal/dispatch/dispatch_program/test_sub_device.cpp @@ -17,119 +17,7 @@ #include "test_utils/stimulus.hpp" #include "command_queue_fixture.hpp" #include "command_queue_test_utils.hpp" - -std::tuple> create_single_sync_program(Device *device, SubDevice sub_device) { - auto syncer_coord = sub_device.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; - auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); - auto global_sem = CreateGlobalSemaphore(device, sub_device.cores(HalProgrammableCoreType::TENSIX), INVALID); - - Program syncer_program = CreateProgram(); - auto syncer_kernel = CreateKernel( - syncer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", - syncer_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array syncer_rt_args = {global_sem->address()}; - SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); - return {std::move(syncer_program), std::move(syncer_coord), std::move(global_sem)}; -} - -std::tuple> create_basic_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) { - auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; - auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord)); - auto waiter_core_physical = device->worker_core_from_logical_core(waiter_coord); - auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX); - auto syncer_coord = incrementer_cores.ranges().back().end_coord; - auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); - auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord); - auto all_cores = waiter_core.merge(incrementer_cores).merge(syncer_core); - auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID); - - Program waiter_program = CreateProgram(); - auto waiter_kernel = CreateKernel( - waiter_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/persistent_waiter.cpp", - waiter_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y}; - SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args); - - Program syncer_program = CreateProgram(); - auto syncer_kernel = CreateKernel( - syncer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", - syncer_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array syncer_rt_args = {global_sem->address()}; - SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); - - Program incrementer_program = CreateProgram(); - auto incrementer_kernel = CreateKernel( - incrementer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/incrementer.cpp", - incrementer_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default}); - std::array incrementer_rt_args = {global_sem->address(), waiter_core_physical.x, waiter_core_physical.y}; - SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args); - return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)}; -} - -std::tuple> create_basic_eth_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) { - auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::ACTIVE_ETH).ranges().at(0).start_coord; - auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord)); - auto waiter_core_physical = device->ethernet_core_from_logical_core(waiter_coord); - auto tensix_waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; - auto tensix_waiter_core = CoreRangeSet(CoreRange(tensix_waiter_coord, tensix_waiter_coord)); - auto tensix_waiter_core_physical = device->worker_core_from_logical_core(tensix_waiter_coord); - auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX); - auto syncer_coord = incrementer_cores.ranges().back().end_coord; - auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); - auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord); - auto all_cores = tensix_waiter_core.merge(incrementer_cores).merge(syncer_core); - auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID); - - Program waiter_program = CreateProgram(); - auto waiter_kernel = CreateKernel( - waiter_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/persistent_remote_waiter.cpp", - waiter_core, - EthernetConfig{ - .noc = NOC::RISCV_0_default, - .processor = DataMovementProcessor::RISCV_0}); - std::array waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y, tensix_waiter_core_physical.x, tensix_waiter_core_physical.y, eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE}; - SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args); - - Program syncer_program = CreateProgram(); - auto syncer_kernel = CreateKernel( - syncer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", - syncer_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array syncer_rt_args = {global_sem->address()}; - SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); - - Program incrementer_program = CreateProgram(); - auto incrementer_kernel = CreateKernel( - incrementer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/incrementer.cpp", - incrementer_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default}); - std::array incrementer_rt_args = {global_sem->address(), tensix_waiter_core_physical.x, tensix_waiter_core_physical.y}; - SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args); - return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)}; -} +#include "sub_device_test_utils.hpp" TEST_F(CommandQueueSingleCardFixture, TensixTestSubDeviceAllocations) { uint32_t local_l1_size = 3200; @@ -326,4 +214,4 @@ TEST_F(CommandQueueSingleCardFixture, TensixActiveEthTestSubDeviceBasicEthProgra } Synchronize(device); } -} \ No newline at end of file +} diff --git a/tests/tt_metal/tt_metal/dispatch_trace/test_sub_device.cpp b/tests/tt_metal/tt_metal/dispatch/dispatch_trace/test_sub_device.cpp similarity index 67% rename from tests/tt_metal/tt_metal/dispatch_trace/test_sub_device.cpp rename to tests/tt_metal/tt_metal/dispatch/dispatch_trace/test_sub_device.cpp index 378433d6b01..8e0ee2aa133 100644 --- a/tests/tt_metal/tt_metal/dispatch_trace/test_sub_device.cpp +++ b/tests/tt_metal/tt_metal/dispatch/dispatch_trace/test_sub_device.cpp @@ -17,119 +17,7 @@ #include "test_utils/stimulus.hpp" #include "command_queue_fixture.hpp" #include "command_queue_test_utils.hpp" - -std::tuple> create_single_sync_program(Device *device, SubDevice sub_device) { - auto syncer_coord = sub_device.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; - auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); - auto global_sem = CreateGlobalSemaphore(device, sub_device.cores(HalProgrammableCoreType::TENSIX), INVALID); - - Program syncer_program = CreateProgram(); - auto syncer_kernel = CreateKernel( - syncer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", - syncer_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array syncer_rt_args = {global_sem->address()}; - SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); - return {std::move(syncer_program), std::move(syncer_coord), std::move(global_sem)}; -} - -std::tuple> create_basic_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) { - auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; - auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord)); - auto waiter_core_physical = device->worker_core_from_logical_core(waiter_coord); - auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX); - auto syncer_coord = incrementer_cores.ranges().back().end_coord; - auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); - auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord); - auto all_cores = waiter_core.merge(incrementer_cores).merge(syncer_core); - auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID); - - Program waiter_program = CreateProgram(); - auto waiter_kernel = CreateKernel( - waiter_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/persistent_waiter.cpp", - waiter_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y}; - SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args); - - Program syncer_program = CreateProgram(); - auto syncer_kernel = CreateKernel( - syncer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", - syncer_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array syncer_rt_args = {global_sem->address()}; - SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); - - Program incrementer_program = CreateProgram(); - auto incrementer_kernel = CreateKernel( - incrementer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/incrementer.cpp", - incrementer_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default}); - std::array incrementer_rt_args = {global_sem->address(), waiter_core_physical.x, waiter_core_physical.y}; - SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args); - return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)}; -} - -std::tuple> create_basic_eth_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) { - auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::ACTIVE_ETH).ranges().at(0).start_coord; - auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord)); - auto waiter_core_physical = device->ethernet_core_from_logical_core(waiter_coord); - auto tensix_waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; - auto tensix_waiter_core = CoreRangeSet(CoreRange(tensix_waiter_coord, tensix_waiter_coord)); - auto tensix_waiter_core_physical = device->worker_core_from_logical_core(tensix_waiter_coord); - auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX); - auto syncer_coord = incrementer_cores.ranges().back().end_coord; - auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); - auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord); - auto all_cores = tensix_waiter_core.merge(incrementer_cores).merge(syncer_core); - auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID); - - Program waiter_program = CreateProgram(); - auto waiter_kernel = CreateKernel( - waiter_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/persistent_remote_waiter.cpp", - waiter_core, - EthernetConfig{ - .noc = NOC::RISCV_0_default, - .processor = DataMovementProcessor::RISCV_0}); - std::array waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y, tensix_waiter_core_physical.x, tensix_waiter_core_physical.y, eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE}; - SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args); - - Program syncer_program = CreateProgram(); - auto syncer_kernel = CreateKernel( - syncer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", - syncer_core, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_0, - .noc = NOC::RISCV_0_default}); - std::array syncer_rt_args = {global_sem->address()}; - SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); - - Program incrementer_program = CreateProgram(); - auto incrementer_kernel = CreateKernel( - incrementer_program, - "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/incrementer.cpp", - incrementer_cores, - DataMovementConfig{ - .processor = DataMovementProcessor::RISCV_1, - .noc = NOC::RISCV_1_default}); - std::array incrementer_rt_args = {global_sem->address(), tensix_waiter_core_physical.x, tensix_waiter_core_physical.y}; - SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args); - return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)}; -} +#include "sub_device_test_utils.hpp" TEST_F(CommandQueueSingleCardTraceFixture, TensixTestSubDeviceTraceBasicPrograms) { SubDevice sub_device_1(std::array{CoreRangeSet(CoreRange({0, 0}, {2, 2}))}); @@ -379,4 +267,4 @@ TEST_F(CommandQueueSingleCardTraceFixture, TensixTestSubDeviceIllegalOperations) device->remove_sub_device_manager(sub_device_manager_1); EXPECT_THROW(device->load_sub_device_manager(sub_device_manager_1), std::exception); } -} \ No newline at end of file +} diff --git a/tests/tt_metal/tt_metal/dispatch/sub_device_test_utils.hpp b/tests/tt_metal/tt_metal/dispatch/sub_device_test_utils.hpp new file mode 100644 index 00000000000..ae1c6da1e9a --- /dev/null +++ b/tests/tt_metal/tt_metal/dispatch/sub_device_test_utils.hpp @@ -0,0 +1,118 @@ +// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#include "host_api.hpp" + +std::tuple> create_single_sync_program(Device *device, SubDevice sub_device) { + auto syncer_coord = sub_device.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; + auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); + auto global_sem = CreateGlobalSemaphore(device, sub_device.cores(HalProgrammableCoreType::TENSIX), INVALID); + + Program syncer_program = CreateProgram(); + auto syncer_kernel = CreateKernel( + syncer_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", + syncer_core, + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_0, + .noc = NOC::RISCV_0_default}); + std::array syncer_rt_args = {global_sem->address()}; + SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); + return {std::move(syncer_program), std::move(syncer_coord), std::move(global_sem)}; +} + +std::tuple> create_basic_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) { + auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; + auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord)); + auto waiter_core_physical = device->worker_core_from_logical_core(waiter_coord); + auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX); + auto syncer_coord = incrementer_cores.ranges().back().end_coord; + auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); + auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord); + auto all_cores = waiter_core.merge(incrementer_cores).merge(syncer_core); + auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID); + + Program waiter_program = CreateProgram(); + auto waiter_kernel = CreateKernel( + waiter_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/persistent_waiter.cpp", + waiter_core, + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_0, + .noc = NOC::RISCV_0_default}); + std::array waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y}; + SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args); + + Program syncer_program = CreateProgram(); + auto syncer_kernel = CreateKernel( + syncer_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", + syncer_core, + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_0, + .noc = NOC::RISCV_0_default}); + std::array syncer_rt_args = {global_sem->address()}; + SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); + + Program incrementer_program = CreateProgram(); + auto incrementer_kernel = CreateKernel( + incrementer_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/incrementer.cpp", + incrementer_cores, + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_1, + .noc = NOC::RISCV_1_default}); + std::array incrementer_rt_args = {global_sem->address(), waiter_core_physical.x, waiter_core_physical.y}; + SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args); + return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)}; +} + +std::tuple> create_basic_eth_sync_program(Device *device, const SubDevice& sub_device_1, const SubDevice& sub_device_2) { + auto waiter_coord = sub_device_2.cores(HalProgrammableCoreType::ACTIVE_ETH).ranges().at(0).start_coord; + auto waiter_core = CoreRangeSet(CoreRange(waiter_coord, waiter_coord)); + auto waiter_core_physical = device->ethernet_core_from_logical_core(waiter_coord); + auto tensix_waiter_coord = sub_device_2.cores(HalProgrammableCoreType::TENSIX).ranges().at(0).start_coord; + auto tensix_waiter_core = CoreRangeSet(CoreRange(tensix_waiter_coord, tensix_waiter_coord)); + auto tensix_waiter_core_physical = device->worker_core_from_logical_core(tensix_waiter_coord); + auto incrementer_cores = sub_device_1.cores(HalProgrammableCoreType::TENSIX); + auto syncer_coord = incrementer_cores.ranges().back().end_coord; + auto syncer_core = CoreRangeSet(CoreRange(syncer_coord, syncer_coord)); + auto syncer_core_physical = device->worker_core_from_logical_core(syncer_coord); + auto all_cores = tensix_waiter_core.merge(incrementer_cores).merge(syncer_core); + auto global_sem = CreateGlobalSemaphore(device, all_cores, INVALID); + + Program waiter_program = CreateProgram(); + auto waiter_kernel = CreateKernel( + waiter_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/persistent_remote_waiter.cpp", + waiter_core, + EthernetConfig{ + .noc = NOC::RISCV_0_default, + .processor = DataMovementProcessor::RISCV_0}); + std::array waiter_rt_args = {global_sem->address(), incrementer_cores.num_cores(), syncer_core_physical.x, syncer_core_physical.y, tensix_waiter_core_physical.x, tensix_waiter_core_physical.y, eth_l1_mem::address_map::ERISC_L1_UNRESERVED_BASE}; + SetRuntimeArgs(waiter_program, waiter_kernel, waiter_core, waiter_rt_args); + + Program syncer_program = CreateProgram(); + auto syncer_kernel = CreateKernel( + syncer_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/syncer.cpp", + syncer_core, + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_0, + .noc = NOC::RISCV_0_default}); + std::array syncer_rt_args = {global_sem->address()}; + SetRuntimeArgs(syncer_program, syncer_kernel, syncer_core, syncer_rt_args); + + Program incrementer_program = CreateProgram(); + auto incrementer_kernel = CreateKernel( + incrementer_program, + "tests/tt_metal/tt_metal/test_kernels/misc/sub_device/incrementer.cpp", + incrementer_cores, + DataMovementConfig{ + .processor = DataMovementProcessor::RISCV_1, + .noc = NOC::RISCV_1_default}); + std::array incrementer_rt_args = {global_sem->address(), tensix_waiter_core_physical.x, tensix_waiter_core_physical.y}; + SetRuntimeArgs(incrementer_program, incrementer_kernel, incrementer_cores, incrementer_rt_args); + return {std::move(waiter_program), std::move(syncer_program), std::move(incrementer_program), std::move(global_sem)}; +}