diff --git a/CMakeLists.txt b/CMakeLists.txt index 448cb093..4ebbc49a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,5 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.16) project(umd_device) -find_package(yaml-cpp REQUIRED) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED True) @@ -11,47 +10,47 @@ if(PROJECT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) message("-- UMD: Building as master project") endif() -if(NOT CMAKE_BUILD_TYPE) - message(STATUS "Setting build type to 'Release' as none was specified.") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Release build is the default" FORCE) -endif() +if(MASTER_PROJECT) + if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Setting build type to 'Release' as none was specified.") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Release build is the default" FORCE) + endif() -message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") + add_library(compiler_flags INTERFACE) + target_compile_options(compiler_flags INTERFACE -DFMT_HEADER_ONLY) +endif() +message(STATUS "UMD build type: ${CMAKE_BUILD_TYPE}") set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DDEBUG=DEBUG") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG=DEBUG") +include(${PROJECT_SOURCE_DIR}/cmake/dependencies.cmake) + add_library(umd_common_directories INTERFACE) -target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/device) +target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/device ${PROJECT_SOURCE_DIR}/third_party/fmt/include) if(NOT DEFINED ENV{ARCH_NAME}) message(FATAL_ERROR "Please set ARCH_NAME to grayskull, wormhole_b0, or blackhole") elseif($ENV{ARCH_NAME} STREQUAL "grayskull") - message("-- UMD: Building for Grayskull") + message(STATUS "UMD: Building for Grayskull") target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR}/device/grayskull 
${PROJECT_SOURCE_DIR}/src/firmware/riscv/grayskull ) elseif($ENV{ARCH_NAME} STREQUAL "wormhole_b0") - message("-- UMD: Building for Wormhole") + message(STATUS "UMD: Building for Wormhole") target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR}/device/wormhole ${PROJECT_SOURCE_DIR}/src/firmware/riscv/wormhole ) elseif($ENV{ARCH_NAME} STREQUAL "blackhole") - message("-- UMD: Building for Blackhole") + message(STATUS "UMD: Building for Blackhole") target_include_directories(umd_common_directories INTERFACE ${PROJECT_SOURCE_DIR}/device/blackhole ${PROJECT_SOURCE_DIR}/src/firmware/riscv/blackhole ) endif() -include(${PROJECT_SOURCE_DIR}/cmake/dependencies.cmake) - -add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) add_subdirectory(${PROJECT_SOURCE_DIR}/device) - -if(MASTER_PROJECT) - add_subdirectory(${PROJECT_SOURCE_DIR}/tests EXCLUDE_FROM_ALL) -endif() +add_subdirectory(${PROJECT_SOURCE_DIR}/tests EXCLUDE_FROM_ALL) diff --git a/README.md b/README.md index b035f6de..e6bb5103 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,45 @@ # UMD ## Dependencies - Required Ubuntu dependencies: +``` +sudo apt install -y libhwloc-dev +``` + +## Build flow +We are transitioning away from Make. The main libraries and tests should now be built with CMake. +Specify the `ARCH_NAME` environment variable as `grayskull`, `wormhole_b0`, or `blackhole` before building. + +To build `libdevice.so`: +``` +cmake -B build -G Ninja +ninja -C build +# or +ninja umd_device -C build +``` + +To build tests: +``` +ninja umd_tests -C build +``` +## As a submodule/external project +If your project has CMake support, simply add this repo as a subdirectory: +``` +add_subdirectory(path/to/umd) +``` +You can then use `libdevice.so` by linking against the `umd_device` target wherever it is needed. +``` +target_link_libraries(tt_metal PUBLIC umd_device) +``` + +## Deprecated Make flow +This flow is no longer maintained. 
`libdevice.so` will still build; however, if you want to run tests, we suggest using the CMake flow. + +Required Ubuntu dependencies: ``` sudo apt install -y libyaml-cpp-dev libhwloc-dev libgtest-dev libboost-dev ``` -## Build This target builds `libdevice.so`. Specify the `ARCH_NAME` environment variable when building (`wormhole_b0` or `grayskull`): @@ -15,8 +47,6 @@ This target builds `libdevice.so`. Specify the `ARCH_NAME` environment variable make build ``` -## Test - Run this target to build library, and gtest suite. ``` @@ -29,15 +59,12 @@ Running test suite: make run ``` -## Clean - +To clean the build directory: ``` make clean ``` -## Device Selection - -Change the `ARCH_NAME` flag in the top-level Makefile or run: +To change device selection, change the `ARCH_NAME` flag in the top-level Makefile or run: ``` make build ARCH_NAME=... diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 2621e820..6480662c 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -14,6 +14,29 @@ CPMAddPackage( OPTIONS "INSTALL_GTEST OFF" ) +############################################################################################################################ +# yaml-cpp +############################################################################################################################ +CPMAddPackage( + NAME yaml-cpp + GITHUB_REPOSITORY jbeder/yaml-cpp + GIT_TAG 0.8.0 + OPTIONS + "YAML_CPP_BUILD_TESTS OFF" + "YAML_CPP_BUILD_TOOLS OFF" + "YAML_BUILD_SHARED_LIBS OFF" +) + +if (yaml-cpp_ADDED) + set_target_properties(yaml-cpp PROPERTIES DEBUG_POSTFIX "") +endif() + +############################################################################################################################ +# boost::interprocess +############################################################################################################################ +include(${PROJECT_SOURCE_DIR}/cmake/fetch_boost.cmake) +fetch_boost_library(interprocess) + 
############################################################################################################################ # Nanomsg ############################################################################################################################ @@ -53,3 +76,9 @@ CPMAddPackage( OPTIONS "LIBUV_BUILD_TESTS OFF" ) + +if(NOT MASTER_PROJECT) + set(nng_include_dir ${nanomsg_SOURCE_DIR}/include PARENT_SCOPE) + set(flatbuffers_include_dir ${flatbuffers_SOURCE_DIR}/include PARENT_SCOPE) + set(libuv_include_dir ${libuv_SOURCE_DIR}/include PARENT_SCOPE) +endif() diff --git a/cmake/fetch_boost.cmake b/cmake/fetch_boost.cmake new file mode 100644 index 00000000..cd38bcc4 --- /dev/null +++ b/cmake/fetch_boost.cmake @@ -0,0 +1,23 @@ +function(fetch_boost_library BOOST_PROJECT_NAME) + CPMAddPackage( + NAME boost_${BOOST_PROJECT_NAME} + GITHUB_REPOSITORY boostorg/${BOOST_PROJECT_NAME} + GIT_TAG boost-1.85.0 + OPTIONS + "BUILD_SHARED_LIBS OFF" + ) + + get_target_property(BOOST_INTERFACE_LINK_LIBRARIES + boost_${BOOST_PROJECT_NAME} INTERFACE_LINK_LIBRARIES) + + if(NOT BOOST_INTERFACE_LINK_LIBRARIES STREQUAL + BOOST_INTERFACE_LINK_LIBRARIES-NOTFOUND) + foreach(BOOST_INTERFACE_LINK_LIBRARY IN + ITEMS ${BOOST_INTERFACE_LINK_LIBRARIES}) + if(NOT TARGET ${BOOST_INTERFACE_LINK_LIBRARY} + AND BOOST_INTERFACE_LINK_LIBRARY MATCHES "^Boost::([a-z0-9_]+)$") + fetch_boost_library(${CMAKE_MATCH_1}) + endif() + endforeach() + endif() +endfunction() diff --git a/device/CMakeLists.txt b/device/CMakeLists.txt index cf72cb0a..cecc88db 100644 --- a/device/CMakeLists.txt +++ b/device/CMakeLists.txt @@ -1,4 +1,3 @@ -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/simulation) set(UMD_DEVICE_SRCS architecture_implementation.cpp @@ -14,11 +13,18 @@ set(UMD_DEVICE_SRCS tt_soc_descriptor.cpp tt_versim_stub.cpp wormhole_implementation.cpp + simulation/tt_simulation_device.cpp + simulation/tt_simulation_host.cpp ) add_library(umd_device SHARED ${UMD_DEVICE_SRCS}) target_link_libraries(umd_device - 
PUBLIC umd_common_directories simulation - PRIVATE yaml-cpp hwloc rt fmt::fmt-header-only + PUBLIC yaml-cpp::yaml-cpp umd_common_directories nng uv compiler_flags + PRIVATE hwloc rt Boost::interprocess +) +target_include_directories(umd_device PUBLIC + ${flatbuffers_SOURCE_DIR}/include + ${nanomsg_SOURCE_DIR}/include + ${libuv_SOURCE_DIR}/include ) set_target_properties(umd_device PROPERTIES OUTPUT_NAME device diff --git a/device/simulation/CMakeLists.txt b/device/simulation/CMakeLists.txt deleted file mode 100644 index 8f5a4012..00000000 --- a/device/simulation/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ - -add_library(simulation SHARED - tt_simulation_host.cpp - tt_simulation_device.cpp -) -target_link_libraries(simulation - PUBLIC umd_common_directories nng uv - PRIVATE fmt::fmt-header-only -) -target_include_directories(simulation PUBLIC - ${flatbuffers_SOURCE_DIR}/include - ${nanomsg_SOURCE_DIR}/include - ${libuv_SOURCE_DIR}/include -) diff --git a/device/tt_silicon_driver.cpp b/device/tt_silicon_driver.cpp index d6602a13..0cfdf027 100644 --- a/device/tt_silicon_driver.cpp +++ b/device/tt_silicon_driver.cpp @@ -4294,7 +4294,7 @@ void tt_SiliconDevice::broadcast_write_to_cluster(const void *mem_ptr, uint32_t else if (arch_name == tt::ARCH::BLACKHOLE) { auto architecture_implementation = tt::umd::architecture_implementation::create(static_cast(arch_name)); if(cols_to_exclude.find(0) == cols_to_exclude.end() or cols_to_exclude.find(9) == cols_to_exclude.end()) { - log_assert(!tensix_or_eth_in_broadcast(cols_to_exclude, architecture_implementation.get()), "Cannot broadcast to tensix/ethernet and DRAM simultaneously on Wormhole."); + log_assert(!tensix_or_eth_in_broadcast(cols_to_exclude, architecture_implementation.get()), "Cannot broadcast to tensix/ethernet and DRAM simultaneously on Blackhole."); if(cols_to_exclude.find(0) == cols_to_exclude.end()) { // When broadcast includes column zero do not exclude anything std::set unsafe_rows = {}; diff --git 
a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e6eb9076..9014d9d6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,16 @@ add_library(test_common INTERFACE) -target_link_libraries(test_common INTERFACE umd_device gtest_main gtest pthread fmt::fmt-header-only) +target_link_libraries(test_common INTERFACE umd_device gtest_main gtest pthread) +target_include_directories(test_common INTERFACE + ${PROJECT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/simulation) -add_custom_target(tests DEPENDS simulation_tests) +if($ENV{ARCH_NAME} STREQUAL "wormhole_b0") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/wormhole) +else() + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/$ENV{ARCH_NAME}) +endif() + +add_custom_target(umd_tests DEPENDS umd_unit_tests simulation_tests) diff --git a/tests/blackhole/CMakeLists.txt b/tests/blackhole/CMakeLists.txt new file mode 100644 index 00000000..4ff86013 --- /dev/null +++ b/tests/blackhole/CMakeLists.txt @@ -0,0 +1,13 @@ + +set(UNIT_TESTS_BH_SRCS + test_silicon_driver_bh.cpp +) + +add_executable(unit_tests_blackhole ${UNIT_TESTS_BH_SRCS}) +target_link_libraries(unit_tests_blackhole PRIVATE test_common) +set_target_properties(unit_tests_blackhole PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/umd/blackhole + OUTPUT_NAME unit_tests +) + +add_custom_target(umd_unit_tests DEPENDS unit_tests_blackhole) diff --git a/tests/blackhole/test_bh_common.h b/tests/blackhole/test_bh_common.h index 32ced7ff..8ae8436a 100644 --- a/tests/blackhole/test_bh_common.h +++ b/tests/blackhole/test_bh_common.h @@ -1,9 +1,14 @@ +// SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + #pragma once -#include "../test_utils/stimulus_generators.hpp" -#include "eth_l1_address_map.h" #include "tt_xy_pair.h" -#include -#include +#include "tt_cluster_descriptor.h" +#include "tt_device.h" + +#include "tests/test_utils/stimulus_generators.hpp" +#include "eth_l1_address_map.h" namespace tt::umd::test::utils { @@ -57,7 +62,7 @@ class BlackholeTestFixture : public ::testing::Test { std::set target_devices = {devices.begin(), devices.end()}; uint32_t num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test - device = std::make_unique(SOC_DESC_PATH, GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + device = std::make_unique(test_utils::GetAbsPath(SOC_DESC_PATH), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); assert(device != nullptr); assert(device->get_cluster_description()->get_number_of_chips() == get_detected_num_chips()); diff --git a/tests/blackhole/test_silicon_driver_bh.cpp b/tests/blackhole/test_silicon_driver_bh.cpp index 75375eba..d6c938aa 100644 --- a/tests/blackhole/test_silicon_driver_bh.cpp +++ b/tests/blackhole/test_silicon_driver_bh.cpp @@ -68,14 +68,23 @@ std::int32_t get_static_tlb_index(tt_xy_pair target) { } } +std::set get_target_devices() { + std::set target_devices; + std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(test_utils::GetAbsPath("blackhole_1chip_cluster.yaml")); + for (int i = 0; i < cluster_desc_uniq->get_number_of_chips(); i++) { + target_devices.insert(i); + } + return target_devices; +} + TEST(SiliconDriverBH, CreateDestroy) { - std::set target_devices = {0}; + std::set target_devices = get_target_devices(); uint32_t num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test 
tt_device_params default_params; // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting for(int i = 0; i < 50; i++) { - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/blackhole_140_arch_no_eth.yaml", "./blackhole_1chip_cluster.yaml", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false); set_params_for_remote_txn(device); device.start_device(default_params); device.deassert_risc_reset(); @@ -88,7 +97,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; // { -// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); +// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(test_utils::GetClusterDescYAML()); // if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { // GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; // } @@ -96,7 +105,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test // uint32_t num_host_mem_ch_per_mmio_device = 1; -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); +// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); // auto sdesc_per_chip = 
device.get_virtual_soc_descriptors(); // ASSERT_EQ(device.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting"; @@ -112,7 +121,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::set target_devices = {0, 1}; // std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; // { -// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); +// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(test_utils::GetClusterDescYAML()); // if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { // GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; // } @@ -121,7 +130,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test // uint32_t num_host_mem_ch_per_mmio_device = 1; // // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_1x1.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false, simulated_harvesting_masks); +// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_1x1.yaml", test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false, simulated_harvesting_masks); // auto sdesc_per_chip = device.get_virtual_soc_descriptors(); // ASSERT_EQ(device.using_harvested_soc_descriptors(), false) << "SOC descriptors should not be modified when harvesting is disabled"; @@ -139,7 +148,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // std::set target_devices = {0, 1}; // std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; // { -// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); +// 
std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(test_utils::GetClusterDescYAML()); // if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { // GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; // } @@ -148,7 +157,7 @@ TEST(SiliconDriverBH, CreateDestroy) { // uint32_t num_host_mem_ch_per_mmio_device = 1; // std::unordered_map dynamic_tlb_config = {{"SMALL_READ_WRITE_TLB", 157}}; // Use both static and dynamic TLBs here -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); +// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); // set_params_for_remote_txn(device); // auto mmio_devices = device.get_target_mmio_device_ids(); @@ -202,67 +211,67 @@ TEST(SiliconDriverBH, CreateDestroy) { // device.close_device(); // } -// TEST(SiliconDriverWH, UnalignedStaticTLB_RW) { -// auto get_static_tlb_index_callback = [] (tt_xy_pair target) { -// return get_static_tlb_index(target); -// }; +TEST(SiliconDriverBH, UnalignedStaticTLB_RW) { + auto get_static_tlb_index_callback = [] (tt_xy_pair target) { + return get_static_tlb_index(target); + }; -// std::set target_devices = {0, 1}; + std::set target_devices = get_target_devices(); -// std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test -// dynamic_tlb_config["REG_TLB"] = 184; -// uint32_t num_host_mem_ch_per_mmio_device = 1; + std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test + dynamic_tlb_config["REG_TLB"] = 184; + uint32_t num_host_mem_ch_per_mmio_device = 1; -// tt_SiliconDevice device = 
tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); -// set_params_for_remote_txn(device); -// auto mmio_devices = device.get_target_mmio_device_ids(); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + set_params_for_remote_txn(device); + auto mmio_devices = device.get_target_mmio_device_ids(); -// for(int i = 0; i < target_devices.size(); i++) { -// // Iterate over MMIO devices and only setup static TLBs for worker cores -// if(std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) { -// auto& sdesc = device.get_virtual_soc_descriptors().at(i); -// for(auto& core : sdesc.workers) { -// // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE. -// device.configure_tlb(i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE); -// } -// } -// } + for(int i = 0; i < target_devices.size(); i++) { + // Iterate over MMIO devices and only setup static TLBs for worker cores + if(std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) { + auto& sdesc = device.get_virtual_soc_descriptors().at(i); + for(auto& core : sdesc.workers) { + // Statically mapping a 1MB TLB to this core, starting from address NCRISC_FIRMWARE_BASE. 
+ device.configure_tlb(i, core, get_static_tlb_index_callback(core), l1_mem::address_map::NCRISC_FIRMWARE_BASE); + } + } + } -// device.setup_core_to_tlb_map(get_static_tlb_index_callback); + device.setup_core_to_tlb_map(get_static_tlb_index_callback); -// tt_device_params default_params; -// device.start_device(default_params); -// device.deassert_risc_reset(); + tt_device_params default_params; + device.start_device(default_params); + device.deassert_risc_reset(); -// std::vector unaligned_sizes = {3, 14, 21, 255, 362, 430, 1022, 1023, 1025}; -// for(int i = 0; i < 2; i++) { -// for(const auto& size : unaligned_sizes) { -// std::vector write_vec(size, 0); -// for(int i = 0; i < size; i++){ -// write_vec[i] = size + i; -// } -// std::vector readback_vec(size, 0); -// std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; -// for(int loop = 0; loop < 50; loop++){ -// for(auto& core : device.get_virtual_soc_descriptors().at(i).workers) { -// device.write_to_device(write_vec.data(), size, tt_cxy_pair(i, core), address, ""); -// device.wait_for_non_mmio_flush(); -// device.read_from_device(readback_vec.data(), tt_cxy_pair(i, core), address, size, ""); -// ASSERT_EQ(readback_vec, write_vec); -// readback_vec = std::vector(size, 0); -// device.write_to_sysmem(write_vec.data(), size, 0, 0, 0); -// device.read_from_sysmem(readback_vec.data(), 0, 0, size, 0); -// ASSERT_EQ(readback_vec, write_vec); -// readback_vec = std::vector(size, 0); -// device.wait_for_non_mmio_flush(); -// } -// address += 0x20; -// } + std::vector unaligned_sizes = {3, 14, 21, 255, 362, 430, 1022, 1023, 1025}; + for(int i = 0; i < target_devices.size(); i++) { + for(const auto& size : unaligned_sizes) { + std::vector write_vec(size, 0); + for(int i = 0; i < size; i++){ + write_vec[i] = size + i; + } + std::vector readback_vec(size, 0); + std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; + for(int loop = 0; loop < 50; loop++){ + for(auto& core : 
device.get_virtual_soc_descriptors().at(i).workers) { + device.write_to_device(write_vec.data(), size, tt_cxy_pair(i, core), address, ""); + device.wait_for_non_mmio_flush(); + device.read_from_device(readback_vec.data(), tt_cxy_pair(i, core), address, size, ""); + ASSERT_EQ(readback_vec, write_vec); + readback_vec = std::vector(size, 0); + device.write_to_sysmem(write_vec.data(), size, 0, 0, 0); + device.read_from_sysmem(readback_vec.data(), 0, 0, size, 0); + ASSERT_EQ(readback_vec, write_vec); + readback_vec = std::vector(size, 0); + device.wait_for_non_mmio_flush(); + } + address += 0x20; + } -// } -// } -// device.close_device(); -// } + } + } + device.close_device(); +} TEST(SiliconDriverBH, StaticTLB_RW) { @@ -270,20 +279,12 @@ TEST(SiliconDriverBH, StaticTLB_RW) { return get_static_tlb_index(target); }; - // std::set target_devices = {0, 1}; - std::set target_devices = {0}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml("./blackhole_1chip_cluster.yaml"); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/blackhole_140_arch_no_eth.yaml", "./blackhole_1chip_cluster.yaml", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -331,19 +332,12 @@ 
TEST(SiliconDriverBH, StaticTLB_RW) { TEST(SiliconDriverBH, DynamicTLB_RW) { // Don't use any static TLBs in this test. All writes go through a dynamic TLB that needs to be reconfigured for each transaction - std::set target_devices = {0}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml("./blackhole_1chip_cluster.yaml"); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverBH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; uint32_t num_host_mem_ch_per_mmio_device = 1; dynamic_tlb_config.insert({"DYNAMIC_TLB_BASE_INDEX", 190}); // Use this for all reads and writes to worker cores - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/blackhole_140_arch_no_eth.yaml", "./blackhole_1chip_cluster.yaml", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); @@ -400,311 +394,290 @@ TEST(SiliconDriverBH, DynamicTLB_RW) { device.close_device(); } -// TEST(SiliconDriverWH, MultiThreadedDevice) { -// // Have 2 threads read and write from a single device concurrently -// // All transactions go through a single Dynamic TLB. We want to make sure this is thread/process safe +TEST(SiliconDriverBH, MultiThreadedDevice) { + // Have 2 threads read and write from a single device concurrently + // All transactions go through a single Dynamic TLB. 
We want to make sure this is thread/process safe -// std::set target_devices = {0}; + std::set target_devices = get_target_devices(); -// { -// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); -// if (cluster_desc_uniq->get_number_of_chips() > 2) { -// GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a one or two chip nebula system"; -// } -// } - -// std::unordered_map dynamic_tlb_config = {}; -// uint32_t num_host_mem_ch_per_mmio_device = 1; -// dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + std::unordered_map dynamic_tlb_config = {}; + uint32_t num_host_mem_ch_per_mmio_device = 1; + dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); -// set_params_for_remote_txn(device); + set_params_for_remote_txn(device); -// tt_device_params default_params; -// device.start_device(default_params); -// device.deassert_risc_reset(); + tt_device_params default_params; + device.start_device(default_params); + device.deassert_risc_reset(); -// std::thread th1 = std::thread([&] { -// std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; -// std::vector readback_vec = {}; -// std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; -// for(int loop = 0; loop < 100; loop++) { -// for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { -// 
device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); -// device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; -// readback_vec = {}; -// } -// address += 0x20; -// } -// }); - -// std::thread th2 = std::thread([&] { -// std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; -// std::vector readback_vec = {}; -// std::uint32_t address = 0x30000000; -// for(auto& core_ls : device.get_virtual_soc_descriptors().at(0).dram_cores) { -// for(int loop = 0; loop < 100; loop++) { -// for(auto& core : core_ls) { -// device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); -// device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; -// readback_vec = {}; -// } -// address += 0x20; -// } -// } -// }); + std::thread th1 = std::thread([&] { + std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + std::vector readback_vec = {}; + std::uint32_t address = l1_mem::address_map::NCRISC_FIRMWARE_BASE; + for(int loop = 0; loop < 100; loop++) { + for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { + device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; + readback_vec = {}; + } + address += 0x20; + } + }); + + std::thread th2 = std::thread([&] { + std::vector vector_to_write = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + std::vector 
readback_vec = {}; + std::uint32_t address = 0x30000000; + for(auto& core_ls : device.get_virtual_soc_descriptors().at(0).dram_cores) { + for(int loop = 0; loop < 100; loop++) { + for(auto& core : core_ls) { + device.write_to_device(vector_to_write, tt_cxy_pair(0, core), address, "SMALL_READ_WRITE_TLB"); + device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 40, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was written"; + readback_vec = {}; + } + address += 0x20; + } + } + }); -// th1.join(); -// th2.join(); -// device.close_device(); -// } + th1.join(); + th2.join(); + device.close_device(); +} -// TEST(SiliconDriverWH, MultiThreadedMemBar) { -// // Have 2 threads read and write from a single device concurrently -// // All (fairly large) transactions go through a static TLB. -// // We want to make sure the memory barrier is thread/process safe. +TEST(SiliconDriverBH, MultiThreadedMemBar) { + // Have 2 threads read and write from a single device concurrently + // All (fairly large) transactions go through a static TLB. + // We want to make sure the memory barrier is thread/process safe. 
-// // Memory barrier flags get sent to address 0 for all channels in this test -// auto get_static_tlb_index_callback = [] (tt_xy_pair target) { -// return get_static_tlb_index(target); -// }; + // Memory barrier flags get sent to address 0 for all channels in this test + auto get_static_tlb_index_callback = [] (tt_xy_pair target) { + return get_static_tlb_index(target); + }; -// std::set target_devices = {0}; -// uint32_t base_addr = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; -// std::unordered_map dynamic_tlb_config = {}; -// dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for reading back membar values -// uint32_t num_host_mem_ch_per_mmio_device = 1; + std::set target_devices = get_target_devices(); + uint32_t base_addr = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; + std::unordered_map dynamic_tlb_config = {}; + dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for reading back membar values + uint32_t num_host_mem_ch_per_mmio_device = 1; -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); -// set_params_for_remote_txn(device); -// for(int i = 0; i < target_devices.size(); i++) { -// // Iterate over devices and only setup static TLBs for functional worker cores -// auto& sdesc = device.get_virtual_soc_descriptors().at(i); -// for(auto& core : sdesc.workers) { -// // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. 
-// device.configure_tlb(i, core, get_static_tlb_index_callback(core), base_addr); -// } -// } -// device.setup_core_to_tlb_map(get_static_tlb_index_callback); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + set_params_for_remote_txn(device); + for(int i = 0; i < target_devices.size(); i++) { + // Iterate over devices and only setup static TLBs for functional worker cores + auto& sdesc = device.get_virtual_soc_descriptors().at(i); + for(auto& core : sdesc.workers) { + // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. + device.configure_tlb(i, core, get_static_tlb_index_callback(core), base_addr); + } + } + device.setup_core_to_tlb_map(get_static_tlb_index_callback); -// tt_device_params default_params; -// device.start_device(default_params); -// device.deassert_risc_reset(); + tt_device_params default_params; + device.start_device(default_params); + device.deassert_risc_reset(); -// std::vector readback_membar_vec = {}; -// for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { -// device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers -// readback_membar_vec = {}; -// } + std::vector readback_membar_vec = {}; + for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { + device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all workers + readback_membar_vec = {}; + } -// for(int chan = 0; chan < 
device.get_virtual_soc_descriptors().at(0).get_num_dram_channels(); chan++) { -// auto core = device.get_virtual_soc_descriptors().at(0).get_core_for_dram_channel(chan, 0); -// device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all DRAM -// readback_membar_vec = {}; -// } + for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(0).get_num_dram_channels(); chan++) { + auto core = device.get_virtual_soc_descriptors().at(0).get_core_for_dram_channel(chan, 0); + device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), 0, 4, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all DRAM + readback_membar_vec = {}; + } -// for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { -// device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all ethernet cores -// readback_membar_vec = {}; -// } + for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { + device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers were correctly initialized on all ethernet cores + readback_membar_vec = {}; + } -// // Launch 2 thread accessing different locations of L1 and using memory barrier between write and read -// // Ensure now RAW race and membars are thread safe -// std::vector vec1(2560); -// std::vector vec2(2560); -// std::vector zeros(2560, 0); + // Launch 2 thread accessing different locations of L1 and using memory barrier between write and read + 
// Ensure now RAW race and membars are thread safe + std::vector vec1(2560); + std::vector vec2(2560); + std::vector zeros(2560, 0); -// for(int i = 0; i < vec1.size(); i++) { -// vec1.at(i) = i; -// } -// for(int i = 0; i < vec2.size(); i++) { -// vec2.at(i) = vec1.size() + i; -// } -// std::thread th1 = std::thread([&] { -// std::uint32_t address = base_addr; -// for(int loop = 0; loop < 50; loop++) { -// for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { -// std::vector readback_vec = {}; -// device.write_to_device(vec1, tt_cxy_pair(0, core), address, ""); -// device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); -// device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); -// ASSERT_EQ(readback_vec, vec1); -// device.write_to_device(zeros, tt_cxy_pair(0, core), address, ""); -// readback_vec = {}; -// } + for(int i = 0; i < vec1.size(); i++) { + vec1.at(i) = i; + } + for(int i = 0; i < vec2.size(); i++) { + vec2.at(i) = vec1.size() + i; + } + std::thread th1 = std::thread([&] { + std::uint32_t address = base_addr; + for(int loop = 0; loop < 50; loop++) { + for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { + std::vector readback_vec = {}; + device.write_to_device(vec1, tt_cxy_pair(0, core), address, ""); + device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); + device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec1.size(), ""); + ASSERT_EQ(readback_vec, vec1); + device.write_to_device(zeros, tt_cxy_pair(0, core), address, ""); + readback_vec = {}; + } -// } -// }); - -// std::thread th2 = std::thread([&] { -// std::uint32_t address = base_addr + vec1.size() * 4; -// for(int loop = 0; loop < 50; loop++) { -// for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { -// std::vector readback_vec = {}; -// device.write_to_device(vec2, tt_cxy_pair(0, core), address, ""); -// device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); -// 
device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); -// ASSERT_EQ(readback_vec, vec2); -// device.write_to_device(zeros, tt_cxy_pair(0, core), address, "") ; -// readback_vec = {}; -// } -// } -// }); - -// th1.join(); -// th2.join(); - -// for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { -// device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for workers -// readback_membar_vec = {}; -// } + } + }); + + std::thread th2 = std::thread([&] { + std::uint32_t address = base_addr + vec1.size() * 4; + for(int loop = 0; loop < 50; loop++) { + for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { + std::vector readback_vec = {}; + device.write_to_device(vec2, tt_cxy_pair(0, core), address, ""); + device.l1_membar(0, "SMALL_READ_WRITE_TLB", {core}); + device.read_from_device(readback_vec, tt_cxy_pair(0, core), address, 4*vec2.size(), ""); + ASSERT_EQ(readback_vec, vec2); + device.write_to_device(zeros, tt_cxy_pair(0, core), address, "") ; + readback_vec = {}; + } + } + }); -// for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { -// device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); -// ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for ethernet cores -// readback_membar_vec = {}; -// } -// device.close_device(); -// } + th1.join(); + th2.join(); + for(auto& core : device.get_virtual_soc_descriptors().at(0).workers) { + device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), l1_mem::address_map::L1_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate 
for workers + readback_membar_vec = {}; + } -// TEST(SiliconDriverWH, BroadcastWrite) { -// // Broadcast multiple vectors to tensix and dram grid. Verify broadcasted data is read back correctly -// std::set target_devices = {0, 1}; + for(auto& core : device.get_virtual_soc_descriptors().at(0).ethernet_cores) { + device.read_from_device(readback_membar_vec, tt_cxy_pair(0, core), eth_l1_mem::address_map::ERISC_BARRIER_BASE, 4, "SMALL_READ_WRITE_TLB"); + ASSERT_EQ(readback_membar_vec.at(0), 187); // Ensure that memory barriers end up in the correct sate for ethernet cores + readback_membar_vec = {}; + } + device.close_device(); +} -// { -// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); -// if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { -// GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; -// } -// } +TEST(SiliconDriverBH, DISABLED_BroadcastWrite) { // Cannot broadcast to tensix/ethernet and DRAM simultaneously on Blackhole .. wait_for_non_mmio_flush() is not working as expected? + // Broadcast multiple vectors to tensix and dram grid. 
Verify broadcasted data is read back correctly + std::set target_devices = get_target_devices(); -// std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test -// uint32_t num_host_mem_ch_per_mmio_device = 1; + std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test + uint32_t num_host_mem_ch_per_mmio_device = 1; -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); -// set_params_for_remote_txn(device); -// auto mmio_devices = device.get_target_mmio_device_ids(); - -// tt_device_params default_params; -// device.start_device(default_params); -// device.deassert_risc_reset(); -// std::vector broadcast_sizes = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}; -// uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; -// std::set rows_to_exclude = {0, 6}; -// std::set cols_to_exclude = {0, 5}; -// std::set rows_to_exclude_for_dram_broadcast = {}; -// std::set cols_to_exclude_for_dram_broadcast = {1, 2, 3, 4, 6, 7, 8, 9}; - -// for(const auto& size : broadcast_sizes) { -// std::vector vector_to_write(size); -// std::vector zeros(size); -// std::vector readback_vec = {}; -// for(int i = 0; i < size; i++) { -// vector_to_write[i] = i; -// zeros[i] = 0; -// } -// // Broadcast to Tensix -// device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude, cols_to_exclude, "LARGE_WRITE_TLB"); -// // Broadcast to DRAM -// device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude_for_dram_broadcast, cols_to_exclude_for_dram_broadcast, "LARGE_WRITE_TLB"); -// device.wait_for_non_mmio_flush(); - -// for(const auto i : target_devices) { -// for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { -// 
if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; -// device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); -// ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; -// device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data -// readback_vec = {}; -// } -// for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { -// const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); -// device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); -// ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; -// device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data -// readback_vec = {}; -// } -// } -// // Wait for data to be cleared before writing next block -// device.wait_for_non_mmio_flush(); -// } -// device.close_device(); -// } + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + set_params_for_remote_txn(device); + auto mmio_devices = device.get_target_mmio_device_ids(); -// TEST(SiliconDriverWH, VirtualCoordinateBroadcast) { -// // Broadcast multiple vectors to tensix and dram grid. 
Verify broadcasted data is read back correctly -// std::set target_devices = {0, 1}; + tt_device_params default_params; + device.start_device(default_params); + device.deassert_risc_reset(); + std::vector broadcast_sizes = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}; + uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; + std::set rows_to_exclude = {0, 6}; + std::set cols_to_exclude = {0, 5}; + std::set rows_to_exclude_for_dram_broadcast = {}; + std::set cols_to_exclude_for_dram_broadcast = {1, 2, 3, 4, 6, 7, 8, 9}; + + for(const auto& size : broadcast_sizes) { + std::vector vector_to_write(size); + std::vector zeros(size); + std::vector readback_vec = {}; + for(int i = 0; i < size; i++) { + vector_to_write[i] = i; + zeros[i] = 0; + } + // Broadcast to Tensix + device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude, cols_to_exclude, "LARGE_WRITE_TLB"); + device.wait_for_non_mmio_flush(); // flush here so we don't simultaneously broadcast to DRAM? 
+ // Broadcast to DRAM + device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude_for_dram_broadcast, cols_to_exclude_for_dram_broadcast, "LARGE_WRITE_TLB"); + device.wait_for_non_mmio_flush(); + + for(const auto i : target_devices) { + for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { + if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; + device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; + device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + readback_vec = {}; + } + for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { + const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); + device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; + device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + readback_vec = {}; + } + } + // Wait for data to be cleared before writing next block + device.wait_for_non_mmio_flush(); + } + device.close_device(); +} -// { -// std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); -// if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { -// GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; -// } -// } +TEST(SiliconDriverBH, DISABLED_VirtualCoordinateBroadcast) { // same problem as 
above.. + // Broadcast multiple vectors to tensix and dram grid. Verify broadcasted data is read back correctly + std::set target_devices = get_target_devices(); -// std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test -// uint32_t num_host_mem_ch_per_mmio_device = 1; + std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test + uint32_t num_host_mem_ch_per_mmio_device = 1; -// tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); -// set_params_for_remote_txn(device); -// auto mmio_devices = device.get_target_mmio_device_ids(); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"), test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + set_params_for_remote_txn(device); + auto mmio_devices = device.get_target_mmio_device_ids(); -// tt_device_params default_params; -// device.start_device(default_params); -// auto eth_version = device.get_ethernet_fw_version(); -// bool virtual_bcast_supported = (eth_version >= tt_version(6, 8, 0) || eth_version == tt_version(6, 7, 241)) && device.translation_tables_en; -// if (!virtual_bcast_supported) { -// device.close_device(); -// GTEST_SKIP() << "SiliconDriverWH.VirtualCoordinateBroadcast skipped since ethernet version does not support Virtual Coordinate Broadcast or NOC translation is not enabled"; -// } + tt_device_params default_params; + device.start_device(default_params); + auto eth_version = device.get_ethernet_fw_version(); + bool virtual_bcast_supported = (eth_version >= tt_version(6, 8, 0) || eth_version == tt_version(6, 7, 241)) && device.translation_tables_en; + if (!virtual_bcast_supported) { + device.close_device(); + GTEST_SKIP() << 
"SiliconDriverWH.VirtualCoordinateBroadcast skipped since ethernet version does not support Virtual Coordinate Broadcast or NOC translation is not enabled"; + } -// device.deassert_risc_reset(); -// std::vector broadcast_sizes = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}; -// uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; -// std::set rows_to_exclude = {0, 3, 5, 6, 8, 9}; -// std::set cols_to_exclude = {0, 5}; -// std::set rows_to_exclude_for_dram_broadcast = {}; -// std::set cols_to_exclude_for_dram_broadcast = {1, 2, 3, 4, 6, 7, 8, 9}; - -// for(const auto& size : broadcast_sizes) { -// std::vector vector_to_write(size); -// std::vector zeros(size); -// std::vector readback_vec = {}; -// for(int i = 0; i < size; i++) { -// vector_to_write[i] = i; -// zeros[i] = 0; -// } -// // Broadcast to Tensix -// device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude, cols_to_exclude, "LARGE_WRITE_TLB"); -// // Broadcast to DRAM -// device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude_for_dram_broadcast, cols_to_exclude_for_dram_broadcast, "LARGE_WRITE_TLB"); -// device.wait_for_non_mmio_flush(); - -// for(const auto i : target_devices) { -// for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { -// if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; -// device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); -// ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; -// device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data -// readback_vec = {}; -// } -// for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { -// const auto& 
core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); -// device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); -// ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; -// device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data -// readback_vec = {}; -// } -// } -// // Wait for data to be cleared before writing next block -// device.wait_for_non_mmio_flush(); -// } -// device.close_device(); -// } + device.deassert_risc_reset(); + std::vector broadcast_sizes = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}; + uint32_t address = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; + std::set rows_to_exclude = {0, 3, 5, 6, 8, 9}; + std::set cols_to_exclude = {0, 5}; + std::set rows_to_exclude_for_dram_broadcast = {}; + std::set cols_to_exclude_for_dram_broadcast = {1, 2, 3, 4, 6, 7, 8, 9}; + + for(const auto& size : broadcast_sizes) { + std::vector vector_to_write(size); + std::vector zeros(size); + std::vector readback_vec = {}; + for(int i = 0; i < size; i++) { + vector_to_write[i] = i; + zeros[i] = 0; + } + // Broadcast to Tensix + device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude, cols_to_exclude, "LARGE_WRITE_TLB"); + // Broadcast to DRAM + device.broadcast_write_to_cluster(vector_to_write.data(), vector_to_write.size() * 4, address, {}, rows_to_exclude_for_dram_broadcast, cols_to_exclude_for_dram_broadcast, "LARGE_WRITE_TLB"); + device.wait_for_non_mmio_flush(); + + for(const auto i : target_devices) { + for(const auto& core : device.get_virtual_soc_descriptors().at(i).workers) { + if(rows_to_exclude.find(core.y) != rows_to_exclude.end()) continue; + device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, 
vector_to_write.size() * 4, "LARGE_READ_TLB"); + ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from core " << core.x << "-" << core.y << "does not match what was broadcasted"; + device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + readback_vec = {}; + } + for(int chan = 0; chan < device.get_virtual_soc_descriptors().at(i).get_num_dram_channels(); chan++) { + const auto& core = device.get_virtual_soc_descriptors().at(i).get_core_for_dram_channel(chan, 0); + device.read_from_device(readback_vec, tt_cxy_pair(i, core), address, vector_to_write.size() * 4, "LARGE_READ_TLB"); + ASSERT_EQ(vector_to_write, readback_vec) << "Vector read back from DRAM core " << i << " " << core.x << "-" << core.y << " does not match what was broadcasted " << size; + device.write_to_device(zeros, tt_cxy_pair(i, core), address, "LARGE_WRITE_TLB"); // Clear any written data + readback_vec = {}; + } + } + // Wait for data to be cleared before writing next block + device.wait_for_non_mmio_flush(); + } + device.close_device(); +} diff --git a/tests/galaxy/test_umd_concurrent_threads.cpp b/tests/galaxy/test_umd_concurrent_threads.cpp index bd60ef39..1f6c7331 100644 --- a/tests/galaxy/test_umd_concurrent_threads.cpp +++ b/tests/galaxy/test_umd_concurrent_threads.cpp @@ -2,12 +2,12 @@ // // SPDX-License-Identifier: Apache-2.0 -#include -#include - #include #include +#include "tt_cluster_descriptor.h" +#include "tt_device.h" + #include "common/logger.hpp" #include "eth_interface.h" #include "filesystem" @@ -17,14 +17,14 @@ #include "test_galaxy_common.h" #include "tests/test_utils/generate_cluster_desc.hpp" -static const std::string SOC_DESC_PATH = "./tests/soc_descs/wormhole_b0_8x10.yaml"; +static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml"; void set_params_for_remote_txn(tt_SiliconDevice& device); // Have 2 threads read and write to all cores on the Galaxy TEST(GalaxyConcurrentThreads, 
WriteToAllChipsL1) { // Galaxy Setup - std::string cluster_desc_path = GetClusterDescYAML().string(); + std::string cluster_desc_path = test_utils::GetClusterDescYAML(); std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); std::set target_devices_th1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; std::set target_devices_th2 = {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; @@ -53,7 +53,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores tt_SiliconDevice device = tt_SiliconDevice( - SOC_DESC_PATH, cluster_desc_path, all_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + test_utils::GetAbsPath(SOC_DESC_PATH), cluster_desc_path, all_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); set_params_for_remote_txn(device); @@ -114,7 +114,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsL1) { TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { // Galaxy Setup - std::string cluster_desc_path = GetClusterDescYAML().string(); + std::string cluster_desc_path = test_utils::GetClusterDescYAML(); std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); std::set target_devices_th1 = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; std::set target_devices_th2 = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; @@ -143,7 +143,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores tt_SiliconDevice device = tt_SiliconDevice( - SOC_DESC_PATH, cluster_desc_path, all_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + test_utils::GetAbsPath(SOC_DESC_PATH), cluster_desc_path, all_devices, 
num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); set_params_for_remote_txn(device); @@ -207,7 +207,7 @@ TEST(GalaxyConcurrentThreads, WriteToAllChipsDram) { TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) { // Galaxy Setup - std::string cluster_desc_path = GetClusterDescYAML().string(); + std::string cluster_desc_path = test_utils::GetClusterDescYAML(); std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); std::set target_devices = {0, 1, 2, 3, 4, 5, 6, 7, 8}; for (const auto& chip : target_devices) { @@ -222,7 +222,7 @@ TEST(GalaxyConcurrentThreads, PushInputsWhileSignalingCluster) { dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores tt_SiliconDevice device = tt_SiliconDevice( - SOC_DESC_PATH, cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + test_utils::GetAbsPath(SOC_DESC_PATH), cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); set_params_for_remote_txn(device); diff --git a/tests/galaxy/test_umd_remote_api.cpp b/tests/galaxy/test_umd_remote_api.cpp index 1b5b9fa4..7335d943 100644 --- a/tests/galaxy/test_umd_remote_api.cpp +++ b/tests/galaxy/test_umd_remote_api.cpp @@ -2,11 +2,11 @@ // // SPDX-License-Identifier: Apache-2.0 -#include -#include - #include +#include "tt_cluster_descriptor.h" +#include "tt_device.h" + #include "common/logger.hpp" #include "eth_interface.h" #include "filesystem" @@ -16,13 +16,13 @@ #include "test_galaxy_common.h" #include "tests/test_utils/generate_cluster_desc.hpp" -static const std::string SOC_DESC_PATH = "./tests/soc_descs/wormhole_b0_8x10.yaml"; +static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml"; void 
set_params_for_remote_txn(tt_SiliconDevice& device); void run_remote_read_write_test(uint32_t vector_size, bool dram_write) { // Galaxy Setup - std::string cluster_desc_path = GetClusterDescYAML().string(); + std::string cluster_desc_path = test_utils::GetClusterDescYAML(); std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); std::set target_devices = {}; for (const auto& chip : cluster_desc->get_all_chips()) { @@ -34,7 +34,7 @@ void run_remote_read_write_test(uint32_t vector_size, bool dram_write) { dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores tt_SiliconDevice device = tt_SiliconDevice( - SOC_DESC_PATH, cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + test_utils::GetAbsPath(SOC_DESC_PATH), cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); const auto sdesc_per_chip = device.get_virtual_soc_descriptors(); set_params_for_remote_txn(device); @@ -128,7 +128,7 @@ TEST(GalaxyBasicReadWrite, LargeRemoteDramBlockReadWrite) { run_remote_read_writ void run_data_mover_test( uint32_t vector_size, tt_multichip_core_addr sender_core, tt_multichip_core_addr receiver_core) { // Galaxy Setup - std::string cluster_desc_path = GetClusterDescYAML().string(); + std::string cluster_desc_path = test_utils::GetClusterDescYAML(); std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); std::set target_devices = {}; for (const auto& chip : cluster_desc->get_all_chips()) { @@ -149,7 +149,7 @@ void run_data_mover_test( dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores tt_SiliconDevice device = tt_SiliconDevice( - SOC_DESC_PATH, cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + test_utils::GetAbsPath(SOC_DESC_PATH), cluster_desc_path, 
target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); set_params_for_remote_txn(device); @@ -243,7 +243,7 @@ TEST(GalaxyDataMovement, TwoChipMoveData4) { void run_data_broadcast_test( uint32_t vector_size, tt_multichip_core_addr sender_core, std::vector receiver_cores) { // Galaxy Setup - std::string cluster_desc_path = GetClusterDescYAML().string(); + std::string cluster_desc_path = test_utils::GetClusterDescYAML(); std::shared_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(cluster_desc_path); std::set target_devices = {}; for (const auto& chip : cluster_desc->get_all_chips()) { @@ -266,7 +266,7 @@ void run_data_broadcast_test( dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores tt_SiliconDevice device = tt_SiliconDevice( - SOC_DESC_PATH, cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + test_utils::GetAbsPath(SOC_DESC_PATH), cluster_desc_path, target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); set_params_for_remote_txn(device); @@ -318,7 +318,7 @@ void run_data_broadcast_test( // L1 to L1 single chip TEST(GalaxyDataMovement, BroadcastData1) { - tt_SocDescriptor sdesc(SOC_DESC_PATH); + tt_SocDescriptor sdesc(test_utils::GetAbsPath(SOC_DESC_PATH)); tt_multichip_core_addr sender_core(4, tt_xy_pair(1, 1), 0x5000); std::vector receiver_cores; @@ -331,7 +331,7 @@ TEST(GalaxyDataMovement, BroadcastData1) { // L1 to L1 multi chip TEST(GalaxyDataMovement, BroadcastData2) { - tt_SocDescriptor sdesc(SOC_DESC_PATH); + tt_SocDescriptor sdesc(test_utils::GetAbsPath(SOC_DESC_PATH)); tt_multichip_core_addr sender_core(12, tt_xy_pair(1, 1), 0x5000); std::vector receiver_cores; @@ -357,7 +357,7 @@ TEST(GalaxyDataMovement, BroadcastData2) { // Dram to L1 TEST(GalaxyDataMovement, BroadcastData3) { - tt_SocDescriptor sdesc(SOC_DESC_PATH); + tt_SocDescriptor sdesc(test_utils::GetAbsPath(SOC_DESC_PATH)); 
tt_multichip_core_addr sender_core(10, tt_xy_pair(0, 0), 0x20000); std::vector receiver_cores; @@ -371,7 +371,7 @@ TEST(GalaxyDataMovement, BroadcastData3) { // L1 to Dram TEST(GalaxyDataMovement, BroadcastData4) { - tt_SocDescriptor sdesc(SOC_DESC_PATH); + tt_SocDescriptor sdesc(test_utils::GetAbsPath(SOC_DESC_PATH)); tt_multichip_core_addr sender_core(17, tt_xy_pair(8, 8), 0x20000); std::vector receiver_cores; @@ -387,7 +387,7 @@ TEST(GalaxyDataMovement, BroadcastData4) { // Dram to Dram TEST(GalaxyDataMovement, BroadcastData5) { - tt_SocDescriptor sdesc(SOC_DESC_PATH); + tt_SocDescriptor sdesc(test_utils::GetAbsPath(SOC_DESC_PATH)); tt_multichip_core_addr sender_core(31, tt_xy_pair(2, 2), 0x20000); std::vector receiver_cores; diff --git a/tests/galaxy/test_umd_remote_api_stability.cpp b/tests/galaxy/test_umd_remote_api_stability.cpp index d199e1c9..f6bd28e8 100644 --- a/tests/galaxy/test_umd_remote_api_stability.cpp +++ b/tests/galaxy/test_umd_remote_api_stability.cpp @@ -1,23 +1,27 @@ -#include -#include +// SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 #include #include #include #include +#include "tt_cluster_descriptor.h" +#include "tt_device.h" + #include "common/logger.hpp" #include "eth_interface.h" #include "filesystem" #include "gtest/gtest.h" #include "host_mem_address_map.h" #include "l1_address_map.h" -#include "../galaxy/test_galaxy_common.h" #include "tt_soc_descriptor.h" -#include "../test_utils/stimulus_generators.hpp" -#include "../test_utils/generate_cluster_desc.hpp" -#include "../wormhole/test_wh_common.h" +#include "tests/test_utils/stimulus_generators.hpp" +#include "tests/test_utils/generate_cluster_desc.hpp" +#include "tests/galaxy/test_galaxy_common.h" +#include "tests/wormhole/test_wh_common.h" namespace tt::umd::test::utils { @@ -34,7 +38,7 @@ class WormholeGalaxyStabilityTestFixture : public WormholeTestFixture { static uint32_t scale_number_of_tests; static void SetUpTestSuite() { - std::unique_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); + std::unique_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(test_utils::GetClusterDescYAML()); detected_num_chips = cluster_desc->get_number_of_chips(); if (detected_num_chips < EXPECTED_MIN_CHIPS) { skip_tests = true; diff --git a/tests/grayskull/CMakeLists.txt b/tests/grayskull/CMakeLists.txt new file mode 100644 index 00000000..ef5c36ec --- /dev/null +++ b/tests/grayskull/CMakeLists.txt @@ -0,0 +1,13 @@ + +set(UNIT_TESTS_GS_SRCS + test_silicon_driver.cpp +) + +add_executable(unit_tests_grayskull ${UNIT_TESTS_GS_SRCS}) +target_link_libraries(unit_tests_grayskull PRIVATE test_common) +set_target_properties(unit_tests_grayskull PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/umd/grayskull + OUTPUT_NAME unit_tests +) + +add_custom_target(umd_unit_tests DEPENDS unit_tests_grayskull) diff --git a/tests/grayskull/test_silicon_driver.cpp b/tests/grayskull/test_silicon_driver.cpp index 40cec709..d8324f13 100644 --- 
a/tests/grayskull/test_silicon_driver.cpp +++ b/tests/grayskull/test_silicon_driver.cpp @@ -2,20 +2,22 @@ // // SPDX-License-Identifier: Apache-2.0 +#include + #include "gtest/gtest.h" -#include -#include +#include "tt_device.h" +#include "device/tt_soc_descriptor.h" #include "device/wormhole_implementation.h" #include "l1_address_map.h" -#include +#include "tests/test_utils/generate_cluster_desc.hpp" TEST(SiliconDriverGS, CreateDestroySequential) { - std::set target_devices = {0, 1, 2, 3}; + std::set target_devices = {0}; uint32_t num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test tt_device_params default_params; for(int i = 0; i < 100; i++) { - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); device.start_device(default_params); device.deassert_risc_reset(); device.close_device(); @@ -23,14 +25,14 @@ TEST(SiliconDriverGS, CreateDestroySequential) { } TEST(SiliconDriverGS, CreateMultipleInstance) { - std::set target_devices = {0, 1, 2, 3}; + std::set target_devices = {0}; uint32_t num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test tt_device_params default_params; default_params.init_device = false; std::unordered_map concurrent_devices = {}; for(int i = 0; i < 100; i++) { - concurrent_devices.insert({i, new tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true)}); + concurrent_devices.insert({i, new tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, 
num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true)}); concurrent_devices.at(i) -> start_device(default_params); } @@ -41,11 +43,11 @@ TEST(SiliconDriverGS, CreateMultipleInstance) { } TEST(SiliconDriverGS, Harvesting) { - std::set target_devices = {0, 1}; + std::set target_devices = {0}; std::unordered_map simulated_harvesting_masks = {{0, 6}, {1, 12}}; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); auto sdesc_per_chip = device.get_virtual_soc_descriptors(); ASSERT_EQ(device.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting"; @@ -53,17 +55,17 @@ TEST(SiliconDriverGS, Harvesting) { ASSERT_EQ(chip.second.workers.size(), 96) << "Expected SOC descriptor with harvesting to have 96 workers for chip " << chip.first; } ASSERT_EQ(device.get_harvesting_masks_for_soc_descriptors().at(0), 6) << "Expected first chip to have harvesting mask of 6"; - ASSERT_EQ(device.get_harvesting_masks_for_soc_descriptors().at(1), 12) << "Expected second chip to have harvesting mask of 12"; + // ASSERT_EQ(device.get_harvesting_masks_for_soc_descriptors().at(1), 12) << "Expected second chip to have harvesting mask of 12"; device.close_device(); } TEST(SiliconDriverGS, CustomSocDesc) { - std::set target_devices = {0, 1}; + std::set target_devices = {0}; std::unordered_map simulated_harvesting_masks = {{0, 6}, {1, 12}}; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t 
num_host_mem_ch_per_mmio_device = 1; // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_1x1_arch.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("./tests/soc_descs/grayskull_1x1_arch.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false, simulated_harvesting_masks); auto sdesc_per_chip = device.get_virtual_soc_descriptors(); ASSERT_EQ(device.using_harvested_soc_descriptors(), false) << "SOC descriptors should not be modified when harvesting is disabled"; for(const auto& chip : sdesc_per_chip) { @@ -80,11 +82,11 @@ TEST(SiliconDriverGS, HarvestingRuntime) { return flat_index; }; - std::set target_devices = {0, 1}; + std::set target_devices = {0}; std::unordered_map simulated_harvesting_masks = {{0, 6}, {1, 12}}; std::unordered_map dynamic_tlb_config = {{"SMALL_READ_WRITE_TLB", 157}}; // Use both static and dynamic TLBs here uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); for(int i = 0; i < target_devices.size(); i++) { // Iterate over devices and only setup static TLBs for functional worker cores @@ -145,11 +147,11 @@ TEST(SiliconDriverGS, StaticTLB_RW) { } return flat_index; }; - std::set target_devices = {0, 1}; + std::set target_devices = {0}; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test 
uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); for(int i = 0; i < target_devices.size(); i++) { // Iterate over devices and only setup static TLBs for worker cores auto& sdesc = device.get_virtual_soc_descriptors().at(i); @@ -195,12 +197,12 @@ TEST(SiliconDriverGS, StaticTLB_RW) { TEST(SiliconDriverGS, DynamicTLB_RW) { // Don't use any static TLBs in this test. All writes go through a dynamic TLB that needs to be reconfigured for each transaction - std::set target_devices = {0, 1}; + std::set target_devices = {0}; std::unordered_map dynamic_tlb_config = {}; uint32_t num_host_mem_ch_per_mmio_device = 1; dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); device.set_fallback_tlb_ordering_mode("SMALL_READ_WRITE_TLB", TLB_DATA::Posted); // Explicitly test API to set fallback tlb ordering mode tt_device_params default_params; device.start_device(default_params); @@ -244,7 +246,7 @@ TEST(SiliconDriverGS, MultiThreadedDevice) { std::unordered_map dynamic_tlb_config = {}; uint32_t num_host_mem_ch_per_mmio_device = 1; dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores - tt_SiliconDevice device = 
tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); tt_device_params default_params; device.start_device(default_params); @@ -303,7 +305,7 @@ TEST(SiliconDriverGS, MultiThreadedDevice) { device.close_device(); } -TEST(SiliconDriverGS, MultiThreadedMemBar) { +TEST(SiliconDriverGS, MultiThreadedMemBar) { // this tests takes ~5 mins to run // Have 2 threads read and write from a single device concurrently // All (fairly large) transactions go through a static TLB. // We want to make sure the memory barrier is thread/process safe. @@ -324,7 +326,7 @@ TEST(SiliconDriverGS, MultiThreadedMemBar) { dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for reading back membar values uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/grayskull_10x12.yaml", "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"), "", target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true); for(int i = 0; i < target_devices.size(); i++) { // Iterate over devices and only setup static TLBs for functional worker cores diff --git a/tests/module.mk b/tests/module.mk index e9eaa99d..2be13022 100644 --- a/tests/module.mk +++ b/tests/module.mk @@ -11,7 +11,7 @@ else DEVICE_UNIT_TESTS_INCLUDES += -I$(UMD_HOME)/device/$(ARCH_NAME)/ -I$(UMD_HOME)/src/firmware/riscv/$(ARCH_NAME) endif -COMMON_UNIT_TESTS_SRCS = $(wildcard $(UMD_HOME)/tests/test_utils/*.cpp) +# COMMON_UNIT_TESTS_SRCS = $(wildcard $(UMD_HOME)/tests/test_utils/*.cpp) DEVICE_UNIT_TESTS += $(basename $(wildcard 
$(UMD_HOME)/tests/*.c*)) diff --git a/tests/simulation/CMakeLists.txt b/tests/simulation/CMakeLists.txt index b42278f2..8d4b4808 100644 --- a/tests/simulation/CMakeLists.txt +++ b/tests/simulation/CMakeLists.txt @@ -8,7 +8,7 @@ foreach(TEST ${SIMULATION_TEST_SRCS}) add_executable(${TEST_NAME} ${TEST}) target_link_libraries(${TEST_NAME} PRIVATE test_common) target_include_directories(${TEST_NAME} PRIVATE - ${CMAKE_SOURCE_DIR}/device/simulation + ${PROJECT_SOURCE_DIR}/device/simulation ) set_target_properties(${TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/simulation) diff --git a/tests/test_utils/generate_cluster_desc.hpp b/tests/test_utils/generate_cluster_desc.hpp index 4fdd4828..aec38f22 100644 --- a/tests/test_utils/generate_cluster_desc.hpp +++ b/tests/test_utils/generate_cluster_desc.hpp @@ -7,7 +7,40 @@ #pragma once #include +#include +#include -namespace fs = std::filesystem; +namespace test_utils { -fs::path GetClusterDescYAML(); +inline std::string GetAbsPath(std::string path_){ + std::filesystem::path umd_root_relative = std::filesystem::relative(std::filesystem::path(__FILE__).parent_path().parent_path().parent_path(), "../"); + std::filesystem::path umd_root = std::filesystem::canonical(umd_root_relative); + std::filesystem::path abs_path = umd_root / path_; + return abs_path.string(); +} + +inline std::string GetClusterDescYAML(){ + static std::string yaml_path; + static bool is_initialized = false; + if (!is_initialized){ + std::filesystem::path umd_path = std::filesystem::path(test_utils::GetAbsPath("")); + std::filesystem::path cluster_path = umd_path / ".umd"; + std::filesystem::create_directories( cluster_path ); + + cluster_path /= "cluster_desc.yaml"; + if (!std::filesystem::exists(cluster_path)){ + auto val = system ( ("touch " + cluster_path.string()).c_str()); + if(val != 0) throw std::runtime_error("Cluster Generation Failed!"); + } + // Generates the cluster descriptor in the CWD + + std::filesystem::path eth_fpath 
= umd_path / "device/bin/silicon/x86/create-ethernet-map"; + std::string cmd = eth_fpath.string() + " " + cluster_path.string(); + int val = system(cmd.c_str()); + if(val != 0) throw std::runtime_error("Cluster Generation Failed!"); + yaml_path = cluster_path.string(); + is_initialized = true; + } + return yaml_path; +} +} // namespace test_utils diff --git a/tests/test_utils/stimulus_generators.hpp b/tests/test_utils/stimulus_generators.hpp index 8030c10f..094f06cb 100644 --- a/tests/test_utils/stimulus_generators.hpp +++ b/tests/test_utils/stimulus_generators.hpp @@ -1,7 +1,7 @@ #pragma once #include "tt_xy_pair.h" -#include -#include +#include "tt_cluster_descriptor.h" +#include "tt_device.h" #include @@ -33,7 +33,7 @@ namespace tt::umd::test::utils { -static const std::string SOC_DESC_PATH = "./tests/soc_descs/wormhole_b0_8x10.yaml"; +static const std::string SOC_DESC_PATH = "tests/soc_descs/wormhole_b0_8x10.yaml"; enum RemoteTransferType : uint8_t { WRITE = 0, ROLLED_WRITE, READ, EPOCH_CMD_WRITE }; diff --git a/tests/wormhole/CMakeLists.txt b/tests/wormhole/CMakeLists.txt new file mode 100644 index 00000000..ff197fb2 --- /dev/null +++ b/tests/wormhole/CMakeLists.txt @@ -0,0 +1,14 @@ + +set(UNIT_TESTS_WH_SRCS + test_silicon_driver_wh.cpp + test_umd_remote_api_stability.cpp +) + +add_executable(unit_tests_wormhole ${UNIT_TESTS_WH_SRCS}) +target_link_libraries(unit_tests_wormhole PRIVATE test_common) +set_target_properties(unit_tests_wormhole PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/umd/wormhole_b0 + OUTPUT_NAME unit_tests +) + +add_custom_target(umd_unit_tests DEPENDS unit_tests_wormhole) diff --git a/tests/wormhole/test_silicon_driver_wh.cpp b/tests/wormhole/test_silicon_driver_wh.cpp index a2c4805e..df686dfa 100644 --- a/tests/wormhole/test_silicon_driver_wh.cpp +++ b/tests/wormhole/test_silicon_driver_wh.cpp @@ -1,16 +1,16 @@ // SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
// // SPDX-License-Identifier: Apache-2.0 +#include +#include #include "gtest/gtest.h" -#include +#include "tt_device.h" #include "eth_l1_address_map.h" #include "l1_address_map.h" #include "eth_l1_address_map.h" #include "eth_interface.h" #include "host_mem_address_map.h" -#include -#include #include "device/tt_cluster_descriptor.h" #include "device/wormhole_implementation.h" @@ -71,14 +71,23 @@ std::int32_t get_static_tlb_index(tt_xy_pair target) { } } +std::set get_target_devices() { + std::set target_devices; + std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(test_utils::GetClusterDescYAML()); + for (int i = 0; i < cluster_desc_uniq->get_number_of_chips(); i++) { + target_devices.insert(i); + } + return target_devices; +} + TEST(SiliconDriverWH, CreateDestroy) { - std::set target_devices = {0, 1}; + std::set target_devices = get_target_devices(); uint32_t num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test tt_device_params default_params; // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting for(int i = 0; i < 50; i++) { - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_1x1.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_1x1.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false); set_params_for_remote_txn(device); device.start_device(default_params); device.deassert_risc_reset(); @@ -87,19 +96,13 @@ TEST(SiliconDriverWH, CreateDestroy) { } TEST(SiliconDriverWH, Harvesting) { - std::set target_devices = {0, 1}; + std::set target_devices = get_target_devices(); + int num_devices = target_devices.size(); std::unordered_map simulated_harvesting_masks = {{0, 30}, 
{1, 60}}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); auto sdesc_per_chip = device.get_virtual_soc_descriptors(); ASSERT_EQ(device.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting"; @@ -107,24 +110,19 @@ TEST(SiliconDriverWH, Harvesting) { for(const auto& chip : sdesc_per_chip) { ASSERT_EQ(chip.second.workers.size(), 48) << "Expected SOC descriptor with harvesting to have 48 workers for chip" << chip.first; } - ASSERT_EQ(device.get_harvesting_masks_for_soc_descriptors().at(0), 30) << "Expected first chip to have harvesting mask of 30"; - ASSERT_EQ(device.get_harvesting_masks_for_soc_descriptors().at(1), 60) << "Expected second chip to have harvesting mask of 60"; + for(int i = 0; i < num_devices; i++){ + ASSERT_EQ(device.get_harvesting_masks_for_soc_descriptors().at(i), simulated_harvesting_masks.at(i)) << "Expecting chip " << i << " to have harvesting mask of " << simulated_harvesting_masks.at(i); + } } TEST(SiliconDriverWH, CustomSocDesc) { - std::set target_devices = {0, 1}; + std::set target_devices = get_target_devices(); std::unordered_map 
simulated_harvesting_masks = {{0, 30}, {1, 60}}; - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; // Initialize the driver with a 1x1 descriptor and explictly do not perform harvesting - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_1x1.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_1x1.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, false, simulated_harvesting_masks); auto sdesc_per_chip = device.get_virtual_soc_descriptors(); ASSERT_EQ(device.using_harvested_soc_descriptors(), false) << "SOC descriptors should not be modified when harvesting is disabled"; @@ -139,19 +137,13 @@ TEST(SiliconDriverWH, HarvestingRuntime) { return get_static_tlb_index(target); }; - std::set target_devices = {0, 1}; + std::set target_devices = get_target_devices(); std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } uint32_t num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {{"SMALL_READ_WRITE_TLB", 157}}; // Use both static and dynamic TLBs here - 
tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true, simulated_harvesting_masks); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -210,13 +202,14 @@ TEST(SiliconDriverWH, UnalignedStaticTLB_RW) { return get_static_tlb_index(target); }; - std::set target_devices = {0, 1}; + std::set target_devices = get_target_devices(); + int num_devices = target_devices.size(); std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test dynamic_tlb_config["REG_TLB"] = 184; uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -238,7 +231,7 @@ TEST(SiliconDriverWH, UnalignedStaticTLB_RW) { device.deassert_risc_reset(); std::vector unaligned_sizes = {3, 14, 21, 255, 362, 430, 1022, 1023, 1025}; - for(int i = 0; i < 2; i++) { + for(int i = 0; i < num_devices; i++) { for(const auto& size : unaligned_sizes) { std::vector write_vec(size, 0); for(int i = 0; i < size; i++){ @@ -273,19 +266,12 @@ TEST(SiliconDriverWH, StaticTLB_RW) { return get_static_tlb_index(target); }; - std::set 
target_devices = {0, 1}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -331,19 +317,12 @@ TEST(SiliconDriverWH, StaticTLB_RW) { TEST(SiliconDriverWH, DynamicTLB_RW) { // Don't use any static TLBs in this test. 
All writes go through a dynamic TLB that needs to be reconfigured for each transaction - std::set target_devices = {0, 1}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; uint32_t num_host_mem_ch_per_mmio_device = 1; dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); @@ -378,19 +357,12 @@ TEST(SiliconDriverWH, MultiThreadedDevice) { // Have 2 threads read and write from a single device concurrently // All transactions go through a single Dynamic TLB. 
We want to make sure this is thread/process safe - std::set target_devices = {0}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() > 2) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a one or two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; uint32_t num_host_mem_ch_per_mmio_device = 1; dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for all reads and writes to worker cores - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); @@ -445,20 +417,24 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { return get_static_tlb_index(target); }; - std::set target_devices = {0}; + std::set target_devices = get_target_devices(); uint32_t base_addr = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; std::unordered_map dynamic_tlb_config = {}; dynamic_tlb_config.insert({"SMALL_READ_WRITE_TLB", 157}); // Use this for reading back membar values uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); 
set_params_for_remote_txn(device); + auto mmio_devices = device.get_target_mmio_device_ids(); + for(int i = 0; i < target_devices.size(); i++) { // Iterate over devices and only setup static TLBs for functional worker cores - auto& sdesc = device.get_virtual_soc_descriptors().at(i); - for(auto& core : sdesc.workers) { - // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. - device.configure_tlb(i, core, get_static_tlb_index_callback(core), base_addr); + if(std::find(mmio_devices.begin(), mmio_devices.end(), i) != mmio_devices.end()) { + auto& sdesc = device.get_virtual_soc_descriptors().at(i); + for(auto& core : sdesc.workers) { + // Statically mapping a 1MB TLB to this core, starting from address DATA_BUFFER_SPACE_BASE. + device.configure_tlb(i, core, get_static_tlb_index_callback(core), base_addr); + } } } device.setup_core_to_tlb_map(get_static_tlb_index_callback); @@ -550,19 +526,12 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { TEST(SiliconDriverWH, BroadcastWrite) { // Broadcast multiple vectors to tensix and dram grid. 
Verify broadcasted data is read back correctly - std::set target_devices = {0, 1}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -614,19 +583,12 @@ TEST(SiliconDriverWH, BroadcastWrite) { TEST(SiliconDriverWH, VirtualCoordinateBroadcast) { // Broadcast multiple vectors to tensix and dram grid. 
Verify broadcasted data is read back correctly - std::set target_devices = {0, 1}; - - { - std::unique_ptr cluster_desc_uniq = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); - if (cluster_desc_uniq->get_number_of_chips() != target_devices.size()) { - GTEST_SKIP() << "SiliconDriverWH.Harvesting skipped because it can only be run on a two chip nebula system"; - } - } + std::set target_devices = get_target_devices(); std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice("./tests/soc_descs/wormhole_b0_8x10.yaml", GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); diff --git a/tests/wormhole/test_umd_remote_api_stability.cpp b/tests/wormhole/test_umd_remote_api_stability.cpp index ef090b61..36c02914 100644 --- a/tests/wormhole/test_umd_remote_api_stability.cpp +++ b/tests/wormhole/test_umd_remote_api_stability.cpp @@ -1,22 +1,25 @@ -#include -#include +// SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 #include #include #include #include +#include "tt_cluster_descriptor.h" +#include "tt_device.h" + #include "common/logger.hpp" #include "eth_interface.h" #include "filesystem" #include "gtest/gtest.h" #include "host_mem_address_map.h" #include "l1_address_map.h" -#include "../galaxy/test_galaxy_common.h" #include "tt_soc_descriptor.h" -#include "../test_utils/stimulus_generators.hpp" -#include "../test_utils/generate_cluster_desc.hpp" +#include "tests/test_utils/stimulus_generators.hpp" +#include "tests/test_utils/generate_cluster_desc.hpp" #include "test_wh_common.h" #include @@ -34,7 +37,7 @@ class WormholeNebulaX2TestFixture : public WormholeTestFixture { static uint32_t scale_number_of_tests; static void SetUpTestSuite() { - std::unique_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(GetClusterDescYAML().string()); + std::unique_ptr cluster_desc = tt_ClusterDescriptor::create_from_yaml(test_utils::GetClusterDescYAML()); detected_num_chips = cluster_desc->get_number_of_chips(); if (detected_num_chips != EXPECTED_NUM_CHIPS) { skip_tests = true; @@ -190,7 +193,6 @@ TEST_F(WormholeNebulaX2TestFixture, MultithreadedMixedRemoteTransfersMediumSmall t4.join(); } - TEST_F(WormholeNebulaX2TestFixture, MixedRemoteTransfersLarge) { int seed = 0; diff --git a/tests/wormhole/test_wh_common.h b/tests/wormhole/test_wh_common.h index 481ce21c..02686c96 100644 --- a/tests/wormhole/test_wh_common.h +++ b/tests/wormhole/test_wh_common.h @@ -1,9 +1,11 @@ #pragma once -#include "../test_utils/stimulus_generators.hpp" -#include "eth_l1_address_map.h" + +#include "tt_cluster_descriptor.h" +#include "tt_device.h" #include "tt_xy_pair.h" -#include -#include +#include "eth_l1_address_map.h" + +#include "tests/test_utils/stimulus_generators.hpp" namespace tt::umd::test::utils { @@ -57,7 +59,7 @@ class WormholeTestFixture : public ::testing::Test { std::set target_devices = {devices.begin(), devices.end()}; uint32_t 
num_host_mem_ch_per_mmio_device = 1; std::unordered_map dynamic_tlb_config = {}; // Don't set any dynamic TLBs in this test - device = std::make_unique(SOC_DESC_PATH, GetClusterDescYAML().string(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); + device = std::make_unique(test_utils::GetAbsPath(SOC_DESC_PATH), test_utils::GetClusterDescYAML(), target_devices, num_host_mem_ch_per_mmio_device, dynamic_tlb_config, false, true, true); assert(device != nullptr); assert(device->get_cluster_description()->get_number_of_chips() == get_detected_num_chips());