diff --git a/common/utils.hpp b/common/utils.hpp
new file mode 100644
index 00000000..c29e85f4
--- /dev/null
+++ b/common/utils.hpp
@@ -0,0 +1,38 @@
+/*
+ * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <filesystem>
+#include <string>
+
+namespace tt::umd::utils {
+
+/**
+ * Builds an absolute path for a file addressed relative to the UMD repository root.
+ *
+ * The root is derived from the location of this header (__FILE__, i.e. common/utils.hpp).
+ * Declared inline because the definition lives in a header that several translation units may include.
+ *
+ * @param path Path to append to the repository root.
+ * @return The repository root joined with path, as a string.
+ */
+inline std::string get_abs_path(std::string path) {
+    // Note that __FILE__ might be resolved at compile time to an absolute or relative address, depending on the compiler.
+    std::filesystem::path current_file_path = std::filesystem::path(__FILE__);
+    std::filesystem::path umd_root;
+    if (current_file_path.is_absolute()) {
+        umd_root = current_file_path.parent_path().parent_path();
+    } else {
+        // relative() and canonical() resolve against the current working directory, so this branch depends on where the process runs.
+        std::filesystem::path umd_root_relative = std::filesystem::relative(std::filesystem::path(__FILE__).parent_path().parent_path().parent_path(), "../");
+        umd_root = std::filesystem::canonical(umd_root_relative);
+    }
+    std::filesystem::path abs_path = umd_root / path;
+    return abs_path.string();
+}
+
+} // namespace tt::umd::utils
diff --git a/device/mockup/tt_mockup_device.hpp b/device/mockup/tt_mockup_device.hpp
index bacfb832..2941fb3a 100644
--- a/device/mockup/tt_mockup_device.hpp
+++ b/device/mockup/tt_mockup_device.hpp
@@ -14,7 +14,7 @@
 
 class tt_MockupDevice : public tt_device {
    public:
-    tt_MockupDevice(const std::string& sdesc_path) : tt_device(sdesc_path) {
+    tt_MockupDevice(const std::string& sdesc_path) : tt_device() {
         soc_descriptor_per_chip.emplace(0, tt_SocDescriptor(sdesc_path));
         std::set target_devices = {0};
     }
diff --git a/device/simulation/tt_simulation_device.cpp b/device/simulation/tt_simulation_device.cpp
index 9b0457d4..92289831 100644
--- a/device/simulation/tt_simulation_device.cpp
+++ b/device/simulation/tt_simulation_device.cpp
@@ -48,7 +48,7 @@ void print_flatbuffer(const DeviceRequestResponse *buf){
     std::cout << std::endl;
 }
 
-tt_SimulationDevice::tt_SimulationDevice(const std::string &sdesc_path) : tt_device(sdesc_path){
+tt_SimulationDevice::tt_SimulationDevice(const std::string &sdesc_path) : tt_device(){ log_info(tt::LogEmulationDriver, "Instantiating simulation device"); soc_descriptor_per_chip.emplace(0, tt_SocDescriptor(sdesc_path)); std::set target_devices = {0}; diff --git a/device/soc_descriptors/blackhole_140_arch_no_eth.yaml b/device/soc_descriptors/blackhole_140_arch_no_eth.yaml new file mode 100644 index 00000000..24de01fc --- /dev/null +++ b/device/soc_descriptors/blackhole_140_arch_no_eth.yaml @@ -0,0 +1,71 @@ +# Note taken from software repo - may need updates. +grid: + x_size: 17 + y_size: 12 + +arc: + [ 8-0 ] + +pcie: + [ 11-0 ] + +dram: + [ + [0-0, 0-1, 0-11], + [0-2, 0-10, 0-3], + [0-9, 0-4, 0-8], + [0-5, 0-7, 0-6], + [9-0, 9-1, 9-11], + [9-2, 9-10, 9-3], + [9-9, 9-4, 9-8], + [9-5, 9-7, 9-6], + ] + +eth: + [] + +functional_workers: + [ + 1-2, 2-2, 3-2, 4-2, 5-2, 6-2, 7-2, 10-2, 11-2, 12-2, 13-2, 14-2, 15-2, 16-2, + 1-3, 2-3, 3-3, 4-3, 5-3, 6-3, 7-3, 10-3, 11-3, 12-3, 13-3, 14-3, 15-3, 16-3, + 1-4, 2-4, 3-4, 4-4, 5-4, 6-4, 7-4, 10-4, 11-4, 12-4, 13-4, 14-4, 15-4, 16-4, + 1-5, 2-5, 3-5, 4-5, 5-5, 6-5, 7-5, 10-5, 11-5, 12-5, 13-5, 14-5, 15-5, 16-5, + 1-6, 2-6, 3-6, 4-6, 5-6, 6-6, 7-6, 10-6, 11-6, 12-6, 13-6, 14-6, 15-6, 16-6, + 1-7, 2-7, 3-7, 4-7, 5-7, 6-7, 7-7, 10-7, 11-7, 12-7, 13-7, 14-7, 15-7, 16-7, + 1-8, 2-8, 3-8, 4-8, 5-8, 6-8, 7-8, 10-8, 11-8, 12-8, 13-8, 14-8, 15-8, 16-8, + 1-9, 2-9, 3-9, 4-9, 5-9, 6-9, 7-9, 10-9, 11-9, 12-9, 13-9, 14-9, 15-9, 16-9, + 1-10, 2-10, 3-10, 4-10, 5-10, 6-10, 7-10, 10-10, 11-10, 12-10, 13-10, 14-10, 15-10, 16-10, + 1-11, 2-11, 3-11, 4-11, 5-11, 6-11, 7-11, 10-11, 11-11, 12-11, 13-11, 14-11, 15-11, 16-11, + ] + +harvested_workers: + [] + +router_only: + [ + 1-0, 2-0, 3-0, 4-0, 5-0, 6-0, 7-0, 10-0, 12-0, 13-0, 14-0, 15-0, 16-0, + 1-1, 2-1, 3-1, 4-1, 5-1, 6-1, 7-1, 10-1, 11-1, 12-1, 13-1, 14-1, 15-1, 16-1, + 8-1, 8-2, 8-3, 8-4, 8-5, 8-6, 8-7, 8-8, 8-9, 8-10, 8-11 + ] + +worker_l1_size: + 1499136 + +dram_bank_size: + 4294967296 + 
+eth_l1_size: + 262144 + +arch_name: BLACKHOLE + +features: + unpacker: + version: 2 + inline_srca_trans_without_srca_trans_instr: True + math: + dst_size_alignment: 32768 + packer: + version: 2 + overlay: + version: 2 diff --git a/device/soc_descriptors/grayskull_10x12.yaml b/device/soc_descriptors/grayskull_10x12.yaml new file mode 100644 index 00000000..cf53553f --- /dev/null +++ b/device/soc_descriptors/grayskull_10x12.yaml @@ -0,0 +1,61 @@ +grid: + x_size: 13 + y_size: 12 + +arc: + [0-2] + +pcie: + [0-4] + +dram: + [[1-0], [1-6], [4-0], [4-6], [7-0], [7-6], [10-0], [10-6]] + +eth: + [] + +functional_workers: + [ + 1-1, 2-1, 3-1, 4-1, 5-1, 6-1, 7-1, 8-1, 9-1, 10-1, 11-1, 12-1, + 1-2, 2-2, 3-2, 4-2, 5-2, 6-2, 7-2, 8-2, 9-2, 10-2, 11-2, 12-2, + 1-3, 2-3, 3-3, 4-3, 5-3, 6-3, 7-3, 8-3, 9-3, 10-3, 11-3, 12-3, + 1-4, 2-4, 3-4, 4-4, 5-4, 6-4, 7-4, 8-4, 9-4, 10-4, 11-4, 12-4, + 1-5, 2-5, 3-5, 4-5, 5-5, 6-5, 7-5, 8-5, 9-5, 10-5, 11-5, 12-5, + 1-7, 2-7, 3-7, 4-7, 5-7, 6-7, 7-7, 8-7, 9-7, 10-7, 11-7, 12-7, + 1-8, 2-8, 3-8, 4-8, 5-8, 6-8, 7-8, 8-8, 9-8, 10-8, 11-8, 12-8, + 1-9, 2-9, 3-9, 4-9, 5-9, 6-9, 7-9, 8-9, 9-9, 10-9, 11-9, 12-9, + 1-10, 2-10, 3-10, 4-10, 5-10, 6-10, 7-10, 8-10, 9-10, 10-10, 11-10, 12-10, + 1-11, 2-11, 3-11, 4-11, 5-11, 6-11, 7-11, 8-11, 9-11, 10-11, 11-11, 12-11 + ] + +harvested_workers: + [] + +router_only: + [ + 0-0, 0-11, 0-1, 0-10, 0-9, 0-3, 0-8, 0-7, 0-5, 0-6, + 12-0, 11-0, 2-0, 3-0, 9-0, 8-0, 5-0, 6-0, + 12-6, 11-6, 2-6, 3-6, 9-6, 8-6, 5-6, 6-6 + ] + +worker_l1_size: + 1048576 + +dram_bank_size: + 1073741824 + +eth_l1_size: + 0 + +arch_name: GRAYSKULL + +features: + unpacker: + version: 1 + inline_srca_trans_without_srca_trans_instr: False + math: + dst_size_alignment: 32768 + packer: + version: 1 + overlay: + version: 1 diff --git a/device/soc_descriptors/wormhole_b0_8x10.yaml b/device/soc_descriptors/wormhole_b0_8x10.yaml new file mode 100644 index 00000000..1bf2d5f9 --- /dev/null +++ b/device/soc_descriptors/wormhole_b0_8x10.yaml @@ -0,0 
+1,70 @@ +# Note taken from software repo - may need updates. +grid: + x_size: 10 + y_size: 12 + +arc: + [ 0-10 ] + +pcie: + [ 0-3 ] + +dram: + [ + [0-0, 0-1, 0-11], + [0-5, 0-6, 0-7], + [5-0, 5-1, 5-11], + [5-2, 5-9, 5-10], + [5-3, 5-4, 5-8], + [5-5, 5-6, 5-7], + ] + +eth: + [ + 9-0, 1-0, 8-0, 2-0, 7-0, 3-0, 6-0, 4-0, + 9-6, 1-6, 8-6, 2-6, 7-6, 3-6, 6-6, 4-6, + ] + +functional_workers: + [ + 1-1, 2-1, 3-1, 4-1, 6-1, 7-1, 8-1, 9-1, + 1-2, 2-2, 3-2, 4-2, 6-2, 7-2, 8-2, 9-2, + 1-3, 2-3, 3-3, 4-3, 6-3, 7-3, 8-3, 9-3, + 1-4, 2-4, 3-4, 4-4, 6-4, 7-4, 8-4, 9-4, + 1-5, 2-5, 3-5, 4-5, 6-5, 7-5, 8-5, 9-5, + 1-7, 2-7, 3-7, 4-7, 6-7, 7-7, 8-7, 9-7, + 1-8, 2-8, 3-8, 4-8, 6-8, 7-8, 8-8, 9-8, + 1-9, 2-9, 3-9, 4-9, 6-9, 7-9, 8-9, 9-9, + 1-10, 2-10, 3-10, 4-10, 6-10, 7-10, 8-10, 9-10, + 1-11, 2-11, 3-11, 4-11, 6-11, 7-11, 8-11, 9-11, + ] + +harvested_workers: + [] + +router_only: + [ + 0-2, 0-4, 0-8, 0-9 + ] + +worker_l1_size: + 1499136 + +dram_bank_size: + 2147483648 + +eth_l1_size: + 262144 + +arch_name: WORMHOLE_B0 + +features: + unpacker: + version: 2 + inline_srca_trans_without_srca_trans_instr: True + math: + dst_size_alignment: 32768 + packer: + version: 2 + overlay: + version: 2 diff --git a/device/tt_device.cpp b/device/tt_device.cpp index 9d974936..9df2f392 100644 --- a/device/tt_device.cpp +++ b/device/tt_device.cpp @@ -21,7 +21,7 @@ //////// // Device base //////// -tt_device::tt_device(const std::string& sdesc_path) : soc_descriptor_per_chip({}) { +tt_device::tt_device() : soc_descriptor_per_chip({}) { } tt_device::~tt_device() { diff --git a/device/tt_device.h b/device/tt_device.h index 340dcab9..5d944ac1 100644 --- a/device/tt_device.h +++ b/device/tt_device.h @@ -222,7 +222,7 @@ struct tt_device_params { class tt_device { public: - tt_device(const std::string& sdesc_path); + tt_device(); virtual ~tt_device(); // Setup/Teardown Functions /** @@ -605,7 +605,7 @@ class tt_device */ class tt_SiliconDevice: public tt_device { - public: +public: // Constructor /** * 
Silicon Driver constructor. @@ -623,6 +623,20 @@ class tt_SiliconDevice: public tt_device const uint32_t &num_host_mem_ch_per_mmio_device = 1, const bool skip_driver_allocs = false, const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); + /** + * Silicon Driver constructor. For now this is a constructor that should be used to work towards removing all + * of the params from the constructor. For now this works only for Wormhole. Fixing parsing cluster descriptor + * will enable this to work for Grayskull and Blackhole. + * + * @param num_host_mem_ch_per_mmio_device Requested number of host channels (hugepages). + * @param skip_driver_allocs + * @param clean_system_resources Specifies if host state from previous runs needs to be cleaned up. + * @param perform_harvesting Allow the driver to modify the SOC descriptors per chip. + * @param simulated_harvesting_masks + */ + tt_SiliconDevice(const uint32_t &num_host_mem_ch_per_mmio_device = 1, const bool skip_driver_allocs = false, + const bool clean_system_resources = false, bool perform_harvesting = true, std::unordered_map simulated_harvesting_masks = {}); + //Setup/Teardown Functions virtual std::unordered_map& get_virtual_soc_descriptors(); virtual void set_device_l1_address_params(const tt_device_l1_address_params& l1_address_params_); @@ -705,11 +719,12 @@ class tt_SiliconDevice: public tt_device virtual tt_version get_ethernet_fw_version() const; // TODO: This should be accessible through public API, probably to be moved to tt_device. 
PCIDevice *get_pci_device(int device_id) const; + const tt_ClusterDescriptor* get_cluster_desc(); // Destructor virtual ~tt_SiliconDevice (); - private: +private: // Helper functions // Startup + teardown void create_device(const std::unordered_set &target_mmio_device_ids, const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, const bool clean_system_resources); @@ -770,6 +785,9 @@ class tt_SiliconDevice: public tt_device // This functions has to be called for local chip, and then it will wait for all connected remote chips to flush. void wait_for_connected_non_mmio_flush(chip_id_t chip_id); + void construct_tt_silicon_device(const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, + const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks); + // State variables tt_device_dram_address_params dram_address_params; tt_device_l1_address_params l1_address_params; @@ -782,6 +800,7 @@ class tt_SiliconDevice: public tt_device std::unordered_map> m_pci_device_map; // Map of enabled pci devices int m_num_pci_devices; // Number of pci devices in system (enabled or disabled) std::shared_ptr ndesc; + std::string sdesc_path; // remote eth transfer setup static constexpr std::uint32_t NUM_ETH_CORES_FOR_NON_MMIO_TRANSFERS = 6; diff --git a/device/tt_silicon_driver.cpp b/device/tt_silicon_driver.cpp index 50ff405d..3c845ea0 100644 --- a/device/tt_silicon_driver.cpp +++ b/device/tt_silicon_driver.cpp @@ -36,6 +36,7 @@ #include #include +#include "tt_arch_types.h" #include "yaml-cpp/yaml.h" #include "common/logger.hpp" @@ -167,6 +168,10 @@ std::unordered_map& tt_SiliconDevice::get_virtual_s return soc_descriptor_per_chip; } +const tt_ClusterDescriptor* tt_SiliconDevice::get_cluster_desc() { + return ndesc.get(); +} + void tt_SiliconDevice::initialize_interprocess_mutexes(int pci_interface_id, bool cleanup_mutexes_in_shm) { // These mutexes are intended to be based on physical 
devices/pci-intf not logical. Set these up ahead of time here (during device init) // since its unsafe to modify shared state during multithreaded runtime. @@ -287,30 +292,11 @@ std::unordered_map tt_SiliconDevice::get_harvesting_masks_f return default_harvesting_masks; } -tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::string &ndesc_path, const std::set &target_devices, - const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, - const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) : tt_device(sdesc_path) { +void tt_SiliconDevice::construct_tt_silicon_device(const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs, + const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) { + std::unordered_set target_mmio_device_ids; - target_devices_in_cluster = target_devices; - arch_name = tt_SocDescriptor(sdesc_path).arch; - perform_harvesting_on_sdesc = perform_harvesting; - - auto available_device_ids = detect_available_device_ids(); - m_num_pci_devices = available_device_ids.size(); - - if (!skip_driver_allocs) { - log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices, (m_num_pci_devices > 1) ? 
"s":"", available_device_ids); - log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices); - } - - if (ndesc_path == "") { - ndesc = tt_ClusterDescriptor::create_for_grayskull_cluster(target_devices, available_device_ids); - } - else { - ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path); - } - - for (auto &d: target_devices){ + for (auto &d: target_devices_in_cluster){ if (ndesc->is_chip_mmio_capable(d)){ target_mmio_device_ids.insert(d); } @@ -344,7 +330,7 @@ tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::str translation_tables_en = false; for(auto& masks : harvesting_masks) { - if(target_devices.find(masks.first) != target_devices.end()) { + if(target_devices_in_cluster.find(masks.first) != target_devices_in_cluster.end()) { harvested_rows_per_target[masks.first] = get_harvested_noc_rows(masks.second); noc_translation_enabled_for_chip[masks.first] = noc_translation_enabled.at(masks.first); num_rows_harvested.insert({masks.first, std::bitset<32>(masks.second).count()}); @@ -373,7 +359,7 @@ tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::str } else if(arch_name == tt::ARCH::BLACKHOLE) { // Default harvesting info for Blackhole, describing no harvesting - for(auto chip_id = target_devices.begin(); chip_id != target_devices.end(); chip_id++){ + for(auto chip_id = target_devices_in_cluster.begin(); chip_id != target_devices_in_cluster.end(); chip_id++){ harvested_rows_per_target[*chip_id] = 0; //get_harvested_noc_rows_for_chip(*chip_id); num_rows_harvested.insert({*chip_id, 0}); // Only set for broadcast TLB to get RISCS out of reset. We want all rows to have a reset signal sent. if(harvested_rows_per_target[*chip_id]) { @@ -383,7 +369,7 @@ tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::str } else if(arch_name == tt::ARCH::GRAYSKULL) { // Multichip harvesting is supported for GS. 
-        for(auto chip_id = target_devices.begin(); chip_id != target_devices.end(); chip_id++){
+        for(auto chip_id = target_devices_in_cluster.begin(); chip_id != target_devices_in_cluster.end(); chip_id++){
             harvested_rows_per_target[*chip_id] = get_harvested_noc_rows_for_chip(*chip_id);
             num_rows_harvested.insert({*chip_id, 0}); // Only set for broadcast TLB to get RISCS out of reset. We want all rows to have a reset signal sent.
             if(harvested_rows_per_target[*chip_id]) {
@@ -394,7 +380,7 @@ tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::str
 
     if(simulated_harvesting_masks.size()) {
         performed_harvesting = true;
-        for (auto device_id = target_devices.begin(); device_id != target_devices.end(); device_id++) {
+        for (auto device_id = target_devices_in_cluster.begin(); device_id != target_devices_in_cluster.end(); device_id++) {
             log_assert(simulated_harvesting_masks.find(*device_id) != simulated_harvesting_masks.end(), "Could not find harvesting mask for device_id {}", *device_id);
             if(arch_name == tt::ARCH::GRAYSKULL) {
                 if ((simulated_harvesting_masks.at(*device_id) & harvested_rows_per_target[*device_id]) != harvested_rows_per_target[*device_id]) {
@@ -437,7 +423,77 @@ tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::str
 
     // Default initialize host_address_params based on detected arch
     host_address_params = architecture_implementation->get_host_address_params();
+}
+
+// Builds the driver for every chip listed in the cluster descriptor. The soc descriptor and cluster
+// descriptor paths are derived from the architecture of the first detected PCI device.
+tt_SiliconDevice::tt_SiliconDevice(const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
+                                   const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) : tt_device() {
+    auto available_device_ids = detect_available_device_ids();
+    m_num_pci_devices = available_device_ids.size();
+
+    // The architecture is probed on the first detected device; assert up front instead of indexing an empty list.
+    log_assert(!available_device_ids.empty(), "No Tenstorrent PCI devices detected, cannot determine device architecture");
+    int physical_device_id = available_device_ids[0];
+    PCIDevice pci_device (physical_device_id, 0);
+    tt::ARCH device_arch = pci_device.get_arch();
+
+    std::string ndesc_path = "";
+    if (device_arch == tt::ARCH::WORMHOLE_B0) {
+        ndesc_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path();
+    }
+
+    sdesc_path = tt_SocDescriptor::get_soc_descriptor_path(device_arch);
+
+    arch_name = tt_SocDescriptor(sdesc_path).arch;
+    perform_harvesting_on_sdesc = perform_harvesting;
+
+    if (!skip_driver_allocs) {
+        log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices, (m_num_pci_devices > 1) ? "s":"", available_device_ids);
+    }
+
+    // TODO: this does not work for Grayskull and Blackhole. Parsing cluster descriptor needs
+    // to be fixed in order to make this work. Blackhole and Grayskull don't have chips section inside cluster descriptor.
+    log_assert(device_arch == tt::ARCH::WORMHOLE_B0, "This constructor is only supported for Wormhole devices");
+    ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path);
+
+    // Target chip ids 0..N-1, where N is the number of chips reported by the cluster descriptor.
+    for (int i = 0; i < ndesc->get_number_of_chips(); i++) {
+        target_devices_in_cluster.insert(i);
+    }
+
+    if (!skip_driver_allocs) {
+        // Logged here because the target set is only known once the cluster descriptor has been parsed.
+        log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices_in_cluster);
+    }
+
+    construct_tt_silicon_device(num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources, perform_harvesting, simulated_harvesting_masks);
+}
+
+tt_SiliconDevice::tt_SiliconDevice(const std::string &sdesc_path, const std::string &ndesc_path, const std::set &target_devices,
+                                   const uint32_t &num_host_mem_ch_per_mmio_device, const bool skip_driver_allocs,
+                                   const bool clean_system_resources, bool perform_harvesting, std::unordered_map simulated_harvesting_masks) : tt_device() {
+    auto available_device_ids = detect_available_device_ids();
+    m_num_pci_devices = available_device_ids.size();
+
+    target_devices_in_cluster = target_devices;
+    arch_name = tt_SocDescriptor(sdesc_path).arch;
+    perform_harvesting_on_sdesc = perform_harvesting;
+    this->sdesc_path = sdesc_path;
+
+    if (!skip_driver_allocs) {
+        log_info(LogSiliconDriver, "Detected {} PCI device{} : {}", m_num_pci_devices,
(m_num_pci_devices > 1) ? "s":"", available_device_ids); + log_debug(LogSiliconDriver, "Passed target devices: {}", target_devices); + } + + if (ndesc_path == "") { + ndesc = tt_ClusterDescriptor::create_for_grayskull_cluster(target_devices_in_cluster, available_device_ids); + } + else { + ndesc = tt_ClusterDescriptor::create_from_yaml(ndesc_path); + } + construct_tt_silicon_device(num_host_mem_ch_per_mmio_device, skip_driver_allocs, clean_system_resources, perform_harvesting, simulated_harvesting_masks); } void tt_SiliconDevice::configure_active_ethernet_cores_for_mmio_device(chip_id_t mmio_chip, const std::unordered_set& active_eth_cores_per_chip) { diff --git a/device/tt_soc_descriptor.cpp b/device/tt_soc_descriptor.cpp index 9a572420..9e019c79 100644 --- a/device/tt_soc_descriptor.cpp +++ b/device/tt_soc_descriptor.cpp @@ -5,6 +5,8 @@ #include "yaml-cpp/yaml.h" #include "tt_soc_descriptor.h" +#include "common/utils.hpp" + #include #include #include @@ -273,6 +275,19 @@ bool tt_SocDescriptor::is_ethernet_core(const tt_xy_pair &core) const { return this->ethernet_core_channel_map.find(core) != ethernet_core_channel_map.end(); } +std::string tt_SocDescriptor::get_soc_descriptor_path(tt::ARCH arch) { + switch (arch) { + case tt::ARCH::GRAYSKULL: + return tt::umd::utils::get_abs_path("device/soc_descriptors/grayskull_10x12.yaml"); + case tt::ARCH::WORMHOLE_B0: + return tt::umd::utils::get_abs_path("device/soc_descriptors/wormhole_b0_8x10.yaml"); + case tt::ARCH::BLACKHOLE: + return tt::umd::utils::get_abs_path("device/soc_descriptors/blackhole_140_arch_no_eth.yaml"); + default: + throw std::runtime_error("Invalid architecture"); + } +} + std::ostream &operator<<(std::ostream &out, const tt::ARCH &arch_name) { if (arch_name == tt::ARCH::Invalid) { out << "none"; diff --git a/device/tt_soc_descriptor.h b/device/tt_soc_descriptor.h index 372d0a29..e0529570 100644 --- a/device/tt_soc_descriptor.h +++ b/device/tt_soc_descriptor.h @@ -189,11 +189,14 @@ class 
tt_SocDescriptor { void perform_harvesting(std::size_t harvesting_mask); + static std::string get_soc_descriptor_path(tt::ARCH arch); + private: - std::unique_ptr coordinate_manager = nullptr; void create_coordinate_manager(std::size_t harvesting_mask); void load_core_descriptors_from_device_descriptor(YAML::Node &device_descriptor_yaml); void load_soc_features_from_device_descriptor(YAML::Node &device_descriptor_yaml); + + std::unique_ptr coordinate_manager = nullptr; }; // Allocates a new soc descriptor on the heap. Returns an owning pointer. diff --git a/tests/api/test_chip.cpp b/tests/api/test_chip.cpp index 308bc9e0..df90e01a 100644 --- a/tests/api/test_chip.cpp +++ b/tests/api/test_chip.cpp @@ -22,101 +22,8 @@ // TODO: write this test to work with Chip not whole Cluster. using Cluster = tt_SiliconDevice; -inline std::unique_ptr get_cluster_desc() { - // TODO: This should not be needed. And could be part of the cluster descriptor probably. - // Note that cluster descriptor holds logical ids of chips. - // Which are different than physical PCI ids, which are /dev/tenstorrent/N ones. - // You have to see if physical PCIe is GS before constructing a cluster descriptor. - std::vector pci_device_ids = PCIDevice::enumerate_devices(); - std::set pci_device_ids_set (pci_device_ids.begin(), pci_device_ids.end()); - - tt::ARCH device_arch = tt::ARCH::GRAYSKULL; - if (!pci_device_ids.empty()) { - // TODO: This should be removed from the API, the driver itself should do it. - int physical_device_id = pci_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device (physical_device_id, 0); - device_arch = pci_device.get_arch(); - } - - // TODO: Make this test work on a host system without any tt devices. - if (pci_device_ids.empty()) { - std::cout << "No Tenstorrent devices found. Skipping test." 
<< std::endl; - return nullptr; - } - - // TODO: Remove different branch for different archs - std::unique_ptr cluster_desc; - if (device_arch == tt::ARCH::GRAYSKULL) { - cluster_desc = tt_ClusterDescriptor::create_for_grayskull_cluster(pci_device_ids_set, pci_device_ids); - } else if (device_arch == tt::ARCH::BLACKHOLE) { - std::string yaml_path = test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"); - cluster_desc = tt_ClusterDescriptor::create_from_yaml(yaml_path); - } else { - // TODO: remove getting manually cluster descriptor from yaml. - std::string yaml_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - cluster_desc = tt_ClusterDescriptor::create_from_yaml(yaml_path); - } - - return cluster_desc; -} - inline std::unique_ptr get_cluster() { - - // TODO: This should not be needed. And could be part of the cluster descriptor probably. - // Note that cluster descriptor holds logical ids of chips. - // Which are different than physical PCI ids, which are /dev/tenstorrent/N ones. - // You have to see if physical PCIe is GS before constructing a cluster descriptor. - std::vector pci_device_ids = PCIDevice::enumerate_devices(); - std::set pci_device_ids_set (pci_device_ids.begin(), pci_device_ids.end()); - - tt::ARCH device_arch = tt::ARCH::GRAYSKULL; - if (!pci_device_ids.empty()) { - // TODO: This should be removed from the API, the driver itself should do it. - int physical_device_id = pci_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device (physical_device_id, 0); - device_arch = pci_device.get_arch(); - } - - // TODO: Make this test work on a host system without any tt devices. - if (pci_device_ids.empty()) { - std::cout << "No Tenstorrent devices found. Skipping test." 
<< std::endl; - return nullptr; - } - - std::string yaml_path; - if (device_arch == tt::ARCH::GRAYSKULL) { - yaml_path = ""; - } else if (device_arch == tt::ARCH::BLACKHOLE) { - yaml_path = test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"); - } else { - // TODO: remove getting manually cluster descriptor from yaml. - yaml_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - } - // TODO: Remove the need to do this, allow default constructor to construct with all chips. - std::unique_ptr cluster_desc = get_cluster_desc(); - std::unordered_set detected_num_chips = cluster_desc->get_all_chips(); - - // TODO: make this unordered vs set conversion not needed. - std::set detected_num_chips_set (detected_num_chips.begin(), detected_num_chips.end()); - - - // TODO: This would be incorporated inside SocDescriptor. - std::string soc_path; - if (device_arch == tt::ARCH::GRAYSKULL) { - soc_path = test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml"); - } else if (device_arch == tt::ARCH::WORMHOLE_B0) { - soc_path = test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"); - } else if (device_arch == tt::ARCH::BLACKHOLE) { - soc_path = test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml"); - } else { - throw std::runtime_error("Unsupported architecture"); - } - - - // TODO: Don't pass each of these arguments. - return std::unique_ptr(new Cluster(soc_path, device_arch == tt::ARCH::GRAYSKULL ? "" : yaml_path, detected_num_chips_set)); + return std::unique_ptr(new Cluster()); } // TODO: Once default auto TLB setup is in, check it is setup properly. diff --git a/tests/api/test_cluster.cpp b/tests/api/test_cluster.cpp index 258598dc..c7c3c3cb 100644 --- a/tests/api/test_cluster.cpp +++ b/tests/api/test_cluster.cpp @@ -33,101 +33,9 @@ using Cluster = tt_SiliconDevice; // N150. N300 // Galaxy -// TODO: This function should not exist, the API itself should be simple enough. 
-inline std::unique_ptr get_cluster_desc() { - // TODO: This should not be needed. And could be part of the cluster descriptor probably. - // Note that cluster descriptor holds logical ids of chips. - // Which are different than physical PCI ids, which are /dev/tenstorrent/N ones. - // You have to see if physical PCIe is GS before constructing a cluster descriptor. - std::vector pci_device_ids = PCIDevice::enumerate_devices(); - std::set pci_device_ids_set(pci_device_ids.begin(), pci_device_ids.end()); - - tt::ARCH device_arch = tt::ARCH::GRAYSKULL; - if (!pci_device_ids.empty()) { - // TODO: This should be removed from the API, the driver itself should do it. - int physical_device_id = pci_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device(physical_device_id, 0); - device_arch = pci_device.get_arch(); - } - - // TODO: Make this test work on a host system without any tt devices. - if (pci_device_ids.empty()) { - std::cout << "No Tenstorrent devices found. Skipping test." << std::endl; - return nullptr; - } - - // TODO: Remove different branch for different archs - std::unique_ptr cluster_desc; - if (device_arch == tt::ARCH::GRAYSKULL) { - cluster_desc = tt_ClusterDescriptor::create_for_grayskull_cluster(pci_device_ids_set, pci_device_ids); - } else if (device_arch == tt::ARCH::BLACKHOLE) { - std::string yaml_path = test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"); - cluster_desc = tt_ClusterDescriptor::create_from_yaml(yaml_path); - } else { - // TODO: remove getting manually cluster descriptor from yaml. - std::string yaml_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - cluster_desc = tt_ClusterDescriptor::create_from_yaml(yaml_path); - } - - return cluster_desc; -} - -// TODO: This function should not exist, the API itself should be simple enough. inline std::unique_ptr get_cluster() { - // TODO: This should not be needed. And could be part of the cluster descriptor probably. 
- // Note that cluster descriptor holds logical ids of chips. - // Which are different than physical PCI ids, which are /dev/tenstorrent/N ones. - // You have to see if physical PCIe is GS before constructing a cluster descriptor. - std::vector pci_device_ids = PCIDevice::enumerate_devices(); - std::set pci_device_ids_set(pci_device_ids.begin(), pci_device_ids.end()); - - tt::ARCH device_arch = tt::ARCH::GRAYSKULL; - if (!pci_device_ids.empty()) { - // TODO: This should be removed from the API, the driver itself should do it. - int physical_device_id = pci_device_ids[0]; - // TODO: remove logical_device_id - PCIDevice pci_device(physical_device_id, 0); - device_arch = pci_device.get_arch(); - } - - // TODO: Make this test work on a host system without any tt devices. - if (pci_device_ids.empty()) { - std::cout << "No Tenstorrent devices found. Skipping test." << std::endl; - return nullptr; - } - - std::string yaml_path; - if (device_arch == tt::ARCH::GRAYSKULL) { - yaml_path = ""; - } else if (device_arch == tt::ARCH::BLACKHOLE) { - yaml_path = test_utils::GetAbsPath("blackhole_1chip_cluster.yaml"); - } else { - // TODO: remove getting manually cluster descriptor from yaml. - yaml_path = tt_ClusterDescriptor::get_cluster_descriptor_file_path(); - } - // TODO: Remove the need to do this, allow default constructor to construct with all chips. - std::unique_ptr cluster_desc = get_cluster_desc(); - std::unordered_set detected_num_chips = cluster_desc->get_all_chips(); - - // TODO: make this unordered vs set conversion not needed. - std::set detected_num_chips_set(detected_num_chips.begin(), detected_num_chips.end()); - - // TODO: This would be incorporated inside SocDescriptor. 
-    std::string soc_path;
-    if (device_arch == tt::ARCH::GRAYSKULL) {
-        soc_path = test_utils::GetAbsPath("tests/soc_descs/grayskull_10x12.yaml");
-    } else if (device_arch == tt::ARCH::WORMHOLE_B0) {
-        soc_path = test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml");
-    } else if (device_arch == tt::ARCH::BLACKHOLE) {
-        soc_path = test_utils::GetAbsPath("tests/soc_descs/blackhole_140_arch_no_eth.yaml");
-    } else {
-        throw std::runtime_error("Unsupported architecture");
-    }
-
-    // TODO: Don't pass each of these arguments.
     return std::unique_ptr(
-        new Cluster(soc_path, device_arch == tt::ARCH::GRAYSKULL ? "" : yaml_path, detected_num_chips_set));
+        new Cluster());
 }
 
 // TODO: Should not be wormhole specific.
@@ -181,9 +89,10 @@ void setup_wormhole_remote(Cluster* umd_cluster) {
 TEST(ApiClusterTest, OpenAllChips) { std::unique_ptr umd_cluster = get_cluster(); }
 
 TEST(ApiClusterTest, SimpleIOAllChips) {
-    std::unique_ptr cluster_desc = get_cluster_desc();
     std::unique_ptr umd_cluster = get_cluster();
 
+    const tt_ClusterDescriptor* cluster_desc = umd_cluster ? umd_cluster->get_cluster_desc() : nullptr;  // null-safe: umd_cluster is only checked for nullptr below
+
     if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
         std::cout << "No chips found. Skipping test." << std::endl;
         return;
@@ -239,9 +148,10 @@ TEST(ApiClusterTest, SimpleIOAllChips) {
 }
 
 TEST(ApiClusterTest, RemoteFlush) {
-    std::unique_ptr cluster_desc = get_cluster_desc();
     std::unique_ptr umd_cluster = get_cluster();
 
+    const tt_ClusterDescriptor* cluster_desc = umd_cluster ? umd_cluster->get_cluster_desc() : nullptr;  // null-safe: umd_cluster is only checked for nullptr below
+
     if (umd_cluster == nullptr || umd_cluster->get_all_chips_in_cluster().empty()) {
         std::cout << "No chips found. Skipping test." 
<< std::endl; return; diff --git a/tests/wormhole/test_silicon_driver_wh.cpp b/tests/wormhole/test_silicon_driver_wh.cpp index a0c74775..15a7a0d7 100644 --- a/tests/wormhole/test_silicon_driver_wh.cpp +++ b/tests/wormhole/test_silicon_driver_wh.cpp @@ -111,7 +111,7 @@ TEST(SiliconDriverWH, Harvesting) { std::unordered_map simulated_harvesting_masks = {{0, 30}, {1, 60}}; uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); auto sdesc_per_chip = device.get_virtual_soc_descriptors(); ASSERT_EQ(device.using_harvested_soc_descriptors(), true) << "Expected Driver to have performed harvesting"; @@ -153,7 +153,7 @@ TEST(SiliconDriverWH, HarvestingRuntime) { uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true, simulated_harvesting_masks); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -218,7 +218,7 @@ TEST(SiliconDriverWH, UnalignedStaticTLB_RW) { uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, 
false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -277,7 +277,7 @@ TEST(SiliconDriverWH, StaticTLB_RW) { uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -326,7 +326,7 @@ TEST(SiliconDriverWH, DynamicTLB_RW) { std::set target_devices = get_target_devices(); uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true); set_params_for_remote_txn(device); @@ -364,7 +364,7 @@ TEST(SiliconDriverWH, MultiThreadedDevice) { std::set target_devices = get_target_devices(); uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true); set_params_for_remote_txn(device); @@ -423,7 +423,7 @@ TEST(SiliconDriverWH, MultiThreadedMemBar) { uint32_t base_addr = l1_mem::address_map::DATA_BUFFER_SPACE_BASE; uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), 
tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -530,7 +530,7 @@ TEST(SiliconDriverWH, BroadcastWrite) { uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -586,7 +586,7 @@ TEST(SiliconDriverWH, VirtualCoordinateBroadcast) { uint32_t num_host_mem_ch_per_mmio_device = 1; - tt_SiliconDevice device = tt_SiliconDevice(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), tt_ClusterDescriptor::get_cluster_descriptor_file_path(), target_devices, num_host_mem_ch_per_mmio_device, false, true, true); + tt_SiliconDevice device = tt_SiliconDevice(num_host_mem_ch_per_mmio_device, false, true, true); set_params_for_remote_txn(device); auto mmio_devices = device.get_target_mmio_device_ids(); @@ -668,10 +668,7 @@ TEST(SiliconDriverWH, VirtualCoordinateBroadcast) { TEST(SiliconDriverWH, SysmemTestWithPcie) { auto target_devices = get_target_devices(); - tt_SiliconDevice device(test_utils::GetAbsPath("tests/soc_descs/wormhole_b0_8x10.yaml"), - tt_ClusterDescriptor::get_cluster_descriptor_file_path(), - target_devices, - 1, // one "host memory channel", currently a 1G huge page + tt_SiliconDevice device(1, // one "host memory channel", currently a 1G huge page false, // skip driver allocs - no (don't skip) true, // clean system resources - yes true); // perform harvesting - yes