Skip to content

Commit

Permalink
#2934 Add check that expects num host channels for a mmio device to b…
Browse files Browse the repository at this point in the history
…e number of chips on a card and run multi cq device fixture on GS
  • Loading branch information
abhullar-tt committed Dec 7, 2023
1 parent ecd6303 commit 9f783b4
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 31 deletions.
1 change: 1 addition & 0 deletions tests/tt_metal/tt_metal/unit_tests/basic/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ TEST_F(DeviceFixture, TestDeviceToHostMemChannelAssignment) {
}

for (const auto& [mmio_dev_id, device_group] : mmio_device_to_device_group) {
EXPECT_EQ(tt::Cluster::instance().get_num_host_channels(mmio_dev_id), device_group.size());
EXPECT_EQ(tt::Cluster::instance().get_assigned_channel_for_device(mmio_dev_id), 0);
std::unordered_set<uint16_t> channels;
for (const chip_id_t &device_id : device_group) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "tt_metal/detail/tt_metal.hpp"
#include "tt_metal/test_utils/env_vars.hpp"
#include "tt_metal/test_utils/stimulus.hpp"
#include "tt_metal/test_utils/print_helpers.hpp"

using namespace tt::tt_metal;

Expand All @@ -22,7 +23,12 @@ TEST_F(MultiCommandQueueFixture, TestAccessCommandQueue) {
}
}

TEST_F(BasicFastDispatchFixture, TestCannotAccessCommandQueueForClosedDevice) {
TEST(FastDispatchHostSuite, TestCannotAccessCommandQueueForClosedDevice) {
auto slow_dispatch = getenv("TT_METAL_SLOW_DISPATCH_MODE");
if (slow_dispatch) {
TT_THROW("This suite can only be run with fast dispatch or TT_METAL_SLOW_DISPATCH_MODE unset");
GTEST_SKIP();
}
const unsigned int device_id = 0;
Device* device = CreateDevice(device_id);
EXPECT_NO_THROW(detail::GetCommandQueue(device));
Expand Down Expand Up @@ -52,7 +58,7 @@ TEST_F(MultiCommandQueueFixture, TestDirectedLoopbackToUniqueHugepage) {
for (chip_id_t device_id = 0; device_id < num_devices_; device_id++) {
chip_id_t mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device_id);
uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(device_id);
tt::Cluster::instance().read_sysmem(&readback_data, byte_size, address, mmio_device_id, channel);
tt::Cluster::instance().read_sysmem(readback_data.data(), byte_size, address, mmio_device_id, channel);
EXPECT_EQ(readback_data, golden_data.at(device_id));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,6 @@ class CommandQueueWithDPrintFixture: public CommandQueueFixture {
bool test_skipped = false;
};

class BasicFastDispatchFixture : public ::testing::Test {
protected:
void SetUp() override {
auto slow_dispatch = getenv("TT_METAL_SLOW_DISPATCH_MODE");
if (slow_dispatch) {
TT_THROW("This suite can only be run with fast dispatch or TT_METAL_SLOW_DISPATCH_MODE unset");
GTEST_SKIP();
}
}
};

class MultiCommandQueueFixture : public ::testing::Test {
protected:
void SetUp() override {
Expand All @@ -107,11 +96,7 @@ class MultiCommandQueueFixture : public ::testing::Test {
}
arch_ = tt::get_arch_from_string(tt::test_utils::get_env_arch_name());

num_devices_ = tt::tt_metal::Device::detect_num_available_devices();

if (arch_ == tt::ARCH::GRAYSKULL && num_devices_ > 1) {
GTEST_SKIP();
}
num_devices_ = tt::tt_metal::GetNumAvailableDevices();

for (unsigned int id = 0; id < num_devices_; id++) {
auto* device = tt::tt_metal::CreateDevice(id);
Expand Down
12 changes: 7 additions & 5 deletions tt_metal/detail/tt_metal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,16 @@ namespace tt::tt_metal{
{
detail::DispatchStateCheck(true);
// For now there is only one SW CommandQueue per device
static std::vector<std::unique_ptr<CommandQueue>> command_queues( Device::detect_num_available_devices() );
static std::vector<std::unique_ptr<CommandQueue>> command_queues( GetNumAvailableDevices() );
chip_id_t id = device->id();
TT_FATAL(id < command_queues.size(), "Invalid device {} detected", id);
TT_FATAL(device->is_initialized(), "Cannot access command queue for closed device {}", id);
if (not command_queues[id] or (command_queues[id] and command_queues[id]->device != device)) {
command_queues[device->id()] = std::make_unique<CommandQueue>(device);
static std::mutex cq_creation_mutex;
{
std::lock_guard<std::mutex> lock(cq_creation_mutex);
if (not command_queues[id] or (command_queues[id] and command_queues[id]->device != device)) {
command_queues[device->id()] = std::make_unique<CommandQueue>(device);
}
}
return *(command_queues[id]);
}
Expand Down Expand Up @@ -476,8 +480,6 @@ namespace tt::tt_metal{

const std::tuple<uint32_t, uint32_t> tlb_data = tt::Cluster::instance().get_tlb_data(tt_cxy_pair(device->id(), device->worker_core_from_logical_core(*device->dispatch_cores().begin()))).value();
auto [tlb_offset, tlb_size] = tlb_data;
// std::cout << "CORE: " << device->worker_core_from_logical_core(*device->dispatch_cores().begin()).str() << std::endl;
// std::cout << "after sending pointers to device. my tlb_offset: " << tlb_offset << ", my tlb_size: " << tlb_size << std::endl;

launch_msg_t msg = dispatch_program.kernels_on_core(producer_logical_core)->launch_msg;

Expand Down
8 changes: 8 additions & 0 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,14 @@ bool Device::initialize(const std::vector<uint32_t>& l1_bank_remap) {
this->dispatch_cores(),
[&, this](CoreCoord core) { return this->worker_core_from_logical_core(core); }
);

std::vector<uint32_t> pointers(CQ_START / sizeof(uint32_t), 0);
pointers[0] = CQ_START >> 4;

chip_id_t mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(this->id_);
uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(this->id_);
tt::Cluster::instance().write_sysmem(pointers.data(), pointers.size() * sizeof(uint32_t), 0, mmio_device_id, channel);

detail::SendDispatchKernelToDevice(this);
}

Expand Down
7 changes: 0 additions & 7 deletions tt_metal/impl/dispatch/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,13 +655,6 @@ EnqueueCommandType EnqueueWrapCommand::type() { return this->type_; }

// CommandQueue section
CommandQueue::CommandQueue(Device* device) {
vector<uint32_t> pointers(CQ_START / sizeof(uint32_t), 0);
pointers[0] = CQ_START >> 4;

chip_id_t mmio_device_id = tt::Cluster::instance().get_associated_mmio_device(device->id());
uint16_t channel = tt::Cluster::instance().get_assigned_channel_for_device(device->id());
tt::Cluster::instance().write_sysmem(pointers.data(), pointers.size() * sizeof(uint32_t), 0, mmio_device_id, channel);

this->device = device;
}

Expand Down
2 changes: 1 addition & 1 deletion tt_metal/llrt/tt_cluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Cluster {
std::function<void(uint32_t, uint32_t, const uint8_t*, uint32_t)> get_fast_pcie_static_tlb_write_callable(int chip_id) const {
chip_id_t mmio_device_id = device_to_mmio_device_.at(chip_id);
tt_SiliconDevice* device = dynamic_cast<tt_SiliconDevice*>(this->mmio_device_id_to_driver_.at(mmio_device_id).get());
return device->get_fast_pcie_static_tlb_write_callable(chip_id);
return device->get_fast_pcie_static_tlb_write_callable(mmio_device_id);
}

void write_reg(const std::uint32_t *mem_ptr, tt_cxy_pair target, uint64_t addr) const;
Expand Down

0 comments on commit 9f783b4

Please sign in to comment.