Skip to content

Commit

Permalink
#12844: Remove dependency on generated_bank_to_noc_coord_mapping.h
Browse files Browse the repository at this point in the history
Move the global arrays from generated_bank_to_noc_coord_mapping.h to
brisc.cc and ncrisc.cc
For Kernel, this global arrays are declared as extern in dataflow_api.h
Allocate space in L1 for the host to copy array data during build.
Firmware would then copy this data to the global array during init.
  • Loading branch information
spoojaryTT committed Nov 14, 2024
1 parent fdbdc91 commit ed0095f
Show file tree
Hide file tree
Showing 14 changed files with 181 additions and 19 deletions.
49 changes: 48 additions & 1 deletion tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "tools/profiler/kernel_profiler.hpp"
#include "dev_msgs.h"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "dataflow_api.h"
#include "dev_mem_map.h"
Expand Down Expand Up @@ -66,6 +65,12 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

//DRAM and L1 bank offsets and noc coordinates.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

#define MEM_MOVER_VIEW_IRAM_BASE_ADDR (0x4 << 12)

#if defined(PROFILE_KERNEL)
Expand Down Expand Up @@ -343,6 +348,48 @@ int main() {
int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
l1_to_local_mem_copy((uint*)__ldm_data_start, (uint tt_l1_ptr*)MEM_BRISC_INIT_LOCAL_L1_BASE_SCRATCH, num_words);

int32_t num_dram_to_noc_words = (NUM_NOCS * NUM_DRAM_BANKS) << 1;
l1_to_local_mem_copy((uint*)dram_bank_to_noc_xy, (uint tt_l1_ptr*)MEM_BANK_TO_NOC_XY_SCRATCH, num_dram_to_noc_words);

int32_t num_l1_to_noc_words = (NUM_NOCS * NUM_L1_BANKS) << 1;
//DPRINT << " base addr = " << MEM_BANK_TO_NOC_XY_SCRATCH << " num_dram_to_noc_words = " << num_dram_to_noc_words << " num_l1_to_noc_words = " << num_l1_to_noc_words << ENDL();
l1_to_local_mem_copy((uint*)l1_bank_to_noc_xy, (uint tt_l1_ptr*)(MEM_BANK_TO_NOC_XY_SCRATCH + num_dram_to_noc_words), num_l1_to_noc_words);

int32_t num_dram_offset_words = NUM_DRAM_BANKS << 2;
l1_to_local_mem_copy((uint*)bank_to_dram_offset, (uint tt_l1_ptr*)(MEM_BANK_OFFSET_SCRATCH), num_dram_offset_words);

//DPRINT << "DRAM noc_xy[0][0] = " << dram_bank_to_noc_xy[0][0] << " and " << temp_dram_bank_to_noc_xy[0][0] << ENDL();
//DPRINT << "DRAM noc_xy[0][1] = " << dram_bank_to_noc_xy[0][1] << " and " << temp_dram_bank_to_noc_xy[0][1] << ENDL();
//DPRINT << "DRAM noc_xy[0][last] = " << dram_bank_to_noc_xy[0][NUM_DRAM_BANKS-1] << " and " << temp_dram_bank_to_noc_xy[0][NUM_DRAM_BANKS-1] << ENDL();
//DPRINT << "DRAM noc_xy[0][last-1] = " << dram_bank_to_noc_xy[0][NUM_DRAM_BANKS-2] << " and " << temp_dram_bank_to_noc_xy[0][NUM_DRAM_BANKS-2] << ENDL();

//DPRINT << "DRAM noc_xy[1][0] = " << dram_bank_to_noc_xy[1][0] << " and " << temp_dram_bank_to_noc_xy[1][0] << ENDL();
//DPRINT << "DRAM noc_xy[1][1] = " << dram_bank_to_noc_xy[1][1] << " and " << temp_dram_bank_to_noc_xy[1][1] << ENDL();
//DPRINT << "DRAM noc_xy[1][last] = " << dram_bank_to_noc_xy[1][NUM_DRAM_BANKS-1] << " and " << temp_dram_bank_to_noc_xy[1][NUM_DRAM_BANKS-1] << ENDL();
//DPRINT << "DRAM noc_xy[1][last-1] = " << dram_bank_to_noc_xy[1][NUM_DRAM_BANKS-2] << " and " << temp_dram_bank_to_noc_xy[1][NUM_DRAM_BANKS-2] << ENDL();

//DPRINT << "l1 noc_xy[0][0] = " << l1_bank_to_noc_xy[0][0] << " and " << temp_l1_bank_to_noc_xy[0][0] << ENDL();
//DPRINT << "l1 noc_xy[0][1] = " << l1_bank_to_noc_xy[0][1] << " and " << temp_l1_bank_to_noc_xy[0][1] << ENDL();
//DPRINT << "l1 noc_xy[0][last] = " << l1_bank_to_noc_xy[0][NUM_L1_BANKS-1] << " and " << temp_l1_bank_to_noc_xy[0][NUM_L1_BANKS-1] << ENDL();
//DPRINT << "l1 noc_xy[0][last-1] = " << l1_bank_to_noc_xy[0][NUM_L1_BANKS-2] << " and " << temp_l1_bank_to_noc_xy[0][NUM_L1_BANKS-2] << ENDL();

//DPRINT << "l1 noc_xy[1][0] = " << l1_bank_to_noc_xy[1][0] << " and " << temp_l1_bank_to_noc_xy[1][0] << ENDL();
//DPRINT << "l1 noc_xy[1][1] = " << l1_bank_to_noc_xy[1][1] << " and " << temp_l1_bank_to_noc_xy[1][1] << ENDL();
//DPRINT << "l1 noc_xy[1][last] = " << l1_bank_to_noc_xy[1][NUM_L1_BANKS-1] << " and " << temp_l1_bank_to_noc_xy[1][NUM_L1_BANKS-1] << ENDL();
//DPRINT << "l1 noc_xy[1][last-1] = " << l1_bank_to_noc_xy[1][NUM_L1_BANKS-2] << " and " << temp_l1_bank_to_noc_xy[1][NUM_L1_BANKS-2] << ENDL();

//DPRINT << "dram_offset [0] = " << bank_to_dram_offset[0] << " and " << temp_bank_to_dram_offset[0] << ENDL();
//DPRINT << "dram_offset [1] = " << bank_to_dram_offset[1] << " and " << temp_bank_to_dram_offset[1] << ENDL();
//DPRINT << "dram_offset [last] = " << bank_to_dram_offset[NUM_DRAM_BANKS-1] << " and " << temp_bank_to_dram_offset[NUM_DRAM_BANKS-1] << ENDL();
//DPRINT << "dram_offset [last-1] = " << bank_to_dram_offset[NUM_DRAM_BANKS-2] << " and " << temp_bank_to_dram_offset[NUM_DRAM_BANKS-2] << ENDL();

int32_t num_l1_offset_words = NUM_L1_BANKS << 2;
l1_to_local_mem_copy((uint*)bank_to_l1_offset, (uint tt_l1_ptr*)(MEM_BANK_OFFSET_SCRATCH + num_dram_offset_words), num_l1_offset_words);
//DPRINT << "l1_offset [0] = " << bank_to_l1_offset[0] << " and " << temp_bank_to_l1_offset[0] << ENDL();
//DPRINT << "l1_offset [1] = " << bank_to_l1_offset[1] << " and " << temp_bank_to_l1_offset[1] << ENDL();
//DPRINT << "l1_offset [last] = " << bank_to_l1_offset[NUM_L1_BANKS-1] << " and " << temp_bank_to_l1_offset[NUM_L1_BANKS-1] << ENDL();
//DPRINT << "l1_offset [last-1] = " << bank_to_l1_offset[NUM_L1_BANKS-2] << " and " << temp_bank_to_l1_offset[NUM_L1_BANKS-2] << ENDL();

mailboxes->launch_msg_rd_ptr = 0; // Initialize the rdptr to 0
noc_index = 0;
risc_init();
Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "ethernet/dataflow_api.h"
#include "ethernet/tunneling.h"
#include "firmware_common.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "noc_parameters.h"
#include "risc_attribs.h"
#include "tools/profiler/kernel_profiler.hpp"
Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "tools/profiler/kernel_profiler.hpp"
#include "dev_msgs.h"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"
#include "dataflow_api.h"

Expand Down
17 changes: 16 additions & 1 deletion tt_metal/hw/firmware/src/ncrisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "firmware_common.h"
#include "tools/profiler/kernel_profiler.hpp"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"

#include "debug/waypoint.h"
Expand Down Expand Up @@ -39,6 +38,12 @@ uint32_t tt_l1_ptr *rta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *crta_l1_base __attribute__((used));
uint32_t tt_l1_ptr *sem_l1_base[ProgrammableCoreType::COUNT] __attribute__((used));

//DRAM and L1 bank offsets and noc coordinates.
uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS] __attribute__((used));
int32_t bank_to_dram_offset[NUM_DRAM_BANKS] __attribute__((used));
uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS] __attribute__((used));
int32_t bank_to_l1_offset[NUM_L1_BANKS] __attribute__((used));

#if defined(PROFILE_KERNEL)
namespace kernel_profiler {
uint32_t wIndex __attribute__((used));
Expand Down Expand Up @@ -79,6 +84,16 @@ int main(int argc, char *argv[]) {
int32_t num_words = ((uint)__ldm_data_end - (uint)__ldm_data_start) >> 2;
l1_to_local_mem_copy((uint *)__ldm_data_start, (uint tt_l1_ptr *)MEM_NCRISC_INIT_LOCAL_L1_BASE_SCRATCH, num_words);

int32_t num_dram_to_noc_words = (NUM_NOCS * NUM_DRAM_BANKS) << 1;
l1_to_local_mem_copy((uint*)dram_bank_to_noc_xy, (uint tt_l1_ptr*)MEM_BANK_TO_NOC_XY_SCRATCH, num_dram_to_noc_words);
int32_t num_l1_to_noc_words = (NUM_NOCS * NUM_L1_BANKS) << 1;
l1_to_local_mem_copy((uint*)l1_bank_to_noc_xy, (uint tt_l1_ptr*)(MEM_BANK_TO_NOC_XY_SCRATCH + num_dram_to_noc_words), num_l1_to_noc_words);

int32_t num_dram_offset_words = NUM_DRAM_BANKS << 2;
l1_to_local_mem_copy((uint*)bank_to_dram_offset, (uint tt_l1_ptr*)(MEM_BANK_OFFSET_SCRATCH), num_dram_offset_words);
int32_t num_l1_offset_words = NUM_L1_BANKS << 2;
l1_to_local_mem_copy((uint*)bank_to_l1_offset, (uint tt_l1_ptr*)(MEM_BANK_OFFSET_SCRATCH + num_dram_offset_words), num_l1_offset_words);

risc_init();

// If NCRISC has IRAM it needs to halt before BRISC copies data from L1 to IRAM
Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/firmware/src/slave_idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "firmware_common.h"
#include "tools/profiler/kernel_profiler.hpp"
#include "risc_attribs.h"
#include "generated_bank_to_noc_coord_mapping.h"
#include "circular_buffer.h"

#include "debug/waypoint.h"
Expand Down
9 changes: 9 additions & 0 deletions tt_metal/hw/inc/blackhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
#define MEM_NCRISC_LOCAL_SIZE (8 * 1024)
#define MEM_TRISC_LOCAL_SIZE (4 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be atleast 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be atleast 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_OFFSET_SIZE 1024

/////////////
// Firmware/kernel code holes
#define MEM_BRISC_FIRMWARE_SIZE (5 * 1024)
Expand Down Expand Up @@ -91,6 +96,10 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_XY_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_OFFSET_SCRATCH (MEM_BANK_TO_NOC_XY_SCRATCH + MEM_BANK_TO_NOC_XY_SIZE)
#define MEM_BANK_OFFSET_END (MEM_BANK_OFFSET_SCRATCH + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down
9 changes: 9 additions & 0 deletions tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,16 @@
#if defined(KERNEL_BUILD)
constexpr uint8_t noc_index = NOC_INDEX;
constexpr uint8_t noc_mode = NOC_MODE;
extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];
extern int32_t bank_to_l1_offset[NUM_L1_BANKS];
#else
extern uint16_t dram_bank_to_noc_xy[NUM_NOCS][NUM_DRAM_BANKS];
extern int32_t bank_to_dram_offset[NUM_DRAM_BANKS];
extern uint16_t l1_bank_to_noc_xy[NUM_NOCS][NUM_L1_BANKS];
extern int32_t bank_to_l1_offset[NUM_L1_BANKS];

extern uint8_t noc_index;
constexpr uint8_t noc_mode = DM_DEDICATED_NOC;
#endif
Expand Down
9 changes: 9 additions & 0 deletions tt_metal/hw/inc/grayskull/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
#define MEM_NCRISC_LOCAL_SIZE (4 * 1024)
#define MEM_TRISC_LOCAL_SIZE (2 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be atleast 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be atleast 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_OFFSET_SIZE 1024

#define NCRISC_HAS_IRAM 1
#define MEM_NCRISC_IRAM_BASE 0xFFC00000
#define MEM_NCRISC_IRAM_SIZE (16 * 1024)
Expand Down Expand Up @@ -96,6 +101,10 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_XY_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_OFFSET_SCRATCH (MEM_BANK_TO_NOC_XY_SCRATCH + MEM_BANK_TO_NOC_XY_SIZE)
#define MEM_BANK_OFFSET_END (MEM_BANK_OFFSET_SCRATCH + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down
9 changes: 9 additions & 0 deletions tt_metal/hw/inc/wormhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
#define MEM_NCRISC_LOCAL_SIZE (4 * 1024)
#define MEM_TRISC_LOCAL_SIZE (2 * 1024)

// Memory for (dram/l1)_bank_to_noc_xy arrays, size needs to be atleast 2 * NUM_NOCS * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_TO_NOC_XY_SIZE 1024
// Memory for bank_to_dram_offset and bank_to_l1_offset arrays, size needs to be atleast 4 * (NUM_DRAM_BANKS + NUM_L1_BANKS)
#define MEM_BANK_OFFSET_SIZE 1024

#define NCRISC_HAS_IRAM 1
#define MEM_NCRISC_IRAM_BASE 0xFFC00000
#define MEM_NCRISC_IRAM_SIZE (16 * 1024)
Expand Down Expand Up @@ -98,6 +103,10 @@
#define MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC0_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH (MEM_TRISC1_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)

#define MEM_BANK_TO_NOC_XY_SCRATCH (MEM_TRISC2_INIT_LOCAL_L1_BASE_SCRATCH + MEM_TRISC_LOCAL_SIZE)
#define MEM_BANK_OFFSET_SCRATCH (MEM_BANK_TO_NOC_XY_SCRATCH + MEM_BANK_TO_NOC_XY_SIZE)
#define MEM_BANK_OFFSET_END (MEM_BANK_OFFSET_SCRATCH + MEM_BANK_OFFSET_SIZE)

/////////////
// Stack info
// Increasing the stack size comes at the expense of less local memory for globals
Expand Down
76 changes: 69 additions & 7 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,9 +406,32 @@ void Device::build_firmware() {
jit_build_set(this->firmware_build_states_, nullptr);
}

void Device::initialize_global_array(const HalProgrammableCoreType &core_type, CoreCoord phys_core)
{
tt::Cluster::instance().write_core(&dram_bank_to_noc_xy_[0][0], dram_bank_to_noc_xy_[0].size() * sizeof(uint16_t), tt_cxy_pair(this->id(), phys_core), MEM_BANK_TO_NOC_XY_SCRATCH);


uint64_t addr = MEM_BANK_TO_NOC_XY_SCRATCH + (dram_bank_to_noc_xy_[0].size() * sizeof(uint16_t));
//tt::log_info(tt::LogTest, "initialize_global_array dram array 1 addr = {}", addr);
tt::Cluster::instance().write_core(&dram_bank_to_noc_xy_[1][0], dram_bank_to_noc_xy_[1].size() * sizeof(uint16_t), tt_cxy_pair(this->id(), phys_core), addr);

addr = MEM_BANK_TO_NOC_XY_SCRATCH + (sizeof(uint16_t) * (dram_bank_to_noc_xy_[0].size() + dram_bank_to_noc_xy_[1].size()));
tt::Cluster::instance().write_core(&l1_bank_to_noc_xy_[0][0], l1_bank_to_noc_xy_[0].size() * sizeof(uint16_t), tt_cxy_pair(this->id(), phys_core), addr);

addr = MEM_BANK_TO_NOC_XY_SCRATCH + (sizeof(uint16_t) * (dram_bank_to_noc_xy_[0].size() + dram_bank_to_noc_xy_[1].size() + l1_bank_to_noc_xy_[0].size()));
tt::Cluster::instance().write_core(&l1_bank_to_noc_xy_[1][0], l1_bank_to_noc_xy_[1].size() * sizeof(uint16_t), tt_cxy_pair(this->id(), phys_core), addr);

tt::Cluster::instance().write_core(&dram_bank_offset_map_[0], dram_bank_offset_map_.size() * sizeof(int32_t), tt_cxy_pair(this->id(), phys_core), MEM_BANK_OFFSET_SCRATCH);

addr = MEM_BANK_OFFSET_SCRATCH + (dram_bank_offset_map_.size() * sizeof(int32_t));
tt::Cluster::instance().write_core(&l1_bank_offset_map_[0], l1_bank_offset_map_.size() * sizeof(int32_t), tt_cxy_pair(this->id(), phys_core), addr);
}

void Device::initialize_firmware(const HalProgrammableCoreType &core_type, CoreCoord phys_core, launch_msg_t *launch_msg, go_msg_t* go_msg) {
ZoneScoped;

this->initialize_global_array(core_type, phys_core);

uint32_t core_type_idx = hal.get_programmable_core_type_index(core_type);
uint32_t processor_class_count = hal.get_processor_classes_count(core_type);

Expand Down Expand Up @@ -604,6 +627,7 @@ void Device::reset_cores() {

void Device::initialize_and_launch_firmware() {
ZoneScoped;
tt::log_info(tt::LogTest, "initialize_and_launch_firmware called for ");

launch_msg_t launch_msg;
go_msg_t go_msg;
Expand Down Expand Up @@ -3544,37 +3568,75 @@ void Device::MarkAllocationsSafe() {
tt::tt_metal::allocator::mark_allocations_safe(*this->get_initialized_allocator());
}

void Device::generate_device_headers(const std::string &path) const
void Device::generate_device_headers(const std::string &path)
{
const size_t num_dram_banks = this->num_banks(BufferType::DRAM);
const size_t num_dram_banks_pow2 = std::pow(2, std::ceil(std::log2(num_dram_banks)));
std::vector<CoreCoord> dram_noc_coord_per_bank(num_dram_banks);
std::vector<int32_t> dram_offsets_per_bank(num_dram_banks);
dram_bank_offset_map_.clear();
dram_bank_offset_map_.resize(num_dram_banks);
for (unsigned bank_id = 0; bank_id < num_dram_banks; bank_id++) {
dram_noc_coord_per_bank[bank_id] = this->dram_core_from_dram_channel(this->dram_channel_from_bank_id(bank_id));
dram_offsets_per_bank[bank_id] = this->bank_offset(BufferType::DRAM, bank_id);
dram_bank_offset_map_[bank_id] = this->bank_offset(BufferType::DRAM, bank_id);
}
const size_t num_l1_banks = this->num_banks(BufferType::L1);
const size_t num_l1_banks_pow2 = std::pow(2, std::ceil(std::log2(num_l1_banks)));
std::vector<CoreCoord> l1_noc_coord_per_bank(num_l1_banks);
std::vector<int32_t> l1_offset_per_bank(num_l1_banks);
l1_bank_offset_map_.clear();
l1_bank_offset_map_.resize(num_l1_banks);
for (unsigned bank_id = 0; bank_id < num_l1_banks; bank_id++) {
l1_noc_coord_per_bank[bank_id] = this->worker_core_from_logical_core(this->logical_core_from_bank_id(bank_id));
l1_offset_per_bank[bank_id] = this->bank_offset(BufferType::L1, bank_id);
l1_bank_offset_map_[bank_id] = this->bank_offset(BufferType::L1, bank_id);
}

const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(this->id());

// Generate header file in proper location
tt::log_info(tt::LogTest, "Calling genfiles for path {}", path);
jit_build_genfiles_bank_to_noc_coord_descriptor (
path,
soc_d.grid_size,
dram_noc_coord_per_bank,
dram_offsets_per_bank,
dram_bank_offset_map_,
l1_noc_coord_per_bank,
l1_offset_per_bank,
l1_bank_offset_map_,
this->get_allocator_alignment()
);
this->generate_mem_bank_info(
soc_d.grid_size,
dram_noc_coord_per_bank,
l1_noc_coord_per_bank);
}

void Device::generate_mem_bank_info(
tt_xy_pair grid_size,
std::vector<CoreCoord>& dram_bank_map,
std::vector<CoreCoord>& l1_bank_map)
{
tt::log_info(tt::LogTest, "generate_mem_bank_info called for device {}\n", this->id_);
dram_bank_to_noc_xy_.clear();
dram_bank_to_noc_xy_.resize(2);
for (unsigned int noc = 0; noc < 2; noc++) {
dram_bank_to_noc_xy_[noc].reserve(dram_bank_map.size());
for (unsigned int bank_id = 0; bank_id < dram_bank_map.size(); bank_id++) {
uint16_t noc_x = NOC_0_X(noc, grid_size.x, dram_bank_map[bank_id].x);
uint16_t noc_y = NOC_0_Y(noc, grid_size.y, dram_bank_map[bank_id].y);
uint16_t xy = ((noc_y << NOC_ADDR_NODE_ID_BITS) | noc_x) << NOC_COORD_REG_OFFSET;
dram_bank_to_noc_xy_[noc].push_back(xy);
}
}

l1_bank_to_noc_xy_.clear();
l1_bank_to_noc_xy_.resize(2);
for (unsigned int noc = 0; noc < 2; noc++) {
l1_bank_to_noc_xy_[noc].reserve(l1_bank_map.size());
for (unsigned int bank_id = 0; bank_id < l1_bank_map.size(); bank_id++) {
uint16_t noc_x = NOC_0_X(noc, grid_size.x, l1_bank_map[bank_id].x);
uint16_t noc_y = NOC_0_Y(noc, grid_size.y, l1_bank_map[bank_id].y);
uint16_t xy = ((noc_y << NOC_ADDR_NODE_ID_BITS) | noc_x) << NOC_COORD_REG_OFFSET;
l1_bank_to_noc_xy_[noc].push_back(xy);
}
}
}

size_t Device::get_device_kernel_defines_hash() {
Expand Down
12 changes: 11 additions & 1 deletion tt_metal/impl/device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,11 @@ class Device {
// machine inf
float sfpu_inf() const;

void generate_device_headers(const std::string &path) const;
void generate_device_headers(const std::string &path);
void generate_mem_bank_info(
tt_xy_pair grid_size,
std::vector<CoreCoord>& dram_bank_map,
std::vector<CoreCoord>& l1_bank_map);
const JitBuildEnv& build_env() const { return this->build_env_; }
const string build_firmware_target_path(uint32_t programmable_core, uint32_t processor_class, int i) const;
const string build_kernel_target_path(uint32_t programmable_core, uint32_t processor_class, int i, const string& kernel_name) const;
Expand Down Expand Up @@ -259,6 +263,7 @@ class Device {
void initialize_build();
void initialize_device_kernel_defines();
void build_firmware();
void initialize_global_array(const HalProgrammableCoreType &core_type, CoreCoord phys_core);
void initialize_firmware(const HalProgrammableCoreType &core_type, CoreCoord phys_core, launch_msg_t *launch_msg, go_msg_t* go_msg);
void reset_cores();
void initialize_and_launch_firmware();
Expand Down Expand Up @@ -396,6 +401,11 @@ class Device {
SubDeviceManagerId next_sub_device_manager_id_ = {0};
SubDeviceManagerId default_sub_device_manager_id_ = {0};
detail::SubDeviceManager *default_sub_device_manager_ = nullptr;

std::vector<int32_t> dram_bank_offset_map_;
std::vector<int32_t> l1_bank_offset_map_;
std::vector<std::vector<uint16_t>> dram_bank_to_noc_xy_;
std::vector<std::vector<uint16_t>> l1_bank_to_noc_xy_;
};

} // namespace v0
Expand Down
Loading

0 comments on commit ed0095f

Please sign in to comment.