Skip to content

Commit

Permalink
#13655: Initial FD refactor to support sub devices
Browse files Browse the repository at this point in the history
Support multiple dispatch entries for worker->dispatch sync
Update dispatch d/s to have a semaphore per dispatch entry to enable syncing on specific worker counts
Update LaunchMessageRingBufferState and WorkerConfigBufferMgr to be tracked per sub_device
Update various FD commands to support syncing on multiple sub devices:
- ERB, EWB, ERE will be updated in the future to take a list of sub devices to block on. Currently they sync on all sub devices.
- Trace currently tracks all sub devices. In the future it could track only specific sub devices (possibly determined automatically).
- EP is currently hardcoded to sub device 0. In the future this will be updated to determine which sub devices are actually used.
  • Loading branch information
tt-aho committed Oct 29, 2024
1 parent cdcf7b5 commit 1b0640f
Show file tree
Hide file tree
Showing 22 changed files with 573 additions and 239 deletions.
4 changes: 2 additions & 2 deletions tests/tt_metal/tools/profiler/test_device_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ def test_dispatch_cores():
REF_COUNT_DICT = {
"grayskull": {
"Tensix CQ Dispatch": 16,
"Tensix CQ Prefetch": 24,
"Tensix CQ Prefetch": 25,
},
"wormhole_b0": {
"Tensix CQ Dispatch": 16,
"Tensix CQ Prefetch": 24,
"Tensix CQ Prefetch": 25,
},
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ int main(int argc, char **argv) {
0, // prefetch_downstream_buffer_pages
num_compute_cores, // max_write_packed_cores
0,
dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
0,
0,
0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1912,6 +1912,7 @@ void configure_for_single_chip(Device *device,
prefetch_downstream_buffer_pages,
num_compute_cores, // max_write_packed_cores
0,
dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
0,
0,
0,
Expand All @@ -1932,6 +1933,7 @@ void configure_for_single_chip(Device *device,
dispatch_compile_args[12] = dispatch_downstream_cb_sem;
dispatch_compile_args[13] = dispatch_h_cb_sem;
dispatch_compile_args[14] = dispatch_d_preamble_size;
dispatch_compile_args[21] = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
CoreCoord phys_dispatch_d_downstream_core =
packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
configure_kernel_variant<true, false>(program,
Expand All @@ -1952,6 +1954,7 @@ void configure_for_single_chip(Device *device,
dispatch_compile_args[12] = dispatch_h_cb_sem;
dispatch_compile_args[13] = dispatch_downstream_cb_sem;
dispatch_compile_args[14] = 0; // preamble size
dispatch_compile_args[21] = 1; // unused: dispatch_d only. max_num_worker_sems is used for array sizing, set to 1
CoreCoord phys_dispatch_h_upstream_core =
packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
configure_kernel_variant<false, true>(program,
Expand Down Expand Up @@ -2655,6 +2658,7 @@ void configure_for_multi_chip(Device *device,
prefetch_downstream_buffer_pages,
num_compute_cores,
0,
dispatch_constants::DISPATCH_MESSAGE_ENTRIES,
0,
0,
0,
Expand All @@ -2675,6 +2679,7 @@ void configure_for_multi_chip(Device *device,
dispatch_compile_args[12] = dispatch_downstream_cb_sem;
dispatch_compile_args[13] = dispatch_h_cb_sem;
dispatch_compile_args[14] = dispatch_d_preamble_size;
dispatch_compile_args[21] = dispatch_constants::DISPATCH_MESSAGE_ENTRIES;
CoreCoord phys_dispatch_d_downstream_core =
packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
configure_kernel_variant<true, false>(program_r,
Expand All @@ -2694,6 +2699,7 @@ void configure_for_multi_chip(Device *device,
dispatch_compile_args[12] = dispatch_h_cb_sem;
dispatch_compile_args[13] = dispatch_downstream_cb_sem;
dispatch_compile_args[14] = 0; // preamble size
dispatch_compile_args[21] = 1; // unused: dispatch_d only. max_num_worker_sems is used for array sizing, set to 1
CoreCoord phys_dispatch_h_upstream_core =
packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
configure_kernel_variant<false, true>(program,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void kernel_main() {
tt_l1_ptr mailboxes_t* const mailboxes = (tt_l1_ptr mailboxes_t*)(MEM_MAILBOX_BASE);
#endif
uint64_t dispatch_addr = NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31, false);
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void MAIN {
#endif
uint64_t dispatch_addr =
NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, false /*linked*/);
}
#else
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ int main() {
// For future proofing, the noc_index value is initialized to 0, to ensure an invalid NOC txn is not issued.
uint64_t dispatch_addr =
NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
mailboxes->go_message.signal = RUN_MSG_DONE;
// Notify dispatcher that this has been done
DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4);
Expand Down Expand Up @@ -453,7 +453,7 @@ int main() {
launch_msg_address->kernel_config.enables = 0;
uint64_t dispatch_addr =
NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4);
noc_fast_atomic_increment(
noc_index,
Expand Down
6 changes: 3 additions & 3 deletions tt_metal/hw/firmware/src/erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void __attribute__((noinline)) Application(void) {
launch_msg_address->kernel_config.enables = 0;
uint64_t dispatch_addr =
NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
internal_::notify_dispatch_core_done(dispatch_addr);
mailboxes->launch_msg_rd_ptr = (launch_msg_rd_ptr + 1) & (launch_msg_buffer_num_entries - 1);
// Only executed if watcher is enabled. Ensures that we don't report stale data due to invalid launch messages in the ring buffer
Expand All @@ -94,9 +94,9 @@ void __attribute__((noinline)) Application(void) {
} else if (go_message_signal == RUN_MSG_RESET_READ_PTR) {
// Reset the launch message buffer read ptr
mailboxes->launch_msg_rd_ptr = 0;
int64_t dispatch_addr =
uint64_t dispatch_addr =
NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_y), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
mailboxes->go_message.signal = RUN_MSG_DONE;
internal_::notify_dispatch_core_done(dispatch_addr);
} else {
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/hw/firmware/src/idle_erisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ int main() {
launch_msg_address->kernel_config.enables = 0;
uint64_t dispatch_addr =
NOC_XY_ADDR(NOC_X(mailboxes->go_message.master_x),
NOC_Y(mailboxes->go_message.master_x), DISPATCH_MESSAGE_ADDR);
NOC_Y(mailboxes->go_message.master_x), DISPATCH_MESSAGE_ADDR + mailboxes->go_message.dispatch_message_offset);
DEBUG_SANITIZE_NOC_ADDR(noc_index, dispatch_addr, 4);
noc_fast_atomic_increment(noc_index, NCRISC_AT_CMD_BUF, dispatch_addr, NOC_UNICAST_WRITE_VC, 1, 31 /*wrap*/, false /*linked*/);
mailboxes->launch_msg_rd_ptr = (launch_msg_rd_ptr + 1) & (launch_msg_buffer_num_entries - 1);
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/hw/inc/dev_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ struct kernel_config_msg_t {
} __attribute__((packed));

struct go_msg_t {
volatile uint8_t pad;
volatile uint8_t dispatch_message_offset;
volatile uint8_t master_x;
volatile uint8_t master_y;
volatile uint8_t signal; // INIT, GO, DONE, RESET_RD_PTR
Expand Down
Loading

0 comments on commit 1b0640f

Please sign in to comment.