Skip to content

Commit

Permalink
#0: Add NOC_XY_PCIE_ENCODING specifically for pcie cores since WH has…
Browse files Browse the repository at this point in the history
… an additional address offset
  • Loading branch information
tt-aho committed Jun 4, 2024
1 parent 626e6de commit d771a74
Show file tree
Hide file tree
Showing 9 changed files with 20 additions and 681 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ void kernel_main() {
volatile tt_l1_ptr uint32_t* done_address = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(L1_UNRESERVED_BASE);

while (done_address[0] == 0) {
uint64_t host_src_addr = get_noc_addr_helper(NOC_XY_ENCODING(PCIE_NOC_X, PCIE_NOC_Y), pcie_read_ptr);
uint64_t host_src_addr = get_noc_addr_helper(NOC_XY_PCIE_ENCODING(PCIE_NOC_X, PCIE_NOC_Y, NOC_INDEX), pcie_read_ptr);
noc_async_read(host_src_addr, L1_UNRESERVED_BASE, read_sizeB);
pcie_read_ptr += read_sizeB;
if (pcie_read_ptr > pcie_base + pcie_sizeB) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ void kernel_main() {
constexpr uint32_t base_pcie_dst_address = get_compile_time_arg_val(1);
constexpr uint32_t num_16b_writes = get_compile_time_arg_val(2);

uint64_t pcie_core_noc_encoding = uint64_t(NOC_XY_ENCODING(PCIE_NOC_X, PCIE_NOC_Y)) << 32;
uint64_t pcie_core_noc_encoding = uint64_t(NOC_XY_PCIE_ENCODING(PCIE_NOC_X, PCIE_NOC_Y, NOC_INDEX)) << 32;

uint32_t l1_src_address = base_l1_src_address;
uint32_t pcie_dst_address = base_pcie_dst_address;
Expand Down
3 changes: 3 additions & 0 deletions tt_metal/hw/inc/blackhole/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// Packs a NOC (x, y) coordinate pair into a 64-bit value: x is shifted left by
// NOC_ADDR_LOCAL_BITS and y by NOC_ADDR_LOCAL_BITS + NOC_ADDR_NODE_ID_BITS.
#define NOC_XY_ENCODING(x, y)                       \
    ((((uint64_t)(x)) << NOC_ADDR_LOCAL_BITS) |     \
     (((uint64_t)(y)) << (NOC_ADDR_LOCAL_BITS + NOC_ADDR_NODE_ID_BITS)))

// PCIe-core form of NOC_XY_ENCODING. noc_index is accepted but not used by this
// definition; the result is exactly NOC_XY_ENCODING(x, y).
#define NOC_XY_PCIE_ENCODING(x, y, noc_index) NOC_XY_ENCODING(x, y)

#define NOC_MULTICAST_ENCODING(x_start, y_start, x_end, y_end) \
((((uint64_t)(x_start)) << (NOC_ADDR_LOCAL_BITS + 2 * NOC_ADDR_NODE_ID_BITS)) | \
(((uint64_t)(y_start)) << (NOC_ADDR_LOCAL_BITS + 3 * NOC_ADDR_NODE_ID_BITS)) | \
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ uint64_t get_l1_noc_addr(const uint32_t id, const uint32_t page_size, const uint
}

uint64_t get_system_memory_noc_addr(const uint32_t id, const uint32_t page_size, const uint32_t base_addr, const uint32_t offset = 0) {
constexpr static uint64_t pcie_core_noc_encoding = uint64_t(NOC_XY_ENCODING(PCIE_NOC_X, PCIE_NOC_Y)) << 32;
uint64_t pcie_core_noc_encoding = uint64_t(NOC_XY_PCIE_ENCODING(NOC_X(PCIE_NOC_X), NOC_Y(PCIE_NOC_Y), noc_index)) << 32;
uint32_t addr = base_addr + page_size * id + offset;
uint64_t noc_addr = pcie_core_noc_encoding | addr;
return noc_addr;
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/hw/inc/grayskull/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// Address formats
// Packs a NOC (x, y) coordinate pair into 32 bits: x sits in the low bits and
// y is shifted left by NOC_ADDR_NODE_ID_BITS.
#define NOC_XY_ENCODING(x, y) \
    (((uint32_t)(x)) | (((uint32_t)(y)) << NOC_ADDR_NODE_ID_BITS))

// PCIe-core form of NOC_XY_ENCODING. noc_index is accepted but not used by this
// definition; the result is exactly NOC_XY_ENCODING(x, y).
#define NOC_XY_PCIE_ENCODING(x, y, noc_index) \
    NOC_XY_ENCODING(x, y)

// Packs the corners of a multicast rectangle into one value. Field layout, in
// units of NOC_ADDR_NODE_ID_BITS from the least-significant end:
//   x_end (shift 0), y_end (shift 1), x_start (shift 2), y_start (shift 3).
// The entire expansion is wrapped in parentheses so that operators applied to
// the macro at the call site (e.g. `& mask`, `<< n`, `==`) act on the complete
// value rather than only on the trailing y_end term.
#define NOC_MULTICAST_ENCODING(x_start, y_start, x_end, y_end)                                  \
    (((x_start) << (2 * NOC_ADDR_NODE_ID_BITS)) | ((y_start) << (3 * NOC_ADDR_NODE_ID_BITS)) |  \
     (x_end) | ((y_end) << (NOC_ADDR_NODE_ID_BITS)))
Expand Down
12 changes: 10 additions & 2 deletions tt_metal/hw/inc/wormhole/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,21 @@
// (x, y) coordinates that NOC_XY_PCIE_ENCODING compares against when its
// noc_index argument is zero.
#define PCIE_NOC_X 0
#define PCIE_NOC_Y 3

// (x, y) coordinates that NOC_XY_PCIE_ENCODING compares against when its
// noc_index argument is non-zero.
// NOTE(review): presumably the same PCIe core as seen from NOC 1 -- confirm.
#define PCIE_NOC1_X 9
#define PCIE_NOC1_Y 8

// Address formats
#define NOC_XY_ENCODING(x, y) \
(((uint32_t)(y)) << ((NOC_ADDR_LOCAL_BITS % 32)+NOC_ADDR_NODE_ID_BITS)) | \
(((uint32_t)(x)) << (NOC_ADDR_LOCAL_BITS % 32)) | ((x == PCIE_NOC_X and y == PCIE_NOC_Y) * 0x8) \
(((uint32_t)(x)) << (NOC_ADDR_LOCAL_BITS % 32)) \

// PCIe-core variant of NOC_XY_ENCODING. In addition to the plain (x, y)
// encoding, 0x8 is OR-ed in when (x, y) equals the PCIe core coordinates for
// the selected NOC: (PCIE_NOC1_X, PCIE_NOC1_Y) when noc_index is non-zero,
// (PCIE_NOC_X, PCIE_NOC_Y) when it is zero. For any other (x, y) the result is
// just NOC_XY_ENCODING(x, y).
//
// Every argument and the whole expansion are parenthesized so the macro
// composes correctly with expression arguments and with operators at the call
// site. x and y are expanded more than once, so pass side-effect-free
// expressions.
#define NOC_XY_PCIE_ENCODING(x, y, noc_index)                              \
    ((NOC_XY_ENCODING(x, y)) |                                             \
     (((noc_index) ? (((x) == PCIE_NOC1_X) && ((y) == PCIE_NOC1_Y))        \
                   : (((x) == PCIE_NOC_X) && ((y) == PCIE_NOC_Y))) * 0x8))

#define NOC_MULTICAST_ENCODING(x_start, y_start, x_end, y_end) \
(((uint32_t)(x_start)) << ((NOC_ADDR_LOCAL_BITS % 32)+2*NOC_ADDR_NODE_ID_BITS)) | \
(((uint32_t)(x_start)) << ((NOC_ADDR_LOCAL_BITS % 32)+2*NOC_ADDR_NODE_ID_BITS)) | \
(((uint32_t)(y_start)) << ((NOC_ADDR_LOCAL_BITS % 32)+3*NOC_ADDR_NODE_ID_BITS)) | \
(((uint32_t)(x_end)) << (NOC_ADDR_LOCAL_BITS % 32)) | \
(((uint32_t)(y_end)) << ((NOC_ADDR_LOCAL_BITS % 32)+NOC_ADDR_NODE_ID_BITS)) \
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ constexpr uint32_t is_h_variant = get_compile_time_arg_val(16);
constexpr uint32_t upstream_noc_xy = uint32_t(NOC_XY_ENCODING(UPSTREAM_NOC_X, UPSTREAM_NOC_Y));
constexpr uint32_t downstream_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_NOC_X, DOWNSTREAM_NOC_Y));
constexpr uint32_t my_noc_xy = uint32_t(NOC_XY_ENCODING(MY_NOC_X, MY_NOC_Y));
constexpr uint32_t pcie_noc_xy = uint32_t(NOC_XY_ENCODING(NOC_0_X(static_cast<uint8_t>(NOC_INDEX), noc_size_x, PCIE_NOC_X), NOC_0_Y(static_cast<uint8_t>(NOC_INDEX), noc_size_y, PCIE_NOC_Y)));
constexpr uint32_t pcie_noc_xy = uint32_t(NOC_XY_PCIE_ENCODING(NOC_0_X(static_cast<uint8_t>(NOC_INDEX), noc_size_x, PCIE_NOC_X), NOC_0_Y(static_cast<uint8_t>(NOC_INDEX), noc_size_y, PCIE_NOC_Y), NOC_INDEX));
constexpr uint32_t dispatch_cb_page_size = 1 << dispatch_cb_log_page_size;

constexpr uint32_t completion_queue_end_addr = completion_queue_base_addr + completion_queue_size;
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ constexpr uint32_t is_h_variant = get_compile_time_arg_val(22);
constexpr uint32_t my_noc_xy = uint32_t(NOC_XY_ENCODING(MY_NOC_X, MY_NOC_Y));
constexpr uint32_t upstream_noc_xy = uint32_t(NOC_XY_ENCODING(UPSTREAM_NOC_X, UPSTREAM_NOC_Y));
constexpr uint32_t downstream_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_NOC_X, DOWNSTREAM_NOC_Y));
constexpr uint32_t pcie_noc_xy = uint32_t(NOC_XY_ENCODING(NOC_0_X(static_cast<uint8_t>(NOC_INDEX), noc_size_x, PCIE_NOC_X), NOC_0_Y(static_cast<uint8_t>(NOC_INDEX), noc_size_y, PCIE_NOC_Y)));
constexpr uint32_t pcie_noc_xy = uint32_t(NOC_XY_PCIE_ENCODING(NOC_0_X(static_cast<uint8_t>(NOC_INDEX), noc_size_x, PCIE_NOC_X), NOC_0_Y(static_cast<uint8_t>(NOC_INDEX), noc_size_y, PCIE_NOC_Y), NOC_INDEX));
constexpr uint32_t downstream_cb_page_size = 1 << downstream_cb_log_page_size;
constexpr uint32_t downstream_cb_end = downstream_cb_base + (1 << downstream_cb_log_page_size) * downstream_cb_pages;
constexpr uint32_t prefetch_q_end = prefetch_q_base + prefetch_q_size;
Expand Down
Loading

0 comments on commit d771a74

Please sign in to comment.