Skip to content

Commit

Permalink
#7844: Add ability to gather noc xfer data through dprint
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-dma committed May 15, 2024
1 parent 4d9ae2f commit c0b2608
Show file tree
Hide file tree
Showing 9 changed files with 137 additions and 69 deletions.
5 changes: 3 additions & 2 deletions tt_metal/hostdevcommon/dprint_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@
DPRINT_PREFIX(WAIT) \
DPRINT_PREFIX(BFLOAT16) \
DPRINT_PREFIX(SETPRECISION) \
DPRINT_PREFIX(FIXED) \
DPRINT_PREFIX(NOC_LOG_XFER) \
DPRINT_PREFIX(FIXED) \
DPRINT_PREFIX(DEFAULTFLOAT) \
DPRINT_PREFIX(HEX) \
DPRINT_PREFIX(HEX) \
DPRINT_PREFIX(OCT) \
DPRINT_PREFIX(DEC) \
DPRINT_PREFIX(TILESLICE) \
Expand Down
22 changes: 9 additions & 13 deletions tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,6 @@ void noc_async_read_one_packet_set_state(std::uint64_t src_noc_addr, std::uint32
DEBUG_STATUS("RPD");

DEBUG_STATUS("NARW");
DEBUG_SANITIZE_NOC_ADDR(src_noc_addr, size);

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_addr >> 32);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, size);
Expand All @@ -582,8 +581,8 @@ void noc_async_read_one_packet_with_state(std::uint32_t src_noc_addr, std::uint3

DEBUG_STATUS("NARW");

// TODO: need a way sanitize size + addr w/o directly providing x/y here (grab x/y form state?)
// DEBUG_SANITIZE_READ_NOC_TRANSACTION(src_noc_addr, dst_local_l1_addr, size);
// In order to sanitize, need to grab full noc addr + xfer size from state.
DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_index, src_noc_addr, dst_local_l1_addr);

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dst_local_l1_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, src_noc_addr);
Expand All @@ -609,9 +608,6 @@ void noc_async_read_set_state(std::uint64_t src_noc_addr) {
while (!noc_cmd_buf_ready(noc_index, NCRISC_RD_CMD_BUF));
DEBUG_STATUS("RPD");

// TODO: need to sanitize in noc_async_read_with_state
// DEBUG_SANITIZE_NOC_ADDR(src_noc_addr, size);

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_noc_addr >> 32);

DEBUG_STATUS("NARD");
Expand All @@ -627,9 +623,8 @@ void noc_async_read_with_state(std::uint32_t src_noc_addr, std::uint32_t dst_loc
*/
DEBUG_STATUS("NARW");

// TODO: need a way sanitize size + addr w/o directly providing x/y here (grab x/y form state?)
// DEBUG_SANITIZE_NOC_ADDR(src_noc_addr, size);
DEBUG_SANITIZE_WORKER_ADDR(dst_local_l1_addr, size);
// In order to sanitize, need to grab full noc addr + xfer size from state.
DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_STATE(noc_index, src_noc_addr, dst_local_l1_addr, size);

while (size > NOC_MAX_BURST_SIZE) {
DEBUG_STATUS("RPW");
Expand Down Expand Up @@ -701,7 +696,6 @@ FORCE_INLINE
void noc_async_write_one_packet_set_state(std::uint64_t dst_noc_addr, std::uint32_t size) {

DEBUG_STATUS("NWPW");
DEBUG_SANITIZE_NOC_ADDR(dst_noc_addr, size);
while (!noc_cmd_buf_ready(noc_index, NCRISC_WR_CMD_BUF));
DEBUG_STATUS("NWPD");

Expand All @@ -722,11 +716,12 @@ FORCE_INLINE
void noc_async_write_one_packet_with_state(std::uint32_t src_local_l1_addr, std::uint32_t dst_noc_addr) {

DEBUG_STATUS("NWPW");
// TODO: need a way sanitize size + addr w/o directly providing x/y here (grab x/y form state?)
// DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(dst_noc_addr, src_local_l1_addr, size);
while (!noc_cmd_buf_ready(noc_index, NCRISC_WR_CMD_BUF));
DEBUG_STATUS("NWPD");

// In order to sanitize, need to grab full noc addr + xfer size from state.
DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_index, dst_noc_addr, src_local_l1_addr);

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_LO, src_local_l1_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_LO, dst_noc_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ);
Expand Down Expand Up @@ -1004,7 +999,7 @@ struct InterleavedPow2AddrGenFast {
}

DEBUG_STATUS("NRPW");
DEBUG_SANITIZE_NOC_READ_TRANSACTION(get_noc_addr_helper(src_noc_xy, src_addr), dest_addr, log_base_2_of_page_size);
DEBUG_SANITIZE_NOC_READ_TRANSACTION(get_noc_addr_helper(src_noc_xy, src_addr), dest_addr, 1 << log_base_2_of_page_size);
while (!noc_cmd_buf_ready(noc_index, NCRISC_RD_CMD_BUF));
DEBUG_STATUS("NRPD");

Expand Down Expand Up @@ -1047,6 +1042,7 @@ struct InterleavedPow2AddrGenFast {
DEBUG_STATUS("RPW");
while (!noc_cmd_buf_ready(noc_index, NCRISC_RD_CMD_BUF));
DEBUG_STATUS("RPD");
DEBUG_SANITIZE_NOC_READ_TRANSACTION(get_noc_addr_helper(src_noc_xy, src_addr), dest_addr, size);

NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dest_addr);
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, src_addr); // (uint32_t)src_addr
Expand Down
2 changes: 0 additions & 2 deletions tt_metal/hw/inc/dataflow_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ void noc_fast_read_wait_ready() {

FORCE_INLINE
void noc_fast_read_set_src_xy(uint64_t src_addr) {
DEBUG_SANITIZE_NOC_ADDR(src_addr, NOC_CMD_BUF_READ_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE));
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID, src_addr >> 32);
}

Expand Down Expand Up @@ -64,7 +63,6 @@ void noc_fast_write_set_cmd_field(uint32_t vc, bool mcast, bool linked) {

FORCE_INLINE
void noc_fast_write_set_dst_xy(uint64_t dest_addr) {
DEBUG_SANITIZE_NOC_ADDR(dest_addr, NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE));
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_RET_ADDR_MID, dest_addr >> 32);
}

Expand Down
10 changes: 6 additions & 4 deletions tt_metal/hw/inc/debug/dprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@
#include "hostdevcommon/common_runtime_address_map.h"

#include "dprint_buffer.h"
#if defined(COMPILE_FOR_ERISC)
#include "ethernet/tunneling.h"
#endif

#include "status.h"

#define DPRINT DebugPrinter()
Expand Down Expand Up @@ -83,6 +79,7 @@ struct HEX { char tmp; } ATTR_PACK; // Analog of cout << std::hex
struct OCT { char tmp; } ATTR_PACK; // Analog of cout << std::oct
struct DEC { char tmp; } ATTR_PACK; // Analog of cout << std::dec
struct SETW { char w; SETW(char w) : w(w) {} } ATTR_PACK; // Analog of cout << std::setw()
struct NOC_LOG_XFER { uint32_t size; NOC_LOG_XFER(uint32_t sz) : size(sz) {} } ATTR_PACK; // For tracking noc transactions.
struct U32_ARRAY {
uint32_t* ptr; uint32_t len;
U32_ARRAY(uint32_t* ptr, uint32_t len) : ptr(ptr), len(len) {}
Expand Down Expand Up @@ -142,6 +139,7 @@ template<> uint8_t DebugPrintTypeToId<RAISE>() { return DPrintRAISE; }
template<> uint8_t DebugPrintTypeToId<WAIT>() { return DPrintWAIT; }
template<> uint8_t DebugPrintTypeToId<BF16>() { return DPrintBFLOAT16; }
template<> uint8_t DebugPrintTypeToId<SETPRECISION>() { return DPrintSETPRECISION; }
template<> uint8_t DebugPrintTypeToId<NOC_LOG_XFER>() { return DPrintNOC_LOG_XFER; }
template<> uint8_t DebugPrintTypeToId<FIXED>() { return DPrintFIXED; }
template<> uint8_t DebugPrintTypeToId<DEFAULTFLOAT>() { return DPrintDEFAULTFLOAT; }
template<> uint8_t DebugPrintTypeToId<HEX>() { return DPrintHEX; }
Expand Down Expand Up @@ -213,6 +211,9 @@ void debug_print(DebugPrinter &dp, DebugPrintData data) {
#if defined(COMPILE_FOR_ERISC)
internal_::risc_context_switch();
#endif
// If we've closed the device, we've now disabled printing on it, don't hang.
if (*dp.wpos() == DEBUG_PRINT_SERVER_DISABLED_MAGIC)
return;
; // wait for host to catch up to wpos with it's rpos
}
DEBUG_STATUS("DPD");
Expand Down Expand Up @@ -293,6 +294,7 @@ template DebugPrinter operator<< <HEX>(DebugPrinter, HEX val);
template DebugPrinter operator<< <OCT>(DebugPrinter, OCT val);
template DebugPrinter operator<< <DEC>(DebugPrinter, DEC val);
template DebugPrinter operator<< <SETPRECISION>(DebugPrinter, SETPRECISION val);
template DebugPrinter operator<< <NOC_LOG_XFER>(DebugPrinter, NOC_LOG_XFER val);
template DebugPrinter operator<< <BF16>(DebugPrinter, BF16 val);
template DebugPrinter operator<< <F32>(DebugPrinter, F32 val);
template DebugPrinter operator<< <U32>(DebugPrinter, U32 val);
Expand Down
64 changes: 45 additions & 19 deletions tt_metal/hw/inc/debug/sanitize_noc.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@
//
#pragma once

#include "dprint.h"

// Add the ability to skip NOC logging, we can't have the tunneling cores stalling waiting for the
// print server.
#if !defined(SKIP_NOC_LOGGING)
#define LOG_LEN(l) DPRINT << NOC_LOG_XFER(l)
#define LOG_READ_LEN_FROM_STATE() LOG_LEN(NOC_CMD_BUF_READ_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE))
#define LOG_WRITE_LEN_FROM_STATE() LOG_LEN(NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE))
#else
#define LOG_LEN(l)
#define LOG_READ_LEN_FROM_STATE()
#define LOG_WRITE_LEN_FROM_STATE()
#endif

#if (defined(COMPILE_FOR_BRISC) || defined(COMPILE_FOR_NCRISC) || defined(COMPILE_FOR_ERISC) || defined(COMPILE_FOR_IDLE_ERISC)) && \
(defined(WATCHER_ENABLED)) && \
(!defined(WATCHER_DISABLE_NOC_SANITIZE))
Expand All @@ -27,6 +41,7 @@ extern uint8_t noc_index;
#include "noc_parameters.h"
#include "noc_overlay_parameters.h"


// A couple defines for specifying read/write and multi/unicast
#define DEBUG_SANITIZE_NOC_READ true
#define DEBUG_SANITIZE_NOC_WRITE false
Expand Down Expand Up @@ -199,33 +214,44 @@ void debug_sanitize_noc_and_worker_addr(
}
}

// TODO: should be able clean up uses of the first three macros and remove them.
#define DEBUG_SANITIZE_WORKER_ADDR(a, l) \
debug_sanitize_worker_addr(a, l)
// TODO: Clean these up with #7453
#define DEBUG_SANITIZE_NOC_ADDR(a, l) \
debug_sanitize_noc_addr(a, 0, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ)
#define DEBUG_SANITIZE_NOC_MULTI_ADDR(a, l) \
debug_sanitize_noc_addr(a, 0, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_READ)
debug_sanitize_noc_addr(a, 0, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ); LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_TRANSACTION(noc_a, worker_a, l, multicast, dir) \
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, multicast, dir)
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, multicast, dir); LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION(noc_a, worker_a, l) \
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ)
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_READ); LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_MULTI_READ_TRANSACTION(noc_a, worker_a, l) \
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_READ)
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_READ); LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(noc_a, worker_a, l) \
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_WRITE)
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_UNICAST, DEBUG_SANITIZE_NOC_WRITE); LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_MULTI_WRITE_TRANSACTION(noc_a, worker_a, l) \
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_WRITE)
debug_sanitize_noc_and_worker_addr(noc_a, worker_a, l, DEBUG_SANITIZE_NOC_MULTICAST, DEBUG_SANITIZE_NOC_WRITE); LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \
DEBUG_SANITIZE_NOC_READ_TRANSACTION( \
((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \
worker_a, \
NOC_CMD_BUF_READ_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE));
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_STATE(noc_id, noc_a_lower, worker_a, l) \
DEBUG_SANITIZE_NOC_READ_TRANSACTION( \
((uint64_t) NOC_CMD_BUF_READ_REG(noc_id, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \
worker_a, l);
#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) \
DEBUG_SANITIZE_NOC_WRITE_TRANSACTION( \
((uint64_t) NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_TARG_ADDR_MID) << 32) | noc_a_lower, \
worker_a, \
NOC_CMD_BUF_READ_REG(noc_index, NCRISC_WR_CMD_BUF, NOC_AT_LEN_BE));

#else // !WATCHER_ENABLED

#define DEBUG_SANITIZE_WORKER_ADDR(a, l)
#define DEBUG_SANITIZE_NOC_ADDR(a, l)
#define DEBUG_SANITIZE_NOC_MULTI_ADDR(a, l)
#define DEBUG_SANITIZE_NOC_TRANSACTION(noc_a, worker_a, l, multicast, dir)
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION(noc_a, worker_a, l)
#define DEBUG_SANITIZE_NOC_MULTI_READ_TRANSACTION(noc_a, worker_a, l)
#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(noc_a, worker_a, l)
#define DEBUG_SANITIZE_NOC_MULTI_WRITE_TRANSACTION(noc_a, worker_a, l)
#define DEBUG_SANITIZE_NOC_ADDR(a, l) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_TRANSACTION(noc_a, worker_a, l, multicast, dir) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_MULTI_READ_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_MULTI_WRITE_TRANSACTION(noc_a, worker_a, l) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) LOG_READ_LEN_FROM_STATE()
#define DEBUG_SANITIZE_NOC_READ_TRANSACTION_WITH_ADDR_STATE(noc_id, noc_a_lower, worker_a, l) LOG_LEN(l)
#define DEBUG_SANITIZE_NOC_WRITE_TRANSACTION_WITH_ADDR_AND_SIZE_STATE(noc_id, noc_a_lower, worker_a) LOG_WRITE_LEN_FROM_STATE()

#endif // WATCHER_ENABLED && not TRISC
70 changes: 51 additions & 19 deletions tt_metal/impl/debug/dprint_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,30 @@ static inline int GetNumRiscs(int chip_id, const CoreCoord &core) {
return (tt::llrt::is_ethernet_core(core, chip_id))? DPRINT_NRISCVS_ETH : DPRINT_NRISCVS;
}

// Helper function to get all physical printable cores on a device
static map<CoreType, set<CoreCoord>> get_all_physical_printable_cores(Device *device) {
map<CoreType, set<CoreCoord>> all_physical_printable_cores;
// The set of all printable cores is Tensix + Eth cores
CoreCoord logical_grid_size = device->logical_grid_size();
for (uint32_t x = 0; x < logical_grid_size.x; x++) {
for (uint32_t y = 0; y < logical_grid_size.y; y++) {
CoreCoord logical_coord(x, y);
CoreCoord worker_core = device->worker_core_from_logical_core(logical_coord);
all_physical_printable_cores[CoreType::WORKER].insert(worker_core);
}
}
for (const auto& eth_core : device->get_active_ethernet_cores()) {
CoreCoord physical_core = device->ethernet_core_from_logical_core(eth_core);
all_physical_printable_cores[CoreType::ETH].insert(physical_core);
}
for (const auto& eth_core : device->get_inactive_ethernet_cores()) {
CoreCoord physical_core = device->ethernet_core_from_logical_core(eth_core);
all_physical_printable_cores[CoreType::ETH].insert(physical_core);
}

return all_physical_printable_cores;
}

// A null stream for when the print server is muted.
class NullBuffer : public std::streambuf {
public:
Expand Down Expand Up @@ -131,6 +155,8 @@ struct DebugPrintServerContext {

std::ofstream* outfile_ = nullptr; // non-cout
std::ostream* stream_ = nullptr; // either == outfile_ or is &cout
std::ofstream* noc_log_ = nullptr;
std::map<uint32_t, uint32_t> noc_xfer_counts;

// A map to from {device id, core coord x, y, hart index} to the signal code it's waiting for.
std::map<tuple<uint32_t, uint32_t, uint32_t, uint32_t>, uint32_t> hart_waiting_on_signal_;
Expand Down Expand Up @@ -297,7 +323,8 @@ void WriteInitMagic(Device *device, const CoreCoord& core, int hart_id, bool ena

// TODO(AP): this could use a cleanup - need a different mechanism to know if a kernel is running on device.
// Force wait for first kernel launch by first writing a non-zero and waiting for a zero.
vector<uint32_t> initbuf = { uint32_t(enabled ? DEBUG_PRINT_SERVER_STARTING_MAGIC : DEBUG_PRINT_SERVER_DISABLED_MAGIC) };
vector<uint32_t> initbuf = vector<uint32_t>(PRINT_BUFFER_SIZE / sizeof(uint32_t), 0);
initbuf[0] = uint32_t(enabled ? DEBUG_PRINT_SERVER_STARTING_MAGIC : DEBUG_PRINT_SERVER_DISABLED_MAGIC);
tt::llrt::write_hex_vec_to_core(device->id(), core, initbuf, base_addr);
} // WriteInitMagic

Expand Down Expand Up @@ -327,6 +354,7 @@ DebugPrintServerContext::DebugPrintServerContext() {
outfile_ = new std::ofstream(file_name);
}
stream_ = outfile_ ? outfile_ : &cout;
noc_log_ = new std::ofstream("noc_log.csv");

stop_print_server_ = false;
mute_print_server_ = false;
Expand Down Expand Up @@ -355,6 +383,10 @@ DebugPrintServerContext::~DebugPrintServerContext() {
outfile_->close();
delete outfile_;
}
for (auto &size_and_count : noc_xfer_counts)
*noc_log_ << size_and_count.first << "," << size_and_count.second << "\n";
noc_log_->close();
delete noc_log_;
inst = nullptr;
} // ~DebugPrintServerContext

Expand Down Expand Up @@ -384,24 +416,7 @@ void DebugPrintServerContext::AttachDevice(Device* device) {

// A set of all valid printable cores, used for checking the user input. Note that the coords
// here are physical.
map<CoreType, set<CoreCoord>> all_physical_printable_cores;
// The set of all printable cores is Tensix + Eth cores
CoreCoord logical_grid_size = device->logical_grid_size();
for (uint32_t x = 0; x < logical_grid_size.x; x++) {
for (uint32_t y = 0; y < logical_grid_size.y; y++) {
CoreCoord logical_coord(x, y);
CoreCoord worker_core = device->worker_core_from_logical_core(logical_coord);
all_physical_printable_cores[CoreType::WORKER].insert(worker_core);
}
}
for (const auto& eth_core : device->get_active_ethernet_cores()) {
CoreCoord physical_core = device->ethernet_core_from_logical_core(eth_core);
all_physical_printable_cores[CoreType::ETH].insert(physical_core);
}
for (const auto& eth_core : device->get_inactive_ethernet_cores()) {
CoreCoord physical_core = device->ethernet_core_from_logical_core(eth_core);
all_physical_printable_cores[CoreType::ETH].insert(physical_core);
}
map<CoreType, set<CoreCoord>> all_physical_printable_cores = get_all_physical_printable_cores(device);

// Initialize all print buffers on all cores on the device to have print disabled magic. We
// will then write print enabled magic for only the cores the user has specified to monitor.
Expand Down Expand Up @@ -554,6 +569,7 @@ void DebugPrintServerContext::DetachDevice(Device* device) {
uint32_t wpos = from_dev[0], rpos = from_dev[1];
if (rpos < wpos) {
outstanding_prints = true;
log_warning("Phys core {} is still waiting!", core.str());
break;
}
}
Expand All @@ -570,6 +586,17 @@ void DebugPrintServerContext::DetachDevice(Device* device) {
device_to_core_range_.erase(device);
device_to_core_range_lock_.unlock();
log_info(tt::LogMetal, "DPRINT Server dettached device {}", device->id());

// When detaching a device, disable prints on it.
map<CoreType, set<CoreCoord>> all_physical_printable_cores = get_all_physical_printable_cores(device);
for (auto &type_and_cores : all_physical_printable_cores) {
for (auto &core : type_and_cores.second) {
int hart_count = GetNumRiscs(device->id(), core);
for (int hart_index = 0; hart_index < hart_count; hart_index++) {
WriteInitMagic(device, core, hart_index, false);
}
}
}
} // DetachDevice

void DebugPrintServerContext::ClearLogFile() {
Expand Down Expand Up @@ -709,6 +736,11 @@ bool DebugPrintServerContext::PeekOneHartNonBlocking(
stream << std::setprecision(*ptr);
TT_ASSERT(sz == 1);
break;
case DPrintNOC_LOG_XFER:
if (tt::llrt::OptionsG.get_dprint_noc_transfers())
noc_xfer_counts[*reinterpret_cast<uint32_t*>(ptr)]++;
TT_ASSERT(sz == 4);
break;
case DPrintFIXED:
stream << std::fixed;
TT_ASSERT(sz == 1);
Expand Down
Loading

0 comments on commit c0b2608

Please sign in to comment.