Skip to content

Commit

Permalink
#8223: Clean up magic numbers and fix first time stamp calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
mo-tenstorrent committed May 28, 2024
1 parent 9c5fe88 commit 49ec429
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 24 deletions.
4 changes: 1 addition & 3 deletions tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,7 @@ uint32_t process_relay_linear_cmd(uint32_t cmd_ptr,

uint32_t process_stall(uint32_t cmd_ptr) {

//DeviceZoneScopedN("PROCESS-STALL");
static uint32_t count = 0;

count++;
Expand Down Expand Up @@ -776,7 +777,6 @@ uint32_t process_exec_buf_cmd(uint32_t cmd_ptr_outer,

bool done = false;
while (!done) {
//DeviceZoneScopedMainN("PROC-MAIN");
uint32_t cmd_ptr = cmddat_q_base;

paged_read_into_cmddat_q(cmd_ptr);
Expand Down Expand Up @@ -1012,7 +1012,6 @@ void kernel_main_h() {

bool done = false;
while (!done) {
//DeviceZoneScopedMainN("KERNEL-MAIN-H");
fetch_q_get_cmds<sizeof(CQPrefetchHToPrefetchDHeader)>(fence, cmd_ptr, pcie_read_ptr);

volatile CQPrefetchCmd tt_l1_ptr *cmd = (volatile CQPrefetchCmd tt_l1_ptr *)(cmd_ptr + sizeof(CQPrefetchHToPrefetchDHeader));
Expand Down Expand Up @@ -1051,7 +1050,6 @@ void kernel_main_d() {

bool done = false;
while (!done) {
//DeviceZoneScopedMainN("KERNEL-MAIN-D");
// cmds come in packed batches based on HostQ reads in prefetch_h
// once a packed batch ends, we need to jump to the next page
uint32_t length = relay_cb_get_cmds(fence, cmd_ptr);
Expand Down
8 changes: 5 additions & 3 deletions tt_metal/tools/profiler/kernel_profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ namespace kernel_profiler{
extern uint32_t sums[SUM_COUNT];
extern uint32_t sumIDs[SUM_COUNT];

constexpr uint32_t QUICK_PUSH_MARKER_COUNT = 2;

#if defined(COMPILE_FOR_BRISC)
constexpr uint32_t profilerBuffer = PROFILER_L1_BUFFER_BR;
constexpr uint32_t deviceBufferEndIndex = DEVICE_BUFFER_END_INDEX_BR;
Expand Down Expand Up @@ -398,7 +400,7 @@ namespace kernel_profiler{
uint64_t dram_bank_dst_noc_addr = s.get_noc_addr(core_flat_id / profiler_core_count_per_dram, dram_offset);

mark_end_at_index_inlined(wIndex, hash, get_end_timer_id(hash));
wIndex += 2 * PROFILER_L1_MARKER_UINT32_SIZE;
wIndex += QUICK_PUSH_MARKER_COUNT * PROFILER_L1_MARKER_UINT32_SIZE;

uint32_t currEndIndex = profiler_control_buffer[HOST_BUFFER_END_INDEX_NC] + wIndex;

Expand All @@ -425,7 +427,7 @@ namespace kernel_profiler{
bool start_marked = false;
inline __attribute__((always_inline)) profileScope ()
{
if (wIndex < (PROFILER_L1_VECTOR_SIZE - stackSize - (2 * PROFILER_L1_MARKER_UINT32_SIZE)))
if (wIndex < (PROFILER_L1_VECTOR_SIZE - stackSize - (QUICK_PUSH_MARKER_COUNT * PROFILER_L1_MARKER_UINT32_SIZE)))
{
stackSize += PROFILER_L1_MARKER_UINT32_SIZE;
start_marked = true;
Expand All @@ -443,7 +445,7 @@ namespace kernel_profiler{
start_marked = false;
stackSize -= PROFILER_L1_MARKER_UINT32_SIZE;
}
if (wIndex >= (PROFILER_L1_VECTOR_SIZE - (2 * PROFILER_L1_MARKER_UINT32_SIZE)))
if (wIndex >= (PROFILER_L1_VECTOR_SIZE - (QUICK_PUSH_MARKER_COUNT * PROFILER_L1_MARKER_UINT32_SIZE)))
{
quick_push();
}
Expand Down
50 changes: 32 additions & 18 deletions tt_metal/tools/profiler/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,24 +380,6 @@ void DeviceProfiler::dumpResults (

}

for (const auto &worker_core : worker_cores) {
std::pair<uint32_t, CoreCoord> device_core = {device_id, worker_core};
if (device_tracy_contexts.find(device_core) == device_tracy_contexts.end())
{
auto tracyCtx = TracyTTContext();
std::string tracyTTCtxName = fmt::format("Device: {}, Core ({},{})", device_id, worker_core.x, worker_core.y);
TracyTTContextPopulate(tracyCtx, smallest_timestamp, 1000.f / (float)device_core_frequency);
TracyTTContextName(tracyCtx, tracyTTCtxName.c_str(), tracyTTCtxName.size());

device_tracy_contexts.emplace(
device_core,
tracyCtx
);
}
}

//std::sort (device_events.begin(), device_events.end());

}
else
{
Expand All @@ -412,6 +394,38 @@ void DeviceProfiler::pushTracyDeviceResults()
ZoneScoped;
std::set<std::pair<uint32_t, CoreCoord>> device_cores_set;
std::vector<std::pair<uint32_t, CoreCoord>> device_cores;
// Gather every distinct (chip id, core coordinate) pair that appears in
// device_events. device_cores_set is used only to detect duplicates, while
// device_cores records each pair once, in the order it is first encountered.
for (auto& event: device_events)
{
std::pair<uint32_t, CoreCoord> device_core = {event.chip_id, (CoreCoord){event.core_x,event.core_y}};
auto ret = device_cores_set.insert(device_core);
// ret.second is true only when the pair was not already in the set.
if (ret.second )
{
device_cores.push_back(device_core);
}
}

// Ensure every entry of device_cores has a Tracy context stored in
// device_tracy_contexts. Entries that already have one are left untouched.
for (auto& device_core: device_cores)
{
// Unpack the pair for use in the context name below.
// NOTE(review): device_core.first is a uint32_t but is stored in an int here.
int device_id = device_core.first;
CoreCoord worker_core = device_core.second;


if (device_tracy_contexts.find(device_core) == device_tracy_contexts.end())
{
auto tracyCtx = TracyTTContext();
// Name attached to the context, e.g. "Device: 0, Core (1,2)".
std::string tracyTTCtxName = fmt::format("Device: {}, Core ({},{})", device_id, worker_core.x, worker_core.y);

// Passes smallest_timestamp and 1000 / device_core_frequency to the context.
// NOTE(review): presumably the starting device timestamp and the clock
// period in ns (frequency in MHz) - confirm against TracyTTContextPopulate.
TracyTTContextPopulate(tracyCtx, smallest_timestamp, 1000.f / (float)device_core_frequency);

TracyTTContextName(tracyCtx, tracyTTCtxName.c_str(), tracyTTCtxName.size());

// Remember the context so later calls find it and skip creation.
device_tracy_contexts.emplace(
device_core,
tracyCtx
);
}
}

for (auto& event: device_events)
{
std::pair<uint32_t, CoreCoord> device_core = {event.chip_id, (CoreCoord){event.core_x,event.core_y}};
Expand Down

0 comments on commit 49ec429

Please sign in to comment.