diff --git a/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h b/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h index 245d2520510..bfe1a835fa3 100644 --- a/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h +++ b/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h @@ -273,7 +273,6 @@ enum class module_type { // Used by by IPU profiling/debug on Windows typedef struct { uint64_t perf_address; - uint32_t perf_value; } profile_data_t; typedef struct { diff --git a/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_plugin.cpp b/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_plugin.cpp index 28cec0a0de1..6b127648291 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_plugin.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_plugin.cpp @@ -165,7 +165,7 @@ namespace xdp { msg << "AIE Debug monitoring AIE tile (" << tile.col << "," << tile.row << ") in module " << module << "."; xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - op_profile_data.emplace_back(profile_data_t{Regs[i] + (tile.col << 25) + (tile.row << 20), 0}); + op_profile_data.emplace_back(profile_data_t{Regs[i] + (tile.col << 25) + (tile.row << 20)}); counterId++; } } @@ -251,10 +251,10 @@ namespace xdp { auto read_op_code_ = XAie_RequestCustomTxnOp(&aieDevInst); try { - mKernel = xrt::kernel(context, "DPU_1x4_NEW"); + mKernel = xrt::kernel(context, "XDP_KERNEL"); } catch (std::exception &e){ std::stringstream msg; - msg << "Unable to find DPU kernel from hardware context. Not configuring AIE Profile. " << e.what() ; + msg << "Unable to find XDP_KERNEL kernel from hardware context. Not configuring AIE Debug. " << e.what() ; xrt_core::message::send(severity_level::warning, "XRT", msg.str()); return; } @@ -270,7 +270,7 @@ namespace xdp { instr_bo = xrt::bo(context.get_device(), instr_buf.ibuf_.size(), XCL_BO_FLAGS_CACHEABLE, mKernel.group_id(1)); } catch (std::exception &e){ std::stringstream msg; - msg << "Unable to create the instruction buffer for polling during AIE Profile. " << e.what() << std::endl; + msg << "Unable to create the instruction buffer for polling during AIE Debug. " << e.what() << std::endl; xrt_core::message::send(severity_level::warning, "XRT", msg.str()); return; } @@ -284,21 +284,34 @@ namespace xdp { std::stringstream msg; msg << "Unable to successfully execute AIE Profile polling kernel. " << e.what() << std::endl; xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + return; } XAie_ClearTransaction(&aieDevInst); - auto instrbo_map = instr_bo.map(); - instr_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - // TODO: figure out where the 8 comes from - instrbo_map += sizeof(XAie_TxnHeader) + sizeof(XAie_CustomOpHdr) + 8; - auto output = reinterpret_cast(instrbo_map); + static constexpr uint32_t size_4K = 0x1000; + static constexpr uint32_t offset_3K = 0x0C00; - for (uint32_t i = 0; i < output->count; i++) { + // results BO syncs AIE Debug result from device + xrt::bo result_bo; + try { + result_bo = xrt::bo(context.get_device(), size_4K, XCL_BO_FLAGS_CACHEABLE, mKernel.group_id(1)); + } catch (std::exception &e) { std::stringstream msg; - msg << "Debug Register address/values: 0x" << std::hex << output->profile_data[i].perf_address << ": " << std::dec << output->profile_data[i].perf_value; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + msg << "Unable to create result buffer for AIE Debug. Cannot get AIE Debug Info." << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); + return; + } + + auto result_bo_map = result_bo.map(); + result_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + uint32_t* output = reinterpret_cast(result_bo_map+offset_3K); + + for (uint32_t i = 0; i < op->count; i++) { + std::stringstream msg; + msg << "Debug Register address/values: 0x" << std::hex << op->profile_data[i].perf_address << ": " << std::dec << output[i]; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); } free(op); @@ -310,3 +323,4 @@ namespace xdp { } } // end namespace xdp + diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/win/aie_profile.cpp b/src/runtime_src/xdp/profile/plugin/aie_profile/win/aie_profile.cpp index d733a98e586..8983c326e69 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/win/aie_profile.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/win/aie_profile.cpp @@ -284,7 +284,7 @@ namespace xdp { std::vector Regs = regValues[type]; // 25 is column offset and 20 is row offset for IPU - op_profile_data.emplace_back(profile_data_t{Regs[i] + (col << 25) + (row << 20), 0}); + op_profile_data.emplace_back(profile_data_t{Regs[i] + (col << 25) + (row << 20)}); std::vector values; values.insert(values.end(), {col, row, phyStartEvent, phyEndEvent, resetEvent, 0, 1000, payload}); @@ -314,10 +314,10 @@ namespace xdp { auto context = metadata->getHwContext(); try { - mKernel = xrt::kernel(context, "DPU_PROFILE"); + mKernel = xrt::kernel(context, "XDP_KERNEL"); } catch (std::exception &e){ std::stringstream msg; - msg << "Unable to find DPU_PROFILE kernel from hardware context. Failed to configure AIE Profile." << e.what() ; + msg << "Unable to find XDP_KERNEL kernel from hardware context. Failed to configure AIE Profile." << e.what() ; xrt_core::message::send(severity_level::warning, "XRT", msg.str()); return false; } @@ -518,23 +518,36 @@ namespace xdp { std::stringstream msg; msg << "Unable to successfully execute AIE Profile polling kernel. " << e.what() << std::endl; xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + return; } XAie_ClearTransaction(&aieDevInst); - auto instrbo_map = instr_bo.map(); - instr_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); - - // TODO: figure out where the 8 comes from - instrbo_map += sizeof(XAie_TxnHeader) + sizeof(XAie_CustomOpHdr) + 8; - auto output = reinterpret_cast(instrbo_map); + static constexpr uint32_t size_4K = 0x1000; + static constexpr uint32_t offset_3K = 0x0C00; - for (uint32_t i = 0; i < output->count; i++) { + // results BO syncs profile result from device + xrt::bo result_bo; + try { + result_bo = xrt::bo(context.get_device(), size_4K, XCL_BO_FLAGS_CACHEABLE, mKernel.group_id(1)); + } catch (std::exception &e) { std::stringstream msg; - msg << "Counter address/values: 0x" << std::hex << output->profile_data[i].perf_address << ": " << std::dec << output->profile_data[i].perf_value; - xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + msg << "Unable to create result buffer for AIE Profle. Cannot get AIE Profile Info." << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); + return; + } + + auto result_bo_map = result_bo.map(); + result_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + uint32_t* output = reinterpret_cast(result_bo_map+offset_3K); + + for (uint32_t i = 0; i < op->count; i++) { + std::stringstream msg; + msg << "Counter address/values: 0x" << std::hex << op->profile_data[i].perf_address << ": " << std::dec << output[i]; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); std::vector values = outputValues[i]; - values[5] = static_cast(output->profile_data[i].perf_value); //write pc value + values[5] = static_cast(output[i]); //write pc value db->getDynamicInfo().addAIESample(index, timestamp, values); } diff --git a/src/runtime_src/xdp/profile/plugin/ml_timeline/clientDev/ml_timeline.cpp b/src/runtime_src/xdp/profile/plugin/ml_timeline/clientDev/ml_timeline.cpp index e0e50dca122..808cecbf3f2 100644 --- a/src/runtime_src/xdp/profile/plugin/ml_timeline/clientDev/ml_timeline.cpp +++ b/src/runtime_src/xdp/profile/plugin/ml_timeline/clientDev/ml_timeline.cpp @@ -79,13 +79,12 @@ namespace xdp { return; } - static const uint32_t SIZE_4K = 0x1000; - static const uint32_t MAX_INDEX_IN_SIZE_3K = (0xC00 / (2 * sizeof(uint32_t))) - 1; + static constexpr uint32_t size_4K = 0x1000; // Read Record Timer TS buffer xrt::bo resultBO; try { - resultBO = xrt::bo(hwContext.get_device(), SIZE_4K, XCL_BO_FLAGS_CACHEABLE, instKernel.group_id(1)); + resultBO = xrt::bo(hwContext.get_device(), size_4K, XCL_BO_FLAGS_CACHEABLE, instKernel.group_id(1)); } catch (std::exception& e) { std::stringstream msg; @@ -118,14 +117,27 @@ namespace xdp { // Record Timer TS in JSON // Assuming correct Stub has been called and Write Buffer contains valid data - while (*ptr < MAX_INDEX_IN_SIZE_3K) { - boost::property_tree::ptree ptIdTS; - ptIdTS.put("id", *ptr); - ptr++; - ptIdTS.put("cycle", *ptr); - ptr++; - ptRecordTimerTS.push_back(std::make_pair("", ptIdTS)); + uint32_t numEntries = *ptr; // First 32bits contains the total num of entries + + /* Each record timer entry has 32bit ID and 32bit AIE Timer low value. + * Also, the first 32 bit in the buffer is used to store total number + * of record timer entries written so far. So, max_count_in_size_3K is 1 less + * than total number of entries possible in 3K buffer section. + */ + static constexpr uint32_t max_count_in_size_3K = (0x0C00 / (2 * sizeof(uint32_t))) - 1; + + if (numEntries <= max_count_in_size_3K) { + ptr++; + for (uint32_t i = 0 ; i < numEntries; i++) { + boost::property_tree::ptree ptIdTS; + ptIdTS.put("id", *ptr); + ptr++; + ptIdTS.put("cycle", *ptr); + ptr++; + + ptRecordTimerTS.push_back(std::make_pair("", ptIdTS)); + } } if (ptRecordTimerTS.empty()) {