Skip to content

Commit

Permalink
Use write buffer for AIE Profile, AIE Debug Plugins in Client Device (#…
Browse files Browse the repository at this point in the history
…7757)

* Value field is no longer needed, as only the result will be stored in the special 4K write buffer

Signed-off-by: IshitaGhosh <[email protected]>

* Use special 4K write buffer on Client device to sync back AIE Profile result

Signed-off-by: IshitaGhosh <[email protected]>

* Use special 4K write buffer on Client device to sync back AIE Debug result

Signed-off-by: IshitaGhosh <[email protected]>

* Use constexpr

Signed-off-by: IshitaGhosh <[email protected]>

* Fix compile error

Signed-off-by: IshitaGhosh <[email protected]>

* Fix compile error

Signed-off-by: IshitaGhosh <[email protected]>

* Fix compile error

Signed-off-by: IshitaGhosh <[email protected]>

* Update kernel name and messages

Signed-off-by: IshitaGhosh <[email protected]>

* The result buffer for Record Timer now contains the total number of entries in the first 32 bits, followed by the record timer data.

Signed-off-by: IshitaGhosh <[email protected]>

* Update kernel name in AIE Profile

Signed-off-by: IshitaGhosh <[email protected]>

* Update constant names

Signed-off-by: IshitaGhosh <[email protected]>

---------

Signed-off-by: IshitaGhosh <[email protected]>
  • Loading branch information
IshitaGhosh authored Oct 20, 2023
1 parent a079821 commit a839622
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ enum class module_type {
// Used by IPU profiling/debug on Windows
typedef struct {
uint64_t perf_address;
uint32_t perf_value;
} profile_data_t;

typedef struct {
Expand Down
40 changes: 27 additions & 13 deletions src/runtime_src/xdp/profile/plugin/aie_debug/aie_debug_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ namespace xdp {
msg << "AIE Debug monitoring AIE tile (" << tile.col << ","
<< tile.row << ") in module " << module << ".";
xrt_core::message::send(severity_level::debug, "XRT", msg.str());
op_profile_data.emplace_back(profile_data_t{Regs[i] + (tile.col << 25) + (tile.row << 20), 0});
op_profile_data.emplace_back(profile_data_t{Regs[i] + (tile.col << 25) + (tile.row << 20)});
counterId++;
}
}
Expand Down Expand Up @@ -251,10 +251,10 @@ namespace xdp {
auto read_op_code_ = XAie_RequestCustomTxnOp(&aieDevInst);

try {
mKernel = xrt::kernel(context, "DPU_1x4_NEW");
mKernel = xrt::kernel(context, "XDP_KERNEL");
} catch (std::exception &e){
std::stringstream msg;
msg << "Unable to find DPU kernel from hardware context. Not configuring AIE Profile. " << e.what() ;
msg << "Unable to find XDP_KERNEL kernel from hardware context. Not configuring AIE Debug. " << e.what() ;
xrt_core::message::send(severity_level::warning, "XRT", msg.str());
return;
}
Expand All @@ -270,7 +270,7 @@ namespace xdp {
instr_bo = xrt::bo(context.get_device(), instr_buf.ibuf_.size(), XCL_BO_FLAGS_CACHEABLE, mKernel.group_id(1));
} catch (std::exception &e){
std::stringstream msg;
msg << "Unable to create the instruction buffer for polling during AIE Profile. " << e.what() << std::endl;
msg << "Unable to create the instruction buffer for polling during AIE Debug. " << e.what() << std::endl;
xrt_core::message::send(severity_level::warning, "XRT", msg.str());
return;
}
Expand All @@ -284,21 +284,34 @@ namespace xdp {
std::stringstream msg;
msg << "Unable to successfully execute AIE Profile polling kernel. " << e.what() << std::endl;
xrt_core::message::send(severity_level::warning, "XRT", msg.str());
return;
}

XAie_ClearTransaction(&aieDevInst);

auto instrbo_map = instr_bo.map<uint8_t*>();
instr_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

// TODO: figure out where the 8 comes from
instrbo_map += sizeof(XAie_TxnHeader) + sizeof(XAie_CustomOpHdr) + 8;
auto output = reinterpret_cast<aie_profile_op_t*>(instrbo_map);
static constexpr uint32_t size_4K = 0x1000;
static constexpr uint32_t offset_3K = 0x0C00;

for (uint32_t i = 0; i < output->count; i++) {
// results BO syncs AIE Debug result from device
xrt::bo result_bo;
try {
result_bo = xrt::bo(context.get_device(), size_4K, XCL_BO_FLAGS_CACHEABLE, mKernel.group_id(1));
} catch (std::exception &e) {
std::stringstream msg;
msg << "Debug Register address/values: 0x" << std::hex << output->profile_data[i].perf_address << ": " << std::dec << output->profile_data[i].perf_value;
xrt_core::message::send(severity_level::debug, "XRT", msg.str());
msg << "Unable to create result buffer for AIE Debug. Cannot get AIE Debug Info." << e.what() << std::endl;
xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str());
return;
}

auto result_bo_map = result_bo.map<uint8_t*>();
result_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

uint32_t* output = reinterpret_cast<uint32_t*>(result_bo_map+offset_3K);

for (uint32_t i = 0; i < op->count; i++) {
std::stringstream msg;
msg << "Debug Register address/values: 0x" << std::hex << op->profile_data[i].perf_address << ": " << std::dec << output[i];
xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str());
}

free(op);
Expand All @@ -310,3 +323,4 @@ namespace xdp {
}

} // end namespace xdp

39 changes: 26 additions & 13 deletions src/runtime_src/xdp/profile/plugin/aie_profile/win/aie_profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ namespace xdp {

std::vector<uint64_t> Regs = regValues[type];
// 25 is column offset and 20 is row offset for IPU
op_profile_data.emplace_back(profile_data_t{Regs[i] + (col << 25) + (row << 20), 0});
op_profile_data.emplace_back(profile_data_t{Regs[i] + (col << 25) + (row << 20)});

std::vector<uint64_t> values;
values.insert(values.end(), {col, row, phyStartEvent, phyEndEvent, resetEvent, 0, 1000, payload});
Expand Down Expand Up @@ -314,10 +314,10 @@ namespace xdp {
auto context = metadata->getHwContext();

try {
mKernel = xrt::kernel(context, "DPU_PROFILE");
mKernel = xrt::kernel(context, "XDP_KERNEL");
} catch (std::exception &e){
std::stringstream msg;
msg << "Unable to find DPU_PROFILE kernel from hardware context. Failed to configure AIE Profile." << e.what() ;
msg << "Unable to find XDP_KERNEL kernel from hardware context. Failed to configure AIE Profile." << e.what() ;
xrt_core::message::send(severity_level::warning, "XRT", msg.str());
return false;
}
Expand Down Expand Up @@ -518,23 +518,36 @@ namespace xdp {
std::stringstream msg;
msg << "Unable to successfully execute AIE Profile polling kernel. " << e.what() << std::endl;
xrt_core::message::send(severity_level::warning, "XRT", msg.str());
return;
}

XAie_ClearTransaction(&aieDevInst);

auto instrbo_map = instr_bo.map<uint8_t*>();
instr_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

// TODO: figure out where the 8 comes from
instrbo_map += sizeof(XAie_TxnHeader) + sizeof(XAie_CustomOpHdr) + 8;
auto output = reinterpret_cast<aie_profile_op_t*>(instrbo_map);
static constexpr uint32_t size_4K = 0x1000;
static constexpr uint32_t offset_3K = 0x0C00;

for (uint32_t i = 0; i < output->count; i++) {
// results BO syncs profile result from device
xrt::bo result_bo;
try {
result_bo = xrt::bo(context.get_device(), size_4K, XCL_BO_FLAGS_CACHEABLE, mKernel.group_id(1));
} catch (std::exception &e) {
std::stringstream msg;
msg << "Counter address/values: 0x" << std::hex << output->profile_data[i].perf_address << ": " << std::dec << output->profile_data[i].perf_value;
xrt_core::message::send(severity_level::debug, "XRT", msg.str());
msg << "Unable to create result buffer for AIE Profle. Cannot get AIE Profile Info." << e.what() << std::endl;
xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str());
return;
}

auto result_bo_map = result_bo.map<uint8_t*>();
result_bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

uint32_t* output = reinterpret_cast<uint32_t*>(result_bo_map+offset_3K);

for (uint32_t i = 0; i < op->count; i++) {
std::stringstream msg;
msg << "Counter address/values: 0x" << std::hex << op->profile_data[i].perf_address << ": " << std::dec << output[i];
xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str());
std::vector<uint64_t> values = outputValues[i];
values[5] = static_cast<uint64_t>(output->profile_data[i].perf_value); //write pc value
values[5] = static_cast<uint64_t>(output[i]); //write pc value
db->getDynamicInfo().addAIESample(index, timestamp, values);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,12 @@ namespace xdp {
return;
}

static const uint32_t SIZE_4K = 0x1000;
static const uint32_t MAX_INDEX_IN_SIZE_3K = (0xC00 / (2 * sizeof(uint32_t))) - 1;
static constexpr uint32_t size_4K = 0x1000;

// Read Record Timer TS buffer
xrt::bo resultBO;
try {
resultBO = xrt::bo(hwContext.get_device(), SIZE_4K, XCL_BO_FLAGS_CACHEABLE, instKernel.group_id(1));
resultBO = xrt::bo(hwContext.get_device(), size_4K, XCL_BO_FLAGS_CACHEABLE, instKernel.group_id(1));
}
catch (std::exception& e) {
std::stringstream msg;
Expand Down Expand Up @@ -118,14 +117,27 @@ namespace xdp {

// Record Timer TS in JSON
// Assuming correct Stub has been called and Write Buffer contains valid data
while (*ptr < MAX_INDEX_IN_SIZE_3K) {
boost::property_tree::ptree ptIdTS;
ptIdTS.put("id", *ptr);
ptr++;
ptIdTS.put("cycle", *ptr);
ptr++;

ptRecordTimerTS.push_back(std::make_pair("", ptIdTS));
uint32_t numEntries = *ptr; // First 32bits contains the total num of entries

/* Each record timer entry has 32bit ID and 32bit AIE Timer low value.
* Also, the first 32 bit in the buffer is used to store total number
* of record timer entries written so far. So, max_count_in_size_3K is 1 less
* than total number of entries possible in 3K buffer section.
*/
static constexpr uint32_t max_count_in_size_3K = (0x0C00 / (2 * sizeof(uint32_t))) - 1;

if (numEntries <= max_count_in_size_3K) {
ptr++;
for (uint32_t i = 0 ; i < numEntries; i++) {
boost::property_tree::ptree ptIdTS;
ptIdTS.put("id", *ptr);
ptr++;
ptIdTS.put("cycle", *ptr);
ptr++;

ptRecordTimerTS.push_back(std::make_pair("", ptIdTS));
}
}

if (ptRecordTimerTS.empty()) {
Expand Down

0 comments on commit a839622

Please sign in to comment.