Skip to content

Commit

Permalink
Fixes for CR-1167717, CR-1173167, and CR-1173061 (#7681)
Browse files Browse the repository at this point in the history
  • Loading branch information
pgschuey authored Aug 29, 2023
1 parent b01bb53 commit 86dce5a
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ namespace xdp {
/**
 * Subset of aiecompiler options extracted from the AIE metadata.
 *
 * The boolean flags carry default member initializers so that a
 * default-constructed instance never exposes indeterminate values.
 */
struct aiecompiler_options
{
  // Value of the "broadcast_enable_core" compiler option (off unless set)
  bool broadcast_enable_core = false;
  // Value of the "graph_iterator_event" compiler option (off unless set)
  bool graph_iterator_event = false;
  // Value of the "event_trace" compiler option; empty until populated
  std::string event_trace;
};

Expand Down
8 changes: 6 additions & 2 deletions src/runtime_src/xdp/profile/database/static_info/aie_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,12 @@ namespace aie {
aiecompiler_options getAIECompilerOptions(const boost::property_tree::ptree& aie_meta)
{
aiecompiler_options aiecompiler_options;
aiecompiler_options.broadcast_enable_core = aie_meta.get("aie_metadata.aiecompiler_options.broadcast_enable_core", false);
aiecompiler_options.event_trace = aie_meta.get("aie_metadata.aiecompiler_options.event_trace", "runtime");
aiecompiler_options.broadcast_enable_core =
aie_meta.get("aie_metadata.aiecompiler_options.broadcast_enable_core", false);
aiecompiler_options.graph_iterator_event =
aie_meta.get("aie_metadata.aiecompiler_options.graph_iterator_event", false);
aiecompiler_options.event_trace =
aie_meta.get("aie_metadata.aiecompiler_options.event_trace", "runtime");
return aiecompiler_options;
}

Expand Down
146 changes: 91 additions & 55 deletions src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cmath>
#include <memory>
#include <cstring>
#include <map>

#include "core/common/message.h"
#include "core/common/time.h"
Expand Down Expand Up @@ -271,60 +272,96 @@ namespace xdp {
return (runningEvents.find(event) != runningEvents.end());
}

// Translate a stream switch port event into the port number it refers to.
// Only the three port-1 events compared below yield 1; every other event
// value yields 0.
// NOTE(review): any port-1 event variant not listed here is also reported
// as port 0 - confirm that this is intended for all module types.
uint8_t AieProfile_EdgeImpl::getPortNumberFromEvent(XAie_Events event)
{
  const bool isPortOne = (event == XAIE_EVENT_PORT_RUNNING_1_CORE)
                      || (event == XAIE_EVENT_PORT_STALLED_1_CORE)
                      || (event == XAIE_EVENT_PORT_TLAST_1_PL);
  return isPortOne ? 1 : 0;
}

// Configure stream switch ports for monitoring purposes
// NOTE: Used to monitor streams: trace, interfaces, and memory tiles
XAie_Events
void
AieProfile_EdgeImpl::configStreamSwitchPorts(XAie_DevInst* aieDevInst, const tile_type& tile,
xaiefal::XAieTile& xaieTile, const XAie_LocType loc,
const module_type type, const XAie_Events event,
const std::string metricSet, const uint8_t channel)
const module_type type, const uint32_t numCounters,
const std::string metricSet, const uint8_t channel0,
const uint8_t channel1, std::vector<XAie_Events>& startEvents,
std::vector<XAie_Events>& endEvents)
{
// Only configure as needed: must be applicable event and only need at most two
if (!isStreamSwitchPortEvent(event))
return event;

auto switchPortRsc = xaieTile.sswitchPort();
auto ret = switchPortRsc->reserve();
if (ret != AieRC::XAIE_OK)
return event;

if (type == module_type::core) {
// AIE Tiles (e.g., trace streams)
// Define stream switch port to monitor core or memory trace
uint8_t traceSelect = (event == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1;
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect);
}
else if (type == module_type::shim) {
// Interface tiles (e.g., PLIO, GMIO)
// Grab slave/master and stream ID
// NOTE: stored in getTilesForProfiling() above
auto slaveOrMaster = (tile.itr_mem_col == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
auto streamPortId = static_cast<uint8_t>(tile.itr_mem_row);
switchPortRsc->setPortToSelect(slaveOrMaster, SOUTH, streamPortId);
}
else {
// Memory tiles
if (metricSet.find("trace") != std::string::npos) {
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0);
std::map<uint8_t, std::shared_ptr<xaiefal::XAieStreamPortSelect>> switchPortMap;

// Traverse all counters and request monitor ports as needed
for (int i=0; i < numCounters; ++i) {
// Ensure applicable event
auto startEvent = startEvents.at(i);
auto endEvent = endEvents.at(i);
if (!isStreamSwitchPortEvent(startEvent))
continue;

bool newPort = false;
auto portnum = getPortNumberFromEvent(startEvent);

// New port needed: reserve, configure, and store
if (switchPortMap.find(portnum) == switchPortMap.end()) {
auto switchPortRsc = xaieTile.sswitchPort();
if (switchPortRsc->reserve() != AieRC::XAIE_OK)
continue;
newPort = true;
switchPortMap[portnum] = switchPortRsc;

if (type == module_type::core) {
// AIE Tiles (e.g., trace streams)
// Define stream switch port to monitor core or memory trace
uint8_t traceSelect = (startEvent == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1;
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect);
}
else if (type == module_type::shim) {
// Interface tiles (e.g., PLIO, GMIO)
// Grab slave/master and stream ID
// NOTE: stored in getTilesForProfiling() above
auto slaveOrMaster = (tile.itr_mem_col == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
auto streamPortId = static_cast<uint8_t>(tile.itr_mem_row);
switchPortRsc->setPortToSelect(slaveOrMaster, SOUTH, streamPortId);
}
else {
// Memory tiles
if (metricSet.find("trace") != std::string::npos) {
switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0);
}
else {
uint8_t channel = (portnum == 0) ? channel0 : channel1;
auto slaveOrMaster = (metricSet.find("output") != std::string::npos) ?
XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel);
}
}
}
else {
auto slaveOrMaster = (metricSet.find("output") != std::string::npos) ?
XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER;
switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel);

auto switchPortRsc = switchPortMap[portnum];

// Event options:
// getSSIdleEvent, getSSRunningEvent, getSSStalledEvent, & getSSTlastEvent
XAie_Events ssEvent;
if (isPortRunningEvent(startEvent))
switchPortRsc->getSSRunningEvent(ssEvent);
else
switchPortRsc->getSSStalledEvent(ssEvent);
startEvents.at(i) = ssEvent;
endEvents.at(i) = ssEvent;

if (newPort) {
switchPortRsc->start();
mStreamPorts.push_back(switchPortRsc);
}
}

// Event options:
// getSSIdleEvent, getSSRunningEvent, getSSStalledEvent, & getSSTlastEvent
XAie_Events ssEvent;
if (isPortRunningEvent(event))
switchPortRsc->getSSRunningEvent(ssEvent);
else
switchPortRsc->getSSStalledEvent(ssEvent);

switchPortRsc->start();
mStreamPorts.push_back(switchPortRsc);
return ssEvent;
switchPortMap.clear();
}

void
Expand Down Expand Up @@ -530,24 +567,19 @@ namespace xdp {
auto iter1 = configChannel1.find(tile);
uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second;
uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second;

configEventSelections(aieDevInst, loc, XAIE_MEM_MOD, type, metricSet, channel0, channel1);
configStreamSwitchPorts(aieDevInst, tileMetric.first, xaieTile, loc, type, numFreeCtr,
metricSet, channel0, channel1, startEvents, endEvents);

// Request and configure all available counters for this tile
for (int i=0; i < numFreeCtr; ++i) {
auto startEvent = startEvents.at(i);
auto endEvent = endEvents.at(i);
uint8_t resetEvent = 0;

// Channel number is based on monitoring port 0 or 1
auto channel = (startEvent <= XAIE_EVENT_PORT_TLAST_0_MEM_TILE) ? channel0 : channel1;

// Configure group event before reserving and starting counter
configGroupEvents(aieDevInst, loc, mod, startEvent, metricSet);
auto event = configStreamSwitchPorts(aieDevInst, tileMetric.first, xaieTile, loc, type,
startEvent, metricSet, channel);
if (event != startEvent) {
endEvent = (endEvent == startEvent) ? event : endEvent;
startEvent = event;
}

// Request counter from resource manager
auto perfCounter = xaieModule.perfCounter();
Expand All @@ -556,7 +588,7 @@ namespace xdp {
ret = perfCounter->reserve();
if (ret != XAIE_OK) break;

// Start the counters after group events have been configured
// Start the counter
ret = perfCounter->start();
if (ret != XAIE_OK) break;
mPerfCounters.push_back(perfCounter);
Expand All @@ -568,6 +600,10 @@ namespace xdp {
XAie_EventLogicalToPhysicalConv(aieDevInst, loc, mod, endEvent, &tmpEnd);
uint16_t phyStartEvent = tmpStart + mCounterBases[type];
uint16_t phyEndEvent = tmpEnd + mCounterBases[type];

// Get payload for reporting purposes
auto portnum = getPortNumberFromEvent(startEvent);
uint8_t channel = (portnum == 0) ? channel0 : channel1;
auto payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row,
startEvent, metricSet, channel);

Expand Down
21 changes: 13 additions & 8 deletions src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace xdp {
bool isValidType(module_type type, XAie_ModuleType mod);
bool isStreamSwitchPortEvent(const XAie_Events event);
bool isPortRunningEvent(const XAie_Events event);
uint8_t getPortNumberFromEvent(XAie_Events event);
void printTileModStats(xaiefal::XAieDev* aieDevice,
const tile_type& tile,
const XAie_ModuleType mod);
Expand All @@ -58,14 +59,17 @@ namespace xdp {
const XAie_ModuleType mod,
const XAie_Events event,
const std::string metricSet);
XAie_Events configStreamSwitchPorts(XAie_DevInst* aieDevInst,
const tile_type& tile,
xaiefal::XAieTile& xaieTile,
const XAie_LocType loc,
const module_type type,
const XAie_Events event,
const std::string metricSet,
const uint8_t channel);
void configStreamSwitchPorts(XAie_DevInst* aieDevInst,
const tile_type& tile,
xaiefal::XAieTile& xaieTile,
const XAie_LocType loc,
const module_type type,
const uint32_t numCounters,
const std::string metricSet,
const uint8_t channel0,
const uint8_t channel1,
std::vector<XAie_Events>& startEvents,
std::vector<XAie_Events>& endEvents);
void configEventSelections(XAie_DevInst* aieDevInst,
const XAie_LocType loc,
const XAie_ModuleType mod,
Expand All @@ -81,6 +85,7 @@ namespace xdp {
uint16_t startEvent,
const std::string metricSet,
const uint8_t channel);

private:
XAie_DevInst* aieDevInst = nullptr;
xaiefal::XAieDev* aieDevice = nullptr;
Expand Down
72 changes: 29 additions & 43 deletions src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,13 @@ namespace xdp {
}

// Get tiles to status
void AIEStatusPlugin::getTilesForStatus(void* handle)
void AIEStatusPlugin::getTilesForStatus()
{
std::shared_ptr<xrt_core::device> device = xrt_core::get_userpf_device(handle);

// Capture all tiles across all graphs
// Note: in the future, we could support user-defined tile sets
auto graphs = xrt_core::edge::aie::get_graphs(device.get());
auto graphs = aie::getValidGraphs(mAieMeta);
for (auto& graph : graphs) {
mGraphCoreTilesMap[graph] = xrt_core::edge::aie::get_event_tiles(device.get(), graph,
xrt_core::edge::aie::module_type::core);
mGraphCoreTilesMap[graph] = aie::getEventTiles(mAieMeta, graph, module_type::core);
}

// Report tiles (debug only)
Expand Down Expand Up @@ -189,7 +186,8 @@ namespace xdp {

// AIE core register offsets
constexpr uint64_t AIE_OFFSET_CORE_STATUS = 0x32004;
auto offset = getAIETileRowOffset(handle);
auto offset = aie::getAIETileRowOffset(mAieMeta);
auto hwGen = aie::getHardwareGeneration(mAieMeta);

// This mask checks for the following states
// ECC_Scrubbing_Stall
Expand Down Expand Up @@ -220,6 +218,7 @@ namespace xdp {
// Reset values
constexpr uint32_t CORE_RESET_STATUS = 0x2;
constexpr uint32_t CORE_ENABLE_MASK = 0x1;

// Tiles already reported with error(s)
std::set<tile_type> errorTileSet;
// Graph -> total stuck core cycles
Expand Down Expand Up @@ -298,18 +297,26 @@ namespace xdp {
// Check for errors in tile
// NOTE: warning is only issued once per tile
if (errorTileSet.find(tile) == errorTileSet.end()) {
uint8_t coreErrors0 = 0;
uint8_t coreErrors1 = 0;
uint8_t memErrors = 0;
auto loc = XAie_TileLoc(tile.col, tile.row + offset);
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_0_CORE, &coreErrors0);
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_1_CORE, &coreErrors1);

// Memory module
uint8_t memErrors = 0;
XAie_EventReadStatus(aieDevInst, loc, XAIE_MEM_MOD,
XAIE_EVENT_GROUP_ERRORS_MEM, &memErrors);
XAIE_EVENT_GROUP_ERRORS_MEM, &memErrors);

if (coreErrors0 || coreErrors1 || memErrors) {
// Core module
// NOTE: Per CR-1167717, ignore group errors on AIE1 devices
// since instruction event 2 is used as DONE bit.
uint8_t coreErrors0 = 0;
uint8_t coreErrors1 = 0;
if (hwGen > 1) {
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_0_CORE, &coreErrors0);
XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD,
XAIE_EVENT_GROUP_ERRORS_1_CORE, &coreErrors1);
}

if (memErrors || coreErrors0 || coreErrors1) {
std::stringstream errorMessage;
errorMessage << "Error(s) found in tile (" << tile.col << "," << tile.row
<< "). Please view status in Vitis Analyzer for specifics.";
Expand Down Expand Up @@ -402,8 +409,13 @@ namespace xdp {
}
}

// Grab AIE metadata
auto device = xrt_core::get_userpf_device(handle);
auto data = device->get_axlf_section(AIE_METADATA);
aie::readAIEMetadata(data.first, data.second, mAieMeta);

// Update list of tiles to debug
getTilesForStatus(handle);
getTilesForStatus();

// Open the writer for this device
struct xclDeviceInfo2 info;
Expand Down Expand Up @@ -477,30 +489,4 @@ namespace xdp {
mStatusThreadMap.clear();
}

uint16_t AIEStatusPlugin::getAIETileRowOffset(void* handle)
{
static uint16_t rowOffset = 1;
static bool gotValue = false;
if (!gotValue) {
auto device = xrt_core::get_userpf_device(handle);
auto data = device->get_axlf_section(AIE_METADATA);
if (!data.first || !data.second) {
rowOffset = 1;
} else {
boost::property_tree::ptree aie_meta;
read_aie_metadata(data.first, data.second, aie_meta);
rowOffset = aie_meta.get_child("aie_metadata.driver_config.aie_tile_row_start").get_value<uint16_t>();
}
gotValue = true;
}
return rowOffset;
}

// Parse a raw AIE metadata buffer as JSON into a property tree.
//   data        - pointer to the metadata bytes
//   size        - number of bytes available at data
//   aie_project - tree filled in by boost::property_tree::read_json
void AIEStatusPlugin::read_aie_metadata(const char* data, size_t size, boost::property_tree::ptree& aie_project)
{
  // Stage the bytes in a stream so they can be handed to read_json
  std::stringstream jsonBuffer;
  jsonBuffer.write(data, size);
  boost::property_tree::read_json(jsonBuffer, aie_project);
}

} // end namespace xdp
Loading

0 comments on commit 86dce5a

Please sign in to comment.