From 86dce5a860095c7b4c9f5ca339c0674e1fce3af2 Mon Sep 17 00:00:00 2001 From: pgschuey Date: Tue, 29 Aug 2023 11:11:20 -0600 Subject: [PATCH] Fixes for CR-1167717, CR-1173167, and CR-1173061 (#7681) --- .../database/static_info/aie_constructs.h | 1 + .../profile/database/static_info/aie_util.cpp | 8 +- .../plugin/aie_profile/edge/aie_profile.cpp | 146 +++++++++++------- .../plugin/aie_profile/edge/aie_profile.h | 21 ++- .../plugin/aie_status/aie_status_plugin.cpp | 72 ++++----- .../plugin/aie_status/aie_status_plugin.h | 13 +- .../plugin/aie_trace/aie_trace_metadata.cpp | 18 ++- .../plugin/aie_trace/aie_trace_metadata.h | 2 +- 8 files changed, 158 insertions(+), 123 deletions(-) diff --git a/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h b/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h index beda0bd1719..c7c01a193aa 100644 --- a/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h +++ b/src/runtime_src/xdp/profile/database/static_info/aie_constructs.h @@ -28,6 +28,7 @@ namespace xdp { struct aiecompiler_options { bool broadcast_enable_core; + bool graph_iterator_event; std::string event_trace; }; diff --git a/src/runtime_src/xdp/profile/database/static_info/aie_util.cpp b/src/runtime_src/xdp/profile/database/static_info/aie_util.cpp index ab12706c99c..45afc0c1cfc 100755 --- a/src/runtime_src/xdp/profile/database/static_info/aie_util.cpp +++ b/src/runtime_src/xdp/profile/database/static_info/aie_util.cpp @@ -90,8 +90,12 @@ namespace aie { aiecompiler_options getAIECompilerOptions(const boost::property_tree::ptree& aie_meta) { aiecompiler_options aiecompiler_options; - aiecompiler_options.broadcast_enable_core = aie_meta.get("aie_metadata.aiecompiler_options.broadcast_enable_core", false); - aiecompiler_options.event_trace = aie_meta.get("aie_metadata.aiecompiler_options.event_trace", "runtime"); + aiecompiler_options.broadcast_enable_core = + aie_meta.get("aie_metadata.aiecompiler_options.broadcast_enable_core", false); + aiecompiler_options.graph_iterator_event = + aie_meta.get("aie_metadata.aiecompiler_options.graph_iterator_event", false); + aiecompiler_options.event_trace = + aie_meta.get("aie_metadata.aiecompiler_options.event_trace", "runtime"); return aiecompiler_options; } diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.cpp b/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.cpp index b6fc5291db5..eec7afc7b1d 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "core/common/message.h" #include "core/common/time.h" @@ -271,60 +272,96 @@ namespace xdp { return (runningEvents.find(event) != runningEvents.end()); } + uint8_t AieProfile_EdgeImpl::getPortNumberFromEvent(XAie_Events event) + { + switch (event) { + case XAIE_EVENT_PORT_RUNNING_1_CORE: + case XAIE_EVENT_PORT_STALLED_1_CORE: + case XAIE_EVENT_PORT_TLAST_1_PL: + return 1; + default: + return 0; + } + } + // Configure stream switch ports for monitoring purposes // NOTE: Used to monitor streams: trace, interfaces, and memory tiles - XAie_Events + void AieProfile_EdgeImpl::configStreamSwitchPorts(XAie_DevInst* aieDevInst, const tile_type& tile, xaiefal::XAieTile& xaieTile, const XAie_LocType loc, - const module_type type, const XAie_Events event, - const std::string metricSet, const uint8_t channel) + const module_type type, const uint32_t numCounters, + const std::string metricSet, const uint8_t channel0, + const uint8_t channel1, std::vector& startEvents, + std::vector& endEvents) { - // Only configure as needed: must be applicable event and only need at most two - if (!isStreamSwitchPortEvent(event)) - return event; - - auto switchPortRsc = xaieTile.sswitchPort(); - auto ret = switchPortRsc->reserve(); - if (ret != AieRC::XAIE_OK) - return event; - - if (type == module_type::core) { - // AIE Tiles (e.g., trace streams) - // Define stream switch port to monitor core or memory trace - uint8_t traceSelect = (event == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1; - switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect); - } - else if (type == module_type::shim) { - // Interface tiles (e.g., PLIO, GMIO) - // Grab slave/master and stream ID - // NOTE: stored in getTilesForProfiling() above - auto slaveOrMaster = (tile.itr_mem_col == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; - auto streamPortId = static_cast(tile.itr_mem_row); - switchPortRsc->setPortToSelect(slaveOrMaster, SOUTH, streamPortId); - } - else { - // Memory tiles - if (metricSet.find("trace") != std::string::npos) { - switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0); + std::map> switchPortMap; + + // Traverse all counters and request monitor ports as needed + for (int i=0; i < numCounters; ++i) { + // Ensure applicable event + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + if (!isStreamSwitchPortEvent(startEvent)) + continue; + + bool newPort = false; + auto portnum = getPortNumberFromEvent(startEvent); + + // New port needed: reserver, configure, and store + if (switchPortMap.find(portnum) == switchPortMap.end()) { + auto switchPortRsc = xaieTile.sswitchPort(); + if (switchPortRsc->reserve() != AieRC::XAIE_OK) + continue; + newPort = true; + switchPortMap[portnum] = switchPortRsc; + + if (type == module_type::core) { + // AIE Tiles (e.g., trace streams) + // Define stream switch port to monitor core or memory trace + uint8_t traceSelect = (startEvent == XAIE_EVENT_PORT_RUNNING_0_CORE) ? 0 : 1; + switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, traceSelect); + } + else if (type == module_type::shim) { + // Interface tiles (e.g., PLIO, GMIO) + // Grab slave/master and stream ID + // NOTE: stored in getTilesForProfiling() above + auto slaveOrMaster = (tile.itr_mem_col == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + auto streamPortId = static_cast(tile.itr_mem_row); + switchPortRsc->setPortToSelect(slaveOrMaster, SOUTH, streamPortId); + } + else { + // Memory tiles + if (metricSet.find("trace") != std::string::npos) { + switchPortRsc->setPortToSelect(XAIE_STRMSW_SLAVE, TRACE, 0); + } + else { + uint8_t channel = (portnum == 0) ? channel0 : channel1; + auto slaveOrMaster = (metricSet.find("output") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel); + } + } } - else { - auto slaveOrMaster = (metricSet.find("output") != std::string::npos) ? - XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; - switchPortRsc->setPortToSelect(slaveOrMaster, DMA, channel); + + auto switchPortRsc = switchPortMap[portnum]; + + // Event options: + // getSSIdleEvent, getSSRunningEvent, getSSStalledEvent, & getSSTlastEvent + XAie_Events ssEvent; + if (isPortRunningEvent(startEvent)) + switchPortRsc->getSSRunningEvent(ssEvent); + else + switchPortRsc->getSSStalledEvent(ssEvent); + startEvents.at(i) = ssEvent; + endEvents.at(i) = ssEvent; + + if (newPort) { + switchPortRsc->start(); + mStreamPorts.push_back(switchPortRsc); } } - // Event options: - // getSSIdleEvent, getSSRunningEvent, getSSStalledEvent, & getSSTlastEvent - XAie_Events ssEvent; - if (isPortRunningEvent(event)) - switchPortRsc->getSSRunningEvent(ssEvent); - else - switchPortRsc->getSSStalledEvent(ssEvent); - - switchPortRsc->start(); - mStreamPorts.push_back(switchPortRsc); - return ssEvent; + switchPortMap.clear(); } void @@ -530,7 +567,10 @@ namespace xdp { auto iter1 = configChannel1.find(tile); uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + configEventSelections(aieDevInst, loc, XAIE_MEM_MOD, type, metricSet, channel0, channel1); + configStreamSwitchPorts(aieDevInst, tileMetric.first, xaieTile, loc, type, numFreeCtr, + metricSet, channel0, channel1, startEvents, endEvents); // Request and configure all available counters for this tile for (int i=0; i < numFreeCtr; ++i) { @@ -538,16 +578,8 @@ namespace xdp { auto endEvent = endEvents.at(i); uint8_t resetEvent = 0; - // Channel number is based on monitoring port 0 or 1 - auto channel = (startEvent <= XAIE_EVENT_PORT_TLAST_0_MEM_TILE) ? channel0 : channel1; - + // Configure group event before reserving and starting counter configGroupEvents(aieDevInst, loc, mod, startEvent, metricSet); - auto event = configStreamSwitchPorts(aieDevInst, tileMetric.first, xaieTile, loc, type, - startEvent, metricSet, channel); - if (event != startEvent) { - endEvent = (endEvent == startEvent) ? event : endEvent; - startEvent = event; - } // Request counter from resource manager auto perfCounter = xaieModule.perfCounter(); @@ -556,7 +588,7 @@ namespace xdp { ret = perfCounter->reserve(); if (ret != XAIE_OK) break; - // Start the counters after group events have been configured + // Start the counter ret = perfCounter->start(); if (ret != XAIE_OK) break; mPerfCounters.push_back(perfCounter); @@ -568,6 +600,10 @@ namespace xdp { XAie_EventLogicalToPhysicalConv(aieDevInst, loc, mod, endEvent, &tmpEnd); uint16_t phyStartEvent = tmpStart + mCounterBases[type]; uint16_t phyEndEvent = tmpEnd + mCounterBases[type]; + + // Get payload for reporting purposes + auto portnum = getPortNumberFromEvent(startEvent); + uint8_t channel = (portnum == 0) ? channel0 : channel1; auto payload = getCounterPayload(aieDevInst, tileMetric.first, type, col, row, startEvent, metricSet, channel); diff --git a/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.h b/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.h index 3343257a8ae..dfbf884dc42 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.h +++ b/src/runtime_src/xdp/profile/plugin/aie_profile/edge/aie_profile.h @@ -50,6 +50,7 @@ namespace xdp { bool isValidType(module_type type, XAie_ModuleType mod); bool isStreamSwitchPortEvent(const XAie_Events event); bool isPortRunningEvent(const XAie_Events event); + uint8_t getPortNumberFromEvent(XAie_Events event); void printTileModStats(xaiefal::XAieDev* aieDevice, const tile_type& tile, const XAie_ModuleType mod); @@ -58,14 +59,17 @@ namespace xdp { const XAie_ModuleType mod, const XAie_Events event, const std::string metricSet); - XAie_Events configStreamSwitchPorts(XAie_DevInst* aieDevInst, - const tile_type& tile, - xaiefal::XAieTile& xaieTile, - const XAie_LocType loc, - const module_type type, - const XAie_Events event, - const std::string metricSet, - const uint8_t channel); + void configStreamSwitchPorts(XAie_DevInst* aieDevInst, + const tile_type& tile, + xaiefal::XAieTile& xaieTile, + const XAie_LocType loc, + const module_type type, + const uint32_t numCounters, + const std::string metricSet, + const uint8_t channel0, + const uint8_t channel1, + std::vector& startEvents, + std::vector& endEvents); void configEventSelections(XAie_DevInst* aieDevInst, const XAie_LocType loc, const XAie_ModuleType mod, @@ -81,6 +85,7 @@ namespace xdp { uint16_t startEvent, const std::string metricSet, const uint8_t channel); + private: XAie_DevInst* aieDevInst = nullptr; xaiefal::XAieDev* aieDevice = nullptr; diff --git a/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp b/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp index 761088bdc8d..4ced0676df9 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp @@ -100,16 +100,13 @@ namespace xdp { } // Get tiles to status - void AIEStatusPlugin::getTilesForStatus(void* handle) + void AIEStatusPlugin::getTilesForStatus() { - std::shared_ptr device = xrt_core::get_userpf_device(handle); - // Capture all tiles across all graphs // Note: in the future, we could support user-defined tile sets - auto graphs = xrt_core::edge::aie::get_graphs(device.get()); + auto graphs = aie::getValidGraphs(mAieMeta); for (auto& graph : graphs) { - mGraphCoreTilesMap[graph] = xrt_core::edge::aie::get_event_tiles(device.get(), graph, - xrt_core::edge::aie::module_type::core); + mGraphCoreTilesMap[graph] = aie::getEventTiles(mAieMeta, graph, module_type::core); } // Report tiles (debug only) @@ -189,7 +186,8 @@ namespace xdp { // AIE core register offsets constexpr uint64_t AIE_OFFSET_CORE_STATUS = 0x32004; - auto offset = getAIETileRowOffset(handle); + auto offset = aie::getAIETileRowOffset(mAieMeta); + auto hwGen = aie::getHardwareGeneration(mAieMeta); // This mask check for following states // ECC_Scrubbing_Stall @@ -220,6 +218,7 @@ namespace xdp { // Reset values constexpr uint32_t CORE_RESET_STATUS = 0x2; constexpr uint32_t CORE_ENABLE_MASK = 0x1; + // Tiles already reported with error(s) std::set errorTileSet; // Graph -> total stuck core cycles @@ -298,18 +297,26 @@ namespace xdp { // Check for errors in tile // NOTE: warning is only issued once per tile if (errorTileSet.find(tile) == errorTileSet.end()) { - uint8_t coreErrors0 = 0; - uint8_t coreErrors1 = 0; - uint8_t memErrors = 0; auto loc = XAie_TileLoc(tile.col, tile.row + offset); - XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD, - XAIE_EVENT_GROUP_ERRORS_0_CORE, &coreErrors0); - XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD, - XAIE_EVENT_GROUP_ERRORS_1_CORE, &coreErrors1); + + // Memory module + uint8_t memErrors = 0; XAie_EventReadStatus(aieDevInst, loc, XAIE_MEM_MOD, - XAIE_EVENT_GROUP_ERRORS_MEM, &memErrors); + XAIE_EVENT_GROUP_ERRORS_MEM, &memErrors); - if (coreErrors0 || coreErrors1 || memErrors) { + // Core module + // NOTE: Per CR-1167717, ignore group errors on AIE1 devices + // since instruction event 2 is used as DONE bit. + uint8_t coreErrors0 = 0; + uint8_t coreErrors1 = 0; + if (hwGen > 1) { + XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD, + XAIE_EVENT_GROUP_ERRORS_0_CORE, &coreErrors0); + XAie_EventReadStatus(aieDevInst, loc, XAIE_CORE_MOD, + XAIE_EVENT_GROUP_ERRORS_1_CORE, &coreErrors1); + } + + if (memErrors || coreErrors0 || coreErrors1) { std::stringstream errorMessage; errorMessage << "Error(s) found in tile (" << tile.col << "," << tile.row << "). Please view status in Vitis Analyzer for specifics."; @@ -402,8 +409,13 @@ namespace xdp { } } + // Grab AIE metadata + auto device = xrt_core::get_userpf_device(handle); + auto data = device->get_axlf_section(AIE_METADATA); + aie::readAIEMetadata(data.first, data.second, mAieMeta); + // Update list of tiles to debug - getTilesForStatus(handle); + getTilesForStatus(); // Open the writer for this device struct xclDeviceInfo2 info; @@ -477,30 +489,4 @@ namespace xdp { mStatusThreadMap.clear(); } - uint16_t AIEStatusPlugin::getAIETileRowOffset(void* handle) - { - static uint16_t rowOffset = 1; - static bool gotValue = false; - if (!gotValue) { - auto device = xrt_core::get_userpf_device(handle); - auto data = device->get_axlf_section(AIE_METADATA); - if (!data.first || !data.second) { - rowOffset = 1; - } else { - boost::property_tree::ptree aie_meta; - read_aie_metadata(data.first, data.second, aie_meta); - rowOffset = aie_meta.get_child("aie_metadata.driver_config.aie_tile_row_start").get_value(); - } - gotValue = true; - } - return rowOffset; - } - - void AIEStatusPlugin::read_aie_metadata(const char* data, size_t size, boost::property_tree::ptree& aie_project) - { - std::stringstream aie_stream; - aie_stream.write(data,size); - boost::property_tree::read_json(aie_stream,aie_project); - } - } // end namespace xdp diff --git a/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.h b/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.h index d5fade48f45..723285f0ccd 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.h +++ b/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.h @@ -26,10 +26,10 @@ #include #include "xdp/profile/plugin/vp_base/vp_base_plugin.h" +#include "xdp/profile/database/static_info/aie_util.h" #include "xdp/config.h" #include "core/common/device.h" -#include "core/edge/common/aie_parser.h" #include "xaiefal/xaiefal.hpp" extern "C" { @@ -39,8 +39,6 @@ extern "C" { namespace xdp { - using tile_type = xrt_core::edge::aie::tile_type; - class AIEStatusPlugin : public XDPPlugin { public: @@ -56,21 +54,18 @@ namespace xdp { static bool alive(); private: - void getTilesForStatus(void* handle); + void getTilesForStatus(); void endPoll(); std::string getCoreStatusString(uint32_t status); - uint16_t getAIETileRowOffset(void* handle); - static void read_aie_metadata(const char* data, size_t size, - boost::property_tree::ptree& aie_project); + // Threads used by this plugin void pollDeadlock(uint64_t index, void* handle); void writeStatus(uint64_t index, void* handle, VPWriter* aieWriter); private: - static bool live; - uint32_t mPollingInterval; + boost::property_tree::ptree mAieMeta; // Thread control flags for each device handle std::map> mThreadCtrlMap; diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.cpp b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.cpp index b5f91d85453..102caa05296 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.cpp @@ -103,7 +103,7 @@ namespace xdp { getConfigMetricsForTiles(aieTileMetricsSettings, aieGraphMetricsSettings, module_type::core); getConfigMetricsForTiles(memTileMetricsSettings, memGraphMetricsSettings, module_type::mem_tile); getConfigMetricsForInterfaceTiles(shimTileMetricsSettings, shimGraphMetricsSettings); - setTraceStartControl(); + setTraceStartControl(compilerOptions.graph_iterator_event); } } @@ -158,7 +158,7 @@ namespace xdp { } // Parse trace start time or events - void AieTraceMetadata::setTraceStartControl() + void AieTraceMetadata::setTraceStartControl(bool graphIteratorEvent) { useDelay = false; useGraphIterator = false; @@ -217,9 +217,17 @@ namespace xdp { useDelay = (cycles != 0); delayCycles = cycles; } else if (startType == "iteration") { - // Start trace when graph iterator reaches a threshold - iterationCount = xrt_core::config::get_aie_trace_settings_start_iteration(); - useGraphIterator = (iterationCount != 0); + // Verify AIE was compiled with the proper setting + if (!graphIteratorEvent) { + std::string msg = "Unable to use graph iteration as trace start type. "; + msg.append("Please re-compile AI Engine with --graph-iterator-event=true."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + } + else { + // Start trace when graph iterator reaches a threshold + iterationCount = xrt_core::config::get_aie_trace_settings_start_iteration(); + useGraphIterator = (iterationCount != 0); + } } else if (startType == "kernel_event0") { // Start trace using user events useUserControl = true; diff --git a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h index 0a1afecb57d..4c83841d2e7 100644 --- a/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h +++ b/src/runtime_src/xdp/profile/plugin/aie_trace/aie_trace_metadata.h @@ -37,7 +37,7 @@ class AieTraceMetadata { AieTraceMetadata(uint64_t deviceID, void* handle); void checkSettings(); - void setTraceStartControl(); + void setTraceStartControl(bool graphIteratorEvent); std::vector getSettingsVector(std::string settingsString); uint8_t getMetricSetIndex(std::string metricString);