From e21cee00bb1ae480f106d80ae9cfaa10bcc3e074 Mon Sep 17 00:00:00 2001 From: Jason Villarreal <40246282+jvillarre@users.noreply.github.com> Date: Fri, 17 May 2024 19:55:28 -0700 Subject: [PATCH] Fix for CR-1182821: AIE Status was unable to get good data and was outputting empty files (#8178) * Fix for CR-1182821. The issue was race conditions between destructors of global static objects that was exposed with upgrades to petalinux. Signed-off-by: Jason Villarreal * Removing unintentional space Signed-off-by: Jason Villarreal --------- Signed-off-by: Jason Villarreal --- .../core/edge/user/aie_sys_parser.cpp | 28 ++++++++----------- .../core/edge/user/aie_sys_parser.h | 14 +++++----- .../core/edge/user/device_linux.cpp | 23 +++++++++------ src/runtime_src/core/edge/user/shim.cpp | 1 - src/runtime_src/core/edge/user/zynq_dev.cpp | 8 ++++++ src/runtime_src/core/edge/user/zynq_dev.h | 2 ++ .../plugin/aie_status/aie_status_plugin.cpp | 12 +++++++- 7 files changed, 53 insertions(+), 35 deletions(-) diff --git a/src/runtime_src/core/edge/user/aie_sys_parser.cpp b/src/runtime_src/core/edge/user/aie_sys_parser.cpp index 46deeb8b09b..807ff683ec1 100644 --- a/src/runtime_src/core/edge/user/aie_sys_parser.cpp +++ b/src/runtime_src/core/edge/user/aie_sys_parser.cpp @@ -1,5 +1,6 @@ /** * Copyright (C) 2021 Xilinx, Inc + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. A copy of the @@ -20,8 +21,8 @@ #include -std::fstream aie_sys_parser::sysfs_open_path(const std::string& path, - bool write, bool binary) +std::fstream +aie_sys_parser::sysfs_open_path(const std::string& path, bool write, bool binary) const { std::fstream fs; std::ios::openmode mode = write ? std::ios::out : std::ios::in; @@ -41,13 +42,14 @@ std::fstream aie_sys_parser::sysfs_open_path(const std::string& path, return fs; } -std::fstream aie_sys_parser::sysfs_open(const std::string& entry, - bool write, bool binary) +std::fstream +aie_sys_parser::sysfs_open(const std::string& entry, bool write, bool binary) const { return sysfs_open_path(entry, write, binary); } -void aie_sys_parser::sysfs_get(const std::string& entry, std::vector& sv) +void +aie_sys_parser::sysfs_get(const std::string& entry, std::vector& sv) const { sv.clear(); std::fstream fs = sysfs_open(entry, false, false); @@ -88,8 +90,7 @@ Function parse the above input content for given row and column and generate abo Input is in non-standard format, where ':', '|', and "," are the delimiters. */ void -aie_sys_parser::addrecursive(const int col, const int row, const std::string& tag, const std::string& line, - boost::property_tree::ptree &pt) +aie_sys_parser::addrecursive(const int col, const int row, const std::string& tag, const std::string& line, boost::property_tree::ptree &pt) const { std::string n(tag); boost::property_tree::ptree value; @@ -144,9 +145,9 @@ aie_sys_parser::addrecursive(const int col, const int row, const std::string& ta * If present, reads and parse the content of each sysfs. */ boost::property_tree::ptree -aie_sys_parser::aie_sys_read(const int col, const int row) +aie_sys_parser::aie_sys_read(const int col, const int row) const { - const static std::vector tags{"core","dma","lock","errors","event","bd"}; + const std::vector tags{"core","dma","lock","errors","event","bd"}; std::vector data; boost::property_tree::ptree pt; for(auto& tag:tags) { @@ -160,13 +161,6 @@ aie_sys_parser::aie_sys_read(const int col, const int row) return pt; } -aie_sys_parser *aie_sys_parser::get_parser(const std::string& aiepart) -{ - const std::string sroot = "/sys/class/aie/aiepart_" + aiepart + "/"; - static aie_sys_parser dev(sroot); - return &dev; -} - -aie_sys_parser::aie_sys_parser(const std::string& root) : sysfs_root(root) +aie_sys_parser::aie_sys_parser(const std::string& root) : sysfs_root("sys/class/aie/aiepart_" + root + "/") { } diff --git a/src/runtime_src/core/edge/user/aie_sys_parser.h b/src/runtime_src/core/edge/user/aie_sys_parser.h index 09801ae0a6e..fe39a4c9d23 100644 --- a/src/runtime_src/core/edge/user/aie_sys_parser.h +++ b/src/runtime_src/core/edge/user/aie_sys_parser.h @@ -1,5 +1,6 @@ /** * Copyright (C) 2021 Xilinx, Inc + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. A copy of the @@ -24,20 +25,19 @@ class aie_sys_parser { private: - std::fstream sysfs_open_path(const std::string& path, bool write, bool binary); - std::fstream sysfs_open(const std::string& entry, bool write, bool binary); - void sysfs_get(const std::string& entry, std::vector& sv); + std::fstream sysfs_open_path(const std::string& path, bool write, bool binary) const; + std::fstream sysfs_open(const std::string& entry, bool write, bool binary) const; + void sysfs_get(const std::string& entry, std::vector& sv) const; void addrecursive(const int col, const int row, const std::string& tag, const std::string& line, - boost::property_tree::ptree &pt); + boost::property_tree::ptree &pt) const; std::string sysfs_root; - aie_sys_parser(const std::string& sysfs_base); aie_sys_parser(const aie_sys_parser& s) = delete; aie_sys_parser& operator=(const aie_sys_parser& s) = delete; public: - static aie_sys_parser *get_parser(const std::string& aiepart); - boost::property_tree::ptree aie_sys_read(const int col, const int row); + aie_sys_parser(const std::string& sysfs_base); + boost::property_tree::ptree aie_sys_read(const int col, const int row) const; }; diff --git a/src/runtime_src/core/edge/user/device_linux.cpp b/src/runtime_src/core/edge/user/device_linux.cpp index c31905453e0..367848a9cb2 100644 --- a/src/runtime_src/core/edge/user/device_linux.cpp +++ b/src/runtime_src/core/edge/user/device_linux.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright (C) 2020-2022 Xilinx, Inc -// Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. #include "device_linux.h" #include "xrt.h" #include "zynq_dev.h" @@ -212,12 +212,13 @@ struct aie_core_info_sysfs boost::property_tree::ptree ptarray; aie_metadata_info aie_meta = get_aie_metadata_info(device); const std::string aiepart = std::to_string(aie_meta.shim_row) + "_" + std::to_string(aie_meta.num_cols); + const aie_sys_parser asp(aiepart); /* Loop each all aie core tiles and collect core, dma, events, errors, locks status. */ for (int i = 0; i < aie_meta.num_cols; ++i) for (int j = 0; j < (aie_meta.num_rows-1); ++j) ptarray.push_back(std::make_pair(std::to_string(i) + "_" + std::to_string(j), - aie_sys_parser::get_parser(aiepart)->aie_sys_read(i,(j + aie_meta.core_row)))); + asp.aie_sys_read(i,(j + aie_meta.core_row)))); boost::property_tree::ptree pt; pt.add_child("aie_core",ptarray); @@ -239,11 +240,12 @@ struct aie_shim_info_sysfs boost::property_tree::ptree ptarray; aie_metadata_info aie_meta = get_aie_metadata_info(device); const std::string aiepart = std::to_string(aie_meta.shim_row) + "_" + std::to_string(aie_meta.num_cols); + const aie_sys_parser asp(aiepart); /* Loop all shim tiles and collect all dma, events, errors, locks status */ for (int i=0; i < aie_meta.num_cols; ++i) { ptarray.push_back(std::make_pair(std::to_string(i) + "_" + std::to_string(aie_meta.shim_row), - aie_sys_parser::get_parser(aiepart)->aie_sys_read(i, aie_meta.shim_row))); + asp.aie_sys_read(i, aie_meta.shim_row))); } boost::property_tree::ptree pt; @@ -266,12 +268,15 @@ struct aie_mem_info_sysfs boost::property_tree::ptree ptarray; aie_metadata_info aie_meta = get_aie_metadata_info(device); const std::string aiepart = std::to_string(aie_meta.shim_row) + "_" + std::to_string(aie_meta.num_cols); - - /* Loop all mem tiles and collect all dma, events, errors, locks status */ - for (int i = 0; i < aie_meta.num_cols; ++i) - for (int j = 0; j < (aie_meta.num_mem_row-1); ++j) - ptarray.push_back(std::make_pair(std::to_string(i) + "_" + std::to_string(j), - aie_sys_parser::get_parser(aiepart)->aie_sys_read(i,(j + aie_meta.mem_row)))); + const aie_sys_parser asp(aiepart); + + if (aie_meta.num_mem_row != 0) { + /* Loop all mem tiles and collect all dma, events, errors, locks status */ + for (int i = 0; i < aie_meta.num_cols; ++i) + for (int j = 0; j < (aie_meta.num_mem_row-1); ++j) + ptarray.push_back(std::make_pair(std::to_string(i) + "_" + std::to_string(j), + asp.aie_sys_read(i,(j + aie_meta.mem_row)))); + } boost::property_tree::ptree pt; pt.add_child("aie_mem",ptarray); diff --git a/src/runtime_src/core/edge/user/shim.cpp b/src/runtime_src/core/edge/user/shim.cpp index 349759479b1..ceda5ffd43e 100644 --- a/src/runtime_src/core/edge/user/shim.cpp +++ b/src/runtime_src/core/edge/user/shim.cpp @@ -141,7 +141,6 @@ shim:: xdp::aie::finish_flush_device(this); #endif xdp::aie::ctr::end_poll(this); - xdp::aie::sts::end_poll(this); // The BO cache unmaps and releases all execbo, but this must // be done before the device (mKernelFD) is closed. diff --git a/src/runtime_src/core/edge/user/zynq_dev.cpp b/src/runtime_src/core/edge/user/zynq_dev.cpp index 6874a72736d..7a0a8e60165 100644 --- a/src/runtime_src/core/edge/user/zynq_dev.cpp +++ b/src/runtime_src/core/edge/user/zynq_dev.cpp @@ -1,5 +1,6 @@ /** * Copyright (C) 2019-2022 Xilinx, Inc + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. A copy of the @@ -21,6 +22,8 @@ #include #include "zynq_dev.h" +#include "plugin/xdp/aie_status.h" + static std::fstream sysfs_open_path(const std::string& path, std::string& err, bool write, bool binary) { @@ -153,6 +156,11 @@ zynq_device::zynq_device(const std::string& root) : sysfs_root(root) { } +zynq_device::~zynq_device() +{ + xdp::aie::sts::end_poll(nullptr); +} + std::string get_render_devname() { diff --git a/src/runtime_src/core/edge/user/zynq_dev.h b/src/runtime_src/core/edge/user/zynq_dev.h index 6fe36c615c6..90d57b11c7e 100644 --- a/src/runtime_src/core/edge/user/zynq_dev.h +++ b/src/runtime_src/core/edge/user/zynq_dev.h @@ -1,5 +1,6 @@ /** * Copyright (C) 2019-2022 Xilinx, Inc + * Copyright (C) 2024 Advanced Micro Devices, Inc. - All rights reserved * * Licensed under the Apache License, Version 2.0 (the "License"). You may * not use this file except in compliance with the License. A copy of the @@ -58,6 +59,7 @@ class zynq_device { zynq_device(const std::string& sysfs_base); zynq_device(const zynq_device& s) = delete; zynq_device& operator=(const zynq_device& s) = delete; + ~zynq_device(); }; std::string get_render_devname(); diff --git a/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp b/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp index 5288c204682..907c88afbd9 100755 --- a/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp +++ b/src/runtime_src/xdp/profile/plugin/aie_status/aie_status_plugin.cpp @@ -447,7 +447,16 @@ namespace xdp { // Last chance at writing status reports for (auto w : writers) w->write(false, handle); - + + // When ending polling for a device, if we are on edge we must instead + // shut down all of the threads and not just a single one in order + // to avoid race conditions between the zynq driver destructor and our own. + // + // Currently, Edge is the only supported type of platform so we can + // safely end all threads here, but this must be revisited if we extend + // AIE status functionality to other types of platforms. + endPoll(); + /* // Ask threads to stop mThreadCtrlMap[handle] = false; @@ -468,6 +477,7 @@ namespace xdp { } mThreadCtrlMap.erase(handle); + */ } void AIEStatusPlugin::endPoll()