# FindVeosinfo
# ------------
# Locate libveosinfo (used to sample NEC SX-Aurora Tsubasa cards).
#
# Result variables:
#   Veosinfo_FOUND         - true if both the header and the library were found
#   Veosinfo_INCLUDE_DIRS  - directory containing veosinfo/veosinfo.h
#   Veosinfo_LIBRARIES     - full path of the veosinfo library
#
# Imported target (only defined when Veosinfo_FOUND):
#   Veosinfo::veosinfo

find_path(Veosinfo_INCLUDE_DIRS veosinfo/veosinfo.h
    PATHS ENV C_INCLUDE_PATH ENV CPATH
    PATH_SUFFIXES include)

# The keyword is HINTS; a bare "HINT" is not recognised by find_library, so the
# environment search paths were not applied as intended.
find_library(Veosinfo_LIBRARIES veosinfo
    HINTS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)

include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(Veosinfo DEFAULT_MSG Veosinfo_LIBRARIES Veosinfo_INCLUDE_DIRS)

# Guard against the module being evaluated more than once in the same scope:
# add_library() fails if the target already exists.
if(Veosinfo_FOUND AND NOT TARGET Veosinfo::veosinfo)
    add_library(libveosinfo INTERFACE)
    target_link_libraries(libveosinfo INTERFACE ${Veosinfo_LIBRARIES})
    target_include_directories(libveosinfo SYSTEM INTERFACE ${Veosinfo_INCLUDE_DIRS})
    add_library(Veosinfo::veosinfo ALIAS libveosinfo)
endif()

mark_as_advanced(Veosinfo_LIBRARIES Veosinfo_INCLUDE_DIRS)
+ * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +extern "C" +{ +#include + +#include +} + +namespace lo2s +{ +namespace nec +{ +class NecMonitorMain : public monitor::ThreadedMonitor +{ +public: + NecMonitorMain(trace::Trace& trace, NecDevice device); + + void stop() override; + +protected: + std::string group() const override + { + return "nec::MonitorMain"; + } + + void run() override; + void finalize_thread() override; + +private: + std::optional get_device_of(Thread thread); + std::vector get_tasks_of(NecDevice device); + std::map monitors_; + trace::Trace& trace_; + NecDevice device_; + std::atomic stopped_; + ve_nodeinfo nodeinfo_; +}; +} // namespace nec +} // namespace lo2s diff --git a/include/lo2s/monitor/nec_thread_monitor.hpp b/include/lo2s/monitor/nec_thread_monitor.hpp new file mode 100644 index 00000000..f377d218 --- /dev/null +++ b/include/lo2s/monitor/nec_thread_monitor.hpp @@ -0,0 +1,58 @@ +/* + * This file is part of the lo2s software. 
+ * Linux OTF2 sampling + * + * Copyright (c) 2016, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . + */ + +#pragma once + +#include + +#include +#include +#include + +namespace lo2s +{ +namespace nec +{ +class NecThreadMonitor : public monitor::PollMonitor +{ +public: + NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device); + +protected: + std::string group() const override + { + return "nec::ThreadMonitor"; + } + + void finalize_thread() override; + + void monitor(int fd) override; + +private: + std::chrono::microseconds nec_read_interval_; + otf2::writer::local& otf2_writer_; + Thread nec_thread_; + trace::Trace& trace_; + NecDevice device_; + perf::CallingContextManager cctx_manager_; +}; +} // namespace nec +} // namespace lo2s diff --git a/include/lo2s/monitor/poll_monitor.hpp b/include/lo2s/monitor/poll_monitor.hpp index a6f0e18b..301d3e67 100644 --- a/include/lo2s/monitor/poll_monitor.hpp +++ b/include/lo2s/monitor/poll_monitor.hpp @@ -51,7 +51,7 @@ class PollMonitor : public ThreadedMonitor protected: void run() override; - void monitor() override; + void monitor(); void add_fd(int fd); diff --git a/include/lo2s/monitor/threaded_monitor.hpp b/include/lo2s/monitor/threaded_monitor.hpp index 4bffe923..a5b5a719 100644 --- a/include/lo2s/monitor/threaded_monitor.hpp +++ b/include/lo2s/monitor/threaded_monitor.hpp @@ -57,7 +57,6 @@ class ThreadedMonitor 
virtual void run() = 0; void thread_main(); - virtual void monitor() = 0; void register_thread(); diff --git a/include/lo2s/topology.hpp b/include/lo2s/topology.hpp index d13a6ffe..76d73bb0 100644 --- a/include/lo2s/topology.hpp +++ b/include/lo2s/topology.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,26 @@ class Topology return cpus_; } + const std::set nec_devices() const + { + std::set devices; + + const std::regex nec_regex("/sys/class/ve/ve(\\d)"); + + for (auto& dir_entry : std::filesystem::directory_iterator("/sys/class/ve")) + { + std::smatch nec_match; + + auto path = dir_entry.path().string(); + if (std::regex_match(path, nec_match, nec_regex)) + { + devices.emplace(NecDevice(std::stoi(nec_match[1]))); + } + } + + return devices; + } + Core core_of(Cpu cpu) const { return cpu_to_core_.at(cpu); diff --git a/include/lo2s/trace/reg_keys.hpp b/include/lo2s/trace/reg_keys.hpp index 63b01cd2..8d4469d4 100644 --- a/include/lo2s/trace/reg_keys.hpp +++ b/include/lo2s/trace/reg_keys.hpp @@ -74,6 +74,11 @@ struct ByThreadTag }; using ByThread = SimpleKeyType; +struct ByNecThreadTag +{ +}; +using ByNecThread = SimpleKeyType; + struct ByProcessTag { }; @@ -124,6 +129,12 @@ struct ByCounterCollectionTag using ByCounterCollection = SimpleKeyType; +struct ByNecDeviceTag +{ +}; + +using ByNecDevice = SimpleKeyType; + template struct Holder { @@ -132,8 +143,8 @@ struct Holder template <> struct Holder { - using type = otf2::lookup_definition_holder; + using type = otf2::lookup_definition_holder; }; template <> struct Holder @@ -174,13 +185,13 @@ struct Holder { using type = otf2::lookup_definition_holder; + ByExecutionScope, ByNecThread, ByBlockDevice>; }; template <> struct Holder { using type = otf2::lookup_definition_holder; + ByMeasurementScope, ByNecThread, ByBlockDevice>; }; template <> struct Holder diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp index 79f536de..0005fc45 100644 --- 
/**
 * Strongly typed identifier of a NEC vector engine device.
 *
 * Wraps a plain integer id so that it cannot be mixed up with other integer
 * ids. Provides equality and strict ordering, both defined by the wrapped id.
 */
class NecDevice
{
public:
    /// Wrap the given device id. Explicit, so an int never converts silently.
    explicit NecDevice(int id) : id_(id)
    {
    }

    /// @return the wrapped device id
    int as_int() const
    {
        return id_;
    }

    /// Two devices are equal iff their ids are equal.
    friend bool operator==(const NecDevice& a, const NecDevice& b)
    {
        return a.as_int() == b.as_int();
    }

    /// Devices are ordered by ascending id.
    friend bool operator<(const NecDevice& a, const NecDevice& b)
    {
        return a.as_int() < b.as_int();
    }

private:
    int id_;
};
{ + return fmt::format_to(ctx.out(), "VE {}", device.as_int()); + } +}; } // namespace fmt namespace std diff --git a/include/lo2s/util.hpp b/include/lo2s/util.hpp index 0df34aa0..47f8db19 100644 --- a/include/lo2s/util.hpp +++ b/include/lo2s/util.hpp @@ -83,8 +83,11 @@ class StringCache std::size_t get_page_size(); std::string get_process_exe(Process process); std::string get_process_comm(Process process); +std::vector get_thread_cmdline(Thread thread); std::string get_task_comm(Process process, Thread thread); +std::string get_nec_thread_comm(Thread thread); + std::chrono::duration get_cpu_time(); std::string get_datetime(); diff --git a/src/config.cpp b/src/config.cpp index 71b577eb..4407c7f5 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -160,6 +160,7 @@ void parse_program_options(int argc, const char** argv) auto& x86_energy_options = parser.group("x86_energy options"); auto& sensors_options = parser.group("sensors options"); auto& io_options = parser.group("I/O recording options"); + auto& nec_options = parser.group("NEC SX-Aurora Tsubasa recording options"); lo2s::Config config; @@ -333,6 +334,16 @@ void parse_program_options(int argc, const char** argv) io_options.toggle("block-io", "Enable recording of block I/O events (requires access to debugfs)"); + nec_options.toggle("nec", "Enable NEC Vector Engine sampling"); + nec_options.option("nec-readout-interval", "NEC sampling interval") + .optional() + .metavar("USEC") + .default_value("1"); + nec_options.option("nec-check-interval", "The interval between checks for new VE processes") + .optional() + .metavar("MSEC") + .default_value("100"); + nitro::options::arguments arguments; try { @@ -358,6 +369,7 @@ void parse_program_options(int argc, const char** argv) config.use_x86_energy = arguments.given("x86-energy"); config.use_sensors = arguments.given("sensors"); config.use_block_io = arguments.given("block-io"); + config.use_nec = arguments.given("nec"); config.command = arguments.positionals(); if 
#ifdef HAVE_VEOSINFO
    // Only start the vector engine monitors if NEC sampling was requested
    // (config().use_nec, set by --nec). Trace::Trace() creates the NEC
    // interrupt generator under exactly that condition, and every sample
    // written by the per-thread NEC monitors references that generator.
    if (config().use_nec)
    {
        for (auto device : Topology::instance().nec_devices())
        {
            nec_monitors_.emplace_back(std::make_unique<nec::NecMonitorMain>(trace_, device));

            nec_monitors_.back()->start();
        }
    }
#endif
+ * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . + */ + +#include + +namespace lo2s +{ +namespace nec +{ + +std::optional NecMonitorMain::get_device_of(Thread thread) +{ + // /sys/class/ve/ve0 is not device 0, because that would make too much sense + // So look the device id up here + + for (int i = 0; i < nodeinfo_.total_node_count; i++) + { + if (!ve_check_pid(nodeinfo_.nodeid[i], thread.as_pid_t())) + { + return NecDevice(nodeinfo_.nodeid[i]); + } + } + return std::optional(); +} + +std::vector NecMonitorMain::get_tasks_of(NecDevice device) +{ + std::ifstream task_stream(fmt::format("/sys/class/ve/ve{}/task_id_all", device.as_int())); + + std::vector threads; + + while (true) + { + pid_t pid; + task_stream >> pid; + if (!task_stream) + { + break; + } + threads.emplace_back(Thread(pid)); + } + + return threads; +} + +NecMonitorMain::NecMonitorMain(trace::Trace& trace, NecDevice device) +: ThreadedMonitor(trace, fmt::format("{}", device)), trace_(trace), device_(device), stopped_(false) +{ + auto ret = ve_node_info(&nodeinfo_); + if (ret == -1) + { + Log::error() << "Failed to get Vector Engine node information!"; + throw_errno(); + } +} + +void NecMonitorMain::run() +{ + while (!stopped_) + { + auto threads = get_tasks_of(device_); + for (auto monitor = monitors_.begin(); monitor != monitors_.end();) + { + if (std::find(threads.begin(), threads.end(), monitor->first) == threads.end()) + { + monitor->second.stop(); + monitor = monitors_.erase(monitor); + } + else + { + monitor++; + } + } + + for (auto& thread : threads) + { + if (monitors_.count(thread)) + { + continue; + } + + auto real_device_id = get_device_of(thread); + + if (!real_device_id) + { + Log::warn() 
<< "Could not find real vector accelerator id for " + << thread.as_pid_t(); + continue; + } + + auto ret = monitors_.emplace(std::piecewise_construct, std::forward_as_tuple(thread), + std::forward_as_tuple(thread, trace_, *real_device_id)); + if (ret.second) + { + ret.first->second.start(); + } + } + std::this_thread::sleep_for(config().nec_check_interval); + } +} + +void NecMonitorMain::stop() +{ + stopped_ = true; + thread_.join(); +} + +void NecMonitorMain::finalize_thread() +{ + for (auto& monitor : monitors_) + { + monitor.second.stop(); + } +} +} // namespace nec +} // namespace lo2s diff --git a/src/monitor/nec_thread_monitor.cpp b/src/monitor/nec_thread_monitor.cpp new file mode 100644 index 00000000..be6e77d2 --- /dev/null +++ b/src/monitor/nec_thread_monitor.cpp @@ -0,0 +1,74 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2016, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
+ */ + +#include +#include + +extern "C" +{ +#include +} + +#include + +namespace lo2s +{ +namespace nec +{ +NecThreadMonitor::NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device) +: PollMonitor(trace, fmt::format("VE{} {}", device, thread.as_pid_t()), + std::chrono::duration_cast(config().nec_read_interval)), + nec_read_interval_(config().nec_read_interval), otf2_writer_(trace.nec_writer(device, thread)), + nec_thread_(thread), trace_(trace), device_(device), cctx_manager_(trace) +{ + cctx_manager_.thread_enter(nec_thread_.as_process(), thread); + otf2_writer_.write_calling_context_enter(lo2s::time::now(), cctx_manager_.current(), 2); +} + +void NecThreadMonitor::monitor([[maybe_unused]] int fd) +{ + static int reg[] = { VE_USR_IC }; + uint64_t val; + + auto ret = ve_get_regvals(device_.as_int(), nec_thread_.as_pid_t(), 1, reg, &val); + + if (ret == -1) + { + Log::error() << "Failed to the vector engine instruction counter value!"; + throw_errno(); + } + + otf2::chrono::time_point tp = lo2s::time::now(); + otf2_writer_.write_calling_context_sample(tp, cctx_manager_.sample_ref(val), 2, + trace_.nec_interrupt_generator().ref()); +} + +void NecThreadMonitor::finalize_thread() +{ + if (!cctx_manager_.current().is_undefined()) + { + otf2_writer_.write_calling_context_leave(lo2s::time::now(), cctx_manager_.current()); + } + + cctx_manager_.finalize(&otf2_writer_); +} +} // namespace nec +} // namespace lo2s diff --git a/src/trace/trace.cpp b/src/trace/trace.cpp index be762cf5..00e6a0d7 100644 --- a/src/trace/trace.cpp +++ b/src/trace/trace.cpp @@ -194,6 +194,13 @@ Trace::Trace() } } } + + if (config().use_nec) + { + nec_interrupt_generator_ = registry_.create( + intern("NEC sampling timer"), otf2::common::interrupt_generator_mode_type::count, + otf2::common::base_type::decimal, 0, config().sampling_period); + } } void Trace::begin_record() @@ -404,6 +411,25 @@ otf2::writer::local& Trace::sample_writer(const ExecutionScope& writer_scope) return 
archive_(location(writer_scope)); } +otf2::writer::local& Trace::nec_writer(NecDevice device, const Thread& nec_thread) +{ + + auto& intern_name = intern(fmt::format("{} {}", device, get_nec_thread_comm(nec_thread))); + + const auto& node = registry_.emplace( + ByNecDevice(device), intern(fmt::format("{}", device)), intern("NEC vector accelerator"), + system_tree_root_node_); + + const auto& nec_location_group = registry_.emplace( + ByNecThread(nec_thread), intern_name, otf2::common::location_group_type::process, node); + + const auto& intern_location = registry_.emplace( + ByNecThread(nec_thread), intern_name, nec_location_group, + otf2::definition::location::location_type::cpu_thread); + + return archive_(intern_location); +} + otf2::writer::local& Trace::syscall_writer(const Cpu& cpu) { MeasurementScope scope = MeasurementScope::syscall(cpu.as_scope()); diff --git a/src/util.cpp b/src/util.cpp index ea41098b..cb100e66 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -327,4 +328,42 @@ std::set parse_list_from_file(std::filesystem::path file) return std::set(); } + +std::vector get_thread_cmdline(Thread thread) +{ + std::ifstream cmdline(fmt::format("/proc/{}/cmdline", thread.as_pid_t())); + std::string cmdline_str; + cmdline >> cmdline_str; + + const char* cmdline_c_str = cmdline_str.c_str(); + std::vector args; + while (cmdline_c_str < cmdline_str.c_str() + cmdline_str.length()) + { + args.emplace_back(std::string(cmdline_c_str)); + cmdline_c_str += args.back().length() + 1; + } + return args; +} + +std::string get_nec_thread_comm(Thread thread) +{ + // We can't use /task/{pid}/comm to get the name of a NEC process because that will contain the + // name of the program offloader (ve_exec) instead of the program that is run. Instead, we have + // to parse the command line of the offloader. Thankfully, the kernel always puts '--' before + // the name of the program run. 
+ auto args = get_thread_cmdline(thread); + for (std::size_t i = 0; i < args.size(); i++) + { + if (args[i] == "--") + { + if (i + 1 < args.size()) + { + return fmt::format("{} ({})", args[i + 1], thread.as_pid_t()); + } + } + } + + // If no '--' is found, fall back to the complete commandline as a name + return std::accumulate(args.begin(), args.end(), std::string("")); +} } // namespace lo2s