diff --git a/include/lo2s/config.hpp b/include/lo2s/config.hpp index 5c2166cb..13ed1127 100644 --- a/include/lo2s/config.hpp +++ b/include/lo2s/config.hpp @@ -95,6 +95,7 @@ struct Config // NEC SX-Aurora Tsubasa bool use_nec; std::chrono::microseconds nec_read_interval; + std::chrono::milliseconds nec_check_interval; }; const Config& config(); diff --git a/include/lo2s/monitor/main_monitor.hpp b/include/lo2s/monitor/main_monitor.hpp index e75949f2..3e58f4c9 100644 --- a/include/lo2s/monitor/main_monitor.hpp +++ b/include/lo2s/monitor/main_monitor.hpp @@ -85,7 +85,7 @@ class MainMonitor std::unique_ptr sensors_recorder_; #endif #ifdef HAVE_VEOSINFO - std::vector> nec_monitors_; + std::vector> nec_monitors_; #endif }; } // namespace monitor diff --git a/include/lo2s/monitor/nec_monitor_main.hpp b/include/lo2s/monitor/nec_monitor_main.hpp index 2df55938..d76d8c95 100644 --- a/include/lo2s/monitor/nec_monitor_main.hpp +++ b/include/lo2s/monitor/nec_monitor_main.hpp @@ -31,7 +31,7 @@ extern "C" { -#include +#include #include } @@ -40,7 +40,7 @@ namespace lo2s { namespace nec { - class NecMonitorMain : public monitor::ThreadedMonitor +class NecMonitorMain : public monitor::ThreadedMonitor { public: NecMonitorMain(trace::Trace& trace, NecDevice device); @@ -57,13 +57,13 @@ namespace nec void finalize_thread() override; private: - std::optional get_device_of(Thread thread); - std::vector get_tasks_of(NecDevice device); + std::optional get_device_of(Thread thread); + std::vector get_tasks_of(NecDevice device); std::map monitors_; trace::Trace& trace_; NecDevice device_; - std::atomic stopped_; + std::atomic stopped_; ve_nodeinfo nodeinfo_; }; -} // namespace monitor +} // namespace nec } // namespace lo2s diff --git a/include/lo2s/monitor/nec_thread_monitor.hpp b/include/lo2s/monitor/nec_thread_monitor.hpp index c21aafe7..f377d218 100644 --- a/include/lo2s/monitor/nec_thread_monitor.hpp +++ b/include/lo2s/monitor/nec_thread_monitor.hpp @@ -31,7 +31,7 @@ namespace lo2s { namespace nec { - class NecThreadMonitor : public monitor::PollMonitor +class NecThreadMonitor : public monitor::PollMonitor { public: NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device); @@ -47,12 +47,12 @@ namespace nec void monitor(int fd) override; private: - std::chrono::microseconds nec_read_interval_; + std::chrono::microseconds nec_read_interval_; otf2::writer::local& otf2_writer_; Thread nec_thread_; trace::Trace& trace_; NecDevice device_; perf::CallingContextManager cctx_manager_; }; -} // namespace monitor +} // namespace nec } // namespace lo2s diff --git a/include/lo2s/topology.hpp b/include/lo2s/topology.hpp index 1c288ed0..76d73bb0 100644 --- a/include/lo2s/topology.hpp +++ b/include/lo2s/topology.hpp @@ -71,25 +71,25 @@ class Topology return cpus_; } - const std::set nec_devices() const - { - std::set devices; - - const std::regex nec_regex("/sys/class/ve/ve(\\d)"); - - for (auto& dir_entry : std::filesystem::directory_iterator("/sys/class/ve")) + const std::set nec_devices() const { - std::smatch nec_match; + std::set devices; + + const std::regex nec_regex("/sys/class/ve/ve(\\d)"); - auto path = dir_entry.path().string(); - if (std::regex_match(path, nec_match, nec_regex)) + for (auto& dir_entry : std::filesystem::directory_iterator("/sys/class/ve")) { - devices.emplace(NecDevice(std::stoi(nec_match[1]))); + std::smatch nec_match; + + auto path = dir_entry.path().string(); + if (std::regex_match(path, nec_match, nec_regex)) + { + devices.emplace(NecDevice(std::stoi(nec_match[1]))); + } } - } - return devices; - } + return devices; + } Core core_of(Cpu cpu) const { diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp index 3767ee3d..7a58efd2 100644 --- a/include/lo2s/trace/trace.hpp +++ b/include/lo2s/trace/trace.hpp @@ -228,10 +228,10 @@ class Trace return interrupt_generator_; } - const otf2::definition::interrupt_generator nec_interrupt_generator() const - { - return nec_interrupt_generator_; - } + const otf2::definition::interrupt_generator nec_interrupt_generator() const + { + return nec_interrupt_generator_; + } const otf2::definition::system_tree_node& system_tree_cpu_node(Cpu cpu) const { @@ -260,7 +260,7 @@ class Trace } const otf2::definition::location& location(const ExecutionScope& scope) - { + { MeasurementScope sample_scope = MeasurementScope::sample(scope); const auto& intern_location = registry_.emplace( @@ -335,7 +335,8 @@ class Trace otf2::definition::interrupt_generator& interrupt_generator_; - otf2::definition::detail::weak_ref nec_interrupt_generator_; + otf2::definition::detail::weak_ref + nec_interrupt_generator_; // TODO add location groups (processes), read path from /proc/self/exe symlink std::map thread_names_; diff --git a/include/lo2s/types.hpp b/include/lo2s/types.hpp index c2abe0db..84f0c076 100644 --- a/include/lo2s/types.hpp +++ b/include/lo2s/types.hpp @@ -277,8 +277,9 @@ class NecDevice { return lhs.id_ < rhs.id_; } + private: - int id_; + int id_; }; } // namespace lo2s @@ -348,26 +349,26 @@ struct formatter } }; - template <> - struct formatter - { +template <> +struct formatter +{ constexpr auto parse(format_parse_context& ctx) { - auto it = ctx.begin(), end = ctx.end(); - if(it != end && *it != '}') + auto it = ctx.begin(), end = ctx.end(); + if (it != end && *it != '}') { - throw format_error("invalid format"); + throw format_error("invalid format"); } - return it; + return it; } template auto format(const lo2s::NecDevice& device, FormatContext& ctx) const { - return fmt::format_to(ctx.out(), "VE {}", device.as_int()); + return fmt::format_to(ctx.out(), "VE {}", device.as_int()); } - }; +}; } // namespace fmt namespace std diff --git a/src/config.cpp b/src/config.cpp index b524efb2..4407c7f5 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -334,8 +334,16 @@ void parse_program_options(int argc, const char** argv) io_options.toggle("block-io", "Enable recording of block I/O events (requires access to debugfs)"); - nec_options.toggle("nec", "Enable NEC SX-Aurora Tsubasa sampling"); - nec_options.option("nec-readout-interval", "NEC sampling interval in microseconds").optional().metavar("MSEC").default_value("1"); + nec_options.toggle("nec", "Enable NEC Vector Engine sampling"); + nec_options.option("nec-readout-interval", "NEC sampling interval") + .optional() + .metavar("USEC") + .default_value("1"); + nec_options.option("nec-check-interval", "The interval between checks for new VE processes") + .optional() + .metavar("MSEC") + .default_value("100"); + nitro::options::arguments arguments; try { @@ -618,9 +626,11 @@ void parse_program_options(int argc, const char** argv) std::chrono::milliseconds(arguments.as("userspace-readout-interval")); config.nec_read_interval = - std::chrono::microseconds(arguments.as("nec-readout-interval")); + config.nec_check_interval = + std::chrono::milliseconds(arguments.as("nec-check-interval")); + if (arguments.provided("perf-readout-interval")) { config.perf_read_interval = diff --git a/src/monitor/main_monitor.cpp b/src/monitor/main_monitor.cpp index fbe708c7..e2a168fe 100644 --- a/src/monitor/main_monitor.cpp +++ b/src/monitor/main_monitor.cpp @@ -119,11 +119,11 @@ MainMonitor::MainMonitor() : trace_(), metrics_(trace_) #ifdef HAVE_VEOSINFO - for(auto device : Topology::instance().nec_devices()) - { - nec_monitors_.emplace_back(std::make_unique(trace_, device )); + for (auto device : Topology::instance().nec_devices()) + { + nec_monitors_.emplace_back(std::make_unique(trace_, device)); - nec_monitors_.back()->start(); + nec_monitors_.back()->start(); } #endif } @@ -178,10 +178,10 @@ MainMonitor::~MainMonitor() } #ifdef HAVE_VEOSINFO - for (auto& nec_monitor : nec_monitors_) - { - nec_monitor->stop(); - } + for (auto& nec_monitor : nec_monitors_) + { + nec_monitor->stop(); + } #endif // Notify trace, that we will end recording now. That means, get_time() of this call will be diff --git a/src/monitor/nec_monitor_main.cpp b/src/monitor/nec_monitor_main.cpp index b5933ce2..4e649ad1 100644 --- a/src/monitor/nec_monitor_main.cpp +++ b/src/monitor/nec_monitor_main.cpp @@ -26,16 +26,16 @@ namespace lo2s namespace nec { - std::optional NecMonitorMain::get_device_of(Thread thread) +std::optional NecMonitorMain::get_device_of(Thread thread) { // /sys/class/ve/ve0 is not device 0, because that would make too much sense // So look the device id up here - + for (int i = 0; i < nodeinfo_.total_node_count; i++) { if (!ve_check_pid(nodeinfo_.nodeid[i], thread.as_pid_t())) { - return NecDevice(nodeinfo_.nodeid[i]); + return NecDevice(nodeinfo_.nodeid[i]); } } return std::optional(); @@ -43,7 +43,7 @@ namespace nec std::vector NecMonitorMain::get_tasks_of(NecDevice device) { - std::ifstream task_stream(fmt::format("/sys/class/ve/ve{}/task_id_all", device.as_int())); + std::ifstream task_stream(fmt::format("/sys/class/ve/ve{}/task_id_all", device.as_int())); std::vector threads; @@ -51,33 +51,32 @@ std::vector NecMonitorMain::get_tasks_of(NecDevice device) { pid_t pid; task_stream >> pid; - if(!task_stream) - { + if (!task_stream) + { break; - } + } threads.emplace_back(Thread(pid)); } return threads; } - NecMonitorMain::NecMonitorMain(trace::Trace& trace, NecDevice device) - : ThreadedMonitor(trace, fmt::format("{}", device)), trace_(trace), device_(device), - stopped_(false) - { +NecMonitorMain::NecMonitorMain(trace::Trace& trace, NecDevice device) +: ThreadedMonitor(trace, fmt::format("{}", device)), trace_(trace), device_(device), stopped_(false) +{ auto ret = ve_node_info(&nodeinfo_); if (ret == -1) { Log::error() << "Failed to get Vector Engine node information!"; throw_errno(); } - } +} - void NecMonitorMain::run() - { +void NecMonitorMain::run() +{ while (!stopped_) { - auto threads = get_tasks_of(device_); + auto threads = get_tasks_of(device_); for (auto monitor = monitors_.begin(); monitor != monitors_.end();) { if (std::find(threads.begin(), threads.end(), monitor->first) == threads.end()) @@ -114,9 +113,9 @@ std::vector NecMonitorMain::get_tasks_of(NecDevice device) ret.first->second.start(); } } - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::this_thread::sleep_for(config().nec_check_interval); } - } +} void NecMonitorMain::stop() { @@ -131,5 +130,5 @@ void NecMonitorMain::finalize_thread() monitor.second.stop(); } } -} // namespace monitor +} // namespace nec } // namespace lo2s diff --git a/src/monitor/nec_thread_monitor.cpp b/src/monitor/nec_thread_monitor.cpp index 5fa9e0ed..be6e77d2 100644 --- a/src/monitor/nec_thread_monitor.cpp +++ b/src/monitor/nec_thread_monitor.cpp @@ -34,10 +34,10 @@ namespace lo2s namespace nec { NecThreadMonitor::NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device) - : PollMonitor(trace, fmt::format("VE{} {}", device, thread.as_pid_t()), std::chrono::duration_cast(config().nec_read_interval)), - nec_read_interval_(config().nec_read_interval), - otf2_writer_(trace.nec_writer(device, thread)), nec_thread_(thread), trace_(trace), - device_(device), cctx_manager_(trace) +: PollMonitor(trace, fmt::format("VE{} {}", device, thread.as_pid_t()), + std::chrono::duration_cast(config().nec_read_interval)), + nec_read_interval_(config().nec_read_interval), otf2_writer_(trace.nec_writer(device, thread)), + nec_thread_(thread), trace_(trace), device_(device), cctx_manager_(trace) { cctx_manager_.thread_enter(nec_thread_.as_process(), thread); otf2_writer_.write_calling_context_enter(lo2s::time::now(), cctx_manager_.current(), 2); @@ -50,11 +50,11 @@ void NecThreadMonitor::monitor([[maybe_unused]] int fd) auto ret = ve_get_regvals(device_.as_int(), nec_thread_.as_pid_t(), 1, reg, &val); - if(ret == -1) - { + if (ret == -1) + { Log::error() << "Failed to the vector engine instruction counter value!"; throw_errno(); - } + } otf2::chrono::time_point tp = lo2s::time::now(); otf2_writer_.write_calling_context_sample(tp, cctx_manager_.sample_ref(val), 2, @@ -70,5 +70,5 @@ void NecThreadMonitor::finalize_thread() cctx_manager_.finalize(&otf2_writer_); } -} // namespace monitor +} // namespace nec } // namespace lo2s diff --git a/src/trace/trace.cpp b/src/trace/trace.cpp index 5075d986..54b43bb1 100644 --- a/src/trace/trace.cpp +++ b/src/trace/trace.cpp @@ -195,12 +195,12 @@ Trace::Trace() } } - if(config().use_nec) - { + if (config().use_nec) + { nec_interrupt_generator_ = registry_.create( intern("NEC sampling timer"), otf2::common::interrupt_generator_mode_type::count, otf2::common::base_type::decimal, 0, config().sampling_period); - } + } } void Trace::begin_record() diff --git a/src/util.cpp b/src/util.cpp index 49e98ef1..cb100e66 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -346,12 +347,10 @@ std::vector get_thread_cmdline(Thread thread) std::string get_nec_thread_comm(Thread thread) { - // For normal processes, /proc/{PID}/comm contains the name of the program running in that - // process. For NEC processes, it instead contains the name of the program loader, ve_exec, So - // instead, we have to extract the name of the actual NEC program we are executing from the - // commandline arguments to ve_exec - std::string thread_name = fmt::format("{}", thread); - + // We can't use /task/{pid}/comm to get the name of a NEC process because that will contain the + // name of the program offloader (ve_exec) instead of the program that is run. Instead, we have + // to parse the command line of the offloader. Thankfully, the kernel always puts '--' before + // the name of the program run. auto args = get_thread_cmdline(thread); for (std::size_t i = 0; i < args.size(); i++) { @@ -359,10 +358,12 @@ std::string get_nec_thread_comm(Thread thread) { if (i + 1 < args.size()) { - thread_name = fmt::format("{} ({})", args[i + 1], thread.as_pid_t()); + return fmt::format("{} ({})", args[i + 1], thread.as_pid_t()); } } } - return thread_name; + + // If no '--' is found, fall back to the complete commandline as a name + return std::accumulate(args.begin(), args.end(), std::string("")); } } // namespace lo2s