From 0ed485b7541a51ffc680dd795e6310928fb24770 Mon Sep 17 00:00:00 2001 From: Christian von Elm Date: Fri, 5 Jan 2024 14:23:20 +0100 Subject: [PATCH] feat(system-mode): Add the ability to have system-mode without process tracking This should reduce the overhead in situations where you really only want to track metrics in system-wide mode. --- include/lo2s/config.hpp | 1 + include/lo2s/perf/counter/metric_writer.hpp | 2 +- include/lo2s/trace/trace.hpp | 22 --------------------- man/lo2s.1.pod | 5 +++++ src/config.cpp | 8 ++++++++ src/monitor/cpu_set_monitor.cpp | 11 ++++++++--- src/monitor/scope_monitor.cpp | 2 +- src/trace/trace.cpp | 21 ++++++++++++++++++-- 8 files changed, 43 insertions(+), 29 deletions(-) diff --git a/include/lo2s/config.hpp b/include/lo2s/config.hpp index 76427bf8..48fc67a8 100644 --- a/include/lo2s/config.hpp +++ b/include/lo2s/config.hpp @@ -66,6 +66,7 @@ struct Config // perf std::size_t mmap_pages; bool exclude_kernel; + bool process_recording; // Instruction sampling bool sampling; std::uint64_t sampling_period; diff --git a/include/lo2s/perf/counter/metric_writer.hpp b/include/lo2s/perf/counter/metric_writer.hpp index da72fa93..23220fd6 100644 --- a/include/lo2s/perf/counter/metric_writer.hpp +++ b/include/lo2s/perf/counter/metric_writer.hpp @@ -38,7 +38,7 @@ class MetricWriter MetricWriter(MeasurementScope scope, trace::Trace& trace) : time_converter_(time::Converter::instance()), writer_(trace.metric_writer(scope)), metric_instance_(trace.metric_instance(trace.perf_metric_class(scope), writer_.location(), - trace.location(scope.scope))), + trace.sample_writer(scope.scope).location())), metric_event_(otf2::chrono::genesis(), metric_instance_) { } diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp index ad8e71ce..606b174f 100644 --- a/include/lo2s/trace/trace.hpp +++ b/include/lo2s/trace/trace.hpp @@ -306,28 +306,6 @@ class Trace return registry_.get(ByProcess(groups_.get_process(thread))); } - const 
otf2::definition::location& location(const ExecutionScope& scope) - { - MeasurementScope sample_scope = MeasurementScope::sample(scope); - - const auto& intern_location = registry_.emplace( - ByMeasurementScope(sample_scope), intern(sample_scope.name()), - registry_.get( - ByExecutionScope(groups_.get_parent(scope))), - otf2::definition::location::location_type::cpu_thread); - - comm_locations_group_.add_member(intern_location); - - if (groups_.get_parent(scope).is_process()) - { - registry_ - .get( - ByProcess(groups_.get_process(scope.as_thread()))) - .add_member(intern_location); - } - return intern_location; - } - private: /** Add a thread with the required lock (#mutex_) held. * diff --git a/man/lo2s.1.pod b/man/lo2s.1.pod index 7b00d2b8..b29e3178 100644 --- a/man/lo2s.1.pod +++ b/man/lo2s.1.pod @@ -198,6 +198,11 @@ way to support the large PEBS feature of newer (Skylake+) Intel processors If set, only perf events for processes in the I cgroup are recorded. +=item B<-->[B<no->]B<process-recording> + +If set, process scheduling information is recorded. This is enabled by default in +system-monitoring mode. + =item B<--list-clockids> List the names of clocks that can be used as I argument. diff --git a/src/config.cpp b/src/config.cpp index d4399d6d..4dbb6838 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -228,6 +228,12 @@ void parse_program_options(int argc, const char** argv) general_options.toggle("list-knobs", "List available x86_adapt CPU knobs."); + system_mode_options + .toggle("process-recording", "Record process activity. In system monitoring: " + "(default: enabled)") + .allow_reverse() + .default_value(true); + system_mode_options .toggle("all-cpus", "Start in system-monitoring mode for all CPUs. 
" "Monitor as long as COMMAND is running or until PID exits.") @@ -535,6 +541,7 @@ void parse_program_options(int argc, const char** argv) { config.monitor_type = lo2s::MonitorType::CPU_SET; config.sampling = false; + config.process_recording = arguments.given("process-recording"); // The check for instruction sampling is a bit more complicated, because the default value // is different depending on the monitoring mode. This check here is only relevant for @@ -585,6 +592,7 @@ void parse_program_options(int argc, const char** argv) } config.monitor_type = lo2s::MonitorType::PROCESS; config.sampling = true; + config.process_recording = false; if (!arguments.given("instruction-sampling")) { diff --git a/src/monitor/cpu_set_monitor.cpp b/src/monitor/cpu_set_monitor.cpp index 44fb8586..c5fddd85 100644 --- a/src/monitor/cpu_set_monitor.cpp +++ b/src/monitor/cpu_set_monitor.cpp @@ -64,7 +64,10 @@ CpuSetMonitor::CpuSetMonitor() : MainMonitor() } } - trace_.add_threads(get_comms_for_running_threads()); + if (config().sampling || config().process_recording) + { + trace_.add_threads(get_comms_for_running_threads()); + } try { @@ -139,8 +142,10 @@ void CpuSetMonitor::run() } } - trace_.add_threads(get_comms_for_running_threads()); - + if (config().sampling || config().process_recording) + { + trace_.add_threads(get_comms_for_running_threads()); + } for (auto& monitor_elem : monitors_) { monitor_elem.second.stop(); diff --git a/src/monitor/scope_monitor.cpp b/src/monitor/scope_monitor.cpp index c97c3d1b..67e54978 100644 --- a/src/monitor/scope_monitor.cpp +++ b/src/monitor/scope_monitor.cpp @@ -45,7 +45,7 @@ namespace monitor ScopeMonitor::ScopeMonitor(ExecutionScope scope, MainMonitor& parent, bool enable_on_exec) : PollMonitor(parent.trace(), scope.name(), config().perf_read_interval), scope_(scope) { - if (config().sampling || scope.is_cpu()) + if (config().sampling || config().process_recording) { sample_writer_ = std::make_unique(scope, parent, parent.trace(), 
enable_on_exec); diff --git a/src/trace/trace.cpp b/src/trace/trace.cpp index 46df1980..20614939 100644 --- a/src/trace/trace.cpp +++ b/src/trace/trace.cpp @@ -403,12 +403,29 @@ void Trace::add_lo2s_property(const std::string& name, const std::string& value) system_tree_root_node_, intern(property_name), otf2::attribute_value{ intern(value) }); } -otf2::writer::local& Trace::sample_writer(const ExecutionScope& writer_scope) +otf2::writer::local& Trace::sample_writer(const ExecutionScope& scope) { // TODO we call this function in a hot-loop, locking doesn't sound like a good idea std::lock_guard guard(mutex_); - return archive_(location(writer_scope)); + MeasurementScope sample_scope = MeasurementScope::sample(scope); + + const auto& intern_location = registry_.emplace( + ByMeasurementScope(sample_scope), intern(sample_scope.name()), + registry_.get( + ByExecutionScope(groups_.get_parent(scope))), + otf2::definition::location::location_type::cpu_thread); + + comm_locations_group_.add_member(intern_location); + + if (groups_.get_parent(scope).is_process()) + { + registry_ + .get(ByProcess(groups_.get_process(scope.as_thread()))) + .add_member(intern_location); + } + + return archive_(intern_location); } otf2::writer::local& Trace::nec_writer(NecDevice device, const Thread& nec_thread)