Skip to content

Commit

Permalink
feat(system-mode): Add the ability to have system-mode without process tracking
Browse files Browse the repository at this point in the history

This should reduce the overhead in situations where you really only want
to track metrics in system-wide mode.
  • Loading branch information
cvonelm committed Aug 19, 2024
1 parent 954e21b commit 0ed485b
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 29 deletions.
1 change: 1 addition & 0 deletions include/lo2s/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ struct Config
// perf
std::size_t mmap_pages;
bool exclude_kernel;
bool process_recording;
// Instruction sampling
bool sampling;
std::uint64_t sampling_period;
Expand Down
2 changes: 1 addition & 1 deletion include/lo2s/perf/counter/metric_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class MetricWriter
/// Construct a metric writer for the measurement scope @p scope, backed by @p trace.
///
/// Fetches the process-wide time converter, requests the metric writer for @p scope from the
/// trace, and creates a metric instance from the perf metric class of @p scope, the location of
/// that metric writer, and a location derived from the underlying execution scope
/// (scope.scope). The metric event is initialised with the genesis timestamp.
// NOTE(review): this hunk lists the pre-change and the post-change form of the last
// metric_instance() argument back to back; only one of the two lines exists in any single
// revision of the file.
MetricWriter(MeasurementScope scope, trace::Trace& trace)
: time_converter_(time::Converter::instance()), writer_(trace.metric_writer(scope)),
metric_instance_(trace.metric_instance(trace.perf_metric_class(scope), writer_.location(),
// Pre-change form: asks the trace directly for the location of the execution scope.
trace.location(scope.scope))),
// Post-change form: takes the location from the sample writer of the execution scope.
trace.sample_writer(scope.scope).location())),
metric_event_(otf2::chrono::genesis(), metric_instance_)
{
}
Expand Down
22 changes: 0 additions & 22 deletions include/lo2s/trace/trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,28 +306,6 @@ class Trace
return registry_.get<otf2::definition::comm>(ByProcess(groups_.get_process(thread)));
}

/// Return the OTF2 location definition used for sample data of @p scope.
///
/// The location is keyed in the registry by the sample MeasurementScope of @p scope, named after
/// that measurement scope, attached to the location group of the parent of @p scope, and typed
/// as a cpu_thread location. It is also added as a member of comm_locations_group_.
///
/// This function does not acquire any lock itself.
// NOTE(review): presumably registry_.emplace() hands back the already registered definition
// when the key exists, so repeated calls are cheap and idempotent -- confirm against the
// registry implementation.
const otf2::definition::location& location(const ExecutionScope& scope)
{
// Samples of an execution scope are recorded under its "sample" measurement scope.
MeasurementScope sample_scope = MeasurementScope::sample(scope);

const auto& intern_location = registry_.emplace<otf2::definition::location>(
ByMeasurementScope(sample_scope), intern(sample_scope.name()),
registry_.get<otf2::definition::location_group>(
ByExecutionScope(groups_.get_parent(scope))),
otf2::definition::location::location_type::cpu_thread);

comm_locations_group_.add_member(intern_location);

// Only scopes whose parent is a process are additionally registered in that process'
// comm group; scope is then interpreted as a thread to find the owning process.
if (groups_.get_parent(scope).is_process())
{
registry_
.get<otf2::definition::comm_group>(
ByProcess(groups_.get_process(scope.as_thread())))
.add_member(intern_location);
}
return intern_location;
}

private:
/** Add a thread with the required lock (#mutex_) held.
*
Expand Down
5 changes: 5 additions & 0 deletions man/lo2s.1.pod
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ way to support the large PEBS feature of newer (Skylake+) Intel processors

If set, only perf events for processes in the I<NAME> cgroup are recorded.

=item B<-->[B<no->]B<process-recording>

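If set, process scheduling information is recorded. This option only takes effect in
system-monitoring mode, where it is enabled by default. Use B<--no-process-recording> to
reduce overhead when only metrics are of interest.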

=item B<--list-clockids>

List the names of clocks that can be used as I<CLOCKID> argument.
Expand Down
8 changes: 8 additions & 0 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,12 @@ void parse_program_options(int argc, const char** argv)

general_options.toggle("list-knobs", "List available x86_adapt CPU knobs.");

system_mode_options
.toggle("process-recording", "Record process activity. In system monitoring: "
"(default: enabled)")
.allow_reverse()
.default_value(true);

system_mode_options
.toggle("all-cpus", "Start in system-monitoring mode for all CPUs. "
"Monitor as long as COMMAND is running or until PID exits.")
Expand Down Expand Up @@ -535,6 +541,7 @@ void parse_program_options(int argc, const char** argv)
{
config.monitor_type = lo2s::MonitorType::CPU_SET;
config.sampling = false;
config.process_recording = arguments.given("process-recording");

// The check for instruction sampling is a bit more complicated, because the default value
// is different depending on the monitoring mode. This check here is only relevant for
Expand Down Expand Up @@ -585,6 +592,7 @@ void parse_program_options(int argc, const char** argv)
}
config.monitor_type = lo2s::MonitorType::PROCESS;
config.sampling = true;
config.process_recording = false;

if (!arguments.given("instruction-sampling"))
{
Expand Down
11 changes: 8 additions & 3 deletions src/monitor/cpu_set_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ CpuSetMonitor::CpuSetMonitor() : MainMonitor()
}
}

trace_.add_threads(get_comms_for_running_threads());
if (config().sampling || config().process_recording)
{
trace_.add_threads(get_comms_for_running_threads());
}

try
{
Expand Down Expand Up @@ -139,8 +142,10 @@ void CpuSetMonitor::run()
}
}

trace_.add_threads(get_comms_for_running_threads());

if (config().sampling || config().process_recording)
{
trace_.add_threads(get_comms_for_running_threads());
}
for (auto& monitor_elem : monitors_)
{
monitor_elem.second.stop();
Expand Down
2 changes: 1 addition & 1 deletion src/monitor/scope_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace monitor
ScopeMonitor::ScopeMonitor(ExecutionScope scope, MainMonitor& parent, bool enable_on_exec)
: PollMonitor(parent.trace(), scope.name(), config().perf_read_interval), scope_(scope)
{
if (config().sampling || scope.is_cpu())
if (config().sampling || config().process_recording)
{
sample_writer_ =
std::make_unique<perf::sample::Writer>(scope, parent, parent.trace(), enable_on_exec);
Expand Down
21 changes: 19 additions & 2 deletions src/trace/trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,29 @@ void Trace::add_lo2s_property(const std::string& name, const std::string& value)
system_tree_root_node_, intern(property_name), otf2::attribute_value{ intern(value) });
}

// Returns the local OTF2 writer that archive_ provides for the sample location of the given
// execution scope. mutex_ is held for the duration of the call.
// NOTE(review): this hunk shows the old and the new signature as well as the old one-line
// body (delegating to location()) and the new inlined body one after another; only one
// variant exists in any single revision of the file.
otf2::writer::local& Trace::sample_writer(const ExecutionScope& writer_scope)
otf2::writer::local& Trace::sample_writer(const ExecutionScope& scope)
{
// TODO we call this function in a hot-loop, locking doesn't sound like a good idea
std::lock_guard<std::recursive_mutex> guard(mutex_);

// Old body: the location definition was created by the separate location() helper.
return archive_(location(writer_scope));
// New body: the location is registered inline, under the lock taken above.
// Samples of an execution scope are recorded under its "sample" measurement scope.
MeasurementScope sample_scope = MeasurementScope::sample(scope);

// Location keyed by the sample measurement scope, named after it, placed in the location
// group of the scope's parent, and typed as cpu_thread.
const auto& intern_location = registry_.emplace<otf2::definition::location>(
ByMeasurementScope(sample_scope), intern(sample_scope.name()),
registry_.get<otf2::definition::location_group>(
ByExecutionScope(groups_.get_parent(scope))),
otf2::definition::location::location_type::cpu_thread);

comm_locations_group_.add_member(intern_location);

// Scopes whose parent is a process are also added to that process' comm group.
if (groups_.get_parent(scope).is_process())
{
registry_
.get<otf2::definition::comm_group>(ByProcess(groups_.get_process(scope.as_thread())))
.add_member(intern_location);
}

return archive_(intern_location);
}

otf2::writer::local& Trace::nec_writer(NecDevice device, const Thread& nec_thread)
Expand Down

0 comments on commit 0ed485b

Please sign in to comment.