From 9878889139ad5c749fbbc7199cf531d5c87e3741 Mon Sep 17 00:00:00 2001 From: Christian von Elm Date: Sun, 8 Dec 2024 16:00:21 +0100 Subject: [PATCH] Revert "completely remove config() dependency, use event_provider as Event factory" This reverts commit d73cc731254e56d2c4a3220f7b7eb4d6600c12c5. --- include/lo2s/measurement_scope.hpp | 8 - include/lo2s/perf/bio/writer.hpp | 11 +- .../lo2s/perf/counter/counter_collection.hpp | 4 +- .../lo2s/perf/counter/counter_provider.hpp | 10 +- include/lo2s/perf/counter/group/reader.hpp | 4 +- .../lo2s/perf/counter/userspace/reader.hpp | 4 +- .../userspace/userspace_counter_buffer.hpp | 4 + include/lo2s/perf/event.hpp | 105 ++-- include/lo2s/perf/event_provider.hpp | 28 +- include/lo2s/perf/io_reader.hpp | 6 +- include/lo2s/perf/sample/reader.hpp | 9 +- include/lo2s/perf/syscall/reader.hpp | 23 +- include/lo2s/perf/time/reader.hpp | 4 +- include/lo2s/perf/tracepoint/event.hpp | 2 +- include/lo2s/perf/tracepoint/reader.hpp | 9 +- include/lo2s/platform.hpp | 2 +- include/lo2s/trace/reg_keys.hpp | 2 +- include/lo2s/trace/trace.hpp | 2 +- src/config.cpp | 7 +- src/monitor/tracepoint_monitor.cpp | 10 +- src/perf/counter/counter_provider.cpp | 33 +- src/perf/counter/group/reader.cpp | 15 +- src/perf/counter/userspace/reader.cpp | 2 +- src/perf/event.cpp | 470 ++++-------------- src/perf/event_provider.cpp | 431 +++++++++++----- src/perf/time/reader.cpp | 3 +- src/perf/tracepoint/event.cpp | 4 +- src/platform.cpp | 2 +- 28 files changed, 526 insertions(+), 688 deletions(-) diff --git a/include/lo2s/measurement_scope.hpp b/include/lo2s/measurement_scope.hpp index e9c8d114..f3841815 100644 --- a/include/lo2s/measurement_scope.hpp +++ b/include/lo2s/measurement_scope.hpp @@ -34,7 +34,6 @@ enum class MeasurementScopeType BIO, SYSCALL, CUDA, - TRACEPOINT, UNKNOWN }; @@ -86,11 +85,6 @@ struct MeasurementScope return { MeasurementScopeType::CUDA, s }; } - static MeasurementScope tracepoint(ExecutionScope s) - { - return { MeasurementScopeType::TRACEPOINT, s }; - } - friend bool operator==(const MeasurementScope& lhs, const MeasurementScope& rhs) { return (lhs.scope == rhs.scope) && lhs.type == rhs.type; @@ -125,8 +119,6 @@ struct MeasurementScope return fmt::format("syscall events for {}", scope.name()); case lo2s::MeasurementScopeType::CUDA: return fmt::format("cuda kernel events for {}", scope.name()); - case MeasurementScopeType::TRACEPOINT: - return fmt::format("tracepoints for {}", scope.name()); default: throw new std::runtime_error("Unknown ExecutionScopeType!"); } diff --git a/include/lo2s/perf/bio/writer.hpp b/include/lo2s/perf/bio/writer.hpp index 5e7cb593..8f07f993 100644 --- a/include/lo2s/perf/bio/writer.hpp +++ b/include/lo2s/perf/bio/writer.hpp @@ -157,12 +157,9 @@ class Writer std::vector get_tracepoints() { - bio_queue_ = - perf::EventProvider::instance().create_tracepoint_event("block:block_bio_queue"); - bio_issue_ = - perf::EventProvider::instance().create_tracepoint_event("block:block_rq_issue"); - bio_complete_ = - perf::EventProvider::instance().create_tracepoint_event("block:block_rq_complete"); + bio_queue_ = perf::tracepoint::TracepointEvent("block:block_bio_queue"); + bio_issue_ = perf::tracepoint::TracepointEvent("block:block_rq_issue"); + bio_complete_ = perf::tracepoint::TracepointEvent("block:block_rq_complete"); return { bio_queue_, bio_issue_, bio_complete_ }; } @@ -184,11 +181,9 @@ class Writer trace::Trace& trace_; time::Converter& time_converter_; - // Unabailable until get_tracepoints() is called perf::tracepoint::TracepointEvent bio_queue_; perf::tracepoint::TracepointEvent bio_issue_; perf::tracepoint::TracepointEvent bio_complete_; - // The unit "sector" is always 512 bit large, regardless of the actual sector size of the device static constexpr int SECTOR_SIZE = 512; }; diff --git a/include/lo2s/perf/counter/counter_collection.hpp b/include/lo2s/perf/counter/counter_collection.hpp index f1690a24..864aaccb 100644 --- a/include/lo2s/perf/counter/counter_collection.hpp +++ b/include/lo2s/perf/counter/counter_collection.hpp @@ -33,8 +33,8 @@ namespace counter { struct CounterCollection { - Event leader; - std::vector counters; + PerfEvent leader; + std::vector counters; double get_scale(int index) const { diff --git a/include/lo2s/perf/counter/counter_provider.hpp b/include/lo2s/perf/counter/counter_provider.hpp index 3071e225..7f925b55 100644 --- a/include/lo2s/perf/counter/counter_provider.hpp +++ b/include/lo2s/perf/counter/counter_provider.hpp @@ -23,7 +23,7 @@ #include #include -#include +#include #include @@ -49,7 +49,6 @@ class CounterProvider void initialize_group_counters(const std::string& leader, const std::vector& counters); void initialize_userspace_counters(const std::vector& counters); - void initialize_tracepoints(const std::vector& tracepoints); bool has_group_counters(ExecutionScope scope); bool has_userspace_counters(ExecutionScope scope); @@ -58,10 +57,9 @@ class CounterProvider std::vector get_tracepoint_event_names(); private: - Event group_leader_; - std::vector group_events_; - std::vector userspace_events_; - std::vector tracepoint_events_; + PerfEvent group_leader_; + std::vector group_events_; + std::vector userspace_events_; }; } // namespace counter } // namespace perf diff --git a/include/lo2s/perf/counter/group/reader.hpp b/include/lo2s/perf/counter/group/reader.hpp index d0c79915..034a6a00 100644 --- a/include/lo2s/perf/counter/group/reader.hpp +++ b/include/lo2s/perf/counter/group/reader.hpp @@ -60,8 +60,8 @@ class Reader : public EventReader }; protected: - EventGuard counter_leader_; - std::vector counters_; + PerfEventGuard counter_leader_; + std::vector counters_; CounterCollection counter_collection_; GroupCounterBuffer counter_buffer_; }; diff --git a/include/lo2s/perf/counter/userspace/reader.hpp b/include/lo2s/perf/counter/userspace/reader.hpp index 0b5ae22b..fbcfdfcb 100644 --- a/include/lo2s/perf/counter/userspace/reader.hpp +++ b/include/lo2s/perf/counter/userspace/reader.hpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -60,7 +60,7 @@ class Reader UserspaceCounterBuffer counter_buffer_; int timer_fd_; - std::vector counters_; + std::vector counters_; std::vector data_; }; } // namespace userspace diff --git a/include/lo2s/perf/counter/userspace/userspace_counter_buffer.hpp b/include/lo2s/perf/counter/userspace/userspace_counter_buffer.hpp index 99fabbbb..d2d34aca 100644 --- a/include/lo2s/perf/counter/userspace/userspace_counter_buffer.hpp +++ b/include/lo2s/perf/counter/userspace/userspace_counter_buffer.hpp @@ -35,6 +35,10 @@ namespace userspace { struct UserspaceReadFormat { + UserspaceReadFormat() : value(0), time_enabled(0), time_running(0) + { + } + uint64_t value; uint64_t time_enabled; uint64_t time_running; diff --git a/include/lo2s/perf/event.hpp b/include/lo2s/perf/event.hpp index ca0ad958..1a451579 100644 --- a/include/lo2s/perf/event.hpp +++ b/include/lo2s/perf/event.hpp @@ -59,29 +59,34 @@ enum class Availability UNIVERSAL }; -class EventGuard; +class PerfEventGuard; /** * Base class for all Event types * contains common attributes */ -class Event +class PerfEvent { public: - Event([[maybe_unused]] uint64_t addr, bool enable_on_exec = false); - Event(std::string name, perf_type_id type, std::uint64_t config, std::uint64_t config1 = 0); - Event(); + PerfEvent(const std::string& ev_name, bool enable_on_exec = false); + PerfEvent([[maybe_unused]] uint64_t addr, bool enable_on_exec = false); + PerfEvent(std::string name, perf_type_id type, std::uint64_t config, std::uint64_t config1 = 0, + std::set cpus = std::set()); + PerfEvent(); /** - * returns an opened instance of any Event object + * returns an opened instance of any PerfEvent object */ - EventGuard open(std::variant location, int cgroup_fd = -1); - EventGuard open(ExecutionScope location, int cgroup_fd = -1); + PerfEventGuard open(std::variant location, int cgroup_fd = config().cgroup_fd); + PerfEventGuard open(ExecutionScope location, int cgroup_fd = config().cgroup_fd); /** - * returns an opened instance of a Event object after formating it as a leader Event + * returns an opened instance of a PerfEvent object after formating it as a leader Event */ - EventGuard open_as_group_leader(ExecutionScope location, int cgroup_fd = -1); + PerfEventGuard open_as_group_leader(std::variant location, + int cgroup_fd = config().cgroup_fd); + PerfEventGuard open_as_group_leader(ExecutionScope location, + int cgroup_fd = config().cgroup_fd); const Availability& get_availability() const { @@ -123,37 +128,12 @@ class Event unit_ = unit; } - void set_clock_attrs(const bool& use_clockid, const clockid_t& clockid) - { -#ifndef USE_HW_BREAKPOINT_COMPAT - attr_.use_clockid = use_clockid; - attr_.clockid = clockid; -#endif - } - - // When we poll on the fd given by perf_event_open, wakeup, when our buffer is 80% full - // Default behaviour is to wakeup on every event, which is horrible performance wise - void set_watermark(const size_t& mmap_pages) - { - attr_.watermark = 1; - attr_.wakeup_watermark = static_cast(0.8 * mmap_pages * sysconf(_SC_PAGESIZE)); - } - - void set_exclude_kernel(const bool& exclude_kernel) - { - attr_.exclude_kernel = exclude_kernel; - } - void set_sample_period(const int& period); - void set_sample_freq(const uint64_t& freq); - void event_attr_update(std::uint64_t value, const std::string& format); + void set_sample_freq(); + void set_availability(); - void parse_pmu_path(const std::string& ev_name); - void parse_cpus(); const std::set& supported_cpus() const; - bool is_valid() const; - bool event_is_openable(); bool is_available_in(ExecutionScope scope) const { @@ -164,23 +144,22 @@ class Event bool degrade_precision(); - friend bool operator==(const Event& lhs, const Event& rhs) + friend bool operator==(const PerfEvent& lhs, const PerfEvent& rhs) { return !memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)); } - friend bool operator<(const Event& lhs, const Event& rhs) + friend bool operator<(const PerfEvent& lhs, const PerfEvent& rhs) { return memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)); } - friend bool operator>(const Event& lhs, const Event& rhs) + friend bool operator>(const PerfEvent& lhs, const PerfEvent& rhs) { return memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)); } protected: - void update_availability(); void set_common_attrs(bool enable_on_exec); struct perf_event_attr attr_; @@ -190,45 +169,41 @@ class Event std::string name_ = ""; std::set cpus_; Availability availability_ = Availability::UNAVAILABLE; - - std::filesystem::path pmu_path_; - std::string pmu_name_; }; /** * Contains an event parsed from sysfs - * @note call on use_sampling_options() after creation to get a valid + * @note call on as_sample() after creation to get a valid * event, otherwise the availability will be set to UNAVAILABLE */ -class SysfsEvent : public Event +class SysfsEvent : public PerfEvent { public: - using Event::Event; - SysfsEvent(const std::string& ev_name, bool enable_on_exec = false); + using PerfEvent::PerfEvent; - void use_sampling_options(const bool& use_pebs, const bool& sampling, const bool& enable_cct); + void as_sample(); }; /** - * Contains an opened instance of Event. - * Use any Event.open() method to construct an object + * Contains an opened instance of PerfEvent. + * Use any PerfEvent.open() method to construct an object */ -class EventGuard +class PerfEventGuard { public: - EventGuard(); - EventGuard(Event& ev, std::variant location, int group_fd, int cgroup_fd); + PerfEventGuard(); + PerfEventGuard(PerfEvent& ev, std::variant location, int group_fd, int cgroup_fd); - EventGuard(EventGuard&) = delete; - EventGuard& operator=(const EventGuard&) = delete; + PerfEventGuard(PerfEventGuard&) = delete; + PerfEventGuard& operator=(const PerfEventGuard&) = delete; - EventGuard(EventGuard&& other) + PerfEventGuard(PerfEventGuard&& other) { std::swap(fd_, other.fd_); std::swap(ev_, other.ev_); } - EventGuard& operator=(EventGuard&& other) + PerfEventGuard& operator=(PerfEventGuard&& other) { std::swap(fd_, other.fd_); std::swap(ev_, other.ev_); @@ -238,7 +213,10 @@ class EventGuard /** * opens child as a counter of the calling (leader) event */ - EventGuard open_child(Event child, ExecutionScope location, int cgroup_fd = -1); + PerfEventGuard open_child(PerfEvent child, std::variant location, + int cgroup_fd = config().cgroup_fd); + PerfEventGuard open_child(PerfEvent child, ExecutionScope location, + int cgroup_fd = config().cgroup_fd); void enable(); void disable(); @@ -248,8 +226,8 @@ class EventGuard ioctl(fd_, PERF_EVENT_IOC_ID, &id); } - void set_output(const EventGuard& other_ev); - void set_syscall_filter(const std::vector& filter); + void set_output(const PerfEventGuard& other_ev); + void set_syscall_filter(); int get_fd() const { @@ -264,7 +242,6 @@ class EventGuard template T read() { - static_assert(std::is_pod_v == true); T val; if (::read(fd_, &val, sizeof(val)) == -1) @@ -275,14 +252,14 @@ class EventGuard return val; } - ~EventGuard() + ~PerfEventGuard() { close(fd_); } protected: int fd_; - Event ev_; + PerfEvent ev_; }; } // namespace perf diff --git a/include/lo2s/perf/event_provider.hpp b/include/lo2s/perf/event_provider.hpp index 4ee3b456..6ec4a465 100644 --- a/include/lo2s/perf/event_provider.hpp +++ b/include/lo2s/perf/event_provider.hpp @@ -21,13 +21,13 @@ #pragma once +#include + #include #include #include #include -#include - namespace lo2s { namespace perf @@ -45,25 +45,14 @@ class EventProvider return instance_mutable(); } - static Event get_event_by_name(const std::string& name); + static PerfEvent get_event_by_name(const std::string& name); static bool has_event(const std::string& name); - static std::vector get_predefined_events(); + static std::vector get_predefined_events(); static std::vector get_pmu_events(); - static Event fallback_metric_leader_event(); - - static Event create_time_event(uint64_t local_time); - static Event create_raw_event(const std::string& name, perf_type_id type, std::uint64_t config, - std::uint64_t config1 = 0); - static SysfsEvent create_sampling_event(const bool& enable_on_exec); - static SysfsEvent create_raw_sysfs_event(const std::string& name); - static SysfsEvent create_sysfs_event(const std::string& name); - static tracepoint::TracepointEvent - create_raw_tracepoint_event(const std::string& name, const bool& enable_on_exec = false); - static tracepoint::TracepointEvent create_tracepoint_event(const std::string& name, - const bool& enable_on_exec = false); + static PerfEvent fallback_metric_leader_event(); class InvalidEvent : public std::runtime_error { @@ -81,12 +70,9 @@ class EventProvider return e; } - static void apply_config_attrs(Event& event); - static void apply_default_attrs(Event& event); - - Event cache_event(const std::string& name); + PerfEvent cache_event(const std::string& name); - std::unordered_map event_map_; + std::unordered_map event_map_; }; } // namespace perf diff --git a/include/lo2s/perf/io_reader.hpp b/include/lo2s/perf/io_reader.hpp index fcd86c35..eec5b610 100644 --- a/include/lo2s/perf/io_reader.hpp +++ b/include/lo2s/perf/io_reader.hpp @@ -22,7 +22,6 @@ #pragma once #include -#include #include #include #include @@ -61,9 +60,8 @@ struct __attribute((__packed__)) TracepointSampleType struct IoReaderIdentity { - IoReaderIdentity(std::string tracepoint_name, Cpu cpu) : cpu(cpu) + IoReaderIdentity(std::string tracepoint, Cpu cpu) : tracepoint(tracepoint), cpu(cpu) { - tracepoint = EventProvider::instance().create_tracepoint_event(tracepoint_name); } tracepoint::TracepointEvent tracepoint; @@ -155,7 +153,7 @@ class IoReader : public PullReader private: IoReaderIdentity identity_; - EventGuard event_; + PerfEventGuard event_; }; } // namespace perf } // namespace lo2s diff --git a/include/lo2s/perf/sample/reader.hpp b/include/lo2s/perf/sample/reader.hpp index 1ba9bc14..814b578a 100644 --- a/include/lo2s/perf/sample/reader.hpp +++ b/include/lo2s/perf/sample/reader.hpp @@ -21,6 +21,7 @@ #pragma once +#include #include #include #include @@ -84,13 +85,15 @@ class Reader : public EventReader Log::debug() << "initializing event_reader for:" << scope.name() << ", enable_on_exec: " << enable_on_exec; - Event event = EventProvider::instance().create_sampling_event(enable_on_exec); + SysfsEvent event(config().sampling_event, enable_on_exec); + + event.as_sample(); do { try { - event_ = event.open(scope, config().cgroup_fd); + event_ = event.open(scope); } catch (const std::system_error& e) { @@ -158,7 +161,7 @@ class Reader : public EventReader bool has_cct_; private: - EventGuard event_; + PerfEventGuard event_; }; } // namespace sample } // namespace perf diff --git a/include/lo2s/perf/syscall/reader.hpp b/include/lo2s/perf/syscall/reader.hpp index 25e4d154..c30bc88a 100644 --- a/include/lo2s/perf/syscall/reader.hpp +++ b/include/lo2s/perf/syscall/reader.hpp @@ -23,8 +23,8 @@ #include #include -#include #include +#include #include #include #include @@ -68,15 +68,13 @@ class Reader : public EventReader Reader(Cpu cpu) : cpu_(cpu) { - tracepoint::TracepointEvent enter_event = - EventProvider::instance().create_tracepoint_event("raw_syscalls:sys_enter"); - tracepoint::TracepointEvent exit_event = - EventProvider::instance().create_tracepoint_event("raw_syscalls:sys_exit"); + tracepoint::TracepointEvent enter_event("raw_syscalls:sys_enter"); + tracepoint::TracepointEvent exit_event("raw_syscalls:sys_exit"); try { - enter_ev_ = enter_event.open(cpu_, config().cgroup_fd); - exit_ev_ = exit_event.open(cpu_, config().cgroup_fd); + enter_ev_ = enter_event.open(cpu_); + exit_ev_ = exit_event.open(cpu_); } catch (const std::system_error& e) { @@ -89,8 +87,11 @@ class Reader : public EventReader exit_ev_.set_output(enter_ev_); - enter_ev_.set_syscall_filter(config().syscall_filter); - exit_ev_.set_syscall_filter(config().syscall_filter); + if (!config().syscall_filter.empty()) + { + enter_ev_.set_syscall_filter(); + exit_ev_.set_syscall_filter(); + } enter_ev_.enable(); exit_ev_.enable(); @@ -118,8 +119,8 @@ class Reader : public EventReader private: Cpu cpu_; - EventGuard enter_ev_; - EventGuard exit_ev_; + PerfEventGuard enter_ev_; + PerfEventGuard exit_ev_; }; } // namespace syscall diff --git a/include/lo2s/perf/time/reader.hpp b/include/lo2s/perf/time/reader.hpp index ab5f3897..cf5046bc 100644 --- a/include/lo2s/perf/time/reader.hpp +++ b/include/lo2s/perf/time/reader.hpp @@ -22,7 +22,7 @@ #pragma once #include -#include +#include #include #include @@ -67,7 +67,7 @@ class Reader : public EventReader perf::Clock::time_point perf_time; private: - EventGuard ev_instance_; + PerfEventGuard ev_instance_; }; } // namespace time } // namespace perf diff --git a/include/lo2s/perf/tracepoint/event.hpp b/include/lo2s/perf/tracepoint/event.hpp index 0b0b9368..ee4668e8 100644 --- a/include/lo2s/perf/tracepoint/event.hpp +++ b/include/lo2s/perf/tracepoint/event.hpp @@ -33,7 +33,7 @@ namespace tracepoint /** * Contains an event that is addressable via name */ -class TracepointEvent : public Event +class TracepointEvent : public PerfEvent { public: class ParseError : public std::runtime_error diff --git a/include/lo2s/perf/tracepoint/reader.hpp b/include/lo2s/perf/tracepoint/reader.hpp index fab90b36..32d48fbf 100644 --- a/include/lo2s/perf/tracepoint/reader.hpp +++ b/include/lo2s/perf/tracepoint/reader.hpp @@ -21,9 +21,9 @@ #pragma once +#include #include -#include #include #include @@ -114,12 +114,11 @@ class Reader : public EventReader RecordDynamicFormat raw_data; }; - Reader(Cpu cpu, std::string name) - : event_(EventProvider::instance().create_tracepoint_event(name)), cpu_(cpu) + Reader(Cpu cpu, std::string name) : event_(name), cpu_(cpu) { try { - ev_instance_ = event_.open(cpu_, config().cgroup_fd); + ev_instance_ = event_.open(cpu_); } catch (const std::system_error& e) { @@ -161,7 +160,7 @@ class Reader : public EventReader private: Cpu cpu_; - EventGuard ev_instance_; + PerfEventGuard ev_instance_; }; } // namespace tracepoint diff --git a/include/lo2s/platform.hpp b/include/lo2s/platform.hpp index 62d4faa0..b20ccba7 100644 --- a/include/lo2s/platform.hpp +++ b/include/lo2s/platform.hpp @@ -94,6 +94,6 @@ enum class Processor ARM1176 = 204, }; -std::vector get_mem_events(); +std::vector get_mem_events(); } // namespace platform } // namespace lo2s diff --git a/include/lo2s/trace/reg_keys.hpp b/include/lo2s/trace/reg_keys.hpp index 01734c65..1b17f199 100644 --- a/include/lo2s/trace/reg_keys.hpp +++ b/include/lo2s/trace/reg_keys.hpp @@ -137,7 +137,7 @@ struct BySamplingEventName { }; -using BySamplingEvent = SimpleKeyType; +using BySamplingEvent = SimpleKeyType; struct ByCounterCollectionTag { diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp index b656f725..6741e268 100644 --- a/include/lo2s/trace/trace.hpp +++ b/include/lo2s/trace/trace.hpp @@ -176,7 +176,7 @@ class Trace return cpuid_metric_class_; } - otf2::definition::metric_member& get_event_metric_member(perf::Event event) + otf2::definition::metric_member& get_event_metric_member(perf::PerfEvent event) { return registry_.emplace( BySamplingEvent(event), intern(event.get_name()), intern(event.get_name()), diff --git a/src/config.cpp b/src/config.cpp index e3595346..e79c4335 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -72,7 +72,7 @@ static inline void list_arguments_sorted(std::ostream& os, const std::string& de } static inline void print_availability(std::ostream& os, const std::string& description, - std::vector events) + std::vector events) { std::vector event_names; for (const auto& ev : events) @@ -404,6 +404,7 @@ void parse_program_options(int argc, const char** argv) config.sampling_period = arguments.as("count"); config.enable_cct = arguments.given("call-graph"); config.suppress_ip = arguments.given("no-ip"); + config.tracepoint_events = arguments.get_all("tracepoint"); config.use_x86_energy = arguments.given("x86-energy"); config.use_sensors = arguments.given("sensors"); config.use_block_io = arguments.given("block-io"); @@ -491,7 +492,7 @@ void parse_program_options(int argc, const char** argv) // TODO: find a better solution ? std::vector sys_events = perf::EventProvider::get_pmu_events(); - std::vector events(sys_events.begin(), sys_events.end()); + std::vector events(sys_events.begin(), sys_events.end()); print_availability(std::cout, "Kernel PMU events", events); #ifdef HAVE_LIBPFM @@ -795,8 +796,6 @@ void parse_program_options(int argc, const char** argv) perf_group_events.emplace_back("cpu-cycles"); } - perf::counter::CounterProvider::instance().initialize_tracepoints( - arguments.get_all("tracepoint")); perf::counter::CounterProvider::instance().initialize_group_counters( arguments.get("metric-leader"), perf_group_events); perf::counter::CounterProvider::instance().initialize_userspace_counters(perf_userspace_events); diff --git a/src/monitor/tracepoint_monitor.cpp b/src/monitor/tracepoint_monitor.cpp index 0d937598..5719c40f 100644 --- a/src/monitor/tracepoint_monitor.cpp +++ b/src/monitor/tracepoint_monitor.cpp @@ -36,15 +36,11 @@ namespace monitor TracepointMonitor::TracepointMonitor(trace::Trace& trace, Cpu cpu) : monitor::PollMonitor(trace, "", config().perf_read_interval), cpu_(cpu) { - perf::counter::CounterCollection tracepoint_collection = - perf::counter::CounterProvider::instance().collection_for( - MeasurementScope::tracepoint(cpu_.as_scope())); - - for (const auto& event : tracepoint_collection.counters) + for (const auto& event_name : config().tracepoint_events) { - auto& mc = trace.tracepoint_metric_class(event.get_name()); + auto& mc = trace.tracepoint_metric_class(event_name); std::unique_ptr writer = - std::make_unique(cpu, event.get_name(), trace, mc); + std::make_unique(cpu, event_name, trace, mc); add_fd(writer->fd()); perf_writers_.emplace(std::piecewise_construct, std::forward_as_tuple(writer->fd()), diff --git a/src/perf/counter/counter_provider.cpp b/src/perf/counter/counter_provider.cpp index 91bf8d75..4713f9d2 100644 --- a/src/perf/counter/counter_provider.cpp +++ b/src/perf/counter/counter_provider.cpp @@ -33,24 +33,6 @@ namespace perf { namespace counter { -void CounterProvider::initialize_tracepoints(const std::vector& tracepoints) -{ - assert(tracepoint_events_.empty()); - - for (const auto& ev_name : tracepoints) - { - try - { - tracepoint_events_.emplace_back( - EventProvider::instance().create_raw_tracepoint_event(ev_name)); - } - catch (const perf::EventProvider::InvalidEvent& e) - { - Log::warn() << "'" << ev_name - << "' does not name a known event, ignoring! (reason: " << e.what() << ")"; - } - } -} void CounterProvider::initialize_userspace_counters(const std::vector& counters) { @@ -144,8 +126,7 @@ void CounterProvider::initialize_group_counters(const std::string& leader, CounterCollection CounterProvider::collection_for(MeasurementScope scope) { assert(scope.type == MeasurementScopeType::GROUP_METRIC || - scope.type == MeasurementScopeType::USERSPACE_METRIC || - scope.type == MeasurementScopeType::TRACEPOINT); + scope.type == MeasurementScopeType::USERSPACE_METRIC); CounterCollection res; if (scope.type == MeasurementScopeType::GROUP_METRIC) @@ -162,19 +143,9 @@ CounterCollection CounterProvider::collection_for(MeasurementScope scope) } } } - else if (scope.type == MeasurementScopeType::USERSPACE_METRIC) - { - for (auto& ev : userspace_events_) - { - if (ev.is_available_in(scope.scope)) - { - res.counters.emplace_back(std::move(ev)); - } - } - } else { - for (auto& ev : tracepoint_events_) + for (auto& ev : userspace_events_) { if (ev.is_available_in(scope.scope)) { diff --git a/src/perf/counter/group/reader.cpp b/src/perf/counter/group/reader.cpp index bbd547eb..1492779d 100644 --- a/src/perf/counter/group/reader.cpp +++ b/src/perf/counter/group/reader.cpp @@ -53,21 +53,12 @@ Reader::Reader(ExecutionScope scope, bool enable_on_exec) CounterProvider::instance().collection_for(MeasurementScope::group_metric(scope))), counter_buffer_(counter_collection_.counters.size() + 1) { - if (config().metric_use_frequency) - { - counter_collection_.leader.set_sample_freq(config().metric_frequency); - } - else - { - counter_collection_.leader.set_sample_period(config().metric_count); - } - + counter_collection_.leader.set_sample_freq(); do { try { - counter_leader_ = - counter_collection_.leader.open_as_group_leader(scope, config().cgroup_fd); + counter_leader_ = counter_collection_.leader.open_as_group_leader(scope); } catch (const std::system_error& e) { @@ -96,7 +87,7 @@ Reader::Reader(ExecutionScope scope, bool enable_on_exec) { if (counter_ev.is_available_in(scope)) { - EventGuard counter; + PerfEventGuard counter; counter_ev.get_attr().exclude_kernel = counter_collection_.leader.get_attr().exclude_kernel; do diff --git a/src/perf/counter/userspace/reader.cpp b/src/perf/counter/userspace/reader.cpp index c8342226..26b50158 100644 --- a/src/perf/counter/userspace/reader.cpp +++ b/src/perf/counter/userspace/reader.cpp @@ -53,7 +53,7 @@ Reader::Reader(ExecutionScope scope) { for (auto& event : counter_collection_.counters) { - EventGuard counter; + PerfEventGuard counter; try { diff --git a/src/perf/event.cpp b/src/perf/event.cpp index 7dc3edc4..b3a42042 100644 --- a/src/perf/event.cpp +++ b/src/perf/event.cpp @@ -23,7 +23,6 @@ #include #include -#include #include @@ -38,16 +37,6 @@ namespace lo2s namespace perf { -constexpr std::uint64_t operator"" _u64(unsigned long long int lit) -{ - return static_cast(lit); -} - -constexpr std::uint64_t bit(int bitnumber) -{ - return static_cast(1_u64 << bitnumber); -} - // helper for visit function template struct overloaded : Ts... @@ -57,80 +46,7 @@ struct overloaded : Ts... template overloaded(Ts...) -> overloaded; -template -T read_file_or_else(std::string filename, T or_else) -{ - T val; - std::ifstream stream(filename); - stream >> val; - if (stream.fail()) - { - return or_else; - } - return val; -} - -static std::uint64_t parse_bitmask(const std::string& format) -{ - enum BITMASK_REGEX_GROUPS - { - BM_WHOLE_MATCH, - BM_BEGIN, - BM_END, - }; - - std::uint64_t mask = 0x0; - - static const std::regex bit_mask_regex(R"((\d+)?(?:-(\d+)))"); - const std::sregex_iterator end; - std::smatch bit_mask_match; - for (std::sregex_iterator i = { format.begin(), format.end(), bit_mask_regex }; i != end; ++i) - { - const auto& match = *i; - int start = std::stoi(match[BM_BEGIN]); - int end = (match[BM_END].length() == 0) ? start : std::stoi(match[BM_END]); - - const auto len = (end + 1) - start; - if (start < 0 || end > 63 || len > 64) - { - throw EventProvider::InvalidEvent("invalid config mask"); - } - - /* Set `len` bits and shift them to where they should start. - * 4-bit example: format "1-3" produces mask 0b1110. - * start := 1, end := 3 - * len := 3 + 1 - 1 = 3 - * bits := bit(3) - 1 = 0b1000 - 1 = 0b0111 - * mask := 0b0111 << 1 = 0b1110 - * */ - - // Shifting by 64 bits causes undefined behaviour, so in this case set - // all bits by assigning the maximum possible value for std::uint64_t. - const std::uint64_t bits = - (len == 64) ? std::numeric_limits::max() : bit(len) - 1; - - mask |= bits << start; - } - Log::debug() << std::showbase << std::hex << "config mask: " << format << " = " << mask - << std::dec << std::noshowbase; - return mask; -} - -static constexpr std::uint64_t apply_mask(std::uint64_t value, std::uint64_t mask) -{ - std::uint64_t res = 0; - for (int mask_bit = 0, value_bit = 0; mask_bit < 64; mask_bit++) - { - if (mask & bit(mask_bit)) - { - res |= ((value >> value_bit) & bit(0)) << mask_bit; - value_bit++; - } - } - return res; -} - -Event::Event() : name_("") +PerfEvent::PerfEvent() : name_("") { memset(&attr_, 0, sizeof(attr_)); attr_.size = sizeof(attr_); @@ -143,16 +59,30 @@ Event::Event() : name_("") attr_.exclude_kernel = 1; } -Event::Event([[maybe_unused]] uint64_t addr, bool enable_on_exec) +PerfEvent::PerfEvent(const std::string& ev_name, bool enable_on_exec) : name_(ev_name) { set_common_attrs(enable_on_exec); + PerfEvent other = EventProvider::instance().get_event_by_name(ev_name); + attr_.type = static_cast(other.get_attr().type); + attr_.config = other.get_attr().config; + attr_.config1 = other.get_attr().config1; + cpus_ = other.get_cpus(); + + set_availability(); +} + +PerfEvent::PerfEvent([[maybe_unused]] uint64_t addr, bool enable_on_exec) +{ + set_common_attrs(enable_on_exec); + attr_.exclude_kernel = 1; + #ifndef USE_HW_BREAKPOINT_COMPAT attr_.type = PERF_TYPE_BREAKPOINT; attr_.bp_type = HW_BREAKPOINT_W; - attr_.bp_addr = addr; attr_.bp_len = HW_BREAKPOINT_LEN_8; attr_.wakeup_events = 1; + attr_.bp_addr = addr; #else attr_.type = PERF_TYPE_HARDWARE; attr_.config = PERF_COUNT_HW_INSTRUCTIONS; @@ -161,55 +91,41 @@ Event::Event([[maybe_unused]] uint64_t addr, bool enable_on_exec) #endif } -Event::Event(std::string name, perf_type_id type, std::uint64_t config, std::uint64_t config1) -: name_(name) +PerfEvent::PerfEvent(std::string name, perf_type_id type, std::uint64_t config, + std::uint64_t config1, std::set cpus) +: name_(name), cpus_(cpus) { memset(&attr_, 0, sizeof(attr_)); attr_.size = sizeof(attr_); + attr_.type = -1; attr_.sample_type = PERF_SAMPLE_TIME; attr_.type = type; attr_.config = config; attr_.config1 = config1; - // Needed when scaling multiplexed events, and recognize activation phases - attr_.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; - - try - { - parse_pmu_path(name_); - } - catch (const EventProvider::InvalidEvent&) // ignore - { - } - - parse_cpus(); - update_availability(); + set_availability(); } -void Event::parse_pmu_path(const std::string& ev_name) -{ - static const std::regex ev_name_regex(R"(([a-z0-9-_]+)[\/:]([a-z0-9-_]+)\/?)"); - std::smatch ev_name_match; - - if (!std::regex_match(ev_name, ev_name_match, ev_name_regex)) - { - pmu_path_ = std::filesystem::path(); - throw EventProvider::InvalidEvent("invalid event description format"); - } - - name_ = ev_name_match[2]; - pmu_name_ = ev_name_match[1]; - pmu_path_ = std::filesystem::path("/sys/bus/event_source/devices") / pmu_name_; -} - -void Event::set_common_attrs(bool enable_on_exec) +void PerfEvent::set_common_attrs(bool enable_on_exec) { memset(&attr_, 0, sizeof(attr_)); attr_.size = sizeof(attr_); attr_.type = -1; attr_.disabled = 1; +#ifndef USE_HW_BREAKPOINT_COMPAT + attr_.use_clockid = config().use_clockid; + attr_.clockid = config().clockid; +#endif + + // When we poll on the fd given by perf_event_open, wakeup, when our buffer is 80% full + // Default behaviour is to wakeup on every event, which is horrible performance wise + attr_.watermark = 1; + attr_.wakeup_watermark = + static_cast(0.8 * config().mmap_pages * sysconf(_SC_PAGESIZE)); + + attr_.exclude_kernel = config().exclude_kernel; attr_.sample_period = 1; attr_.enable_on_exec = enable_on_exec; @@ -218,129 +134,48 @@ void Event::set_common_attrs(bool enable_on_exec) attr_.sample_type = PERF_SAMPLE_TIME; } -void Event::event_attr_update(std::uint64_t value, const std::string& format) +void PerfEvent::set_sample_period(const int& period) { - // Parse config terms // - - /* Format: : - * - * We only assign the terms 'config' and 'config1'. - * - * */ - - static constexpr auto npos = std::string::npos; - const auto colon = format.find_first_of(':'); - if (colon == npos) - { - throw EventProvider::InvalidEvent("invalid format description: missing colon"); - } - - const auto target_config = format.substr(0, colon); - const auto mask = parse_bitmask(format.substr(colon + 1)); - - if (target_config == "config") - { - attr_.config |= apply_mask(value, mask); - } - - if (target_config == "config1") - { - attr_.config1 |= apply_mask(value, mask); - } + Log::debug() << "counter::Reader: sample_period: " << period; + attr_.sample_period = period; } -void Event::parse_cpus() +void PerfEvent::set_sample_freq() { - if (pmu_path_.empty()) + attr_.freq = config().metric_use_frequency; + if (attr_.freq) { - for (const auto& cpu : Topology::instance().cpus()) - { - try - { - EventGuard ev_instance = open(cpu.as_scope(), -1); - cpus_.emplace(cpu); - } - catch (const std::system_error& e) - { - } - } - - return; + Log::debug() << "counter::Reader: sample_freq: " << config().metric_frequency; + attr_.sample_freq = config().metric_frequency; } - - // If the processor is heterogenous, "cpus" contains the cores that support this PMU. If the - // PMU is an uncore PMU "cpumask" contains the cores that are logically assigned to that - // PMU. Why there need to be two seperate files instead of one, nobody knows, but simply - // parse both. - auto cpuids = parse_list_from_file(pmu_path_ / "cpus"); - - if (cpuids.empty()) + else { - cpuids = parse_list_from_file(pmu_path_ / "cpumask"); + set_sample_period(config().metric_count); } - - std::transform(cpuids.begin(), cpuids.end(), std::inserter(cpus_, cpus_.end()), - [](uint32_t cpuid) { return Cpu(cpuid); }); } -void Event::set_sample_period(const int& period) -{ - Log::debug() << "counter::Reader: sample_period: " << period; - attr_.sample_period = period; -} - -void Event::set_sample_freq(const uint64_t& freq) -{ - Log::debug() << "counter::Reader: sample_freq: " << freq; - attr_.sample_freq = freq; -} - -const std::set& Event::supported_cpus() const +const std::set& PerfEvent::supported_cpus() const { + if (cpus_.empty()) + { + return Topology::instance().cpus(); + } return cpus_; } -bool Event::is_valid() const +bool PerfEvent::is_valid() const { return (availability_ != Availability::UNAVAILABLE); } -bool Event::event_is_openable() -{ - update_availability(); - - if (!is_valid()) - { - Log::debug() << "perf event not openable, retrying with exclude_kernel=1"; - attr_.exclude_kernel = 1; - update_availability(); - - if (!is_valid()) - { - switch (errno) - { - case ENOTSUP: - Log::debug() << "perf event not supported by the running kernel: " << name_; - break; - default: - Log::debug() << "perf event " << name_ - << " not available: " << std::string(std::strerror(errno)); - break; - } - return false; - } - } - return true; -} - -void Event::update_availability() +void PerfEvent::set_availability() { - EventGuard proc_ev; - EventGuard sys_ev; + PerfEventGuard proc_ev; + PerfEventGuard sys_ev; try { - proc_ev = open(Thread(0)); + proc_ev = open(Thread(0), -1); } catch (const std::system_error& e) { @@ -348,7 +183,7 @@ void Event::update_availability() try { - sys_ev = open(*supported_cpus().begin()); + sys_ev = open(*supported_cpus().begin(), -1); } catch (const std::system_error& e) { @@ -370,7 +205,7 @@ void Event::update_availability() } } -bool Event::degrade_precision() +bool PerfEvent::degrade_precision() { /* reduce exactness of IP can help if the kernel does not support really exact events */ if (attr_.precise_ip == 0) @@ -384,138 +219,19 @@ bool Event::degrade_precision() } } -SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) : Event() +void SysfsEvent::as_sample() { - set_common_attrs(enable_on_exec); - - // Parse event description // - - /* Event description format: - * Name of a Performance Monitoring Unit (PMU) and an event name, - * separated by either '/' or ':' (for perf-like syntax); followed by an - * optional separator: - * - * /[/] - * OR - * :[/] - * - * Examples (both specify the same event): - * - * cpu/cache-misses/ - * cpu:cache-misses - * - * */ - - enum EVENT_DESCRIPTION_REGEX_GROUPS - { - ED_WHOLE_MATCH, - ED_PMU, - ED_NAME, - }; - - parse_pmu_path(ev_name); - - Log::debug() << "parsing event description: pmu='" << pmu_name_ << "', event='" << name_ << "'"; - - // read PMU type id - std::underlying_type::type type = read_file_or_else(pmu_path_ / "type", 0); - if (!type) - { - using namespace std::string_literals; - throw EventProvider::InvalidEvent("unknown PMU '"s + pmu_name_ + "'"); - } - - attr_.type = static_cast(type); - attr_.config = 0; - attr_.config1 = 0; - - // Parse event configuration from sysfs // - - // read event configuration - std::filesystem::path event_path = pmu_path_ / "events" / name_; - std::string ev_cfg = read_file_or_else(event_path, std::string("0")); - if (ev_cfg == "0") - { - using namespace std::string_literals; - throw EventProvider::InvalidEvent("unknown event '"s + name_ + "' for PMU '"s + pmu_name_ + - "'"); - } - - name_ = ev_name; - - /* Event configuration format: - * One or more terms with optional values, separated by ','. (Taken from - * linux/Documentation/ABI/testing/sysfs-bus-event_source-devices-events): - * - * [=][,[=]...] - * - * Example (config for 'cpu/cache-misses' on an Intel Core i5-7200U): - * - * event=0x2e,umask=0x41 - * - * */ - - enum EVENT_CONFIG_REGEX_GROUPS - { - EC_WHOLE_MATCH, - EC_TERM, - EC_VALUE, - }; - - static const std::regex kv_regex(R"(([^=,]+)(?:=([^,]+))?)"); - - Log::debug() << "parsing event configuration: " << ev_cfg; - std::smatch kv_match; - while (std::regex_search(ev_cfg, kv_match, kv_regex)) - { - static const std::string default_value("0x1"); - - const std::string& term = kv_match[EC_TERM]; - const std::string& value = - (kv_match[EC_VALUE].length() != 0) ? kv_match[EC_VALUE] : default_value; - - std::string format = read_file_or_else(pmu_path_ / "format" / term, std::string("0")); - if (format == "0") - { - throw EventProvider::InvalidEvent("cannot read event format"); - } - - static_assert(sizeof(std::uint64_t) >= sizeof(unsigned long), - "May not convert from unsigned long to uint64_t!"); - - std::uint64_t val = std::stol(value, nullptr, 0); - Log::debug() << "parsing config assignment: " << term << " = " << std::hex << std::showbase - << val << std::dec << std::noshowbase; - event_attr_update(val, format); - - ev_cfg = kv_match.suffix(); - } - - Log::debug() << std::hex << std::showbase << "parsed event description: " << pmu_name_ << "/" - << name_ << "/type=" << attr_.type << ",config=" << attr_.config - << ",config1=" << attr_.config1 << std::dec << std::noshowbase << "/"; - - set_scale(read_file_or_else(event_path.replace_extension(".scale"), 1.0)); - set_unit(read_file_or_else(event_path.replace_extension(".unit"), "#")); - - if (!event_is_openable()) - { - throw EventProvider::InvalidEvent( - "Event can not be opened in process- or system-monitoring-mode"); - } -} - -void SysfsEvent::use_sampling_options(const bool& use_pebs, const bool& sampling, - const bool& enable_cct) -{ - if (use_pebs) + if (config().use_pebs) { attr_.use_clockid = 0; } - if (sampling) + attr_.sample_period = config().sampling_period; + + if (config().sampling) { - Log::debug() << "using sampling event \'" << name_ << "\', period: " << attr_.sample_period; + Log::debug() << "using sampling event \'" << config().sampling_event + << "\', period: " << config().sampling_period; attr_.mmap = 1; } @@ -535,7 +251,7 @@ void SysfsEvent::use_sampling_options(const bool& use_pebs, const bool& sampling // TODO see if we can remove remove tid attr_.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_CPU; - if (enable_cct) + if (config().enable_cct) { attr_.sample_type |= PERF_SAMPLE_CALLCHAIN; } @@ -543,27 +259,35 @@ void SysfsEvent::use_sampling_options(const bool& use_pebs, const bool& sampling attr_.precise_ip = 3; // make event available if possible - update_availability(); + set_availability(); } -EventGuard Event::open(std::variant location, int cgroup_fd) +PerfEventGuard PerfEvent::open(std::variant location, int cgroup_fd) { - return EventGuard(*this, location, -1, cgroup_fd); + return PerfEventGuard(*this, location, -1, cgroup_fd); } -EventGuard Event::open(ExecutionScope location, int cgroup_fd) +PerfEventGuard PerfEvent::open(ExecutionScope location, int cgroup_fd) { if (location.is_cpu()) { - return EventGuard(*this, location.as_cpu(), -1, cgroup_fd); + return PerfEventGuard(*this, location.as_cpu(), -1, cgroup_fd); } else { - return EventGuard(*this, location.as_thread(), -1, cgroup_fd); + return PerfEventGuard(*this, location.as_thread(), -1, cgroup_fd); } } -EventGuard Event::open_as_group_leader(ExecutionScope location, int cgroup_fd) +PerfEventGuard PerfEvent::open_as_group_leader(std::variant location, int cgroup_fd) +{ + attr_.read_format |= PERF_FORMAT_GROUP; + attr_.sample_type |= PERF_SAMPLE_READ; + + return open(location, cgroup_fd); +} + +PerfEventGuard PerfEvent::open_as_group_leader(ExecutionScope location, int cgroup_fd) { attr_.read_format |= PERF_FORMAT_GROUP; attr_.sample_type |= PERF_SAMPLE_READ; @@ -571,23 +295,30 @@ EventGuard Event::open_as_group_leader(ExecutionScope location, int cgroup_fd) return open(location, cgroup_fd); } -EventGuard EventGuard::open_child(Event child, ExecutionScope location, int cgroup_fd) +PerfEventGuard PerfEventGuard::open_child(PerfEvent child, std::variant location, + int cgroup_fd) +{ + return PerfEventGuard(child, location, fd_, cgroup_fd); +} + +PerfEventGuard PerfEventGuard::open_child(PerfEvent child, ExecutionScope location, int cgroup_fd) { if (location.is_cpu()) { - return EventGuard(child, location.as_cpu(), fd_, cgroup_fd); + return PerfEventGuard(child, location.as_cpu(), fd_, cgroup_fd); } else { - return EventGuard(child, location.as_thread(), fd_, cgroup_fd); + return PerfEventGuard(child, location.as_thread(), fd_, cgroup_fd); } } -EventGuard::EventGuard() : fd_(-1) +PerfEventGuard::PerfEventGuard() : fd_(-1) { } -EventGuard::EventGuard(Event& ev, std::variant location, int group_fd, int cgroup_fd) +PerfEventGuard::PerfEventGuard(PerfEvent& ev, std::variant location, int group_fd, + int cgroup_fd) : ev_(ev) { // can be deleted when scope gets replaced @@ -598,6 +329,7 @@ EventGuard::EventGuard(Event& ev, std::variant location, int group_ fd_ = perf_event_open(&ev_.get_attr(), scope, group_fd, 0, cgroup_fd); + // other error handling if (fd_ < 0) { throw_errno(); @@ -610,7 +342,7 @@ EventGuard::EventGuard(Event& ev, std::variant location, int group_ } } -void EventGuard::enable() +void PerfEventGuard::enable() { auto ret = ioctl(fd_, PERF_EVENT_IOC_ENABLE); if (ret == -1) @@ -619,7 +351,7 @@ void EventGuard::enable() } } -void EventGuard::disable() +void PerfEventGuard::disable() { auto ret = ioctl(fd_, PERF_EVENT_IOC_DISABLE); if (ret == -1) @@ -628,7 +360,7 @@ void EventGuard::disable() } } -void EventGuard::set_output(const EventGuard& other_ev) +void PerfEventGuard::set_output(const PerfEventGuard& other_ev) { if (ioctl(fd_, PERF_EVENT_IOC_SET_OUTPUT, other_ev.get_fd()) == -1) { @@ -636,15 +368,11 @@ void EventGuard::set_output(const EventGuard& other_ev) } } -void EventGuard::set_syscall_filter(const std::vector& syscall_filter) +void PerfEventGuard::set_syscall_filter() { - if (syscall_filter.empty()) - { - return; - } - std::vector names; - std::transform(syscall_filter.cbegin(), syscall_filter.end(), std::back_inserter(names), + std::transform(config().syscall_filter.cbegin(), config().syscall_filter.end(), + std::back_inserter(names), [](const auto& elem) { return fmt::format("id == {}", elem); }); std::string filter = fmt::format("{}", fmt::join(names, "||")); diff --git a/src/perf/event_provider.cpp b/src/perf/event_provider.cpp index bac0d6fb..5f4d608a 100644 --- a/src/perf/event_provider.cpp +++ b/src/perf/event_provider.cpp @@ -55,7 +55,7 @@ namespace #define PERF_EVENT_HW(name, id) PERF_EVENT(name, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##id) #define PERF_EVENT_SW(name, id) PERF_EVENT(name, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##id) -static lo2s::perf::Event HW_EVENT_TABLE[] = { +static lo2s::perf::PerfEvent HW_EVENT_TABLE[] = { PERF_EVENT_HW("cpu-cycles", CPU_CYCLES), PERF_EVENT_HW("instructions", INSTRUCTIONS), PERF_EVENT_HW("cache-references", CACHE_REFERENCES), @@ -74,7 +74,7 @@ static lo2s::perf::Event HW_EVENT_TABLE[] = { #endif }; -static lo2s::perf::Event SW_EVENT_TABLE[] = { +static lo2s::perf::PerfEvent SW_EVENT_TABLE[] = { PERF_EVENT_SW("cpu-clock", CPU_CLOCK), PERF_EVENT_SW("task-clock", TASK_CLOCK), PERF_EVENT_SW("page-faults", PAGE_FAULTS), @@ -162,20 +162,51 @@ namespace lo2s namespace perf { -static void populate_event_map(std::unordered_map& map) +const SysfsEvent sysfs_read_event(const std::string& ev_name); + +static bool event_is_openable(SysfsEvent& provider) +{ + provider.set_availability(); + + if (!provider.is_valid()) + { + Log::debug() << "perf event not openable, retrying with exclude_kernel=1"; + provider.get_attr().exclude_kernel = 1; + provider.set_availability(); + + if (!provider.is_valid()) + { + switch (errno) + { + case ENOTSUP: + Log::debug() << "perf event not supported by the running kernel: " + << provider.get_name(); + break; + default: + Log::debug() << "perf event " << provider.get_name() + << " not available: " << std::string(std::strerror(errno)); + break; + } + return false; + } + } + return true; +} + +static void populate_event_map(std::unordered_map& map) { Log::info() << "checking available events..."; map.reserve(array_size(HW_EVENT_TABLE) + array_size(SW_EVENT_TABLE) + array_size(CACHE_NAME_TABLE) * array_size(CACHE_OPERATION_TABLE)); for (auto& ev : HW_EVENT_TABLE) { - Event event(ev); + PerfEvent event(ev); map.emplace(event.get_name(), event); } for (auto& ev : SW_EVENT_TABLE) { - Event event(ev); + PerfEvent event(ev); map.emplace(event.get_name(), event); } @@ -187,16 +218,9 @@ static void populate_event_map(std::unordered_map& map) name_fmt.str(std::string()); name_fmt << cache.name << '-' << operation.name; - // can't use create_raw_event here, will create deadlock - Event event(name_fmt.str(), PERF_TYPE_HW_CACHE, - make_cache_config(cache.id, operation.id.op_id, operation.id.result_id)); - - event.set_sample_period(0); - event.set_exclude_kernel(true); - event.set_watermark(16); - event.set_clock_attrs(true, CLOCK_MONOTONIC_RAW); - - map.emplace(name_fmt.str(), event); + map.emplace(name_fmt.str(), PerfEvent(name_fmt.str(), PERF_TYPE_HW_CACHE, + make_cache_config(cache.id, operation.id.op_id, + operation.id.result_id))); } } } @@ -237,8 +261,7 @@ std::vector EventProvider::get_pmu_events() << '/'; try { - SysfsEvent event = EventProvider::instance().create_sysfs_event(event_name.str()); - events.emplace_back(event); + events.emplace_back(sysfs_read_event(event_name.str())); } catch (const EventProvider::InvalidEvent& e) { @@ -250,7 +273,7 @@ std::vector EventProvider::get_pmu_events() return events; } -Event EventProvider::fallback_metric_leader_event() +PerfEvent EventProvider::fallback_metric_leader_event() { Log::debug() << "checking for metric leader event..."; for (auto candidate : { @@ -261,7 +284,7 @@ Event EventProvider::fallback_metric_leader_event() { try { - const Event ev = get_event_by_name(candidate); + const PerfEvent ev = get_event_by_name(candidate); Log::debug() << "found suitable metric leader event: " << candidate; return ev; } @@ -274,16 +297,282 @@ Event EventProvider::fallback_metric_leader_event() throw InvalidEvent{ "no suitable metric leader event found" }; } +static std::uint64_t parse_bitmask(const std::string& format) +{ + enum BITMASK_REGEX_GROUPS + { + BM_WHOLE_MATCH, + BM_BEGIN, + BM_END, + }; + + std::uint64_t mask = 0x0; + + static const std::regex bit_mask_regex(R"((\d+)?(?:-(\d+)))"); + const std::sregex_iterator end; + std::smatch bit_mask_match; + for (std::sregex_iterator i = { format.begin(), format.end(), bit_mask_regex }; i != end; ++i) + { + const auto& match = *i; + int start = std::stoi(match[BM_BEGIN]); + int end = (match[BM_END].length() == 0) ? start : std::stoi(match[BM_END]); + + const auto len = (end + 1) - start; + if (start < 0 || end > 63 || len > 64) + { + throw EventProvider::InvalidEvent("invalid config mask"); + } + + /* Set `len` bits and shift them to where they should start. + * 4-bit example: format "1-3" produces mask 0b1110. + * start := 1, end := 3 + * len := 3 + 1 - 1 = 3 + * bits := bit(3) - 1 = 0b1000 - 1 = 0b0111 + * mask := 0b0111 << 1 = 0b1110 + * */ + + // Shifting by 64 bits causes undefined behaviour, so in this case set + // all bits by assigning the maximum possible value for std::uint64_t. + const std::uint64_t bits = + (len == 64) ? std::numeric_limits::max() : bit(len) - 1; + + mask |= bits << start; + } + Log::debug() << std::showbase << std::hex << "config mask: " << format << " = " << mask + << std::dec << std::noshowbase; + return mask; +} + +static constexpr std::uint64_t apply_mask(std::uint64_t value, std::uint64_t mask) +{ + std::uint64_t res = 0; + for (int mask_bit = 0, value_bit = 0; mask_bit < 64; mask_bit++) + { + if (mask & bit(mask_bit)) + { + res |= ((value >> value_bit) & bit(0)) << mask_bit; + value_bit++; + } + } + return res; +} + +static void event_attr_update(PerfEvent& event, std::uint64_t value, const std::string& format) +{ + // Parse config terms // + + /* Format: : + * + * We only assign the terms 'config' and 'config1'. + * + * */ + + static constexpr auto npos = std::string::npos; + const auto colon = format.find_first_of(':'); + if (colon == npos) + { + throw EventProvider::InvalidEvent("invalid format description: missing colon"); + } + + const auto target_config = format.substr(0, colon); + const auto mask = parse_bitmask(format.substr(colon + 1)); + + if (target_config == "config") + { + event.get_attr().config |= apply_mask(value, mask); + } + + if (target_config == "config1") + { + event.get_attr().config1 |= apply_mask(value, mask); + } +} + /** * takes the name of an event, checks if it can be opened with each cpu and returns a PerfEvent * with a set of working cpus */ -const Event raw_read_event(const std::string& ev_name) +const PerfEvent raw_read_event(const std::string& ev_name) { + uint64_t code = std::stoull(ev_name.substr(1), nullptr, 16); + + PerfEvent ev(0); + ev.get_attr().config = code; + + std::set cpus; + for (const auto& cpu : Topology::instance().cpus()) + { + try + { + PerfEventGuard ev_instance = ev.open(cpu, -1); + cpus.emplace(cpu); + } + catch (const std::system_error& e) + { + } + } + + const PerfEvent event(ev_name, PERF_TYPE_RAW, code, 0, cpus); // Do not check whether the event_is_openable because we don't know whether we are in // system or process mode - return EventProvider::instance().create_raw_event( - ev_name, PERF_TYPE_RAW, std::stoull(ev_name.substr(1), nullptr, 16), 0); + return event; +} + +template +T read_file_or_else(std::string filename, T or_else) +{ + T val; + std::ifstream stream(filename); + stream >> val; + if (stream.fail()) + { + return or_else; + } + return val; +} + +const SysfsEvent sysfs_read_event(const std::string& ev_name) +{ + // Parse event description // + + /* Event description format: + * Name of a Performance Monitoring Unit (PMU) and an event name, + * separated by either '/' or ':' (for perf-like syntax); followed by an + * optional separator: + * + * /[/] + * OR + * :[/] + * + * Examples (both specify the same event): + * + * cpu/cache-misses/ + * cpu:cache-misses + * + * */ + + enum EVENT_DESCRIPTION_REGEX_GROUPS + { + ED_WHOLE_MATCH, + ED_PMU, + ED_NAME, + }; + + static const std::regex ev_name_regex(R"(([a-z0-9-_]+)[\/:]([a-z0-9-_]+)\/?)"); + std::smatch ev_name_match; + + if (!std::regex_match(ev_name, ev_name_match, ev_name_regex)) + { + throw EventProvider::InvalidEvent("invalid event description format"); + } + + const std::string& pmu_name = ev_name_match[ED_PMU]; + const std::string& event_name = ev_name_match[ED_NAME]; + + Log::debug() << "parsing event description: pmu='" << pmu_name << "', event='" << event_name + << "'"; + + const std::filesystem::path pmu_path = + std::filesystem::path("/sys/bus/event_source/devices") / pmu_name; + + // read PMU type id + std::underlying_type::type type = read_file_or_else(pmu_path / "type", 0); + if (!type) + { + using namespace std::string_literals; + throw EventProvider::InvalidEvent("unknown PMU '"s + pmu_name + "'"); + } + + // If the processor is heterogenous, "cpus" contains the cores that support this PMU. If the PMU + // is an uncore PMU "cpumask" contains the cores that are logically assigned to that PMU. Why + // there need to be two seperate files instead of one, nobody knows, but simply parse both. + std::set cpus; + auto cpuids = parse_list_from_file(pmu_path / "cpus"); + + if (cpuids.empty()) + { + cpuids = parse_list_from_file(pmu_path / "cpumask"); + } + + std::transform(cpuids.begin(), cpuids.end(), std::inserter(cpus, cpus.end()), + [](uint32_t cpuid) { return Cpu(cpuid); }); + + SysfsEvent event(ev_name, static_cast(type), 0, 0, cpus); + + // Parse event configuration from sysfs // + + // read event configuration + std::filesystem::path event_path = pmu_path / "events" / event_name; + std::string ev_cfg = read_file_or_else(event_path, std::string("0")); + if (ev_cfg == "0") + { + using namespace std::string_literals; + throw EventProvider::InvalidEvent("unknown event '"s + event_name + "' for PMU '"s + + pmu_name + "'"); + } + + /* Event configuration format: + * One or more terms with optional values, separated by ','. (Taken from + * linux/Documentation/ABI/testing/sysfs-bus-event_source-devices-events): + * + * [=][,[=]...] + * + * Example (config for 'cpu/cache-misses' on an Intel Core i5-7200U): + * + * event=0x2e,umask=0x41 + * + * */ + + enum EVENT_CONFIG_REGEX_GROUPS + { + EC_WHOLE_MATCH, + EC_TERM, + EC_VALUE, + }; + + static const std::regex kv_regex(R"(([^=,]+)(?:=([^,]+))?)"); + + Log::debug() << "parsing event configuration: " << ev_cfg; + std::smatch kv_match; + while (std::regex_search(ev_cfg, kv_match, kv_regex)) + { + static const std::string default_value("0x1"); + + const std::string& term = kv_match[EC_TERM]; + const std::string& value = + (kv_match[EC_VALUE].length() != 0) ? kv_match[EC_VALUE] : default_value; + + std::string format = read_file_or_else(pmu_path / "format" / term, std::string("0")); + if (format == "0") + { + throw EventProvider::InvalidEvent("cannot read event format"); + } + + static_assert(sizeof(std::uint64_t) >= sizeof(unsigned long), + "May not convert from unsigned long to uint64_t!"); + + std::uint64_t val = std::stol(value, nullptr, 0); + Log::debug() << "parsing config assignment: " << term << " = " << std::hex << std::showbase + << val << std::dec << std::noshowbase; + event_attr_update(event, val, format); + + ev_cfg = kv_match.suffix(); + } + + Log::debug() << std::hex << std::showbase << "parsed event description: " << pmu_name << "/" + << event_name << "/type=" << event.get_attr().type + << ",config=" << event.get_attr().config << ",config1=" << event.get_attr().config1 + << std::dec << std::noshowbase << "/"; + + event.set_scale(read_file_or_else(event_path.replace_extension(".scale"), 1.0)); + event.set_unit(read_file_or_else(event_path.replace_extension(".unit"), "#")); + + if (!event_is_openable(event)) + { + throw EventProvider::InvalidEvent( + "Event can not be opened in process- or system-monitoring-mode"); + } + return event; } EventProvider::EventProvider() @@ -291,7 +580,7 @@ EventProvider::EventProvider() populate_event_map(event_map_); } -Event EventProvider::cache_event(const std::string& name) +PerfEvent EventProvider::cache_event(const std::string& name) { // Format for raw events is r followed by a hexadecimal number static const std::regex raw_regex("r[[:xdigit:]]{1,8}"); @@ -306,8 +595,7 @@ Event EventProvider::cache_event(const std::string& name) } else { - SysfsEvent event = EventProvider::instance().create_raw_sysfs_event(name); - return event_map_.emplace(name, event).first->second; + return event_map_.emplace(name, sysfs_read_event(name)).first->second; } } catch (const InvalidEvent& e) @@ -323,7 +611,7 @@ Event EventProvider::cache_event(const std::string& name) * @returns The corresponding PerfEvent if it is available * @throws InvalidEvent if the event is unavailable */ -Event EventProvider::get_event_by_name(const std::string& name) +PerfEvent EventProvider::get_event_by_name(const std::string& name) { auto& ev_map = instance().event_map_; auto event_it = ev_map.find(name); @@ -366,11 +654,11 @@ bool EventProvider::has_event(const std::string& name) } } -std::vector EventProvider::get_predefined_events() +std::vector EventProvider::get_predefined_events() { const auto& ev_map = instance().event_map_; - std::vector events; + std::vector events; events.reserve(ev_map.size()); for (const auto& event : ev_map) @@ -383,92 +671,5 @@ std::vector EventProvider::get_predefined_events() return events; } - -tracepoint::TracepointEvent EventProvider::create_tracepoint_event(const std::string& name, - const bool& enable_on_exec) -{ - tracepoint::TracepointEvent event(name, enable_on_exec); - event.set_sample_period(0); - apply_config_attrs(event); - - return event; -} - -tracepoint::TracepointEvent EventProvider::create_raw_tracepoint_event(const std::string& name, - const bool& enable_on_exec) -{ - tracepoint::TracepointEvent event(name, enable_on_exec); - event.set_sample_period(0); - apply_default_attrs(event); - - return event; -} - -Event EventProvider::create_time_event(uint64_t local_time) -{ - Event event(local_time); - event.set_sample_period(1); - - apply_config_attrs(event); - event.set_exclude_kernel(true); // overwrite config value - - return event; -} - -Event EventProvider::create_raw_event(const std::string& name, perf_type_id type, - std::uint64_t config, std::uint64_t config1) -{ - Event event(name, type, config, config1); - event.set_sample_period(0); - - apply_config_attrs(event); - event.set_exclude_kernel(true); // overwrite config value - - return event; -} - -SysfsEvent EventProvider::create_sampling_event(const bool& enable_on_exec) -{ - SysfsEvent event(config().sampling_event, enable_on_exec); - apply_config_attrs(event); - - event.set_sample_period(config().sampling_period); - event.use_sampling_options(config().use_pebs, config().sampling, config().enable_cct); - - return event; -} - -SysfsEvent EventProvider::create_raw_sysfs_event(const std::string& name) -{ - SysfsEvent event(name); - event.set_sample_period(0); - apply_default_attrs(event); - - return event; -} - -SysfsEvent EventProvider::create_sysfs_event(const std::string& name) -{ - SysfsEvent event(name); - event.set_sample_period(0); - apply_config_attrs(event); - - return event; -} - -void EventProvider::apply_config_attrs(Event& event) -{ - event.set_watermark(config().mmap_pages); - event.set_exclude_kernel(config().exclude_kernel); - event.set_clock_attrs(config().use_clockid, config().clockid); -} - -void EventProvider::apply_default_attrs(Event& event) -{ - event.set_watermark(16); // default mmap-pages value - event.set_exclude_kernel(true); // enabled by default - event.set_clock_attrs(true, CLOCK_MONOTONIC_RAW); -} - } // namespace perf } // namespace lo2s diff --git a/src/perf/time/reader.cpp b/src/perf/time/reader.cpp index 74d71e44..87651e35 100644 --- a/src/perf/time/reader.cpp +++ b/src/perf/time/reader.cpp @@ -19,6 +19,7 @@ * along with lo2s. If not, see . */ +#include #include #include @@ -56,7 +57,7 @@ Reader::Reader() static_assert(sizeof(local_time) == 8, "The local time object must not be a big fat " "object, or the hardware breakpoint won't work."); - Event event = EventProvider::instance().create_time_event((uint64_t)&local_time); + PerfEvent event((uint64_t)&local_time); try { diff --git a/src/perf/tracepoint/event.cpp b/src/perf/tracepoint/event.cpp index 6136014f..791e40d2 100644 --- a/src/perf/tracepoint/event.cpp +++ b/src/perf/tracepoint/event.cpp @@ -30,7 +30,7 @@ namespace tracepoint { TracepointEvent::TracepointEvent(const std::string& name, bool enable_on_exec) -: Event(), name_(name) +: PerfEvent(), name_(name) { set_common_attrs(enable_on_exec); parse_format(); @@ -38,8 +38,6 @@ TracepointEvent::TracepointEvent(const std::string& name, bool enable_on_exec) attr_.config = id_; attr_.type = PERF_TYPE_TRACEPOINT; attr_.sample_type |= PERF_SAMPLE_RAW | PERF_SAMPLE_IDENTIFIER; - - update_availability(); } const std::filesystem::path TracepointEvent::base_path_ = "/sys/kernel/debug/tracing/events"; diff --git a/src/platform.cpp b/src/platform.cpp index 769958fd..0774d081 100644 --- a/src/platform.cpp +++ b/src/platform.cpp @@ -217,7 +217,7 @@ Processor detect_processor(void) return Processor::UNKNOWN; } -std::vector get_mem_events() +std::vector get_mem_events() { static auto proc = detect_processor(); switch (proc)