From 889cb0deeacde4d4fc72c6842f58241fbc274c32 Mon Sep 17 00:00:00 2001 From: Christian von Elm Date: Tue, 17 Oct 2023 10:37:18 +0200 Subject: [PATCH] Block counter events the hacky way --- CMakeLists.txt | 3 + include/lo2s/monitor/bio_monitor.hpp | 201 +++++++++++++++++++++++++ include/lo2s/monitor/posix_monitor.hpp | 16 +- include/lo2s/perf/bio/common.h | 43 ++++++ include/lo2s/trace/reg_keys.hpp | 10 +- include/lo2s/trace/trace.hpp | 1 + src/perf/bio/block.bpf.c | 42 ++++++ src/trace/trace.cpp | 19 +++ 8 files changed, 326 insertions(+), 9 deletions(-) create mode 100644 include/lo2s/monitor/bio_monitor.hpp create mode 100644 include/lo2s/perf/bio/common.h create mode 100644 src/perf/bio/block.bpf.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 46e9e152..29a39029 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -347,7 +347,10 @@ if (USE_BPF) if (BpfObject_FOUND) target_compile_definitions(lo2s PUBLIC HAVE_BPF) bpf_object(open src/perf/posix_io/open.bpf.c) + bpf_object(block_io src/perf/bio/block.bpf.c) add_dependencies(lo2s open_skel) + add_dependencies(lo2s block_io_skel) + target_link_libraries(lo2s PRIVATE block_io_skel) target_link_libraries(lo2s PRIVATE open_skel) else() message(SEND_ERROR "BPF not found but requested.") diff --git a/include/lo2s/monitor/bio_monitor.hpp b/include/lo2s/monitor/bio_monitor.hpp new file mode 100644 index 00000000..da8ebfe1 --- /dev/null +++ b/include/lo2s/monitor/bio_monitor.hpp @@ -0,0 +1,201 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2016, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ *
+ * lo2s is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with lo2s. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "otf2xx/common.hpp"
+#include "otf2xx/event/io_create_handle.hpp"
+#include "otf2xx/event/io_delete_file.hpp"
+#include "otf2xx/event/io_destroy_handle.hpp"
+#include
+#include
+#include <atomic>
+
+extern "C"
+{
+#include
+#include
+#include
+#include
+}
+namespace lo2s
+{
+namespace monitor
+{
+/**
+ * Monitor thread that consumes the ring buffer of the block_io BPF program:
+ * run() polls it until stop() is called and hands every record to handle_event().
+ */
+class BioMonitor : public ThreadedMonitor
+{
+public:
+    /// Frees a libbpf ring buffer consumer.
+    struct RingBufferDeleter
+    {
+        void operator()(struct ring_buffer* rb)
+        {
+            ring_buffer__free(rb);
+        }
+    };
+
+    /// Destroys a block_io BPF skeleton.
+    struct SkelDeleter
+    {
+        void operator()(struct block_io_bpf* skel)
+        {
+            block_io_bpf__destroy(skel);
+        }
+    };
+
+    /**
+     * Raises RLIMIT_MEMLOCK, loads and attaches the BPF program and opens its
+     * ring buffer with event_cb() as the record callback.
+     *
+     * Every failing step is reported through throw_errno(), because a partially
+     * set up monitor would hand null pointers to libbpf in run().
+     * NOTE(review): assumes libbpf leaves the failure reason in errno - confirm.
+     *
+     * @param trace trace whose writers receive the block counter events
+     */
+    BioMonitor(trace::Trace& trace)
+    : ThreadedMonitor(trace, "block I/O monitor"), trace_(trace),
+      time_converter_(perf::time::Converter::instance())
+    {
+        // Need to bump memlock rlimit to run anything but the most trivial BPF programs
+        struct rlimit rlim_new;
+        rlim_new.rlim_cur = RLIM_INFINITY;
+        rlim_new.rlim_max = RLIM_INFINITY;
+
+        if (setrlimit(RLIMIT_MEMLOCK, &rlim_new))
+        {
+            throw_errno();
+        }
+
+        // The generated skeleton loader is named <object>__open_and_load().
+        skel_.reset(block_io_bpf__open_and_load());
+        if (!skel_)
+        {
+            throw_errno();
+        }
+
+        if (block_io_bpf__attach(skel_.get()) != 0)
+        {
+            throw_errno();
+        }
+
+        rb_.reset(ring_buffer__new(bpf_map__fd(skel_->maps.rb), event_cb, this, nullptr));
+        if (!rb_)
+        {
+            throw_errno();
+        }
+    }
+
+    /// libbpf log callback: prints messages up to level LIBBPF_INFO to stderr.
+    static int libbpf_print_fn(enum libbpf_print_level level, const char* format, va_list args)
+    {
+        if (level > LIBBPF_INFO)
+        {
+            return 0;
+        }
+        return vfprintf(stderr, format, args);
+    }
+
+    /// No-op: the block_io BPF object defines no "pids" map to register threads in.
+    void insert_thread(Thread thread [[maybe_unused]])
+    {
+    }
+    /// No-op counterpart of insert_thread().
+    void exit_thread(Thread thread [[maybe_unused]])
+    {
+    }
+
+    void initialize_thread() override
+    {
+    }
+
+    /**
+     * Handles one record delivered by the ring buffer.
+     *
+     * @param data   start of the record, laid out as struct block_io_event
+     * @param datasz size of the record in bytes
+     */
+    void handle_event(void* data, size_t datasz)
+    {
+        if (data == nullptr || datasz < sizeof(struct block_io_event))
+        {
+            return;
+        }
+        const auto* e = static_cast<const struct block_io_event*>(data);
+
+        // TODO: the event is not written to the trace yet, only its writer is looked up.
+        // NOTE(review): e->dev is a plain device number while block_counter_writer() is
+        // declared with a BlockDevice parameter - confirm the conversion.
+        [[maybe_unused]] otf2::writer::local& writer = trace_.block_counter_writer(e->dev);
+    }
+
+    /// Polls the ring buffer in 100 ms slices until stop() has been called.
+    void run() override
+    {
+        while (!stop_)
+        {
+            ring_buffer__poll(rb_.get(), 100);
+        }
+    }
+
+    void finalize_thread() override
+    {
+    }
+
+    /// Requests the polling loop in run() to end, then joins thread_.
+    void stop() override
+    {
+        stop_ = true;
+        thread_.join();
+    }
+    /// Ring buffer callback; ctx is the BioMonitor registered in the constructor.
+    static int event_cb(void* ctx, void* data, size_t data_sz)
+    {
+        static_cast<BioMonitor*>(ctx)->handle_event(data, data_sz);
+        return 0;
+    }
+
+    void monitor()
+    {
+    }
+
+    std::string group() const override
+    {
+        return "BioMonitor";
+    }
+
+private:
+    trace::Trace& trace_;
+    perf::time::Converter& time_converter_;
+
+    // Declared before rb_: members are destroyed in reverse order, so rb_ is freed first.
+    std::unique_ptr<struct block_io_bpf, SkelDeleter> skel_;
+    std::unique_ptr<struct ring_buffer, RingBufferDeleter> rb_;
+    // Written by stop() and read by the polling loop in run().
+    std::atomic<bool> stop_{ false };
+};
+
+} // namespace monitor
+} // namespace lo2s
diff --git a/include/lo2s/monitor/posix_monitor.hpp b/include/lo2s/monitor/posix_monitor.hpp
index 36b55e76..50abc60d 100644
--- a/include/lo2s/monitor/posix_monitor.hpp
+++ b/include/lo2s/monitor/posix_monitor.hpp
@@ -31,7 +31,7 @@
 extern "C"
 {
 #include
-#include
+#include
 #include
 #include
 }
@@ -52,9 +52,9 @@ class PosixMonitor : public ThreadedMonitor struct SkelDeleter { - void operator()(struct open_bpf* skel) + void operator()(struct block_io_bpf* skel) { - open_bpf__destroy(skel); + block_io_bpf__destroy(skel); } }; @@ -67,19 +67,19 @@ class PosixMonitor : public ThreadedMonitor struct rlimit rlim_new; rlim_new.rlim_cur = RLIM_INFINITY; rlim_new.rlim_max = RLIM_INFINITY; - + if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) { throw_errno(); } - skel_ = std::unique_ptr(open_bpf__open_and_load()); + skel_ = std::unique_ptr(block_io_bpf__block_io_and_load()); if (!skel_) { return; } - open_bpf__attach(skel_.get()); + block_io_bpf__attach(skel_.get()); rb_ = std::unique_ptr( ring_buffer__new(bpf_map__fd(skel_.get()->maps.rb), event_cb, this, NULL)); @@ -135,7 +135,7 @@ class PosixMonitor : public ThreadedMonitor } if (e->type == OPEN) { - struct open_event* e = (struct open_event*)data; + struct block_io_event* e = (struct block_io_event*)data; if (e->header.fd >= 3) { @@ -281,7 +281,7 @@ class PosixMonitor : public ThreadedMonitor std::map last_buf_; std::map instance_; std::unique_ptr rb_; - std::unique_ptr skel_; + std::unique_ptr skel_; bool stop_ = false; }; diff --git a/include/lo2s/perf/bio/common.h b/include/lo2s/perf/bio/common.h new file mode 100644 index 00000000..cd303b9c --- /dev/null +++ b/include/lo2s/perf/bio/common.h @@ -0,0 +1,43 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2016, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with lo2s. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+struct trace_entry { /* embedded as the first member of the tracepoint record below */
+    short unsigned int type;
+    unsigned char flags;
+    unsigned char preempt_count;
+    int pid;
+ };
+struct trace_event_raw_sched_process_exec
+{ /* context argument of handle_exec() in src/perf/bio/block.bpf.c */
+    struct trace_entry ent;
+    unsigned int dev;
+    unsigned long long int sector;
+    unsigned int nr_sector;
+    unsigned int bytes; /* the only field handle_exec() reads */
+};
+
+struct block_io_event { /* record that handle_exec() submits to the "rb" ring buffer */
+    unsigned long long int sector;
+    unsigned int dev;
+    unsigned int bytes;
+};
diff --git a/include/lo2s/trace/reg_keys.hpp b/include/lo2s/trace/reg_keys.hpp
index 850a04b4..cd80b5b6 100644
--- a/include/lo2s/trace/reg_keys.hpp
+++ b/include/lo2s/trace/reg_keys.hpp
@@ -32,6 +32,7 @@
 #include
 #include
+#include
 extern "C"
 {
@@ -84,6 +85,13 @@ struct ByBlockDeviceTag
 };
 using ByBlockDevice = SimpleKeyType;
+
+struct ByBlockCounterDeviceTag
+{
+};
+
+ using ByBlockCounterDevice = SimpleKeyType;
+
 struct ByStringTag
 {
 };
@@ -221,7 +229,7 @@ template <>
 struct Holder
 {
     using type = otf2::lookup_definition_holder;
+        ByMeasurementScope, ByBlockDevice, ByBlockCounterDevice>;
 };
 template <>
 struct Holder
diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp
index ee668693..e1dff1ac 100644
--- a/include/lo2s/trace/trace.hpp
+++ b/include/lo2s/trace/trace.hpp
@@ -135,6 +135,7 @@ class Trace
     otf2::writer::local& bio_writer(BlockDevice dev);
     otf2::writer::local& create_metric_writer(const std::string& name);
     otf2::writer::local& posix_io_writer(Thread thread);
+    otf2::writer::local& block_counter_writer(BlockDevice dev);
     otf2::definition::io_handle& block_io_handle(BlockDevice dev);
     otf2::definition::io_handle& posix_io_handle(Thread thread, int fd, int instance,
diff --git a/src/perf/bio/block.bpf.c b/src/perf/bio/block.bpf.c
new file mode 100644
index 00000000..9a2f5a88
--- /dev/null
+++ b/src/perf/bio/block.bpf.c
@@ -0,0 +1,42 @@
+// 
SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2020 Andrii Nakryiko */
+#include
+#include
+#include
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+/* BPF ringbuf map */
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 256 * 1024 /* 256 KB */);
+} rb SEC(".maps");
+
+unsigned int counter = 0; /* requests seen since the last published event */
+unsigned int bytes = 0;   /* running total of ctx->bytes over all requests */
+
+/* Publishes the running byte total through rb once per 200 issued requests. */
+SEC("tp/block/block_rq_issue")
+int handle_exec(struct trace_event_raw_sched_process_exec *ctx)
+{
+    counter = counter + 1;
+    bytes = bytes + ctx->bytes;
+
+    if (counter >= 200)
+    {
+        struct block_io_event *e;
+        /* Start the next batch, so that only every 200th request publishes an event. */
+        counter = 0;
+        e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
+        if (!e)
+        {
+            return 0;
+        }
+        e->sector = 0; /* the total has no sector; do not leave the field unset */
+        e->dev = 0;    /* NOTE(review): counter and bytes are not kept per device yet */
+        e->bytes = bytes;
+        bpf_ringbuf_submit(e, 0);
+    }
+    return 0;
+}
+
diff --git a/src/trace/trace.cpp b/src/trace/trace.cpp
index 39680d70..7f9a9290 100644
--- a/src/trace/trace.cpp
+++ b/src/trace/trace.cpp
@@ -467,6 +467,25 @@ otf2::writer::local& Trace::bio_writer(BlockDevice dev)
     return archive_(intern_location);
 }
 
+// Returns the writer for the block counter location of dev. The system tree node and
+// the location group are keyed ByBlockDevice, the location itself ByBlockCounterDevice.
+otf2::writer::local& Trace::block_counter_writer(BlockDevice dev)
+{
+    const auto& name = intern(fmt::format("block counter events for {}", dev.name));
+
+    const auto& node = registry_.emplace<otf2::definition::system_tree_node>(
+        ByBlockDevice(dev), intern(dev.name), intern("block dev"), bio_system_tree_node_);
+    const auto& bio_location_group = registry_.emplace<otf2::definition::location_group>(
+        ByBlockDevice(dev), name, otf2::common::location_group_type::process, node);
+    const auto& intern_location = registry_.emplace<otf2::definition::location>(
+        ByBlockCounterDevice(dev), name, bio_location_group,
+        otf2::definition::location::location_type::cpu_thread);
+
+    hardware_comm_locations_group_.add_member(intern_location);
+
+    return archive_(intern_location);
+}
+
 otf2::writer::local& Trace::posix_io_writer(Thread thread)
 {
     MeasurementScope scope = MeasurementScope::posix_io(thread.as_scope());