From e920adedb15a1c60cc2af7d2b74a3f2908e51e50 Mon Sep 17 00:00:00 2001 From: Frank Du Date: Wed, 17 Jan 2024 11:27:19 +0800 Subject: [PATCH] ebpf: add lcore_monitor tools usage: sudo ./lcore_monitor --lcore 30 --t_pid 194145 --bpf_prog lcore_monitor_kern.o Signed-off-by: Frank Du --- tools/ebpf/.gitignore | 4 +- tools/ebpf/Makefile | 13 +- tools/ebpf/README.md | 24 ++++ tools/ebpf/lcore_monitor.c | 238 ++++++++++++++++++++++++++++++++ tools/ebpf/lcore_monitor.h | 25 ++++ tools/ebpf/lcore_monitor_kern.c | 70 ++++++++++ tools/ebpf/log.h | 35 +++++ 7 files changed, 406 insertions(+), 3 deletions(-) create mode 100644 tools/ebpf/lcore_monitor.c create mode 100644 tools/ebpf/lcore_monitor.h create mode 100644 tools/ebpf/lcore_monitor_kern.c create mode 100644 tools/ebpf/log.h diff --git a/tools/ebpf/.gitignore b/tools/ebpf/.gitignore index 3e7279210..053f5a194 100644 --- a/tools/ebpf/.gitignore +++ b/tools/ebpf/.gitignore @@ -1,4 +1,6 @@ # generated file et fentry.skel.h -vmlinux.h \ No newline at end of file +vmlinux.h +*.o +lcore_monitor \ No newline at end of file diff --git a/tools/ebpf/Makefile b/tools/ebpf/Makefile index a3997dcde..4a9717dbd 100644 --- a/tools/ebpf/Makefile +++ b/tools/ebpf/Makefile @@ -2,11 +2,11 @@ # Copyright 2023 Intel Corporation .PHONY: all -all: et xsk.xdp.o +all: et xsk.xdp.o lcore_monitor_kern.o lcore_monitor .PHONY: clean clean: - rm -rf et *.o *.skel.h + rm -rf et *.o *.skel.h lcore_monitor vmlinux.h: bpftool btf dump file /sys/kernel/btf/vmlinux format c > $@ @@ -30,3 +30,12 @@ SKEL_FILES := $(patsubst %.bpf.c,%.skel.h,$(wildcard *.bpf.c)) et: et.c $(SKEL_FILES) gcc -Wall -o $@ $(filter %.c,$^) -include $(SKEL_FILES) -lxdp -l:libbpf.a -lelf -lz + +# Build lcore_monitor_kern ebpf prog +lcore_monitor_kern.o: lcore_monitor_kern.c lcore_monitor.h + clang -g -O2 -target bpf -c lcore_monitor_kern.c -o $@ + llvm-strip -g $@ + +# Build lcore_monitor user prog +lcore_monitor: lcore_monitor.c lcore_monitor.h + gcc -Wall -o lcore_monitor 
lcore_monitor.c -lbpf -lelf \ No newline at end of file diff --git a/tools/ebpf/README.md b/tools/ebpf/README.md index cfbfbf8ba..ed131d6cc 100644 --- a/tools/ebpf/README.md +++ b/tools/ebpf/README.md @@ -12,6 +12,30 @@ make ## Run +lcore_monitor: a tool to monitor scheduler events on the IMTL lcore. + +```bash +sudo ./lcore_monitor --lcore 30 --t_pid 194145 --bpf_prog lcore_monitor_kern.o +``` + +The output looks like below; inspect the reported time to check whether the lcore has been suspended for a long time. + +```bash +main, load bpf object lcore_monitor_kern.o succ +lm_event_handler: sched out 7.789us as comm: migration/30 +lm_event_handler: sched out 7.405us as comm: migration/30 +``` + +The `lcore` and `t_pid` can be obtained from the IMTL running log. + +```bash +MT: MT: 2024-01-17 15:45:14, * * M T D E V S T A T E * * +MT: MT: 2024-01-17 15:45:14, DEV(0): Avr rate, tx: 2610.440314 Mb/s, rx: 0.000278 Mb/s, pkts, tx: 2465879, rx: 6 +MT: MT: 2024-01-17 15:45:14, DEV(1): Avr rate, tx: 0.000000 Mb/s, rx: 2602.470600 Mb/s, pkts, tx: 0, rx: 2465811 +MT: MT: 2024-01-17 15:45:14, SCH(0:sch_0): tasklets 3, lcore 29(t_pid: 190637), avg loop 105 ns +MT: MT: 2024-01-17 15:45:14, SCH(1:sch_1): tasklets 1, lcore 30(t_pid: 190638), avg loop 45 ns +``` + fentry: a simple program to trace udp_send_skb calls, requires kernel > 5.5. 
```bash diff --git a/tools/ebpf/lcore_monitor.c b/tools/ebpf/lcore_monitor.c new file mode 100644 index 000000000..2f7060d21 --- /dev/null +++ b/tools/ebpf/lcore_monitor.c @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Intel Corporation + */ + +// clang-format off +#include +#include "lcore_monitor.h" +// clang-format on + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" + +/* Monitor context: bpf object path, config passed to the bpf prog and the last sched out event */ +struct lcore_monitor_ctx { + char bpf_prog[64]; + struct lcore_tid_cfg cfg; + struct lcore_tid_event out; +}; + +enum lm_args_cmd { + LM_ARG_UNKNOWN = 0, + LM_ARG_CORE = 0x100, /* start from end of ascii */ + LM_ARG_T_PID, + LM_ARG_BPF_PROG, + LM_ARG_BPF_TRACE, + LM_ARG_HELP, +}; + +static struct option et_args_options[] = { + {"lcore", required_argument, 0, LM_ARG_CORE}, + {"t_pid", required_argument, 0, LM_ARG_T_PID}, + {"bpf_prog", required_argument, 0, LM_ARG_BPF_PROG}, + {"bpf_trace", no_argument, 0, LM_ARG_BPF_TRACE}, + {"help", no_argument, 0, LM_ARG_HELP}, + {0, 0, 0, 0}}; + +/* Print the command line usage */ +static void lm_print_help(void) { + printf("\n"); + printf("##### Usage: #####\n\n"); + + printf(" Params:\n"); + printf(" --lcore Set the monitor lcore\n"); + printf(" --t_pid Set the monitor t_pid\n"); + printf(" --bpf_prog Set bpf prog path\n"); + printf(" --bpf_trace Enable bpf trace\n"); + printf(" --help Print help info\n"); + + printf("\n"); +} + +/* Parse the command line into ctx, return 0 on success, -1 if help is requested or the option is unknown */ +static int lm_parse_args(struct lcore_monitor_ctx* ctx, int argc, char** argv) { + int cmd = -1, opt_idx = 0; + + while (1) { + cmd = getopt_long_only(argc, argv, "hv", et_args_options, &opt_idx); + if (cmd == -1) break; + + switch (cmd) { + case LM_ARG_CORE: + ctx->cfg.core_id = atoi(optarg); + break; + case LM_ARG_T_PID: + ctx->cfg.t_pid = atoi(optarg); + break; + case LM_ARG_BPF_PROG: + snprintf(ctx->bpf_prog, sizeof(ctx->bpf_prog), "%s", optarg); + break; + case LM_ARG_BPF_TRACE: + ctx->cfg.bpf_trace = true; + break; + case LM_ARG_HELP: + default: + lm_print_help(); + 
return -1; + } + } + + return 0; +} + +/* Stop request for the poll loop; written from the signal handler, hence volatile sig_atomic_t */ +static volatile sig_atomic_t stop = 0; + +/* Signal handler, SIGINT requests the poll loop to stop */ +static void lm_sig_handler(int signo) { + info("%s, signal %d\n", __func__, signo); + + switch (signo) { + case SIGINT: /* Interrupt from keyboard */ + stop = 1; + break; + } + + return; +} + +/* Read the comm of pid from /proc/PID/comm into process_name, the trailing newline is removed. + * Return 0 on success, negative errno on failure with process_name set to an empty string. + */ +static int get_process_name_by_pid(pid_t pid, char* process_name, size_t max_len) { + char path[128]; + FILE* fp; + + if (!process_name || !max_len) return -EINVAL; + /* callers may print the buffer even on failure, never leave it uninitialized */ + process_name[0] = '\0'; + + snprintf(path, sizeof(path), "/proc/%d/comm", pid); + fp = fopen(path, "r"); + + if (!fp) { + err("%s, Failed to open /proc/%d/comm\n", __func__, pid); + return -EIO; + } + + if (fgets(process_name, (int)max_len, fp) == NULL) { + err("%s, Failed to read process name for pid %d\n", __func__, pid); + process_name[0] = '\0'; + fclose(fp); + return -EIO; + } + fclose(fp); + + size_t len = strlen(process_name); + if (len > 0 && process_name[len - 1] == '\n') { + process_name[len - 1] = '\0'; + } + + return 0; +} + +/* Ring buffer callback: save the sched out event, and on sched in report how long t_pid was + * off the lcore and the comm of the task which replaced it. + */ +static int lm_event_handler(void* pri, void* data, size_t data_sz) { + struct lcore_monitor_ctx* ctx = pri; + const struct lcore_tid_event* e = data; + + dbg("%s: type %d, ns %" PRIu64 "\n", __func__, e->type, e->ns); + if (e->type == LCORE_SCHED_OUT) { + memcpy(&ctx->out, e, sizeof(ctx->out)); + dbg("%s: out ns %" PRIu64 "\n", __func__, ctx->out.ns); + return 0; + } + + if (e->type == LCORE_SCHED_IN) { + /* no sched out recorded yet, there is no interval to report */ + if (!ctx->out.ns) return 0; + /* keep the interval as 64 bit integer, a float drops precision on long intervals */ + uint64_t ns = e->ns - ctx->out.ns; + int next_pid = ctx->out.next_pid; + char process_name[64]; + /* the comm lookup can fail (e.g. the task already exited), report the pid instead */ + if (get_process_name_by_pid(next_pid, process_name, sizeof(process_name)) < 0) { + snprintf(process_name, sizeof(process_name), "pid %d", next_pid); + } + info("%s: sched out %.3fus as comm: %s\n", __func__, (double)ns / 1000, process_name); + } + + return 0; +} + +/* Load the bpf object, push the config, attach to sched_switch and poll the events until SIGINT. + * Return 0 on normal stop, -1 on argument error, setup failure or polling failure. + */ +int main(int argc, char** argv) { + struct lcore_monitor_ctx ctx; + struct bpf_object* obj = NULL; + struct bpf_program* prog; + struct bpf_link* link = NULL; + struct ring_buffer* rb = NULL; + uint32_t key = 0; + int map_fd, lm_events_fd; + int ret; + + memset(&ctx, 0, sizeof(ctx)); + /* default */ + snprintf(ctx.bpf_prog, sizeof(ctx.bpf_prog), "%s", "lcore_monitor_kern.o"); + ret = lm_parse_args(&ctx, argc, argv); + if (ret < 0) return ret; + if (!ctx.cfg.core_id) { + err("%s, no core id define\n", __func__); + lm_print_help(); + return -1; + } + if (!ctx.cfg.t_pid) { + err("%s, no t_pid define\n", __func__); + 
lm_print_help(); + return -1; + } + + obj = bpf_object__open(ctx.bpf_prog); + if (libbpf_get_error(obj)) { + err("%s, open bpf object %s fail\n", __func__, ctx.bpf_prog); + return -1; + } + + /* bpf resources are held from here, every failure below releases them via cleanup */ + ret = -1; + if (bpf_object__load(obj)) { + err("%s, load bpf object %s fail\n", __func__, ctx.bpf_prog); + goto cleanup; + } + info("%s, load bpf object %s succ\n", __func__, ctx.bpf_prog); + + map_fd = bpf_object__find_map_fd_by_name(obj, "lm_cfg_map"); + if (map_fd < 0) { + err("%s, get lm_cfg_map fail\n", __func__); + goto cleanup; + } + if (bpf_map_update_elem(map_fd, &key, &ctx.cfg, BPF_ANY) != 0) { + err("%s, update lm_cfg_map fail\n", __func__); + goto cleanup; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog_sched_switch"); + if (!prog) { + err("%s, finding BPF program failed\n", __func__); + goto cleanup; + } + + link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch"); + if (libbpf_get_error(link)) { + err("%s, attaching BPF program to tracepoint failed\n", __func__); + link = NULL; /* not a valid link, nothing to destroy */ + goto cleanup; + } + + lm_events_fd = bpf_object__find_map_fd_by_name(obj, "lm_events_map"); + if (lm_events_fd < 0) { + err("%s, get lm_events_map fail\n", __func__); + goto cleanup; + } + rb = ring_buffer__new(lm_events_fd, lm_event_handler, &ctx, NULL); + if (!rb) { + err("%s, create ring buffer fail\n", __func__); + goto cleanup; + } + + signal(SIGINT, lm_sig_handler); + + ret = 0; + while (!stop) { + int polled = ring_buffer__poll(rb, 100); + if (polled == -EINTR) break; /* interrupted by signal, normal stop */ + if (polled < 0) { + err("%s, polling fail\n", __func__); + ret = -1; + break; + } + } + + info("%s, stop now\n", __func__); + +cleanup: + ring_buffer__free(rb); + bpf_link__destroy(link); + bpf_object__close(obj); + return ret; +} diff --git a/tools/ebpf/lcore_monitor.h b/tools/ebpf/lcore_monitor.h new file mode 100644 index 000000000..308fd4d40 --- /dev/null +++ b/tools/ebpf/lcore_monitor.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * 
Copyright(c) 2023 Intel Corporation + */ + +#ifndef __LCORE_MONITOR_HEAD_H +#define __LCORE_MONITOR_HEAD_H + +struct lcore_tid_cfg { + uint32_t core_id; /* cpu id to monitor, compared with bpf_get_smp_processor_id() */ + uint32_t t_pid; /* thread id to monitor, compared with prev_pid/next_pid of sched_switch */ + uint8_t bpf_trace; /* non-zero to bpf_trace_printk every sched_switch on the monitored cpu */ +}; + +enum lcore_tid_event_type { + LCORE_SCHED_IN, /* t_pid is the next_pid of the switch */ + LCORE_SCHED_OUT, /* t_pid is the prev_pid of the switch */ +}; + +struct lcore_tid_event { + enum lcore_tid_event_type type; + uint64_t ns; /* bpf_ktime_get_ns() when the event is submitted */ + int next_pid; /* next_pid of the sched_switch */ +}; + +#endif \ No newline at end of file diff --git a/tools/ebpf/lcore_monitor_kern.c b/tools/ebpf/lcore_monitor_kern.c new file mode 100644 index 000000000..ab10c96aa --- /dev/null +++ b/tools/ebpf/lcore_monitor_kern.c @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + * Copyright(c) 2023 Intel Corporation + */ + +// clang-format off +#include "vmlinux.h" +// clang-format on + +#include +#include + +#include "lcore_monitor.h" + +/* Single entry array holding the monitor config, read with key 0 by the prog */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct lcore_tid_cfg); +} lm_cfg_map SEC(".maps"); + +/* Ring buffer carrying the struct lcore_tid_event records */ +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 512 * 1024); +} lm_events_map SEC(".maps"); + +/* Submit one event of the given type with the current ktime and next_pid, the event is dropped (with a trace print) if the reserve fails */ +static int lm_event_submit(enum lcore_tid_event_type type, + struct trace_event_raw_sched_switch* args) { + struct lcore_tid_event* e; + + e = bpf_ringbuf_reserve(&lm_events_map, sizeof(*e), 0); + if (!e) { + char fmt[] = "lm event ringbuf reserve fail\n"; + bpf_trace_printk(fmt, sizeof(fmt)); + return 0; + } + + e->type = type; + e->ns = bpf_ktime_get_ns(); + e->next_pid = args->next_pid; + + bpf_ringbuf_submit(e, 0); + return 0; +} + +/* sched_switch prog: on the configured cpu, submit SCHED_OUT when t_pid is prev_pid and SCHED_IN when t_pid is next_pid */ +SEC("tracepoint/sched/sched_switch") +int bpf_prog_sched_switch(struct trace_event_raw_sched_switch* args) { + uint32_t key = 0; + struct lcore_tid_cfg* cfg = bpf_map_lookup_elem(&lm_cfg_map, &key); + if (!cfg) return 0; + + /* core id check */ + if (bpf_get_smp_processor_id() != cfg->core_id) return 0; + + if (cfg->bpf_trace) { + char fmt[] = "prev_pid %d next_pid %d\n"; /* one conversion per argument passed below */ + bpf_trace_printk(fmt, sizeof(fmt), args->prev_pid, args->next_pid); + } + + if 
(args->prev_pid == cfg->t_pid) { + lm_event_submit(LCORE_SCHED_OUT, args); + } + + if (args->next_pid == cfg->t_pid) { + lm_event_submit(LCORE_SCHED_IN, args); + } + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/ebpf/log.h b/tools/ebpf/log.h new file mode 100644 index 000000000..14f724a96 --- /dev/null +++ b/tools/ebpf/log.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Intel Corporation + */ + +/* Header for log usage */ +#include + +#ifndef _EBPF_LOG_HEAD_H_ +#define _EBPF_LOG_HEAD_H_ + +/* log define */ +#ifdef DEBUG +#define dbg(...) \ + do { \ + printf(__VA_ARGS__); \ + } while (0) +#else +#define dbg(...) \ + do { \ + } while (0) +#endif +#define info(...) \ + do { \ + printf(__VA_ARGS__); \ + } while (0) +#define warn(...) \ + do { \ + printf(__VA_ARGS__); \ + } while (0) +#define err(...) \ + do { \ + printf(__VA_ARGS__); \ + } while (0) + +#endif \ No newline at end of file