diff --git a/labs/debugging-ebpf/debugging-ebpf.tex b/labs/debugging-ebpf/debugging-ebpf.tex
index 7d53cbe006..550cab1d19 100644
--- a/labs/debugging-ebpf/debugging-ebpf.tex
+++ b/labs/debugging-ebpf/debugging-ebpf.tex
@@ -165,7 +165,13 @@ \section{Improving our program}
As a final improvement, we will trace the parent PID as well to know who is starting any program.
\begin{itemize}
\item Edit your eBPF program to read the parent PID. This info can be captured by retrieving the current \code{struct task_struct}, and identifying the relevant fields. Check both Elixir for the layout of \code{struct task_struct}, and \manpage{bpf-helpers}{7} to learn how to get the current task.
- \item We are using CO-RE definition for kernel data (through vmlinux.h), so we can not dereference directly a \code{struct task_struct} in our eBPF program, we must use helpers to retrieve struct fields. You can check \href{https://nakryiko.com/posts/bpf-core-reference-guide/#the-missing-manual}{this blog post from Andrii Nakryiko} to learn about such helpers.
+ \item We are using CO-RE definition for kernel data (through vmlinux.h), so
+ we can not dereference directly a \code{struct task_struct} in our eBPF
+ program, we must use helpers to retrieve struct fields. You can check
+ \href{https://nakryiko.com/posts/bpf-core-reference-guide/#the-missing-manual}{this
+ blog post from Andrii Nakryiko} to learn about such helpers. Also, you will
+ need to check \kstruct{task_struct} to know what field to extract to get the
+ parent PID.
\item Update your userspace program to read and print the newly captured value
\end{itemize}
diff --git a/slides/debugging-system-wide-profiling/bpf_lifecycle.dia b/slides/debugging-system-wide-profiling/bpf_lifecycle.dia
new file mode 100644
index 0000000000..61847f9d1f
--- /dev/null
+++ b/slides/debugging-system-wide-profiling/bpf_lifecycle.dia
@@ -0,0 +1,1100 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ #A4#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #myprog.bpf.c#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #myprog.bpf.o#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #verifier#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #kernel#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #userspace#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #clang#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #bpf()#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #attach#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #program runs
+on event#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #userspace tool#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #map_1#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #myprog#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #map_2#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/slides/debugging-system-wide-profiling/debugging-system-wide-profiling.tex b/slides/debugging-system-wide-profiling/debugging-system-wide-profiling.tex
index 43f0d9b58b..00cf6fa4de 100644
--- a/slides/debugging-system-wide-profiling/debugging-system-wide-profiling.tex
+++ b/slides/debugging-system-wide-profiling/debugging-system-wide-profiling.tex
@@ -177,7 +177,7 @@ \subsection{perf}
\begin{itemize}
\item {\em perf} allows to create dynamic tracepoints on both kernel functions and
user-space functions.
- \item In order to be able to insert probes, \kconfig{CONFIG_KPROBE} must be
+ \item In order to be able to insert probes, \kconfig{CONFIG_KPROBES} must be
enabled in the kernel.
\begin{itemize}
\item Note: {\em libelf} is required to compile {\em perf} with
@@ -644,8 +644,6 @@ \subsection{ftrace and trace-cmd}
...
\end{minted}
\end{block}
-
-
\end{frame}
\begin{frame}[fragile]
@@ -795,6 +793,192 @@ \subsection{ftrace and trace-cmd}
\center\includegraphics[height=0.8\textheight]{slides/debugging-system-wide-profiling/kernelshark.png}
\end{frame}
+\setuplabframe
+{System wide profiling}
+{
+ Profiling a system from userspace to kernel space
+ \begin{itemize}
+ \item Profiling with ftrace, uprobes and kernelshark
+ \item Profiling with perf
+ \end{itemize}
+}
+
+\subsection{LTTng}
+
+\begin{frame}
+ \frametitle{{\em LTTng}}
+ \begin{columns}
+ \column{0.65\textwidth}
+ \begin{itemize}
+ \item LTTng is an open source tracing framework for Linux maintained by
+ the \href{https://www.efficios.com/}{EfficiOS} company.
+ \item LTTng allows understanding the interactions between the kernel and
+ applications (C, C++, Java, Python).
+ \begin{itemize}
+ \item Also exposes a \code{/dev/lttng-logger} that can be used from any
+ application.
+ \end{itemize}
+ \item Tracepoints are associated with a payload (data).
+ \item LTTng is focused on low-overhead tracing.
+ \item Uses the Common Trace Format (so traces are readable with other
+ software like babeltrace or trace-compass)
+ \end{itemize}
+ \column{0.35\textwidth}
+ \includegraphics[height=0.3\textheight]{slides/debugging-system-wide-profiling/lttng-logo.jpg}
+ \end{columns}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tracepoints with {\em LTTng} }
+ \begin{itemize}
+ \item LTTng works with a session daemon that receives all events from kernel
+ and userspace LTTng tracing components.
+ \item LTTng can use and trace the following instrumentation points:
+ \begin{itemize}
+ \item LTTng kernel tracepoints
+ \item kprobes and kretprobes
+ \item Linux kernel system calls
+ \item Linux user space probe
+ \item User space LTTng tracepoints
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Creating userspace tracepoints with {\em LTTng}}
+ \begin{itemize}
+ \item New userspace tracepoints can be defined using LTTng.
+ \item Tracepoints have multiple characteristics:
+ \begin{itemize}
+ \item A provider namespace
+ \item A name identifying the tracepoint
+ \item Parameters of various types (int, char *, etc)
+ \item Fields describing how to display the tracepoint parameters
+ (decimal, hexadecimal, etc) (see \href{https://lttng.org/man/3/lttng-ust/v2.13/}{LTTng-ust} manpage
+ for types)
+ \end{itemize}
+ \item Developers must perform multiple operations to use UST tracepoints:
+ write a tracepoint provider (.h), write a tracepoint package (.c), build
+ the package, call the tracepoint in the traced application, and finally
+ build the application, linked with lttng-ust library and the package provider.
+ \item LTTng provides the \code{lttng-gen-tp} tool to ease all those steps,
+ allowing to only write a template (.tp) file.
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Defining a {\em LTTng} tracepoint (1/2)}
+
+ \begin{itemize}
+ \item Tracepoint template (\code{hello_world-tp.tp}):
+ \begin{block}{}
+ \begin{minted}[fontsize=\tiny]{C}
+ LTTNG_UST_TRACEPOINT_EVENT(
+ // Tracepoint provider name
+ hello_world,
+
+ // Tracepoint/event name
+ my_first_tracepoint,
+
+ // Tracepoint arguments (input)
+ LTTNG_UST_TP_ARGS(
+ char *, text
+ ),
+
+ // Tracepoint/event fields (output)
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_string(message, text)
+ )
+ )
+ \end{minted}
+ \end{block}
+ \item \code{lttng-gen-tp} will take this template file and generate/build
+ all needed files (.h, .c and .o files)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Defining a {\em LTTng} tracepoint (2/2)}
+ \begin{itemize}
+ \item Build tracepoint provider:
+ \end{itemize}
+ \begin{block}{}
+ \begin{minted}[fontsize=\tiny]{console}
+$ lttng-gen-tp hello_world-tp.tp
+ \end{minted}
+ \end{block}
+ \begin{itemize}
+ \item Tracepoint usage (\code{hello_world.c}):
+ \end{itemize}
+ \begin{block}{}
+ \begin{minted}[fontsize=\tiny]{C}
+#include
+#include "hello_world-tp.h"
+
+int main(int argc, char *argv[])
+{
+ lttng_ust_tracepoint(hello_world, my_first_tracepoint, "hi there!");
+ return 0;
+}
+ \end{minted}
+ \end{block}
+ \begin{itemize}
+ \item Compilation:
+ \end{itemize}
+ \begin{block}{}
+ \begin{minted}[fontsize=\tiny]{console}
+$ gcc hello_world.c hello_world-tp.o -llttng-ust -o hello_world
+ \end{minted}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Using {\em LTTng}}
+ \begin{block}{}
+ \begin{minted}[fontsize=\small]{console}
+$ lttng create my-tracing-session --output=./my_traces
+$ lttng list --kernel
+$ lttng list --userspace
+$ lttng enable-event --userspace hello_world:my_first_tracepoint
+$ lttng enable-event --kernel --syscall open,close,write
+$ lttng start
+$ /* Run your application or do something */
+$ lttng destroy
+$ babeltrace2 ./my_traces
+ \end{minted}
+ \end{block}
+ \begin{itemize}
+ \item You can also use
+ \href{https://eclipse.dev/tracecompass/trace-compass}{trace-compass}
+ to display the traces in a GUI
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Remote tracing with {\em LTTng}}
+ \begin{itemize}
+ \item LTTng allows to record traces over the network.
+ \item Useful for embedded systems with limited storage capabilities.
+ \item On the remote computer, run \code{lttng-relayd} command
+ \end{itemize}
+ \begin{block}{}
+ \begin{minted}[fontsize=\small]{console}
+$ lttng-relayd --output=${PWD}/traces
+ \end{minted}
+ \end{block}
+ \begin{itemize}
+ \item Then on the target, at session creation, use the \code{--set-url} option
+ \end{itemize}
+ \begin{block}{}
+ \begin{minted}[fontsize=\small]{console}
+$ lttng create my-session --set-url=net://remote-system
+ \end{minted}
+ \end{block}
+ \begin{itemize}
+ \item Traces will then be recorded directly on the remote computer.
+ \end{itemize}
+\end{frame}
+
\subsection{eBPF}
\begin{frame}{The ancestor: Berkeley Packet filter}
@@ -845,55 +1029,233 @@ \subsection{eBPF}
\begin{frame}
\frametitle{eBPF (1/2)}
\begin{itemize}
- \item \href{https://ebpf.io/}{eBPF} framework in the kernel allows running
- user-written BPF programs within the kernel in a safe and efficient
- way (Added in kernel 3.15)
- \item Execution is event-driven and can be hooked using Kprobes, tracepoints
- and other methods of tracing
- \item Executes complex actions and reports data to userspace for
- events that took place in the kernel.
- \item Used to hook into various places of the kernel: VFS, Network stack,
- syscalls, load balancing, security, etc
- \end{itemize}
- \center\includegraphics[height=0.2\textheight]{slides/debugging-linux-application-stack/logo_ebpf.png}\\
+ \item \href{https://ebpf.io/}{eBPF} is a new framework allowing to run
+ small user programs directly in the kernel, in a safe and efficient way. It
+ has been added in kernel 3.18 but it is still evolving and receiving
+ updates frequently.
+ \item eBPF programs can capture and expose kernel data to userspace, and
+ also alter kernel behavior based on some user-defined rules.
+ \item eBPF is event-driven: an eBPF program is triggered and executed on a
+ specific kernel event
+ \item A major benefit from eBPF is the possibility to reprogram the kernel
+ behavior, without performing kernel development:
+ \begin{itemize}
+ \item no risk of crashing the kernel because of bugs
+ \item faster development cycles to get a new feature ready
+ \end{itemize}
+ \end{itemize}
+ \center\includegraphics[height=0.2\textheight]{slides/debugging-linux-application-stack/logo_ebpf.png}\\
\tiny Image credits: \url{https://ebpf.io/}
\end{frame}
\begin{frame}
\frametitle{eBPF (2/2)}
\begin{itemize}
- \item Programs are loaded using the \code{bpf()} system call
- (\manpage{bpf}{2}) and then verified by the kernel BPF verifier before
- being executed.
+ \item The most notable eBPF features are:
+ \begin{itemize}
+ \item A new instruction set, interpreter and verifier
+ \item A wide variety of "attach" locations, allowing to hook programs
+ almost anywhere in the kernel
+ \item dedicated data structures called "maps", to exchange data between
+ multiple eBPF programs or between programs and userspace
+ \item A dedicated \code{bpf()} syscall to manipulate eBPF programs and data
+ \item plenty of (kernel) helper functions accessible from eBPF programs.
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{eBPF program lifecycle}
+ \begin{center}
+ \includegraphics[height=0.8\textheight]{slides/debugging-system-wide-profiling/bpf_lifecycle.pdf}
+ \end{center}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Kernel configuration for eBPF}
+ \begin{itemize}
+ \item \kconfig{CONFIG_NET} to enable eBPF subsystem
+ \item \kconfig{CONFIG_BPF_SYSCALL} to enable the \code{bpf()} syscall
+ \item \kconfig{CONFIG_BPF_JIT} to enable JIT on programs and so increase performance
+ \item \kconfig{CONFIG_BPF_JIT_ALWAYS_ON} to force JIT
+ \item \kconfigval{CONFIG_BPF_UNPRIV_DEFAULT_OFF}{n} in \textbf{development} to
+ allow eBPF usage without root
+ \item You may then want to enable more general features to "unlock"
+ specific hooking locations:
\begin{itemize}
- \item Check of privileges to execute BPF program
- \item Verifies that the BPF program always runs to completion and does not
- loop forever
+ \item \kconfig{CONFIG_KPROBES} to allow hooking programs on kprobes
+ \item \kconfig{CONFIG_TRACING} to allow hooking programs on kernel tracepoints
+ \item \kconfig{CONFIG_NET_CLS_BPF} to write packet classifiers
+ \item \kconfig{CONFIG_CGROUP_BPF} to attach programs on cgroups hooks
\end{itemize}
- \item Almost all architectures have a BPF JIT support which allows
- translating the BPF format into native CPU instruction, thus being
- (almost) as fast as natively compiled code
- \item BPF programs can return values in maps of various types (hash tables,
- arrays, etc) which allows sharing data between user-space, eBPF
- programs and kernel space.
- \item Only some functions (called helpers) can be called in eBPF programs.
- \item eBPF programs are attached to events (invoked on trigger).
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{eBPF ISA}
+ \begin{itemize}
+ \item eBPF is a "virtual" ISA, defining its own set of instructions: load
+ and store instructions, arithmetic instructions, jump instructions, etc.
+ \item It also defines a set of 11 64-bit wide registers (10 general purpose
+ registers and a read-only frame pointer) as well as a calling convention:
+ \begin{itemize}
+ \item \code{R0}: return value from functions and BPF program
+ \item \code{R1, R2, R3, R4, R5}: function arguments
+ \item \code{R6, R7, R8, R9}: callee-saved registers
+ \item \code{R10}: read-only frame pointer, used to access the stack
+ \end{itemize}
+ \end{itemize}
+ \begin{block}{}
+ \begin{minted}[fontsize=\scriptsize]{console}
+; bpf_printk("Hello %s\n", "World");
+ 0: r1 = 0x0 ll
+ 2: r2 = 0xa
+ 3: r3 = 0x0 ll
+ 5: call 0x6
+; return 0;
+ 6: r0 = 0x0
+ 7: exit
+ \end{minted}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{The eBPF verifier}
+ \begin{itemize}
+ \item When loaded into the kernel, a program must first be validated by the
+ eBPF verifier.
+ \item The verifier is a complex piece of software which checks eBPF
+ programs against a set of rules to ensure that running those may not
+ compromise the whole kernel. For example:
+ \begin{itemize}
+ \item a program must always return, and so must not contain paths which could
+ make it "infinite" (e.g.: no infinite loop)
+ \item a program must make sure that a pointer is valid before
+ dereferencing it
+ \item a program can not access arbitrary memory addresses, it must use
+ passed context and available helpers
+ \end{itemize}
+ \item If a program violates one of the verifier rules, it will be rejected.
+ \item Despite the presence of the verifier, you still need to be careful when
+ writing programs! eBPF programs run with preemption enabled (but CPU
+ migration disabled), so they can still suffer from concurrency issues
+ \begin{itemize}
+ \item Fortunately, there are some mechanisms and helpers to avoid those issues,
+ like per-CPU map types.
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Program types and attach points}
+ \begin{itemize}
+ \item There are different "types" of places to which a program can be
+ hooked
+ \begin{itemize}
+ \item an arbitrary kprobe
+ \item a kernel-defined static tracepoint
+ \item a specific perf event
+ \item throughout the network stack
+ \item and a lot more, see \ksym{bpf_attach_type}
+ \end{itemize}
+ \item A specific attach-point type can only be hooked with a set of
+ specific program types, see \ksym{bpf_prog_type} and
+ \kdochtml{bpf/libbpf/program_types}.
+ \item The program type then defines the data passed to an eBPF program as
+ input when it is invoked. For example:
+ \begin{itemize}
+ \item A \code{BPF_PROG_TYPE_TRACEPOINT} program will receive a structure
+ containing all data returned to userspace by the targeted tracepoint.
+ \item A \code{BPF_PROG_TYPE_SCHED_CLS} program (used to implement packet
+ classifiers) will receive a \kstruct{__sk_buff}, the kernel
+ representation of a socket buffer.
+ \item You can learn about the context passed to any program type by
+ checking \kfile{include/linux/bpf_types.h}
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{eBPF maps}
+ \begin{itemize}
+ \item eBPF programs exchange data with userspace or other programs through
+ maps of different nature:
+ \begin{itemize}
+ \item \code{BPF_MAP_TYPE_ARRAY}: generic array storage. Can be
+ differentiated per cpu
+ \item \code{BPF_MAP_TYPE_HASH}: a storage composed of key-value pairs.
+ Keys can be of different types: \code{__u32}, a device type, an IP address...
+ \item \code{BPF_MAP_TYPE_QUEUE}: a FIFO-type queue
+ \item \code{BPF_MAP_TYPE_CGROUP_STORAGE}: a specific hash map keyed by a
+ cgroup id. There are other types of maps specific to other object types
+ (inodes, tasks, sockets, etc)
+ \item etc...
+ \end{itemize}
+ \item For basic data, it is easier and more efficient to directly use eBPF
+ global variables (no syscalls involved, contrary to maps)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{The \code{bpf()} syscall}
+ \begin{itemize}
+ \item The kernel exposes a \code{bpf()} syscall to allow interacting with the
+ eBPF subsystem
+ \item The syscall takes a set of subcommands, and depending on the
+ subcommand, some specific data:
+ \begin{itemize}
+ \item \ksym{BPF_PROG_LOAD} to load a bpf program
+ \item \ksym{BPF_MAP_CREATE} to allocate maps to be used by a program
+ \item \ksym{BPF_MAP_LOOKUP_ELEM} to search for an entry in a map
+ \item \ksym{BPF_MAP_UPDATE_ELEM} to update an entry in a map
+ \item etc
+ \end{itemize}
+ \item The syscall works with file descriptors pointing to eBPF resources.
+ Those resources (programs, maps, links, etc) remain valid while there is at least
+ one program holding a valid file descriptor to them. They are automatically cleaned
+ up once there are no users left.
+ \item For more details, see \manpage{bpf}{2}
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Writing eBPF programs}
\begin{itemize}
- \item eBPF programs can be written in (restricted) C and are compiled
- using clang compiler
- \item BCC (BPF Compiler Collection) provides a toolkit to write BPF
- programs more easily using C language (also provides LUA and Python
- front-ends)
+ \item eBPF programs can either be written directly in raw eBPF assembly or in
+ higher level languages (e.g: C or Rust), and are compiled using the clang
+ compiler.
+ \item The kernel provides some helpers that can be called from an eBPF program:
\begin{itemize}
- \item Allows to write tracing and profiling program easily
+ \item \code{bpf_trace_printk} Emits a log to the trace buffer
+ \item \code{bpf_map_{lookup,update,delete}_elem} Manipulates maps
+ \item \code{bpf_probe_{read,write}[_user]} Safely read/write data from/to kernel or userspace
+ \item \code{bpf_get_current_pid_tgid} Returns current Process ID and Thread group ID
+ \item \code{bpf_get_current_uid_gid} Returns current User ID and Group ID
+ \item \code{bpf_get_current_comm} Returns the name of the executable running in the
+ current task
+ \item \code{bpf_get_current_task} Returns the current \kstruct{task_struct}
+ \item Many other helpers are available, see \manpage{bpf-helpers}{7}
+ \end{itemize}
+ \item The kernel also exposes kfuncs (see \kdochtml{bpf/kfuncs}), but contrary
+ to bpf-helpers, those do not belong to the kernel stable interface.
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Manipulating eBPF programs}
+ \begin{itemize}
+ \item There are different ways to build, load and manipulate eBPF programs:
+ \begin{itemize}
+ \item One way is to write an eBPF program, build it with clang, and then load it,
+ attach it and read data from it with bare \code{bpf()} calls in a custom
+ userspace program
+ \item One can also use \code{bpftool} on the built eBPF program to
+ manipulate it (load, attach, read maps, etc), without writing any userspace tool
+ \item Or we can write our own eBPF tool thanks to some intermediate libraries which handle most of the
+ hard work, like libbpf
+ \item We can also use specialized frameworks like BCC or bpftrace to really
+ get all operations (bpf program build included) handled
\end{itemize}
- \item {\em bpftrace} is a high level language allowing to easily write tracing
- functions
\end{itemize}
\end{frame}
@@ -917,7 +1279,7 @@ \subsection{eBPF}
\end{itemize}
\column{0.25\textwidth}
\vspace{0.5cm}
- \includegraphics[height=0.2\textheight]{slides/debugging-linux-application-stack/logo_bcc.png}\\
+ \includegraphics[height=0.2\textheight]{slides/debugging-linux-application-stack/logo_bcc.png}\\
\tiny Image credits: \url{https://github.com/iovisor/bcc}
\end{columns}
\end{frame}
@@ -963,8 +1325,25 @@ \subsection{eBPF}
\begin{frame}[fragile]
\frametitle{Using BCC with python}
\begin{itemize}
- \item BCC python support allows to easily write and hook C program for BPF
- tracing.
+ \item BCC exposes a \code{bcc} module, and especially a \code{BPF} class
+ \item eBPF programs are written in C and stored either in external files
+ or directly in a python string.
+ \item When an instance of the \code{BPF} class is created and fed with the
+ program (either as string or file), it automatically builds, loads, and
+ possibly attaches the program
+ \item There are multiple ways to attach a program:
+ \begin{itemize}
+ \item By using a proper program name prefix, depending on the targeted
+ attach point (and so the attach step is performed automatically)
+ \item By explicitly calling the relevant attach method on the \code{BPF}
+ instance created earlier
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Using BCC with python}
+ \begin{itemize}
\item Hook with a {\em kprobe} on the \code{clone()} system call and display \verb+"Hello, World!"+ each
time it is called
\end{itemize}
@@ -987,317 +1366,404 @@ \subsection{eBPF}
\end{frame}
\begin{frame}[fragile]
- \frametitle{bpftrace}
- \begin{columns}
- \column{0.75\textwidth}
- \begin{itemize}
- \item bpftrace is a high level tracing language allowing to write tracing
- expressions easily (\url{https://bpftrace.org/})
- \item Also provide tools to trace various parts of the kernel
- \begin{itemize}
- \item Internally uses LLVM to compile script and BCC to interact with the BPF programs
- \end{itemize}
- \item bpftrace is inspired by awk and C, and predecessor tracers such as DTrace and SystemTap
- \item Rich syntax documented at \url{https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md}
- \end{itemize}
- \column{0.25\textwidth}
- \vspace{0.5cm}
- %% Source: https://commons.wikimedia.org/wiki/File:Elf-layout--en.svg
- \includegraphics[height=0.2\textheight]{slides/debugging-system-wide-profiling/bpftrace.png}\\
- \tiny Image credits: \url{https://bpftrace.org/}
- \end{columns}
+ \frametitle{libbpf}
+ \begin{itemize}
+ \item Instead of using a high level framework like BCC, one can use libbpf to
+ build custom tools with finer control over every aspect of the program.
+ \item libbpf is a C-based library that aims to ease eBPF programming thanks
+ to the following features:
+ \begin{itemize}
+ \item userspace APIs to handle open/load/attach/teardown of bpf programs
+ \item userspace APIs to interact with attached programs
+ \item eBPF APIs to ease eBPF program writing
+ \end{itemize}
+ \item Packaged in many distributions and build systems (e.g.: Buildroot)
+ \item Learn more at \url{https://libbpf.readthedocs.io/en/latest/}
+ \end{itemize}
\end{frame}
\begin{frame}[fragile]
- \frametitle{bpftrace tools}
-
- \begin{center}
- \includegraphics[height=0.8\textheight]{slides/debugging-system-wide-profiling/bpftrace_tools_early2019.png}\\
- \tiny Image credits: \url{https://www.brendangregg.com/ebpf.html}
- \end{center}
+ \frametitle{eBPF programming with libbpf (1/2)}
+ \begin{block}{\code{my_prog.bpf.c}}
+ \begin{minted}[fontsize=\tiny]{C}
+ #include
+ #include
+ #include
+
+ #define TASK_COMM_LEN 16
+ struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 1);
+ } counter_map SEC(".maps");
+
+ struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[TASK_COMM_LEN];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[TASK_COMM_LEN];
+ int next_pid;
+ int next_prio;
+ };
+ \end{minted}
+ \end{block}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Using bpftrace}
- \begin{itemize}
- \item Counting all syscalls per process:
- \end{itemize}
- \begin{block}{}
- \begin{minted}[fontsize=\small]{console}
-$ sudo bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'
-Attaching 1 probe...
-^C
-@[packagekitd]: 1
-@[GUsbEventThread]: 1
-@[gvfs-afc-volume]: 1
-@[ibus-extension-]: 4
+ \frametitle{eBPF programming with libbpf (2/2)}
+ \begin{block}{\code{my_prog.bpf.c}}
+ \begin{minted}[fontsize=\tiny]{C}
+ SEC("tracepoint/sched/sched_switch")
+ int sched_tracer(struct sched_switch_args *ctx)
+ {
+ __u32 key = 0;
+ __u64 *counter;
+ char *file;
+
+ char fmt[] = "Old task was %s, new task is %s\n";
+ bpf_trace_printk(fmt, sizeof(fmt), ctx->prev_comm, ctx->next_comm);
+
+ counter = bpf_map_lookup_elem(&counter_map, &key);
+ if(counter) {
+ *counter += 1;
+ bpf_map_update_elem(&counter_map, &key, counter, 0);
+ }
+
+ return 0;
+ }
+
+ char LICENSE[] SEC("license") = "Dual BSD/GPL";
\end{minted}
\end{block}
\end{frame}
-\begin{frame}
- \frametitle{eBPF: resources}
- \begin{itemize}
- \item A Beginner’s Guide to eBPF Programming - Liz Rice, 2020
- \begin{itemize}
- \item Slides: \url{https://speakerdeck.com/lizrice/beginners-guide-to-ebpf}
- \item Video: \url{https://www.youtube.com/watch?v=lrSExTfS-iQ}
- \item Resources: \url{https://github.com/lizrice/ebpf-beginners}
- \end{itemize}
- \end{itemize}
- \begin{center}
- \includegraphics[height=0.6\textheight]{slides/debugging-system-wide-profiling/ebpf_liz_rice_2020.png}
- \end{center}
-\end{frame}
-
-\subsection{LTTng}
-\begin{frame}
- \frametitle{{\em LTTng} (1/2)}
- \begin{columns}
- \column{0.65\textwidth}
- \begin{itemize}
- \item LTTng is an open source tracing framework for Linux maintained by
- the \href{https://www.efficios.com/}{EfficiOS} company.
- \item LTTng allows understanding the interactions between the kernel and
- applications (C, C++, Java, Python).
- \begin{itemize}
- \item Also expose a \code{/dev/lttng-logger} that can be used from any
- application.
- \end{itemize}
- \item Tracepoints are associated with a payload (data).
- \item LTTng is focused on low-overhead tracing.
- \item LTTng provides a unified logging of all events (kernel/user).
- \end{itemize}
- \column{0.35\textwidth}
- \includegraphics[height=0.3\textheight]{slides/debugging-system-wide-profiling/lttng-logo.jpg}
- \end{columns}
-\end{frame}
-
-\begin{frame}
- \frametitle{{\em LTTng} (2/2)}
- \begin{itemize}
- \item Uses the \href{https://diamon.org/ctf/}{CTF} trace format (Common
- Trace Format).
- \item LTTng is made of multiple components:
+\begin{frame}[fragile]
+ \frametitle{Building eBPF programs}
+ \begin{itemize}
+ \item An eBPF program written in C can be built into a loadable object
+ thanks to clang:
+ \begin{block}{}
+ \begin{minted}{console}
+ $ clang -target bpf -O2 -g -c my_prog.bpf.c -o my_prog.bpf.o
+ \end{minted}
+ \end{block}
\begin{itemize}
- \item LTTng-tools: Libraries and command-line interface to control tracing.
- \item LTTng-modules: Linux kernel modules to instrument and trace the kernel.
- \item LTTng-UST: Libraries and Java/Python packages to instrument and trace user applications.
+ \item The \code{-g} option allows to add debug information as well as
+ BTF information
\end{itemize}
- \item Already packaged by various distribution (debian, fedora, etc) and
- present in Buildroot and openembedded-core.
- \item Uses a single tool \code{lttng} to control tracing.
- \item No need to recompile the kernel but a few options are need
+ \item GCC can be used too with recent versions
\begin{itemize}
- \item \kconfig{CONFIG_MODULES}, \kconfig{CONFIG_KALLSYMS}, \kconfig{CONFIG_HIGH_RES_TIMERS},
- \kconfig{CONFIG_TRACEPOINTS}, \kconfig{CONFIG_KPROBES}
+ \item the toolchain can be installed with the \code{gcc-bpf} package in
+ Debian/Ubuntu
+ \item it exposes the \code{bpf-unknown-none} target
\end{itemize}
+ \item To easily manipulate this program with a userspace program based on libbpf,
+ we need "skeleton" APIs, which can be generated with \code{bpftool}
\end{itemize}
\end{frame}
\begin{frame}[fragile]
- \frametitle{LTTng architecture}
- \begin{center}
- \includegraphics[height=0.8\textheight]{slides/debugging-system-wide-profiling/lttng_graph.png}\\
- \tiny Image credits: \url{https://lttng.org/}
- \end{center}
-\end{frame}
-
-\begin{frame}
- \frametitle{Tracepoints with {\em LTTng} }
+ \frametitle{bpftool}
\begin{itemize}
- \item LTTng can use and trace the following instrumentation points:
+ \item \code{bpftool} is a command line tool allowing to interact with bpf
+ object files and the kernel to manipulate bpf programs:
\begin{itemize}
- \item LTTng kernel tracepoints
- \item kprobes and kretprobes
- \item Linux kernel system calls
- \item Linux user space probe
- \item User space LTTng tracepoints
+ \item Load programs into the kernel
+ \item List loaded programs
+ \item Dump program instructions, either as BPF code or JIT code
+ \item List loaded maps
+ \item Dump map content
+ \item Attach programs to hooks (so they can run)
+ \item etc
\end{itemize}
- \item LTTng works with a session daemon that receive all events from kernel
- and userspace LTTng tracing components.
- \item Session daemon should be started as daemon and the user should be in
- the {\em tracing} group.
+ \item You may need to mount the bpf filesystem to be able to pin programs
+ (needed to keep a program loaded after bpftool has finished running):
+ \begin{block}{}
+ \begin{minted}{console}
+ $ mount -t bpf none /sys/fs/bpf
+ \end{minted}
+ \end{block}
\end{itemize}
\end{frame}
-\begin{frame}
- \frametitle{Creating userspace tracepoints with {\em LTTng}}
+\begin{frame}[fragile]
+ \frametitle{bpftool}
\begin{itemize}
- \item New userspace tracepoints can be defined using LTTng.
- \item Tracepoints have multiple characteristics:
- \begin{itemize}
- \item A provider namespace
- \item A name identifying the tracepoint
- \item Parameters of various types (int, char *, etc)
- \item Fields describing how to display the tracepoint parameters
- (decimal, hexadecimal, etc)
- \end{itemize}
- \item Tracepoints are defined using a tracepoint provider header file
- template and a tracepoint provider package file.
- \begin{itemize}
- \item The tracepoint provider header file template contains the definition
- of the tracepoints.
- \item The tracepoint provider package is the instantiation of the
- tracepoints.
- \end{itemize}
- \item See \href{https://lttng.org/man/3/lttng-ust/v2.13/}{LTTng-ust} manpage
- for types
+ \item List loaded programs
\end{itemize}
-\end{frame}
-
-\begin{frame}[fragile]
- \frametitle{Defining a {\em LTTng} tracepoint (1/2)}
-
+ \begin{block}{}
+ \fontsize{10}{10}\selectfont
+ \begin{minted}{console}
+$ bpftool prog
+348: tracepoint name sched_tracer tag 3051de4551f07909 gpl
+loaded_at 2024-08-06T15:43:11+0200 uid 0
+xlated 376B jited 215B memlock 4096B map_ids 146,148
+btf_id 545
+ \end{minted}
+ \end{block}
\begin{itemize}
- \item Tracepoint provider header file (\code{hello_world-tp.h}):
+ \item Load and attach a program
\end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\tiny]{C}
-#undef LTTNG_UST_TRACEPOINT_PROVIDER
-#define LTTNG_UST_TRACEPOINT_PROVIDER hello_world
-
-#undef LTTNG_UST_TRACEPOINT_INCLUDE
-#define LTTNG_UST_TRACEPOINT_INCLUDE "./hello-tp.h"
-
-#if !defined(_HELLO_TP_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
-#define _HELLO_TP_H
-
-#include
-
-LTTNG_UST_TRACEPOINT_EVENT(
- hello_world,
- my_first_tracepoint,
- LTTNG_UST_TP_ARGS(
- int, my_integer_arg,
- char *, my_string_arg
- ),
- LTTNG_UST_TP_FIELDS(
- lttng_ust_field_integer(int, my_integer_field, my_integer_arg)
- lttng_ust_field_string(my_string_field, my_string_arg)
- )
-)
-#endif /* _HELLO_TP_H */
-
-#include
- \end{minted}
+ \fontsize{10}{10}\selectfont
+ \begin{minted}{console}
+$ mkdir /sys/fs/bpf/myprog
+$ bpftool prog loadall trace_execve.bpf.o /sys/fs/bpf/myprog autoattach
+ \end{minted}
\end{block}
-\end{frame}
-
-\begin{frame}[fragile]
- \frametitle{Defining a {\em LTTng} tracepoint (2/2)}
\begin{itemize}
- \item Tracepoint provider package (\code{hello_world-tp.c}):
+ \item Unload a program
\end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\tiny]{C}
-#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
-#define LTTNG_UST_TRACEPOINT_DEFINE
-
-#include "hello-tp.h"
- \end{minted}
+ \fontsize{10}{10}\selectfont
+ \begin{minted}{console}
+$ rm -rf /sys/fs/bpf/myprog
+ \end{minted}
\end{block}
+\end{frame}
+\begin{frame}[fragile]
+ \frametitle{bpftool}
\begin{itemize}
- \item Tracepoint usage (\code{hello_world.c}):
+ \item Dump a loaded program
\end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\tiny]{C}
-#include
-#include "hello-tp.h"
-
-int main(int argc, char *argv[])
-{
- lttng_ust_tracepoint(hello_world, my_first_tracepoint, 23, "hi there!");
- return 0;
-}
- \end{minted}
+ \fontsize{8}{8}\selectfont
+ \begin{minted}{console}
+$ bpftool prog dump xlated id 348
+int sched_tracer(struct sched_switch_args * ctx):
+; int sched_tracer(struct sched_switch_args *ctx)
+ 0: (bf) r4 = r1
+ 1: (b7) r1 = 0
+; __u32 key = 0;
+ 2: (63) *(u32 *)(r10 -4) = r1
+; char fmt[] = "Old task was %s, new task is %s\n";
+ 3: (73) *(u8 *)(r10 -8) = r1
+ 4: (18) r1 = 0xa7325207369206b
+ 6: (7b) *(u64 *)(r10 -16) = r1
+ 7: (18) r1 = 0x7361742077656e20
+[...]
+ \end{minted}
\end{block}
\begin{itemize}
- \item Compilation:
+ \item Dump eBPF program logs
\end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\tiny]{console}
-$ gcc hello_world.c hello_world-tp.c -llttng-ust -o hello_world
- \end{minted}
+ \fontsize{6}{6}\selectfont
+ \begin{minted}{console}
+$ bpftool prog tracelog
+kworker/u80:0-11 [013] d..41 1796.003605: bpf_trace_printk: Old task was kworker/u80:0, new task is swapper/13
<idle>-0 [013] d..41 1796.003609: bpf_trace_printk: Old task was swapper/13, new task is kworker/u80:0
+sudo-18640 [010] d..41 1796.003613: bpf_trace_printk: Old task was sudo, new task is swapper/10
<idle>-0 [010] d..41 1796.003617: bpf_trace_printk: Old task was swapper/10, new task is sudo
+[...]
+ \end{minted}
\end{block}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Generating tracepoints using \code{lttng-gen-tp}}
+ \frametitle{bpftool}
\begin{itemize}
- \item Writing both the \code{.h} and \code{.c} boilerplate can be avoided
- using \code{lttng-gen-tp}.
- \item \code{lttng-gen-tp} takes a template file (\code{.tp}) as input and will
- generate both the provider header and package files (\code{.h},
- \code{.c} and \code{.o} files):
+ \item List created maps
\end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\tiny]{C}
- LTTNG_UST_TRACEPOINT_EVENT(
- // Tracepoint provider name
- hello_world,
-
- // Tracepoint/event name
- first_tp,
-
- // Tracepoint arguments (input)
- LTTNG_UST_TP_ARGS(
- char *, text
- ),
-
- // Tracepoint/event fields (output)
- LTTNG_UST_TP_FIELDS(
- lttng_ust_field_string(message, text)
- )
-)
- \end{minted}
+ \fontsize{9}{9}\selectfont
+ \begin{minted}{console}
+$ bpftool map
+80: array name counter_map flags 0x0
+ key 4B value 8B max_entries 1 memlock 256B
+ btf_id 421
+82: array name .rodata.str1.1 flags 0x80
+ key 4B value 33B max_entries 1 memlock 288B
+ frozen
+96: array name libbpf_global flags 0x0
+ key 4B value 32B max_entries 1 memlock 280B
+[...]
+ \end{minted}
\end{block}
-\end{frame}
-
-\begin{frame}[fragile]
- \frametitle{Using {\em LTTng}}
+ \begin{itemize}
+ \item Show a map content
+ \end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\small]{console}
-$ lttng create my-tracing-session --output=./my_traces
-$ lttng list --kernel
-$ lttng list --userspace
-$ lttng enable-event --userspace hello_world:my_first_tracepoint
-$ lttng enable-event --kernel --syscall open,close,write
-$ lttng start
-$ /* Run your application or do something */
-$ lttng destroy
-$ babeltrace2 ./my_traces
- \end{minted}
+ \fontsize{9}{9}\selectfont
+ \begin{minted}{console}
+$ sudo bpftool map dump id 80
+[{
+ "key": 0,
+ "value": 4877514
+ }
+]
+ \end{minted}
\end{block}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Remote tracing with {\em LTTng}}
+ \frametitle{bpftool}
\begin{itemize}
- \item LTTng allows to record traces over the network.
- \item Useful for embedded systems with limited storage capabilities.
- \item On the remote computer, run \code{lttng-relayd} command
+ \item Generate libbpf APIs to manipulate a program
\end{itemize}
\begin{block}{}
- \begin{minted}[fontsize=\small]{console}
-$ lttng-relayd --output=${PWD}/traces
- \end{minted}
+ \fontsize{9}{9}\selectfont
+ \begin{minted}{console}
+$ bpftool gen skeleton trace_execve.bpf.o name trace_execve > trace_execve.skel.h
+ \end{minted}
\end{block}
\begin{itemize}
- \item Then on the target, at session creation, use the \code{--set-url}
+ \item We can then write our userspace program and benefit from high level
+ APIs to manipulate our eBPF program:
+ \begin{itemize}
+ \item instantiation of a global context object which will have references
+ to all of our programs, maps, links, etc
+ \item loading/attaching/unloading of our programs
+ \item eBPF program directly embedded in the generated header as a byte
+ array
+ \end{itemize}
\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Userspace code with libbpf}
\begin{block}{}
- \begin{minted}[fontsize=\small]{console}
-$ lttng create my-session --set-url=net://remote-system
- \end{minted}
+ \begin{minted}[fontsize=\tiny]{C}
+ #include
+ #include
+ #include
+ #include "trace_sched_switch.skel.h"
+
+ int main(int argc, char *argv[])
+ {
+ struct trace_sched_switch *skel;
+ int key = 0;
+ long counter = 0;
+
+ skel = trace_sched_switch__open_and_load();
+ if(!skel)
+ exit(EXIT_FAILURE);
+ if (trace_sched_switch__attach(skel)) {
+ trace_sched_switch__destroy(skel);
+ exit(EXIT_FAILURE);
+ }
+
+ while(true) {
+ bpf_map__lookup_elem(skel->maps.counter_map, &key, sizeof(key), &counter, sizeof(counter), 0);
+ fprintf(stderr, "Scheduling switch count: %ld\n", counter);
+ sleep(1);
+ }
+
+ return 0;
+ }
+ \end{minted}
\end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{eBPF programs portability (1/2)}
+ \begin{itemize}
+ \item Kernel internals, contrary to userspace APIs, do not expose stable APIs.
+ This means that an eBPF program manipulating some kernel data may not work
+ with another kernel version
+ \item The CO-RE (Compile Once - Run Everywhere) approach aims to solve this issue
+ and make programs portable between \textbf{kernel versions}. It relies on
+ the following features:
+ \begin{itemize}
+ \item your kernel must be built with
+ \kconfigval{CONFIG_DEBUG_INFO_BTF}{y} to have BTF data embedded. BTF is a
+ format similar to DWARF which encodes data layout and function
+ signatures in an efficient way.
+ \item your eBPF compiler must be able to emit BTF relocations (both clang
+ and GCC are capable of this on recent versions, with the \code{-g} argument)
+ \item you need a BPF loader capable of processing BPF programs based on BTF data and
+ adjusting data accesses accordingly: \code{libbpf} is the de-facto standard bpf
+ loader
+ \item you then need eBPF APIs to read/write to CO-RE relocatable
+ variables. libbpf provides such helpers, like \code{bpf_core_read}
+ \end{itemize}
+ \item To learn more, take a look at
+ \href{https://nakryiko.com/posts/bpf-core-reference-guide/}{Andrii
+ Nakryiko's CO-RE guide}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{eBPF programs portability (2/2)}
\begin{itemize}
- \item Traces will then be recorded directly on the remote computer.
+ \item Despite CO-RE, you may still face different constraints on different
+ kernel versions, because of major feature additions or changes, since
+ the eBPF subsystem keeps receiving frequent updates:
+ \begin{itemize}
+ \item eBPF tail calls (which allow a program to call a function) have
+ been added in version 4.2, and allow calling another program only since
+ version 5.10
+ \item eBPF spin locks have been added in version 5.1 to prevent
+ concurrent accesses to maps shared between CPUs.
+ \item Different attach types keep being added, but possibly on different
+ kernel versions when it depends on the architecture: fentry/fexit attach
+ points have been added in kernel 5.5 for x86 but in 6.0 for arm32.
+ \item Any kind of loop (even bounded) was forbidden until version 5.3
+ \item \code{CAP_BPF} capability, allowing a process to perform eBPF tasks, has
+ been added in version 5.8
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+\frametitle{eBPF for tracing/profiling}
+ \begin{itemize}
+ \item eBPF is a very powerful framework to spy on kernel internals: thanks
+ to the wide variety of attach points, you can expose almost any kernel code path and data.
+ \item At the same time, eBPF programs remain isolated from kernel code,
+ which makes it safe (compared to kernel development) and easy to use.
+ \item Thanks to the in-kernel interpreter and optimizations like JIT compilation, eBPF is very well
+ suited for tracing or profiling with low overhead, even in production
+ environments, while being very flexible.
+ \item This is why eBPF adoption level keeps growing for debugging, tracing
+ and profiling in the Linux ecosystem. As a few examples, we find eBPF usage in:
+ \begin{itemize}
+ \item tracing frameworks like \href{https://github.com/iovisor/bcc}{BCC}
+ and \href{https://github.com/bpftrace/bpftrace}{bpftrace}
+ \item network infrastructure components, like
+ \href{https://github.com/cilium/cilium}{Cilium} or \href{https://github.com/projectcalico/calico}{Calico}
+ \item network packet tracers, like
+ \href{https://github.com/cilium/pwru}{pwru} or
+ \href{https://github.com/feiskyer/dropwatch}{dropwatch}
+ \item And many more, check \href{https://ebpf.io/applications/}{ebpf.io}
+ for more examples
+ \end{itemize}
\end{itemize}
\end{frame}
+\begin{frame}[fragile]
+ \frametitle{eBPF: resources}
+ \begin{itemize}
+ \item BCC tutorial:
+ \url{https://github.com/iovisor/bcc/blob/master/docs/tutorial_bcc_python_developer.md}
+ \item libbpf-bootstrap: \url{https://github.com/libbpf/libbpf-bootstrap}
+ \item A Beginner’s Guide to eBPF Programming - Liz Rice, 2020
+ \begin{itemize}
+ \item Video: \url{https://www.youtube.com/watch?v=lrSExTfS-iQ}
+ \item Resources: \url{https://github.com/lizrice/ebpf-beginners}
+ \end{itemize}
+ \end{itemize}
+ \begin{center}
+ \includegraphics[height=0.4\textheight]{slides/debugging-system-wide-profiling/ebpf_liz_rice_2020.png}
+ \end{center}
+\end{frame}
+
+\setuplabframe
+{System wide profiling}
+{
+ Creating custom tracing tools with eBPF
+ \begin{itemize}
+ \item Tracing with BCC
+ \item Converting a BCC script to libbpf
+ \item Bringing advanced features to the tool
+ \end{itemize}
+}
+
\subsection{Choosing the right tool}
\begin{frame}[fragile]
@@ -1315,16 +1781,8 @@ \subsection{Choosing the right tool}
\item For "constant" load problems, snapshot tools works fine.
\item For sporadic problems, record traces and analyze them.
\end{itemize}
+ \item If you happen to have a complex setup that you often have to bring up,
+ it is likely a sign that you want to ease this setup with some custom tooling:
+ scripting, custom traces, eBPF, etc
\end{itemize}
\end{frame}
-
-\setuplabframe
-{System wide profiling}
-{
- Profiling a system from userspace to kernel space
- \begin{itemize}
- \item Profiling with ftrace, uprobes and kernelshark
- \item Profiling with LTTng and trace-compass
- \item Profiling with perf
- \end{itemize}
-}
diff --git a/slides/debugging-system-wide-profiling/lttng_graph.png b/slides/debugging-system-wide-profiling/lttng_graph.png
deleted file mode 100644
index d15947efa7..0000000000
Binary files a/slides/debugging-system-wide-profiling/lttng_graph.png and /dev/null differ