From d721c5776745eae0e6d50c9d5fe9fcb6354ade8f Mon Sep 17 00:00:00 2001
From: Frantisek Sumsal <frantisek@sumsal.cz>
Date: Wed, 17 Apr 2024 12:03:45 +0200
Subject: [PATCH] test: run dbus-broker under ASan and UBsan

Let's introduce a test that runs dbus-broker under Address Sanitizer and
Undefined Behavior Sanitizer, while running other tests against it.

The setup to achieve this is slightly convoluted, since we need to run
(and restart) sanitized dbus-broker without nuking the host machine. For
that we set up an nspawn-container that re-uses host's rootfs (to some
degree) and overlays our additions on top of that. This way we can test
(not only) the full user-space boot with sanitized dbus-broker without
risking "damage" to the host machine.
---
 test/integration/fuzz/sanitizers/main.fmf |  20 ++
 test/integration/fuzz/sanitizers/test.sh  | 187 +++++++++++++++++++
 test/integration/util.sh                  | 217 ++++++++++++++++++++++
 3 files changed, 424 insertions(+)
 create mode 100644 test/integration/fuzz/sanitizers/main.fmf
 create mode 100755 test/integration/fuzz/sanitizers/test.sh
 create mode 100644 test/integration/util.sh

diff --git a/test/integration/fuzz/sanitizers/main.fmf b/test/integration/fuzz/sanitizers/main.fmf
new file mode 100644
index 00000000..42a93151
--- /dev/null
+++ b/test/integration/fuzz/sanitizers/main.fmf
@@ -0,0 +1,20 @@
+summary: Run dbus-broker under sanitizers
+test: ./test.sh
+require:
+    - dbus-broker
+    - dbus-daemon
+    - dfuzzer
+    - expat-devel
+    - gcc
+    - gdb
+    - git
+    - glibc-devel
+    - libasan
+    - libubsan
+    - meson
+    - systemd
+    - systemd-container
+    - systemd-devel
+    - systemd-libs
+    - util-linux
+duration: 30m
diff --git a/test/integration/fuzz/sanitizers/test.sh b/test/integration/fuzz/sanitizers/test.sh
new file mode 100755
index 00000000..9b584188
--- /dev/null
+++ b/test/integration/fuzz/sanitizers/test.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# vi: set sw=4 ts=4 et tw=110:
+# shellcheck disable=SC2016
+
+set -eux
+set -o pipefail
+
+# shellcheck source=test/integration/util.sh
+. "$(dirname "$0")/../../util.sh"
+
+export ASAN_OPTIONS=strict_string_checks=1:detect_stack_use_after_return=1:check_initialization_order=1:strict_init_order=1:detect_invalid_pointer_pairs=2:handle_ioctl=1:print_cmdline=1:disable_coredump=0:use_madv_dontdump=1
+export UBSAN_OPTIONS=print_stacktrace=1:print_summary=1:halt_on_error=1
+
+# shellcheck disable=SC2317
+at_exit() {
+    # Cleanup is best-effort: turn off errexit and tracing so one failing step doesn't skip the rest
+    set +ex
+
+    # Dump info about coredumps recorded since coredumpctl_init() was called
+    coredumpctl_collect
+    # Stop the container if it's still running, export its journal to $TMT_TEST_DATA (if set), and clean up
+    container_destroy
+}
+
+trap at_exit EXIT
+
+export BUILD_DIR="$PWD/build-san"
+
+# Make sure the coredump collecting machinery is working
+coredumpctl_init
+
+: "=== Prepare dbus-broker's source tree ==="
+# The integration test suite runs without access to the source tree it was built from. If we need the source
+# tree (most likely to rebuild dbus-broker) we need to do a little dance to determine the correct references.
+if [[ -n "${PACKIT_TARGET_URL:-}" ]]; then
+    # If we're running in Packit's context, use the set of provided environment variables to checkout the
+    # correct branch (and possibly rebase it on top of the latest source base branch so we always test the
+    # latest revision possible).
+    git clone "$PACKIT_TARGET_URL" dbus-broker
+    cd dbus-broker
+    git checkout "$PACKIT_TARGET_BRANCH"
+    # If we're invoked from a pull request context, rebase on top of the latest source base branch.
+    if [[ -n "${PACKIT_SOURCE_URL:-}" ]]; then
+        git remote add pr "${PACKIT_SOURCE_URL:?}"
+        git fetch pr "${PACKIT_SOURCE_BRANCH:?}"
+        git merge "pr/$PACKIT_SOURCE_BRANCH"
+    fi
+    git log --oneline -5
+elif [[ -n "${DBUS_BROKER_TREE:-}" ]]; then
+    # Useful for quick local debugging when running this script directly from the dbus-broker repo root,
+    # e.g.:
+    #
+    #   # TMT_TEST_DATA=$PWD/logs DBUS_BROKER_TREE=$PWD test/integration/fuzz/sanitizers/test.sh
+    cd "${DBUS_BROKER_TREE:?}"
+else
+    # If we're running outside of Packit's context, pull the latest dbus-broker upstream and build from it.
+    git clone https://github.com/bus1/dbus-broker dbus-broker
+    cd dbus-broker
+    git log --oneline -5
+fi
+
+: "=== Build dbus-broker with sanitizers and run the unit test suite ==="
+meson setup "$BUILD_DIR" --wipe -Db_sanitize=address,undefined -Dprefix=/usr
+ninja -C "$BUILD_DIR"
+meson test -C "$BUILD_DIR" --timeout-multiplier=2 --print-errorlogs
+
+: "=== Run tests against dbus-broker running under sanitizers ==="
+# So, this one is a _bit_ convoluted. We want to run dbus-broker under sanitizers, but this bears a couple of
+# issues:
+#
+#   1) We need to restart dbus-broker (and hence the machine we're currently running on)
+#   2) If dbus-broker crashes due to ASan/UBSan error, the whole machine is hosed
+#
+# To make the test a bit more robust without too much effort, let's use systemd-nspawn to run an ephemeral
+# container on top of the current rootfs. To get the "sanitized" dbus-broker into that container, we need to
+# prepare a special rootfs with just the sanitized dbus-broker (and a couple of other things) which we then
+# simply overlay on top of the ephemeral rootfs in the container.
+#
+# This way, we'll do a full user-space boot with a sanitized dbus-broker without affecting the host machine,
+# and without having to build a custom container/VM just for the test.
+container_prepare
+
+# Install our custom-built dbus-broker into the container's overlay
+DESTDIR="$CONTAINER_OVERLAY" ninja -C "$BUILD_DIR" install
+# Pass $ASAN_OPTIONS and $UBSAN_OPTIONS (expanded here, at write time) to the container's dbus-broker service
+mkdir -p "$CONTAINER_OVERLAY/etc/systemd/system/dbus-broker.service.d/"
+cat >"$CONTAINER_OVERLAY/etc/systemd/system/dbus-broker.service.d/sanitizer-env.conf" <<EOF
+[Service]
+Environment=ASAN_OPTIONS=$ASAN_OPTIONS
+Environment=UBSAN_OPTIONS=$UBSAN_OPTIONS
+# Useful for debugging LSan errors, but it's very verbose, hence disabled by default
+#Environment=LSAN_OPTIONS=verbosity=1:log_threads=1
+EOF
+# Do the same for the user unit, so the user instances of dbus-broker get the same sanitizer options
+mkdir -p "$CONTAINER_OVERLAY/etc/systemd/user/dbus-broker.service.d/"
+cat >"$CONTAINER_OVERLAY/etc/systemd/user/dbus-broker.service.d/sanitizer-env.conf" <<EOF
+[Service]
+Environment=ASAN_OPTIONS=$ASAN_OPTIONS
+Environment=UBSAN_OPTIONS=$UBSAN_OPTIONS
+# Useful for debugging LSan errors, but it's very verbose, hence disabled by default
+#Environment=LSAN_OPTIONS=verbosity=1:log_threads=1
+EOF
+# Run both dbus-broker-launch and dbus-broker under root instead of the usual "dbus" user. This is necessary
+# to let sanitizers generate stack traces (killing the process on sanitizer error works even without this
+# tweak though, but it's very hard to then tell what went wrong without a stack trace).
+mkdir -p "$CONTAINER_OVERLAY/etc/dbus-1/"
+cat >"$CONTAINER_OVERLAY/etc/dbus-1/system-local.conf" <<EOF
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+<busconfig>
+    <user>root</user>
+</busconfig>
+EOF
+
+check_journal_for_sanitizer_errors() {
+    if journalctl -q -D "/var/log/journal/${CONTAINER_MACHINE_ID:?}" --grep "SUMMARY:.+Sanitizer"; then
+        # Dump all messages recorded for the dbus-broker.service, as that's usually where the stack trace ends
+        # up. If that's not the case, the full container journal is exported on test exit anyway, so we'll
+        # still have everything we need to debug the fail further.
+        journalctl -q -D "/var/log/journal/${CONTAINER_MACHINE_ID:?}" -o short-monotonic --no-hostname -u dbus-broker.service --no-pager
+        exit 1
+    fi
+}
+
+run_and_check() {
+    local runner=(container_run) user_instance=no
+
+    # An optional leading --unpriv makes everything below run as the unprivileged "testuser"
+    if [[ "$1" == "--unpriv" ]]; then
+        runner=(container_run_user testuser)
+        user_instance=yes
+        shift
+    fi
+
+    # Execute the requested command inside the container
+    "${runner[@]}" "$@"
+    # dbus-broker must have survived it: check the system instance...
+    "${runner[@]}" systemctl status --full --no-pager dbus-broker.service
+    # ... and, for unprivileged runs, the user instance as well
+    if [[ $user_instance == yes ]]; then
+        "${runner[@]}" systemctl status --user --full --no-pager dbus-broker.service
+    fi
+    # Finally, fail if the container journal contains any sanitizer report
+    check_journal_for_sanitizer_errors
+}
+
+# Start the container and wait until it's fully booted up
+container_start
+# Check if dbus-broker runs under root, see above for reasoning
+container_run bash -xec '[[ $(stat --format=%u /proc/$(systemctl show -P MainPID dbus-broker.service)) -eq 0 ]]'
+# Make _extra_ sure we're running the sanitized dbus-broker with the correct environment
+container_run bash -xec 'ldd /proc/$(systemctl show -P MainPID dbus-broker.service)/exe | grep -qF libasan.so'
+container_run bash -xec 'ldd $(command -v dbus-broker-launch) | grep -qF libasan.so'
+container_run bash -xec 'ldd $(command -v dbus-broker) | grep -qF libasan.so'
+container_run systemctl show -p Environment dbus-broker.service | grep -q ASAN_OPTIONS
+# Do a couple of check for the user instance as well
+container_run_user testuser bash -xec 'ldd /proc/$(systemctl show --user -P MainPID dbus-broker.service)/exe | grep -qF libasan.so'
+container_run_user testuser systemctl show -p Environment dbus-broker.service | grep -q ASAN_OPTIONS
+journalctl -D "/var/log/journal/${CONTAINER_MACHINE_ID:?}" -e -n 10 --no-pager
+check_journal_for_sanitizer_errors
+
+# Now we should have a container ready for our shenanigans
+
+# Let's start with something simple and run dfuzzer on the org.freedesktop.DBus bus
+run_and_check dfuzzer -v -n org.freedesktop.DBus
+# Now run the dfuzzer on the org.freedesktop.systemd1 as well, since it's pretty rich when it comes to
+# signature variations.
+#
+# Since fuzzing the entire systemd bus tree takes way too long (as it spends most of the time fuzzing the
+# /org/freedesktop/systemd1/unit/ objects, which is the same stuff over and over again), let's selectively
+# pick a couple of interesting objects to speed things up.
+#
+# First, fuzz the manager object...
+run_and_check --unpriv dfuzzer -n org.freedesktop.systemd1 -o /org/freedesktop/systemd1
+# ... and then pick first 10 units from the container's (not the host's) /org/freedesktop/systemd1/unit/ tree.
+while read -r object; do
+    run_and_check --unpriv dfuzzer -n org.freedesktop.systemd1 -o "$object"
+done < <(container_run busctl tree --list --no-legend org.freedesktop.systemd1 | grep /unit/ | head -n10)
+
+# Shut down the container and check for any sanitizer errors, since some of the errors can be detected only
+# after we start shutting things down.
+container_stop
+check_journal_for_sanitizer_errors
+# Also fail if PID 1 logged a dbus-broker.service failure (the subshell makes set -e honor the negation)
+(! journalctl -q -D "/var/log/journal/$CONTAINER_MACHINE_ID" _PID=1 --grep "dbus-broker.service.*Failed with result")
+
+exit 0
diff --git a/test/integration/util.sh b/test/integration/util.sh
new file mode 100644
index 00000000..5e2a9c3e
--- /dev/null
+++ b/test/integration/util.sh
@@ -0,0 +1,217 @@
+# vi: set sw=4 ts=4 et tw=110:
+# shellcheck shell=bash disable=SC2155
+
+CONTAINER_NAME=""
+CONTAINER_MACHINE_ID=""
+CONTAINER_OVERLAY=""
+
+__COREDUMPCTL_TS=""
+
+# Prepare a systemd-nspawn container so we can test (and restart) dbus-broker safely without risking the
+# underlying test machine.
+#
+# This function prepares a lightweight nspawn container that reuses the rootfs of the underlying test machine
+# to run dbus-broker under various tools (or a completely custom-built dbus-broker version) without risking
+# damage to the underlying test machine. The container simply combines the /etc and /usr directories from the
+# host with our own additions using overlayfs, which is then bind-mounted into the container, so we do a full
+# user-space boot without needing to build a custom image or restart the underlying test machine itself.
+#
+# The function exports/modifies three environment variables:
+#   - $CONTAINER_NAME - container name that can be used to identify the machine in machinectl calls (or in
+#                       direct calls to the systemd-nspawn@.service template)
+#   - $CONTAINER_MACHINE_ID - machine ID of the container, which can be used to locate the container's journal
+#                             under /var/log/journal/$CONTAINER_MACHINE_ID
+#   - $CONTAINER_OVERLAY - upper layer of the container overlayfs that can be used to add additional bits into
+#                          the final container (note that only /etc and /usr subdirectories from this
+#                          directory are used)
+#
+# Once the container is ready, it can be booted up using container_start(). To execute commands inside the
+# container, container_run() and container_run_user() might come in handy.
+container_prepare() {
+    # Set the three global variables callers use to track/alter the container
+    CONTAINER_NAME="dbus-broker-container-$RANDOM"
+    CONTAINER_MACHINE_ID="$(systemd-id128 new)"
+    CONTAINER_OVERLAY="/var/lib/machines/$CONTAINER_NAME"
+
+    # Switch SELinux to permissive (if enabled), so it doesn't interfere with the container shenanigans below.
+    setenforce 0 || :
+    # We need persistent journal for the systemd-nspawn --link-journal= stuff
+    mkdir -p /var/log/journal
+    journalctl --flush
+
+    # Prepare the nspawn container service
+    mkdir -p "/var/lib/machines/$CONTAINER_NAME"
+    # Notes:
+    #   - with systemd v256+ this can be replaced by systemctl edit --stdin --runtime ..., and the
+    #     mkdir/daemon-reload can be dropped
+    #   - systemd-nspawn can't overlay the whole rootfs (/), so we need to cherry-pick a couple of subdirectories
+    #     we're interested in (in this case it's pretty simple, since dbus-broker installs everything under /usr,
+    #     and we need /etc with our dbus-broker.service override)
+    #   - since the whole container is ephemeral, use --link-journal=host, so the journal directory for the
+    #     container is created on the _host_ under /var/log/journal/<machine-id> and bind-mounted into the
+    #     container; that way we can fetch the container journal for debugging even if something goes horribly
+    #     wrong
+    mkdir -p "/run/systemd/system/systemd-nspawn@$CONTAINER_NAME.service.d"
+    cat >"/run/systemd/system/systemd-nspawn@$CONTAINER_NAME.service.d/override.conf" <<EOF
+    [Service]
+# We'll handle the coredumps on the host instead
+CoredumpReceive=no
+ExecStart=
+ExecStart=systemd-nspawn --quiet --network-veth --keep-unit --machine=%i --boot \
+                         --link-journal=host \
+                         --volatile=yes \
+                         --directory=/ \
+                         --uuid=$CONTAINER_MACHINE_ID \
+                         --hostname=$CONTAINER_NAME \
+                         --overlay=/etc:$CONTAINER_OVERLAY/etc:/etc \
+                         --overlay-ro=/usr:$CONTAINER_OVERLAY/usr:/usr
+EOF
+    # Make systemd pick up the drop-in we just wrote
+    systemctl daemon-reload
+
+    # Prepare the nspawn container overlay
+    #
+    # Let systemd-nspawn propagate the machine ID and hostname we passed it
+    mkdir "$CONTAINER_OVERLAY"/{etc,usr}/
+    : >"$CONTAINER_OVERLAY/etc/machine-id"
+    : >"$CONTAINER_OVERLAY/etc/hostname"
+    # Create a non-root user, so we can test session bus stuff as well
+    mkdir -p "$CONTAINER_OVERLAY/etc/sysusers.d/"
+    cat >"$CONTAINER_OVERLAY/etc/sysusers.d/testuser.conf" <<EOF
+u testuser - "Test User" /home/testuser
+EOF
+}
+
+# Boot the container set up by container_prepare() and block until it has finished booting.
+container_start() {
+    [[ -n "$CONTAINER_NAME" ]] || {
+        echo >&2 "No container to start (missing call to container_prepare()?)"
+        return 1
+    }
+
+    machinectl start "$CONTAINER_NAME"
+    # Poll for up to 30 seconds until a trivial command can be executed inside the container
+    timeout --foreground 30s bash -ec "until systemd-run -M $CONTAINER_NAME --wait --pipe true; do sleep .5; done"
+    # Wait for the bootup to finish; a non-zero exit (e.g. a degraded system) is deliberately ignored here
+    container_run systemctl is-system-running -q --wait || :
+    container_run systemctl status --full --no-pager dbus-broker.service
+    container_run_user testuser systemctl status --user --full --no-pager dbus-broker.service
+}
+
+container_stop() {
+    # Note: machinectl poweroff doesn't wait until the container shuts down completely, so stop the service
+    #       behind it instead, which does wait
+    systemctl stop "systemd-nspawn@${CONTAINER_NAME:?}.service"
+}
+
+# Run a command inside the container as root and wait for it to finish (via systemd-run --wait --pipe).
+container_run() {
+    systemd-run -M "${CONTAINER_NAME:?}" --wait --pipe "$@"
+}
+
+# Same as above, but run the command under a specific user.
+container_run_user() {
+    local user="${1:?}"
+    shift
+
+    systemd-run -M "$user@${CONTAINER_NAME:?}" --user --wait --pipe "$@"
+}
+
+container_destroy() {
+    # Tear down what container_prepare()/container_start() created; a no-op if no container was prepared
+    [[ -n "$CONTAINER_NAME" ]] || return 0
+    local journal="/var/log/journal/$CONTAINER_MACHINE_ID"
+
+    if systemctl -q is-active "systemd-nspawn@$CONTAINER_NAME.service"; then
+        container_stop
+    fi
+
+    # Export the container journal if $TMT_TEST_DATA is set (by TMT or manually). Skip the journal handling
+    # if the machine ID is empty, since $journal would then point at the host's /var/log/journal itself.
+    if [[ -n "$CONTAINER_MACHINE_ID" && -n "${TMT_TEST_DATA:-}" ]]; then
+        mkdir -p "$TMT_TEST_DATA"
+        journalctl -D "$journal" -b -o short-monotonic >"$TMT_TEST_DATA/$CONTAINER_NAME.log"
+    fi
+
+    rm -rf "/var/lib/machines/$CONTAINER_NAME"
+    [[ -z "$CONTAINER_MACHINE_ID" ]] || rm -rf "$journal"
+    rm -rf "/run/systemd/system/systemd-nspawn@$CONTAINER_NAME.service.d"
+    systemctl daemon-reload
+}
+
+coredumpctl_init() {
+    local ec
+
+    if ! systemctl start systemd-coredump.socket; then
+        echo >&2 "Failed to start systemd-coredump.socket"
+        return 1
+    fi
+
+    # Note: coredumpctl returns 1 when no coredumps are found
+    coredumpctl --since=now >/dev/null && ec=0 || ec=$?
+    if [[ $ec -ne 1 ]]; then
+        echo >&2 "coredumpctl is not in operative state"
+        return 1
+    fi
+
+    # Set the internal coredumpctl timestamp, so we consider coredumps only from now on
+    __COREDUMPCTL_TS="$(date +"%Y-%m-%d %H:%M:%S")"
+
+    return 0
+}
+
+# Attempt to dump info about relevant coredumps using the coredumpctl utility.
+#
+# Returns:
+#   0 when no coredumps were found, 1 otherwise
+coredumpctl_collect() (
+    set +ex
+
+    local args=(-q --no-legend --no-pager)
+    local tempfile="$(mktemp)"
+
+    # Register a cleanup handler
+    #
+    # Note: since this function is technically a subshell, RETURN trap won't work here
+    # shellcheck disable=SC2064
+    trap "rm -f '$tempfile'" EXIT
+
+    if [[ -n "$__COREDUMPCTL_TS" ]]; then
+        args+=(--since "$__COREDUMPCTL_TS")
+    fi
+
+    if ! coredumpctl "${args[@]}" -F COREDUMP_EXE >"$tempfile"; then
+        echo "No relevant coredumps found"
+        return 0
+    fi
+
+    # For each unique executable path call 'coredumpctl info' to get the stack trace and other useful info
+    while read -r path; do
+        local exe
+        local gdb_cmd="set print pretty on\nbt full"
+
+        coredumpctl "${args[@]}" info "$path"
+        # Make sure we use the built binaries for getting gdb trace
+        #
+        # This is relevant mainly for the sanitizers run, where the crashed binary may not be available on
+        # the host, so `coredumpctl debug` pulls in a local binary instead of the built one, which produces
+        # useless results.
+        if [[ -v BUILD_DIR && -d $BUILD_DIR ]]; then
+            # The build directory layout of dbus-broker is not flat, so we need to find the binary first
+            exe="$(find "$BUILD_DIR" -executable -name "${path##*/}" | head -n1)"
+            if [[ -n "$exe" ]]; then
+                gdb_cmd="file $exe\nthread apply all bt\n$gdb_cmd"
+            fi
+        fi
+
+        # Attempt to get a full stack trace for the first occurrence of the given executable path
+        if gdb -v >/dev/null; then
+            echo -e "\n"
+            echo "Trying to run gdb with '$gdb_cmd' for '$path'"
+            echo -e "$gdb_cmd" | coredumpctl "${args[@]}" debug "$path"
+            echo -e "\n"
+        fi
+    done < <(sort -u "$tempfile")
+
+    return 1
+)