Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

switchroot: Stop making /sysroot mount private #3292

Merged
merged 2 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/libostree/ostree-impl-system-generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
*
* Note that our unit doesn't run if systemd.volatile is enabled;
* see https://github.com/ostreedev/ostree/pull/856
*
* To avoid having submounts of /var propagate into $stateroot/var, the mount
* is made with slave+shared propagation. This means that /var will receive
* mount events from the parent /sysroot mount, but not vice versa. Adding a
* shared peer group below the slave group means that submounts of /var will
* inherit normal shared propagation. See mount_namespaces(7), Linux
* Documentation/filesystems/sharedsubtree.txt and
* https://github.com/ostreedev/ostree/issues/2086. This also happens in
* ostree-prepare-root.c for the INITRAMFS_MOUNT_VAR case.
*/
if (!g_output_stream_printf (outstream, &bytes_written, cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
Expand All @@ -243,7 +252,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
"[Mount]\n"
"Where=%s\n"
"What=%s\n"
"Options=bind\n",
"Options=bind,slave,shared\n",
cgwalters marked this conversation as resolved.
Show resolved Hide resolved
var_path, stateroot_var_path))
return FALSE;
if (!g_output_stream_flush (outstream, cancellable, error))
Expand Down
21 changes: 10 additions & 11 deletions src/switchroot/ostree-prepare-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,16 @@ main (int argc, char *argv[])
{
if (mount ("../../var", TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount ../../var to var");

/* To avoid having submounts of /var propagate into $stateroot/var, the
* mount is made with slave+shared propagation. See the comment in
* ostree-impl-system-generator.c when /var isn't mounted in the
* initramfs for further explanation.
*/
if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SLAVE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to change /var to slave mount");
if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SHARED | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to change /var to slave+shared mount");
}

/* This can be used by other things to signal ostree is in use */
Expand Down Expand Up @@ -687,16 +697,5 @@ main (int argc, char *argv[])
err (EXIT_FAILURE, "failed to make /sysroot read-only");
}

/* The /sysroot mount needs to be private to avoid having a mount for e.g. /var/cache
* also propagate to /sysroot/ostree/deploy/$stateroot/var/cache
*
* Now in reality, today this is overridden by systemd: the *actual* way we fix this up
* is in ostree-remount.c. But let's do it here to express the semantics we want
* at the very start (perhaps down the line systemd will have compile/runtime option
* to say that the initramfs environment did everything right from the start).
*/
if (mount ("none", "sysroot", NULL, MS_PRIVATE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "remounting 'sysroot' private");

exit (EXIT_SUCCESS);
}
9 changes: 0 additions & 9 deletions src/switchroot/ostree-remount.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,6 @@ main (int argc, char *argv[])
}
g_autoptr (GVariantDict) ostree_run_metadata = g_variant_dict_new (ostree_run_metadata_v);

/* The /sysroot mount needs to be private to avoid having a mount for e.g. /var/cache
cgwalters marked this conversation as resolved.
Show resolved Hide resolved
* also propagate to /sysroot/ostree/deploy/$stateroot/var/cache
*
* Today systemd remounts / (recursively) as shared, so we're undoing that as early
* as possible. See also a copy of this in ostree-prepare-root.c.
*/
if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
perror ("warning: While remounting /sysroot MS_PRIVATE");

const char *transient_etc = NULL;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s",
&transient_etc);
Expand Down
140 changes: 140 additions & 0 deletions tests/kolainst/destructive/mount-propagation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/bin/bash
# https://bugzilla.redhat.com/show_bug.cgi?id=1498281
set -xeuo pipefail

. ${KOLA_EXT_DATA}/libinsttest.sh

require_writable_sysroot

get_mount() {
local target=${1:?No target specified}
local pid=${2:?No PID specified}

# findmnt always looks at /proc/self/mountinfo, so we have to first enter the
# mount namespace of the desired process.
nsenter --target "${pid}" --mount -- \
findmnt --json --list --output +PROPAGATION,OPT-FIELDS \
| jq ".filesystems[] | select(.target == \"${target}\")"
}

assert_has_mount() {
local target=${1:?No target specified}
local pid=${2:-$$}
local mount

mount=$(get_mount "${target}" "${pid}")
if [ -n "${mount}" ]; then
echo -e "Process ${pid} has mount '${target}':\n${mount}"
else
cat "/proc/${pid}/mountinfo" >&2
fatal "Mount '${target}' not found in process ${pid}"
fi
}

assert_not_has_mount() {
local target=${1:?No target specified}
local pid=${2:-$$}
local mount

mount=$(get_mount "${target}" "${pid}")
if [ -n "${mount}" ]; then
cat "/proc/${pid}/mountinfo" >&2
fatal "Mount '${target}' found in process ${pid}"
else
echo "Process ${pid} does not have mount '${target}'"
fi
}

test_mounts() {
local stateroot

echo "Root namespace mountinfo:"
cat "/proc/$$/mountinfo"

echo "Sub namespace mountinfo:"
cat "/proc/${ns_pid}/mountinfo"

# Make sure the 2 PIDs are really in different mount namespaces.
root_ns=$(readlink "/proc/$$/ns/mnt")
sub_ns=$(readlink "/proc/${ns_pid}/ns/mnt")
assert_not_streq "${root_ns}" "${sub_ns}"

stateroot=$(rpmostree_query_json '.deployments[0].osname')

# Check the mounts exist in the root namespace and the /var/foo mount has not
# propagated back to /sysroot.
assert_has_mount /var/foo
assert_has_mount /sysroot/bar
assert_not_has_mount "/sysroot/ostree/deploy/${stateroot}/var/foo"

# Repeat with the sub mount namespace.
assert_has_mount /var/foo "${ns_pid}"
assert_has_mount /sysroot/bar "${ns_pid}"
assert_not_has_mount "/sysroot/ostree/deploy/${stateroot}/var/foo" "${ns_pid}"
}

case "${AUTOPKGTEST_REBOOT_MARK:-}" in
"")
mkdir -p /var/foo /sysroot/bar

# Create a process in a separate mount namespace to see if the mounts
# propagate into it correctly.
unshare -m --propagation unchanged -- sleep infinity &
ns_pid=$!

mount -t tmpfs foo /var/foo
mount -t tmpfs bar /sysroot/bar

test_mounts

# Now setup for the same test but with the mounts made early via fstab.
cat >> /etc/fstab <<"EOF"
foo /var/foo tmpfs defaults 0 0
bar /sysroot/bar tmpfs defaults 0 0
EOF

# We want to start a process in a separate namespace after ostree-remount
# has completed but before systemd starts the fstab generated mount units.
cat > /etc/systemd/system/test-mounts.service <<"EOF"
[Unit]
DefaultDependencies=no
After=ostree-remount.service
Before=var-foo.mount sysroot-bar.mount
RequiresMountsFor=/var /sysroot
Conflicts=shutdown.target
Before=shutdown.target
[Service]
Type=exec
ExecStart=/usr/bin/sleep infinity
ProtectSystem=strict
[Install]
WantedBy=local-fs.target
EOF
systemctl enable test-mounts.service

/tmp/autopkgtest-reboot 2
;;
2)
# Check that the test service is running and get its PID.
ns_state=$(systemctl show -P ActiveState test-mounts.service)
assert_streq "${ns_state}" active
ns_pid=$(systemctl show -P MainPID test-mounts.service)

# Make sure that test-mounts.service started after ostree-remount.service
# but before /var/foo and /sysroot/bar were mounted so that we can see if
# the mounts were propagated into its mount namespace.
remount_finished=$(journalctl -o json -g Finished -u ostree-remount.service | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
test_starting=$(journalctl -o json -g Starting -u test-mounts.service | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
test_started=$(journalctl -o json -g Started -u test-mounts.service | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
foo_mounting=$(journalctl -o json -g Mounting -u var-foo.mount | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
bar_mounting=$(journalctl -o json -g Mounting -u sysroot-bar.mount | tail -n1 | jq -r .__MONOTONIC_TIMESTAMP)
test "${remount_finished}" -lt "${test_starting}"
test "${test_started}" -lt "${foo_mounting}"
test "${test_started}" -lt "${bar_mounting}"

test_mounts
;;
*) fatal "Unexpected AUTOPKGTEST_REBOOT_MARK=${AUTOPKGTEST_REBOOT_MARK}" ;;
esac
Loading