Skip to content

Commit

Permalink
Support transient /etc
Browse files Browse the repository at this point in the history
If the `prepare-root.conf` file contains:
```
[etc]
transient=yes
```

Then during prepare-root, an overlayfs is mounted as /etc, with the
upper dir being in /run. If composefs is used, the lower dir is
`usr/etc` from the composefs image; otherwise it is the deployed
`$deploydir/usr/etc`.

Note that for this to work with selinux, the commit must have been
built with OSTREE_REPO_COMMIT_MODIFIER_FLAGS_USRETC_AS_ETC. Otherwise
the lowerdir (/usr/etc) will have the wrong selinux contexts for the
final location of the mount (/etc).

We also set the transient-etc key in the ostree-booted file, pointing it
to the directory that is used for the overlayfs.

There is some additional work happening in ostree-remount, mostly
related to selinux (as this needs to happen post selinux policy
load):

 * Recent versions of selinux-policy have issues with the overlayfs
   mount being kernel_t, and that is not allowed to manage files as
   needed. This is fixed in
   fedora-selinux/selinux-policy#1893

 * Any /etc files created in the initramfs will not be labeled,
   because the selinux policy has not been loaded. In addition, the
   upper dir is on a tmpfs, and any manually set xattr-based selinux
   labels on those are reset during policy load. To work around this
   ostree-remount will relabel all files on /etc that have
   corresponding files in overlayfs upper dir.

 * During early boot, systemd mounts /run/machine-id on top of
   /etc/machine-id (as /etc is readonly). Later during boot, when etc
   is readwrite, systemd-machine-id-commit.service will remove the
   mount and update the real file under it with the right content. To
   ensure that this keeps working, we need to ensure that when we
   relabel /etc/machine-id we relabel the real (covered) file, not the
   temporary bind-mount.

 * ostree-remount no longer needs to remount /etc read-only in the
   transient-etc case.

Signed-off-by: Alexander Larsson <[email protected]>
  • Loading branch information
alexlarsson committed Oct 9, 2023
1 parent 9847a08 commit 50de0ff
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 9 deletions.
5 changes: 5 additions & 0 deletions Makefile-switchroot.am
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ ostree_remount_SOURCES = \
ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -I$(srcdir)/libglnx
ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libotcore.la libotutil.la libglnx.la

if USE_SELINUX
ostree_remount_CPPFLAGS += $(OT_DEP_SELINUX_CFLAGS)
ostree_remount_LDADD += $(OT_DEP_SELINUX_LIBS)
endif

if USE_COMPOSEFS
ostree_prepare_root_LDADD += libcomposefs.la
endif
Expand Down
4 changes: 4 additions & 0 deletions man/ostree-prepare-root.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ License along with this library. If not, see <https://www.gnu.org/licenses/>.
<term><varname>sysroot.readonly</varname></term>
<listitem><para>A boolean value; the default is <literal>false</literal>. If this is set to <literal>true</literal>, then the <literal>/sysroot</literal> mount point is mounted read-only.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>etc.transient</varname></term>
<listitem><para>A boolean value; the default is <literal>false</literal>. If this is set to <literal>true</literal>, then the <literal>/etc</literal> mount point is mounted transiently, i.e. on a non-persistent location.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>composefs.enabled</varname></term>
<listitem><para>This can be <literal>yes</literal>, <literal>no</literal>, <literal>maybe</literal> or
Expand Down
2 changes: 2 additions & 0 deletions src/libotcore/otcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,5 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error);
#define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed"
// This key will be present if the sysroot-ro flag was found
#define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro"

#define OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC "transient-etc"
55 changes: 48 additions & 7 deletions src/switchroot/ostree-prepare-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@
#define SYSROOT_KEY "sysroot"
#define READONLY_KEY "readonly"

#define ETC_KEY "etc"
#define TRANSIENT_KEY "transient"

#define COMPOSEFS_KEY "composefs"
#define ENABLED_KEY "enabled"
#define KEYPATH_KEY "keypath"
Expand Down Expand Up @@ -547,13 +550,51 @@ main (int argc, char *argv[])
* the deployment needs to be created and remounted as read/write. */
if (sysroot_readonly || using_composefs)
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT,
NULL)
< 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
gboolean etc_transient = FALSE;
if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE,
&etc_transient, &error))
errx (EXIT_FAILURE, "Failed to parse etc.transient value: %s", error->message);

if (etc_transient)
{
char *ovldir = "/run/ostree/transient-etc";

g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC,
g_variant_new_string (ovldir));

char *lowerdir = "usr/etc";
if (using_composefs)
lowerdir = TMP_SYSROOT "/usr/etc";

g_autofree char *upperdir = g_build_filename (ovldir, "upper", NULL);
g_autofree char *workdir = g_build_filename (ovldir, "work", NULL);

struct
{
const char *path;
int mode;
} subdirs[] = { { ovldir, 0700 }, { upperdir, 0755 }, { workdir, 0755 } };
for (int i = 0; i < G_N_ELEMENTS (subdirs); i++)
{
if (mkdirat (AT_FDCWD, subdirs[i].path, subdirs[i].mode) < 0)
err (EXIT_FAILURE, "Failed to create dir %s", subdirs[i].path);
}

g_autofree char *ovl_options
= g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir);
if (mount ("overlay", TMP_SYSROOT "/etc", "overlay", MS_SILENT, ovl_options) < 0)
err (EXIT_FAILURE, "failed to mount transient etc overlayfs");
}
else
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT,
NULL)
< 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
}
}

/* Prepare /usr.
Expand Down
98 changes: 96 additions & 2 deletions src/switchroot/ostree-remount.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
#include <errno.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <unistd.h>
#ifdef HAVE_SELINUX
#include <selinux/restorecon.h>
#endif

#include "ostree-mount-util.h"
#include "otcore.h"
Expand Down Expand Up @@ -76,6 +79,50 @@ do_remount (const char *target, bool writable)
printf ("Remounted %s: %s\n", writable ? "rw" : "ro", target);
}

/* Relabel the directory $real_path, which is going to be an overlayfs mount,
 * based on the content of the overlayfs upper directory that is in use by the
 * mount. The goal is that we relabel in the overlay mount all the files that
 * have been modified (directly or via parent copy-up operations) since the
 * overlayfs was mounted. This will be used for the /etc overlayfs mount where
 * no selinux labels are set before the selinux policy is loaded.
 *
 * @upper_path: path of the entry inside the overlayfs upper directory
 * @real_path:  path of the corresponding entry in the mounted overlayfs; this
 *              is the path that actually gets relabeled
 * @is_dir:     whether the upper entry is a directory; if so, its children are
 *              processed recursively
 *
 * Errors are fatal (the process exits via err()), except for a $real_path that
 * does not exist, which is skipped. When built without HAVE_SELINUX this
 * function does nothing.
 */
static void
relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean is_dir)
{
#ifdef HAVE_SELINUX
  /* Ignore ENOENT: an entry in the upper dir does not necessarily have a
   * counterpart in the merged mount (e.g. an overlayfs whiteout marks a file
   * as deleted), and a missing file must not abort the boot. */
  if (selinux_restorecon (real_path, 0) != 0 && errno != ENOENT)
    err (EXIT_FAILURE, "Failed to relabel %s", real_path);

  if (!is_dir)
    return;

  g_auto (GLnxDirFdIterator) dfd_iter = {
    0,
  };

  if (!glnx_dirfd_iterator_init_at (AT_FDCWD, upper_path, FALSE, &dfd_iter, NULL))
    err (EXIT_FAILURE, "Failed to open upper directory %s for relabeling", upper_path);

  while (TRUE)
    {
      struct dirent *dent;

      /* err() does not return, so no explicit loop exit is needed on failure. */
      if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, NULL, NULL))
        err (EXIT_FAILURE, "Failed to read upper directory %s for relabeling", upper_path);

      /* A NULL entry signals the end of the directory. */
      if (dent == NULL)
        break;

      /* Walk the upper dir, but relabel the matching path in the real mount. */
      g_autofree char *upper_child = g_build_filename (upper_path, dent->d_name, NULL);
      g_autofree char *real_child = g_build_filename (real_path, dent->d_name, NULL);
      relabel_dir_for_upper (upper_child, real_child, dent->d_type == DT_DIR);
    }
#endif
}

int
main (int argc, char *argv[])
{
Expand Down Expand Up @@ -119,6 +166,52 @@ main (int argc, char *argv[])
if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
perror ("warning: While remounting /sysroot MS_PRIVATE");

const char *transient_etc = NULL;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s",
&transient_etc);

if (transient_etc)
{
/* If the initramfs created any files in /etc (directly or via overlay copy-up) they
* will be unlabeled, because the selinux policy is not loaded until after the
* pivot-root. So, for all files in the upper dir, relabel the corresponding overlay
* file.
*
* Also, note that during boot systemd will create a /run/machine-id ->
* /etc/machine-id bind mount (as /etc is read-only early on). It will then later
* replace this mount with a real one (in systemd-machine-id-commit.service).
*
* We need to label the actual overlayfs file, not the temporary bind-mount. To do
* this we unmount the covering mount before relabeling, but we do so in a temporary
* private namespace to avoid affecting other parts of the system.
*/

glnx_autofd int initial_ns_fd = -1;
if (g_file_test ("/run/machine-id", G_FILE_TEST_EXISTS)
&& g_file_test ("/etc/machine-id", G_FILE_TEST_EXISTS))
{
initial_ns_fd = open ("/proc/self/ns/mnt", O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (initial_ns_fd < 0)
err (EXIT_FAILURE, "Failed to open initial namespace");

if (unshare (CLONE_NEWNS) < 0)
err (EXIT_FAILURE, "Failed to unshare initial namespace");

/* Ensure unmount is not propagated */
if (mount ("none", "/etc", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
err (EXIT_FAILURE, "warning: While remounting /etc MS_PRIVATE");

if (umount2 ("/etc/machine-id", MNT_DETACH) < 0)
err (EXIT_FAILURE, "Failed to unmount machine-id");
}

g_autofree char *upper = g_build_filename (transient_etc, "upper", NULL);
relabel_dir_for_upper (upper, "/etc", TRUE);

if (initial_ns_fd != -1 && setns (initial_ns_fd, CLONE_NEWNS) < 0)
err (EXIT_FAILURE, "Failed to join initial namespace");
}

gboolean root_is_composefs = FALSE;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_COMPOSEFS, "b",
&root_is_composefs);
Expand All @@ -140,8 +233,9 @@ main (int argc, char *argv[])
do_remount ("/sysroot", !sysroot_configured_readonly);

/* And also make sure to make /etc rw again. We make this conditional on
* sysroot_configured_readonly because only in that case is it a bind-mount. */
if (sysroot_configured_readonly)
* sysroot_configured_readonly && !transient_etc because only in that case is it a
* bind-mount. */
if (sysroot_configured_readonly && !transient_etc)
do_remount ("/etc", true);

/* If /var was created as an OSTree default bind mount (instead of being a separate
Expand Down

0 comments on commit 50de0ff

Please sign in to comment.