From bf0e46e845c937a0725cec9a5c12965bbbe6fd5f Mon Sep 17 00:00:00 2001 From: Ben Cressey Date: Mon, 1 Jul 2024 22:04:49 +0000 Subject: [PATCH 1/3] buildsys: add erofs root filesystem image feature Signed-off-by: Ben Cressey --- tools/buildsys/src/manifest.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/buildsys/src/manifest.rs b/tools/buildsys/src/manifest.rs index 155d5e78d..259c1a740 100644 --- a/tools/buildsys/src/manifest.rs +++ b/tools/buildsys/src/manifest.rs @@ -212,6 +212,14 @@ default will remain ext4 and xfs is opt-in. xfs-data-partition = true ``` +`erofs-root-partition` changes the filesystem for the root partition from ext4 to erofs. The +default will remain ext4 and erofs is opt-in. + +```ignore +[package.metadata.build-variant.image-features] +erofs-root-partition = true +``` + `uefi-secure-boot` means that the bootloader and kernel are signed. The grub image for the current variant will have a public GPG baked in, and will expect the grub config file to have a valid detached signature. 
Published artifacts such as AMIs and OVAs will enforce the signature checks @@ -782,6 +790,7 @@ pub enum ImageFeature { GrubSetPrivateVar, SystemdNetworkd, XfsDataPartition, + ErofsRootPartition, UefiSecureBoot, Fips, InPlaceUpdates, @@ -795,6 +804,7 @@ impl TryFrom for ImageFeature { "grub-set-private-var" => Ok(ImageFeature::GrubSetPrivateVar), "systemd-networkd" => Ok(ImageFeature::SystemdNetworkd), "xfs-data-partition" => Ok(ImageFeature::XfsDataPartition), + "erofs-root-partition" => Ok(ImageFeature::ErofsRootPartition), "uefi-secure-boot" => Ok(ImageFeature::UefiSecureBoot), "fips" => Ok(ImageFeature::Fips), "in-place-updates" => Ok(ImageFeature::InPlaceUpdates), @@ -810,6 +820,7 @@ impl fmt::Display for ImageFeature { ImageFeature::GrubSetPrivateVar => write!(f, "GRUB_SET_PRIVATE_VAR"), ImageFeature::SystemdNetworkd => write!(f, "SYSTEMD_NETWORKD"), ImageFeature::XfsDataPartition => write!(f, "XFS_DATA_PARTITION"), + ImageFeature::ErofsRootPartition => write!(f, "EROFS_ROOT_PARTITION"), ImageFeature::UefiSecureBoot => write!(f, "UEFI_SECURE_BOOT"), ImageFeature::Fips => write!(f, "FIPS"), ImageFeature::InPlaceUpdates => write!(f, "IN_PLACE_UPDATES"), From 8d457434c670568403f317b6b9aa02a808b5c10a Mon Sep 17 00:00:00 2001 From: Ben Cressey Date: Sat, 21 Sep 2024 17:47:24 +0000 Subject: [PATCH 2/3] buildsys: mark erofs feature as experimental erofs is a relatively new filesystem, especially when compared with ext4. It shows promising results in terms of improved boot times, but its behavior at runtime on fully loaded nodes is more difficult to predict. Mark the image feature as experimental for now. This allows it to be enabled by downstream variants to facilitate additional experiments, without immediately endorsing it for production use. 
Signed-off-by: Ben Cressey --- tools/buildsys/src/manifest.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/buildsys/src/manifest.rs b/tools/buildsys/src/manifest.rs index 259c1a740..e68af3ded 100644 --- a/tools/buildsys/src/manifest.rs +++ b/tools/buildsys/src/manifest.rs @@ -498,6 +498,11 @@ impl ManifestInfo { } } } + for experiment in EXPERIMENTAL_IMAGE_FEATURES { + if features.contains(experiment) { + println!("cargo:warning=Image feature {experiment} is experimental; use at your own risk!"); + } + } Some(features) } @@ -797,6 +802,8 @@ pub enum ImageFeature { HostContainers, } +const EXPERIMENTAL_IMAGE_FEATURES: [&ImageFeature; 1] = [&ImageFeature::ErofsRootPartition]; + impl TryFrom for ImageFeature { type Error = Error; fn try_from(s: String) -> Result { From d1baea77efdf0801281b53ce2187764e70a17140 Mon Sep 17 00:00:00 2001 From: Ben Cressey Date: Wed, 3 Jul 2024 16:31:26 +0000 Subject: [PATCH 3/3] twoliter: add support for erofs root filesystem Signed-off-by: Ben Cressey --- twoliter/embedded/build.Dockerfile | 6 ++++ twoliter/embedded/img2img | 45 +++++++++++++++++++--------- twoliter/embedded/imghelper | 22 ++++++++++++++ twoliter/embedded/metadata.spec | 6 ++++ twoliter/embedded/rpm2img | 48 +++++++++++++++++++++--------- 5 files changed, 99 insertions(+), 28 deletions(-) diff --git a/twoliter/embedded/build.Dockerfile b/twoliter/embedded/build.Dockerfile index 87e749ee0..1c85bbdc6 100644 --- a/twoliter/embedded/build.Dockerfile +++ b/twoliter/embedded/build.Dockerfile @@ -203,6 +203,7 @@ ARG GRUB_SET_PRIVATE_VAR ARG UEFI_SECURE_BOOT ARG SYSTEMD_NETWORKD ARG XFS_DATA_PARTITION +ARG EROFS_ROOT_PARTITION ARG IN_PLACE_UPDATES ARG HOST_CONTAINERS ARG FIPS @@ -227,6 +228,7 @@ RUN \ && echo -e -n "${UEFI_SECURE_BOOT:+%bcond_without uefi_secure_boot\n}" >> "${RPM_BCONDS}" \ && echo -e -n "${SYSTEMD_NETWORKD:+%bcond_without systemd_networkd\n}" >> "${RPM_BCONDS}" \ && echo -e -n "${XFS_DATA_PARTITION:+%bcond_without xfs_data_partition\n}" >> 
"${RPM_BCONDS}" \ + && echo -e -n "${EROFS_ROOT_PARTITION:+%bcond_without erofs_root_partition\n}" >> "${RPM_BCONDS}" \ && echo -e -n "${IN_PLACE_UPDATES:+%bcond_without in_place_updates\n}" >> "${RPM_BCONDS}" \ && echo -e -n "${HOST_CONTAINERS:+%bcond_without host_containers\n}" >> "${RPM_BCONDS}" @@ -331,6 +333,7 @@ ARG DATA_IMAGE_PUBLISH_SIZE_GIB ARG KERNEL_PARAMETERS ARG GRUB_SET_PRIVATE_VAR ARG XFS_DATA_PARTITION +ARG EROFS_ROOT_PARTITION ARG UEFI_SECURE_BOOT ARG IN_PLACE_UPDATES ENV VARIANT=${VARIANT} VERSION_ID=${VERSION_ID} BUILD_ID=${BUILD_ID} \ @@ -369,6 +372,7 @@ RUN --mount=target=/host \ --partition-plan="${PARTITION_PLAN}" \ --ovf-template="/bypass/variants/${VARIANT}/template.ovf" \ ${XFS_DATA_PARTITION:+--with-xfs-data-partition=yes} \ + ${EROFS_ROOT_PARTITION:+--with-erofs-root-partition=yes} \ ${GRUB_SET_PRIVATE_VAR:+--with-grub-set-private-var=yes} \ ${UEFI_SECURE_BOOT:+--with-uefi-secure-boot=yes} \ ${IN_PLACE_UPDATES:+--with-in-place-updates=yes} && \ @@ -473,6 +477,7 @@ ARG PARTITION_PLAN ARG OS_IMAGE_PUBLISH_SIZE_GIB ARG DATA_IMAGE_PUBLISH_SIZE_GIB ARG UEFI_SECURE_BOOT +ARG EROFS_ROOT_PARTITION ARG IN_PLACE_UPDATES ENV VARIANT=${VARIANT} VERSION_ID=${VERSION_ID} BUILD_ID=${BUILD_ID} WORKDIR /root @@ -507,6 +512,7 @@ RUN --mount=target=/host \ --data-image-publish-size-gib="${DATA_IMAGE_PUBLISH_SIZE_GIB}" \ --partition-plan="${PARTITION_PLAN}" \ --ovf-template="/bypass/variants/${VARIANT}/template.ovf" \ + ${EROFS_ROOT_PARTITION:+--with-erofs-root-partition=yes} \ ${UEFI_SECURE_BOOT:+--with-uefi-secure-boot=yes} \ ${IN_PLACE_UPDATES:+--with-in-place-updates=yes} && \ chown -R "${BUILDER_UID}:${BUILDER_UID}" /output/ && \ diff --git a/twoliter/embedded/img2img b/twoliter/embedded/img2img index 9da72ed70..f438ed2d1 100755 --- a/twoliter/embedded/img2img +++ b/twoliter/embedded/img2img @@ -6,6 +6,7 @@ shopt -qs failglob OUTPUT_FMT="raw" OVF_TEMPLATE="" +EROFS_ROOT_PARTITION="no" UEFI_SECURE_BOOT="no" IN_PLACE_UPDATES="no" @@ -21,6 +22,7 @@ for 
opt in "$@"; do --data-image-publish-size-gib=*) DATA_IMAGE_PUBLISH_SIZE_GIB="${optarg}" ;; --partition-plan=*) PARTITION_PLAN="${optarg}" ;; --ovf-template=*) OVF_TEMPLATE="${optarg}" ;; + --with-erofs-root-partition=*) EROFS_ROOT_PARTITION="${optarg}" ;; --with-uefi-secure-boot=*) UEFI_SECURE_BOOT="${optarg}" ;; --with-in-place-updates=*) IN_PLACE_UPDATES="${optarg}" ;; *) @@ -80,6 +82,10 @@ ROOT_MOUNT="$(mktemp -p "${WORKDIR}" -d root.XXXXXXXXXX)" BOOT_MOUNT="$(mktemp -p "${WORKDIR}" -d boot.XXXXXXXXXX)" EFI_MOUNT="$(mktemp -p "${WORKDIR}" -d efi.XXXXXXXXXX)" +SELINUX_ROOT="/etc/selinux" +SELINUX_POLICY="fortified" +SELINUX_FILE_CONTEXTS="${ROOT_MOUNT}/${SELINUX_ROOT}/${SELINUX_POLICY}/contexts/files/file_contexts" + # Collect partition sizes and offsets from the partition plan. declare -A partsize partoff set_partition_sizes \ @@ -122,6 +128,13 @@ done dd if="${OS_IMAGE}" of="${ROOT_IMAGE}" \ count="${partsize["ROOT-A"]}" bs=1M skip="${partoff["ROOT-A"]}" +# For erofs, extract the root filesystem since we can't modify in-place. +if [[ "${EROFS_ROOT_PARTITION}" == "yes" ]]; then + fsck.erofs --extract="${ROOT_MOUNT}" "${ROOT_IMAGE}" + touch -r "${ROOT_IMAGE}" "${ROOT_MOUNT}" + rm "${ROOT_IMAGE}" +fi + # Extract the boot partition from the OS image, and dump the contents. dd if="${OS_IMAGE}" of="${BOOT_IMAGE}" \ count="${partsize["BOOT-A"]}" bs=1M skip="${partoff["BOOT-A"]}" @@ -149,25 +162,29 @@ install_root_json "${ROOT_MOUNT}" ############################################################################### # Section 4: update root partition and root verity -# shellcheck disable=SC2312 # mapfile is validated elsewhere -mapfile -t new_root_artifacts <<<"$(find "${ROOT_MOUNT}" -type f)" - -# The reason we check index 0 rather than the mapfile length is if `find` fails -# to find an artifact the heredoc to mapfile will assign empty output to 0. 
-if [[ -z "${new_root_artifacts[0]}" ]]; then - echo "no new root artifacts found" >&2 - exit 1 +if [[ "${EROFS_ROOT_PARTITION}" == "yes" ]]; then + mkfs_root_erofs "${ROOT_MOUNT}" "${ROOT_IMAGE}" "${SELINUX_FILE_CONTEXTS}" else - # Write files from the root mount to the root image. - ROOT_DEBUGFS_STDERR="${WORKDIR}/root.err" - for artifact in "${new_root_artifacts[@]}"; do - cat <<EOF | debugfs -w -f - "${ROOT_IMAGE}" 2>>"${ROOT_DEBUGFS_STDERR}" + # shellcheck disable=SC2312 # mapfile is validated elsewhere + mapfile -t new_root_artifacts <<<"$(find "${ROOT_MOUNT}" -type f)" + + # The reason we check index 0 rather than the mapfile length is if `find` fails + # to find an artifact the heredoc to mapfile will assign empty output to 0. + if [[ -z "${new_root_artifacts[0]}" ]]; then + echo "no new root artifacts found" >&2 + exit 1 + else + # Write files from the root mount to the root image. + ROOT_DEBUGFS_STDERR="${WORKDIR}/root.err" + for artifact in "${new_root_artifacts[@]}"; do + cat <<EOF | debugfs -w -f - "${ROOT_IMAGE}" 2>>"${ROOT_DEBUGFS_STDERR}" rm ${artifact#"${ROOT_MOUNT}"} write ${artifact} ${artifact#"${ROOT_MOUNT}"} ea_set ${artifact#"${ROOT_MOUNT}"} security.selinux system_u:object_r:os_t:s0 EOF - done - check_debugfs_errors "${ROOT_DEBUGFS_STDERR}" + done + check_debugfs_errors "${ROOT_DEBUGFS_STDERR}" + fi fi # Validate and write root image back to the OS image. diff --git a/twoliter/embedded/imghelper b/twoliter/embedded/imghelper index c4ad05de6..44da01c29 100755 --- a/twoliter/embedded/imghelper +++ b/twoliter/embedded/imghelper @@ -321,6 +321,28 @@ mkfs_data_xfs() { dd if="${bottlerocket_data}" of="${target}" conv=notrunc bs=1M seek="${offset}" } +mkfs_root_erofs() { + local root_mount root_image selinux_file_contexts + root_mount="${1:?}" + root_image="${2:?}" + selinux_file_contexts="${3:?}" + # Ensure the root mount directory is not writable, to avoid permission errors + # when interacting with the root inode at runtime. 
+ chmod 555 "${root_mount}" + # mkfs.erofs optimizations: + # --all-root: use same UID/GID for all files + # -T: use same mtime for all files + # -z lz4hc,12: lz4 for fast decompression, lz4hc level 12 for max compression + # -C 262144: use physical clusters up to 256 KiB to align with EBS I/O size + mkfs.erofs \ + --file-contexts="${selinux_file_contexts}" \ + --all-root \ + -T "$(stat -c '%Y' "${root_mount}/root")" \ + -z lz4hc,12 \ + -C 262144 \ + "${root_image}" "${root_mount}" +} + check_image_size() { local image part_mib image_size part_bytes image="${1:?}" diff --git a/twoliter/embedded/metadata.spec b/twoliter/embedded/metadata.spec index 351901ca0..2f130711b 100644 --- a/twoliter/embedded/metadata.spec +++ b/twoliter/embedded/metadata.spec @@ -50,6 +50,12 @@ Provides: %{_cross_os}image-feature(xfs-data-partition) Provides: %{_cross_os}image-feature(no-xfs-data-partition) %endif +%if %{with erofs_root_partition} +Provides: %{_cross_os}image-feature(erofs-root-partition) +%else +Provides: %{_cross_os}image-feature(no-erofs-root-partition) +%endif + %if %{with fips} Provides: %{_cross_os}image-feature(fips) %else diff --git a/twoliter/embedded/rpm2img b/twoliter/embedded/rpm2img index 356b8f7a0..55cb280f6 100755 --- a/twoliter/embedded/rpm2img +++ b/twoliter/embedded/rpm2img @@ -8,6 +8,7 @@ OVF_TEMPLATE="" GRUB_SET_PRIVATE_VAR="no" XFS_DATA_PARTITION="no" +EROFS_ROOT_PARTITION="no" UEFI_SECURE_BOOT="no" IN_PLACE_UPDATES="no" @@ -26,6 +27,7 @@ for opt in "$@"; do --ovf-template=*) OVF_TEMPLATE="${optarg}" ;; --with-grub-set-private-var=*) GRUB_SET_PRIVATE_VAR="${optarg}" ;; --with-xfs-data-partition=*) XFS_DATA_PARTITION="${optarg}" ;; + --with-erofs-root-partition=*) EROFS_ROOT_PARTITION="${optarg}" ;; --with-uefi-secure-boot=*) UEFI_SECURE_BOOT="${optarg}" ;; --with-in-place-updates=*) IN_PLACE_UPDATES="${optarg}" ;; *) @@ -260,6 +262,13 @@ printf "%s\n" "${INVENTORY_DATA}" >"${OUTPUT_DIR}/application-inventory.json" # Regenerate module dependencies, if 
possible. KMOD_DIR="${ROOT_MOUNT}/lib/modules" +# First decompress the kernel modules, so they can be recompressed by EROFS. +if [[ "${EROFS_ROOT_PARTITION}" == "yes" ]]; then + find "${KMOD_DIR}" -name '*.ko.gz' -exec gunzip '{}' \; + find "${KMOD_DIR}" -name '*.ko.xz' -exec unxz '{}' \; + find "${KMOD_DIR}" -name '*.ko.zst' -exec unzstd --rm '{}' \; +fi + # shellcheck disable=SC2066 # Quotes are fine here because we only expect one directory to be found. for kver in "$(find "${KMOD_DIR}" -mindepth 1 -maxdepth 1 -type d -printf '%P\n')"; do @@ -280,12 +289,19 @@ install_ca_certs "${ROOT_MOUNT}" # Install 'root.json'. install_root_json "${ROOT_MOUNT}" -# Install licenses. -mksquashfs \ - "${ROOT_MOUNT}"/usr/share/licenses \ - "${ROOT_MOUNT}"/usr/share/bottlerocket/licenses.squashfs \ - -no-exports -all-root -comp zstd -rm -rf "${ROOT_MOUNT}"/var/lib "${ROOT_MOUNT}"/usr/share/licenses/* +# "Install" licenses by compressing them into a squashfs, then removing the +# original files. Skip this step if using erofs, since they will be compressed +# when the filesystem is created. +if [[ "${EROFS_ROOT_PARTITION}" != "yes" ]]; then + mksquashfs \ + "${ROOT_MOUNT}"/usr/share/licenses \ + "${ROOT_MOUNT}"/usr/share/bottlerocket/licenses.squashfs \ + -no-exports -all-root -comp zstd + rm -rf "${ROOT_MOUNT}"/usr/share/licenses/* +fi + +# Clean up rpmdb. 
+rm -rf "${ROOT_MOUNT}"/var/lib if [[ "${ARCH}" == "x86_64" ]]; then # MBR and BIOS-BOOT @@ -377,14 +393,18 @@ else fi # BOTTLEROCKET-ROOT-A -mkdir -p "${ROOT_MOUNT}/lost+found" -ROOT_LABELS=$(setfiles -n -d -F -m -r "${ROOT_MOUNT}" \ - "${SELINUX_FILE_CONTEXTS}" "${ROOT_MOUNT}" | - awk -v root="${ROOT_MOUNT}" '{gsub(root"/","/"); gsub(root,"/"); print "ea_set", $1, "security.selinux", $4}') -mkfs.ext4 -E "lazy_itable_init=0,stride=${ROOT_STRIDE},stripe_width=${ROOT_STRIPE_WIDTH}" \ - -O ^has_journal -b "${VERITY_DATA_BLOCK_SIZE}" -d "${ROOT_MOUNT}" "${ROOT_IMAGE}" "${partsize["ROOT-A"]}M" -echo "${ROOT_LABELS}" | debugfs -w -f - "${ROOT_IMAGE}" -resize2fs -M "${ROOT_IMAGE}" +if [[ "${EROFS_ROOT_PARTITION}" == "yes" ]]; then + mkfs_root_erofs "${ROOT_MOUNT}" "${ROOT_IMAGE}" "${SELINUX_FILE_CONTEXTS}" +else + mkdir -p "${ROOT_MOUNT}/lost+found" + ROOT_LABELS=$(setfiles -n -d -F -m -r "${ROOT_MOUNT}" \ + "${SELINUX_FILE_CONTEXTS}" "${ROOT_MOUNT}" | + awk -v root="${ROOT_MOUNT}" '{gsub(root"/","/"); gsub(root,"/"); print "ea_set", $1, "security.selinux", $4}') + mkfs.ext4 -E "lazy_itable_init=0,stride=${ROOT_STRIDE},stripe_width=${ROOT_STRIPE_WIDTH}" \ + -O ^has_journal -b "${VERITY_DATA_BLOCK_SIZE}" -d "${ROOT_MOUNT}" "${ROOT_IMAGE}" "${partsize["ROOT-A"]}M" + echo "${ROOT_LABELS}" | debugfs -w -f - "${ROOT_IMAGE}" + resize2fs -M "${ROOT_IMAGE}" +fi dd if="${ROOT_IMAGE}" of="${OS_IMAGE}" conv=notrunc bs=1M seek="${partoff["ROOT-A"]}" # BOTTLEROCKET-VERITY-A