diff --git a/overlay.d/05core/usr/lib/dracut/modules.d/40ignition-ostree/ignition-ostree-transposefs.sh b/overlay.d/05core/usr/lib/dracut/modules.d/40ignition-ostree/ignition-ostree-transposefs.sh index 6f89c4007e..62fa977043 100755 --- a/overlay.d/05core/usr/lib/dracut/modules.d/40ignition-ostree/ignition-ostree-transposefs.sh +++ b/overlay.d/05core/usr/lib/dracut/modules.d/40ignition-ostree/ignition-ostree-transposefs.sh @@ -139,17 +139,24 @@ should_autosave_rootfs() { return fi local agcount + # This runs xfs_info on the unmounted filesystem, because mounting an + # XFS filesystem that has grown an excessive number of allocation groups + # can be very slow. eval $(xfs_info "${root_part}" | grep -o 'agcount=[0-9]*') - # Semi-arbitrarily chosen: this is roughly ~64G currently (based on initial - # ag sizing at build time) which seems like a good rootfs size at which to - # discriminate between "throwaway/short-lived systems" and "long-running - # workload systems". It's not like XFS performance is way worse at 128. - if [ "$agcount" -lt 128 ]; then - echo "Filesystem agcount is $agcount; skipping" >&2 + # This is roughly 700GiB currently (based on initial ag sizing at build time) + # which ensures we reprovision only on "large" root filesystems. + # Specifically for e.g. OpenShift, this ensures we don't reprovision on default + # worker node root filesystems. 
+ local threshold + threshold=400 + if [ "$agcount" -lt "${threshold}" ]; then + echo "autosave-xfs: ${root_part} agcount=$agcount is lower than threshold=${threshold}" >&2 echo 0 return + else + echo "autosave-xfs: ${root_part} agcount=$agcount meets threshold=${threshold}" >&2 + echo 1 fi - echo 1 } ensure_zram_dev() { diff --git a/tests/kola/root-reprovision/autosave-xfs/test.sh b/tests/kola/root-reprovision/autosave-xfs/test.sh index 5f88d60482..4ac5c12afc 100755 --- a/tests/kola/root-reprovision/autosave-xfs/test.sh +++ b/tests/kola/root-reprovision/autosave-xfs/test.sh @@ -2,8 +2,8 @@ ## kola: ## # This test reprovisions the rootfs automatically. ## tags: "platform-independent reprovision" -## # Trigger automatic XFS reprovisioning -## minDisk: 100 +## # Trigger automatic XFS reprovisioning (heuristic) +## minDisk: 1000 ## # Root reprovisioning requires at least 4GiB of memory. ## minMemory: 4096 ## # This test includes a lot of disk I/O and needs a higher @@ -11,6 +11,7 @@ ## timeoutMin: 15 ## description: Verify the root reprovision with XFS ## on large disk triggers autosaved. +## This test is meant to cover ignition-ostree-transposefs-autosave-xfs.service set -xeuo pipefail @@ -20,10 +21,13 @@ set -xeuo pipefail if [ ! 
-f /run/ignition-ostree-autosaved-xfs.stamp ]; then fatal "expected autosaved XFS" fi +# Verify we printed something about the agcount +journalctl -u ignition-ostree-transposefs-autosave-xfs.service --grep=agcount ok "autosaved XFS on large disk" eval $(xfs_info / | grep -o 'agcount=[0-9]*') -if [ "$agcount" -gt 4 ]; then - fatal "expected agcount of at most 4, got ${agcount}" +expected=4 +if [ "$agcount" -gt "$expected" ]; then + fatal "expected agcount of at most ${expected}, got ${agcount}" fi ok "low agcount on large disk" diff --git a/tests/kola/root-reprovision/luks/autosave-xfs/test.sh b/tests/kola/root-reprovision/luks/autosave-xfs/test.sh index a3cd80e497..e238a4acd9 100755 --- a/tests/kola/root-reprovision/luks/autosave-xfs/test.sh +++ b/tests/kola/root-reprovision/luks/autosave-xfs/test.sh @@ -10,9 +10,10 @@ ## # timeout value than the default. ## timeoutMin: 15 ## # Trigger automatic XFS reprovisioning -## minDisk: 100 +## minDisk: 1000 ## description: Verify the root reprovision with XFS and TPM ## on large disk triggers autosaved. +## This test is meant to cover ignition-ostree-transposefs-autosave-xfs.service set -xeuo pipefail @@ -27,8 +28,9 @@ if [ -z "${AUTOPKGTEST_REBOOT_MARK:-}" ]; then ok "autosaved XFS on large disk" eval $(xfs_info / | grep -o 'agcount=[0-9]*') - if [ "$agcount" -gt 4 ]; then - fatal "expected agcount of at most 4, got ${agcount}" + expected=4 + if [ "$agcount" -gt "${expected}" ]; then + fatal "expected agcount of at most ${expected}, got ${agcount}" fi ok "low agcount on large disk" fi