Skip to content

Commit

Permalink
transposefs: Only autosave-xfs for much larger filesystems
Browse files Browse the repository at this point in the history
The change in coreos#2320
has been very problematic for OpenShift because our default node
configuration is *always* over the threshold, and that causes
significant latency on instance provisioning.

Experimentally bumping to 400 allocation groups, which is about 700GiB.
This is comfortably above the default OpenShift node root disk sizes,
and returns us to the prior status quo.

While we're here, rework the logging a bit so that we *always*
log the `agcount` for debugging purposes.

Also:

- Only log to stdout for normal conditions
- Include the name of the systemd unit in the test description
  so we can cross-reference
- tests: Hoist the expected agcount of 4 to a common variable
  • Loading branch information
cgwalters authored and HuijingHei committed Oct 10, 2023
1 parent a9d9652 commit 9473d7e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,24 @@ should_autosave_rootfs() {
return
fi
local agcount
# This runs xfs_info on the unmounted filesystem, because mounting an
# XFS filesystem that has grown an excessive number of allocation groups
# can be very slow.
eval $(xfs_info "${root_part}" | grep -o 'agcount=[0-9]*')
# Semi-arbitrarily chosen: this is roughly ~64G currently (based on initial
# ag sizing at build time) which seems like a good rootfs size at which to
# discriminate between "throwaway/short-lived systems" and "long-running
# workload systems". It's not like XFS performance is way worse at 128.
if [ "$agcount" -lt 128 ]; then
echo "Filesystem agcount is $agcount; skipping" >&2
# This is roughly ~700GiB currently (based on initial ag sizing at build time)
# which ensures we grow only on "large" root filesystems.
# Specifically for e.g. OpenShift, this ensures we don't reprovision on default
# worker node root filesystems.
local threshold
threshold=400
if [ "$agcount" -lt "${threshold}" ]; then
echo "autosave-xfs: ${root_part} agcount=$agcount is lower than threshold=${threshold}" >&2
echo 0
return
else
echo "autosave-xfs: ${root_part} agcount=$agcount meets threshold=${threshold}" >&2
echo 1
fi
echo 1
}

ensure_zram_dev() {
Expand Down
12 changes: 8 additions & 4 deletions tests/kola/root-reprovision/autosave-xfs/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
## kola:
## # This test reprovisions the rootfs automatically.
## tags: "platform-independent reprovision"
## # Trigger automatic XFS reprovisioning
## minDisk: 100
## # Trigger automatic XFS reprovisioning (heuristic)
## minDisk: 1000
## # Root reprovisioning requires at least 4GiB of memory.
## minMemory: 4096
## # This test includes a lot of disk I/O and needs a higher
## # timeout value than the default.
## timeoutMin: 15
## description: Verify the root reprovision with XFS
## on large disk triggers autosaved.
## This test is meant to cover ignition-ostree-transposefs-autosave-xfs.service

set -xeuo pipefail

Expand All @@ -20,10 +21,13 @@ set -xeuo pipefail
if [ ! -f /run/ignition-ostree-autosaved-xfs.stamp ]; then
fatal "expected autosaved XFS"
fi
# Verify we printed something about the agcount
journalctl -u ignition-ostree-transposefs-autosave-xfs.service --grep=agcount
ok "autosaved XFS on large disk"

eval $(xfs_info / | grep -o 'agcount=[0-9]*')
if [ "$agcount" -gt 4 ]; then
fatal "expected agcount of at most 4, got ${agcount}"
expected=4
if [ "$agcount" -gt "$expected" ]; then
fatal "expected agcount of at most ${expected}, got ${agcount}"
fi
ok "low agcount on large disk"
8 changes: 5 additions & 3 deletions tests/kola/root-reprovision/luks/autosave-xfs/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
## # timeout value than the default.
## timeoutMin: 15
## # Trigger automatic XFS reprovisioning
## minDisk: 100
## minDisk: 1000
## description: Verify the root reprovision with XFS and TPM
## on large disk triggers autosaved.
## This test is meant to cover ignition-ostree-transposefs-autosave-xfs.service

set -xeuo pipefail

Expand All @@ -27,8 +28,9 @@ if [ -z "${AUTOPKGTEST_REBOOT_MARK:-}" ]; then
ok "autosaved XFS on large disk"

eval $(xfs_info / | grep -o 'agcount=[0-9]*')
if [ "$agcount" -gt 4 ]; then
fatal "expected agcount of at most 4, got ${agcount}"
expected=4
if [ "$agcount" -gt "${expected}" ]; then
fatal "expected agcount of at most ${expected}, got ${agcount}"
fi
ok "low agcount on large disk"
fi
Expand Down

0 comments on commit 9473d7e

Please sign in to comment.