Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add region set options to more tests. #1496

Merged
merged 3 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/buildomat/jobs/test-up-2region-encrypted.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ done
export BINDIR=/var/tmp/bins

# Give this test one hour to finish
jobpid=$$; (sleep $(( 60 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) &
jobpid=$$; (sleep $(( 60 * 60 )); banner fail-timeout; ps -ef; zfs list; pstack $(ps -ef | grep "dsc start" | grep -v grep | awk '{print $2}') | demangle ;kill $jobpid) &

echo "Setup debug logging"
mkdir /tmp/debug
Expand Down
120 changes: 59 additions & 61 deletions tools/test_live_repair.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ trap ctrl_c INT
# Handler installed for the INT trap: print a notice, run
# `${dsc} cmd shutdown`, and leave the script with exit status 1.
ctrl_c() {
    printf '%s\n' "Stopping at your request"
    ${dsc} cmd shutdown
    exit 1
}

REGION_ROOT=${REGION_ROOT:-/var/tmp/test_live_repair}
Expand All @@ -44,45 +45,61 @@ if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! -f "$downstairs" ]];
fi

loops=5
region_sets=1

# Print the command-line synopsis and option descriptions to stderr.
# Only one synopsis line is emitted; it lists both -l and -r, which are
# the options accepted by the 'l:r:' getopts string.
usage () {
    echo "Usage: $0 [-l #] [-r #]" >&2
    echo " -l loops Number of replacement loops to perform (default 5)" >&2
    echo " -r region_sets Number of region sets to create (default 1)" >&2
}

while getopts 'l:' opt; do
while getopts 'l:r:' opt; do
case "$opt" in
l) loops=$OPTARG
;;
r) region_sets=$OPTARG
;;
*) echo "Invalid option"
usage
exit 1
;;
esac
done

((region_count=region_sets*3))
((region_count+=1))
echo "" > "$loop_log"
echo "" > "$test_log"
echo "starting $(date)" | tee "$loop_log"
echo "Tail $test_log for test output"

# NOTE: we are creating a single region set here plus one more region to be
# used by the replacement, and with the assumption that the default ports
# will be used (8810, 8820, 8830). The test relies on that because we use the
# fourth region-dir for our "replacement". If you change the number of
# regions, you must also adjust the replacement below.
# No real data was used to come up with these numbers. If you have some data
# then feel free to change things.
if [[ $region_sets -eq 1 ]]; then
extent_size=3000
elif [[ $region_sets -eq 2 ]]; then
extent_size=1500
elif [[ $region_sets -eq 3 ]]; then
extent_size=750
else
extent_size=500
fi

# NOTE: we create the requested number of regions here plus one more region to
# be used by the replace test. We can use dsc to determine what the port will
# be for the final region.
if ! ${dsc} create --cleanup \
--region-dir "$REGION_ROOT" \
--region-count 4 \
--region-count "$region_count" \
--ds-bin "$downstairs" \
--extent-size 4000 \
--extent-size "$extent_size" \
--extent-count 200 >> "$test_log"; then
echo "Failed to create downstairs regions"
exit 1
fi
${dsc} start --ds-bin "$downstairs" \
--region-dir "$REGION_ROOT" \
--region-count 4 >> "$test_log" 2>&1 &
--region-count "$region_count" >> "$test_log" 2>&1 &
dsc_pid=$!
sleep 5
if ! ps -p $dsc_pid > /dev/null; then
Expand All @@ -91,63 +108,44 @@ if ! ps -p $dsc_pid > /dev/null; then
fi

gen=1
# Initial seed for verify file
# Seed the initial volume
echo "$(date) Begin pretest initial fill" | tee -a "$test_log"
if ! "$crucible_test" fill --dsc 127.0.0.1:9998 -q -g "$gen"\
--verify-out "$verify_log" >> "$test_log" 2>&1 ; then
echo Failed on initial verify seed, check "$test_log"
--skip-verify >> "$test_log" 2>&1 ; then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just to go faster?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The addition of --skip-verify, yeah, that's just to go a little faster.
We don't bother with the --verify-out any longer because we are not stopping and starting crutest several times, so we no longer need to read in the verify info from the previous loop.

The stopping/starting part of the test here was causing problems when we have multiple region sets. The region set that we ended the previous test loop on might not match up with the region set that we start with by default.

echo Failed on initial fill, check "$test_log"
${dsc} cmd shutdown
exit 1
fi
(( gen += 1 ))

# Now run the crutest replace test in a loop
count=1
while [[ $count -le $loops ]]; do
SECONDS=0
cp "$test_log" "$test_log".last
echo "" > "$test_log"
echo "New loop, $count starts now $(date)" >> "$test_log"
"$crucible_test" replace -c 5 \
--dsc 127.0.0.1:9998 \
--replacement 127.0.0.1:8840 \
--stable -g "$gen" --verify-out "$verify_log" \
--verify-at-start \
--verify-in "$verify_log" >> "$test_log" 2>&1
result=$?
if [[ $result -ne 0 ]]; then
touch /var/tmp/ds_test/up 2> /dev/null
(( err += 1 ))
duration=$SECONDS
printf "[%03d] Error $result after %d:%02d\n" "$count" \
$((duration / 60)) $((duration % 60)) | tee -a "$loop_log"
mv "$test_log" "$test_log".lastfail
break
fi
duration=$SECONDS
(( gen += 1 ))
(( pass_total += 1 ))
(( total += duration ))
ave=$(( total / pass_total ))
printf "[%03d/%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d \
last_run_seconds:%d\n" \
"$count" "$loops" \
$((duration / 60)) $((duration % 60)) \
$((ave / 60)) $((ave % 60)) \
$((total / 60)) $((total % 60)) \
"$err" $duration | tee -a "$loop_log"
(( count += 1 ))
# Figure out the port of the last dsc client, this is what we will use for the
# replacement address.
((last_client=region_count - 1))
replacement_port=$(${dsc} cmd port -c $last_client)

# Now run the crutest replace test
SECONDS=0
cp "$test_log" "$test_log".last
echo "" > "$test_log"
echo "$(date) Replacement test starts now" | tee -a "$test_log"
"$crucible_test" replace -c "$loops" \
--dsc 127.0.0.1:9998 \
--replacement 127.0.0.1:"$replacement_port" \
--stable -g "$gen" >> "$test_log" 2>&1
result=$?
duration=$SECONDS
if [[ $result -ne 0 ]]; then
printf "Error $result after %d:%02d\n" \
$((duration / 60)) $((duration % 60)) | tee -a "$loop_log"
cp "$test_log" "$test_log".lastfail
echo "See ${test_log}.lastfail for more info"
else
printf "Test took: %d:%02d\n" \
$((duration / 60)) $((duration % 60)) | tee -a "$loop_log"
fi

done
${dsc} cmd shutdown
wait "$dsc_pid"

sleep 4
echo "Final results:" | tee -a "$loop_log"
printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d\n" \
"$count" \
$((duration / 60)) $((duration % 60)) \
$((ave / 60)) $((ave % 60)) \
$((total / 60)) $((total % 60)) \
"$err" $duration | tee -a "$loop_log"
echo "$(date) Test ends with $err" >> "$test_log" 2>&1
exit "$err"
echo "$(date) Test ends with $result" | tee -a "$test_log"
exit $result
102 changes: 42 additions & 60 deletions tools/test_replace_special.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ trap ctrl_c INT
# Handler installed for the INT trap: print a notice, run
# `${dsc} cmd shutdown`, and leave the script with exit status 1.
ctrl_c() {
    printf '%s\n' "Stopping at your request"
    ${dsc} cmd shutdown
    exit 1
}

REGION_ROOT=${REGION_ROOT:-/var/tmp/test_replace_special}
Expand Down Expand Up @@ -41,36 +42,40 @@ if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! -f "$downstairs" ]];
fi

loops=5
region_sets=1

# Print the command-line synopsis and option descriptions to stderr.
# Only one synopsis line is emitted; it lists both -l and -r, which are
# the options accepted by the 'l:r:' getopts string.
usage () {
    echo "Usage: $0 [-l #] [-r #]" >&2
    echo " -l loops Number of test loops to perform (default 5)" >&2
    echo " -r region_sets Number of region sets to create (default 1)" >&2
}

while getopts 'l:' opt; do
while getopts 'l:r:' opt; do
case "$opt" in
l) loops=$OPTARG
;;
r) region_sets=$OPTARG
;;
*) echo "Invalid option"
usage
exit 1
;;
esac
done

((region_count=region_sets*3))
((region_count+=1))
echo "" > "$loop_log"
echo "" > "$test_log"
echo "starting $(date)" | tee "$loop_log"
echo "Tail $test_log for test output"

# NOTE: we are creating a single region set here plus one more region to be
# used by the replacement, and with the assumption that the default ports
# will be used (8810, 8820, 8830). The test relies on that because we use
# the fourth region-dir for our "replacement". If you change the number of
# regions, you must also adjust the replacement below.
# NOTE: We are creating the requested number of regions here plus one more
# region to be used for replacement. We can use dsc to determine what the port
# will be for the final region.
if ! ${dsc} create --cleanup \
--region-dir "$REGION_ROOT" \
--region-count 4 \
--region-count "$region_count" \
--ds-bin "$downstairs" \
--extent-count 400 \
--block-size 4096 >> "$test_log"; then
Expand All @@ -79,7 +84,7 @@ if ! ${dsc} create --cleanup \
fi
${dsc} start --ds-bin "$downstairs" \
--region-dir "$REGION_ROOT" \
--region-count 4 >> "$test_log" 2>&1 &
--region-count "$region_count" >> "$test_log" 2>&1 &
dsc_pid=$!
sleep 5
if ! ps -p $dsc_pid > /dev/null; then
Expand All @@ -90,62 +95,39 @@ fi
gen=1
# Initial seed for verify file
if ! "$crucible_test" fill --dsc 127.0.0.1:9998 -q -g "$gen"\
--verify-out "$verify_log" >> "$test_log" 2>&1 ; then
echo Failed on initial verify seed, check "$test_log"
--skip-verify >> "$test_log" 2>&1 ; then
echo Failed on initial fill, check "$test_log"
${dsc} cmd shutdown
exit 1
fi
(( gen += 1 ))

# Now run the crutest replace-reconcile test in a loop
count=1
while [[ $count -le $loops ]]; do
SECONDS=0
cp "$test_log" "$test_log".last
echo "" > "$test_log"
echo "New loop, $count starts now $(date)" >> "$test_log"
"$crucible_test" replace-reconcile -c 5 \
--dsc 127.0.0.1:9998 \
--replacement 127.0.0.1:8840 \
--stable -g "$gen" --verify-out "$verify_log" \
--verify-at-start \
--verify-in "$verify_log" >> "$test_log" 2>&1
result=$?
if [[ $result -ne 0 ]]; then
touch /var/tmp/ds_test/up 2> /dev/null
(( err += 1 ))
duration=$SECONDS
printf "[%03d] Error $result after %d:%02d\n" "$count" \
$((duration / 60)) $((duration % 60)) | tee -a "$loop_log"
mv "$test_log" "$test_log".lastfail
break
fi
duration=$SECONDS
# Gen should grow by at least the `-c` from crutest
(( gen += 10 ))
(( pass_total += 1 ))
(( total += duration ))
ave=$(( total / pass_total ))
printf "[%03d/%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d \
last_run_seconds:%d\n" \
"$count" "$loops" \
$((duration / 60)) $((duration % 60)) \
$((ave / 60)) $((ave % 60)) \
$((total / 60)) $((total % 60)) \
"$err" $duration | tee -a "$loop_log"
(( count += 1 ))
# Figure out the port of the last dsc client, this is what we will use for the
# replacement address.
((last_client=region_count - 1))
replacement_port=$(${dsc} cmd port -c $last_client)

# Now run the crutest replace-reconcile test
SECONDS=0
cp "$test_log" "$test_log".last
echo "" > "$test_log"
echo "$(date) replace-reconcile starts now" | tee -a "$test_log"
"$crucible_test" replace-reconcile -c "$loops" --dsc 127.0.0.1:9998 \
--replacement 127.0.0.1:"$replacement_port" \
--stable -g "$gen" >> "$test_log" 2>&1
result=$?
duration=$SECONDS
if [[ $result -ne 0 ]]; then
printf "Error $result after %d:%02d\n" \
$((duration / 60)) $((duration % 60)) | tee -a "$loop_log"
cp "$test_log" "$test_log".lastfail
else
printf "Test took %d:%02d\n" \
$((duration / 60)) $((duration % 60)) | tee -a "$loop_log"
fi

done
${dsc} cmd shutdown
wait "$dsc_pid"

sleep 4
echo "Final results:" | tee -a "$loop_log"
printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d\n" \
"$count" \
$((duration / 60)) $((duration % 60)) \
$((ave / 60)) $((ave % 60)) \
$((total / 60)) $((total % 60)) \
"$err" $duration | tee -a "$loop_log"
echo "$(date) Test ends with $err" >> "$test_log" 2>&1
exit "$err"
echo "$(date) Test ends with $result" | tee -a "$test_log"
exit $result
20 changes: 14 additions & 6 deletions tools/test_replay.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ trap ctrl_c INT
# Handler installed for the INT trap: print a notice, run
# `${dsc} cmd shutdown`, and leave the script with exit status 1.
ctrl_c() {
    printf '%s\n' "Stopping at your request"
    ${dsc} cmd shutdown
    exit 1
}

WORK_ROOT=${WORK_ROOT:-/tmp}
Expand All @@ -33,15 +34,21 @@ if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]] || [[ ! -f "$downstairs" ]];
fi

loops=30
region_sets=1

# Print the command-line synopsis and option descriptions to stderr.
# The synopsis lists both -l and -r, which are the options accepted by
# the 'l:r:' getopts string, and uses balanced brackets.
usage () {
    echo "Usage: $0 [-l #] [-r #]" >&2
    echo " -l loops Number of times to cause a replay." >&2
    echo " -r regions Number of region sets to create (default 1)" >&2
}

while getopts 'l:' opt; do
while getopts 'l:r:' opt; do
case "$opt" in
l) loops=$OPTARG
echo "Set loops"
;;
r) region_sets=$OPTARG
echo "Set region sets"
;;
*) echo "Invalid option"
usage
Expand All @@ -50,19 +57,20 @@ while getopts 'l:' opt; do
esac
done

((region_count=region_sets*3))
echo "" > "$test_log"
echo "starting $(date)" | tee "$test_log"
echo "Tail $test_log for test output"

echo "Creating downstairs regions" | tee -a "$test_log"
echo "Creating $region_count downstairs regions" | tee -a "$test_log"
if ! ${dsc} create --cleanup --ds-bin "$downstairs" \
--extent-count 50 >> "$test_log"; then
--extent-count 50 --region-count "$region_count" >> "$test_log"; then
echo "Failed to create downstairs regions"
exit 1
fi

echo "Starting downstairs" | tee -a "$test_log"
${dsc} start --ds-bin "$downstairs" >> "$test_log" 2>&1 &
echo "Starting $region_count downstairs" | tee -a "$test_log"
${dsc} start --ds-bin "$downstairs" --region-count "$region_count" >> "$test_log" 2>&1 &
dsc_pid=$!
sleep 5
if ! ps -p $dsc_pid > /dev/null; then
Expand Down