Skip to content

Commit

Permalink
Fix timing issues in flaky scaledown tests (#189)
Browse files Browse the repository at this point in the history
Terminate a clusterclient directly instead of first closing its client
socket. This avoids that hiredis-cluster might succeed to reconnect to
the server before the server has shutdown.

Close the simulated-redis's listener socket early improves stability.

Add check for alternative output depending on timing in following testcases:
cluster-scale-down-test.sh
dbsize-to-all-nodes-during-scaledown-test.sh
dbsize-to-all-nodes-during-scaledown-test-async.sh
  • Loading branch information
bjosv authored Sep 15, 2023
1 parent 77bd2bb commit 728956c
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 15 deletions.
17 changes: 12 additions & 5 deletions tests/scripts/cluster-scale-down-test.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
#
# To verify the clients behaviour in a cluster scaledown scenario.
# The testcase will send commands, all targeting hash slot 12182, while removing
Expand Down Expand Up @@ -64,7 +64,6 @@ EXPECT CONNECT
EXPECT ["GET", "{foo}1"]
SEND "bar1"
# Forced close. The next command "GET {foo}2" will fail.
CLOSE
EOF
server2=$!

Expand Down Expand Up @@ -97,12 +96,20 @@ if [ $clientexit -ne 0 ]; then
exit $clientexit
fi

# Check the output from clusterclient
expected="bar1
# Check the output from clusterclient, which depends on timing.
# Client sends the command 'GET {foo}2' just after nodeid2 closes its socket.
expected1="bar1
error: Server closed the connection
bar3"

echo "$expected" | diff -u - "$testname.out" || exit 99
# Client sends the command 'GET {foo}2' just before nodeid2 closes its socket.
expected2="bar1
error: Connection reset by peer
bar3"

diff -u "$testname.out" <(echo "$expected1") || \
diff -u "$testname.out" <(echo "$expected2") || \
exit 99

# Clean up
rm "$testname.out"
22 changes: 17 additions & 5 deletions tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ EXPECT ["DBSIZE"]
SEND 10
EXPECT ["DBSIZE"]
SEND 11
# The second command to node2 fails which triggers a slotmap update.
# The second command to node #2 fails which triggers a slotmap update.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT ["DBSIZE"]
Expand All @@ -45,7 +45,7 @@ timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' &
EXPECT CONNECT
EXPECT ["DBSIZE"]
SEND 20
CLOSE
# Forced close. The second command to this node should trigger a slotmap update.
EOF
server2=$!

Expand Down Expand Up @@ -81,14 +81,26 @@ if [ $clientexit -ne 0 ]; then
exit $clientexit
fi

# Check the output from clusterclient
expected="10
# Check the output from clusterclient, which depends on timing.
# Client sends the second 'DBSIZE' to node #2 just after node #2 closes its socket.
expected1="10
20
error: Server closed the connection
11
12"

echo "$expected" | diff -u - "$testname.out" || exit 99
# Client sends the second 'DBSIZE' to node #2 just before node #2 closes its socket.
expected2="10
20
error: Connection reset by peer
11
12"

# The reply "11" from node #1 can come before or after the socket error from node #2.
# Therefore, we sort before comparing.
diff -u <(echo "$expected1" | sort) <(sort "$testname.out") || \
diff -u <(echo "$expected2" | sort) <(sort "$testname.out") || \
exit 99

# Clean up
rm "$testname.out"
20 changes: 15 additions & 5 deletions tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ EXPECT ["DBSIZE"]
SEND 11
EXPECT ["DBSIZE"]
SEND 12
# The second command to node2 fails which triggers a slotmap update pipelined
# The second command to node #2 fails which triggers a slotmap update pipelined
# onto the 3rd DBSIZE to this node.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
Expand All @@ -46,7 +46,7 @@ timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' &
EXPECT CONNECT
EXPECT ["DBSIZE"]
SEND 20
CLOSE
# Forced close. The second command to this node should trigger a slotmap update.
EOF
server2=$!

Expand Down Expand Up @@ -82,14 +82,24 @@ if [ $clientexit -ne 0 ]; then
exit $clientexit
fi

# Check the output from clusterclient
expected="10
# Check the output from clusterclient, which depends on timing.
# Client sends the second 'DBSIZE' to node #2 just after node #2 closes its socket.
expected1="10
20
11
error: Server closed the connection
12"

echo "$expected" | diff -u - "$testname.out" || exit 99
# Client sends the second 'DBSIZE' to node #2 just before node #2 closes its socket.
expected2="10
20
11
error: Connection reset by peer
12"

diff -u "$testname.out" <(echo "$expected1") || \
diff -u "$testname.out" <(echo "$expected2") || \
exit 99

# Clean up
rm "$testname.out"
1 change: 1 addition & 0 deletions tests/scripts/simulated-redis.pl
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ END
unexpected($port, "event: $_");
}
}
close $listener;
print "(port $port) Done.\n" if $debug;
exit;

Expand Down

0 comments on commit 728956c

Please sign in to comment.