From 7d8dba81d7171f5794f4620ce150dfbcf8db5ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Tue, 29 Aug 2023 11:48:14 +0200 Subject: [PATCH 1/6] Update slotmap when redisClusterCommandToNode() fails --- hircluster.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hircluster.c b/hircluster.c index 7ea8eec..a984496 100644 --- a/hircluster.c +++ b/hircluster.c @@ -3037,6 +3037,7 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, va_list ap; int ret; void *reply; + int updating_slotmap = 0; c = ctx_get_by_node(cc, node); if (c == NULL) { @@ -3055,11 +3056,28 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, return NULL; } + if (cc->need_update_route) { + /* Pipeline slotmap update on the same connection. */ + if (clusterUpdateRouteSendCommand(cc, c) == REDIS_OK) { + updating_slotmap = 1; + } + } + if (redisGetReply(c, &reply) != REDIS_OK) { __redisClusterSetError(cc, c->err, c->errstr); + cc->need_update_route = 1; return NULL; } + if (updating_slotmap) { + /* Handle reply from pipelined CLUSTER SLOTS or CLUSTER NODES. */ + if (clusterUpdateRouteHandleReply(cc, c) != REDIS_OK) { + /* Ignore error. Update will be triggered on the next command. */ + cc->err = 0; + cc->errstr[0] = '\0'; + } + } + return reply; } From 17bef44c6066d0594fbba483ded63f3df1bc2396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Tue, 29 Aug 2023 11:59:23 +0200 Subject: [PATCH 2/6] Fixup: Skip update slotmap on OOM --- hircluster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hircluster.c b/hircluster.c index a984496..6d01e95 100644 --- a/hircluster.c +++ b/hircluster.c @@ -3065,7 +3065,8 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, if (redisGetReply(c, &reply) != REDIS_OK) { __redisClusterSetError(cc, c->err, c->errstr); - cc->need_update_route = 1; + if (c->err != REDIS_ERR_OOM) + cc->need_update_route = 1; return NULL; } From 5d49e12038aa0743194b0d8e7b48e48f4bef3a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Wed, 30 Aug 2023 14:26:57 +0200 Subject: [PATCH 3/6] Add dbsize-to-all-nodes-during-scaledown-test for sync API and some corrections --- hircluster.c | 5 +++++ tests/CMakeLists.txt | 4 ++++ tests/clusterclient.c | 4 ++++ .../scripts/dbsize-to-all-nodes-during-scaledown-test.sh | 9 +++++---- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hircluster.c b/hircluster.c index 6d01e95..7ea0c82 100644 --- a/hircluster.c +++ b/hircluster.c @@ -3047,6 +3047,11 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, return NULL; } + if (cc->err) { + cc->err = 0; + memset(cc->errstr, '\0', sizeof(cc->errstr)); + } + va_start(ap, format); ret = redisvAppendCommand(c, format, ap); va_end(ap); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 52ae107..ac13eef 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -197,6 +197,10 @@ add_test(NAME dbsize-to-all-nodes-test-async WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") add_test(NAME dbsize-to-all-nodes-during-scaledown-test COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh" + "$" + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") +add_test(NAME dbsize-to-all-nodes-during-scaledown-test-async + COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh" "$" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") add_test(NAME reconnect-test diff --git a/tests/clusterclient.c b/tests/clusterclient.c index 83dda6e..8bb998a 100644 --- a/tests/clusterclient.c +++ b/tests/clusterclient.c @@ -118,6 +118,10 @@ int main(int argc, char **argv) { printReply(reply); } freeReplyObject(reply); + if (ni.route_version != cc->route_version) { + /* Updated slotmap resets the iterator. Abort iteration. */ + break; + } } } else { redisReply *reply = redisClusterCommand(cc, command); diff --git a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh index 315be87..3186346 100755 --- a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh +++ b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh @@ -31,11 +31,12 @@ EXPECT ["DBSIZE"] SEND 10 EXPECT ["DBSIZE"] SEND 11 -# The second command to node2 fails which triggers a slotmap update. -EXPECT ["CLUSTER", "SLOTS"] -SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]] EXPECT ["DBSIZE"] SEND 12 +# The second command to node2 fails which triggers a slotmap update pipelined +# onto the 3rd DBSIZE to this node. +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]] EXPECT CLOSE EOF server1=$! @@ -84,8 +85,8 @@ fi # Check the output from clusterclient expected="10 20 -error: Server closed the connection 11 +error: Server closed the connection 12" echo "$expected" | diff -u - "$testname.out" || exit 99 From d89123ca8c2a7c8b70ca218d71d03c7d880c5311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Wed, 30 Aug 2023 14:38:56 +0200 Subject: [PATCH 4/6] Add back the renamed dbsize-to-all-nodes-during-scaledown-test-async for async API --- ...o-all-nodes-during-scaledown-test-async.sh | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100755 tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh diff --git a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh new file mode 100755 index 0000000..d2c1a77 --- /dev/null +++ b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# +# Verify that commands can be sent using the low-level API to specific +# nodes. The testcase will send each command to all known nodes and +# verify the behaviour when a node is removed from the cluster. +# +# First the command DBSIZE is sent to all (two) nodes successfully, +# then the second node is shutdown. The next DBSIZE command that is sent +# triggers a slotmap update due to the lost node and all following commands +# will therefor only be sent to a single node. +# +# Usage: $0 /path/to/clusterclient-binary + +clientprog=${1:-./clusterclient_async} +testname=dbsize-to-all-nodes-during-scaledown-test-async + +# Sync processes waiting for CONT signals. +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid1=$!; +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid2=$!; + +# Start simulated redis node #1 +timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' & +EXPECT CONNECT +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 8383, ["127.0.0.1", 7401, "nodeid7401"]], [8384, 16383, ["127.0.0.1", 7402, "nodeid7402"]]] +EXPECT CLOSE +EXPECT CONNECT +EXPECT ["DBSIZE"] +SEND 10 +EXPECT ["DBSIZE"] +SEND 11 +# The second command to node2 fails which triggers a slotmap update. +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]] +EXPECT ["DBSIZE"] +SEND 12 +EXPECT CLOSE +EOF +server1=$! + +# Start simulated redis node #2 +timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' & +EXPECT CONNECT +EXPECT ["DBSIZE"] +SEND 20 +CLOSE +EOF +server2=$! + +# Wait until both nodes are ready to accept client connections +wait $syncpid1 $syncpid2; + +# Run client +timeout 5s "$clientprog" 127.0.0.1:7401 > "$testname.out" <<'EOF' +!all +DBSIZE +DBSIZE +# Allow slotmap update to finish. +!sleep +DBSIZE +EOF +clientexit=$? + +# Wait for servers to exit +wait $server1; server1exit=$? +wait $server2; server2exit=$? + +# Check exit statuses +if [ $server1exit -ne 0 ]; then + echo "Simulated server #1 exited with status $server1exit" + exit $server1exit +fi +if [ $server2exit -ne 0 ]; then + echo "Simulated server #2 exited with status $server2exit" + exit $server2exit +fi +if [ $clientexit -ne 0 ]; then + echo "$clientprog exited with status $clientexit" + exit $clientexit +fi + +# Check the output from clusterclient +expected="10 +20 +error: Server closed the connection +11 +12" + +echo "$expected" | diff -u - "$testname.out" || exit 99 + +# Clean up +rm "$testname.out" From 56810abc56db5cdd7e521d0f5e7ea63ec29564f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Thu, 31 Aug 2023 17:33:32 +0200 Subject: [PATCH 5/6] Add docs in README --- README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/README.md b/README.md index 4b27f6d..a6fecc4 100644 --- a/README.md +++ b/README.md @@ -276,6 +276,14 @@ If the cluster topology has changed the Redis node might respond with a redirect which the client will handle, update its slotmap and resend the command to correct node. The reply will in this case arrive from the correct node. +If a node is unreachable, for example if the command times out or if the connect +times out, it can indicated that there has been a failover and the node is no +longer part of the cluster. In this case, `redisClusterCommand` returns NULL and +sets `err` and `errstr` on the cluster context, but additionally, hiredis +cluster schedules a slotmap updated to be performed when the next command is +sent. That means that if you try the same command again, there is a good chance +the command will be sent to another node and the command can succeed. + ### Sending multi-key commands Hiredis-cluster supports mget/mset/del multi-key commands. @@ -297,6 +305,10 @@ reply = redisClusterCommandToNode(clustercontext, node, "DBSIZE"); This function handles printf like arguments similar to `redisClusterCommand()`, but will only attempt to send the command to the given node and will not perform redirects or retries. +If the command times out or the connection to the node fails, slotmap update is +scheduled to be performed by the next command. `redisClusterCommandToNode` also +performs a slotmap update if it has previously been scheduled. + ### Teardown To disconnect and free the context the following function can be used: @@ -547,6 +559,21 @@ set of master nodes. There is no bookkeeping for already iterated nodes when a r which means that a node can be iterated over more than once depending on when the slotmap update happened and the change of cluster nodes. +Note that when `redisClusterCommandToNode` is called, a slotmap update can +happen if it has been scheduled by the previous command, for example if the +previous call to `redisClusterCommandToNode` timed out or the node wasn't +reachable. + +To detect when the slotmap has been updated, you can check if the iterator's +slotmap version (`iter.route_version`) is equal to the current cluster context's +slotmap version (`cc->route_version`). If it isn't, it means that the slotmap +has been updated and the iterator will restart itself at the next call to +`redisClusterNodeNext`. + +Another way to detect that the slotmap has been updated is to [register an event +callback](#events-per-cluster-context) and look for the event +`HIRCLUSTER_EVENT_SLOTMAP_UPDATED`. + ### Random number generator This library uses [random()](https://linux.die.net/man/3/random) while selecting From 28fbd615f820ce70197dafaea0e6e6ad26b0811c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Thu, 31 Aug 2023 17:36:24 +0200 Subject: [PATCH 6/6] Fixup: grammar/spelling --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a6fecc4..88f34a8 100644 --- a/README.md +++ b/README.md @@ -280,9 +280,9 @@ If a node is unreachable, for example if the command times out or if the connect times out, it can indicated that there has been a failover and the node is no longer part of the cluster. In this case, `redisClusterCommand` returns NULL and sets `err` and `errstr` on the cluster context, but additionally, hiredis -cluster schedules a slotmap updated to be performed when the next command is +cluster schedules a slotmap update to be performed when the next command is sent. That means that if you try the same command again, there is a good chance -the command will be sent to another node and the command can succeed. +the command will be sent to another node and the command may succeed. ### Sending multi-key commands @@ -305,9 +305,10 @@ reply = redisClusterCommandToNode(clustercontext, node, "DBSIZE"); This function handles printf like arguments similar to `redisClusterCommand()`, but will only attempt to send the command to the given node and will not perform redirects or retries. -If the command times out or the connection to the node fails, slotmap update is -scheduled to be performed by the next command. `redisClusterCommandToNode` also -performs a slotmap update if it has previously been scheduled. +If the command times out or the connection to the node fails, a slotmap update +is scheduled to be performed when the next command is sent. +`redisClusterCommandToNode` also performs a slotmap update if it has previously +been scheduled. ### Teardown