diff --git a/README.md b/README.md index 4b27f6d..88f34a8 100644 --- a/README.md +++ b/README.md @@ -276,6 +276,14 @@ If the cluster topology has changed the Redis node might respond with a redirect which the client will handle, update its slotmap and resend the command to correct node. The reply will in this case arrive from the correct node. +If a node is unreachable, for example if the command times out or if the connect +times out, it can indicated that there has been a failover and the node is no +longer part of the cluster. In this case, `redisClusterCommand` returns NULL and +sets `err` and `errstr` on the cluster context, but additionally, hiredis +cluster schedules a slotmap update to be performed when the next command is +sent. That means that if you try the same command again, there is a good chance +the command will be sent to another node and the command may succeed. + ### Sending multi-key commands Hiredis-cluster supports mget/mset/del multi-key commands. @@ -297,6 +305,11 @@ reply = redisClusterCommandToNode(clustercontext, node, "DBSIZE"); This function handles printf like arguments similar to `redisClusterCommand()`, but will only attempt to send the command to the given node and will not perform redirects or retries. +If the command times out or the connection to the node fails, a slotmap update +is scheduled to be performed when the next command is sent. +`redisClusterCommandToNode` also performs a slotmap update if it has previously +been scheduled. + ### Teardown To disconnect and free the context the following function can be used: @@ -547,6 +560,21 @@ set of master nodes. There is no bookkeeping for already iterated nodes when a r which means that a node can be iterated over more than once depending on when the slotmap update happened and the change of cluster nodes. +Note that when `redisClusterCommandToNode` is called, a slotmap update can +happen if it has been scheduled by the previous command, for example if the +previous call to `redisClusterCommandToNode` timed out or the node wasn't +reachable. + +To detect when the slotmap has been updated, you can check if the iterator's +slotmap version (`iter.route_version`) is equal to the current cluster context's +slotmap version (`cc->route_version`). If it isn't, it means that the slotmap +has been updated and the iterator will restart itself at the next call to +`redisClusterNodeNext`. + +Another way to detect that the slotmap has been updated is to [register an event +callback](#events-per-cluster-context) and look for the event +`HIRCLUSTER_EVENT_SLOTMAP_UPDATED`. + ### Random number generator This library uses [random()](https://linux.die.net/man/3/random) while selecting diff --git a/hircluster.c b/hircluster.c index 7ea8eec..7ea0c82 100644 --- a/hircluster.c +++ b/hircluster.c @@ -3037,6 +3037,7 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, va_list ap; int ret; void *reply; + int updating_slotmap = 0; c = ctx_get_by_node(cc, node); if (c == NULL) { @@ -3046,6 +3047,11 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, return NULL; } + if (cc->err) { + cc->err = 0; + memset(cc->errstr, '\0', sizeof(cc->errstr)); + } + va_start(ap, format); ret = redisvAppendCommand(c, format, ap); va_end(ap); @@ -3055,11 +3061,29 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node, return NULL; } + if (cc->need_update_route) { + /* Pipeline slotmap update on the same connection. */ + if (clusterUpdateRouteSendCommand(cc, c) == REDIS_OK) { + updating_slotmap = 1; + } + } + if (redisGetReply(c, &reply) != REDIS_OK) { __redisClusterSetError(cc, c->err, c->errstr); + if (c->err != REDIS_ERR_OOM) + cc->need_update_route = 1; return NULL; } + if (updating_slotmap) { + /* Handle reply from pipelined CLUSTER SLOTS or CLUSTER NODES. */ + if (clusterUpdateRouteHandleReply(cc, c) != REDIS_OK) { + /* Ignore error. Update will be triggered on the next command. */ + cc->err = 0; + cc->errstr[0] = '\0'; + } + } + return reply; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c138900..046837c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -195,6 +195,10 @@ add_test(NAME dbsize-to-all-nodes-test-async WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") add_test(NAME dbsize-to-all-nodes-during-scaledown-test COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh" + "$" + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") +add_test(NAME dbsize-to-all-nodes-during-scaledown-test-async + COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh" "$" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/") add_test(NAME reconnect-test diff --git a/tests/clusterclient.c b/tests/clusterclient.c index 83dda6e..8bb998a 100644 --- a/tests/clusterclient.c +++ b/tests/clusterclient.c @@ -118,6 +118,10 @@ int main(int argc, char **argv) { printReply(reply); } freeReplyObject(reply); + if (ni.route_version != cc->route_version) { + /* Updated slotmap resets the iterator. Abort iteration. */ + break; + } } } else { redisReply *reply = redisClusterCommand(cc, command); diff --git a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh new file mode 100755 index 0000000..d2c1a77 --- /dev/null +++ b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# +# Verify that commands can be sent using the low-level API to specific +# nodes. The testcase will send each command to all known nodes and +# verify the behaviour when a node is removed from the cluster. +# +# First the command DBSIZE is sent to all (two) nodes successfully, +# then the second node is shutdown. The next DBSIZE command that is sent +# triggers a slotmap update due to the lost node and all following commands +# will therefor only be sent to a single node. +# +# Usage: $0 /path/to/clusterclient-binary + +clientprog=${1:-./clusterclient_async} +testname=dbsize-to-all-nodes-during-scaledown-test-async + +# Sync processes waiting for CONT signals. +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid1=$!; +perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' & +syncpid2=$!; + +# Start simulated redis node #1 +timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' & +EXPECT CONNECT +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 8383, ["127.0.0.1", 7401, "nodeid7401"]], [8384, 16383, ["127.0.0.1", 7402, "nodeid7402"]]] +EXPECT CLOSE +EXPECT CONNECT +EXPECT ["DBSIZE"] +SEND 10 +EXPECT ["DBSIZE"] +SEND 11 +# The second command to node2 fails which triggers a slotmap update. +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]] +EXPECT ["DBSIZE"] +SEND 12 +EXPECT CLOSE +EOF +server1=$! + +# Start simulated redis node #2 +timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' & +EXPECT CONNECT +EXPECT ["DBSIZE"] +SEND 20 +CLOSE +EOF +server2=$! + +# Wait until both nodes are ready to accept client connections +wait $syncpid1 $syncpid2; + +# Run client +timeout 5s "$clientprog" 127.0.0.1:7401 > "$testname.out" <<'EOF' +!all +DBSIZE +DBSIZE +# Allow slotmap update to finish. +!sleep +DBSIZE +EOF +clientexit=$? + +# Wait for servers to exit +wait $server1; server1exit=$? +wait $server2; server2exit=$? + +# Check exit statuses +if [ $server1exit -ne 0 ]; then + echo "Simulated server #1 exited with status $server1exit" + exit $server1exit +fi +if [ $server2exit -ne 0 ]; then + echo "Simulated server #2 exited with status $server2exit" + exit $server2exit +fi +if [ $clientexit -ne 0 ]; then + echo "$clientprog exited with status $clientexit" + exit $clientexit +fi + +# Check the output from clusterclient +expected="10 +20 +error: Server closed the connection +11 +12" + +echo "$expected" | diff -u - "$testname.out" || exit 99 + +# Clean up +rm "$testname.out" diff --git a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh index 315be87..3186346 100755 --- a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh +++ b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh @@ -31,11 +31,12 @@ EXPECT ["DBSIZE"] SEND 10 EXPECT ["DBSIZE"] SEND 11 -# The second command to node2 fails which triggers a slotmap update. -EXPECT ["CLUSTER", "SLOTS"] -SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]] EXPECT ["DBSIZE"] SEND 12 +# The second command to node2 fails which triggers a slotmap update pipelined +# onto the 3rd DBSIZE to this node. +EXPECT ["CLUSTER", "SLOTS"] +SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]] EXPECT CLOSE EOF server1=$! @@ -84,8 +85,8 @@ fi # Check the output from clusterclient expected="10 20 -error: Server closed the connection 11 +error: Server closed the connection 12" echo "$expected" | diff -u - "$testname.out" || exit 99