Skip to content

Commit

Permalink
Update slotmap when redisClusterCommandToNode() fails (#185)
Browse files Browse the repository at this point in the history
Since a failure can be a timeout, we don't want the slotmap update to take more time in the same call. The update is scheduled to be done at the next command instead.
  • Loading branch information
zuiderkwast authored Sep 1, 2023
1 parent 16e6ee8 commit 7c18baf
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 4 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,14 @@ If the cluster topology has changed the Redis node might respond with a redirect
which the client will handle, update its slotmap and resend the command to correct node.
The reply will in this case arrive from the correct node.

If a node is unreachable, for example if the command times out or if the connect
times out, it can indicated that there has been a failover and the node is no
longer part of the cluster. In this case, `redisClusterCommand` returns NULL and
sets `err` and `errstr` on the cluster context, but additionally, hiredis
cluster schedules a slotmap update to be performed when the next command is
sent. That means that if you try the same command again, there is a good chance
the command will be sent to another node and the command may succeed.

### Sending multi-key commands

Hiredis-cluster supports mget/mset/del multi-key commands.
Expand All @@ -297,6 +305,11 @@ reply = redisClusterCommandToNode(clustercontext, node, "DBSIZE");
This function handles printf like arguments similar to `redisClusterCommand()`, but will
only attempt to send the command to the given node and will not perform redirects or retries.

If the command times out or the connection to the node fails, a slotmap update
is scheduled to be performed when the next command is sent.
`redisClusterCommandToNode` also performs a slotmap update if it has previously
been scheduled.

### Teardown

To disconnect and free the context the following function can be used:
Expand Down Expand Up @@ -547,6 +560,21 @@ set of master nodes. There is no bookkeeping for already iterated nodes when a r
which means that a node can be iterated over more than once depending on when the slotmap update happened
and the change of cluster nodes.

Note that when `redisClusterCommandToNode` is called, a slotmap update can
happen if it has been scheduled by the previous command, for example if the
previous call to `redisClusterCommandToNode` timed out or the node wasn't
reachable.

To detect when the slotmap has been updated, you can check if the iterator's
slotmap version (`iter.route_version`) is equal to the current cluster context's
slotmap version (`cc->route_version`). If it isn't, it means that the slotmap
has been updated and the iterator will restart itself at the next call to
`redisClusterNodeNext`.

Another way to detect that the slotmap has been updated is to [register an event
callback](#events-per-cluster-context) and look for the event
`HIRCLUSTER_EVENT_SLOTMAP_UPDATED`.

### Random number generator

This library uses [random()](https://linux.die.net/man/3/random) while selecting
Expand Down
24 changes: 24 additions & 0 deletions hircluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -3037,6 +3037,7 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
va_list ap;
int ret;
void *reply;
int updating_slotmap = 0;

c = ctx_get_by_node(cc, node);
if (c == NULL) {
Expand All @@ -3046,6 +3047,11 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
return NULL;
}

if (cc->err) {
cc->err = 0;
memset(cc->errstr, '\0', sizeof(cc->errstr));
}

va_start(ap, format);
ret = redisvAppendCommand(c, format, ap);
va_end(ap);
Expand All @@ -3055,11 +3061,29 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
return NULL;
}

if (cc->need_update_route) {
/* Pipeline slotmap update on the same connection. */
if (clusterUpdateRouteSendCommand(cc, c) == REDIS_OK) {
updating_slotmap = 1;
}
}

if (redisGetReply(c, &reply) != REDIS_OK) {
__redisClusterSetError(cc, c->err, c->errstr);
if (c->err != REDIS_ERR_OOM)
cc->need_update_route = 1;
return NULL;
}

if (updating_slotmap) {
/* Handle reply from pipelined CLUSTER SLOTS or CLUSTER NODES. */
if (clusterUpdateRouteHandleReply(cc, c) != REDIS_OK) {
/* Ignore error. Update will be triggered on the next command. */
cc->err = 0;
cc->errstr[0] = '\0';
}
}

return reply;
}

Expand Down
4 changes: 4 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ add_test(NAME dbsize-to-all-nodes-test-async
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME dbsize-to-all-nodes-during-scaledown-test
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh"
"$<TARGET_FILE:clusterclient>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME dbsize-to-all-nodes-during-scaledown-test-async
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh"
"$<TARGET_FILE:clusterclient_async>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME reconnect-test
Expand Down
4 changes: 4 additions & 0 deletions tests/clusterclient.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ int main(int argc, char **argv) {
printReply(reply);
}
freeReplyObject(reply);
if (ni.route_version != cc->route_version) {
/* Updated slotmap resets the iterator. Abort iteration. */
break;
}
}
} else {
redisReply *reply = redisClusterCommand(cc, command);
Expand Down
94 changes: 94 additions & 0 deletions tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
#
# Verify that commands can be sent using the low-level API to specific
# nodes. The testcase will send each command to all known nodes and
# verify the behaviour when a node is removed from the cluster.
#
# First the command DBSIZE is sent to all (two) nodes successfully,
# then the second node is shutdown. The next DBSIZE command that is sent
# triggers a slotmap update due to the lost node and all following commands
# will therefor only be sent to a single node.
#
# Usage: $0 /path/to/clusterclient-binary

clientprog=${1:-./clusterclient_async}
testname=dbsize-to-all-nodes-during-scaledown-test-async

# Sync processes waiting for CONT signals.
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid1=$!;
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid2=$!;

# Start simulated redis node #1
timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 8383, ["127.0.0.1", 7401, "nodeid7401"]], [8384, 16383, ["127.0.0.1", 7402, "nodeid7402"]]]
EXPECT CLOSE
EXPECT CONNECT
EXPECT ["DBSIZE"]
SEND 10
EXPECT ["DBSIZE"]
SEND 11
# The second command to node2 fails which triggers a slotmap update.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT ["DBSIZE"]
SEND 12
EXPECT CLOSE
EOF
server1=$!

# Start simulated redis node #2
timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' &
EXPECT CONNECT
EXPECT ["DBSIZE"]
SEND 20
CLOSE
EOF
server2=$!

# Wait until both nodes are ready to accept client connections
wait $syncpid1 $syncpid2;

# Run client
timeout 5s "$clientprog" 127.0.0.1:7401 > "$testname.out" <<'EOF'
!all
DBSIZE
DBSIZE
# Allow slotmap update to finish.
!sleep
DBSIZE
EOF
clientexit=$?

# Wait for servers to exit
wait $server1; server1exit=$?
wait $server2; server2exit=$?

# Check exit statuses
if [ $server1exit -ne 0 ]; then
echo "Simulated server #1 exited with status $server1exit"
exit $server1exit
fi
if [ $server2exit -ne 0 ]; then
echo "Simulated server #2 exited with status $server2exit"
exit $server2exit
fi
if [ $clientexit -ne 0 ]; then
echo "$clientprog exited with status $clientexit"
exit $clientexit
fi

# Check the output from clusterclient
expected="10
20
error: Server closed the connection
11
12"

echo "$expected" | diff -u - "$testname.out" || exit 99

# Clean up
rm "$testname.out"
9 changes: 5 additions & 4 deletions tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ EXPECT ["DBSIZE"]
SEND 10
EXPECT ["DBSIZE"]
SEND 11
# The second command to node2 fails which triggers a slotmap update.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT ["DBSIZE"]
SEND 12
# The second command to node2 fails which triggers a slotmap update pipelined
# onto the 3rd DBSIZE to this node.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT CLOSE
EOF
server1=$!
Expand Down Expand Up @@ -84,8 +85,8 @@ fi
# Check the output from clusterclient
expected="10
20
error: Server closed the connection
11
error: Server closed the connection
12"

echo "$expected" | diff -u - "$testname.out" || exit 99
Expand Down

0 comments on commit 7c18baf

Please sign in to comment.