Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update slotmap on error in ToNode (sync API) #185

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions hircluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -3037,6 +3037,7 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
va_list ap;
int ret;
void *reply;
int updating_slotmap = 0;

c = ctx_get_by_node(cc, node);
if (c == NULL) {
Expand All @@ -3046,6 +3047,11 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
return NULL;
}

if (cc->err) {
cc->err = 0;
memset(cc->errstr, '\0', sizeof(cc->errstr));
}

va_start(ap, format);
ret = redisvAppendCommand(c, format, ap);
va_end(ap);
Expand All @@ -3055,11 +3061,29 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
return NULL;
}

if (cc->need_update_route) {
/* Pipeline slotmap update on the same connection. */
if (clusterUpdateRouteSendCommand(cc, c) == REDIS_OK) {
updating_slotmap = 1;
}
}

if (redisGetReply(c, &reply) != REDIS_OK) {
__redisClusterSetError(cc, c->err, c->errstr);
if (c->err != REDIS_ERR_OOM)
cc->need_update_route = 1;
return NULL;
}

if (updating_slotmap) {
/* Handle reply from pipelined CLUSTER SLOTS or CLUSTER NODES. */
if (clusterUpdateRouteHandleReply(cc, c) != REDIS_OK) {
/* Ignore error. Update will be triggered on the next command. */
cc->err = 0;
cc->errstr[0] = '\0';
}
}

return reply;
}

Expand Down
4 changes: 4 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ add_test(NAME dbsize-to-all-nodes-test-async
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME dbsize-to-all-nodes-during-scaledown-test
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh"
"$<TARGET_FILE:clusterclient>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME dbsize-to-all-nodes-during-scaledown-test-async
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh"
"$<TARGET_FILE:clusterclient_async>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME reconnect-test
Expand Down
4 changes: 4 additions & 0 deletions tests/clusterclient.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ int main(int argc, char **argv) {
printReply(reply);
}
freeReplyObject(reply);
if (ni.route_version != cc->route_version) {
/* Updated slotmap resets the iterator. Abort iteration. */
break;
}
}
} else {
redisReply *reply = redisClusterCommand(cc, command);
Expand Down
94 changes: 94 additions & 0 deletions tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
#
# Verify that commands can be sent using the low-level API to specific
# nodes. The testcase will send each command to all known nodes and
# verify the behaviour when a node is removed from the cluster.
#
# First the command DBSIZE is sent to all (two) nodes successfully,
# then the second node is shutdown. The next DBSIZE command that is sent
# triggers a slotmap update due to the lost node and all following commands
# will therefor only be sent to a single node.
#
# Usage: $0 /path/to/clusterclient-binary

clientprog=${1:-./clusterclient_async}
testname=dbsize-to-all-nodes-during-scaledown-test-async

# Sync processes waiting for CONT signals.
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid1=$!;
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid2=$!;

# Start simulated redis node #1
timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 8383, ["127.0.0.1", 7401, "nodeid7401"]], [8384, 16383, ["127.0.0.1", 7402, "nodeid7402"]]]
EXPECT CLOSE
EXPECT CONNECT
EXPECT ["DBSIZE"]
SEND 10
EXPECT ["DBSIZE"]
SEND 11
# The second command to node2 fails which triggers a slotmap update.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT ["DBSIZE"]
SEND 12
EXPECT CLOSE
EOF
server1=$!

# Start simulated redis node #2
timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' &
EXPECT CONNECT
EXPECT ["DBSIZE"]
SEND 20
CLOSE
EOF
server2=$!

# Wait until both nodes are ready to accept client connections
wait $syncpid1 $syncpid2;

# Run client
timeout 5s "$clientprog" 127.0.0.1:7401 > "$testname.out" <<'EOF'
!all
DBSIZE
DBSIZE
# Allow slotmap update to finish.
!sleep
DBSIZE
EOF
clientexit=$?

# Wait for servers to exit
wait $server1; server1exit=$?
wait $server2; server2exit=$?

# Check exit statuses
if [ $server1exit -ne 0 ]; then
echo "Simulated server #1 exited with status $server1exit"
exit $server1exit
fi
if [ $server2exit -ne 0 ]; then
echo "Simulated server #2 exited with status $server2exit"
exit $server2exit
fi
if [ $clientexit -ne 0 ]; then
echo "$clientprog exited with status $clientexit"
exit $clientexit
fi

# Check the output from clusterclient
expected="10
20
error: Server closed the connection
11
12"

echo "$expected" | diff -u - "$testname.out" || exit 99

# Clean up
rm "$testname.out"
9 changes: 5 additions & 4 deletions tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ EXPECT ["DBSIZE"]
SEND 10
EXPECT ["DBSIZE"]
SEND 11
# The second command to node2 fails which triggers a slotmap update.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT ["DBSIZE"]
SEND 12
# The second command to node2 fails which triggers a slotmap update pipelined
# onto the 3rd DBSIZE to this node.
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT CLOSE
EOF
server1=$!
Expand Down Expand Up @@ -84,8 +85,8 @@ fi
# Check the output from clusterclient
expected="10
20
error: Server closed the connection
11
error: Server closed the connection
12"

echo "$expected" | diff -u - "$testname.out" || exit 99
Expand Down