Skip to content

Commit

Permalink
Update slotmap when slot is not served by any node (#192)
Browse files Browse the repository at this point in the history
The sync-API will update the slotmap before attempting to send a command
when the slot is not served by any cluster node.

The async-API will initate the slotmap update in parallell so that the next
command might have an updated slotmap.

This also fix an additional issue by using the correct function name for
redisClusterUpdateSlotmap(). The former name cluster_update_route() is not defined
when HIRCLUSTER_NO_OLD_NAMES is defined, which would break the build.

Co-authored-by: Viktor Söderqvist <[email protected]>
  • Loading branch information
bjosv and zuiderkwast authored Sep 29, 2023
1 parent 0a4deb6 commit 0196253
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 22 deletions.
15 changes: 13 additions & 2 deletions hircluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -2230,14 +2230,22 @@ static void *redis_cluster_command_execute(redisClusterContext *cc,

node = node_get_by_table(cc, (uint32_t)command->slot_num);
if (node == NULL) {
goto error;
/* Update the slotmap since the slot is not served. */
if (redisClusterUpdateSlotmap(cc) != REDIS_OK) {
goto error;
}
node = node_get_by_table(cc, (uint32_t)command->slot_num);
if (node == NULL) {
/* Return error since the slot is still not served. */
goto error;
}
}

c = ctx_get_by_node(cc, node);
if (c == NULL || c->err) {
/* Failed to connect. Maybe there was a failover and this node is gone.
* Update slotmap to find out. */
if (cluster_update_route(cc) != REDIS_OK) {
if (redisClusterUpdateSlotmap(cc) != REDIS_OK) {
goto error;
}

Expand Down Expand Up @@ -4135,6 +4143,9 @@ int redisClusterAsyncFormattedCommand(redisClusterAsyncContext *acc,

node = node_get_by_table(cc, (uint32_t)slot_num);
if (node == NULL) {
/* Initiate a slotmap update since the slot is not served. */
throttledUpdateSlotMapAsync(acc, NULL);

/* node_get_by_table() has set the error on cc. */
__redisClusterAsyncSetError(acc, cc->err, cc->errstr);
goto error;
Expand Down
8 changes: 8 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,11 @@ add_test(NAME cluster-scale-down-test
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/cluster-scale-down-test.sh"
"$<TARGET_FILE:clusterclient>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME slots-not-served-test
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/slots-not-served-test.sh"
"$<TARGET_FILE:clusterclient>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
add_test(NAME slots-not-served-test-async
COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/slots-not-served-test-async.sh"
"$<TARGET_FILE:clusterclient_async>"
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
11 changes: 9 additions & 2 deletions tests/clusterclient_async.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,15 @@ void sendNextCommand(int fd, short kind, void *arg) {
} else {
int status = redisClusterAsyncCommand(
acc, replyCallback, (void *)((intptr_t)num_running), cmd);
ASSERT_MSG(status == REDIS_OK, acc->errstr);
num_running++;
if (status == REDIS_OK) {
num_running++;
} else {
printf("error: %s\n", acc->errstr);

/* Schedule a read from stdin and handle next command. */
event_base_once(acc->adapter, -1, EV_TIMEOUT, sendNextCommand,
acc, NULL);
}
}

if (async)
Expand Down
22 changes: 4 additions & 18 deletions tests/ct_connection.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,19 +371,14 @@ void test_async_password_ok(void) {
event_base_free(base);
}

// Connecting to a password protected cluster using
// the async API, providing wrong password.
/* Connect to a password protected cluster using the wrong password.
An eventloop is not attached since it is not needed is this case. */
void test_async_password_wrong(void) {
redisClusterAsyncContext *acc = redisClusterAsyncContextInit();
assert(acc);
redisClusterAsyncSetConnectCallback(acc, callbackExpectOk);
redisClusterAsyncSetDisconnectCallback(acc, callbackExpectOk);
redisClusterSetOptionAddNodes(acc->cc, CLUSTER_NODE_WITH_PASSWORD);
redisClusterSetOptionPassword(acc->cc, "faultypass");

struct event_base *base = event_base_new();
redisClusterLibeventAttach(acc, base);

int ret;
ret = redisClusterConnect2(acc->cc);
assert(ret == REDIS_ERR);
Expand All @@ -401,14 +396,11 @@ void test_async_password_wrong(void) {
assert(acc->err == REDIS_ERR_OTHER);
assert(strcmp(acc->errstr, "slotmap not available") == 0);

event_base_dispatch(base);

redisClusterAsyncFree(acc);
event_base_free(base);
}

// Connecting to a password protected cluster using
// the async API, not providing a password.
/* Connect to a password protected cluster without providing a password.
An eventloop is not attached since it is not needed is this case. */
void test_async_password_missing(void) {
redisClusterAsyncContext *acc = redisClusterAsyncContextInit();
assert(acc);
Expand All @@ -417,9 +409,6 @@ void test_async_password_missing(void) {
redisClusterSetOptionAddNodes(acc->cc, CLUSTER_NODE_WITH_PASSWORD);
// Password not configured

struct event_base *base = event_base_new();
redisClusterLibeventAttach(acc, base);

int ret;
ret = redisClusterConnect2(acc->cc);
assert(ret == REDIS_ERR);
Expand All @@ -434,10 +423,7 @@ void test_async_password_missing(void) {
assert(acc->err == REDIS_ERR_OTHER);
assert(strcmp(acc->errstr, "slotmap not available") == 0);

event_base_dispatch(base);

redisClusterAsyncFree(acc);
event_base_free(base);
}

// Connect to a cluster and authenticate using username and password
Expand Down
67 changes: 67 additions & 0 deletions tests/scripts/slots-not-served-test-async.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash

# Usage: $0 /path/to/clusterclient-async

clientprog=${1:-./clusterclient-async}
testname=slots-not-served-test-async

# Sync processes waiting for CONT signals.
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid1=$!;

# Start simulated redis node #1
timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
# The initial slotmap is not covering all slots.
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 1, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT CLOSE
# Slotmap update due to slot not served.
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT ["GET", "foo"]
SEND "bar"
EXPECT CLOSE
EOF
server1=$!

# Wait until node is ready to accept client connections
wait $syncpid1;

# Run client
timeout 3s "$clientprog" --events 127.0.0.1:7401 > "$testname.out" <<'EOF'
GET foo
# Allow slotmap update to finish.
!sleep
GET foo
EOF
clientexit=$?

# Wait for server to exit
wait $server1; server1exit=$?

# Check exit statuses
if [ $server1exit -ne 0 ]; then
echo "Simulated server #1 exited with status $server1exit"
exit $server1exit
fi
if [ $clientexit -ne 0 ]; then
echo "$clientprog exited with status $clientexit"
exit $clientexit
fi

# Check the output from clusterclient
expected="Event: slotmap-updated
Event: ready
error: slot not served by any node
Event: slotmap-updated
bar
Event: free-context"

echo "$expected" | diff -u - "$testname.out" || exit 99

# Clean up
rm "$testname.out"
76 changes: 76 additions & 0 deletions tests/scripts/slots-not-served-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash

# Usage: $0 /path/to/clusterclient-binary

clientprog=${1:-./clusterclient}
testname=slots-not-served-test

# Sync processes waiting for CONT signals.
perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
syncpid1=$!;

# Start simulated redis node #1
timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
# The initial slotmap is not covering all slots.
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 1, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT CLOSE
# Slotmap update due to the slot for `foo1` is not served.
# The reply is still missing slots.
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 1, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT CLOSE
# Slotmap update due to the slot for `foo2` is not served.
# The reply now has full slot coverage.
EXPECT CONNECT
EXPECT ["CLUSTER", "SLOTS"]
SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
EXPECT CLOSE
EXPECT CONNECT
EXPECT ["GET", "foo2"]
SEND "bar2"
EXPECT CLOSE
EOF
server1=$!

# Wait until node is ready to accept client connections
wait $syncpid1;

# Run client
timeout 3s "$clientprog" --events 127.0.0.1:7401 > "$testname.out" <<'EOF'
GET foo1
GET foo2
EOF
clientexit=$?

# Wait for server to exit
wait $server1; server1exit=$?

# Check exit statuses
if [ $server1exit -ne 0 ]; then
echo "Simulated server #1 exited with status $server1exit"
exit $server1exit
fi
if [ $clientexit -ne 0 ]; then
echo "$clientprog exited with status $clientexit"
exit $clientexit
fi

# Check the output from clusterclient
expected="Event: slotmap-updated
Event: ready
Event: slotmap-updated
error: slot not served by any node
Event: slotmap-updated
bar2
Event: free-context"

echo "$expected" | diff -u - "$testname.out" || exit 99

# Clean up
rm "$testname.out"

0 comments on commit 0196253

Please sign in to comment.