From 7d8dba81d7171f5794f4620ce150dfbcf8db5ea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Tue, 29 Aug 2023 11:48:14 +0200
Subject: [PATCH 1/6] Update slotmap when redisClusterCommandToNode() fails

---
 hircluster.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/hircluster.c b/hircluster.c
index 7ea8eec..a984496 100644
--- a/hircluster.c
+++ b/hircluster.c
@@ -3037,6 +3037,7 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
     va_list ap;
     int ret;
     void *reply;
+    int updating_slotmap = 0;
 
     c = ctx_get_by_node(cc, node);
     if (c == NULL) {
@@ -3055,11 +3056,28 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
         return NULL;
     }
 
+    if (cc->need_update_route) {
+        /* Pipeline slotmap update on the same connection. */
+        if (clusterUpdateRouteSendCommand(cc, c) == REDIS_OK) {
+            updating_slotmap = 1;
+        }
+    }
+
     if (redisGetReply(c, &reply) != REDIS_OK) {
         __redisClusterSetError(cc, c->err, c->errstr);
+        cc->need_update_route = 1;
         return NULL;
     }
 
+    if (updating_slotmap) {
+        /* Handle reply from pipelined CLUSTER SLOTS or CLUSTER NODES. */
+        if (clusterUpdateRouteHandleReply(cc, c) != REDIS_OK) {
+            /* Ignore error. Update will be triggered on the next command. */
+            cc->err = 0;
+            cc->errstr[0] = '\0';
+        }
+    }
+
     return reply;
 }
 

From 17bef44c6066d0594fbba483ded63f3df1bc2396 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Tue, 29 Aug 2023 11:59:23 +0200
Subject: [PATCH 2/6] Fixup: Skip update slotmap on OOM

---
 hircluster.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hircluster.c b/hircluster.c
index a984496..6d01e95 100644
--- a/hircluster.c
+++ b/hircluster.c
@@ -3065,7 +3065,8 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
 
     if (redisGetReply(c, &reply) != REDIS_OK) {
         __redisClusterSetError(cc, c->err, c->errstr);
-        cc->need_update_route = 1;
+        if (c->err != REDIS_ERR_OOM)
+            cc->need_update_route = 1;
         return NULL;
     }
 

From 5d49e12038aa0743194b0d8e7b48e48f4bef3a65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Wed, 30 Aug 2023 14:26:57 +0200
Subject: [PATCH 3/6] Add dbsize-to-all-nodes-during-scaledown-test for sync
 API and some corrections

---
 hircluster.c                                             | 5 +++++
 tests/CMakeLists.txt                                     | 4 ++++
 tests/clusterclient.c                                    | 4 ++++
 .../scripts/dbsize-to-all-nodes-during-scaledown-test.sh | 9 +++++----
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/hircluster.c b/hircluster.c
index 6d01e95..7ea0c82 100644
--- a/hircluster.c
+++ b/hircluster.c
@@ -3047,6 +3047,11 @@ void *redisClusterCommandToNode(redisClusterContext *cc, redisClusterNode *node,
         return NULL;
     }
 
+    if (cc->err) {
+        cc->err = 0;
+        memset(cc->errstr, '\0', sizeof(cc->errstr));
+    }
+
     va_start(ap, format);
     ret = redisvAppendCommand(c, format, ap);
     va_end(ap);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 52ae107..ac13eef 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -197,6 +197,10 @@ add_test(NAME dbsize-to-all-nodes-test-async
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
 add_test(NAME dbsize-to-all-nodes-during-scaledown-test
          COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh"
+                 "$<TARGET_FILE:clusterclient>"
+                 WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
+add_test(NAME dbsize-to-all-nodes-during-scaledown-test-async
+         COMMAND "${CMAKE_SOURCE_DIR}/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh"
                  "$<TARGET_FILE:clusterclient_async_sequence>"
                  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/tests/scripts/")
 add_test(NAME reconnect-test
diff --git a/tests/clusterclient.c b/tests/clusterclient.c
index 83dda6e..8bb998a 100644
--- a/tests/clusterclient.c
+++ b/tests/clusterclient.c
@@ -118,6 +118,10 @@ int main(int argc, char **argv) {
                     printReply(reply);
                 }
                 freeReplyObject(reply);
+                if (ni.route_version != cc->route_version) {
+                    /* Updated slotmap resets the iterator. Abort iteration. */
+                    break;
+                }
             }
         } else {
             redisReply *reply = redisClusterCommand(cc, command);
diff --git a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh
index 315be87..3186346 100755
--- a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh
+++ b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test.sh
@@ -31,11 +31,12 @@ EXPECT ["DBSIZE"]
 SEND 10
 EXPECT ["DBSIZE"]
 SEND 11
-# The second command to node2 fails which triggers a slotmap update.
-EXPECT ["CLUSTER", "SLOTS"]
-SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
 EXPECT ["DBSIZE"]
 SEND 12
+# The second command to node2 fails which triggers a slotmap update pipelined
+# onto the 3rd DBSIZE to this node.
+EXPECT ["CLUSTER", "SLOTS"]
+SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
 EXPECT CLOSE
 EOF
 server1=$!
@@ -84,8 +85,8 @@ fi
 # Check the output from clusterclient
 expected="10
 20
-error: Server closed the connection
 11
+error: Server closed the connection
 12"
 
 echo "$expected" | diff -u - "$testname.out" || exit 99

From d89123ca8c2a7c8b70ca218d71d03c7d880c5311 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Wed, 30 Aug 2023 14:38:56 +0200
Subject: [PATCH 4/6] Add back the renamed
 dbsize-to-all-nodes-during-scaledown-test-async for async API

---
 ...o-all-nodes-during-scaledown-test-async.sh | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100755 tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh

diff --git a/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh
new file mode 100755
index 0000000..d2c1a77
--- /dev/null
+++ b/tests/scripts/dbsize-to-all-nodes-during-scaledown-test-async.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+#
+# Verify that commands can be sent using the low-level API to specific
+# nodes. The testcase will send each command to all known nodes and
+# verify the behaviour when a node is removed from the cluster.
+#
+# First the command DBSIZE is sent to all (two) nodes successfully,
+# then the second node is shutdown. The next DBSIZE command that is sent
+# triggers a slotmap update due to the lost node and all following commands
+# will therefor only be sent to a single node.
+#
+# Usage: $0 /path/to/clusterclient-binary
+
+clientprog=${1:-./clusterclient_async}
+testname=dbsize-to-all-nodes-during-scaledown-test-async
+
+# Sync processes waiting for CONT signals.
+perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
+syncpid1=$!;
+perl -we 'use sigtrap "handler", sub{exit}, "CONT"; sleep 1; die "timeout"' &
+syncpid2=$!;
+
+# Start simulated redis node #1
+timeout 5s ./simulated-redis.pl -p 7401 -d --sigcont $syncpid1 <<'EOF' &
+EXPECT CONNECT
+EXPECT ["CLUSTER", "SLOTS"]
+SEND [[0, 8383, ["127.0.0.1", 7401, "nodeid7401"]], [8384, 16383, ["127.0.0.1", 7402, "nodeid7402"]]]
+EXPECT CLOSE
+EXPECT CONNECT
+EXPECT ["DBSIZE"]
+SEND 10
+EXPECT ["DBSIZE"]
+SEND 11
+# The second command to node2 fails which triggers a slotmap update.
+EXPECT ["CLUSTER", "SLOTS"]
+SEND [[0, 16383, ["127.0.0.1", 7401, "nodeid7401"]]]
+EXPECT ["DBSIZE"]
+SEND 12
+EXPECT CLOSE
+EOF
+server1=$!
+
+# Start simulated redis node #2
+timeout 5s ./simulated-redis.pl -p 7402 -d --sigcont $syncpid2 <<'EOF' &
+EXPECT CONNECT
+EXPECT ["DBSIZE"]
+SEND 20
+CLOSE
+EOF
+server2=$!
+
+# Wait until both nodes are ready to accept client connections
+wait $syncpid1 $syncpid2;
+
+# Run client
+timeout 5s "$clientprog" 127.0.0.1:7401 > "$testname.out" <<'EOF'
+!all
+DBSIZE
+DBSIZE
+# Allow slotmap update to finish.
+!sleep
+DBSIZE
+EOF
+clientexit=$?
+
+# Wait for servers to exit
+wait $server1; server1exit=$?
+wait $server2; server2exit=$?
+
+# Check exit statuses
+if [ $server1exit -ne 0 ]; then
+    echo "Simulated server #1 exited with status $server1exit"
+    exit $server1exit
+fi
+if [ $server2exit -ne 0 ]; then
+    echo "Simulated server #2 exited with status $server2exit"
+    exit $server2exit
+fi
+if [ $clientexit -ne 0 ]; then
+    echo "$clientprog exited with status $clientexit"
+    exit $clientexit
+fi
+
+# Check the output from clusterclient
+expected="10
+20
+error: Server closed the connection
+11
+12"
+
+echo "$expected" | diff -u - "$testname.out" || exit 99
+
+# Clean up
+rm "$testname.out"

From 56810abc56db5cdd7e521d0f5e7ea63ec29564f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Thu, 31 Aug 2023 17:33:32 +0200
Subject: [PATCH 5/6] Add docs in README

---
 README.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/README.md b/README.md
index 4b27f6d..a6fecc4 100644
--- a/README.md
+++ b/README.md
@@ -276,6 +276,14 @@ If the cluster topology has changed the Redis node might respond with a redirect
 which the client will handle, update its slotmap and resend the command to correct node.
 The reply will in this case arrive from the correct node.
 
+If a node is unreachable, for example if the command times out or if the connect
+times out, it can indicated that there has been a failover and the node is no
+longer part of the cluster. In this case, `redisClusterCommand` returns NULL and
+sets `err` and `errstr` on the cluster context, but additionally, hiredis
+cluster schedules a slotmap updated to be performed when the next command is
+sent. That means that if you try the same command again, there is a good chance
+the command will be sent to another node and the command can succeed.
+
 ### Sending multi-key commands
 
 Hiredis-cluster supports mget/mset/del multi-key commands.
@@ -297,6 +305,10 @@ reply = redisClusterCommandToNode(clustercontext, node, "DBSIZE");
 This function handles printf like arguments similar to `redisClusterCommand()`, but will
 only attempt to send the command to the given node and will not perform redirects or retries.
 
+If the command times out or the connection to the node fails, slotmap update is
+scheduled to be performed by the next command. `redisClusterCommandToNode` also
+performs a slotmap update if it has previously been scheduled.
+
 ### Teardown
 
 To disconnect and free the context the following function can be used:
@@ -547,6 +559,21 @@ set of master nodes. There is no bookkeeping for already iterated nodes when a r
 which means that a node can be iterated over more than once depending on when the slotmap update happened
 and the change of cluster nodes.
 
+Note that when `redisClusterCommandToNode` is called, a slotmap update can
+happen if it has been scheduled by the previous command, for example if the
+previous call to `redisClusterCommandToNode` timed out or the node wasn't
+reachable.
+
+To detect when the slotmap has been updated, you can check if the iterator's
+slotmap version (`iter.route_version`) is equal to the current cluster context's
+slotmap version (`cc->route_version`). If it isn't, it means that the slotmap
+has been updated and the iterator will restart itself at the next call to
+`redisClusterNodeNext`.
+
+Another way to detect that the slotmap has been updated is to [register an event
+callback](#events-per-cluster-context) and look for the event
+`HIRCLUSTER_EVENT_SLOTMAP_UPDATED`.
+
 ### Random number generator
 
 This library uses [random()](https://linux.die.net/man/3/random) while selecting

From 28fbd615f820ce70197dafaea0e6e6ad26b0811c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= <viktor.soderqvist@est.tech>
Date: Thu, 31 Aug 2023 17:36:24 +0200
Subject: [PATCH 6/6] Fixup: grammar/spelling

---
 README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index a6fecc4..88f34a8 100644
--- a/README.md
+++ b/README.md
@@ -280,9 +280,9 @@ If a node is unreachable, for example if the command times out or if the connect
 times out, it can indicated that there has been a failover and the node is no
 longer part of the cluster. In this case, `redisClusterCommand` returns NULL and
 sets `err` and `errstr` on the cluster context, but additionally, hiredis
-cluster schedules a slotmap updated to be performed when the next command is
+cluster schedules a slotmap update to be performed when the next command is
 sent. That means that if you try the same command again, there is a good chance
-the command will be sent to another node and the command can succeed.
+the command will be sent to another node and the command may succeed.
 
 ### Sending multi-key commands
 
@@ -305,9 +305,10 @@ reply = redisClusterCommandToNode(clustercontext, node, "DBSIZE");
 This function handles printf like arguments similar to `redisClusterCommand()`, but will
 only attempt to send the command to the given node and will not perform redirects or retries.
 
-If the command times out or the connection to the node fails, slotmap update is
-scheduled to be performed by the next command. `redisClusterCommandToNode` also
-performs a slotmap update if it has previously been scheduled.
+If the command times out or the connection to the node fails, a slotmap update
+is scheduled to be performed when the next command is sent.
+`redisClusterCommandToNode` also performs a slotmap update if it has previously
+been scheduled.
 
 ### Teardown