From 0cc5b51aba9a3f07f0809193a04c9fbf9eccaa79 Mon Sep 17 00:00:00 2001
From: "Daniel A. Wozniak"
Date: Wed, 27 Dec 2023 16:25:19 -0700
Subject: [PATCH] Fix clustered master startup race condition

When starting up a cluster master, sending the initial aes key event
needs to wait for the event server to fully start. Prior to this change
we'd only try for one second and there was no logging of a failure.
---
 salt/channel/server.py                          | 5 ++++-
 tests/pytests/scenarios/cluster/test_cluster.py | 2 --
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/salt/channel/server.py b/salt/channel/server.py
index 5042aca8506e..045137363add 100644
--- a/salt/channel/server.py
+++ b/salt/channel/server.py
@@ -965,10 +965,13 @@ def send_aes_key_event(self):
         with salt.utils.event.get_master_event(
             self.opts, self.opts["sock_dir"], listen=False
         ) as event:
-            event.fire_event(
+            success = event.fire_event(
                 data,
                 salt.utils.event.tagify(self.opts["id"], "peer", "cluster"),
+                timeout=30000,  # 30 second timeout
             )
+            if not success:
+                log.error("Unable to send aes key event")
 
     def __getstate__(self):
         return {
diff --git a/tests/pytests/scenarios/cluster/test_cluster.py b/tests/pytests/scenarios/cluster/test_cluster.py
index 21dea04be8df..35899a2a1e93 100644
--- a/tests/pytests/scenarios/cluster/test_cluster.py
+++ b/tests/pytests/scenarios/cluster/test_cluster.py
@@ -55,8 +55,6 @@ def test_cluster_key_rotation(
         if time.monotonic() - start > timeout:
             assert False, f"Drop file never removed {dfpath}"
 
-    time.sleep(30)
-
     keys = set()
 
     # Validate the aes session key for all masters match