Skip to content

Commit

Permalink
Fix clustered master startup race condition
Browse files Browse the repository at this point in the history
When starting up a cluster master, sending the initial AES key event
needs to wait for the event server to fully start. Prior to this change,
we would only try for one second, and a failure was not logged.
  • Loading branch information
dwoz committed Dec 27, 2023
1 parent 0a88399 commit 0cc5b51
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 4 additions & 1 deletion salt/channel/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,10 +965,13 @@ def send_aes_key_event(self):
with salt.utils.event.get_master_event(
self.opts, self.opts["sock_dir"], listen=False
) as event:
event.fire_event(
success = event.fire_event(
data,
salt.utils.event.tagify(self.opts["id"], "peer", "cluster"),
timeout=30000, # 30 second timeout
)
if not success:
log.error("Unable to send aes key event")

def __getstate__(self):
return {
Expand Down
2 changes: 0 additions & 2 deletions tests/pytests/scenarios/cluster/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ def test_cluster_key_rotation(
if time.monotonic() - start > timeout:
assert False, f"Drop file never removed {dfpath}"

time.sleep(30)

keys = set()

# Validate the aes session key for all masters match
Expand Down

0 comments on commit 0cc5b51

Please sign in to comment.