From cbdc14238feb43f210090ef62b01ba0ac40e9aa5 Mon Sep 17 00:00:00 2001 From: Steven Hartland Date: Mon, 14 Oct 2024 12:41:06 +0100 Subject: [PATCH] fix: slow shutdown after signal (#169) Fix shutdown after signal to allow the last client disconnect to trigger a prune check. Previous this would always waiting for the shutdown timeout, which by default is 10 minutes, resulting in an unnecessarily slow shutdown. --- reaper.go | 4 ++- reaper_test.go | 93 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/reaper.go b/reaper.go index 46f4161..ff5916f 100644 --- a/reaper.go +++ b/reaper.go @@ -297,7 +297,9 @@ func (r *reaper) pruneWait(ctx context.Context) (*resources, error) { case addr := <-r.disconnected: clients-- r.logger.Info("client disconnected", fieldAddress, addr, fieldClients, clients) - if clients == 0 && done != nil { + if clients == 0 { + // No clients connected, trigger prune check overriding + // any timeout set by shutdown signal. pruneCheck.Reset(r.cfg.ReconnectionTimeout) } case <-done: diff --git a/reaper_test.go b/reaper_test.go index 74480d4..c7e306d 100644 --- a/reaper_test.go +++ b/reaper_test.go @@ -516,38 +516,79 @@ func TestAbortedClient(t *testing.T) { } func TestShutdownTimeout(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) - t.Cleanup(cancel) - - var log safeBuffer - logger := withLogger(slog.New(slog.NewTextHandler(&log, &slog.HandlerOptions{ - Level: slog.LevelDebug, - }))) - tc := newRunTest() - cli := newMockClient(tc) - r, err := newReaper(ctx, logger, withClient(cli), testConfig) - require.NoError(t, err) + t.Run("slow-timeout", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) + t.Cleanup(cancel) + + var log safeBuffer + logger := withLogger(slog.New(slog.NewTextHandler(&log, &slog.HandlerOptions{ + Level: slog.LevelDebug, + }))) + tc := newRunTest() + cli := newMockClient(tc) + r, err := newReaper(ctx, logger, withClient(cli), testConfig) + require.NoError(t, err) - errCh := make(chan error, 1) - runCtx, runCancel := context.WithCancel(ctx) - t.Cleanup(runCancel) - go func() { - errCh <- r.run(runCtx) - }() + errCh := make(chan error, 1) + runCtx, runCancel := context.WithCancel(ctx) + t.Cleanup(runCancel) + go func() { + errCh <- r.run(runCtx) + }() + + testConnect(ctx, t, r.listener.Addr().String()) + runCancel() + + select { + case err = <-errCh: + require.NoError(t, err) + case <-ctx.Done(): + t.Fatal("timeout", log.String()) + } - require.NoError(t, err) + data := log.String() + require.Contains(t, data, "signal received") + require.Contains(t, data, "shutdown timeout") + require.Contains(t, data, "done") + }) - testConnect(ctx, t, r.listener.Addr().String()) - runCancel() + t.Run("fast-client-disconnect", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*1) + t.Cleanup(cancel) - select { - case err = <-errCh: + var log safeBuffer + logger := withLogger(slog.New(slog.NewTextHandler(&log, &slog.HandlerOptions{ + Level: slog.LevelDebug, + }))) + tc := newRunTest() + cli := newMockClient(tc) + r, err := newReaper(ctx, logger, withClient(cli), testConfig) require.NoError(t, err) - case <-ctx.Done(): - t.Fatal("timeout", log.String()) - } - require.Contains(t, log.String(), "shutdown timeout") + errCh := make(chan error, 1) + runCtx, runCancel := context.WithCancel(ctx) + t.Cleanup(runCancel) + go func() { + errCh <- r.run(runCtx) + }() + + connectCtx, connectCancel := context.WithTimeout(ctx, time.Millisecond*100) + t.Cleanup(connectCancel) + testConnect(connectCtx, t, r.listener.Addr().String()) + runCancel() + + select { + case err = <-errCh: + require.NoError(t, err) + case <-ctx.Done(): + t.Fatal("timeout", log.String()) + } + + data := log.String() + require.Contains(t, data, "signal received") + require.NotContains(t, data, "shutdown timeout") + require.Contains(t, data, "done") + }) } func TestReapContainer(t *testing.T) {