From e4c0f6147f17ba20a87e95099b92fcf79e74ca3e Mon Sep 17 00:00:00 2001 From: Micah Nagel Date: Wed, 11 Dec 2024 16:32:43 -0700 Subject: [PATCH] fix: retry logic for pepr store call (#1109) ## Description The primary change in this PR is to properly handle undefined errors in our `retryWithDelay` function. The jest tests added provide coverage of the retry function testing a number of scenarios. ## Related Issue No written issue, but token set failure seen on [this CI run](https://github.com/defenseunicorns/uds-core/actions/runs/12281459257/job/34270405413?pr=1107) and no retries observed in the logs. ## Type of change - [x] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Steps to Validate This one is harder to reproduce the problem on, so the jest tests are a good place to start for validating the expected functionality. To reproduce almost the exact scenario: ```console # Deploy slim dev, with the unicorn flavor to avoid rate limits uds run slim-dev --set flavor=unicorn # Delete pepr store to artifically add some store failure kubectl delete peprstores.pepr.dev -n pepr-system pepr-uds-core-store # Add a test namespace and package kubectl create ns test kubectl apply -f - < of setStoreToken failed, retrying in 2000ms." # This is where previously no retries would be attempted kubectl logs -n pepr-system -l app=pepr-uds-core-watcher --tail -1 | grep "failed, retrying" ``` ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide](https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md) followed --- .../controllers/keycloak/client-sync.ts | 4 +- src/pepr/operator/controllers/utils.spec.ts | 88 +++++++++++++++++++ src/pepr/operator/controllers/utils.ts | 23 +++-- tasks.yaml | 7 +- 4 files changed, 109 insertions(+), 13 deletions(-) create mode 100644 src/pepr/operator/controllers/utils.spec.ts diff --git a/src/pepr/operator/controllers/keycloak/client-sync.ts b/src/pepr/operator/controllers/keycloak/client-sync.ts index 0a25eead6..6153dd407 100644 --- a/src/pepr/operator/controllers/keycloak/client-sync.ts +++ b/src/pepr/operator/controllers/keycloak/client-sync.ts @@ -173,7 +173,9 @@ async function syncClient( // Write the new token to the store try { - await retryWithDelay(() => Store.setItemAndWait(name, client.registrationAccessToken!), log); + await retryWithDelay(async function setStoreToken() { + return Store.setItemAndWait(name, client.registrationAccessToken!); + }, log); } catch (err) { throw Error( `Failed to set token in store for client '${client.clientId}', package ` + diff --git a/src/pepr/operator/controllers/utils.spec.ts b/src/pepr/operator/controllers/utils.spec.ts new file mode 100644 index 000000000..22d53fedf --- /dev/null +++ b/src/pepr/operator/controllers/utils.spec.ts @@ -0,0 +1,88 @@ +/** + * Copyright 2024 Defense Unicorns + * SPDX-License-Identifier: AGPL-3.0-or-later OR LicenseRef-Defense-Unicorns-Commercial + */ + +import { beforeEach, describe, expect, it, jest } from "@jest/globals"; +import { Logger } from "pino"; +import { retryWithDelay } from "./utils"; + +describe("retryWithDelay", () => { + let mockLogger: Logger; + + beforeEach(() => { + mockLogger = { + warn: jest.fn(), + level: "info", + fatal: jest.fn(), + error: jest.fn(), + info: jest.fn(), + debug: jest.fn(), + trace: jest.fn(), + } as unknown as Logger; + }); + + beforeEach(() => {}); + + it("should succeed on the first attempt", async () => { + const mockFn = jest.fn<() => Promise>().mockResolvedValue("Success"); + + const result = await retryWithDelay(mockFn, mockLogger); + + expect(result).toBe("Success"); + expect(mockFn).toHaveBeenCalledTimes(1); // Called only once + expect(mockLogger.warn).not.toHaveBeenCalled(); // No warnings logged + }); + + it("should retry on failure and eventually succeed", async () => { + const mockFn = jest + .fn<() => Promise>() + .mockRejectedValueOnce(new Error("Fail on 1st try")) // Fail first attempt + .mockResolvedValue("Success"); // Succeed on retry + + const result = await retryWithDelay(mockFn, mockLogger, 3, 100); + + expect(result).toBe("Success"); + expect(mockFn).toHaveBeenCalledTimes(2); // Called twice (1 fail + 1 success) + expect(mockLogger.warn).toHaveBeenCalledTimes(1); // Warned once for the retry + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining("Attempt 1 of mockConstructor failed, retrying in 100ms."), + expect.objectContaining({ error: expect.any(String) }), + ); + }); + + it("should retry when function rejects without an error", async () => { + const mockFn = jest + .fn<() => Promise>() + .mockRejectedValueOnce(undefined) // Rejected with no error + .mockResolvedValue("Success"); // Succeed on retry + + const result = await retryWithDelay(mockFn, mockLogger, 3, 100); + + expect(result).toBe("Success"); + expect(mockFn).toHaveBeenCalledTimes(2); // Called twice (1 fail + 1 success) + expect(mockLogger.warn).toHaveBeenCalledTimes(1); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining("Attempt 1 of mockConstructor failed, retrying in 100ms."), + expect.objectContaining({ error: "Unknown Error" }), + ); + }); + + it("should throw the original error after max retries", async () => { + const error = new Error("Persistent failure"); + const mockFn = jest.fn<() => Promise>().mockRejectedValue(error); // Always fails + + await expect(retryWithDelay(mockFn, mockLogger, 3, 100)).rejects.toThrow("Persistent failure"); + + expect(mockFn).toHaveBeenCalledTimes(3); // Retries up to the limit + expect(mockLogger.warn).toHaveBeenCalledTimes(2); // Logged for each failure except the final one + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining("Attempt 1 of mockConstructor failed, retrying in 100ms."), + expect.objectContaining({ error: error.message }), + ); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining("Attempt 2 of mockConstructor failed, retrying in 100ms."), + expect.objectContaining({ error: error.message }), + ); + }); +}); diff --git a/src/pepr/operator/controllers/utils.ts b/src/pepr/operator/controllers/utils.ts index 25d740199..0a0559301 100644 --- a/src/pepr/operator/controllers/utils.ts +++ b/src/pepr/operator/controllers/utils.ts @@ -99,15 +99,22 @@ export async function retryWithDelay( if (attempt >= retries) { throw err; // Exceeded retries, rethrow the error. } - let error = `${JSON.stringify(err)}`; - // Error responses from network calls (i.e. K8s().Get() will be this shape) - if (err.data?.message) { - error = err.data.message; - // Other error types have a message - } else if (err.message) { - error = err.message; + // We need to account for cases where we are receiving a rejected promise with undefined error + let error = "Unknown Error"; + if (err) { + error = `${JSON.stringify(err)}`; + // Error responses from network calls (i.e. K8s().Get() will be this shape) + if (err.data?.message) { + error = err.data.message; + // Other error types have a message + } else if (err.message) { + error = err.message; + } } - log.warn(`Attempt ${attempt} of ${fn.name} failed, retrying in ${delayMs}ms.`, { error }); + log.warn( + `Attempt ${attempt} of ${fn.name || "anonymous function"} failed, retrying in ${delayMs}ms.`, + { error }, + ); await new Promise(resolve => setTimeout(resolve, delayMs)); } } diff --git a/tasks.yaml b/tasks.yaml index ce2a6986c..ae8390fd7 100644 --- a/tasks.yaml +++ b/tasks.yaml @@ -26,12 +26,11 @@ tasks: - description: "Create the dev cluster" task: setup:create-k3d-cluster - # Note: This currently is broken until https://github.com/zarf-dev/zarf/issues/2713 is resolved - # As a workaround you can edit the `src/istio/values/upstream-values.yaml` file to change ###ZARF_REGISTRY### to docker.io before running + # Note: the `registry-url` flag used here requires uds 0.19.2+ - description: "Deploy the Istio source package with Zarf Dev" - cmd: "uds zarf dev deploy src/istio --flavor upstream --no-progress" + cmd: "uds zarf dev deploy src/istio --flavor upstream --registry-url docker.io --no-progress" - # Note, this abuses the --flavor flag to only install the CRDs from this package - the "crds-only" flavor is not an explicit flavor of the package + # Note: this abuses the --flavor flag to only install the CRDs from this package - the "crds-only" flavor is not an explicit flavor of the package - description: "Deploy the Prometheus-Stack source package with Zarf Dev to only install the CRDs" cmd: "uds zarf dev deploy src/prometheus-stack --flavor crds-only --no-progress"