From 35221f90060bcc5a704ea481b48e8fb1c9341e43 Mon Sep 17 00:00:00 2001 From: windmgc Date: Tue, 11 Jun 2024 17:49:32 +0800 Subject: [PATCH 1/5] fix(vault): apply retry count maximum threshold for secret rotation --- kong/pdk/vault.lua | 15 +++++++++++++++ .../13-vaults/07-resurrect_spec.lua | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/kong/pdk/vault.lua b/kong/pdk/vault.lua index 4a29f405aff8..2bf6ef0dc21e 100644 --- a/kong/pdk/vault.lua +++ b/kong/pdk/vault.lua @@ -51,6 +51,8 @@ local decode_json = cjson.decode local NEGATIVELY_CACHED_VALUE = "\0" local ROTATION_INTERVAL = tonumber(os.getenv("KONG_VAULT_ROTATION_INTERVAL"), 10) or 60 +local SECRETS_RETRY_KEY_PREFIX = "retry_count:" +local SECRETS_RETRY_COUNT_THRESHOLD = tonumber(os.getenv("KONG_VAULT_SECRETS_RETRY_COUNT_THRESHOLD"), 10) or 60 local DAO_MAX_TTL = constants.DATABASE.DAO_MAX_TTL @@ -1238,6 +1240,10 @@ local function new(self) -- @treturn true|nil `true` after successfully rotating a secret, otherwise `nil` -- @treturn string|nil a string describing an error if there was one local function rotate_secret(old_cache_key, caching_strategy) + if old_cache_key:sub(1, #SECRETS_RETRY_KEY_PREFIX) == SECRETS_RETRY_KEY_PREFIX then + return true + end + local reference, err = parse_cache_key(old_cache_key) if not reference then -- invalid cache keys are removed (in general should never happen) @@ -1274,6 +1280,15 @@ local function new(self) -- we should refresh the secret at this point local ok, err = get_from_vault(reference, strategy, config, new_cache_key, parsed_reference) if not ok then + local retry_count = tonumber(SECRETS_CACHE:get(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) or 0, 10) + if retry_count >= SECRETS_RETRY_COUNT_THRESHOLD then + SECRETS_CACHE:delete(new_cache_key) + SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) + return nil, fmt("could not retrieve value for reference %s (%s) after %d retries, removing from cache and stop rotation", reference, err, SECRETS_RETRY_COUNT_THRESHOLD) + + else + SECRETS_CACHE:incr(SECRETS_RETRY_KEY_PREFIX .. new_cache_key, 1, 0) + end return nil, fmt("could not retrieve value for reference %s (%s)", reference, err) end diff --git a/spec/02-integration/13-vaults/07-resurrect_spec.lua b/spec/02-integration/13-vaults/07-resurrect_spec.lua index 0f4ca99422de..31d49a5c179a 100644 --- a/spec/02-integration/13-vaults/07-resurrect_spec.lua +++ b/spec/02-integration/13-vaults/07-resurrect_spec.lua @@ -133,6 +133,7 @@ describe("vault resurrect_ttl and rotation (#" .. strategy .. ") #" .. vault.nam lazy_setup(function() helpers.setenv("KONG_LUA_PATH_OVERRIDE", LUA_PATH) helpers.setenv("KONG_VAULT_ROTATION_INTERVAL", "1") + helpers.setenv("KONG_VAULT_SECRETS_RETRY_COUNT_THRESHOLD", "2") vault:setup() vault:create_secret(secret, "init") @@ -225,9 +226,12 @@ describe("vault resurrect_ttl and rotation (#" .. strategy .. ") #" .. vault.nam vault:update_secret(secret, "old", { ttl = 2, resurrect_ttl = 2 }) check_plugin_secret("old", 5) vault:delete_secret(secret) + helpers.clean_logfile() ngx.sleep(2.5) check_plugin_secret("old", 5) check_plugin_secret("", 5) + + assert.logfile().has.line(fmt([[could not retrieve value for reference .*vault.*%s\/%s.* after 2 retries, removing from cache and stop rotation]], vault.prefix, secret), false, 4) end) end) From ef8ae0891e719aa99b4e4b5e08bc254f4efb97ad Mon Sep 17 00:00:00 2001 From: windmgc Date: Tue, 11 Jun 2024 17:53:12 +0800 Subject: [PATCH 2/5] docs(changelog): add changelog --- .../fix-vault-rotation-remove-refs-exceed-retry-count.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelog/unreleased/kong/fix-vault-rotation-remove-refs-exceed-retry-count.yml diff --git a/changelog/unreleased/kong/fix-vault-rotation-remove-refs-exceed-retry-count.yml b/changelog/unreleased/kong/fix-vault-rotation-remove-refs-exceed-retry-count.yml new file mode 100644 index 000000000000..c6ea587f4bab --- /dev/null +++ b/changelog/unreleased/kong/fix-vault-rotation-remove-refs-exceed-retry-count.yml @@ -0,0 +1,5 @@ +message: | + Fixed an issue where vault references that no longer used would still be rotated. A maximum retry count threshold is added + with a default value 60, to prevent the reference from being rotated indefinitely. +type: bugfix +scope: Core From bf3b914930321c9d7c909b5a4daccb4758f36d1d Mon Sep 17 00:00:00 2001 From: windmgc Date: Tue, 11 Jun 2024 19:55:57 +0800 Subject: [PATCH 3/5] fix(vault): reset retry count when resolve successfully --- kong/pdk/vault.lua | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kong/pdk/vault.lua b/kong/pdk/vault.lua index 2bf6ef0dc21e..6cbda46df9dd 100644 --- a/kong/pdk/vault.lua +++ b/kong/pdk/vault.lua @@ -1281,6 +1281,8 @@ local function new(self) local ok, err = get_from_vault(reference, strategy, config, new_cache_key, parsed_reference) if not ok then local retry_count = tonumber(SECRETS_CACHE:get(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) or 0, 10) + -- secrets that are failed resolving for more than SECRETS_RETRY_COUNT_THRESHOLD + -- times will be removed from the cache and stop rotation if retry_count >= SECRETS_RETRY_COUNT_THRESHOLD then SECRETS_CACHE:delete(new_cache_key) SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) @@ -1290,6 +1292,9 @@ local function new(self) SECRETS_CACHE:incr(SECRETS_RETRY_KEY_PREFIX .. new_cache_key, 1, 0) end return nil, fmt("could not retrieve value for reference %s (%s)", reference, err) + + else + SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) end return true From 6df34cfc017fba7a39d634a412ebac761adb47eb Mon Sep 17 00:00:00 2001 From: windmgc Date: Tue, 11 Jun 2024 20:01:13 +0800 Subject: [PATCH 4/5] fix(vault): cleanup old count --- kong/pdk/vault.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kong/pdk/vault.lua b/kong/pdk/vault.lua index 6cbda46df9dd..136214364eeb 100644 --- a/kong/pdk/vault.lua +++ b/kong/pdk/vault.lua @@ -1248,6 +1248,7 @@ local function new(self) if not reference then -- invalid cache keys are removed (in general should never happen) SECRETS_CACHE:delete(old_cache_key) + SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. old_cache_key) return nil, err end @@ -1255,12 +1256,14 @@ local function new(self) if not strategy then -- invalid cache keys are removed (e.g. a vault entity could have been removed) SECRETS_CACHE:delete(old_cache_key) + SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. old_cache_key) return nil, fmt("could not parse reference %s (%s)", reference, err) end if old_cache_key ~= new_cache_key then -- config has changed, thus the old cache key can be removed SECRETS_CACHE:delete(old_cache_key) + SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. old_cache_key) end -- The ttl for this key, is the TTL + the resurrect time From 861139291b5030bfa2173f6f7376db5943b70fb3 Mon Sep 17 00:00:00 2001 From: windmgc Date: Tue, 9 Jul 2024 13:44:24 +0800 Subject: [PATCH 5/5] style(*): rename var --- kong/pdk/vault.lua | 8 ++++---- spec/02-integration/13-vaults/07-resurrect_spec.lua | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kong/pdk/vault.lua b/kong/pdk/vault.lua index 136214364eeb..1813eb2eee16 100644 --- a/kong/pdk/vault.lua +++ b/kong/pdk/vault.lua @@ -52,7 +52,7 @@ local decode_json = cjson.decode local NEGATIVELY_CACHED_VALUE = "\0" local ROTATION_INTERVAL = tonumber(os.getenv("KONG_VAULT_ROTATION_INTERVAL"), 10) or 60 local SECRETS_RETRY_KEY_PREFIX = "retry_count:" -local SECRETS_RETRY_COUNT_THRESHOLD = tonumber(os.getenv("KONG_VAULT_SECRETS_RETRY_COUNT_THRESHOLD"), 10) or 60 +local SECRETS_ROTATION_MAX_RETRIES = tonumber(os.getenv("KONG_VAULT_SECRETS_ROTATION_MAX_RETRIES"), 10) or 60 local DAO_MAX_TTL = constants.DATABASE.DAO_MAX_TTL @@ -1284,12 +1284,12 @@ local function new(self) local ok, err = get_from_vault(reference, strategy, config, new_cache_key, parsed_reference) if not ok then local retry_count = tonumber(SECRETS_CACHE:get(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) or 0, 10) - -- secrets that are failed resolving for more than SECRETS_RETRY_COUNT_THRESHOLD + -- secrets that are failed resolving for more than SECRETS_ROTATION_MAX_RETRIES -- times will be removed from the cache and stop rotation - if retry_count >= SECRETS_RETRY_COUNT_THRESHOLD then + if retry_count >= SECRETS_ROTATION_MAX_RETRIES then SECRETS_CACHE:delete(new_cache_key) SECRETS_CACHE:delete(SECRETS_RETRY_KEY_PREFIX .. new_cache_key) - return nil, fmt("could not retrieve value for reference %s (%s) after %d retries, removing from cache and stop rotation", reference, err, SECRETS_RETRY_COUNT_THRESHOLD) + return nil, fmt("could not retrieve value for reference %s (%s) after %d retries, removing from cache and stop rotation", reference, err, SECRETS_ROTATION_MAX_RETRIES) else SECRETS_CACHE:incr(SECRETS_RETRY_KEY_PREFIX .. new_cache_key, 1, 0) diff --git a/spec/02-integration/13-vaults/07-resurrect_spec.lua b/spec/02-integration/13-vaults/07-resurrect_spec.lua index 31d49a5c179a..d9ac84122eb7 100644 --- a/spec/02-integration/13-vaults/07-resurrect_spec.lua +++ b/spec/02-integration/13-vaults/07-resurrect_spec.lua @@ -133,7 +133,7 @@ describe("vault resurrect_ttl and rotation (#" .. strategy .. ") #" .. vault.nam lazy_setup(function() helpers.setenv("KONG_LUA_PATH_OVERRIDE", LUA_PATH) helpers.setenv("KONG_VAULT_ROTATION_INTERVAL", "1") - helpers.setenv("KONG_VAULT_SECRETS_RETRY_COUNT_THRESHOLD", "2") + helpers.setenv("KONG_VAULT_SECRETS_ROTATION_MAX_RETRIES", "2") vault:setup() vault:create_secret(secret, "init")