Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(vault): postpone vault reference resolving on init_worker #12554

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog/unreleased/kong/fix-vault-init-worker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
message: fix vault initialization by postponing vault reference resolving on init_worker
type: bugfix
scope: Core
6 changes: 3 additions & 3 deletions kong/db/schema/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1778,7 +1778,7 @@ function Schema:process_auto_fields(data, context, nulls, opts)
if err then
kong.log.warn("unable to resolve reference ", value, " (", err, ")")
else
kong.log.warn("unable to resolve reference ", value)
kong.log.notice("unable to resolve reference ", value)
end

value = ""
Expand Down Expand Up @@ -1817,7 +1817,7 @@ function Schema:process_auto_fields(data, context, nulls, opts)
if err then
kong.log.warn("unable to resolve reference ", value[i], " (", err, ")")
else
kong.log.warn("unable to resolve reference ", value[i])
kong.log.notice("unable to resolve reference ", value[i])
end

value[i] = ""
Expand Down Expand Up @@ -1863,7 +1863,7 @@ function Schema:process_auto_fields(data, context, nulls, opts)
if err then
kong.log.warn("unable to resolve reference ", v, " (", err, ")")
else
kong.log.warn("unable to resolve reference ", v)
kong.log.notice("unable to resolve reference ", v)
end

value[k] = ""
Expand Down
119 changes: 101 additions & 18 deletions kong/pdk/vault.lua
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ local function new(self)
local SECRETS_CACHE = ngx.shared.kong_secrets
local SECRETS_CACHE_MIN_TTL = ROTATION_INTERVAL * 2

local INIT_SECRETS = {}
local INIT_WORKER_SECRETS = {}
local STRATEGIES = {}
local SCHEMAS = {}
local CONFIGS = {}
Expand Down Expand Up @@ -618,7 +620,7 @@ local function new(self)

if not vault then
if err then
self.log.notice("could not find vault (", prefix, "): ", err)
return nil, fmt("could not find vault (%s): %s", prefix, err)
end

return nil, fmt("could not find vault (%s)", prefix)
Expand Down Expand Up @@ -823,10 +825,15 @@ local function new(self)
-- If the value is not found in these caches and `cache_only` is not `truthy`,
-- it attempts to retrieve the value from a vault.
--
-- On init worker phase the resolving of the secrets is postponed to a timer,
-- and in this case the function returns `""` when it fails to find a value
-- in a cache. This is because of current limitations in platform that disallows
-- using cosockets/coroutines in that phase.
--
-- @local
-- @function get
-- @tparam string reference the reference key to lookup
-- @tparam boolean cache_only optional boolean flag (if set to `true`,
-- @tparam[opt] boolean cache_only optional boolean flag (if set to `true`,
-- the function will not attempt to retrieve the value from the vault)
-- @treturn string the retrieved value corresponding to the provided reference,
-- or `nil` (when found negatively cached, or in case of an error)
Expand All @@ -843,19 +850,40 @@ local function new(self)

local strategy, err, config, cache_key, parsed_reference = get_strategy(reference)
if not strategy then
-- this can fail on init as the lmdb cannot be accessed and secondly,
-- because the data is not yet inserted into LMDB when using KONG_DECLARATIVE_CONFIG.
if get_phase() == "init" then
if not INIT_SECRETS[cache_key] then
INIT_SECRETS[reference] = true
INIT_SECRETS[#INIT_SECRETS + 1] = reference
end

return ""
end

return nil, err
end

value = SECRETS_CACHE:get(cache_key)
if cache_only and not value then
return nil, "could not find cached value"
end

if value == NEGATIVELY_CACHED_VALUE then
return nil
end

if not value then
if cache_only then
return nil, "could not find cached value"
end

-- this can fail on init worker as there is no cosockets / coroutines available
if get_phase() == "init_worker" then
if not INIT_WORKER_SECRETS[cache_key] then
INIT_WORKER_SECRETS[cache_key] = true
INIT_WORKER_SECRETS[#INIT_WORKER_SECRETS + 1] = cache_key
end

return ""
end

return get_from_vault(reference, strategy, config, cache_key, parsed_reference)
end

Expand Down Expand Up @@ -885,7 +913,7 @@ local function new(self)
-- update_from_cache("{vault://env/example}", record, "field" })
local function update_from_cache(reference, record, field)
local value, err = get(reference, true)
if not value then
if err then
self.log.warn("error updating secret reference ", reference, ": ", err)
end

Expand Down Expand Up @@ -1238,19 +1266,20 @@ local function new(self)


---
-- Function `rotate_secrets` rotates the secrets in the shared dictionary cache (SHDICT).
-- Function `rotate_secrets` rotates the secrets.
--
-- It iterates over all keys in the SHDICT and, if a key corresponds to a reference and the
-- It iterates over all keys in the secrets and, if a key corresponds to a reference and the
-- ttl of the key is less than or equal to the resurrection period, it refreshes the value
-- associated with the reference.
--
-- @local
-- @function rotate_secrets
-- @treturn boolean `true` after it has finished iterating over all keys in the SHDICT
local function rotate_secrets()
-- @tparam table secrets the secrets to rotate
-- @treturn boolean `true` after it has finished iterating over all keys in the secrets
local function rotate_secrets(secrets)
local phase = get_phase()
local caching_strategy = get_caching_strategy()
for _, cache_key in ipairs(SECRETS_CACHE:get_keys(0)) do
for _, cache_key in ipairs(secrets) do
yield(true, phase)

local ok, err = rotate_secret(cache_key, caching_strategy)
Expand All @@ -1264,20 +1293,69 @@ local function new(self)


---
-- A recurring secrets rotation timer handler.
-- Function `rotate_secrets_cache` rotates the secrets in the shared dictionary cache.
--
-- @local
-- @function rotate_secrets_cache
-- @treturn boolean `true` after it has finished iterating over all keys in the shared dictionary cache
local function rotate_secrets_cache()
return rotate_secrets(SECRETS_CACHE:get_keys(0))
end


---
-- Function `rotate_secrets_init_worker` rotates the secrets in init worker cache
--
-- On init worker the secret resolving is postponed to a timer because init worker
-- cannot cosockets / coroutines, and there is no other workaround currently.
--
-- @local
-- @function rotate_secrets_init_worker
-- @treturn boolean `true` after it has finished iterating over all keys in the init worker cache
local function rotate_secrets_init_worker()
local _, err, err2
if INIT_SECRETS then
_, err = rotate_references(INIT_SECRETS)
end

if INIT_WORKER_SECRETS then
_, err2 = rotate_secrets(INIT_WORKER_SECRETS)
end

if err or err2 then
return nil, err or err2
end

return true
end


---
-- A secrets rotation timer handler.
--
-- Uses a node-level mutex to prevent multiple threads/workers running it the same time.
--
-- @local
-- @function rotate_secrets_timer
-- @tparam boolean premature `true` if server is shutting down.
local function rotate_secrets_timer(premature)
-- @tparam boolean premature `true` if server is shutting down
-- @tparam[opt] boolean init `true` when this is a one of init_worker timer run
-- By default rotates the secrets in shared dictionary cache.
local function rotate_secrets_timer(premature, init)
if premature then
return
return true
end

local ok, err = concurrency.with_worker_mutex(ROTATION_MUTEX_OPTS, rotate_secrets)
local ok, err = concurrency.with_worker_mutex(ROTATION_MUTEX_OPTS, init and rotate_secrets_init_worker or rotate_secrets_cache)
if not ok and err ~= "timeout" then
self.log.err("rotating secrets failed (", err, ")")
end

if init then
INIT_SECRETS = nil
INIT_WORKER_SECRETS = nil
end

return true
end


Expand Down Expand Up @@ -1316,7 +1394,7 @@ local function new(self)
-- refresh all the secrets
local _, err = self.timer:named_at("secret-rotation-on-crud-event", 0, rotate_secrets_timer)
if err then
self.log.err("could not schedule timer to rotate vault secret references: ", err)
self.log.err("could not schedule timer to rotate vault secret references on crud event: ", err)
end
end

Expand Down Expand Up @@ -1345,6 +1423,11 @@ local function new(self)
if err then
self.log.err("could not schedule timer to rotate vault secret references: ", err)
end

local _, err = self.timer:named_at("secret-rotation-on-init", 0, rotate_secrets_timer, true)
if err then
self.log.err("could not schedule timer to rotate vault secret references on init: ", err)
end
end


Expand Down
Loading
Loading