Skip to content

Commit

Permalink
perf(core): reduce LMDB size by optimizing key format (#14028)
Browse files Browse the repository at this point in the history
1. redesign the key format for LMDB
2. remove the duplicate keys that were stored under the `*` (global) workspace
3. search all workspaces when a caller invokes the `select()` API without providing a specific workspace id

KAG-5704
  • Loading branch information
chobits authored Dec 23, 2024
1 parent d07425b commit 1458161
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 44 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
message: "Reduced the LMDB storage space by optimizing the key format."
type: performance
scope: Core
64 changes: 37 additions & 27 deletions kong/db/declarative/import.lua
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ local function workspace_id(schema, options)
return get_workspace_id()
end

-- global query, like routes:each(page_size, GLOBAL_QUERY_OPTS)
if options.workspace == null then
return GLOBAL_WORKSPACE_TAG
end
Expand Down Expand Up @@ -243,26 +244,39 @@ local function find_ws(entities, name)
end


-- unique key
local function unique_field_key(schema_name, ws_id, field, value)
return string_format("%s|%s|%s|%s", schema_name, ws_id, field, sha256_hex(value))
return string_format("U|%s|%s|%s|%s", schema_name, field, ws_id, sha256_hex(value))
end


-- foreign key
local function foreign_field_key_prefix(schema_name, ws_id, field, foreign_id)
return string_format("%s|%s|%s|%s|", schema_name, ws_id, field, foreign_id)
if ws_id == GLOBAL_WORKSPACE_TAG then
return string_format("F|%s|%s|%s|", schema_name, field, foreign_id)
end

return string_format("F|%s|%s|%s|%s|", schema_name, field, foreign_id, ws_id)
end


-- Builds the complete foreign-key index key for a single entity by appending
-- the entity's primary key string to the prefix returned by
-- foreign_field_key_prefix().
--
-- @param schema_name name of the entity schema
-- @param ws_id       concrete workspace id; must not be GLOBAL_WORKSPACE_TAG
-- @param field       name of the foreign field
-- @param foreign_id  primary key string of the referenced entity
-- @param pk          primary key string of the entity being indexed
-- @return the key string: prefix followed by `pk`
local function foreign_field_key(schema_name, ws_id, field, foreign_id, pk)
  -- For the global tag the prefix helper returns a prefix without a workspace
  -- segment, so a full key built from it would not name any workspace.
  assert(ws_id ~= GLOBAL_WORKSPACE_TAG,
         "foreign_field_key requires a concrete workspace id, " ..
         "not the global workspace tag")

  return foreign_field_key_prefix(schema_name, ws_id, field, foreign_id) .. pk
end

-- item key
local function item_key_prefix(schema_name, ws_id)
return string_format("%s|%s|*|", schema_name, ws_id)
if ws_id == GLOBAL_WORKSPACE_TAG then
return string_format("I|%s|", schema_name)
end

return string_format("I|%s|%s|", schema_name, ws_id)
end


-- Builds the complete item key for a single entity by appending its primary
-- key string to the prefix returned by item_key_prefix().
--
-- @param schema_name name of the entity schema
-- @param ws_id       concrete workspace id; must not be GLOBAL_WORKSPACE_TAG
-- @param pk_str      primary key string of the entity
-- @return the key string: prefix followed by `pk_str`
local function item_key(schema_name, ws_id, pk_str)
  -- For the global tag the prefix helper returns a prefix without a workspace
  -- segment, so a full key built from it would not name any workspace.
  assert(ws_id ~= GLOBAL_WORKSPACE_TAG,
         "item_key requires a concrete workspace id, " ..
         "not the global workspace tag")

  return item_key_prefix(schema_name, ws_id) .. pk_str
end

Expand Down Expand Up @@ -307,10 +321,6 @@ local function _set_entity_for_txn(t, entity_name, item, options, is_delete)
-- store serialized entity into lmdb
t:set(itm_key, itm_value)

-- for global query
local global_key = item_key(entity_name, GLOBAL_WORKSPACE_TAG, pk)
t:set(global_key, idx_value)

-- select_by_cache_key
if schema.cache_key then
local cache_key = dao:cache_key(item)
Expand Down Expand Up @@ -347,12 +357,9 @@ local function _set_entity_for_txn(t, entity_name, item, options, is_delete)
value_str = pk_string(kong.db[fdata_reference].schema, value)
end

for _, wid in ipairs {field_ws_id, GLOBAL_WORKSPACE_TAG} do
local key = unique_field_key(entity_name, wid, fname, value_str or value)

-- store item_key or nil into lmdb
t:set(key, idx_value)
end
local key = unique_field_key(entity_name, field_ws_id, fname, value_str or value)
-- store item_key or nil into lmdb
t:set(key, idx_value)
end

if is_foreign then
Expand All @@ -361,12 +368,9 @@ local function _set_entity_for_txn(t, entity_name, item, options, is_delete)

value_str = pk_string(kong.db[fdata_reference].schema, value)

for _, wid in ipairs {field_ws_id, GLOBAL_WORKSPACE_TAG} do
local key = foreign_field_key(entity_name, wid, fname, value_str, pk)

-- store item_key or nil into lmdb
t:set(key, idx_value)
end
local key = foreign_field_key(entity_name, field_ws_id, fname, value_str, pk)
-- store item_key or nil into lmdb
t:set(key, idx_value)
end

::continue::
Expand All @@ -380,18 +384,22 @@ end
-- the provided LMDB txn object, this operation is only safe
-- if the entity does not already exist inside the LMDB database
--
-- The actual item key is: <entity_name>|<ws_id>|*|<pk_string>
-- The actual item key is: I|<entity_name>|<ws_id>|<pk_string>
--
-- This function sets the following:
-- This function sets the following key-value pairs:
--
-- * <entity_name>|<ws_id>|*|<pk_string> => serialized item
-- * <entity_name>|*|*|<pk_string> => actual item key
-- key: I|<entity_name>|<ws_id>|<pk_string>
-- value: serialized item
--
-- * <entity_name>|<ws_id>|<unique_field_name>|sha256(field_value) => actual item key
-- * <entity_name>|*|<unique_field_name>|sha256(field_value) => actual item key
-- key: U|<entity_name>|<unique_field_name>|<ws_id>|sha256(field_value)
-- value: actual item key
--
-- * <entity_name>|<ws_id>|<foreign_field_name>|<foreign_key>|<pk_string> => actual item key
-- * <entity_name>|*|<foreign_field_name>|<foreign_key>|<pk_string> => actual item key
-- key: F|<entity_name>|<foreign_field_name>|<foreign_key>|<ws_id>|<pk_string>
-- value: actual item key
--
-- The format of the key string follows the sequence of the construction order:
-- `item type > entity name > specific item info > workspace id > item uuid`
-- This order makes it easier to query all entities using API lmdb_prefix.page().
--
-- DO NOT touch `item`, or else the entity will be changed
local function insert_entity_for_txn(t, entity_name, item, options)
Expand Down Expand Up @@ -608,4 +616,6 @@ return {
load_into_cache_with_events = load_into_cache_with_events,
insert_entity_for_txn = insert_entity_for_txn,
delete_entity_for_txn = delete_entity_for_txn,

GLOBAL_WORKSPACE_TAG = GLOBAL_WORKSPACE_TAG,
}
1 change: 1 addition & 0 deletions kong/db/declarative/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ _M.load_into_cache_with_events = declarative_import.load_into_cache_with_events
_M.insert_entity_for_txn = declarative_import.insert_entity_for_txn
_M.delete_entity_for_txn = declarative_import.delete_entity_for_txn
_M.workspace_id = declarative_import.workspace_id
_M.GLOBAL_WORKSPACE_TAG = declarative_import.GLOBAL_WORKSPACE_TAG


return _M
36 changes: 25 additions & 11 deletions kong/db/strategies/off/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ local item_key = declarative.item_key
local item_key_prefix = declarative.item_key_prefix
local workspace_id = declarative.workspace_id
local foreign_field_key_prefix = declarative.foreign_field_key_prefix
local GLOBAL_WORKSPACE_TAG = declarative.GLOBAL_WORKSPACE_TAG


local PROCESS_AUTO_FIELDS_OPTS = {
Expand All @@ -38,11 +39,6 @@ _mt.__index = _mt
local UNINIT_WORKSPACE_ID = "00000000-0000-0000-0000-000000000000"


local function need_follow(ws_id)
return ws_id == "*"
end


local function get_default_workspace()
if kong.default_workspace == UNINIT_WORKSPACE_ID then
local res = kong.db.workspaces:select_by_name("default")
Expand Down Expand Up @@ -221,11 +217,11 @@ end
-- ws_id here.
local function page_for_tags(self, size, offset, options)
-- /:entity?tags=:tags
-- search all key-values: <entity_name>|*|*|<pk_string> => actual item key
-- search all key-values: I|<entity_name>|*|<pk_string> => actual item key
if self.schema.name ~= "tags" then
local prefix = item_key_prefix(self.schema.name, "*") -- "<entity_name>|*|*|"
local prefix = item_key_prefix(self.schema.name, "*") -- "I|<entity_name>|"
local items, err, offset = page_for_prefix(self, prefix, size, offset,
options, true)
options)
if not items then
return nil, err
end
Expand Down Expand Up @@ -279,7 +275,7 @@ local function page_for_tags(self, size, offset, options)
local rows, err

rows, err, offset_token = page_for_prefix(self, prefix, size, offset_token,
options, true, dao.schema)
options, false, dao.schema)
if not rows then
return nil, err
end
Expand Down Expand Up @@ -324,7 +320,7 @@ local function page(self, size, offset, options)
return page_for_tags(self, size, offset, options)
end

return page_for_prefix(self, prefix, size, offset, options, need_follow(ws_id))
return page_for_prefix(self, prefix, size, offset, options)
end


Expand All @@ -333,8 +329,26 @@ local function select(self, pk, options)
local schema = self.schema
local ws_id = workspace_id(schema, options)
local pk = pk_string(schema, pk)

-- if no specific ws_id is provided, we need to search all workspace ids
if ws_id == GLOBAL_WORKSPACE_TAG then
for workspace, err in kong.db.workspaces:each() do
if err then
return nil, err
end

local key = item_key(schema.name, workspace.id, pk)
local entity = select_by_key(schema, key)
if entity then
return entity
end
end

return nil, "not found"
end

local key = item_key(schema.name, ws_id, pk)
return select_by_key(schema, key, need_follow(ws_id))
return select_by_key(schema, key)
end


Expand Down
4 changes: 2 additions & 2 deletions spec/01-unit/01-db/10-declarative_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ keyauth_credentials:
it("utilizes the schema name, workspace id, field name, and checksum of the field value", function()
local key = unique_field_key("services", "123", "fieldname", "test", false)
assert.is_string(key)
assert.equals("services|123|fieldname|" .. sha256_hex("test"), key)
assert.equals("U|services|fieldname|123|" .. sha256_hex("test"), key)
end)

-- since rpc sync the param `unique_across_ws` is useless
-- this test case is just for compatibility
it("does not omits the workspace id when 'unique_across_ws' is 'true'", function()
local key = unique_field_key("services", "123", "fieldname", "test", true)
assert.equals("services|123|fieldname|" .. sha256_hex("test"), key)
assert.equals("U|services|fieldname|123|" .. sha256_hex("test"), key)
end)
end)

Expand Down
13 changes: 9 additions & 4 deletions spec/01-unit/01-db/11-declarative_lmdb_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,12 @@ describe("#off preserve nulls", function()
local id, item = next(entities.basicauth_credentials)

-- format changed after rpc sync
-- item key
local cache_key = concat({
"I|",
"basicauth_credentials|",
item.ws_id,
"|*|",
"|",
id
})

Expand All @@ -226,13 +228,16 @@ describe("#off preserve nulls", function()
if plugin.name == PLUGIN_NAME then

-- format changed after rpc sync
-- foreign key:
cache_key = concat({
"F|",
"plugins|",
plugin.ws_id,
"|route|",
"route|",
plugin.route.id,
"|",
plugin.id
plugin.ws_id,
"|",
plugin.id,
})
value, err, hit_lvl = lmdb.get(cache_key)
assert.is_nil(err)
Expand Down

1 comment on commit 1458161

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bazel Build

Docker image available kong/kong:145816122ec8066c03502a794617721dc8058b15
Artifacts available https://github.com/Kong/kong/actions/runs/12462300057

Please sign in to comment.