From 6192fb97a1a1fcb2082f5bb69288a15a8e067c41 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 21 Feb 2024 17:59:25 +0800 Subject: [PATCH 001/126] refactor(dns): new dns client library --- kong/conf_loader/constants.lua | 1 + kong/globalpatches.lua | 4 + kong/resty/dns/client.lua | 7 + kong/resty/dns_client/init.lua | 634 +++++++ kong/resty/dns_client/utils.lua | 235 +++ kong/templates/kong_defaults.lua | 1 + spec/01-unit/09-balancer/01-generic_spec.lua | 7 +- .../03-consistent_hashing_spec.lua | 2 + .../09-balancer/04-round_robin_spec.lua | 5 + spec/01-unit/21-dns-client/02-client_spec.lua | 1 + .../21-dns-client/03-client_cache_spec.lua | 1 + .../30-new-dns-client/01-utils_spec.lua | 411 +++++ .../30-new-dns-client/02-old_client_spec.lua | 1500 +++++++++++++++++ .../03-old_client_cache_spec.lua | 674 ++++++++ spec/fixtures/shared_dict.lua | 3 + spec/helpers/dns.lua | 23 +- 16 files changed, 3498 insertions(+), 11 deletions(-) create mode 100644 kong/resty/dns_client/init.lua create mode 100644 kong/resty/dns_client/utils.lua create mode 100644 spec/01-unit/30-new-dns-client/01-utils_spec.lua create mode 100644 spec/01-unit/30-new-dns-client/02-old_client_spec.lua create mode 100644 spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua diff --git a/kong/conf_loader/constants.lua b/kong/conf_loader/constants.lua index cda8a9a9ccdb..dbf0cb6def91 100644 --- a/kong/conf_loader/constants.lua +++ b/kong/conf_loader/constants.lua @@ -370,6 +370,7 @@ local CONF_PARSERS = { dns_not_found_ttl = { typ = "number" }, dns_error_ttl = { typ = "number" }, dns_no_sync = { typ = "boolean" }, + legacy_dns_client = { typ = "boolean" }, privileged_worker = { typ = "boolean", deprecated = { diff --git a/kong/globalpatches.lua b/kong/globalpatches.lua index 397c4fc7c4e8..8d2a318568e3 100644 --- a/kong/globalpatches.lua +++ b/kong/globalpatches.lua @@ -409,6 +409,10 @@ return function(options) local seeded = {} local randomseed = math.randomseed + if options.rbusted then + 
_G.math.native_randomseed = randomseed + end + _G.math.randomseed = function() local pid = ngx.worker.pid() local id diff --git a/kong/resty/dns/client.lua b/kong/resty/dns/client.lua index 03625790ee58..57735597b34c 100644 --- a/kong/resty/dns/client.lua +++ b/kong/resty/dns/client.lua @@ -19,6 +19,13 @@ -- @author Thijs Schreijer -- @license Apache 2.0 +-- Use the new dns client library instead. If you want to switch to the original +-- one, you can set `legacy_dns_client = on` in kong.conf. +if ngx.shared.kong_dns_cache and not _G.legacy_dns_client then + package.loaded["kong.resty.dns_client"] = nil + return require("kong.resty.dns_client") +end + local _ local utils = require("kong.resty.dns.utils") local fileexists = require("pl.path").exists diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua new file mode 100644 index 000000000000..08828c6fde41 --- /dev/null +++ b/kong/resty/dns_client/init.lua @@ -0,0 +1,634 @@ +-- vim: ts=4 sts=4 sw=4 et: + +local utils = require("kong.resty.dns_client.utils") +local mlcache = require("kong.resty.mlcache") +local resolver = require("resty.dns.resolver") + +local now = ngx.now +local log = ngx.log +local WARN = ngx.WARN +local ALERT = ngx.ALERT +local math_min = math.min +local timer_at = ngx.timer.at +local table_insert = table.insert +local ipv6_bracket = utils.ipv6_bracket +-- local deep_copy = function (t) return t end -- TODO require("kong.tools.utils").deep_copy + +-- debug +--[[ +local json = require("cjson").encode +local logt = table_insert +local logt = function (...) 
end +]] + +-- Constants and default values +local DEFAULT_ERROR_TTL = 1 -- unit: second +local DEFAULT_STALE_TTL = 4 +local DEFAULT_EMPTY_TTL = 30 + +local DEFAULT_IP_TTL = 10 * 365 * 24 * 60 * 60 -- 10 years + +local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } + +local TYPE_SRV = resolver.TYPE_SRV +local TYPE_A = resolver.TYPE_A +local TYPE_AAAA = resolver.TYPE_AAAA +local TYPE_CNAME = resolver.TYPE_CNAME +local TYPE_LAST = -1 + +local valid_type_names = { + SRV = TYPE_SRV, + A = TYPE_A, + AAAA = TYPE_AAAA, + CNAME = TYPE_CNAME, + LAST = TYPE_LAST, +} + +local hitstrs = { + [1] = "hit_lru", + [2] = "hit_shm", +} + +local errstrs = { -- client specific errors + [100] = "cache only lookup failed", + [101] = "empty record received", +} + +local EMPTY_ANSWERS = { errcode = 3, errstr = "name error" } + + +--- APIs +local _M = {} +local mt = { __index = _M } + +-- copy TYPE_* +for k,v in pairs(resolver) do + if type(k) == "string" and k:sub(1,5) == "TYPE_" then + _M[k] = v + end +end +_M.TYPE_LAST = -1 + + +local function stats_init(stats, name) + if not stats[name] then + stats[name] = {} + end +end + + +local function stats_count(stats, name, key) + stats[name][key] = (stats[name][key] or 0) + 1 +end + + +-- For TYPE_LAST: the DNS record from the last successful query +local valid_types = { + [ TYPE_SRV ] = true, + [ TYPE_A ] = true, + [ TYPE_AAAA ] = true, + [ TYPE_CNAME ] = true, +} + +local function insert_last_type(cache, name, qtype) + if valid_types[qtype] then + cache:set("last:" .. name, { ttl = 0 }, qtype) + end +end + + +local function get_last_type(cache, name) + return cache:get("last:" .. 
name) +end + + +-- insert hosts into cache +local function init_hosts(cache, path, preferred_ip_type) + local hosts, err = utils.parse_hosts(path) + if not hosts then + log(WARN, "Invalid hosts file: ", err) + hosts = {} + end + + if not hosts.localhost then + hosts.localhost = { + ipv4 = "127.0.0.1", + ipv6 = "[::1]", + } + end + + local function insert_answer(name, qtype, address) + if not address then + return + end + + local key = name .. ":" .. qtype + local answers = { + ttl = DEFAULT_IP_TTL, + expire = now() + DEFAULT_IP_TTL, + { + name = name, + type = qtype, + address = address, + class = 1, + ttl = DEFAULT_IP_TTL, + }, + } + cache:set(key, { ttl = DEFAULT_IP_TTL }, answers) + end + + for name, address in pairs(hosts) do + name = name:lower() + if address.ipv4 then + insert_answer(name, TYPE_A, address.ipv4) + insert_last_type(cache, name, TYPE_A) + end + if address.ipv6 then + insert_answer(name, TYPE_AAAA, address.ipv6) + if not address.ipv4 or preferred_ip_type == TYPE_AAAA then + insert_last_type(cache, name, TYPE_AAAA) + end + end + end + + return hosts +end + + +function _M.new(opts) + if not opts then + return nil, "no options table specified" + end + + local enable_ipv6 = opts.enable_ipv6 + + -- parse resolv.conf + local resolv, err = utils.parse_resolv_conf(opts.resolv_conf, enable_ipv6) + if not resolv then + log(WARN, "Invalid resolv.conf: ", err) + resolv = { options = {} } + end + + -- init the resolver options for lua-resty-dns + local nameservers = (opts.nameservers and #opts.nameservers > 0) and + opts.nameservers or resolv.nameservers + if not nameservers or #nameservers == 0 then + log(WARN, "Invalid configuration, no nameservers specified") + end + + local r_opts = { + nameservers = nameservers, + retrans = opts.retrans or resolv.options.attempts or 5, + timeout = opts.timeout or resolv.options.timeout or 2000, -- ms + no_random = opts.no_random or not resolv.options.rotate, + } + + -- init the mlcache + local lock_timeout = 
r_opts.timeout / 1000 * r_opts.retrans + 1 -- s + + local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { + lru_size = opts.cache_size or 10000, + ipc_shm = "kong_dns_cache_ipc", + resty_lock_opts = { + timeout = lock_timeout, + exptimeout = lock_timeout + 1, + }, + -- miss cache + shm_miss = "kong_dns_cache_miss", + neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + }) + if not cache then + return nil, "could not create mlcache: " .. err + end + + if opts.cache_purge then + cache:purge(true) + end + + -- TODO: add an async task to call cache:update() to update L1/LRU-cache + -- for the inserted value from other workers + + -- parse order + local search_types = {} + local order = opts.order or DEFAULT_ORDER + local preferred_ip_type + for _, typstr in ipairs(order) do + local qtype = valid_type_names[typstr:upper()] + if not qtype then + return nil, "Invalid dns record type in order array: " .. typstr + end + table_insert(search_types, qtype) + if (qtype == TYPE_A or qtype == TYPE_AAAA) and not preferred_ip_type then + preferred_ip_type = qtype + end + end + preferred_ip_type = preferred_ip_type or TYPE_A + + if #search_types == 0 then + return nil, "Invalid order array: empty record types" + end + + -- parse hosts + local hosts = init_hosts(cache, opts.hosts, preferred_ip_type) + + return setmetatable({ + r_opts = r_opts, + cache = cache, + valid_ttl = opts.valid_ttl, + error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, + empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + resolv = opts._resolv or resolv, + hosts = hosts, + enable_ipv6 = enable_ipv6, + search_types = search_types, + stats = {} + }, mt) +end + + +local function process_answers(self, qname, qtype, answers) + local errcode = answers.errcode + if errcode then + answers.ttl = errcode == 3 and self.empty_ttl or self.error_ttl + -- For compatibility, the balancer subsystem needs to use this field. 
+ answers.expire = now() + answers.ttl + return answers + end + + local processed_answers = {} + local cname_answer + + local ttl = self.valid_ttl or 0xffffffff + + for _, answer in ipairs(answers) do + answer.name = answer.name:lower() + + if answer.type == TYPE_CNAME then + cname_answer = answer -- use the last one as the real cname + + elseif answer.type == qtype then + -- A compromise regarding https://github.com/Kong/kong/pull/3088 + if answer.type == TYPE_AAAA then + answer.address = ipv6_bracket(answer.address) + elseif answer.type == TYPE_SRV then + answer.target = ipv6_bracket(answer.target) + end + + table.insert(processed_answers, answer) + end + + if self.valid_ttl then + answer.ttl = self.valid_ttl + else + ttl = math_min(ttl, answer.ttl) + end + end + + if #processed_answers == 0 then + if not cname_answer then + return { + errcode = 101, + errstr = errstrs[101], + ttl = self.empty_ttl, + --expire = now() + self.empty_ttl, + } + end + + table_insert(processed_answers, cname_answer) + end + + processed_answers.ttl = ttl + processed_answers.expire = now() + ttl + + return processed_answers +end + + +local function resolve_query(self, name, qtype, tries) + -- logt(tries, "query") + + local key = name .. ":" .. qtype + stats_count(self.stats, key, "query") + + local r, err = resolver:new(self.r_opts) + if not r then + return nil, "failed to instantiate the resolver: " .. err + end + + local options = { additional_section = true, qtype = qtype } + local answers, err = r:query(name, options) + if r.destroy then + r:destroy() + end + + if not answers then + stats_count(self.stats, key, "query_fail") + return nil, "DNS server error: " .. (err or "unknown") + end + + answers = process_answers(self, name, qtype, answers) + + stats_count(self.stats, key, answers.errstr and + "query_err:" .. 
answers.errstr or "query_succ") + + -- logt(tries, answers.errstr or #answers) + + return answers, nil, answers.ttl +end + + +-- Serve-stale helper: schedules a zero-delay timer that re-queries `name`/`qtype` +-- and refreshes the cache entry `key`. A "name error" reply (errcode 3) is stored +-- as a nil value, mirroring what resolve_name_type_callback returns for it; other +-- error replies leave the cache untouched. +local function start_stale_update_task(self, key, name, qtype) + stats_count(self.stats, key, "stale") + + local ok, err = timer_at(0, function (premature) + if premature then return end + + local answers = resolve_query(self, name, qtype, {}) + if not answers then + return + end + + if answers.errcode == 3 then + -- `cond and nil or x` always evaluates to `x`, so branch explicitly + self.cache:set(key, { ttl = answers.ttl }, nil) + + elseif not answers.errcode then + self.cache:set(key, { ttl = answers.ttl }, answers) + -- TYPE_LAST only tracks the last successful query + insert_last_type(self.cache, name, qtype) + end + end) + if not ok then + log(WARN, "failed to create timer for stale DNS update of ", key, ": ", err) + end +end + + +local function resolve_name_type_callback(self, name, qtype, opts, tries) + local key = name .. ":" .. qtype + + local ttl, _, answers = self.cache:peek(key, true) + if answers and not answers.expired then + ttl = (ttl or 0) + self.stale_ttl + if ttl > 0 then + start_stale_update_task(self, key, name, qtype) + answers.expired = true + answers.ttl = ttl + answers.expire = now() + ttl + return answers, nil, ttl + end + end + + if opts.cache_only then + return { errcode = 100, errstr = errstrs[100] }, nil, -1 + end + + local answers, err, ttl = resolve_query(self, name, qtype, tries) + + if answers and answers.errcode == 3 then + return nil -- empty record for shm_miss cache + end + + return answers, err, ttl +end + + +local function detect_recursion(opts, key) + local rn = opts.resolved_names + if not rn then + rn = {} + opts.resolved_names = rn + end + local detected = rn[key] + -- TODO delete + if detected then + log(ALERT, "detect recursion for name:", key) + end + rn[key] = true + return detected +end + + +local function resolve_name_type(self, name, qtype, opts, tries) + local key = name .. ":" .. qtype + + stats_init(self.stats, key) + -- logt(tries, key) + + if detect_recursion(opts, key) then + stats_count(self.stats, key, "fail_recur") + return nil, "recursion detected for name: " .. 
key + end + + local answers, err, hit_level = self.cache:get(key, nil, + resolve_name_type_callback, + self, name, qtype, opts, tries) + if err and err:sub(1, #"callback") == "callback" then + log(ALERT, err) + end + + if not answers and not err then + answers = EMPTY_ANSWERS + end + + if hit_level and hit_level < 3 then + stats_count(self.stats, key, hitstrs[hit_level]) + -- logt(tries, hitstrs[hit_level]) + end + + if err or answers.errcode then + err = err or "DNS server replied error: " .. answers.errstr + table_insert(tries, { name, qtype, err }) + end + + return answers, err +end + + +-- Builds the de-duplicated list of record types to query: the explicit `qtype` +-- if given, otherwise the configured search order. TYPE_LAST is replaced by the +-- type remembered for `name` (and skipped when nothing is remembered). +local function get_search_types(self, name, qtype) + local input_types = qtype and { qtype } or self.search_types + local checked_types = {} + local types = {} + + for _, qtype in ipairs(input_types) do + if qtype == TYPE_LAST then + qtype = get_last_type(self.cache, name) + end + if qtype and not checked_types[qtype] then + table_insert(types, qtype) + checked_types[qtype] = true + end + end + + return types +end + + +local function check_and_get_ip_answers(name) + if name:match("^%d+%.%d+%.%d+%.%d+$") then -- IPv4 + return {{ name = name, class = 1, type = TYPE_A, address = name }} + end + + if name:match(":") then -- IPv6 + return {{ name = name, class = 1, type = TYPE_AAAA, address = ipv6_bracket(name) }} + end + + return nil +end + + +-- Tries every (record type, search name) combination in order and returns the +-- first answers that carry no errcode. IP literals short-circuit with a +-- synthetic record that uses DEFAULT_IP_TTL, the same lifetime given to +-- hosts-file entries. +-- @return answers (or nil), err, tries +local function resolve_names_and_types(self, name, opts, tries) + local answers = check_and_get_ip_answers(name) + if answers then + answers.ttl = DEFAULT_IP_TTL + answers.expire = now() + answers.ttl + return answers, nil, tries + end + + local types = get_search_types(self, name, opts.qtype) + local names = utils.search_names(name, self.resolv, self.hosts) + + for _, qtype in ipairs(types) do + for _, qname in ipairs(names) do + local answers, err = resolve_name_type(self, qname, qtype, opts, tries) + + -- severe error occurred + if not answers then + return nil, err, tries + end + + if not answers.errcode then + 
insert_last_type(self.cache, qname, qtype) -- cache TYPE_LAST + return answers, nil, tries + end + end + end + + -- not found in the search iteration + return nil, "no available records", tries +end + + +local function resolve_all(self, name, opts, tries) + local key = "fast:" .. name .. ":" .. (opts.qtype or "all") + -- logt(tries, key) + + stats_init(self.stats, name) + stats_count(self.stats, name, "runs") + + if detect_recursion(opts, key) then + stats_count(self.stats, name, "fail_recur") + return nil, "recursion detected for name: " .. name + end + + -- lookup fastly with the key `fast::/all` + local answers, err, hit_level = self.cache:get(key) + if not answers or answers.expired then + stats_count(self.stats, name, "miss") + + answers, err, tries = resolve_names_and_types(self, name, opts, tries) + if not opts.cache_only and answers then + --assert(answers.ttl) + --assert(answers.expire) + self.cache:set(key, { ttl = answers.ttl }, answers) + end + + else + stats_count(self.stats, name, hitstrs[hit_level]) + -- logt(tries, hitstrs[hit_level]) + end + + -- dereference CNAME + if opts.qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then + -- logt(tries, "cname") + stats_count(self.stats, name, "cname") + return resolve_all(self, answers[1].cname, opts, tries) + end + + stats_count(self.stats, name, answers and "succ" or "fail") + + return answers, err, tries +end + + +-- resolve all `name`s and `type`s combinations and return first usable answers +-- `name`s: produced by resolv.conf options: `search`, `ndots` and `domain` +-- `type`s: SRV, A, AAAA, CNAME +-- +-- @opts: +-- `return_random`: default `false`, return only one random IP address +-- `cache_only`: default `false`, retrieve data only from the internal cache +-- `qtype`: specified query type instead of its own search types +function _M:resolve(name, opts, tries) + name = name:lower() + opts = opts or {} + tries = tries or {} + + local answers, err, tries = resolve_all(self, name, 
opts, tries) + if not answers or not opts.return_random then + return answers, err, tries + end + + -- option: return_random + if answers[1].type == TYPE_SRV then + local answer = utils.get_wrr_ans(answers) + opts.port = answer.port ~= 0 and answer.port or opts.port + -- TODO: SRV recursive name and target how to handle + -- `_M.resolve` is overridden below by the legacy wrapper (different + -- signature), so recurse through the `_resolve` alias of this method. + return self:_resolve(answer.target, opts, tries) + end + + return utils.get_rr_ans(answers).address, opts.port, tries +end + + +-- compatible with original DNS client library +-- These APIs will be deprecated if fully replacing the original one. +local dns_client + +function _M.init(opts) + opts = opts or {} + opts.valid_ttl = opts.validTtl + opts.error_ttl = opts.badTtl + opts.stale_ttl = opts.staleTtl + opts.cache_size = opts.cacheSize + opts.cache_purge = true + + local client, err = _M.new(opts) + if not client then + return nil, err + end + + dns_client = client + return true +end + + +-- New and old libraries have the same function name. +_M._resolve = _M.resolve + +-- Legacy-compatible wrapper around the client created by `_M.init`. +-- Of `r_opts`, only `qtype` is honoured; it is forwarded as the `qtype` option +-- of the new client so callers can still restrict the record type. +function _M.resolve(name, r_opts, cache_only, tries) + local opts = { cache_only = cache_only, qtype = r_opts and r_opts.qtype } + return dns_client:_resolve(name, opts, tries) +end + + +function _M.toip(name, port, cache_only, tries) + local opts = { cache_only = cache_only, return_random = true , port = port } + return dns_client:_resolve(name, opts, tries) +end + + +-- For testing + +if package.loaded.busted then + function _M.getobj() + return dns_client + end + function _M.getcache() + return { + set = function (self, k, v, ttl) + self.cache:set(k, {ttl = ttl or 0}, v) + end, + cache = dns_client.cache, + } + end + function _M:insert_last_type(name, qtype) + insert_last_type(self.cache, name, qtype) + end + function _M:get_last_type(name) + return get_last_type(self.cache, name) + end +end + + +return _M diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua new file mode 100644 index 000000000000..998d84a7cb44 --- /dev/null +++ b/kong/resty/dns_client/utils.lua @@ -0,0 +1,235 @@ +-- 
vim: ts=4 sts=4 sw=4 et: + + +local utils = require("kong.resty.dns.utils") +local pl_utils = require("pl.utils") + +local math_random = math.random +local table_insert = table.insert +local table_remove = table.remove + +local DEFAULT_HOSTS_FILE = "/etc/hosts" +local DEFAULT_RESOLV_CONF = "/etc/resolv.conf" + + +local _M = {} + + +-- checks the hostname type +-- @return "ipv4", "ipv6", or "name" +function _M.hostname_type(name) + local remainder, colons = name:gsub(":", "") + if colons > 1 then + return "ipv6" + end + if remainder:match("^[%d%.]+$") then + return "ipv4" + end + return "name" +end + + +-- parses a hostname with an optional port +-- IPv6 addresses are always returned in square brackets +-- @param name the string to check (this may contain a port number) +-- @return `name/ip` + `port (or nil)` + `type ("ipv4", "ipv6" or "name")` +function _M.parse_hostname(name) + local t = _M.hostname_type(name) + if t == "ipv4" or t == "name" then + local ip, port = name:match("^([^:]+)%:*(%d*)$") + return ip, tonumber(port), t + end + -- ipv6 + if name:match("%[") then -- brackets, so possibly a port + local ip, port = name:match("^%[([^%]]+)%]*%:*(%d*)$") + return "[" .. ip .. "]", tonumber(port), t + end + return "[" .. name .. 
"]", nil, t -- no brackets also means no port +end + + +local function get_lines(path) + if type(path) == "table" then + return path + end + return pl_utils.readlines(path or DEFAULT_HOSTS_FILE) +end + + +function _M.parse_hosts(path, enable_ipv6) + local lines, err = get_lines(path or DEFAULT_HOSTS_FILE) + if not lines then + return nil, err + end + + local hosts = {} + for _, line in ipairs(lines) do + -- Remove leading/trailing whitespaces and split by whitespace + local parts = {} + for part in line:gmatch("%S+") do + if part:sub(1, 1) == '#' then + break + end + table.insert(parts, part:lower()) + end + + -- Check if the line contains an IP address followed by hostnames + if #parts >= 2 then + local ip, _, family = _M.parse_hostname(parts[1]) + if family ~= "name" then -- ipv4/ipv6 + for i = 2, #parts do + local host = parts[i] + local v = hosts[host] + if not v then + v = {} + hosts[host] = v + end + v[family] = v[family] or ip -- prefer to use the first ip + end + end + end + end + return hosts +end + + +-- TODO: need to rewrite it instead of calling parseResolvConf +function _M.parse_resolv_conf(path, enable_ipv6) + local resolv, err = utils.parseResolvConf(path or DEFAULT_RESOLV_CONF) + if not resolv then + return nil, err + end + resolv = utils.applyEnv(resolv) + resolv.options = resolv.options or {} + resolv.ndots = resolv.options.ndots or 1 + resolv.search = resolv.search or (resolv.domain and { resolv.domain }) + -- remove special domain like "." + if resolv.search then + for i = #resolv.search, 1, -1 do + if resolv.search[i] == "." 
then + table_remove(resolv.search, i) + end + end + end + -- nameservers + if resolv.nameserver then + local nameservers = {} + for _, address in ipairs(resolv.nameserver) do + local ip, port, t = utils.parseHostname(address) + if t == "ipv4" or + (t == "ipv6" and not ip:find([[%]], nil, true) and enable_ipv6) + then + table_insert(nameservers, port and { ip, port } or ip) + end + end + resolv.nameservers = nameservers + end + return resolv +end + + +function _M.is_fqdn(name, ndots) + local _, dot_count = name:gsub("%.", "") + return (dot_count >= ndots) or (name:sub(-1) == ".") +end + + +-- construct names from resolv options: search, ndots and domain +function _M.search_names(name, resolv, hosts) + if not resolv.search or _M.is_fqdn(name, resolv.ndots) then + return { name } + end + + local names = {} + for _, suffix in ipairs(resolv.search) do + table_insert(names, name .. "." .. suffix) + end + if hosts and hosts[name] then + table_insert(names, 1, name) + else + table_insert(names, name) + end + return names +end + + +function _M.ipv6_bracket(name) + if name:match("^[^[].*:") then -- not rigorous, but sufficient + return "[" .. name .. 
"]" + end + return name +end + + +-- util APIs to balance @answers + +function _M.get_rr_ans(answers) + answers.last = (answers.last or 0) % #answers + 1 + return answers[answers.last] +end + + +-- based on the Nginx's SWRR algorithm and lua-resty-balancer +local function swrr_next(answers) + local total = 0 + local best = nil -- best answer in answers[] + + for _, answer in ipairs(answers) do + local w = (answer.weight == 0) and 0.1 or answer.weight -- rfc 2782 + local cw = answer.cw + w + answer.cw = cw + if not best or cw > best.cw then + best = answer + end + total = total + w + end + + best.cw = best.cw - total + return best +end + + +local function swrr_init(answers) + for _, answer in ipairs(answers) do + answer.cw = 0 -- current weight + end + -- random start + for _ = 1, math_random(#answers) do + swrr_next(answers) + end +end + + +-- gather all records with the lowest priority into one array (answers.l) +-- and return it +local function filter_lowest_priority_answers(answers) + local lowest_priority = answers[1].priority + local l = {} -- lowest priority list + + for _, answer in ipairs(answers) do + if answer.priority < lowest_priority then + lowest_priority = answer.priority + l = { answer } + elseif answer.priority == lowest_priority then + table.insert(l, answer) + end + end + + answers.l = l + return l +end + + +function _M.get_wrr_ans(answers) + local l = answers.l or filter_lowest_priority_answers(answers) + + -- perform round robin selection on lowest priority answers @l + if not l[1].cw then + swrr_init(l) + end + + return swrr_next(l) +end + + +return _M diff --git a/kong/templates/kong_defaults.lua b/kong/templates/kong_defaults.lua index ce532fd4b7ca..6a33c351d3a0 100644 --- a/kong/templates/kong_defaults.lua +++ b/kong/templates/kong_defaults.lua @@ -168,6 +168,7 @@ dns_cache_size = 10000 dns_not_found_ttl = 30 dns_error_ttl = 1 dns_no_sync = off +legacy_dns_client = off dedicated_config_processing = on worker_consistency = eventual diff 
--git a/spec/01-unit/09-balancer/01-generic_spec.lua b/spec/01-unit/09-balancer/01-generic_spec.lua index ec4c58f1c60c..4a8daddd1daf 100644 --- a/spec/01-unit/09-balancer/01-generic_spec.lua +++ b/spec/01-unit/09-balancer/01-generic_spec.lua @@ -1198,7 +1198,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro }, }, b:getStatus()) - dnsExpire(record) + dnsExpire(client, record) dnsSRV({ { name = "srvrecord.test", target = "1.1.1.1", port = 9000, weight = 20 }, { name = "srvrecord.test", target = "2.2.2.2", port = 9001, weight = 20 }, @@ -1382,7 +1382,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro }, b:getStatus()) -- update weight, through dns renewal - dnsExpire(record) + dnsExpire(client, record) dnsSRV({ { name = "srvrecord.test", target = "1.1.1.1", port = 9000, weight = 20 }, { name = "srvrecord.test", target = "2.2.2.2", port = 9001, weight = 20 }, @@ -1695,6 +1695,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro -- update DNS with a new backend IP -- balancer should now recover since a new healthy backend is available record.expire = 0 + dnsExpire(client, record) dnsA({ { name = "getkong.test", address = "5.6.7.8", ttl = 60 }, }) @@ -1820,7 +1821,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro { host = "notachanceinhell.this.name.exists.konghq.test", port = 4321, - dns = "dns server error: 3 name error", + dns = "no available records", nodeWeight = 100, weight = { total = 0, diff --git a/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua b/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua index 17f46f46fa5b..6d1fdc7b1737 100644 --- a/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua +++ b/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua @@ -21,6 +21,7 @@ local sleep = helpers.sleep local dnsSRV = function(...) return helpers.dnsSRV(client, ...) end local dnsA = function(...) 
return helpers.dnsA(client, ...) end local dnsAAAA = function(...) return helpers.dnsAAAA(client, ...) end +local dnsExpire = helpers.dnsExpire @@ -844,6 +845,7 @@ describe("[consistent_hashing]", function() -- expire the existing record record.expire = 0 record.expired = true + dnsExpire(client, record) -- do a lookup to trigger the async lookup client.resolve("really.really.really.does.not.exist.host.test", {qtype = client.TYPE_A}) sleep(1) -- provide time for async lookup to complete diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index 35f63f2c4522..427061bb8f83 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -19,6 +19,7 @@ local sleep = helpers.sleep local dnsSRV = function(...) return helpers.dnsSRV(client, ...) end local dnsA = function(...) return helpers.dnsA(client, ...) end local dnsAAAA = function(...) return helpers.dnsAAAA(client, ...) end +local dnsExpire = helpers.dnsExpire local unset_register = {} @@ -1039,6 +1040,7 @@ describe("[round robin balancer]", function() -- expire the existing record record.expire = 0 record.expired = true + dnsExpire(client, record) -- do a lookup to trigger the async lookup client.resolve("really.really.really.does.not.exist.hostname.test", {qtype = client.TYPE_A}) sleep(0.5) -- provide time for async lookup to complete @@ -1149,6 +1151,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + dnsExpire(client, record) dnsAAAA({ -- create a new record (identical) { name = "mashape.test", address = "::1" }, { name = "mashape.test", address = "::2" }, @@ -1282,6 +1285,7 @@ describe("[round robin balancer]", function() local test_name = "really.really.really.does.not.exist.hostname.test" local ttl = 0.1 local staleTtl = 0 -- stale ttl = 0, force lookup upon expiring + client.getobj().stale_ttl = 0 
local record = dnsA({ { name = test_name, address = "1.2.3.4", ttl = ttl }, }, staleTtl) @@ -1304,6 +1308,7 @@ describe("[round robin balancer]", function() assert.is_nil(ip) assert.equal(port, "Balancer is unhealthy") end + client.getobj().stale_ttl = 4 end) it("renewed DNS A record; unhealthy entries remain unhealthy after renewal", function() local record = dnsA({ diff --git a/spec/01-unit/21-dns-client/02-client_spec.lua b/spec/01-unit/21-dns-client/02-client_spec.lua index acd597ec2ec2..bf97abbd171c 100644 --- a/spec/01-unit/21-dns-client/02-client_spec.lua +++ b/spec/01-unit/21-dns-client/02-client_spec.lua @@ -39,6 +39,7 @@ describe("[DNS client]", function() local client, resolver before_each(function() + _G.legacy_dns_client = true client = require("kong.resty.dns.client") resolver = require("resty.dns.resolver") diff --git a/spec/01-unit/21-dns-client/03-client_cache_spec.lua b/spec/01-unit/21-dns-client/03-client_cache_spec.lua index eb57d1ec2a24..c86cf57577d3 100644 --- a/spec/01-unit/21-dns-client/03-client_cache_spec.lua +++ b/spec/01-unit/21-dns-client/03-client_cache_spec.lua @@ -22,6 +22,7 @@ describe("[DNS client cache]", function() local client, resolver before_each(function() + _G.legacy_dns_client = true client = require("kong.resty.dns.client") resolver = require("resty.dns.resolver") diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua new file mode 100644 index 000000000000..2a4ead69bfbe --- /dev/null +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -0,0 +1,411 @@ +local utils = require "kong.resty.dns_client.utils" +local tempfilename = require("pl.path").tmpname +local writefile = require("pl.utils").writefile +local splitlines = require("pl.stringx").splitlines + +describe("[utils]", function () + + describe("is_fqdn(name, ndots)", function () + it("test @name: end with `.`", function () + assert.is_true(utils.is_fqdn("www.", 2)) + 
assert.is_true(utils.is_fqdn("www.example.", 3)) + assert.is_true(utils.is_fqdn("www.example.com.", 4)) + end) + + it("test @ndots", function () + assert.is_true(utils.is_fqdn("www", 0)) + + assert.is_false(utils.is_fqdn("www", 1)) + assert.is_true(utils.is_fqdn("www.example", 1)) + assert.is_true(utils.is_fqdn("www.example.com", 1)) + + assert.is_false(utils.is_fqdn("www", 2)) + assert.is_false(utils.is_fqdn("www.example", 2)) + assert.is_true(utils.is_fqdn("www.example.com", 2)) + assert.is_true(utils.is_fqdn("www1.www2.example.com", 2)) + end) + end) + + describe("search_names()", function () + it("empty resolv, not apply the search list", function () + local resolv = {} + local names = utils.search_names("www.example.com", resolv) + assert.same(names, { "www.example.com" }) + end) + + it("FQDN name: end with `.`, not apply the search list", function () + local names = utils.search_names("www.example.com.", { ndots = 1 }) + assert.same(names, { "www.example.com." }) + -- name with 3 dots, and ndots=4 > 3 + local names = utils.search_names("www.example.com.", { ndots = 4 }) + assert.same(names, { "www.example.com." 
}) + end) + + it("name dots number >= ndots, not apply the search list", function () + local resolv = { + ndots = 1, + search = { "example.net" }, + } + local names = utils.search_names("www.example.com", resolv) + assert.same(names, { "www.example.com" }) + + local names = utils.search_names("example.com", resolv) + assert.same(names, { "example.com" }) + end) + + it("name dots number <= ndots, apply the search list", function () + local resolv = { + ndots = 2, + search = { "example.net" }, + } + local names = utils.search_names("www", resolv) + assert.same(names, { "www.example.net", "www" }) + + local names = utils.search_names("www1.www2", resolv) + assert.same(names, { "www1.www2.example.net", "www1.www2" }) + + local names = utils.search_names("www1.www2.www3", resolv) + assert.same(names, { "www1.www2.www3" }) -- not apply + + local resolv = { + ndots = 2, + search = { "example.net", "example.com" }, + } + local names = utils.search_names("www", resolv) + assert.same(names, { "www.example.net", "www.example.com", "www" }) + + local names = utils.search_names("www1.www2", resolv) + assert.same(names, { "www1.www2.example.net", "www1.www2.example.com", "www1.www2" }) + + local names = utils.search_names("www1.www2.www3", resolv) + assert.same(names, { "www1.www2.www3" }) -- not apply + end) + end) + + describe("round robin getion", function () + + local function get_and_count(answers, n, get_ans) + local count = {} + for _ = 1, n do + local answer = get_ans(answers) + count[answer.target] = (count[answer.target] or 0) + 1 + end + return count + end + + it("rr", function () + local answers = { + { target = "1" }, -- 25% + { target = "2" }, -- 25% + { target = "3" }, -- 25% + { target = "4" }, -- 25% + } + local count = get_and_count(answers, 100, utils.get_rr_ans) + assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) + end) + + it("swrr", function () + -- simple one + local answers = { + { target = "w5-p10-a", weight = 5, priority = 10, }, -- 
hit 100% + } + local count = get_and_count(answers, 20, utils.get_wrr_ans) + assert.same(count, { ["w5-p10-a"] = 20 }) + + -- only get the lowest priority + local answers = { + { target = "w5-p10-a", weight = 5, priority = 10, }, -- hit 50% + { target = "w5-p20", weight = 5, priority = 20, }, -- hit 0% + { target = "w5-p10-b", weight = 5, priority = 10, }, -- hit 50% + { target = "w0-p10", weight = 0, priority = 10, }, -- hit 0% + } + local count = get_and_count(answers, 20, utils.get_wrr_ans) + assert.same(count, { ["w5-p10-a"] = 10, ["w5-p10-b"] = 10 }) + + -- weight: 6, 3, 1 + local answers = { + { target = "w6", weight = 6, priority = 10, }, -- hit 60% + { target = "w3", weight = 3, priority = 10, }, -- hit 30% + { target = "w1", weight = 1, priority = 10, }, -- hit 10% + } + local count = get_and_count(answers, 100 * 1000, utils.get_wrr_ans) + assert.same(count, { ["w6"] = 60000, ["w3"] = 30000, ["w1"] = 10000 }) + + -- random start + _G.math.native_randomseed(9975098) -- math.randomseed() ignores @seed + local answers1 = { + { target = "1", weight = 1, priority = 10, }, + { target = "2", weight = 1, priority = 10, }, + { target = "3", weight = 1, priority = 10, }, + { target = "4", weight = 1, priority = 10, }, + } + local answers2 = { + { target = "1", weight = 1, priority = 10, }, + { target = "2", weight = 1, priority = 10, }, + { target = "3", weight = 1, priority = 10, }, + { target = "4", weight = 1, priority = 10, }, + } + + local a1 = utils.get_wrr_ans(answers1) + local a2 = utils.get_wrr_ans(answers2) + assert.not_equal(a1.target, a2.target) + + -- weight 0 as 0.1 + local answers = { + { target = "w0", weight = 0, priority = 10, }, + { target = "w1", weight = 1, priority = 10, }, + { target = "w2", weight = 0, priority = 10, }, + { target = "w3", weight = 0, priority = 10, }, + } + local count = get_and_count(answers, 100, utils.get_wrr_ans) + assert.same(count, { ["w0"] = 7, ["w1"] = 77, ["w2"] = 8, ["w3"] = 8 }) + + -- weight 0 and lowest priority 
+ local answers = { + { target = "w0-a", weight = 0, priority = 0, }, + { target = "w1", weight = 1, priority = 10, }, -- hit 0% + { target = "w0-b", weight = 0, priority = 0, }, + { target = "w0-c", weight = 0, priority = 0, }, + } + local count = get_and_count(answers, 100, utils.get_wrr_ans) + assert.same(count["w1"], nil) + + -- all weights are 0 + local answers = { + { target = "1", weight = 0, priority = 10, }, + { target = "2", weight = 0, priority = 10, }, + { target = "3", weight = 0, priority = 10, }, + { target = "4", weight = 0, priority = 10, }, + } + local count = get_and_count(answers, 100, utils.get_wrr_ans) + assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) + end) + end) + + describe("parsing 'resolv.conf':", function() + + -- override os.getenv to insert env variables + local old_getenv = os.getenv + local envvars -- whatever is in this table, gets served first + before_each(function() + envvars = {} + os.getenv = function(name) -- luacheck: ignore + return envvars[name] or old_getenv(name) + end + end) + + after_each(function() + os.getenv = old_getenv -- luacheck: ignore + envvars = nil + end) + + it("tests parsing when the 'resolv.conf' file does not exist", function() + local result, err = utils.parse_resolv_conf("non/existing/file") + assert.is.Nil(result) + assert.is.string(err) + end) + + it("tests parsing when the 'resolv.conf' file is empty", function() + local filename = tempfilename() + writefile(filename, "") + local resolv, err = utils.parse_resolv_conf(filename) + os.remove(filename) + assert.is.same({ ndots = 1, options = {} }, resolv) + assert.is.Nil(err) + end) + + it("tests parsing 'resolv.conf' with multiple comment types", function() + local file = splitlines( +[[# this is just a comment line +# at the top of the file + +domain myservice.com + +nameserver 198.51.100.0 +nameserver 2001:db8::1 ; and a comment here +nameserver 198.51.100.0:1234 ; this one has a port number (limited systems support this) 
+nameserver 1.2.3.4 ; this one is 4th, so should be ignored + +# search is commented out, test below for a mutually exclusive one +#search domaina.com domainb.com + +sortlist list1 list2 #list3 is not part of it + +options ndots:2 +options timeout:3 +options attempts:4 + +options debug +options rotate ; let's see about a comment here +options no-check-names +options inet6 +; here's annother comment +options ip6-bytestring +options ip6-dotint +options no-ip6-dotint +options edns0 +options single-request +options single-request-reopen +options no-tld-query +options use-vc +]]) + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.equal("myservice.com", resolv.domain) + assert.is.same({ "198.51.100.0", "2001:db8::1", "198.51.100.0:1234" }, resolv.nameserver) + assert.is.same({ "list1", "list2" }, resolv.sortlist) + assert.is.same({ ndots = 2, timeout = 3, attempts = 4, debug = true, rotate = true, + ["no-check-names"] = true, inet6 = true, ["ip6-bytestring"] = true, + ["ip6-dotint"] = nil, -- overridden by the next one, mutually exclusive + ["no-ip6-dotint"] = true, edns0 = true, ["single-request"] = true, + ["single-request-reopen"] = true, ["no-tld-query"] = true, ["use-vc"] = true}, + resolv.options) + end) + + it("tests parsing 'resolv.conf' with mutual exclusive domain vs search", function() + local file = splitlines( +[[domain myservice.com + +# search is overriding domain above +search domaina.com domainb.com + +]]) + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.Nil(resolv.domain) + assert.is.same({ + 
"domain1.com", + "domain2.com", + "domain3.com", + "domain4.com", + "domain5.com", + "domain6.com", + }, resolv.search) + end) + + it("tests parsing 'resolv.conf' with environment variables", function() + local file = splitlines( +[[# this is just a comment line +domain myservice.com + +nameserver 198.51.100.0 +nameserver 198.51.100.1 ; and a comment here + +options ndots:1 +]]) + envvars.LOCALDOMAIN = "domaina.com domainb.com" + envvars.RES_OPTIONS = "ndots:2 debug" + + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + + + assert.is.Nil(resolv.domain) -- must be nil, mutually exclusive + assert.is.same({ "domaina.com", "domainb.com" }, resolv.search) + + assert.is.same({ ndots = 2, debug = true }, resolv.options) + end) + + it("tests parsing 'resolv.conf' with non-existing environment variables", function() + local file = splitlines( +[[# this is just a comment line +domain myservice.com + +nameserver 198.51.100.0 +nameserver 198.51.100.1 ; and a comment here + +options ndots:2 +]]) + envvars.LOCALDOMAIN = "" + envvars.RES_OPTIONS = "" + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.equals("myservice.com", resolv.domain) -- empty LOCALDOMAIN must not override the domain from the file + assert.is.same({ ndots = 2 }, resolv.options) + end) + + it("skip ipv6 nameservers with scopes", function() + local file = splitlines( +[[# this is just a comment line +nameserver [fe80::1%enp0s20f0u1u1] +]]) + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.same({}, resolv.nameservers) + end) + + end) + + describe("parsing 'hosts':", function() + + it("tests parsing when the 'hosts' file does not exist", function() + local result, err = utils.parse_hosts("non/existing/file") + assert.is.Nil(result) + assert.is.string(err) + end) + + it("tests parsing when the 'hosts' file is empty", function() + local filename = tempfilename() + writefile(filename, "") + local reverse = utils.parse_hosts(filename) + 
os.remove(filename) + assert.is.same({}, reverse) + end) + + it("tests parsing 'hosts'", function() + local hostsfile = splitlines( +[[# The localhost entry should be in every HOSTS file and is used +# to point back to yourself. + +127.0.0.1 # only ip address, this one will be ignored + +127.0.0.1 localhost +::1 localhost + +# My test server for the website + +192.168.1.2 test.computer.com + 192.168.1.3 ftp.COMPUTER.com alias1 alias2 +192.168.1.4 smtp.computer.com alias3 #alias4 +192.168.1.5 smtp.computer.com alias3 #doubles, first one should win + +#Blocking known malicious sites +127.0.0.1 admin.abcsearch.com +127.0.0.2 www3.abcsearch.com #[Browseraid] +127.0.0.3 www.abcsearch.com wwwsearch #[Restricted Zone site] + +[::1] alsolocalhost #support IPv6 in brackets +]]) + local reverse = utils.parse_hosts(hostsfile) + assert.is.equal("127.0.0.1", reverse.localhost.ipv4) + assert.is.equal("[::1]", reverse.localhost.ipv6) + + assert.is.equal("192.168.1.2", reverse["test.computer.com"].ipv4) + + assert.is.equal("192.168.1.3", reverse["ftp.computer.com"].ipv4) + assert.is.equal("192.168.1.3", reverse["alias1"].ipv4) + assert.is.equal("192.168.1.3", reverse["alias2"].ipv4) + + assert.is.equal("192.168.1.4", reverse["smtp.computer.com"].ipv4) + assert.is.equal("192.168.1.4", reverse["alias3"].ipv4) + + assert.is.equal("192.168.1.4", reverse["smtp.computer.com"].ipv4) -- .1.4; first one wins! + assert.is.equal("192.168.1.4", reverse["alias3"].ipv4) -- .1.4; first one wins! + + assert.is.equal("[::1]", reverse["alsolocalhost"].ipv6) + end) + end) +end) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua new file mode 100644 index 000000000000..9a8a72d59bbe --- /dev/null +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -0,0 +1,1500 @@ +-- This test case file originates from the old version of the DNS client and has +-- been modified to adapt to the new version of the DNS client. 
+ +local _writefile = require("pl.utils").writefile +local tmpname = require("pl.path").tmpname +local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy + +-- hosted in Route53 in the AWS sandbox +local TEST_DOMAIN = "kong-gateway-testing.link" +local TEST_NS = "192.51.100.0" + +local TEST_NSS = { TEST_NS } + +local NOT_FOUND_ERROR = 'no available records' + +local function assert_same_answers(a1, a2) + a1 = cycle_aware_deep_copy(a1) + a1.ttl = nil + a1.expire = nil + + a2 = cycle_aware_deep_copy(a2) + a2.ttl = nil + a2.expire = nil + + assert.same(a1, a2) +end + +describe("[DNS client]", function() + + local resolver, client, query_func, old_udp, receive_func + + local resolv_path, hosts_path + + local function writefile(path, text) + _writefile(path, type(text) == "table" and table.concat(text, "\n") or text) + end + + local function client_new(opts) + opts = opts or {} + opts.resolv_conf = resolv_path + opts.hosts = hosts_path + opts.cache_purge = true + return client.new(opts) + end + + lazy_setup(function() + -- create temp resolv.conf and hosts + resolv_path = tmpname() + hosts_path = tmpname() + ngx.log(ngx.DEBUG, "create temp resolv.conf:", resolv_path, + " hosts:", hosts_path) + + -- hook sock:receive to do timeout test + old_udp = ngx.socket.udp + + _G.ngx.socket.udp = function (...) + local sock = old_udp(...) + + local old_receive = sock.receive + + sock.receive = function (...) + if receive_func then + receive_func(...) + end + return old_receive(...) + end + + return sock + end + + end) + + lazy_teardown(function() + if resolv_path then + os.remove(resolv_path) + end + if hosts_path then + os.remove(hosts_path) + end + + _G.ngx.socket.udp = old_udp + end) + + before_each(function() + -- inject r.query + package.loaded["resty.dns.resolver"] = nil + resolver = require("resty.dns.resolver") + + -- replace this `query_func` upvalue to spy on resolver query calls. 
+ query_func = function(self, original_query_func, name, options) + return original_query_func(self, name, options) + end + + local old_new = resolver.new + resolver.new = function(...) + local r, err = old_new(...) + if not r then + return nil, err + end + local original_query_func = r.query + r.query = function(self, ...) + return query_func(self, original_query_func, ...) + end + return r + end + + -- restore its API overlapped by the compatible layer + package.loaded["kong.resty.dns_client"] = nil + client = require("kong.resty.dns_client") + client.resolve = client._resolve + end) + + after_each(function() + package.loaded["resty.dns.resolver"] = nil + resolver = nil + query_func = nil + + package.loaded["kong.resty.dns.client"] = nil + client = nil + + receive_func = nil + end) + + + describe("initialization", function() + + it("succeeds if hosts/resolv.conf fails", function() + local cli, err = client.new({ + nameservers = TEST_NSS, + hosts = "non/existent/file", + resolv_conf = "non/exitent/file", + }) + assert.is.Nil(err) + assert.same(cli.r_opts.nameservers, TEST_NSS) + end) + + describe("inject localhost", function() + + it("if absent", function() + writefile(resolv_path, "") + writefile(hosts_path, "") -- empty hosts + + local cli = assert(client_new()) + local answers = cli.cache:get("localhost:28") + assert.equal("[::1]", answers[1].address) + + answers = cli.cache:get("localhost:1") + assert.equal("127.0.0.1", answers[1].address) + + answers = cli:resolve("localhost") + assert.equal("127.0.0.1", answers[1].address) + end) + + it("not if ipv4 exists", function() + writefile(hosts_path, "1.2.3.4 localhost") + local cli = assert(client_new()) + + -- IPv6 is not defined + local answers = cli.cache:get("localhost:28") + assert.is_nil(answers) + + -- IPv4 is not overwritten + answers = cli.cache:get("localhost:1") + assert.equal("1.2.3.4", answers[1].address) + end) + + it("not if ipv6 exists", function() + writefile(hosts_path, "::1:2:3:4 localhost") + 
local cli = assert(client_new()) + + -- IPv6 is not overwritten + local answers = cli.cache:get("localhost:28") + assert.equal("[::1:2:3:4]", answers[1].address) + + -- IPv4 is not defined + answers = cli.cache:get("localhost:1") + assert.is_nil(answers) + end) + end) + end) + + + describe("iterating searches", function() + local function hook_query_func_get_list() + local list = {} + query_func = function(self, original_query_func, name, options) + table.insert(list, name .. ":" .. options.qtype) + return {} -- empty answers + end + return list + end + + describe("without type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("host") + + assert.same(answers, nil) + assert.same(err, "no available records") + assert.same({ + 'host.one.com:33', + 'host.two.com:33', + 'host:33', + 'host.one.com:1', + 'host.two.com:1', + 'host:1', + 'host.one.com:28', + 'host.two.com:28', + 'host:28', + 'host.one.com:5', + 'host.two.com:5', + 'host:5', + }, list) + end) + + it("works with a 'search .' 
option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search .", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("host") + + assert.same(answers, nil) + assert.same(err, "no available records") + assert.same({ + 'host:33', + 'host:1', + 'host:28', + 'host:5', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("host") + + assert.same(answers, nil) + assert.same(err, "no available records") + assert.same({ + 'host.local.domain.com:33', + 'host:33', + 'host.local.domain.com:1', + 'host:1', + 'host.local.domain.com:28', + 'host:28', + 'host.local.domain.com:5', + 'host:5', + }, list) + end) + + it("handles last successful type", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:insert_last_type("host", resolver.TYPE_CNAME) + + cli:resolve("host") + + assert.same({ + 'host.one.com:5', + 'host.two.com:5', + 'host:5', + 'host.one.com:33', + 'host.two.com:33', + 'host:33', + 'host.one.com:1', + 'host.two.com:1', + 'host:1', + 'host.one.com:28', + 'host.two.com:28', + 'host:28', + }, list) + end) + end) + + describe("FQDN without type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("host.") + + assert.same({ + 'host.:33', + 'host.:1', + 'host.:28', + 'host.:5', + }, list) + end) + + it("works with a 'search .' 
option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search .", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("host.") + + assert.same({ + 'host.:33', + 'host.:1', + 'host.:28', + 'host.:5', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("host.") + + assert.same({ + 'host.:33', + 'host.:1', + 'host.:28', + 'host.:5', + }, list) + end) + + it("handles last successful type", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:insert_last_type("host.", resolver.TYPE_CNAME) + + cli:resolve("host.") + assert.same({ + 'host.:5', + 'host.:33', + 'host.:1', + 'host.:28', + }, list) + end) + + end) + + describe("with type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + cli:resolve("host") + + assert.same({ + 'host.one.com:28', + 'host.two.com:28', + 'host:28', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + cli:resolve("host") + + assert.same({ + 'host.local.domain.com:28', + 'host:28', + }, list) + end) + + it("ignores last successful type", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com 
two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + cli:insert_last_type("host", resolver.TYPE_CNAME) + + cli:resolve("host") + assert.same({ + 'host.one.com:28', + 'host.two.com:28', + 'host:28', + }, list) + end) + + end) + + describe("FQDN with type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + cli:resolve("host.") + assert.same({ + 'host.:28', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + cli:resolve("host.") + + assert.same({ + 'host.:28', + }, list) + end) + + it("ignores last successful type", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + cli:insert_last_type("host", resolver.TYPE_CNAME) + + cli:resolve("host.") + + assert.same({ + 'host.:28', + }, list) + end) + end) + + it("honours 'ndots'", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("local.host") + + assert.same({ + 'local.host:33', + 'local.host:1', + 'local.host:28', + 'local.host:5', + }, list) + end) + + it("hosts file always resolves first, overriding `ndots`", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.com two.com", + "options 
ndots:1", + }) + writefile(hosts_path, { + "127.0.0.1 host", + "::1 host", + }) + + local list = hook_query_func_get_list() + -- preferred IP type: IPv4 (A takes priority in order) + local cli = assert(client_new({ order = { "LAST", "SRV", "A", "AAAA" } })) + local answers = cli:resolve("host") + assert.same(answers[1].address, "127.0.0.1") + assert.same({}, list) -- hit on cache, so no query to the nameserver + + -- preferred IP type: IPv6 (AAAA takes priority in order) + local cli = assert(client_new({ order = { "LAST", "SRV", "AAAA", "A" } })) + local answers = cli:resolve("host") + assert.same(answers[1].address, "[::1]") + assert.same({}, list) + end) + end) + + -- This test will report an alert-level error message, ignore it. + it("low-level callback error", function() + receive_func = function(...) + error("CALLBACK") + end + + local cli = assert(client_new()) + + local orig_log = ngx.log + _G.ngx.log = function (...) end -- mute ALERT log + local answers, err = cli:resolve("srv.timeout.com") + _G.ngx.log = orig_log + assert.is_nil(answers) + assert.match("callback threw an error:.*CALLBACK", err) + end) + + describe("timeout", function () + it("dont try other types with the low-level error", function() + -- KAG-2300 https://github.com/Kong/kong/issues/10182 + -- When timed out, don't keep trying with other answer types. + writefile(resolv_path, { + "nameserver 198.51.100.0", + "options timeout:1", + "options attempts:3", + }) + + local query_count = 0 + query_func = function(self, original_query_func, name, options) + assert(options.qtype == resolver.TYPE_SRV) + query_count = query_count + 1 + return original_query_func(self, name, options) + end + + local receive_count = 0 + receive_func = function(...) 
+ receive_count = receive_count + 1 + return nil, "timeout" + end + + local cli = assert(client_new()) + assert.same(cli.r_opts.retrans, 3) + assert.same(cli.r_opts.timeout, 1) + + local answers, err = cli:resolve("srv.timeout.com") + assert.is_nil(answers) + assert.match("DNS server error: failed to receive reply from UDP server .*: timeout", err) + assert.same(receive_count, 3) + assert.same(query_count, 1) + end) + + -- KAG-2300 - https://github.com/Kong/kong/issues/10182 + -- If we encounter a timeout while talking to the DNS server, + -- expect the total timeout to be close to timeout * attempts parameters + for _, attempts in ipairs({1, 2}) do + for _, timeout in ipairs({1, 2}) do + it("options: timeout: " .. timeout .. " seconds, attempts: " .. attempts .. " times", function() + query_func = function(self, original_query_func, name, options) + ngx.sleep(math.min(timeout, 5)) + return nil, "timeout" .. timeout .. attempts + end + writefile(resolv_path, { + "nameserver 198.51.100.0", + "options timeout:" .. timeout, + "options attempts:" .. attempts, + }) + local cli = assert(client_new()) + assert.same(cli.r_opts.retrans, attempts) + assert.same(cli.r_opts.timeout, timeout) + + local start_time = ngx.now() + local answers, err = cli:resolve("timeout.com") + assert.is.Nil(answers) + assert.match("DNS server error: timeout" .. timeout .. 
attempts, err) -- the stubbed query error must reach the caller + local duration = ngx.now() - start_time + assert.truthy(duration < (timeout * attempts + 1)) + end) + end + end + end) + + it("fetching answers without nameservers errors", function() + writefile(resolv_path, "") + local host = TEST_DOMAIN + local typ = resolver.TYPE_A + + local cli = assert(client_new()) + local answers, err = cli:resolve(host, { qtype = typ }) + assert.is_nil(answers) + assert.same(err, "failed to instantiate the resolver: no nameservers specified") + end) + + it("fetching CNAME answers", function() + local host = "smtp."..TEST_DOMAIN + local typ = resolver.TYPE_CNAME + + local cli = assert(client_new({ nameservers = TEST_NSS })) + local answers = cli:resolve(host, { qtype = typ }) + + assert.are.equal(host, answers[1].name) + assert.are.equal(typ, answers[1].type) + assert.are.equal(#answers, 1) + end) + + it("fetching CNAME answers FQDN", function() + local host = "smtp."..TEST_DOMAIN + local typ = resolver.TYPE_CNAME + + local cli = assert(client_new({ nameservers = TEST_NSS })) + local answers = cli:resolve(host .. ".", { qtype = typ }) + + assert.are.equal(host, answers[1].name) -- answers name does not contain "." + assert.are.equal(typ, answers[1].type) + assert.are.equal(#answers, 1) + end) + + it("cache hit and ttl", function() + -- TODO: the special 0-ttl record may cause this test to fail + -- [{"name":"kong-gateway-testing.link","class":1,"address":"198.51.100.0", + -- "ttl":0,"type":1,"section":1}] + local host = TEST_DOMAIN + + local cli = assert(client_new({ nameservers = TEST_NSS })) + local answers = cli:resolve(host) + assert.are.equal(host, answers[1].name) + + local wait_time = 1 + ngx.sleep(wait_time) + + -- fetch again, now from cache + local answers2 = assert(cli:resolve(host)) + assert.are.equal(answers, answers2) -- same table from L1 cache + + local ttl, _, value = cli.cache:peek("fast:" .. host .. 
":all") + assert.same(answers, value) + local ttl_diff = answers.ttl - ttl + assert(math.abs(ttl_diff - wait_time) < 1, + ("ttl diff:%s s should be near to %s s"):format(ttl_diff, wait_time)) + end) + + it("fetching names case insensitive", function() + query_func = function(self, original_query_func, name, options) + return {{ + name = "some.UPPER.case", + type = resolver.TYPE_A, + ttl = 30, + }} + end + local cli = assert(client_new({ nameservers = TEST_NSS })) + local answers = cli:resolve("some.upper.CASE") + + assert.equal(1, #answers) + assert.equal("some.upper.case", answers[1].name) + end) + + it("fetching multiple A answers", function() + local host = "atest."..TEST_DOMAIN + local cli = assert(client_new({ nameservers = TEST_NSS, order = {"LAST", "A"}})) + local answers = assert(cli:resolve(host)) + assert.are.equal(#answers, 2) + assert.are.equal(host, answers[1].name) + assert.are.equal(resolver.TYPE_A, answers[1].type) + assert.are.equal(host, answers[2].name) + assert.are.equal(resolver.TYPE_A, answers[2].type) + end) + + it("fetching multiple A answers FQDN", function() + local host = "atest."..TEST_DOMAIN + local cli = assert(client_new({ nameservers = TEST_NSS, order = {"LAST", "A"}})) + local answers = assert(cli:resolve(host .. ".")) + assert.are.equal(#answers, 2) + assert.are.equal(host, answers[1].name) + assert.are.equal(resolver.TYPE_A, answers[1].type) + assert.are.equal(host, answers[2].name) + assert.are.equal(resolver.TYPE_A, answers[2].type) + end) + + it("fetching A answers redirected through 2 CNAME answerss (un-typed)", function() + writefile(resolv_path, "") -- search {} empty + + local host = "smtp."..TEST_DOMAIN + + local cli = assert(client_new({ nameservers = TEST_NSS })) + local answers = assert(cli:resolve(host)) + + -- check first CNAME + local key1 = host .. ":" .. 
resolver.TYPE_CNAME + local entry1 = cli.cache:get(key1) + assert.same(nil, entry1) + + assert.same({ + ["kong-gateway-testing.link"] = { + miss = 1, + runs = 1, + succ = 1 + }, + ["kong-gateway-testing.link:1"] = { + query = 1, + query_succ = 1 + }, + ["kong-gateway-testing.link:33"] = { + query = 1, + ["query_err:empty record received"] = 1 + }, + ["smtp.kong-gateway-testing.link"] = { + cname = 1, + miss = 1, + runs = 1 + }, + ["smtp.kong-gateway-testing.link:33"] = { + query = 1, + query_succ = 1 + } + }, cli.stats) + + -- check last successful lookup references + local lastsuccess = cli:get_last_type(answers[1].name) + assert.are.equal(resolver.TYPE_A, lastsuccess) + end) + + it("fetching multiple SRV answerss (un-typed)", function() + local host = "srvtest."..TEST_DOMAIN + local typ = resolver.TYPE_SRV + + -- un-typed lookup + local cli = assert(client_new({ nameservers = TEST_NSS})) + local answers = assert(cli:resolve(host)) + assert.are.equal(host, answers[1].name) + assert.are.equal(typ, answers[1].type) + assert.are.equal(host, answers[2].name) + assert.are.equal(typ, answers[2].type) + assert.are.equal(host, answers[3].name) + assert.are.equal(typ, answers[3].type) + assert.are.equal(#answers, 3) + end) + + it("fetching multiple SRV answerss through CNAME (un-typed)", function() + writefile(resolv_path, "") -- search {} empty + local host = "cname2srv."..TEST_DOMAIN + local typ = resolver.TYPE_SRV + + -- un-typed lookup + local cli = assert(client_new({ nameservers = TEST_NSS})) + local answers = assert(cli:resolve(host)) + + -- first check CNAME + local key = host .. ":" .. 
resolver.TYPE_CNAME + local entry = cli.cache:get(key) + assert.same(nil, entry) + + assert.same({ + ["cname2srv.kong-gateway-testing.link"] = { + miss = 1, + runs = 1, + succ = 1 + }, + ["cname2srv.kong-gateway-testing.link:33"] = { + query = 1, + query_succ = 1 + } + }, cli.stats) + + -- check final target + assert.are.equal(typ, answers[1].type) + assert.are.equal(typ, answers[2].type) + assert.are.equal(typ, answers[3].type) + assert.are.equal(#answers, 3) + end) + + it("fetching non-type-matching answerss", function() + local host = "srvtest."..TEST_DOMAIN + local typ = resolver.TYPE_A --> the entry is SRV not A + + writefile(resolv_path, "") -- search {} empty + local cli = assert(client_new({ nameservers = TEST_NSS})) + local answers, err = cli:resolve(host, { qtype = typ }) + assert.is_nil(answers) -- returns nil + assert.same("no available records", err) + end) + + it("fetching non-existing answerss", function() + local host = "IsNotHere."..TEST_DOMAIN + + writefile(resolv_path, "") -- search {} empty + local cli = assert(client_new({ nameservers = TEST_NSS})) + local answers, err = cli:resolve(host) + assert.is_nil(answers) + assert.equal("no available records", err) + end) + + it("fetching IP address", function() + local cli = assert(client_new({ nameservers = TEST_NSS})) + + local host = "1.2.3.4" + local answers = cli:resolve(host) + assert.same(answers[1].address, host) + + local host = "[1:2::3:4]" + local answers = cli:resolve(host) + assert.same(answers[1].address, host) + + local host = "1:2::3:4" + local answers = cli:resolve(host) + assert.same(answers[1].address, "[" .. host .. 
"]") + + -- ignore ipv6 format error, it only check ':' + local host = "[invalid ipv6 address:::]" + local answers = cli:resolve(host) + assert.same(answers[1].address, host) + end) + + it("fetching IPv6 in an SRV answers adds brackets",function() + local host = "hello.world" + local address = "::1" + local entry = {{ + type = resolver.TYPE_SRV, + target = address, + port = 321, + weight = 10, + priority = 10, + class = 1, + name = host, + ttl = 10, + }} + + query_func = function(self, original_query_func, name, options) + if name == host and options.qtype == resolver.TYPE_SRV then + return entry + end + return original_query_func(self, name, options) + end + + local cli = assert(client_new({ nameservers = TEST_NSS})) + local answers = cli:resolve( host, { qtype = resolver.TYPE_SRV }) + assert.equal("["..address.."]", answers[1].target) + end) + + it("recursive lookups failure - single resolve", function() + query_func = function(self, original_query_func, name, opts) + if name ~= "hello.world" and (opts or {}).qtype ~= resolver.TYPE_CNAME then + return original_query_func(self, name, opts) + end + return {{ + type = resolver.TYPE_CNAME, + cname = "hello.world", + class = 1, + name = "hello.world", + ttl = 30, + }} + end + + local cli = assert(client_new({ nameservers = TEST_NSS})) + local answers, err, _ = cli:resolve("hello.world") + assert.is_nil(answers) + assert.are.equal("recursion detected for name: hello.world", err) + end) + + it("recursive lookups failure - single", function() + local entry1 = {{ + type = resolver.TYPE_CNAME, + cname = "hello.world", + class = 1, + name = "hello.world", + ttl = 0, + }} + + -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table + local cli = assert(client_new({ nameservers = TEST_NSS})) + -- insert in the cache + cli.cache:set(entry1[1].name .. ":" .. 
entry1[1].type, { ttl = 0 }, entry1) + local answers, err, _ = cli:resolve("hello.world", { cache_only = true }) + assert.is_nil(answers) + assert.are.equal("recursion detected for name: hello.world", err) + end) + + it("recursive lookups failure - multi", function() + local entry1 = {{ + type = resolver.TYPE_CNAME, + cname = "bye.bye.world", + class = 1, + name = "hello.world", + ttl = 0, + }} + local entry2 = {{ + type = resolver.TYPE_CNAME, + cname = "hello.world", + class = 1, + name = "bye.bye.world", + ttl = 0, + }} + + -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table + local cli = assert(client_new({ nameservers = TEST_NSS})) + -- insert in the cache + cli.cache:set(entry1[1].name .. ":" .. entry1[1].type, { ttl = 0 }, entry1) + cli.cache:set(entry2[1].name .. ":" .. entry2[1].type, { ttl = 0 }, entry2) + local answers, err, _ = cli:resolve("hello.world", { cache_only = true }) + assert.is_nil(answers) + assert.are.equal("recursion detected for name: hello.world", err) + end) + + it("resolving from the /etc/hosts file; preferred A or AAAA order", function() + writefile(hosts_path, { + "127.3.2.1 localhost", + "1::2 localhost", + }) + local cli = assert(client_new({ + nameservers = TEST_NSS, + order = {"SRV", "CNAME", "A", "AAAA"} + })) + assert.equal(resolver.TYPE_A, cli:get_last_type("localhost")) -- success set to A as it is the preferred option + + local cli = assert(client_new({ + nameservers = TEST_NSS, + order = {"SRV", "CNAME", "AAAA", "A"} + })) + assert.equal(resolver.TYPE_AAAA, cli:get_last_type("localhost")) -- success set to AAAA as it is the preferred option + end) + + + it("resolving from the /etc/hosts file", function() + writefile(hosts_path, { + "127.3.2.1 localhost", + "1::2 localhost", + "123.123.123.123 mashape", + "1234::1234 kong.for.president", + }) + + local cli = assert(client_new({ nameservers = TEST_NSS })) + + local answers, err = cli:resolve("localhost", {qtype = 
resolver.TYPE_A}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "127.3.2.1") + + answers, err = cli:resolve("localhost", {qtype = resolver.TYPE_AAAA}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "[1::2]") + + answers, err = cli:resolve("mashape", {qtype = resolver.TYPE_A}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "123.123.123.123") + + answers, err = cli:resolve("kong.for.president", {qtype = resolver.TYPE_AAAA}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "[1234::1234]") + end) + + describe("toip() function", function() + it("A/AAAA-answers, round-robin",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local host = "atest."..TEST_DOMAIN + local answers = assert(cli:resolve(host)) + answers.last = nil -- make sure to clean + local ips = {} + for _,answers in ipairs(answers) do ips[answers.address] = true end + local order = {} + for n = 1, #answers do + local ip = cli:resolve(host, { return_random = true }) + ips[ip] = nil + order[n] = ip + end + -- this table should be empty again + assert.is_nil(next(ips)) + -- do again, and check same order + for n = 1, #order do + local ip = cli:resolve(host, { return_random = true }) + assert.same(order[n], ip) + end + end) + it("SRV-answers, round-robin on lowest prio",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local host = "hello.world.test" + local entry = { + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 8000, + weight = 5, + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 8001, + weight = 5, + priority = 20, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 8002, + weight = 5, + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + } + -- insert in the cache + cli.cache:set(entry[1].name .. ":" .. 
entry[1].type, {ttl=0}, entry) + + local results = {} + for _ = 1,20 do + local _, port = cli:resolve(host, { return_random = true }) + results[port] = (results[port] or 0) + 1 + end + + -- 20 passes, each should get 10 + assert.equal(0, results[8001] or 0) --priority 20, no hits + assert.equal(10, results[8000] or 0) --priority 10, 50% of hits + assert.equal(10, results[8002] or 0) --priority 10, 50% of hits + end) + it("SRV-answers with 1 entry, round-robin",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local host = "hello.world" + local entry = {{ + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 321, + weight = 10, + priority = 10, + class = 1, + name = host, + ttl = 10, + }} + -- insert in the cache + cli.cache:set(entry[1].name .. ":" .. entry[1].type, { ttl=0 }, entry) + + -- repeated lookups, as the first will simply serve the first entry + -- and the only second will setup the round-robin scheme, this is + -- specific for the SRV answers type, due to the weights + for _ = 1 , 10 do + local ip, port = cli:resolve(host, { return_random = true }) + assert.same("1.2.3.4", ip) + assert.same(321, port) + end + end) + it("SRV-answers with 0-weight, round-robin",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local host = "hello.world" + local entry = { + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 321, + weight = 0, --> weight 0 + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.5", + port = 321, + weight = 50, --> weight 50 + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.6", + port = 321, + weight = 50, --> weight 50 + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + } + -- insert in the cache + cli.cache:set(entry[1].name .. ":" .. 
entry[1].type, { ttl = 0 }, entry) + + -- weight 0 will be weight 1, without any reduction in weight + -- of the other ones. + local track = {} + for _ = 1 , 2002 do --> run around twice + local ip, _ = assert(cli:resolve(host, { return_random = true })) + track[ip] = (track[ip] or 0) + 1 + end + assert.equal(1000, track["1.2.3.5"]) + assert.equal(1000, track["1.2.3.6"]) + assert.equal(2, track["1.2.3.4"]) + end) + it("port passing",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local entry_a = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "a.answers.test", + ttl = 10, + }} + local entry_srv = {{ + type = resolver.TYPE_SRV, + target = "a.answers.test", + port = 8001, + weight = 5, + priority = 20, + class = 1, + name = "srv.answers.test", + ttl = 10, + }} + -- insert in the cache + cli.cache:set(entry_a[1].name..":"..entry_a[1].type, { ttl = 0 }, entry_a) + cli.cache:set(entry_srv[1].name..":"..entry_srv[1].type, { ttl = 0 }, entry_srv) + local ip, port + local host = "a.answers.test" + ip,port = cli:resolve(host, { return_random = true }) + assert.is_string(ip) + assert.is_nil(port) + + ip, port = cli:resolve(host, { return_random = true, port = 1234 }) + assert.is_string(ip) + assert.equal(1234, port) + + host = "srv.answers.test" + ip, port = cli:resolve(host, { return_random = true }) + assert.is_string(ip) + assert.is_number(port) + + ip, port = cli:resolve(host, { return_random = true, port = 0 }) + assert.is_string(ip) + assert.is_number(port) + assert.is_not.equal(0, port) + end) + + it("port passing if SRV port=0",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local ip, port, host + + host = "srvport0."..TEST_DOMAIN + ip, port = cli:resolve(host, { return_random = true, port = 10 }) + assert.is_string(ip) + assert.is_number(port) + assert.is_equal(10, port) + + ip, port = cli:resolve(host, { return_random = true }) + assert.is_string(ip) + assert.is_nil(port) + end) + + 
it("recursive SRV pointing to itself",function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local answers, port, host, err, _ + host = "srvrecurse."..TEST_DOMAIN + + -- resolve SRV specific should return the answers including its + -- recursive entry + answers, err, _ = cli:resolve(host, { qtype = resolver.TYPE_SRV }) + assert.is_table(answers) + assert.equal(1, #answers) + assert.equal(host, answers[1].target) + assert.equal(host, answers[1].name) + assert.is_nil(err) + + -- without an explicit qtype and with return_random, the self-referencing SRV + -- target is reported as a recursion error in the second return value + _, port, _ = cli:resolve(host, { return_random = true }) + assert.same(port, "recursion detected for name: srvrecurse.kong-gateway-testing.link") + end) + + it("resolving in correct answers-type order",function() + local function config(cli) + -- function to insert 2 answers (one A, one AAAA) in the cache + local A_entry = {{ + type = resolver.TYPE_A, + address = "5.6.7.8", + class = 1, + name = "hello.world", + ttl = 10, + }} + local AAAA_entry = {{ + type = resolver.TYPE_AAAA, + address = "::1", + class = 1, + name = "hello.world", + ttl = 10, + }} + -- insert in the cache + cli.cache:set(A_entry[1].name..":"..A_entry[1].type, { ttl=0 }, A_entry) + cli.cache:set(AAAA_entry[1].name..":"..AAAA_entry[1].type, { ttl=0 }, AAAA_entry) + end + local cli = assert(client_new({ nameservers = TEST_NSS, order = {"AAAA", "A"} })) + config(cli) + local ip,err = cli:resolve("hello.world", { return_random = true }) + assert.same(err, nil) + assert.equals(ip, "::1") + local cli = assert(client_new({ nameservers = TEST_NSS, order = {"A", "AAAA"}})) + config(cli) + ip = cli:resolve("hello.world", { return_random = true }) + assert.equals(ip, "5.6.7.8") + end) + it("handling of empty responses", function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local empty_entry = { + touch = 0, + expire = 0, + } + -- insert in the cache; NOTE(review): this assigns a "type:name" field on the cache object, while the other tests call cli.cache:set() with a "name:type" key - confirm this is intended + cli.cache[resolver.TYPE_A..":".."hello.world"] = empty_entry + + 
-- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table + local ip, port = cli:resolve("hello.world", { return_random = true, port = 123, cache_only = true }) + assert.is_nil(ip) + assert.is.string(port) -- error message + end) + it("recursive lookups failure", function() + local cli = assert(client_new({ nameservers = TEST_NSS })) + local entry1 = {{ + type = resolver.TYPE_CNAME, + cname = "bye.bye.world", + class = 1, + name = "hello.world", + ttl = 10, + }} + local entry2 = {{ + type = resolver.TYPE_CNAME, + cname = "hello.world", + class = 1, + name = "bye.bye.world", + ttl = 10, + }} + -- insert in the cache + cli.cache:set(entry1[1].name..":"..entry1[1].type, { ttl = 0 }, entry1) + cli.cache:set(entry2[1].name..":"..entry2[1].type, { ttl = 0 }, entry2) + + -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table + local ip, port, _ = cli:resolve("hello.world", { return_random = true, port = 123, cache_only = true }) + assert.is_nil(ip) + assert.are.equal("recursion detected for name: hello.world", port) + end) + end) + + it("verifies valid_ttl", function() + local valid_ttl = 0.1 + local empty_ttl = 0.1 + local stale_ttl = 0.1 + local qname = "konghq.com" + local cli = assert(client_new({ + nameservers = TEST_NSS, + empty_ttl = empty_ttl, + stale_ttl = stale_ttl, + valid_ttl = valid_ttl, + })) + -- mock query function to return a default answers + query_func = function(self, original_query_func, name, options) + return {{ + type = resolver.TYPE_A, + address = "5.6.7.8", + class = 1, + name = qname, + ttl = 10, + }} -- will add new field .ttl = valid_ttl + end + + local answers, _, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.equal(valid_ttl, answers.ttl) + + local ttl = cli.cache:peek("fast:" .. qname .. 
":1") + assert.is_near(valid_ttl, ttl, 0.1) + end) + + it("verifies ttl and caching of empty responses and name errors", function() + --empty/error responses should be cached for a configurable time + local empty_ttl = 0.1 + local stale_ttl = 0.1 + local qname = "really.really.really.does.not.exist."..TEST_DOMAIN + local cli = assert(client_new({ + nameservers = TEST_NSS, + empty_ttl = empty_ttl, + stale_ttl = stale_ttl, + })) + + -- mock query function to count calls + local call_count = 0 + query_func = function(self, original_query_func, name, options) + call_count = call_count + 1 + return original_query_func(self, name, options) + end + + -- make a first request, populating the cache + local answers1, answers2, err1, err2, _ + answers1, err1, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers1) + assert.are.equal(1, call_count) + assert.are.equal(NOT_FOUND_ERROR, err1) + answers1, err1 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) + assert.is_nil(answers1) + assert.is_nil(err1) -- nil, nil for cache miss + + -- make a second request, result from cache, still called only once + answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(1, call_count) + assert.are.equal(NOT_FOUND_ERROR, err2) + answers2, err2 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) + assert.is_nil(answers2) + assert.is_nil(err2) -- nil, nil for cache miss + + -- wait for expiry of _ttl and retry, still called only once + ngx.sleep(empty_ttl+0.5 * stale_ttl) + + -- we cant start stale-updating task for cache missed empty answers + answers2, err2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(NOT_FOUND_ERROR, err2) + assert.are.equal(2, call_count) + + answers2, err2 = cli.cache:get(qname .. ":" .. 
resolver.TYPE_A) + assert.is_nil(answers2) + assert.is_nil(err2) -- nil, nil for cache miss + + -- wait for expiry of stale_ttl and retry, should be called twice now + ngx.sleep(0.75 * stale_ttl) + assert.are.equal(2, call_count) + answers2, err2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(NOT_FOUND_ERROR, err2) + assert.are.equal(2, call_count) + + answers2, err2 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) + assert.is_nil(answers2) + assert.is_nil(err2) -- nil, nil for cache miss + end) + + it("verifies ttl and caching of (other) dns errors", function() + --empty responses should be cached for a configurable time + local error_ttl = 0.1 + local stale_ttl = 0.1 + local qname = "realname.com" + local cli = assert(client_new({ + nameservers = TEST_NSS, + error_ttl = error_ttl, + stale_ttl = stale_ttl, + })) + + -- mock query function to count calls, and return errors + local call_count = 0 + query_func = function(self, original_query_func, name, options) + call_count = call_count + 1 + return { errcode = 5, errstr = "refused" } + end + + -- initial request to populate the cache + local answers1, answers2, err1, err2, _ + answers1, err1, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers1) + assert.are.equal(call_count, 1) + assert.are.equal("no available records", err1) + answers1 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) + + -- try again, HIT from cache, not stale + answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(call_count, 1) + assert.are.equal(err1, err2) + answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert.are.equal(answers1, answers2) + assert.falsy(answers1.expired) + + -- wait for expiry of ttl and retry, HIT and stale + ngx.sleep(error_ttl + 0.5 * stale_ttl) + answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(call_count, 1) + assert.are.equal(err1, err2) + + answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) + assert.is_true(answers2.expired) + answers2.expired = nil -- clear to be same with answers1 + assert_same_answers(answers1, answers2) + answers2.expired = true + + -- async stale updating task + ngx.sleep(0.1 * stale_ttl) + assert.are.equal(call_count, 2) + + -- wait for expiry of stale_ttl and retry, 2 calls, new result + ngx.sleep(0.75 * stale_ttl) + assert.are.equal(call_count, 2) + + answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(call_count, 3) + assert.are.equal(err1, err2) + answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert.are_not.equal(answers1, answers2) -- a new answers + assert.falsy(answers2.expired) + end) + + describe("verifies the polling of dns queries, retries, and wait times", function() + local function threads_resolve(nthreads, name, cli) + cli = cli or assert(client_new({ nameservers = TEST_NSS })) + -- schedule `nthreads` light threads that each resolve `name` (type A) and + -- store the answers, or the error when there are none, in answers_list + local coros = {} + local answers_list = {} + for _ = 1, nthreads do + local co = ngx.thread.spawn(function () + coroutine.yield(coroutine.running()) + local answers, err = cli:resolve(name, { qtype = resolver.TYPE_A }) + table.insert(answers_list, (answers or err)) + end) + table.insert(coros, co) + end + for _, co in ipairs(coros) do + ngx.thread.wait(co) + end + return answers_list + end + + it("simultaneous lookups are synchronized to 1 lookup", function() + local call_count = 0 + query_func = function(self, original_query_func, name, options) + call_count = call_count + 1 + ngx.sleep(0.5) -- block all other threads + return original_query_func(self, name, options) + end + + local answers_list = threads_resolve(10, TEST_DOMAIN) + + assert.are.equal(1, call_count) + for _, answers in ipairs(answers_list) do + assert.same(answers_list[1], answers) + end + end) + + it("timeout while waiting", function() + + local ip = "1.4.2.3" + local timeout = 500 -- ms + local name = TEST_DOMAIN + -- insert a stub that waits and returns a fixed answer + query_func = function() + -- `+ 2` s ensures that the resty-lock expires + ngx.sleep(timeout / 1000 + 2) + return {{ + type = resolver.TYPE_A, + address = ip, + class = 1, + name = name, + ttl = 10, + }} + end + + local cli = assert(client_new({ + nameservers = TEST_NSS, + timeout = timeout, + retrans = 1, + })) + local answers_list = threads_resolve(10, name, cli) + + -- answers_list[1~9] are equal: each one holds the same lock-timeout error message + for i = 1, 9 do + assert.equal("could not acquire callback lock: timeout", answers_list[i]) + end + 
-- answers[10] comes from synchronous DNS access of the first request + assert.equal(ip, answers_list[10][1]["address"]) + end) + end) + +end) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua new file mode 100644 index 000000000000..ce82118dc759 --- /dev/null +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -0,0 +1,674 @@ +-- This test case file originates from the old version of the DNS client and has -- been modified to adapt to the new version of the DNS client. + +local utils = require("kong.tools.utils") +local _writefile = require("pl.utils").writefile +local tmpname = require("pl.path").tmpname +local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy + +-- hosted in Route53 in the AWS sandbox +local TEST_NS = "198.51.100.0" + +local TEST_NSS = { TEST_NS } + +local gettime = ngx.now +local sleep = ngx.sleep + +local function assert_same_answers(a1, a2) + a1 = cycle_aware_deep_copy(a1) + a1.ttl = nil + a1.expire = nil + + a2 = cycle_aware_deep_copy(a2) + a2.ttl = nil + a2.expire = nil + + assert.same(a1, a2) +end + +describe("[DNS client cache]", function() + local resolver, client, query_func, old_udp, receive_func + + local resolv_path, hosts_path + + local function writefile(path, text) + _writefile(path, type(text) == "table" and table.concat(text, "\n") or text) + end + + local function client_new(opts) + opts = opts or {} + opts.resolv_conf = resolv_path + opts.hosts = hosts_path + opts.nameservers = opts.nameservers or TEST_NSS + opts.cache_purge = true + return client.new(opts) + end + + lazy_setup(function() + -- create temp resolv.conf and hosts + resolv_path = tmpname() + hosts_path = tmpname() + ngx.log(ngx.DEBUG, "create temp resolv.conf:", resolv_path, + " hosts:", hosts_path) + + -- hook sock:receive to do timeout test + old_udp = ngx.socket.udp + + _G.ngx.socket.udp = function (...) + local sock = old_udp(...) 
+ + local old_receive = sock.receive + + sock.receive = function (...) + if receive_func then + receive_func(...) + end + return old_receive(...) + end + + return sock + end + + end) + + lazy_teardown(function() + if resolv_path then + os.remove(resolv_path) + end + if hosts_path then + os.remove(hosts_path) + end + + _G.ngx.socket.udp = old_udp + end) + + before_each(function() + -- inject r.query + package.loaded["resty.dns.resolver"] = nil + resolver = require("resty.dns.resolver") + + -- replace this `query_func` upvalue to spy on resolver query calls. + query_func = function(self, original_query_func, name, options) + return original_query_func(self, name, options) + end + + local old_new = resolver.new + resolver.new = function(...) + local r, err = old_new(...) + if not r then + return nil, err + end + local original_query_func = r.query + r.query = function(self, ...) + return query_func(self, original_query_func, ...) + end + return r + end + + -- restore its API overlapped by the compatible layer + package.loaded["kong.resty.dns_client"] = nil + client = require("kong.resty.dns_client") + client.resolve = client._resolve + end) + + after_each(function() + package.loaded["resty.dns.resolver"] = nil + resolver = nil + query_func = nil + package.loaded["kong.resty.dns_client"] = nil -- unload the module that before_each required and patched + package.loaded["kong.resty.dns.client"] = nil + client = nil + + receive_func = nil + end) + + describe("shortnames caching", function() + + local cli, mock_records, config + before_each(function() + writefile(resolv_path, "search domain.com") + config = { + nameservers = { "198.51.100.0" }, + ndots = 1, + search = { "domain.com" }, + hosts = {}, + order = { "LAST", "SRV", "A", "AAAA", "CNAME" }, + error_ttl = 0.5, + stale_ttl = 0.5, + enable_ipv6 = false, + } + cli = assert(client_new(config)) + + query_func = function(self, original_query_func, qname, opts) + return mock_records[qname..":"..opts.qtype] or { errcode = 3, errstr = "name error" } + end + end) + + it("are stored in cache without type", function() + 
mock_records = { + ["myhost1.domain.com:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost1.domain.com", + ttl = 30, + }} + } + + local answers = cli:resolve("myhost1") + assert.equal(answers, cli.cache:get("fast:myhost1:all")) + end) + + it("are stored in cache with type", function() + mock_records = { + ["myhost2.domain.com:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost2.domain.com", + ttl = 30, + }} + } + + local answers = cli:resolve("myhost2", { qtype = resolver.TYPE_A }) + assert.equal(answers, cli.cache:get("fast:myhost2:" .. resolver.TYPE_A)) + end) + + it("are resolved from cache without type", function() + mock_records = {} + cli.cache:set("fast:myhost3:all", {ttl=30+4}, {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost3.domain.com", + ttl = 30, + }, + ttl = 30, + expire = gettime() + 30, + }) + + local answers = cli:resolve("myhost3") + assert.same(answers, cli.cache:get("fast:myhost3:all")) + end) + + it("are resolved from cache with type", function() + mock_records = {} + local cli = client_new() + cli.cache:set("fast:myhost4:" .. resolver.TYPE_A, {ttl=30+4}, {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost4.domain.com", + ttl = 30, + }, + ttl = 30, + expire = gettime() + 30, + }) + + local answers = cli:resolve("myhost4", { qtype = resolver.TYPE_A }) + assert.equal(answers, cli.cache:get("fast:myhost4:" .. 
resolver.TYPE_A)) + end) + + it("of dereferenced CNAME are stored in cache", function() + mock_records = { + ["myhost5.domain.com:"..resolver.TYPE_CNAME] = {{ + type = resolver.TYPE_CNAME, + class = 1, + name = "myhost5.domain.com", + cname = "mytarget.domain.com", + ttl = 30, + }}, + ["mytarget.domain.com:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "mytarget.domain.com", + ttl = 30, + }} + } + local answers = cli:resolve("myhost5") + assert_same_answers(mock_records["mytarget.domain.com:"..resolver.TYPE_A], answers) -- not the test, intermediate validation + + -- the type-unspecific query was answered by the CNAME, so that should be in the + -- shortname cache + answers = cli.cache:get("fast:myhost5:all") + assert_same_answers(mock_records["myhost5.domain.com:"..resolver.TYPE_CNAME], answers) + end) + + it("ttl in cache is honored for short name entries", function() + -- in the short name case the same record is inserted again in the cache + -- and the lru-ttl has to be calculated, make sure it is correct + mock_records = { + ["myhost6.domain.com:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost6.domain.com", + ttl = 0.1, + }} + } + local mock_copy = utils.cycle_aware_deep_copy(mock_records) + + -- resolve and check whether we got the mocked record + local answers = cli:resolve("myhost6") + assert_same_answers(answers, mock_records["myhost6.domain.com:"..resolver.TYPE_A]) + + -- replace our mocked list with the copy made (new table, so no equality) + mock_records = mock_copy + + -- wait past the record ttl of 0.1 plus half of stale_ttl + sleep(0.1 + config.stale_ttl / 2) + + -- fresh result, but it should not affect answers2 + mock_records["myhost6.domain.com:"..resolver.TYPE_A][1].tag = "new" -- TODO flakiness + + -- resolve again, now getting same record, but stale, this will trigger + -- background refresh query + local answers2 = cli:resolve("myhost6") + assert.is_true(answers2.expired) -- stale; marked 
as expired + answers2.expired = nil + assert_same_answers(answers2, answers) + answers2.expired = true + + -- wait for refresh to complete + sleep(0.1) + + -- resolve and check whether we got the new record from the mock copy + local answers3 = cli:resolve("myhost6") + assert.not_equal(answers, answers3) -- must be a different record now + assert_same_answers(answers3, mock_records["myhost6.domain.com:"..resolver.TYPE_A]) + + -- the 'answers3' resolve call above will also trigger a new background query + -- (because the sleep of 0.1 equals the records ttl of 0.1) + -- so let's yield to activate that background thread now. If not done so, + -- the `after_each` will clear `query_func` and an error will appear on the + -- next test after this one that will yield. + sleep(0.1) + end) + + it("errors are not stored", function() + local rec = { + errcode = 4, + errstr = "server failure", + } + mock_records = { + ["myhost7.domain.com:"..resolver.TYPE_A] = rec, + ["myhost7:"..resolver.TYPE_A] = rec, + } + + local answers, err = cli:resolve("myhost7", { qtype = resolver.TYPE_A }) + assert.is_nil(answers) + -- TODO: check tries for detailed error + --assert.equal("dns server error: 4 server failure", err) + assert.equal("no available records", err) + assert.is_nil(cli.cache:get("fast:myhost7:" .. resolver.TYPE_A)) + end) + + it("name errors are not stored", function() + local rec = { + errcode = 3, + errstr = "name error", + } + mock_records = { + ["myhost8.domain.com:"..resolver.TYPE_A] = rec, + ["myhost8:"..resolver.TYPE_A] = rec, + } + + local answers, err = cli:resolve("myhost8", { qtype = resolver.TYPE_A }) + assert.is_nil(answers) + -- TODO + --assert.equal("dns server error: 3 name error", err) + assert.equal("no available records", err) + assert.is_nil(cli.cache:get("fast:myhost8:" .. 
resolver.TYPE_A)) + end) + + end) + + + describe("fqdn caching", function() + + local cli, mock_records, config + before_each(function() + writefile(resolv_path, "search domain.com") + config = { + nameservers = { "198.51.100.0" }, + ndots = 1, + search = { "domain.com" }, + hosts = {}, + resolvConf = {}, + order = { "LAST", "SRV", "A", "AAAA", "CNAME" }, + error_ttl = 0.5, + stale_ttl = 0.5, + enable_ipv6 = false, + } + cli = assert(client_new(config)) + + query_func = function(self, original_query_func, qname, opts) + return mock_records[qname..":"..opts.qtype] or { errcode = 3, errstr = "name error" } + end + end) + + it("errors do not replace stale records", function() + local rec1 = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost9.domain.com", + ttl = 0.1, + }} + mock_records = { + ["myhost9.domain.com:"..resolver.TYPE_A] = rec1, + } + + local answers, err = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_nil(err) + -- check that the cache is properly populated + assert_same_answers(rec1, answers) + answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + assert_same_answers(rec1, answers) + + sleep(0.15) -- make sure we surpass the ttl of 0.1 of the record, so it is now stale. 
+ -- new mock records, such that we return server failures instead of records + local rec2 = { + errcode = 4, + errstr = "server failure", + } + mock_records = { + ["myhost9.domain.com:"..resolver.TYPE_A] = rec2, + ["myhost9:"..resolver.TYPE_A] = rec2, + } + -- doing a resolve will trigger the background query now + answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_true(answers.expired) -- we get the stale record, now marked as expired + -- wait again for the background query to complete + sleep(0.1) + -- background resolve is now complete, check the cache, it should still have the + -- stale record, and it should not have been replaced by the error + -- + answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + assert.is_true(answers.expired) + answers.expired = nil + assert_same_answers(rec1, answers) + end) + + it("name errors do replace stale records", function() + local rec1 = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost9.domain.com", + ttl = 0.1, + }} + mock_records = { + ["myhost9.domain.com:"..resolver.TYPE_A] = rec1, + } + + local answers, err = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_nil(err) + -- check that the cache is properly populated + assert_same_answers(rec1, answers) + answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + assert_same_answers(rec1, answers) + + sleep(0.15) -- make sure we surpass the ttl of 0.1 of the record, so it is now stale. 
+ -- replace mock records, such that we return name errors instead of records + local rec2 = { + errcode = 3, + errstr = "name error", + } + mock_records = { + ["myhost9.domain.com:"..resolver.TYPE_A] = rec2, + ["myhost9:"..resolver.TYPE_A] = rec2, + } + -- doing a resolve will trigger the background query now + answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_true(answers.expired) -- we get the stale record, now marked as expired + -- wait again for the background query to complete + sleep(0.1) + -- background resolve is now complete, check the cache, it should now have been + -- replaced by the name error + assert.equal(rec2, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) + end) + + it("empty records do not replace stale records", function() + local rec1 = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost9.domain.com", + ttl = 0.1, + }} + mock_records = { + ["myhost9.domain.com:"..resolver.TYPE_A] = rec1, + } + + local answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + -- check that the cache is properly populated + assert_same_answers(rec1, answers) + assert_same_answers(rec1, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) + + sleep(0.15) -- surpass the record ttl of 0.1, so it is now stale + -- replace mock records, such that we return an empty record list instead of records + local rec2 = {} + mock_records = { + ["myhost9.domain.com:"..resolver.TYPE_A] = rec2, + ["myhost9:"..resolver.TYPE_A] = rec2, + } + -- doing a resolve will trigger the background query now + answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_true(answers.expired) -- we get the stale record, now marked as expired + -- wait again for the background query to complete + sleep(0.1) + -- background resolve is now complete, check the cache, it should still have the + -- stale record, and it should not have been replaced by the empty record + answers = cli.cache:get("myhost9.domain.com:" .. 
resolver.TYPE_A) + assert.is_true(answers.expired) -- we get the stale record, now marked as expired + answers.expired = nil + assert_same_answers(rec1, answers) + end) + + it("AS records do replace stale records", function() + -- when the additional section provides records, they should be stored + -- in the cache, as in some cases lookups of certain types (eg. CNAME) are + -- blocked, and then we rely on the A record to get them in the AS + -- (additional section), but then they must be stored obviously. + local CNAME1 = { + type = resolver.TYPE_CNAME, + cname = "myotherhost.domain.com", + class = 1, + name = "myhost9.domain.com", + ttl = 0.1, + } + local A2 = { + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myotherhost.domain.com", + ttl = 60, + } + mock_records = setmetatable({ + ["myhost9.domain.com:"..resolver.TYPE_CNAME] = { utils.cycle_aware_deep_copy(CNAME1) }, -- copy to make it different + ["myhost9.domain.com:"..resolver.TYPE_A] = { CNAME1, A2 }, -- not there, just a reference and target + ["myotherhost.domain.com:"..resolver.TYPE_A] = { A2 }, + }, { + -- do not do lookups, return empty on anything else + __index = function(self, key) + --print("looking for ",key) + return {} + end, + }) + + assert(cli:resolve("myhost9", { qtype = resolver.TYPE_CNAME })) + ngx.sleep(0.2) -- wait for it to become stale + assert(cli:resolve("myhost9"), { return_random = true }) + + local cached = cli.cache:get("myhost9.domain.com:" ..
resolver.TYPE_CNAME) + assert.same(nil, cached) + end) + + end) + +-- ============================================== +-- success type caching +-- ============================================== + + + describe("success types", function() + + local cli + local mock_records + before_each(function() + writefile(resolv_path, "search domain.com") + local config = { + ndots = 1, + search = { "domain.com" }, + hosts = {}, + resolvConf = {}, + order = { "LAST", "SRV", "A", "AAAA", "CNAME" }, + error_ttl = 0.5, + stale_ttl = 0.5, + enable_ipv6 = false, + } + cli = assert(client_new(config)) + + query_func = function(self, original_query_func, qname, opts) + return mock_records[qname..":"..opts.qtype] or { errcode = 3, errstr = "name error" } + end + end) + + it("in add. section are not stored for non-listed types", function() + mock_records = { + ["demo.service.consul:" .. resolver.TYPE_SRV] = { + { + type = resolver.TYPE_SRV, + class = 1, + name = "demo.service.consul", + target = "192.168.5.232.node.api_test.consul", + priority = 1, + weight = 1, + port = 32776, + ttl = 0, + }, { + type = resolver.TYPE_TXT, -- Not in the `order` as configured ! + class = 1, + name = "192.168.5.232.node.api_test.consul", + txt = "consul-network-segment=", + ttl = 0, + }, + } + } + cli:resolve("demo.service.consul", { return_random = true }) + local success = cli.cache:get("192.168.5.232.node.api_test.consul") + assert.not_equal(resolver.TYPE_TXT, success) + end) + + it("in add. section are stored for listed types", function() + mock_records = { + ["demo.service.consul:" .. resolver.TYPE_SRV] = { + { + type = resolver.TYPE_SRV, + class = 1, + name = "demo.service.consul", + target = "192.168.5.232.node.api_test.consul", + priority = 1, + weight = 1, + port = 32776, + ttl = 0, + }, { + type = resolver.TYPE_A, -- In configured `order` ! 
+ class = 1, + name = "192.168.5.232.node.api_test.consul", + address = "192.168.5.232", + ttl = 0, + }, { + type = resolver.TYPE_TXT, -- Not in the `order` as configured ! + class = 1, + name = "192.168.5.232.node.api_test.consul", + txt = "consul-network-segment=", + ttl = 0, + }, + } + } + local _, err, tries = cli:resolve("demo.service.consul", { return_random = true }) + assert.same(err, "no available records") + assert.same({ + { + "192.168.5.232.node.api_test.consul", + 33, + "DNS server replied error: name error" + }, + { + "192.168.5.232.node.api_test.consul", + 1, + "DNS server replied error: name error" + }, + { + "192.168.5.232.node.api_test.consul", + 28, + "DNS server replied error: name error" + }, + { + "192.168.5.232.node.api_test.consul", + 5, + "DNS server replied error: name error" + } + }, tries) + end) + + it("are not overwritten by add. section info", function() + mock_records = { + ["demo.service.consul:" .. resolver.TYPE_SRV] = { + { + type = resolver.TYPE_SRV, + class = 1, + name = "demo.service.consul", + target = "192.168.5.232.node.api_test.consul", + priority = 1, + weight = 1, + port = 32776, + ttl = 0, + }, { + type = resolver.TYPE_A, -- In configured `order` ! + class = 1, + name = "another.name.consul", + address = "192.168.5.232", + ttl = 0, + }, + } + } + cli:insert_last_type("another.name.consul", resolver.TYPE_AAAA) + cli:resolve("demo.service.consul", { return_random = true }) + local success = cli:get_last_type("another.name.consul") + assert.equal(resolver.TYPE_AAAA, success) + end) + + end) + + + describe("hosts entries", function() + -- hosts file names are cached for 10 years, verify that + -- it is not overwritten with valid_ttl settings. 
+ -- Regressions reported in https://github.com/Kong/kong/issues/7444 + local cli, mock_records, config -- luacheck: ignore + writefile(resolv_path, "") + writefile(hosts_path, "127.0.0.1 myname.lan") + before_each(function() + config = { + nameservers = { "198.51.100.0" }, + --hosts = {"127.0.0.1 myname.lan"}, + --resolvConf = {}, + valid_ttl = 0.1, + stale_ttl = 0, + } + + cli = assert(client_new(config)) + end) + + it("entries from hosts file ignores valid_ttl overrides, Kong/kong #7444", function() + ngx.sleep(0.2) -- must be > valid_ttl + stale_ttl + + local record = cli.cache:get("myname.lan:1") + assert.equal("127.0.0.1", record[1].address) + end) + end) + +end) diff --git a/spec/fixtures/shared_dict.lua b/spec/fixtures/shared_dict.lua index c552376ecaff..7e6c350fea03 100644 --- a/spec/fixtures/shared_dict.lua +++ b/spec/fixtures/shared_dict.lua @@ -13,6 +13,9 @@ local dicts = { "kong_db_cache_2 16m", "kong_db_cache_miss 12m", "kong_db_cache_miss_2 12m", + "kong_dns_cache 10m", + "kong_dns_cache_miss 10m", + "kong_dns_cache_ipc 5m", "kong_mock_upstream_loggers 10m", "kong_secrets 5m", "test_vault 5m", diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 4f8bf45333ec..1ff5771519c3 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -37,7 +37,10 @@ end --- Expires a record now. -- @param record a DNS record previously created -function _M.dnsExpire(record) +function _M.dnsExpire(client, record) + local dnscache = client.getcache() + dnscache:set(record[1].name .. ":" .. record[1].type, nil) + dnscache:set("fast:" .. record[1].name .. ":" .. 
"all", nil) record.expire = gettime() - 1 end @@ -76,12 +79,13 @@ function _M.dnsSRV(client, records, staleTtl) -- set timeouts records.touch = gettime() records.expire = gettime() + records[1].ttl + records.ttl = records[1].ttl -- create key, and insert it - local key = records[1].type..":"..records[1].name + local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) -- insert last-succesful lookup type - dnscache:set(records[1].name, records[1].type) + client.getobj():insert_last_type(records[1].name, records[1].type) return records end @@ -117,12 +121,14 @@ function _M.dnsA(client, records, staleTtl) -- set timeouts records.touch = gettime() records.expire = gettime() + records[1].ttl + records.ttl = records[1].ttl -- create key, and insert it - local key = records[1].type..":"..records[1].name - dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) + local key = records[1].name..":"..records[1].type + --dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) + dnscache:set(key, records, records[1].ttl) -- insert last-succesful lookup type - dnscache:set(records[1].name, records[1].type) + client.getobj():insert_last_type(records[1].name, records[1].type) return records end @@ -157,12 +163,13 @@ function _M.dnsAAAA(client, records, staleTtl) -- set timeouts records.touch = gettime() records.expire = gettime() + records[1].ttl + records.ttl = records[1].ttl -- create key, and insert it - local key = records[1].type..":"..records[1].name + local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) -- insert last-succesful lookup type - dnscache:set(records[1].name, records[1].type) + client.getobj():insert_last_type(records[1].name, records[1].type) return records end From 6d29383a0f750a8b397dac092c58ed3476268217 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 21 Feb 2024 18:09:29 +0800 Subject: [PATCH 002/126] add files to 
kong-3.7.0-0.rockspec --- kong-3.8.0-0.rockspec | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kong-3.8.0-0.rockspec b/kong-3.8.0-0.rockspec index 22e1a2b937e7..b0df906afd4e 100644 --- a/kong-3.8.0-0.rockspec +++ b/kong-3.8.0-0.rockspec @@ -115,6 +115,10 @@ build = { ["kong.resty.dns.client"] = "kong/resty/dns/client.lua", ["kong.resty.dns.utils"] = "kong/resty/dns/utils.lua", + + ["kong.resty.dns_client"] = "kong/resty/dns_client/init.lua", + ["kong.resty.dns_client.utils"] = "kong/resty/dns_client/utils.lua", + ["kong.resty.ctx"] = "kong/resty/ctx.lua", ["kong.resty.mlcache"] = "kong/resty/mlcache/init.lua", From 8d1e4663ffbacb047a1f79485894723dfa935c48 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 21 Feb 2024 23:29:53 +0800 Subject: [PATCH 003/126] 30-new-dns-client/02-old_client_spec.lua: use CI nameserver instead --- .../30-new-dns-client/02-old_client_spec.lua | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 9a8a72d59bbe..955e90e4709a 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -37,7 +37,7 @@ describe("[DNS client]", function() local function client_new(opts) opts = opts or {} - opts.resolv_conf = resolv_path + opts.resolv_conf = opts.resolv_conf or resolv_path opts.hosts = hosts_path opts.cache_purge = true return client.new(opts) @@ -624,7 +624,7 @@ describe("[DNS client]", function() local host = "smtp."..TEST_DOMAIN local typ = resolver.TYPE_CNAME - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local answers = cli:resolve(host, { qtype = typ }) assert.are.equal(host, answers[1].name) @@ -636,7 +636,7 @@ describe("[DNS client]", function() local host = "smtp."..TEST_DOMAIN local typ = resolver.TYPE_CNAME - local cli = 
assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local answers = cli:resolve(host .. ".", { qtype = typ }) assert.are.equal(host, answers[1].name) -- answers name does not contain "." @@ -650,7 +650,7 @@ describe("[DNS client]", function() -- "ttl":0,"type":1,"section":1}] local host = TEST_DOMAIN - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local answers = cli:resolve(host) assert.are.equal(host, answers[1].name) @@ -676,7 +676,7 @@ describe("[DNS client]", function() ttl = 30, }} end - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local answers = cli:resolve("some.upper.CASE") assert.equal(1, #answers) @@ -685,7 +685,7 @@ describe("[DNS client]", function() it("fetching multiple A answers", function() local host = "atest."..TEST_DOMAIN - local cli = assert(client_new({ nameservers = TEST_NSS, order = {"LAST", "A"}})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"LAST", "A"}})) local answers = assert(cli:resolve(host)) assert.are.equal(#answers, 2) assert.are.equal(host, answers[1].name) @@ -696,7 +696,7 @@ describe("[DNS client]", function() it("fetching multiple A answers FQDN", function() local host = "atest."..TEST_DOMAIN - local cli = assert(client_new({ nameservers = TEST_NSS, order = {"LAST", "A"}})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"LAST", "A"}})) local answers = assert(cli:resolve(host .. 
".")) assert.are.equal(#answers, 2) assert.are.equal(host, answers[1].name) @@ -710,7 +710,7 @@ describe("[DNS client]", function() local host = "smtp."..TEST_DOMAIN - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers = assert(cli:resolve(host)) -- check first CNAME @@ -753,7 +753,7 @@ describe("[DNS client]", function() local typ = resolver.TYPE_SRV -- un-typed lookup - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers = assert(cli:resolve(host)) assert.are.equal(host, answers[1].name) assert.are.equal(typ, answers[1].type) @@ -770,7 +770,7 @@ describe("[DNS client]", function() local typ = resolver.TYPE_SRV -- un-typed lookup - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers = assert(cli:resolve(host)) -- first check CNAME @@ -802,7 +802,7 @@ describe("[DNS client]", function() local typ = resolver.TYPE_A --> the entry is SRV not A writefile(resolv_path, "") -- search {} empty - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers, err = cli:resolve(host, { qtype = typ }) assert.is_nil(answers) -- returns nil assert.same("no available records", err) @@ -812,14 +812,14 @@ describe("[DNS client]", function() local host = "IsNotHere."..TEST_DOMAIN writefile(resolv_path, "") -- search {} empty - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers, err = cli:resolve(host) assert.is_nil(answers) assert.equal("no available records", err) end) it("fetching IP address", function() - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local host = 
"1.2.3.4" local answers = cli:resolve(host) @@ -860,7 +860,7 @@ describe("[DNS client]", function() return original_query_func(self, name, options) end - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers = cli:resolve( host, { qtype = resolver.TYPE_SRV }) assert.equal("["..address.."]", answers[1].target) end) @@ -879,7 +879,7 @@ describe("[DNS client]", function() }} end - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local answers, err, _ = cli:resolve("hello.world") assert.is_nil(answers) assert.are.equal("recursion detected for name: hello.world", err) @@ -895,7 +895,7 @@ describe("[DNS client]", function() }} -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) -- insert in the cache cli.cache:set(entry1[1].name .. ":" .. entry1[1].type, { ttl = 0 }, entry1) local answers, err, _ = cli:resolve("hello.world", { cache_only = true }) @@ -920,7 +920,7 @@ describe("[DNS client]", function() }} -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table - local cli = assert(client_new({ nameservers = TEST_NSS})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) -- insert in the cache cli.cache:set(entry1[1].name .. ":" .. entry1[1].type, { ttl = 0 }, entry1) cli.cache:set(entry2[1].name .. ":" .. 
entry2[1].type, { ttl = 0 }, entry2) @@ -935,13 +935,13 @@ describe("[DNS client]", function() "1::2 localhost", }) local cli = assert(client_new({ - nameservers = TEST_NSS, + resolv_conf = "/etc/resolv.conf", order = {"SRV", "CNAME", "A", "AAAA"} })) assert.equal(resolver.TYPE_A, cli:get_last_type("localhost")) -- success set to A as it is the preferred option local cli = assert(client_new({ - nameservers = TEST_NSS, + resolv_conf = "/etc/resolv.conf", order = {"SRV", "CNAME", "AAAA", "A"} })) assert.equal(resolver.TYPE_AAAA, cli:get_last_type("localhost")) -- success set to AAAA as it is the preferred option @@ -977,7 +977,7 @@ describe("[DNS client]", function() describe("toip() function", function() it("A/AAAA-answers, round-robin",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local host = "atest."..TEST_DOMAIN local answers = assert(cli:resolve(host)) answers.last = nil -- make sure to clean @@ -998,7 +998,7 @@ describe("[DNS client]", function() end end) it("SRV-answers, round-robin on lowest prio",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local host = "hello.world.test" local entry = { { @@ -1047,7 +1047,7 @@ describe("[DNS client]", function() assert.equal(10, results[8002] or 0) --priority 10, 50% of hits end) it("SRV-answers with 1 entry, round-robin",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local host = "hello.world" local entry = {{ type = resolver.TYPE_SRV, @@ -1072,7 +1072,7 @@ describe("[DNS client]", function() end end) it("SRV-answers with 0-weight, round-robin",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local host = "hello.world" local entry = { { @@ 
-1121,7 +1121,7 @@ describe("[DNS client]", function() assert.equal(2, track["1.2.3.4"]) end) it("port passing",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local entry_a = {{ type = resolver.TYPE_A, address = "1.2.3.4", @@ -1164,7 +1164,7 @@ describe("[DNS client]", function() end) it("port passing if SRV port=0",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local ip, port, host host = "srvport0."..TEST_DOMAIN @@ -1179,7 +1179,7 @@ describe("[DNS client]", function() end) it("recursive SRV pointing to itself",function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers, port, host, err, _ host = "srvrecurse."..TEST_DOMAIN @@ -1219,18 +1219,18 @@ describe("[DNS client]", function() cli.cache:set(A_entry[1].name..":"..A_entry[1].type, { ttl=0 }, A_entry) cli.cache:set(AAAA_entry[1].name..":"..AAAA_entry[1].type, { ttl=0 }, AAAA_entry) end - local cli = assert(client_new({ nameservers = TEST_NSS, order = {"AAAA", "A"} })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"AAAA", "A"} })) config(cli) local ip,err = cli:resolve("hello.world", { return_random = true }) assert.same(err, nil) assert.equals(ip, "::1") - local cli = assert(client_new({ nameservers = TEST_NSS, order = {"A", "AAAA"}})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"A", "AAAA"} })) config(cli) ip = cli:resolve("hello.world", { return_random = true }) assert.equals(ip, "5.6.7.8") end) it("handling of empty responses", function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local empty_entry = { touch = 0, expire = 0, @@ -1244,7 +1244,7 @@ describe("[DNS client]", 
function() assert.is.string(port) -- error message end) it("recursive lookups failure", function() - local cli = assert(client_new({ nameservers = TEST_NSS })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local entry1 = {{ type = resolver.TYPE_CNAME, cname = "bye.bye.world", @@ -1276,7 +1276,7 @@ describe("[DNS client]", function() local stale_ttl = 0.1 local qname = "konghq.com" local cli = assert(client_new({ - nameservers = TEST_NSS, + resolv_conf = "/etc/resolv.conf", empty_ttl = empty_ttl, stale_ttl = stale_ttl, valid_ttl = valid_ttl, @@ -1305,7 +1305,7 @@ describe("[DNS client]", function() local stale_ttl = 0.1 local qname = "really.really.really.does.not.exist."..TEST_DOMAIN local cli = assert(client_new({ - nameservers = TEST_NSS, + resolv_conf = "/etc/resolv.conf", empty_ttl = empty_ttl, stale_ttl = stale_ttl, })) @@ -1368,7 +1368,7 @@ describe("[DNS client]", function() local stale_ttl = 0.1 local qname = "realname.com" local cli = assert(client_new({ - nameservers = TEST_NSS, + resolv_conf = "/etc/resolv.conf", error_ttl = error_ttl, stale_ttl = stale_ttl, })) @@ -1429,7 +1429,7 @@ describe("[DNS client]", function() describe("verifies the polling of dns queries, retries, and wait times", function() local function threads_resolve(nthreads, name, cli) - cli = cli or assert(client_new({ nameservers = TEST_NSS })) + cli = cli or assert(client_new({ resolv_conf = "/etc/resolv.conf" })) -- we're going to schedule a whole bunch of queries (lookup & stores answers) local coros = {} local answers_list = {} @@ -1482,7 +1482,7 @@ describe("[DNS client]", function() end local cli = assert(client_new({ - nameservers = TEST_NSS, + resolv_conf = "/etc/resolv.conf", timeout = timeout, retrans = 1, })) From bf40be81038ffe9f13a8316de299a2b67447aeb9 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 22 Feb 2024 00:31:22 +0800 Subject: [PATCH 004/126] return last answer error if no available answers --- kong/resty/dns_client/init.lua | 9 
+++++---- kong/runloop/balancer/init.lua | 1 + spec/01-unit/09-balancer/01-generic_spec.lua | 2 +- .../30-new-dns-client/02-old_client_spec.lua | 16 ++++++++-------- .../03-old_client_cache_spec.lua | 18 +++++++----------- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 08828c6fde41..1dcb93b4b3fc 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -51,7 +51,7 @@ local hitstrs = { local errstrs = { -- client specific errors [100] = "cache only lookup failed", - [101] = "empty record received", + [101] = "no available records", } local EMPTY_ANSWERS = { errcode = 3, errstr = "name error" } @@ -427,7 +427,7 @@ local function resolve_name_type(self, name, qtype, opts, tries) end if err or answers.errcode then - err = err or "DNS server replied error: " .. answers.errstr + err = err or ("dns server error: %s %s"):format(answers.errcode, answers.errstr) table_insert(tries, { name, qtype, err }) end @@ -478,9 +478,10 @@ local function resolve_names_and_types(self, name, opts, tries) local types = get_search_types(self, name, opts.qtype) local names = utils.search_names(name, self.resolv, self.hosts) + local err for _, qtype in ipairs(types) do for _, qname in ipairs(names) do - local answers, err = resolve_name_type(self, qname, qtype, opts, tries) + answers, err = resolve_name_type(self, qname, qtype, opts, tries) -- severe error occurred if not answers then @@ -495,7 +496,7 @@ local function resolve_names_and_types(self, name, opts, tries) end -- not found in the search iteration - return nil, "no available records", tries + return nil, err, tries end diff --git a/kong/runloop/balancer/init.lua b/kong/runloop/balancer/init.lua index 550c1055d84e..94caf967dfaf 100644 --- a/kong/runloop/balancer/init.lua +++ b/kong/runloop/balancer/init.lua @@ -371,6 +371,7 @@ local function execute(balancer_data, ctx) if not ip then log(ERR, "DNS resolution failed: ", 
port, ". Tried: ", tostring(try_list)) if port == "dns server error: 3 name error" or + port == "dns server error: 101 no available records" or port == "dns client error: 101 empty record received" then return nil, "name resolution failed", 503 end diff --git a/spec/01-unit/09-balancer/01-generic_spec.lua b/spec/01-unit/09-balancer/01-generic_spec.lua index 4a8daddd1daf..dc7bf33a940f 100644 --- a/spec/01-unit/09-balancer/01-generic_spec.lua +++ b/spec/01-unit/09-balancer/01-generic_spec.lua @@ -1821,7 +1821,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro { host = "notachanceinhell.this.name.exists.konghq.test", port = 4321, - dns = "no available records", + dns = "dns server error: 3 name error", nodeWeight = 100, weight = { total = 0, diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 955e90e4709a..cc8c417d031f 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -11,7 +11,7 @@ local TEST_NS = "192.51.100.0" local TEST_NSS = { TEST_NS } -local NOT_FOUND_ERROR = 'no available records' +local NOT_FOUND_ERROR = 'dns server error: 3 name error' local function assert_same_answers(a1, a2) a1 = cycle_aware_deep_copy(a1) @@ -203,7 +203,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("host") assert.same(answers, nil) - assert.same(err, "no available records") + assert.same(err, "dns server error: 101 no available records") assert.same({ 'host.one.com:33', 'host.two.com:33', @@ -232,7 +232,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("host") assert.same(answers, nil) - assert.same(err, "no available records") + assert.same(err, "dns server error: 101 no available records") assert.same({ 'host:33', 'host:1', @@ -253,7 +253,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("host") assert.same(answers, nil) - 
assert.same(err, "no available records") + assert.same(err, "dns server error: 101 no available records") assert.same({ 'host.local.domain.com:33', 'host:33', @@ -730,7 +730,7 @@ describe("[DNS client]", function() }, ["kong-gateway-testing.link:33"] = { query = 1, - ["query_err:empty record received"] = 1 + ["query_err:no available records"] = 1 }, ["smtp.kong-gateway-testing.link"] = { cname = 1, @@ -805,7 +805,7 @@ describe("[DNS client]", function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers, err = cli:resolve(host, { qtype = typ }) assert.is_nil(answers) -- returns nil - assert.same("no available records", err) + assert.equal("dns server error: 101 no available records", err) end) it("fetching non-existing answerss", function() @@ -815,7 +815,7 @@ describe("[DNS client]", function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers, err = cli:resolve(host) assert.is_nil(answers) - assert.equal("no available records", err) + assert.equal("dns server error: 3 name error", err) end) it("fetching IP address", function() @@ -1385,7 +1385,7 @@ describe("[DNS client]", function() answers1, err1, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.is_nil(answers1) assert.are.equal(call_count, 1) - assert.are.equal("no available records", err1) + assert.are.equal("dns server error: 5 refused", err1) answers1 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) -- try again, HIT from cache, not stale diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index ce82118dc759..5c3e31ce37c8 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -297,9 +297,7 @@ describe("[DNS client cache]", function() local answers, err = cli:resolve("myhost7", { qtype = resolver.TYPE_A }) assert.is_nil(answers) - -- TODO: check tries for detailed error - --assert.equal("dns server error: 4 server failure", err) - assert.equal("no available records", err) + assert.equal("dns server error: 4 server failure", err) assert.is_nil(cli.cache:get("fast:myhost7:" .. resolver.TYPE_A)) end) @@ -315,9 +313,7 @@ describe("[DNS client cache]", function() local answers, err = cli:resolve("myhost8", { qtype = resolver.TYPE_A }) assert.is_nil(answers) - -- TODO - --assert.equal("dns server error: 3 name error", err) - assert.equal("no available records", err) + assert.equal("dns server error: 3 name error", err) assert.is_nil(cli.cache:get("fast:myhost8:" .. 
resolver.TYPE_A)) end) @@ -589,27 +585,27 @@ describe("[DNS client cache]", function() } } local _, err, tries = cli:resolve("demo.service.consul", { return_random = true }) - assert.same(err, "no available records") + assert.same(err, "dns server error: 3 name error") assert.same({ { "192.168.5.232.node.api_test.consul", 33, - "DNS server replied error: name error" + "dns server error: 3 name error", }, { "192.168.5.232.node.api_test.consul", 1, - "DNS server replied error: name error" + "dns server error: 3 name error", }, { "192.168.5.232.node.api_test.consul", 28, - "DNS server replied error: name error" + "dns server error: 3 name error", }, { "192.168.5.232.node.api_test.consul", 5, - "DNS server replied error: name error" + "dns server error: 3 name error", } }, tries) end) From a46b3c748aff2ab9dacd0fab7181aebb05cf9f75 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 22 Feb 2024 10:59:13 +0800 Subject: [PATCH 005/126] set _G.busted_legacy_dns_client for original 21-dns-client/ tests --- kong/resty/dns/client.lua | 14 +++++++------- spec/01-unit/21-dns-client/02-client_spec.lua | 3 ++- .../01-unit/21-dns-client/03-client_cache_spec.lua | 3 ++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/kong/resty/dns/client.lua b/kong/resty/dns/client.lua index 57735597b34c..9121e4c17426 100644 --- a/kong/resty/dns/client.lua +++ b/kong/resty/dns/client.lua @@ -1,3 +1,10 @@ +-- Use the new dns client library instead. If you want to switch to the original +-- one, you can set `legacy_dns_client = on` in kong.conf. +if ngx.shared.kong_dns_cache and not _G.busted_legacy_dns_client then + package.loaded["kong.resty.dns_client"] = nil + return require("kong.resty.dns_client") +end + -------------------------------------------------------------------------- -- DNS client. -- @@ -19,13 +26,6 @@ -- @author Thijs Schreijer -- @license Apache 2.0 --- Use the new dns client library instead. 
If you want to switch to the original --- one, you can set `legacy_dns_client = on` in kong.conf. -if ngx.shared.kong_dns_cache and not _G.legacy_dns_client then - package.loaded["kong.resty.dns_client"] = nil - return require("kong.resty.dns_client") -end - local _ local utils = require("kong.resty.dns.utils") local fileexists = require("pl.path").exists diff --git a/spec/01-unit/21-dns-client/02-client_spec.lua b/spec/01-unit/21-dns-client/02-client_spec.lua index bf97abbd171c..e5a88c8e8d9c 100644 --- a/spec/01-unit/21-dns-client/02-client_spec.lua +++ b/spec/01-unit/21-dns-client/02-client_spec.lua @@ -39,7 +39,7 @@ describe("[DNS client]", function() local client, resolver before_each(function() - _G.legacy_dns_client = true + _G.busted_legacy_dns_client = true client = require("kong.resty.dns.client") resolver = require("resty.dns.resolver") @@ -72,6 +72,7 @@ describe("[DNS client]", function() end) after_each(function() + _G.busted_legacy_dns_client = nil package.loaded["kong.resty.dns.client"] = nil package.loaded["resty.dns.resolver"] = nil client = nil diff --git a/spec/01-unit/21-dns-client/03-client_cache_spec.lua b/spec/01-unit/21-dns-client/03-client_cache_spec.lua index c86cf57577d3..448bd8b8a923 100644 --- a/spec/01-unit/21-dns-client/03-client_cache_spec.lua +++ b/spec/01-unit/21-dns-client/03-client_cache_spec.lua @@ -22,7 +22,7 @@ describe("[DNS client cache]", function() local client, resolver before_each(function() - _G.legacy_dns_client = true + _G.busted_legacy_dns_client = true client = require("kong.resty.dns.client") resolver = require("resty.dns.resolver") @@ -56,6 +56,7 @@ describe("[DNS client cache]", function() end) after_each(function() + _G.busted_legacy_dns_client = nil package.loaded["kong.resty.dns.client"] = nil package.loaded["resty.dns.resolver"] = nil client = nil From b40e3ec1e1ae846fd40948da1f4714e8cc83cf74 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 22 Feb 2024 15:40:38 +0800 Subject: [PATCH 006/126] chores: 
better comment --- kong/resty/dns_client/init.lua | 4 ++-- .../30-new-dns-client/02-old_client_spec.lua | 4 ++-- .../03-old_client_cache_spec.lua | 18 +++++++++--------- spec/helpers/dns.lua | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 1dcb93b4b3fc..9c45a2bfd947 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -501,7 +501,7 @@ end local function resolve_all(self, name, opts, tries) - local key = "fast:" .. name .. ":" .. (opts.qtype or "all") + local key = "short:" .. name .. ":" .. (opts.qtype or "all") -- logt(tries, key) stats_init(self.stats, name) @@ -512,7 +512,7 @@ local function resolve_all(self, name, opts, tries) return nil, "recursion detected for name: " .. name end - -- lookup fastly with the key `fast::/all` + -- lookup fastly with the key `short::/all` local answers, err, hit_level = self.cache:get(key) if not answers or answers.expired then stats_count(self.stats, name, "miss") diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index cc8c417d031f..8eccf857c2cb 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -661,7 +661,7 @@ describe("[DNS client]", function() local answers2 = assert(cli:resolve(host)) assert.are.equal(answers, answers2) -- same table from L1 cache - local ttl, _, value = cli.cache:peek("fast:" .. host .. ":all") + local ttl, _, value = cli.cache:peek("short:" .. host .. ":all") assert.same(answers, value) local ttl_diff = answers.ttl - ttl assert(math.abs(ttl_diff - wait_time) < 1, @@ -1295,7 +1295,7 @@ describe("[DNS client]", function() local answers, _, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.equal(valid_ttl, answers.ttl) - local ttl = cli.cache:peek("fast:" .. qname .. ":1") + local ttl = cli.cache:peek("short:" .. qname .. 
":1") assert.is_near(valid_ttl, ttl, 0.1) end) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 5c3e31ce37c8..fc8cee1b57b4 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -155,7 +155,7 @@ describe("[DNS client cache]", function() } local answers = cli:resolve("myhost1") - assert.equal(answers, cli.cache:get("fast:myhost1:all")) + assert.equal(answers, cli.cache:get("short:myhost1:all")) end) it("are stored in cache with type", function() @@ -170,12 +170,12 @@ describe("[DNS client cache]", function() } local answers = cli:resolve("myhost2", { qtype = resolver.TYPE_A }) - assert.equal(answers, cli.cache:get("fast:myhost2:" .. resolver.TYPE_A)) + assert.equal(answers, cli.cache:get("short:myhost2:" .. resolver.TYPE_A)) end) it("are resolved from cache without type", function() mock_records = {} - cli.cache:set("fast:myhost3:all", {ttl=30+4}, {{ + cli.cache:set("short:myhost3:all", {ttl=30+4}, {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, @@ -187,13 +187,13 @@ describe("[DNS client cache]", function() }) local answers = cli:resolve("myhost3") - assert.same(answers, cli.cache:get("fast:myhost3:all")) + assert.same(answers, cli.cache:get("short:myhost3:all")) end) it("are resolved from cache with type", function() mock_records = {} local cli = client_new() - cli.cache:set("fast:myhost4:" .. resolver.TYPE_A, {ttl=30+4}, {{ + cli.cache:set("short:myhost4:" .. resolver.TYPE_A, {ttl=30+4}, {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, @@ -205,7 +205,7 @@ describe("[DNS client cache]", function() }) local answers = cli:resolve("myhost4", { qtype = resolver.TYPE_A }) - assert.equal(answers, cli.cache:get("fast:myhost4:" .. resolver.TYPE_A)) + assert.equal(answers, cli.cache:get("short:myhost4:" .. 
resolver.TYPE_A)) end) it("of dereferenced CNAME are stored in cache", function() @@ -230,7 +230,7 @@ describe("[DNS client cache]", function() -- the type un-specificc query was the CNAME, so that should be in the -- shorname cache - answers = cli.cache:get("fast:myhost5:all") + answers = cli.cache:get("short:myhost5:all") assert_same_answers(mock_records["myhost5.domain.com:"..resolver.TYPE_CNAME], answers) end) @@ -298,7 +298,7 @@ describe("[DNS client cache]", function() local answers, err = cli:resolve("myhost7", { qtype = resolver.TYPE_A }) assert.is_nil(answers) assert.equal("dns server error: 4 server failure", err) - assert.is_nil(cli.cache:get("fast:myhost7:" .. resolver.TYPE_A)) + assert.is_nil(cli.cache:get("short:myhost7:" .. resolver.TYPE_A)) end) it("name errors are not stored", function() @@ -314,7 +314,7 @@ describe("[DNS client cache]", function() local answers, err = cli:resolve("myhost8", { qtype = resolver.TYPE_A }) assert.is_nil(answers) assert.equal("dns server error: 3 name error", err) - assert.is_nil(cli.cache:get("fast:myhost8:" .. resolver.TYPE_A)) + assert.is_nil(cli.cache:get("short:myhost8:" .. resolver.TYPE_A)) end) end) diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 1ff5771519c3..497e6047091d 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -40,7 +40,7 @@ end function _M.dnsExpire(client, record) local dnscache = client.getcache() dnscache:set(record[1].name .. ":" .. record[1].type, nil) - dnscache:set("fast:" .. record[1].name .. ":" .. "all", nil) + dnscache:set("short:" .. record[1].name .. ":" .. 
"all", nil) record.expire = gettime() - 1 end From b40e417280f5f44d39f4eccaf29b420f57aea37b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 22 Feb 2024 16:21:40 +0800 Subject: [PATCH 007/126] add changelog --- changelog/unreleased/kong/refactor_dns_client.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog/unreleased/kong/refactor_dns_client.yml diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml new file mode 100644 index 000000000000..c49e03a42d4c --- /dev/null +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -0,0 +1,3 @@ +message: refactor and implement a new DNS client library +type: feature +scope: Core From 78d54a33beec6d5b950cbff50065888fe5978608 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 26 Feb 2024 13:11:53 +0800 Subject: [PATCH 008/126] automatically refresh stale-but-in-use records after @stale_refresh_interval --- kong/resty/dns_client/init.lua | 37 ++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 9c45a2bfd947..d323ac4713bf 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -12,7 +12,6 @@ local math_min = math.min local timer_at = ngx.timer.at local table_insert = table.insert local ipv6_bracket = utils.ipv6_bracket --- local deep_copy = function (t) return t end -- TODO require("kong.tools.utils").deep_copy -- debug --[[ @@ -26,8 +25,6 @@ local DEFAULT_ERROR_TTL = 1 -- unit: second local DEFAULT_STALE_TTL = 4 local DEFAULT_EMPTY_TTL = 30 -local DEFAULT_IP_TTL = 10 * 365 * 24 * 60 * 60 -- 10 years - local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } local TYPE_SRV = resolver.TYPE_SRV @@ -57,7 +54,7 @@ local errstrs = { -- client specific errors local EMPTY_ANSWERS = { errcode = 3, errstr = "name error" } ---- APIs +-- APIs local _M = {} local mt = { __index = _M } @@ -82,7 +79,7 
@@ local function stats_count(stats, name, key) end --- For TYPE_LAST: the DNS record from the last successful query +-- lookup or set TYPE_LAST (the DNS record type from the last successful query) local valid_types = { [ TYPE_SRV ] = true, [ TYPE_A ] = true, @@ -90,6 +87,7 @@ local valid_types = { [ TYPE_CNAME ] = true, } + local function insert_last_type(cache, name, qtype) if valid_types[qtype] then cache:set("last:" .. name, { ttl = 0 }, qtype) @@ -122,19 +120,21 @@ local function init_hosts(cache, path, preferred_ip_type) return end + local ttl = 10 * 365 * 24 * 60 * 60 -- 10 years ttl for hosts entries + local key = name .. ":" .. qtype local answers = { - ttl = DEFAULT_IP_TTL, - expire = now() + DEFAULT_IP_TTL, + ttl = ttl, + expire = now() + ttl, { name = name, type = qtype, address = address, class = 1, - ttl = DEFAULT_IP_TTL, + ttl = ttl, }, } - cache:set(key, { ttl = DEFAULT_IP_TTL }, answers) + cache:set(key, { ttl = ttl }, answers) end for name, address in pairs(hosts) do @@ -238,6 +238,7 @@ function _M.new(opts) error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + stale_refresh_interval = opts.stale_refresh_interval or 5, resolv = opts._resolv or resolv, hosts = hosts, enable_ipv6 = enable_ipv6, @@ -358,13 +359,23 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) local key = name .. ":" .. 
qtype local ttl, _, answers = self.cache:peek(key, true) - if answers and not answers.expired then - ttl = (ttl or 0) + self.stale_ttl + if answers and ttl then + if not answers.expired then + ttl = ttl + self.stale_ttl + answers.expire = now() + ttl + answers.expired = true + + else + ttl = ttl + (answers.expire - now()) + end + + -- automatically refresh the stale-but-in-use record after this interval + -- to avoid the need for inter-process communication + ttl = math_min(ttl, self.stale_refresh_interval) + if ttl > 0 then start_stale_update_task(self, key, name, qtype) - answers.expired = true answers.ttl = ttl - answers.expire = now() + ttl return answers, nil, ttl end end From 07fbc345405c0455e766d6c76cf2f9ae64f2202d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 27 Feb 2024 14:16:00 +0800 Subject: [PATCH 009/126] revert "automatically refresh stale-but-in-use records after @stale_refresh_interval" --- kong/resty/dns_client/init.lua | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index d323ac4713bf..9191186957cb 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -238,7 +238,6 @@ function _M.new(opts) error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - stale_refresh_interval = opts.stale_refresh_interval or 5, resolv = opts._resolv or resolv, hosts = hosts, enable_ipv6 = enable_ipv6, @@ -359,22 +358,12 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) local key = name .. ":" .. 
qtype local ttl, _, answers = self.cache:peek(key, true) - if answers and ttl then - if not answers.expired then - ttl = ttl + self.stale_ttl - answers.expire = now() + ttl - answers.expired = true - - else - ttl = ttl + (answers.expire - now()) - end - - -- automatically refresh the stale-but-in-use record after this interval - -- to avoid the need for inter-process communication - ttl = math_min(ttl, self.stale_refresh_interval) - + if answers and ttl and not answers.expired then + ttl = ttl + self.stale_ttl if ttl > 0 then start_stale_update_task(self, key, name, qtype) + answers.expire = now() + ttl + answers.expired = true answers.ttl = ttl return answers, nil, ttl end From e77bd8db48cd6db470012d7685a8b27614391658 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 27 Feb 2024 16:07:12 +0800 Subject: [PATCH 010/126] add kong_dns_cache{_miss} shared dict into templates/nginx_kong.lua --- kong/templates/nginx_kong.lua | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kong/templates/nginx_kong.lua b/kong/templates/nginx_kong.lua index db83ba957827..78f2ad8e2fa4 100644 --- a/kong/templates/nginx_kong.lua +++ b/kong/templates/nginx_kong.lua @@ -23,6 +23,11 @@ lua_shared_dict kong_db_cache ${{MEM_CACHE_SIZE}}; lua_shared_dict kong_db_cache_miss 12m; lua_shared_dict kong_secrets 5m; +> if not legacy_dns_client then +lua_shared_dict kong_dns_cache 12m; +lua_shared_dict kong_dns_cache_miss 5m; +> end + underscores_in_headers on; > if ssl_cipher_suite == 'old' then lua_ssl_conf_command CipherString DEFAULT:@SECLEVEL=0; From 2ed541e904b5d6696db73595948d217cfb6e67ec Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 27 Feb 2024 18:06:24 +0800 Subject: [PATCH 011/126] only purge cache for test cases --- kong/resty/dns_client/init.lua | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 9191186957cb..d2e567bdb6c9 100644 --- a/kong/resty/dns_client/init.lua +++ 
b/kong/resty/dns_client/init.lua @@ -582,7 +582,6 @@ function _M.init(opts) opts.error_ttl = opts.badTtl opts.stale_ttl = opts.staleTtl opts.cache_size = opts.cacheSize - opts.cache_purge = true local client, err = _M.new(opts) if not client then @@ -629,6 +628,12 @@ if package.loaded.busted then function _M:get_last_type(name) return get_last_type(self.cache, name) end + _M._init = _M.init + function _M.init(opts) + opts = opts or {} + opts.cache_purge = true + return _M._init(opts) + end end From 18c9c65b0332f0c978cf17d0bd53afc0e1f0a9ef Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 28 Feb 2024 12:03:39 +0800 Subject: [PATCH 012/126] use kong.worker_events instead of mlcache shm based ipc --- kong/resty/dns_client/init.lua | 49 ++++++++++++++----- .../09-balancer/04-round_robin_spec.lua | 40 ++++++++------- spec/fixtures/shared_dict.lua | 1 - 3 files changed, 61 insertions(+), 29 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index d2e567bdb6c9..68fe1ff3c6a4 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -6,6 +6,7 @@ local resolver = require("resty.dns.resolver") local now = ngx.now local log = ngx.log +local ERR = ngx.ERR local WARN = ngx.WARN local ALERT = ngx.ALERT local math_min = math.min @@ -186,13 +187,38 @@ function _M.new(opts) -- init the mlcache local lock_timeout = r_opts.timeout / 1000 * r_opts.retrans + 1 -- s + local resty_lock_opts = { + timeout = lock_timeout, + exptimeout = lock_timeout + 1, + } + + local ipc_source = "dns_client_mlcache" + local ipc = { + register_listeners = function(events) + if not kong or not kong.worker_events then + return + end + for _, ev in pairs(events) do + kong.worker_events.register(function(data) ev.handler(data) end, + ipc_source, ev.channel) + end + end, + broadcast = function(channel, data) + if not kong or not kong.worker_events then + return + end + local ok, err = kong.worker_events.post(ipc_source, channel, data) + if 
not ok then + log(ERR, "failed to post event '", ipc_source, "', '", channel, + "': ", err) + end + end, + } + local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { lru_size = opts.cache_size or 10000, - ipc_shm = "kong_dns_cache_ipc", - resty_lock_opts = { - timeout = lock_timeout, - exptimeout = lock_timeout + 1, - }, + ipc = ipc, + resty_lock_opts = resty_lock_opts, -- miss cache shm_miss = "kong_dns_cache_miss", neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, @@ -205,9 +231,6 @@ function _M.new(opts) cache:purge(true) end - -- TODO: add an async task to call cache:update() to update L1/LRU-cache - -- for the inserted value from other workers - -- parse order local search_types = {} local order = opts.order or DEFAULT_ORDER @@ -423,7 +446,7 @@ local function resolve_name_type(self, name, qtype, opts, tries) if hit_level and hit_level < 3 then stats_count(self.stats, key, hitstrs[hit_level]) - -- logt(tries, hitstrs[hit_level]) + -- logt(tries, "2nd-get-" .. hitstrs[hit_level]) end if err or answers.errcode then @@ -515,6 +538,7 @@ local function resolve_all(self, name, opts, tries) -- lookup fastly with the key `short::/all` local answers, err, hit_level = self.cache:get(key) if not answers or answers.expired then + -- logt(tries, "miss") stats_count(self.stats, name, "miss") answers, err, tries = resolve_names_and_types(self, name, opts, tries) @@ -526,7 +550,7 @@ local function resolve_all(self, name, opts, tries) else stats_count(self.stats, name, hitstrs[hit_level]) - -- logt(tries, hitstrs[hit_level]) + -- logt(tries, "short-get-" .. 
hitstrs[hit_level]) end -- dereference CNAME @@ -616,9 +640,12 @@ if package.loaded.busted then end function _M.getcache() return { - set = function (self, k, v, ttl) + set = function(self, k, v, ttl) self.cache:set(k, {ttl = ttl or 0}, v) end, + delete = function(self, k) + self.cache:delete(k) + end, cache = dns_client.cache, } end diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index 427061bb8f83..90b74f99e00b 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -19,7 +19,6 @@ local sleep = helpers.sleep local dnsSRV = function(...) return helpers.dnsSRV(client, ...) end local dnsA = function(...) return helpers.dnsA(client, ...) end local dnsAAAA = function(...) return helpers.dnsAAAA(client, ...) end -local dnsExpire = helpers.dnsExpire local unset_register = {} @@ -618,7 +617,7 @@ describe("[round robin balancer]", function() end) it("does not hit the resolver when 'cache_only' is set", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, }) local b = check_balancer(new_balancer { hosts = { { name = "mashape.test", port = 80, weight = 5 } }, @@ -626,6 +625,7 @@ describe("[round robin balancer]", function() wheelSize = 10, }) record.expire = gettime() - 1 -- expire current dns cache record + sleep(0.2) dnsA({ -- create a new record { name = "mashape.test", address = "5.6.7.8" }, }) @@ -1019,7 +1019,7 @@ describe("[round robin balancer]", function() end) it("weight change for unresolved record, updates properly", function() local record = dnsA({ - { name = "really.really.really.does.not.exist.hostname.test", address = "1.2.3.4" }, + { name = "really.really.really.does.not.exist.hostname.test", address = "1.2.3.4", ttl = 0.1 }, }) dnsAAAA({ { name = "getkong.test", address = "::1" }, @@ -1040,7 +1040,7 @@ describe("[round robin balancer]", 
function() -- expire the existing record record.expire = 0 record.expired = true - dnsExpire(client, record) + sleep(0.2) -- do a lookup to trigger the async lookup client.resolve("really.really.really.does.not.exist.hostname.test", {qtype = client.TYPE_A}) sleep(0.5) -- provide time for async lookup to complete @@ -1104,8 +1104,8 @@ describe("[round robin balancer]", function() end) it("renewed DNS A record; no changes", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, - { name = "mashape.test", address = "1.2.3.5" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, + { name = "mashape.test", address = "1.2.3.5", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1120,6 +1120,8 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) + dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.5" }, @@ -1135,8 +1137,8 @@ describe("[round robin balancer]", function() it("renewed DNS AAAA record; no changes", function() local record = dnsAAAA({ - { name = "mashape.test", address = "::1" }, - { name = "mashape.test", address = "::2" }, + { name = "mashape.test", address = "::1" , ttl = 0.1 }, + { name = "mashape.test", address = "::2" , ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1151,7 +1153,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record - dnsExpire(client, record) + sleep(0.2) dnsAAAA({ -- create a new record (identical) { name = "mashape.test", address = "::1" }, { name = "mashape.test", address = "::2" }, @@ -1166,9 +1168,9 @@ describe("[round robin balancer]", function() end) it("renewed DNS SRV record; no changes", function() local record = dnsSRV({ - { name = "gelato.test", target = "1.2.3.6", 
port = 8001, weight = 5 }, - { name = "gelato.test", target = "1.2.3.6", port = 8002, weight = 5 }, - { name = "gelato.test", target = "1.2.3.6", port = 8003, weight = 5 }, + { name = "gelato.test", target = "1.2.3.6", port = 8001, weight = 5, ttl = 0.1 }, + { name = "gelato.test", target = "1.2.3.6", port = 8002, weight = 5, ttl = 0.1 }, + { name = "gelato.test", target = "1.2.3.6", port = 8003, weight = 5, ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1183,6 +1185,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) dnsSRV({ -- create a new record (identical) { name = "gelato.test", target = "1.2.3.6", port = 8001, weight = 5 }, { name = "gelato.test", target = "1.2.3.6", port = 8002, weight = 5 }, @@ -1198,8 +1201,8 @@ describe("[round robin balancer]", function() end) it("renewed DNS A record; address changes", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, - { name = "mashape.test", address = "1.2.3.5" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, + { name = "mashape.test", address = "1.2.3.5", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1215,6 +1218,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) dnsA({ -- insert an updated record { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.6" }, -- target updated @@ -1232,7 +1236,7 @@ describe("[round robin balancer]", function() -- 2016/11/07 16:48:33 [error] 81932#0: *2 recv() failed (61: Connection refused), context: ngx.timer local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1256,6 +1260,7 @@ describe("[round 
robin balancer]", function() }, }) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- run entire wheel to make sure the expired one is requested, so it can fail for _ = 1, b.wheelSize do b:getPeer() end -- the only indice is now getkong.test @@ -1312,8 +1317,8 @@ describe("[round robin balancer]", function() end) it("renewed DNS A record; unhealthy entries remain unhealthy after renewal", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, - { name = "mashape.test", address = "1.2.3.5" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, + { name = "mashape.test", address = "1.2.3.5", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1347,6 +1352,7 @@ describe("[round robin balancer]", function() local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.5" }, diff --git a/spec/fixtures/shared_dict.lua b/spec/fixtures/shared_dict.lua index 7e6c350fea03..563066fb088f 100644 --- a/spec/fixtures/shared_dict.lua +++ b/spec/fixtures/shared_dict.lua @@ -15,7 +15,6 @@ local dicts = { "kong_db_cache_miss_2 12m", "kong_dns_cache 10m", "kong_dns_cache_miss 10m", - "kong_dns_cache_ipc 5m", "kong_mock_upstream_loggers 10m", "kong_secrets 5m", "test_vault 5m", From 8d22bdf8742d2255add7de419e1d5437b4f6cba2 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 28 Feb 2024 12:05:18 +0800 Subject: [PATCH 013/126] remove debug log for @tries --- kong/resty/dns_client/init.lua | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 68fe1ff3c6a4..0891ed7d6b3b 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -14,13 +14,6 @@ local timer_at = ngx.timer.at local table_insert = table.insert local 
ipv6_bracket = utils.ipv6_bracket --- debug ---[[ -local json = require("cjson").encode -local logt = table_insert -local logt = function (...) end -]] - -- Constants and default values local DEFAULT_ERROR_TTL = 1 -- unit: second local DEFAULT_STALE_TTL = 4 @@ -329,8 +322,6 @@ end local function resolve_query(self, name, qtype, tries) - -- logt(tries, "query") - local key = name .. ":" .. qtype stats_count(self.stats, key, "query") @@ -355,8 +346,6 @@ local function resolve_query(self, name, qtype, tries) stats_count(self.stats, key, answers.errstr and "query_err:" .. answers.errstr or "query_succ") - -- logt(tries, answers.errstr or #answers) - return answers, nil, answers.ttl end @@ -426,7 +415,6 @@ local function resolve_name_type(self, name, qtype, opts, tries) local key = name .. ":" .. qtype stats_init(self.stats, key) - -- logt(tries, key) if detect_recursion(opts, key) then stats_count(self.stats, key, "fail_recur") @@ -446,7 +434,6 @@ local function resolve_name_type(self, name, qtype, opts, tries) if hit_level and hit_level < 3 then stats_count(self.stats, key, hitstrs[hit_level]) - -- logt(tries, "2nd-get-" .. hitstrs[hit_level]) end if err or answers.errcode then @@ -525,7 +512,6 @@ end local function resolve_all(self, name, opts, tries) local key = "short:" .. name .. ":" .. (opts.qtype or "all") - -- logt(tries, key) stats_init(self.stats, name) stats_count(self.stats, name, "runs") @@ -538,7 +524,6 @@ local function resolve_all(self, name, opts, tries) -- lookup fastly with the key `short::/all` local answers, err, hit_level = self.cache:get(key) if not answers or answers.expired then - -- logt(tries, "miss") stats_count(self.stats, name, "miss") answers, err, tries = resolve_names_and_types(self, name, opts, tries) @@ -550,12 +535,10 @@ local function resolve_all(self, name, opts, tries) else stats_count(self.stats, name, hitstrs[hit_level]) - -- logt(tries, "short-get-" .. 
hitstrs[hit_level]) end -- dereference CNAME if opts.qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then - -- logt(tries, "cname") stats_count(self.stats, name, "cname") return resolve_all(self, answers[1].cname, opts, tries) end From 3a7104d64745308e1e081a6878d29ce4a6585528 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 28 Feb 2024 13:07:40 +0800 Subject: [PATCH 014/126] support req_dyn_hook.run_hooks --- kong/resty/dns_client/init.lua | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 0891ed7d6b3b..3525e99d7212 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -14,6 +14,8 @@ local timer_at = ngx.timer.at local table_insert = table.insert local ipv6_bracket = utils.ipv6_bracket +local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks + -- Constants and default values local DEFAULT_ERROR_TTL = 1 -- unit: second local DEFAULT_STALE_TTL = 4 @@ -424,7 +426,7 @@ local function resolve_name_type(self, name, qtype, opts, tries) local answers, err, hit_level = self.cache:get(key, nil, resolve_name_type_callback, self, name, qtype, opts, tries) - if err and err:sub(1, #"callback") == "callback" then + if err and err:sub(1, 8) == "callback" then log(ALERT, err) end @@ -432,6 +434,12 @@ local function resolve_name_type(self, name, qtype, opts, tries) answers = EMPTY_ANSWERS end + local ctx = ngx.ctx + if ctx and ctx.has_timing then + req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", + (hit_level and hit_level < 3)) + end + if hit_level and hit_level < 3 then stats_count(self.stats, key, hitstrs[hit_level]) end @@ -534,6 +542,12 @@ local function resolve_all(self, name, opts, tries) end else + local ctx = ngx.ctx + if ctx and ctx.has_timing then + req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", + (hit_level and hit_level < 3)) + end + stats_count(self.stats, name, hitstrs[hit_level]) 
end From 01dfaf2f0cda41767a62bcbcd1a9cbca16fbb421 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 28 Feb 2024 16:37:43 +0800 Subject: [PATCH 015/126] supports __tostring of @tries table (error list) --- kong/resty/dns_client/init.lua | 31 +++++++++---------- .../03-old_client_cache_spec.lua | 12 +++---- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 3525e99d7212..a154f68e824b 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -9,6 +9,7 @@ local log = ngx.log local ERR = ngx.ERR local WARN = ngx.WARN local ALERT = ngx.ALERT +local cjson_encode = require("cjson.safe").encode local math_min = math.min local timer_at = ngx.timer.at local table_insert = table.insert @@ -37,6 +38,13 @@ local valid_type_names = { LAST = TYPE_LAST, } +local typstrs = { + [TYPE_SRV] = "SRV", + [TYPE_A] = "A", + [TYPE_AAAA] = "AAAA", + [TYPE_CNAME] = "CNAME", +} + local hitstrs = { [1] = "hit_lru", [2] = "hit_shm", @@ -63,6 +71,9 @@ end _M.TYPE_LAST = -1 +local tries_mt = { __tostring = cjson_encode } + + local function stats_init(stats, name) if not stats[name] then stats[name] = {} @@ -76,16 +87,8 @@ end -- lookup or set TYPE_LAST (the DNS record type from the last successful query) -local valid_types = { - [ TYPE_SRV ] = true, - [ TYPE_A ] = true, - [ TYPE_AAAA ] = true, - [ TYPE_CNAME ] = true, -} - - local function insert_last_type(cache, name, qtype) - if valid_types[qtype] then + if typstrs[qtype] then cache:set("last:" .. 
name, { ttl = 0 }, qtype) end end @@ -404,10 +407,6 @@ local function detect_recursion(opts, key) opts.resolved_names = rn end local detected = rn[key] - -- TODO delete - if detected then - log(ALERT, "detect recursion for name:", key) - end rn[key] = true return detected end @@ -446,7 +445,7 @@ local function resolve_name_type(self, name, qtype, opts, tries) if err or answers.errcode then err = err or ("dns server error: %s %s"):format(answers.errcode, answers.errstr) - table_insert(tries, { name, qtype, err }) + table_insert(tries, { name .. ":" .. typstrs[qtype], err }) end return answers, err @@ -529,7 +528,7 @@ local function resolve_all(self, name, opts, tries) return nil, "recursion detected for name: " .. name end - -- lookup fastly with the key `short::/all` + -- quickly lookup with the key `short::all` or `short::` local answers, err, hit_level = self.cache:get(key) if not answers or answers.expired then stats_count(self.stats, name, "miss") @@ -574,7 +573,7 @@ end function _M:resolve(name, opts, tries) name = name:lower() opts = opts or {} - tries = tries or {} + tries = setmetatable(tries or {}, tries_mt) local answers, err, tries = resolve_all(self, name, opts, tries) if not answers or not opts.return_random then diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index fc8cee1b57b4..8346fbe209df 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -588,23 +588,19 @@ describe("[DNS client cache]", function() assert.same(err, "dns server error: 3 name error") assert.same({ { - "192.168.5.232.node.api_test.consul", - 33, + "192.168.5.232.node.api_test.consul:SRV", "dns server error: 3 name error", }, { - "192.168.5.232.node.api_test.consul", - 1, + "192.168.5.232.node.api_test.consul:A", "dns server error: 3 name error", }, { - "192.168.5.232.node.api_test.consul", - 28, + 
"192.168.5.232.node.api_test.consul:AAAA", "dns server error: 3 name error", }, { - "192.168.5.232.node.api_test.consul", - 5, + "192.168.5.232.node.api_test.consul:CNAME", "dns server error: 3 name error", } }, tries) From 157650b1e616e9ffc3a381b292bfaa2accd10c02 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 29 Feb 2024 15:47:34 +0800 Subject: [PATCH 016/126] coding style: use a 2-space indentation and localized some variables --- kong/resty/dns_client/init.lua | 926 ++++++++++++++++---------------- kong/resty/dns_client/utils.lua | 302 ++++++----- 2 files changed, 614 insertions(+), 614 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index a154f68e824b..bd646fd5ecbd 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -1,19 +1,26 @@ --- vim: ts=4 sts=4 sw=4 et: - +local cjson = require("cjson.safe") local utils = require("kong.resty.dns_client.utils") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") -local now = ngx.now -local log = ngx.log -local ERR = ngx.ERR -local WARN = ngx.WARN -local ALERT = ngx.ALERT -local cjson_encode = require("cjson.safe").encode -local math_min = math.min -local timer_at = ngx.timer.at -local table_insert = table.insert -local ipv6_bracket = utils.ipv6_bracket +local get_rr_ans = utils.get_rr_ans +local get_wrr_ans = utils.get_wrr_ans +local parse_hosts = utils.parse_hosts +local ipv6_bracket = utils.ipv6_bracket +local search_names = utils.search_names + +local now = ngx.now +local log = ngx.log +local ERR = ngx.ERR +local WARN = ngx.WARN +local ALERT = ngx.ALERT +local timer_at = ngx.timer.at + +local type = type +local pairs = pairs +local ipairs = ipairs +local math_min = math.min +local table_insert = table.insert local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks @@ -31,28 +38,28 @@ local TYPE_CNAME = resolver.TYPE_CNAME local TYPE_LAST = -1 local valid_type_names = { - SRV = 
TYPE_SRV, - A = TYPE_A, - AAAA = TYPE_AAAA, - CNAME = TYPE_CNAME, - LAST = TYPE_LAST, + SRV = TYPE_SRV, + A = TYPE_A, + AAAA = TYPE_AAAA, + CNAME = TYPE_CNAME, + LAST = TYPE_LAST, } local typstrs = { - [TYPE_SRV] = "SRV", - [TYPE_A] = "A", - [TYPE_AAAA] = "AAAA", - [TYPE_CNAME] = "CNAME", + [TYPE_SRV] = "SRV", + [TYPE_A] = "A", + [TYPE_AAAA] = "AAAA", + [TYPE_CNAME] = "CNAME", } local hitstrs = { - [1] = "hit_lru", - [2] = "hit_shm", + [1] = "hit_lru", + [2] = "hit_shm", } local errstrs = { -- client specific errors - [100] = "cache only lookup failed", - [101] = "no available records", + [100] = "cache only lookup failed", + [101] = "no available records", } local EMPTY_ANSWERS = { errcode = 3, errstr = "name error" } @@ -64,501 +71,497 @@ local mt = { __index = _M } -- copy TYPE_* for k,v in pairs(resolver) do - if type(k) == "string" and k:sub(1,5) == "TYPE_" then - _M[k] = v - end + if type(k) == "string" and k:sub(1,5) == "TYPE_" then + _M[k] = v + end end _M.TYPE_LAST = -1 -local tries_mt = { __tostring = cjson_encode } +local tries_mt = { __tostring = cjson.encode } local function stats_init(stats, name) - if not stats[name] then - stats[name] = {} - end + if not stats[name] then + stats[name] = {} + end end local function stats_count(stats, name, key) - stats[name][key] = (stats[name][key] or 0) + 1 + stats[name][key] = (stats[name][key] or 0) + 1 end -- lookup or set TYPE_LAST (the DNS record type from the last successful query) local function insert_last_type(cache, name, qtype) - if typstrs[qtype] then - cache:set("last:" .. name, { ttl = 0 }, qtype) - end + if typstrs[qtype] then + cache:set("last:" .. name, { ttl = 0 }, qtype) + end end local function get_last_type(cache, name) - return cache:get("last:" .. name) + return cache:get("last:" .. 
name) end -- insert hosts into cache local function init_hosts(cache, path, preferred_ip_type) - local hosts, err = utils.parse_hosts(path) - if not hosts then - log(WARN, "Invalid hosts file: ", err) - hosts = {} - end - - if not hosts.localhost then - hosts.localhost = { - ipv4 = "127.0.0.1", - ipv6 = "[::1]", - } - end - - local function insert_answer(name, qtype, address) - if not address then - return - end - - local ttl = 10 * 365 * 24 * 60 * 60 -- 10 years ttl for hosts entries - - local key = name .. ":" .. qtype - local answers = { - ttl = ttl, - expire = now() + ttl, - { - name = name, - type = qtype, - address = address, - class = 1, - ttl = ttl, - }, - } - cache:set(key, { ttl = ttl }, answers) - end - - for name, address in pairs(hosts) do - name = name:lower() - if address.ipv4 then - insert_answer(name, TYPE_A, address.ipv4) - insert_last_type(cache, name, TYPE_A) - end - if address.ipv6 then - insert_answer(name, TYPE_AAAA, address.ipv6) - if not address.ipv4 or preferred_ip_type == TYPE_AAAA then - insert_last_type(cache, name, TYPE_AAAA) - end - end - end - - return hosts -end - - -function _M.new(opts) - if not opts then - return nil, "no options table specified" - end - - local enable_ipv6 = opts.enable_ipv6 - - -- parse resolv.conf - local resolv, err = utils.parse_resolv_conf(opts.resolv_conf, enable_ipv6) - if not resolv then - log(WARN, "Invalid resolv.conf: ", err) - resolv = { options = {} } - end - - -- init the resolver options for lua-resty-dns - local nameservers = (opts.nameservers and #opts.nameservers > 0) and - opts.nameservers or resolv.nameservers - if not nameservers or #nameservers == 0 then - log(WARN, "Invalid configuration, no nameservers specified") - end - - local r_opts = { - nameservers = nameservers, - retrans = opts.retrans or resolv.options.attempts or 5, - timeout = opts.timeout or resolv.options.timeout or 2000, -- ms - no_random = opts.no_random or not resolv.options.rotate, + local hosts, err = parse_hosts(path) + 
if not hosts then + log(WARN, "Invalid hosts file: ", err) + hosts = {} + end + + if not hosts.localhost then + hosts.localhost = { + ipv4 = "127.0.0.1", + ipv6 = "[::1]", } + end - -- init the mlcache - local lock_timeout = r_opts.timeout / 1000 * r_opts.retrans + 1 -- s + local function insert_answer(name, qtype, address) + if not address then + return + end - local resty_lock_opts = { - timeout = lock_timeout, - exptimeout = lock_timeout + 1, - } + local ttl = 10 * 365 * 24 * 60 * 60 -- 10 years ttl for hosts entries - local ipc_source = "dns_client_mlcache" - local ipc = { - register_listeners = function(events) - if not kong or not kong.worker_events then - return - end - for _, ev in pairs(events) do - kong.worker_events.register(function(data) ev.handler(data) end, - ipc_source, ev.channel) - end - end, - broadcast = function(channel, data) - if not kong or not kong.worker_events then - return - end - local ok, err = kong.worker_events.post(ipc_source, channel, data) - if not ok then - log(ERR, "failed to post event '", ipc_source, "', '", channel, - "': ", err) - end - end, + local key = name .. ":" .. qtype + local answers = { + ttl = ttl, + expire = now() + ttl, + { + name = name, + type = qtype, + address = address, + class = 1, + ttl = ttl, + }, } + cache:set(key, { ttl = ttl }, answers) + end - local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { - lru_size = opts.cache_size or 10000, - ipc = ipc, - resty_lock_opts = resty_lock_opts, - -- miss cache - shm_miss = "kong_dns_cache_miss", - neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - }) - if not cache then - return nil, "could not create mlcache: " .. 
err + for name, address in pairs(hosts) do + name = name:lower() + if address.ipv4 then + insert_answer(name, TYPE_A, address.ipv4) + insert_last_type(cache, name, TYPE_A) end - - if opts.cache_purge then - cache:purge(true) + if address.ipv6 then + insert_answer(name, TYPE_AAAA, address.ipv6) + if not address.ipv4 or preferred_ip_type == TYPE_AAAA then + insert_last_type(cache, name, TYPE_AAAA) + end end + end - -- parse order - local search_types = {} - local order = opts.order or DEFAULT_ORDER - local preferred_ip_type - for _, typstr in ipairs(order) do - local qtype = valid_type_names[typstr:upper()] - if not qtype then - return nil, "Invalid dns record type in order array: " .. typstr - end - table_insert(search_types, qtype) - if (qtype == TYPE_A or qtype == TYPE_AAAA) and not preferred_ip_type then - preferred_ip_type = qtype - end - end - preferred_ip_type = preferred_ip_type or TYPE_A + return hosts +end - if #search_types == 0 then - return nil, "Invalid order array: empty record types" - end - -- parse hosts - local hosts = init_hosts(cache, opts.hosts, preferred_ip_type) - - return setmetatable({ - r_opts = r_opts, - cache = cache, - valid_ttl = opts.valid_ttl, - error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, - stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, - empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - resolv = opts._resolv or resolv, - hosts = hosts, - enable_ipv6 = enable_ipv6, - search_types = search_types, - stats = {} - }, mt) +function _M.new(opts) + if not opts then + return nil, "no options table specified" + end + + -- parse resolv.conf + local resolv, err = utils.parse_resolv_conf(opts.resolv_conf, opts.enable_ipv6) + if not resolv then + log(WARN, "Invalid resolv.conf: ", err) + resolv = { options = {} } + end + + -- init the resolver options for lua-resty-dns + local nameservers = (opts.nameservers and #opts.nameservers > 0) and + opts.nameservers or resolv.nameservers + if not nameservers or #nameservers == 0 then + log(WARN, 
"Invalid configuration, no nameservers specified") + end + + local r_opts = { + retrans = opts.retrans or resolv.options.attempts or 5, + timeout = opts.timeout or resolv.options.timeout or 2000, -- ms + no_random = opts.no_random or not resolv.options.rotate, + nameservers = nameservers, + } + + -- init the mlcache + local lock_timeout = r_opts.timeout / 1000 * r_opts.retrans + 1 -- s + + local resty_lock_opts = { + timeout = lock_timeout, + exptimeout = lock_timeout + 1, + } + + local ipc_source = "dns_client_mlcache" + local ipc = { + register_listeners = function(events) + if not kong or not kong.worker_events then + return + end + for _, ev in pairs(events) do + kong.worker_events.register(function(data) ev.handler(data) end, + ipc_source, ev.channel) + end + end, + broadcast = function(channel, data) + if not kong or not kong.worker_events then + return + end + local ok, err = kong.worker_events.post(ipc_source, channel, data) + if not ok then + log(ERR, "failed to post event '", ipc_source, "', '", channel, "': ", err) + end + end, + } + + local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { + ipc = ipc, + neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + lru_size = opts.cache_size or 10000, + shm_miss = "kong_dns_cache_miss", + resty_lock_opts = resty_lock_opts, + }) + + if not cache then + return nil, "could not create mlcache: " .. err + end + + if opts.cache_purge then + cache:purge(true) + end + + -- parse order + local search_types = {} + local order = opts.order or DEFAULT_ORDER + local preferred_ip_type + for _, typstr in ipairs(order) do + local qtype = valid_type_names[typstr:upper()] + if not qtype then + return nil, "Invalid dns record type in order array: " .. 
typstr + end + + table_insert(search_types, qtype) + + if (qtype == TYPE_A or qtype == TYPE_AAAA) and not preferred_ip_type then + preferred_ip_type = qtype + end + end + preferred_ip_type = preferred_ip_type or TYPE_A + + if #search_types == 0 then + return nil, "Invalid order array: empty record types" + end + + -- parse hosts + local hosts = init_hosts(cache, opts.hosts, preferred_ip_type) + + return setmetatable({ + cache = cache, + stats = {}, + hosts = hosts, + r_opts = r_opts, + resolv = opts._resolv or resolv, + valid_ttl = opts.valid_ttl, + error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, + empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + search_types = search_types, + }, mt) end local function process_answers(self, qname, qtype, answers) - local errcode = answers.errcode - if errcode then - answers.ttl = errcode == 3 and self.empty_ttl or self.error_ttl - -- For compatibility, the balancer subsystem needs to use this field. - answers.expire = now() + answers.ttl - return answers - end + local errcode = answers.errcode + if errcode then + answers.ttl = errcode == 3 and self.empty_ttl or self.error_ttl + -- compatible with balancer, which needs this field + answers.expire = now() + answers.ttl + return answers + end - local processed_answers = {} - local cname_answer + local processed_answers = {} + local cname_answer - local ttl = self.valid_ttl or 0xffffffff + local ttl = self.valid_ttl or 0xffffffff - for _, answer in ipairs(answers) do - answer.name = answer.name:lower() + for _, answer in ipairs(answers) do + answer.name = answer.name:lower() - if answer.type == TYPE_CNAME then - cname_answer = answer -- use the last one as the real cname + if answer.type == TYPE_CNAME then + cname_answer = answer -- use the last one as the real cname - elseif answer.type == qtype then - -- A compromise regarding https://github.com/Kong/kong/pull/3088 - if answer.type == TYPE_AAAA then - answer.address = 
ipv6_bracket(answer.address) - elseif answer.type == TYPE_SRV then - answer.target = ipv6_bracket(answer.target) - end + elseif answer.type == qtype then + -- compatible with balancer, see https://github.com/Kong/kong/pull/3088 + if answer.type == TYPE_AAAA then + answer.address = ipv6_bracket(answer.address) + elseif answer.type == TYPE_SRV then + answer.target = ipv6_bracket(answer.target) + end - table.insert(processed_answers, answer) - end + table.insert(processed_answers, answer) + end - if self.valid_ttl then - answer.ttl = self.valid_ttl - else - ttl = math_min(ttl, answer.ttl) - end + if self.valid_ttl then + answer.ttl = self.valid_ttl + else + ttl = math_min(ttl, answer.ttl) end + end - if #processed_answers == 0 then - if not cname_answer then - return { - errcode = 101, - errstr = errstrs[101], - ttl = self.empty_ttl, - --expire = now() + self.empty_ttl, - } - end - - table_insert(processed_answers, cname_answer) + if #processed_answers == 0 then + if not cname_answer then + return { + errcode = 101, + errstr = errstrs[101], + ttl = self.empty_ttl, + -- expire = now() + self.empty_ttl, + } end - processed_answers.ttl = ttl - processed_answers.expire = now() + ttl + table_insert(processed_answers, cname_answer) + end + + processed_answers.ttl = ttl + processed_answers.expire = now() + ttl - return processed_answers + return processed_answers end local function resolve_query(self, name, qtype, tries) - local key = name .. ":" .. qtype - stats_count(self.stats, key, "query") + local key = name .. ":" .. qtype + stats_count(self.stats, key, "query") - local r, err = resolver:new(self.r_opts) - if not r then - return nil, "failed to instantiate the resolver: " .. err - end + local r, err = resolver:new(self.r_opts) + if not r then + return nil, "failed to instantiate the resolver: " .. 
err + end - local options = { additional_section = true, qtype = qtype } - local answers, err = r:query(name, options) - if r.destroy then - r:destroy() - end + local options = { additional_section = true, qtype = qtype } + local answers, err = r:query(name, options) + if r.destroy then + r:destroy() + end - if not answers then - stats_count(self.stats, key, "query_fail") - return nil, "DNS server error: " .. (err or "unknown") - end + if not answers then + stats_count(self.stats, key, "query_fail") + return nil, "DNS server error: " .. (err or "unknown") + end - answers = process_answers(self, name, qtype, answers) + answers = process_answers(self, name, qtype, answers) - stats_count(self.stats, key, answers.errstr and - "query_err:" .. answers.errstr or "query_succ") + stats_count(self.stats, key, answers.errstr and "query_err:" .. answers.errstr + or "query_succ") - return answers, nil, answers.ttl + return answers, nil, answers.ttl end local function start_stale_update_task(self, key, name, qtype) - stats_count(self.stats, key, "stale") - - timer_at(0, function (premature) - if premature then return end - - local answers = resolve_query(self, name, qtype, {}) - if answers and (not answers.errcode or answers.errcode == 3) then - self.cache:set(key, { ttl = answers.ttl }, - answers.errcode == 3 and nil or answers) - insert_last_type(self.cache, name, qtype) - end - end) + stats_count(self.stats, key, "stale") + + timer_at(0, function (premature) + if premature then return end + + local answers = resolve_query(self, name, qtype, {}) + if answers and (not answers.errcode or answers.errcode == 3) then + self.cache:set(key, { ttl = answers.ttl }, + answers.errcode == 3 and nil or answers) + insert_last_type(self.cache, name, qtype) + end + end) end local function resolve_name_type_callback(self, name, qtype, opts, tries) - local key = name .. ":" .. qtype + local key = name .. ":" .. 
qtype - local ttl, _, answers = self.cache:peek(key, true) - if answers and ttl and not answers.expired then - ttl = ttl + self.stale_ttl - if ttl > 0 then - start_stale_update_task(self, key, name, qtype) - answers.expire = now() + ttl - answers.expired = true - answers.ttl = ttl - return answers, nil, ttl - end + local ttl, _, answers = self.cache:peek(key, true) + if answers and ttl and not answers.expired then + ttl = ttl + self.stale_ttl + if ttl > 0 then + start_stale_update_task(self, key, name, qtype) + answers.expire = now() + ttl + answers.expired = true + answers.ttl = ttl + return answers, nil, ttl end + end - if opts.cache_only then - return { errcode = 100, errstr = errstrs[100] }, nil, -1 - end + if opts.cache_only then + return { errcode = 100, errstr = errstrs[100] }, nil, -1 + end - local answers, err, ttl = resolve_query(self, name, qtype, tries) + local answers, err, ttl = resolve_query(self, name, qtype, tries) - if answers and answers.errcode == 3 then - return nil -- empty record for shm_miss cache - end + if answers and answers.errcode == 3 then + return nil -- empty record for shm_miss cache + end - return answers, err, ttl + return answers, err, ttl end local function detect_recursion(opts, key) - local rn = opts.resolved_names - if not rn then - rn = {} - opts.resolved_names = rn - end - local detected = rn[key] - rn[key] = true - return detected + local rn = opts.resolved_names + if not rn then + rn = {} + opts.resolved_names = rn + end + local detected = rn[key] + rn[key] = true + return detected end local function resolve_name_type(self, name, qtype, opts, tries) - local key = name .. ":" .. qtype - - stats_init(self.stats, key) - - if detect_recursion(opts, key) then - stats_count(self.stats, key, "fail_recur") - return nil, "recursion detected for name: " .. 
key - end - - local answers, err, hit_level = self.cache:get(key, nil, - resolve_name_type_callback, - self, name, qtype, opts, tries) - if err and err:sub(1, 8) == "callback" then - log(ALERT, err) - end - - if not answers and not err then - answers = EMPTY_ANSWERS - end - - local ctx = ngx.ctx - if ctx and ctx.has_timing then - req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", - (hit_level and hit_level < 3)) - end - - if hit_level and hit_level < 3 then - stats_count(self.stats, key, hitstrs[hit_level]) - end - - if err or answers.errcode then - err = err or ("dns server error: %s %s"):format(answers.errcode, answers.errstr) - table_insert(tries, { name .. ":" .. typstrs[qtype], err }) - end - - return answers, err + local key = name .. ":" .. qtype + + stats_init(self.stats, key) + + if detect_recursion(opts, key) then + stats_count(self.stats, key, "fail_recur") + return nil, "recursion detected for name: " .. key + end + + local answers, err, hit_level = self.cache:get(key, nil, + resolve_name_type_callback, + self, name, qtype, opts, tries) + if err and err:sub(1, 8) == "callback" then + log(ALERT, err) + end + + if not answers and not err then + answers = EMPTY_ANSWERS + end + + local ctx = ngx.ctx + if ctx and ctx.has_timing then + req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", + (hit_level and hit_level < 3)) + end + + if hit_level and hit_level < 3 then + stats_count(self.stats, key, hitstrs[hit_level]) + end + + if err or answers.errcode then + err = err or ("dns server error: %s %s"):format(answers.errcode, answers.errstr) + table_insert(tries, { name .. ":" .. 
typstrs[qtype], err }) + end + + return answers, err end local function get_search_types(self, name, qtype) - local input_types = qtype and { qtype } or self.search_types - local checked_types = {} - local types = {} - - for _, qtype in ipairs(input_types) do - if qtype == TYPE_LAST then - qtype = get_last_type(self.cache, name) - end - if qtype and not checked_types[qtype] then - table.insert(types, qtype) - checked_types[qtype] = true - end + local input_types = qtype and { qtype } or self.search_types + local checked_types = {} + local types = {} + + for _, qtype in ipairs(input_types) do + if qtype == TYPE_LAST then + qtype = get_last_type(self.cache, name) + end + if qtype and not checked_types[qtype] then + table.insert(types, qtype) + checked_types[qtype] = true end + end - return types + return types end local function check_and_get_ip_answers(name) - if name:match("^%d+%.%d+%.%d+%.%d+$") then -- IPv4 - return {{ name = name, class = 1, type = TYPE_A, address = name }} - end + if name:match("^%d+%.%d+%.%d+%.%d+$") then -- IPv4 + return {{ name = name, class = 1, type = TYPE_A, address = name }} + end - if name:match(":") then -- IPv6 - return {{ name = name, class = 1, type = TYPE_AAAA, address = ipv6_bracket(name) }} - end + if name:match(":") then -- IPv6 + return {{ name = name, class = 1, type = TYPE_AAAA, address = ipv6_bracket(name) }} + end - return nil + return nil end local function resolve_names_and_types(self, name, opts, tries) - local answers = check_and_get_ip_answers(name) - if answers then - answers.ttl = 10 * 365 * 24 * 60 * 60 - answers.expire = now() + answers.ttl + local answers = check_and_get_ip_answers(name) + if answers then + answers.ttl = 10 * 365 * 24 * 60 * 60 + answers.expire = now() + answers.ttl + return answers, nil, tries + end + + local types = get_search_types(self, name, opts.qtype) + local names = search_names(name, self.resolv, self.hosts) + + local err + for _, qtype in ipairs(types) do + for _, qname in ipairs(names) 
do + answers, err = resolve_name_type(self, qname, qtype, opts, tries) + + -- severe error occurred + if not answers then + return nil, err, tries + end + + if not answers.errcode then + insert_last_type(self.cache, qname, qtype) -- cache TYPE_LAST return answers, nil, tries + end end + end - local types = get_search_types(self, name, opts.qtype) - local names = utils.search_names(name, self.resolv, self.hosts) - - local err - for _, qtype in ipairs(types) do - for _, qname in ipairs(names) do - answers, err = resolve_name_type(self, qname, qtype, opts, tries) - - -- severe error occurred - if not answers then - return nil, err, tries - end - - if not answers.errcode then - insert_last_type(self.cache, qname, qtype) -- cache TYPE_LAST - return answers, nil, tries - end - end - end - - -- not found in the search iteration - return nil, err, tries + -- not found in the search iteration + return nil, err, tries end local function resolve_all(self, name, opts, tries) - local key = "short:" .. name .. ":" .. (opts.qtype or "all") + local key = "short:" .. name .. ":" .. (opts.qtype or "all") - stats_init(self.stats, name) - stats_count(self.stats, name, "runs") + stats_init(self.stats, name) + stats_count(self.stats, name, "runs") - if detect_recursion(opts, key) then - stats_count(self.stats, name, "fail_recur") - return nil, "recursion detected for name: " .. name - end - - -- quickly lookup with the key `short::all` or `short::` - local answers, err, hit_level = self.cache:get(key) - if not answers or answers.expired then - stats_count(self.stats, name, "miss") - - answers, err, tries = resolve_names_and_types(self, name, opts, tries) - if not opts.cache_only and answers then - --assert(answers.ttl) - --assert(answers.expire) - self.cache:set(key, { ttl = answers.ttl }, answers) - end + if detect_recursion(opts, key) then + stats_count(self.stats, name, "fail_recur") + return nil, "recursion detected for name: " .. 
name + end - else - local ctx = ngx.ctx - if ctx and ctx.has_timing then - req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", - (hit_level and hit_level < 3)) - end + -- quickly lookup with the key `short::all` or `short::` + local answers, err, hit_level = self.cache:get(key) + if not answers or answers.expired then + stats_count(self.stats, name, "miss") - stats_count(self.stats, name, hitstrs[hit_level]) + answers, err, tries = resolve_names_and_types(self, name, opts, tries) + if not opts.cache_only and answers then + self.cache:set(key, { ttl = answers.ttl }, answers) end - -- dereference CNAME - if opts.qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then - stats_count(self.stats, name, "cname") - return resolve_all(self, answers[1].cname, opts, tries) + else + local ctx = ngx.ctx + if ctx and ctx.has_timing then + req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", + (hit_level and hit_level < 3)) end - stats_count(self.stats, name, answers and "succ" or "fail") + stats_count(self.stats, name, hitstrs[hit_level]) + end - return answers, err, tries + -- dereference CNAME + if opts.qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then + stats_count(self.stats, name, "cname") + return resolve_all(self, answers[1].cname, opts, tries) + end + + stats_count(self.stats, name, answers and "succ" or "fail") + + return answers, err, tries end @@ -571,24 +574,23 @@ end -- `cache_only`: default `false`, retrieve data only from the internal cache -- `qtype`: specified query type instead of its own search types function _M:resolve(name, opts, tries) - name = name:lower() - opts = opts or {} - tries = setmetatable(tries or {}, tries_mt) + name = name:lower() + opts = opts or {} + tries = setmetatable(tries or {}, tries_mt) - local answers, err, tries = resolve_all(self, name, opts, tries) - if not answers or not opts.return_random then - return answers, err, tries - end + local answers, err, tries = resolve_all(self, name, 
opts, tries) + if not answers or not opts.return_random then + return answers, err, tries + end - -- option: return_random - if answers[1].type == TYPE_SRV then - local answer = utils.get_wrr_ans(answers) - opts.port = answer.port ~= 0 and answer.port or opts.port - -- TODO: SRV recursive name and target how to handle - return self:resolve(answer.target, opts, tries) - end + -- option: return_random + if answers[1].type == TYPE_SRV then + local answer = get_wrr_ans(answers) + opts.port = answer.port ~= 0 and answer.port or opts.port + return self:resolve(answer.target, opts, tries) + end - return utils.get_rr_ans(answers).address, opts.port, tries + return get_rr_ans(answers).address, opts.port, tries end @@ -597,19 +599,19 @@ end local dns_client function _M.init(opts) - opts = opts or {} - opts.valid_ttl = opts.validTtl - opts.error_ttl = opts.badTtl - opts.stale_ttl = opts.staleTtl - opts.cache_size = opts.cacheSize - - local client, err = _M.new(opts) - if not client then - return nil, err - end - - dns_client = client - return true + opts = opts or {} + opts.valid_ttl = opts.validTtl + opts.error_ttl = opts.badTtl + opts.stale_ttl = opts.staleTtl + opts.cache_size = opts.cacheSize + + local client, err = _M.new(opts) + if not client then + return nil, err + end + + dns_client = client + return true end @@ -617,46 +619,46 @@ end _M._resolve = _M.resolve function _M.resolve(name, r_opts, cache_only, tries) - local opts = { cache_only = cache_only } - return dns_client:_resolve(name, opts, tries) + local opts = { cache_only = cache_only } + return dns_client:_resolve(name, opts, tries) end function _M.toip(name, port, cache_only, tries) - local opts = { cache_only = cache_only, return_random = true , port = port } - return dns_client:_resolve(name, opts, tries) + local opts = { cache_only = cache_only, return_random = true , port = port } + return dns_client:_resolve(name, opts, tries) end -- For testing if package.loaded.busted then - function _M.getobj() - 
return dns_client - end - function _M.getcache() - return { - set = function(self, k, v, ttl) - self.cache:set(k, {ttl = ttl or 0}, v) - end, - delete = function(self, k) - self.cache:delete(k) - end, - cache = dns_client.cache, - } - end - function _M:insert_last_type(name, qtype) - insert_last_type(self.cache, name, qtype) - end - function _M:get_last_type(name) - return get_last_type(self.cache, name) - end - _M._init = _M.init - function _M.init(opts) - opts = opts or {} - opts.cache_purge = true - return _M._init(opts) - end + function _M.getobj() + return dns_client + end + function _M.getcache() + return { + set = function(self, k, v, ttl) + self.cache:set(k, {ttl = ttl or 0}, v) + end, + delete = function(self, k) + self.cache:delete(k) + end, + cache = dns_client.cache, + } + end + function _M:insert_last_type(name, qtype) + insert_last_type(self.cache, name, qtype) + end + function _M:get_last_type(name) + return get_last_type(self.cache, name) + end + _M._init = _M.init + function _M.init(opts) + opts = opts or {} + opts.cache_purge = true + return _M._init(opts) + end end diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 998d84a7cb44..06d023d9e989 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -1,14 +1,12 @@ --- vim: ts=4 sts=4 sw=4 et: - - local utils = require("kong.resty.dns.utils") -local pl_utils = require("pl.utils") -local math_random = math.random -local table_insert = table.insert -local table_remove = table.remove +local math_random = math.random +local table_insert = table.insert +local table_remove = table.remove -local DEFAULT_HOSTS_FILE = "/etc/hosts" +local readlines = require("pl.utils").readlines + +local DEFAULT_HOSTS_FILE = "/etc/hosts" local DEFAULT_RESOLV_CONF = "/etc/resolv.conf" @@ -18,14 +16,14 @@ local _M = {} -- checks the hostname type -- @return "ipv4", "ipv6", or "name" function _M.hostname_type(name) - local remainder, colons = name:gsub(":", "") - if 
colons > 1 then - return "ipv6" - end - if remainder:match("^[%d%.]+$") then - return "ipv4" - end - return "name" + local remainder, colons = name:gsub(":", "") + if colons > 1 then + return "ipv6" + end + if remainder:match("^[%d%.]+$") then + return "ipv4" + end + return "name" end @@ -34,201 +32,201 @@ end -- @param name the string to check (this may contain a port number) -- @return `name/ip` + `port (or nil)` + `type ("ipv4", "ipv6" or "name")` function _M.parse_hostname(name) - local t = _M.hostname_type(name) - if t == "ipv4" or t == "name" then - local ip, port = name:match("^([^:]+)%:*(%d*)$") - return ip, tonumber(port), t - end - -- ipv6 - if name:match("%[") then -- brackets, so possibly a port - local ip, port = name:match("^%[([^%]]+)%]*%:*(%d*)$") - return "[" .. ip .. "]", tonumber(port), t - end - return "[" .. name .. "]", nil, t -- no brackets also means no port + local t = _M.hostname_type(name) + if t == "ipv4" or t == "name" then + local ip, port = name:match("^([^:]+)%:*(%d*)$") + return ip, tonumber(port), t + end + -- ipv6 + if name:match("%[") then -- brackets, so possibly a port + local ip, port = name:match("^%[([^%]]+)%]*%:*(%d*)$") + return "[" .. ip .. "]", tonumber(port), t + end + return "[" .. name .. 
"]", nil, t -- no brackets also means no port end local function get_lines(path) - if type(path) == "table" then - return path - end - return pl_utils.readlines(path or DEFAULT_HOSTS_FILE) + if type(path) == "table" then + return path + end + return readlines(path or DEFAULT_HOSTS_FILE) end function _M.parse_hosts(path, enable_ipv6) - local lines, err = get_lines(path or DEFAULT_HOSTS_FILE) - if not lines then - return nil, err - end - - local hosts = {} - for _, line in ipairs(lines) do - -- Remove leading/trailing whitespaces and split by whitespace - local parts = {} - for part in line:gmatch("%S+") do - if part:sub(1, 1) == '#' then - break - end - table.insert(parts, part:lower()) - end - - -- Check if the line contains an IP address followed by hostnames - if #parts >= 2 then - local ip, _, family = _M.parse_hostname(parts[1]) - if family ~= "name" then -- ipv4/ipv6 - for i = 2, #parts do - local host = parts[i] - local v = hosts[host] - if not v then - v = {} - hosts[host] = v - end - v[family] = v[family] or ip -- prefer to use the first ip - end - end + local lines, err = get_lines(path or DEFAULT_HOSTS_FILE) + if not lines then + return nil, err + end + + local hosts = {} + for _, line in ipairs(lines) do + -- Remove leading/trailing whitespaces and split by whitespace + local parts = {} + for part in line:gmatch("%S+") do + if part:sub(1, 1) == '#' then + break + end + table.insert(parts, part:lower()) + end + + -- Check if the line contains an IP address followed by hostnames + if #parts >= 2 then + local ip, _, family = _M.parse_hostname(parts[1]) + if family ~= "name" then -- ipv4/ipv6 + for i = 2, #parts do + local host = parts[i] + local v = hosts[host] + if not v then + v = {} + hosts[host] = v + end + v[family] = v[family] or ip -- prefer to use the first ip end + end end - return hosts + end + return hosts end -- TODO: need to rewrite it instead of calling parseResolvConf function _M.parse_resolv_conf(path, enable_ipv6) - local resolv, err = 
utils.parseResolvConf(path or DEFAULT_RESOLV_CONF) - if not resolv then - return nil, err - end - resolv = utils.applyEnv(resolv) - resolv.options = resolv.options or {} - resolv.ndots = resolv.options.ndots or 1 - resolv.search = resolv.search or (resolv.domain and { resolv.domain }) - -- remove special domain like "." - if resolv.search then - for i = #resolv.search, 1, -1 do - if resolv.search[i] == "." then - table_remove(resolv.search, i) - end - end - end - -- nameservers - if resolv.nameserver then - local nameservers = {} - for _, address in ipairs(resolv.nameserver) do - local ip, port, t = utils.parseHostname(address) - if t == "ipv4" or - (t == "ipv6" and not ip:find([[%]], nil, true) and enable_ipv6) - then - table_insert(nameservers, port and { ip, port } or ip) - end - end - resolv.nameservers = nameservers - end - return resolv + local resolv, err = utils.parseResolvConf(path or DEFAULT_RESOLV_CONF) + if not resolv then + return nil, err + end + resolv = utils.applyEnv(resolv) + resolv.options = resolv.options or {} + resolv.ndots = resolv.options.ndots or 1 + resolv.search = resolv.search or (resolv.domain and { resolv.domain }) + -- remove special domain like "." + if resolv.search then + for i = #resolv.search, 1, -1 do + if resolv.search[i] == "." 
then + table_remove(resolv.search, i) + end + end + end + -- nameservers + if resolv.nameserver then + local nameservers = {} + for _, address in ipairs(resolv.nameserver) do + local ip, port, t = utils.parseHostname(address) + if t == "ipv4" or + (t == "ipv6" and not ip:find([[%]], nil, true) and enable_ipv6) then + table_insert(nameservers, port and { ip, port } or ip) + end + end + resolv.nameservers = nameservers + end + return resolv end function _M.is_fqdn(name, ndots) - local _, dot_count = name:gsub("%.", "") - return (dot_count >= ndots) or (name:sub(-1) == ".") + local _, dot_count = name:gsub("%.", "") + return (dot_count >= ndots) or (name:sub(-1) == ".") end -- construct names from resolv options: search, ndots and domain function _M.search_names(name, resolv, hosts) - if not resolv.search or _M.is_fqdn(name, resolv.ndots) then - return { name } - end - - local names = {} - for _, suffix in ipairs(resolv.search) do - table_insert(names, name .. "." .. suffix) - end - if hosts and hosts[name] then - table_insert(names, 1, name) - else - table_insert(names, name) - end - return names + if not resolv.search or _M.is_fqdn(name, resolv.ndots) then + return { name } + end + + local names = {} + for _, suffix in ipairs(resolv.search) do + table_insert(names, name .. "." .. suffix) + end + if hosts and hosts[name] then + table_insert(names, 1, name) + else + table_insert(names, name) + end + return names end function _M.ipv6_bracket(name) - if name:match("^[^[].*:") then -- not rigorous, but sufficient - return "[" .. name .. "]" - end - return name + if name:match("^[^[].*:") then -- not rigorous, but sufficient + return "[" .. name .. 
"]" + end + return name end -- util APIs to balance @answers function _M.get_rr_ans(answers) - answers.last = (answers.last or 0) % #answers + 1 - return answers[answers.last] + answers.last = (answers.last or 0) % #answers + 1 + return answers[answers.last] end -- based on the Nginx's SWRR algorithm and lua-resty-balancer local function swrr_next(answers) - local total = 0 - local best = nil -- best answer in answers[] - - for _, answer in ipairs(answers) do - local w = (answer.weight == 0) and 0.1 or answer.weight -- rfc 2782 - local cw = answer.cw + w - answer.cw = cw - if not best or cw > best.cw then - best = answer - end - total = total + w + local total = 0 + local best = nil -- best answer in answers[] + + for _, answer in ipairs(answers) do + local w = (answer.weight == 0) and 0.1 or answer.weight -- rfc 2782 + local cw = answer.cw + w + answer.cw = cw + if not best or cw > best.cw then + best = answer end + total = total + w + end - best.cw = best.cw - total - return best + best.cw = best.cw - total + return best end local function swrr_init(answers) - for _, answer in ipairs(answers) do - answer.cw = 0 -- current weight - end - -- random start - for _ = 1, math_random(#answers) do - swrr_next(answers) - end + for _, answer in ipairs(answers) do + answer.cw = 0 -- current weight + end + -- random start + for _ = 1, math_random(#answers) do + swrr_next(answers) + end end -- gather all records with the lowest priority into one array (answers.l) -- and return it local function filter_lowest_priority_answers(answers) - local lowest_priority = answers[1].priority - local l = {} -- lowest priority list - - for _, answer in ipairs(answers) do - if answer.priority < lowest_priority then - lowest_priority = answer.priority - l = { answer } - elseif answer.priority == lowest_priority then - table.insert(l, answer) - end + local lowest_priority = answers[1].priority + local l = {} -- lowest priority list + + for _, answer in ipairs(answers) do + if answer.priority < 
lowest_priority then + lowest_priority = answer.priority + l = { answer } + + elseif answer.priority == lowest_priority then + table.insert(l, answer) end + end - answers.l = l - return l + answers.l = l + return l end function _M.get_wrr_ans(answers) - local l = answers.l or filter_lowest_priority_answers(answers) + local l = answers.l or filter_lowest_priority_answers(answers) - -- perform round robin selection on lowest priority answers @l - if not l[1].cw then - swrr_init(l) - end + -- perform round robin selection on lowest priority answers @l + if not l[1].cw then + swrr_init(l) + end - return swrr_next(l) + return swrr_next(l) end From cb9293c48f9ee6f031faabeb0667549842b0ab0c Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 29 Feb 2024 15:58:22 +0800 Subject: [PATCH 017/126] coding style: change table.insert to table_insert --- kong/resty/dns_client/init.lua | 4 ++-- kong/resty/dns_client/utils.lua | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index bd646fd5ecbd..fac4dbe319af 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -301,7 +301,7 @@ local function process_answers(self, qname, qtype, answers) answer.target = ipv6_bracket(answer.target) end - table.insert(processed_answers, answer) + table_insert(processed_answers, answer) end if self.valid_ttl then @@ -467,7 +467,7 @@ local function get_search_types(self, name, qtype) qtype = get_last_type(self.cache, name) end if qtype and not checked_types[qtype] then - table.insert(types, qtype) + table_insert(types, qtype) checked_types[qtype] = true end end diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 06d023d9e989..bb4df2368a82 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -50,7 +50,7 @@ local function get_lines(path) if type(path) == "table" then return path end - return readlines(path or 
DEFAULT_HOSTS_FILE) + return readlines(path) end From 1340b979c34dfed510901be1e2aeeda9c752f432 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 29 Feb 2024 16:33:40 +0800 Subject: [PATCH 018/126] fix test case of stale updating task --- .../30-new-dns-client/03-old_client_cache_spec.lua | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 8346fbe209df..dfcdac83be80 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -1,4 +1,5 @@ --- This test case file originates from the old version of the DNS client and has -- been modified to adapt to the new version of the DNS client. +-- This test case file originates from the old version of the DNS client and has +-- been modified to adapt to the new version of the DNS client. local utils = require("kong.tools.utils") local _writefile = require("pl.utils").writefile @@ -259,11 +260,12 @@ describe("[DNS client cache]", function() sleep(0.1 + config.stale_ttl / 2) -- fresh result, but it should not affect answers2 - mock_records["myhost6.domain.com:"..resolver.TYPE_A][1].tag = "new" -- TODO flakyness + mock_records["myhost6.domain.com:"..resolver.TYPE_A][1].tag = "new" -- resolve again, now getting same record, but stale, this will trigger -- background refresh query local answers2 = cli:resolve("myhost6") + assert.falsy(answers2[1].tag) assert.is_true(answers2.expired) -- stale; marked as expired answers2.expired = nil assert_same_answers(answers2, answers) @@ -274,6 +276,8 @@ describe("[DNS client cache]", function() -- resolve and check whether we got the new record from the mock copy local answers3 = cli:resolve("myhost6") + assert.equal(answers3[1].tag, "new") + assert.falsy(answers3.expired) assert.not_equal(answers, answers3) -- must be a different record now 
assert_same_answers(answers3, mock_records["myhost6.domain.com:"..resolver.TYPE_A]) From 3b4f0c644371ac6ba97b3e2157a1b8faed3c2f39 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 29 Feb 2024 17:11:03 +0800 Subject: [PATCH 019/126] fix bug: should insert `nil` value as missed data into mlcache --- kong/resty/dns_client/init.lua | 7 +++---- .../01-unit/30-new-dns-client/03-old_client_cache_spec.lua | 4 ++-- spec/helpers/dns.lua | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index fac4dbe319af..2c284620e6ee 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -368,8 +368,7 @@ local function start_stale_update_task(self, key, name, qtype) local answers = resolve_query(self, name, qtype, {}) if answers and (not answers.errcode or answers.errcode == 3) then - self.cache:set(key, { ttl = answers.ttl }, - answers.errcode == 3 and nil or answers) + self.cache:set(key, { ttl = answers.ttl }, answers.errcode ~= 3 and answers or nil) insert_last_type(self.cache, name, qtype) end end) @@ -428,8 +427,8 @@ local function resolve_name_type(self, name, qtype, opts, tries) end local answers, err, hit_level = self.cache:get(key, nil, - resolve_name_type_callback, - self, name, qtype, opts, tries) + resolve_name_type_callback, + self, name, qtype, opts, tries) if err and err:sub(1, 8) == "callback" then log(ALERT, err) end diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index dfcdac83be80..7f657aa474b4 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -425,8 +425,8 @@ describe("[DNS client cache]", function() -- wait again for the background query to complete sleep(0.1) -- background resolve is now complete, check the cache, it should now have been - -- replaced by the name error - 
assert.equal(rec2, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) + -- replaced by `nil` value (the name error is saved into mlcache miss_shm) + assert.equal(nil, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) end) it("empty records do not replace stale records", function() diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 497e6047091d..154f7437b2d3 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -125,7 +125,6 @@ function _M.dnsA(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type - --dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) dnscache:set(key, records, records[1].ttl) -- insert last-succesful lookup type client.getobj():insert_last_type(records[1].name, records[1].type) From 0c3f595d7bbbca5b797fa512f46bb5dbcf4fd7ec Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 5 Mar 2024 12:04:51 +0800 Subject: [PATCH 020/126] simplify injecting resolver.query logic in tests 30-new-dns-client/* --- .../30-new-dns-client/02-old_client_spec.lua | 16 +++------------ .../03-old_client_cache_spec.lua | 20 +++---------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 8eccf857c2cb..e4dd497e30b4 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -86,22 +86,12 @@ describe("[DNS client]", function() package.loaded["resty.dns.resolver"] = nil resolver = require("resty.dns.resolver") - -- replace this `query_func` upvalue to spy on resolver query calls. + local original_query_func = resolver.query query_func = function(self, original_query_func, name, options) return original_query_func(self, name, options) end - - local old_new = resolver.new - resolver.new = function(...) - local r, err = old_new(...) 
- if not r then - return nil, err - end - local original_query_func = r.query - r.query = function(self, ...) - return query_func(self, original_query_func, ...) - end - return r + resolver.query = function(self, ...) + return query_func(self, original_query_func, ...) end -- restore its API overlapped by the compatible layer diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 7f657aa474b4..16dd4dc28560 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -86,23 +86,9 @@ describe("[DNS client cache]", function() -- inject r.query package.loaded["resty.dns.resolver"] = nil resolver = require("resty.dns.resolver") - - -- replace this `query_func` upvalue to spy on resolver query calls. - query_func = function(self, original_query_func, name, options) - return original_query_func(self, name, options) - end - - local old_new = resolver.new - resolver.new = function(...) - local r, err = old_new(...) - if not r then - return nil, err - end - local original_query_func = r.query - r.query = function(self, ...) - return query_func(self, original_query_func, ...) - end - return r + local original_query_func = resolver.query + resolver.query = function(self, ...) + return query_func(self, original_query_func, ...) 
end -- restore its API overlapped by the compatible layer From 904f8b3d0e3e29a905d2ec7e41c8654d6fa0523f Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 6 Mar 2024 11:49:29 +0800 Subject: [PATCH 021/126] optimize cache inserting logic to avoid unnecessary IPC to broadcast invalidation events --- kong/resty/dns_client/init.lua | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 2c284620e6ee..54bf35a35700 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -95,8 +95,9 @@ end -- lookup or set TYPE_LAST (the DNS record type from the last successful query) local function insert_last_type(cache, name, qtype) - if typstrs[qtype] then - cache:set("last:" .. name, { ttl = 0 }, qtype) + local key = "last:" .. name + if typstrs[qtype] and cache:get(key) ~= qtype then + cache:set(key, { ttl = 0 }, qtype) end end @@ -140,7 +141,10 @@ local function init_hosts(cache, path, preferred_ip_type) ttl = ttl, }, } - cache:set(key, { ttl = ttl }, answers) + -- insert via the `:get` callback to prevent inter-process communication + cache:get(key, nil, function() + return answers, nil, ttl + end) end for name, address in pairs(hosts) do @@ -161,6 +165,9 @@ local function init_hosts(cache, path, preferred_ip_type) end +-- distinguish the worker_events sources registered by different new() instances +local ipc_counter = 0 + function _M.new(opts) if not opts then return nil, "no options table specified" @@ -195,15 +202,15 @@ function _M.new(opts) exptimeout = lock_timeout + 1, } - local ipc_source = "dns_client_mlcache" + ipc_counter = ipc_counter + 1 + local ipc_source = "dns_client_mlcache#" .. 
ipc_counter local ipc = { register_listeners = function(events) if not kong or not kong.worker_events then return end for _, ev in pairs(events) do - kong.worker_events.register(function(data) ev.handler(data) end, - ipc_source, ev.channel) + kong.worker_events.register(ev.handler, ipc_source, ev.channel) end end, broadcast = function(channel, data) From 8877f9e58d3254f2eed774254f8e0e681b09deeb Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 29 Feb 2024 17:36:37 +0800 Subject: [PATCH 022/126] avoid running callback from local worker's events and add tests for IPC --- kong/resty/dns_client/init.lua | 8 +- .../30-new-dns-client/04-client_ipc_spec.lua | 56 ++++++++++++++ .../kong/plugins/dns-client-test/handler.lua | 74 +++++++++++++++++++ .../kong/plugins/dns-client-test/schema.lua | 12 +++ 4 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua create mode 100644 spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua create mode 100644 spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 54bf35a35700..0bbf3c2184e5 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -209,8 +209,14 @@ function _M.new(opts) if not kong or not kong.worker_events then return end + local cwid = ngx.worker.id() for _, ev in pairs(events) do - kong.worker_events.register(ev.handler, ipc_source, ev.channel) + local handler = function(data, event, source, wid) + if cwid ~= wid then + ev.handler(data) + end + end + kong.worker_events.register(handler, ipc_source, ev.channel) end end, broadcast = function(channel, data) diff --git a/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua new file mode 100644 index 000000000000..441c9958fb35 --- /dev/null +++ b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua @@ -0,0 
+1,56 @@ +local helpers = require "spec.helpers" +local pl_file = require "pl.file" + + +local function count_log_lines(pattern) + local cfg = helpers.test_conf + local logs = pl_file.read(cfg.prefix .. "/" .. cfg.proxy_error_log) + local _, count = logs:gsub(pattern, "") + return count +end + + +describe("[dns-client] inter-process communication:",function() + local num_workers = 2 + + setup(function() + local bp = helpers.get_db_utils("postgres", { + "routes", + "services", + "plugins", + }, { + "dns-client-test", + }) + + bp.plugins:insert { + name = "dns-client-test", + } + + assert(helpers.start_kong({ + nginx_conf = "spec/fixtures/custom_nginx.template", + plugins = "bundled,dns-client-test", + nginx_main_worker_processes = num_workers, + })) + end) + + teardown(function() + helpers.stop_kong() + end) + + it("stale updating task broadcast events", function() + helpers.wait_until(function() + return count_log_lines("DNS query completed") == num_workers + end, 5) + + assert.same(count_log_lines("first:query:ipc.com"), 1) + assert.same(count_log_lines("first:answers:1.2.3.4"), num_workers) + + assert.same(count_log_lines("stale:query:ipc.com"), 1) + assert.same(count_log_lines("stale:answers:1.2.3.4."), num_workers) + + assert.same(count_log_lines("stale:broadcast:ipc.com:33"), 1) + -- "stale:lru ..." means the progress of the two workers is about the same. + -- "first:lru ..." means one of the workers is far behind the other. + assert.same(count_log_lines(":lru delete:ipc.com:33"), 1) + end) +end) diff --git a/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua new file mode 100644 index 000000000000..1cdfb2021cd9 --- /dev/null +++ b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua @@ -0,0 +1,74 @@ +-- The test case 04-client_ipc_spec.lua will load this plugin and check its +-- generated error logs. 
+ +local DnsClientTestHandler = { + VERSION = "1.0", + PRIORITY = 1000, +} + + +local log = ngx.log +local ERR = ngx.ERR +local PRE = "dns-client-test:" + + +local function test() + local phase = "" + local host = "ipc.com" + + -- inject resolver.query + require("resty.dns.resolver").query = function(self, name, opts) + log(ERR, PRE, phase, "query:", name) + return {{ + type = opts.qtype, + address = "1.2.3.4", + target = "1.2.3.4", + class = 1, + name = name, + ttl = 0.1, + }} + end + + local dns_client = require("kong.tools.dns")() + local cli = dns_client.new({}) + + -- inject broadcast + local orig_broadcast = cli.cache.broadcast + cli.cache.broadcast = function(channel, data) + log(ERR, PRE, phase, "broadcast:", data) + orig_broadcast(channel, data) + end + + -- inject lrucahce.delete + local orig_delete = cli.cache.lru.delete + cli.cache.lru.delete = function(self, key) + log(ERR, PRE, phase, "lru delete:", key) + orig_delete(self, key) + end + + -- phase 1: two processes try to get answers and trigger only one query + phase = "first:" + local answers = cli:_resolve(host) + log(ERR, PRE, phase, "answers:", answers[1].address) + + -- wait records to be stale + ngx.sleep(0.5) + + -- phase 2: get the stale record and trigger only one stale-updating task, + -- the stale-updating task will update the record and broadcast + -- the lru cache invalidation event to other workers + phase = "stale:" + local answers = cli:_resolve(host) + log(ERR, PRE, phase, "answers:", answers[1].address) + + -- tests end + log(ERR, PRE, "DNS query completed") +end + + +function DnsClientTestHandler:init_worker() + ngx.timer.at(0, test) +end + + +return DnsClientTestHandler diff --git a/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua new file mode 100644 index 000000000000..8b6c80ad59e7 --- /dev/null +++ b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua @@ -0,0 +1,12 @@ 
+return { + name = "dns-client-test", + fields = { + { + config = { + type = "record", + fields = { + }, + }, + }, + }, +} From 0338f1b98ab796a86985eeae93578ffdbed463e6 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 7 Mar 2024 18:06:43 +0800 Subject: [PATCH 023/126] coding style fix and keep the error string consistent with the previous version --- kong/resty/dns_client/init.lua | 75 ++++++++++++------- kong/resty/dns_client/utils.lua | 13 ++-- kong/runloop/balancer/init.lua | 1 - .../30-new-dns-client/01-utils_spec.lua | 18 ++--- .../30-new-dns-client/02-old_client_spec.lua | 10 +-- 5 files changed, 69 insertions(+), 48 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 0bbf3c2184e5..380919c99956 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -3,11 +3,11 @@ local utils = require("kong.resty.dns_client.utils") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") -local get_rr_ans = utils.get_rr_ans -local get_wrr_ans = utils.get_wrr_ans local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket local search_names = utils.search_names +local get_round_robin_answers = utils.get_round_robin_answers +local get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers local now = ngx.now local log = ngx.log @@ -52,17 +52,24 @@ local typstrs = { [TYPE_CNAME] = "CNAME", } +local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale + local hitstrs = { [1] = "hit_lru", [2] = "hit_shm", + [3] = "hit_cb", + [4] = "hit_stale", } -local errstrs = { -- client specific errors - [100] = "cache only lookup failed", - [101] = "no available records", -} - -local EMPTY_ANSWERS = { errcode = 3, errstr = "name error" } +-- server replied error from the DNS protocol +local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" +local NAME_ERROR_ANSWERS = { errcode = NAME_ERROR_CODE, errstr = "name error" } +-- 
client specific error +local CACHE_ONLY_EC = 100 +local CACHE_ONLY_ESTR = "cache only lookup failed" +local CACHE_ONLY_ANSWERS = { errcode = CACHE_ONLY_EC, errstr = CACHE_ONLY_ESTR } +local EMPTY_RECORD_EC = 101 +local EMPTY_RECORD_ESTR = "empty record received" -- APIs @@ -206,6 +213,8 @@ function _M.new(opts) local ipc_source = "dns_client_mlcache#" .. ipc_counter local ipc = { register_listeners = function(events) + -- The DNS client library will be required in globalpatches before Kong + -- initializes worker_events. if not kong or not kong.worker_events then return end @@ -289,7 +298,7 @@ end local function process_answers(self, qname, qtype, answers) local errcode = answers.errcode if errcode then - answers.ttl = errcode == 3 and self.empty_ttl or self.error_ttl + answers.ttl = errcode == NAME_ERROR_CODE and self.empty_ttl or self.error_ttl -- compatible with balancer, which needs this field answers.expire = now() + answers.ttl return answers @@ -327,9 +336,9 @@ local function process_answers(self, qname, qtype, answers) if #processed_answers == 0 then if not cname_answer then return { - errcode = 101, - errstr = errstrs[101], - ttl = self.empty_ttl, + errcode = EMPTY_RECORD_EC, + errstr = EMPTY_RECORD_ESTR, + ttl = self.empty_ttl, -- expire = now() + self.empty_ttl, } end @@ -337,8 +346,8 @@ local function process_answers(self, qname, qtype, answers) table_insert(processed_answers, cname_answer) end - processed_answers.ttl = ttl processed_answers.expire = now() + ttl + processed_answers.ttl = ttl return processed_answers end @@ -377,11 +386,14 @@ local function start_stale_update_task(self, key, name, qtype) stats_count(self.stats, key, "stale") timer_at(0, function (premature) - if premature then return end + if premature then + return + end local answers = resolve_query(self, name, qtype, {}) - if answers and (not answers.errcode or answers.errcode == 3) then - self.cache:set(key, { ttl = answers.ttl }, answers.errcode ~= 3 and answers or nil) + if answers 
and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then + self.cache:set(key, { ttl = answers.ttl }, + answers.errcode ~= NAME_ERROR_CODE and answers or nil) insert_last_type(self.cache, name, qtype) end end) @@ -391,6 +403,8 @@ end local function resolve_name_type_callback(self, name, qtype, opts, tries) local key = name .. ":" .. qtype + -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then + -- initiates an asynchronous background updating task to refresh it. local ttl, _, answers = self.cache:peek(key, true) if answers and ttl and not answers.expired then ttl = ttl + self.stale_ttl @@ -404,12 +418,12 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) end if opts.cache_only then - return { errcode = 100, errstr = errstrs[100] }, nil, -1 + return CACHE_ONLY_MISS_ANSWERS, nil, -1 end local answers, err, ttl = resolve_query(self, name, qtype, tries) - if answers and answers.errcode == 3 then + if answers and answers.errcode == NAME_ERROR_CODE then return nil -- empty record for shm_miss cache end @@ -442,26 +456,32 @@ local function resolve_name_type(self, name, qtype, opts, tries) local answers, err, hit_level = self.cache:get(key, nil, resolve_name_type_callback, self, name, qtype, opts, tries) + -- check for runtime errors in the callback if err and err:sub(1, 8) == "callback" then log(ALERT, err) end + -- restore the nil value in mlcache shm_miss to "name error" answers if not answers and not err then - answers = EMPTY_ANSWERS + answers = NAME_ERROR_ANSWERS end local ctx = ngx.ctx if ctx and ctx.has_timing then req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", - (hit_level and hit_level < 3)) + (hit_level and hit_level < HIT_L3)) end - if hit_level and hit_level < 3 then + -- hit L1 lru or L2 shm + if hit_level and hit_level < HIT_L3 then stats_count(self.stats, key, hitstrs[hit_level]) end if err or answers.errcode then - err = err or ("dns server error: %s %s"):format(answers.errcode, 
answers.errstr) + if not err then + local src = answers.errcode < CACHE_ONLY_EC and "server" or "client" + err = ("dns %s error: %s %s"):format(src, answers.errcode, answers.errstr) + end table_insert(tries, { name .. ":" .. typstrs[qtype], err }) end @@ -509,6 +529,8 @@ local function resolve_names_and_types(self, name, opts, tries) return answers, nil, tries end + -- TODO: For better performance, it may be necessary to rewrite it as an + -- iterative function. local types = get_search_types(self, name, opts.qtype) local names = search_names(name, self.resolv, self.hosts) @@ -535,6 +557,7 @@ end local function resolve_all(self, name, opts, tries) + -- key like "short:example.com:all" or "short:example.com:5" local key = "short:" .. name .. ":" .. (opts.qtype or "all") stats_init(self.stats, name) @@ -545,7 +568,7 @@ local function resolve_all(self, name, opts, tries) return nil, "recursion detected for name: " .. name end - -- quickly lookup with the key `short::all` or `short::` + -- quickly lookup with the key "short::all" or "short::" local answers, err, hit_level = self.cache:get(key) if not answers or answers.expired then stats_count(self.stats, name, "miss") @@ -559,7 +582,7 @@ local function resolve_all(self, name, opts, tries) local ctx = ngx.ctx if ctx and ctx.has_timing then req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", - (hit_level and hit_level < 3)) + (hit_level and hit_level < HIT_L3)) end stats_count(self.stats, name, hitstrs[hit_level]) @@ -597,12 +620,12 @@ function _M:resolve(name, opts, tries) -- option: return_random if answers[1].type == TYPE_SRV then - local answer = get_wrr_ans(answers) + local answer = get_weighted_round_robin_answers(answers) opts.port = answer.port ~= 0 and answer.port or opts.port return self:resolve(answer.target, opts, tries) end - return get_rr_ans(answers).address, opts.port, tries + return get_round_robin_answers(answers).address, opts.port, tries end diff --git a/kong/resty/dns_client/utils.lua 
b/kong/resty/dns_client/utils.lua index bb4df2368a82..1aa8b2084d25 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -160,7 +160,7 @@ end -- util APIs to balance @answers -function _M.get_rr_ans(answers) +function _M.get_round_robin_answers(answers) answers.last = (answers.last or 0) % #answers + 1 return answers[answers.last] end @@ -197,11 +197,10 @@ local function swrr_init(answers) end --- gather all records with the lowest priority into one array (answers.l) --- and return it +-- gather records with the lowest priority in SRV record local function filter_lowest_priority_answers(answers) local lowest_priority = answers[1].priority - local l = {} -- lowest priority list + local l = {} -- lowest priority records list for _, answer in ipairs(answers) do if answer.priority < lowest_priority then @@ -213,13 +212,13 @@ local function filter_lowest_priority_answers(answers) end end - answers.l = l + answers.lowest_prio_records = l return l end -function _M.get_wrr_ans(answers) - local l = answers.l or filter_lowest_priority_answers(answers) +function _M.get_weighted_round_robin_answers(answers) + local l = answers.lowest_prio_records or filter_lowest_priority_answers(answers) -- perform round robin selection on lowest priority answers @l if not l[1].cw then diff --git a/kong/runloop/balancer/init.lua b/kong/runloop/balancer/init.lua index 94caf967dfaf..550c1055d84e 100644 --- a/kong/runloop/balancer/init.lua +++ b/kong/runloop/balancer/init.lua @@ -371,7 +371,6 @@ local function execute(balancer_data, ctx) if not ip then log(ERR, "DNS resolution failed: ", port, ". 
Tried: ", tostring(try_list)) if port == "dns server error: 3 name error" or - port == "dns server error: 101 no available records" or port == "dns client error: 101 empty record received" then return nil, "name resolution failed", 503 end diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 2a4ead69bfbe..368adb5f084d 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -100,7 +100,7 @@ describe("[utils]", function () { target = "3" }, -- 25% { target = "4" }, -- 25% } - local count = get_and_count(answers, 100, utils.get_rr_ans) + local count = get_and_count(answers, 100, utils.get_round_robin_answers) assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) end) @@ -109,7 +109,7 @@ describe("[utils]", function () local answers = { { target = "w5-p10-a", weight = 5, priority = 10, }, -- hit 100% } - local count = get_and_count(answers, 20, utils.get_wrr_ans) + local count = get_and_count(answers, 20, utils.get_weighted_round_robin_answers) assert.same(count, { ["w5-p10-a"] = 20 }) -- only get the lowest priority @@ -119,7 +119,7 @@ describe("[utils]", function () { target = "w5-p10-b", weight = 5, priority = 10, }, -- hit 50% { target = "w0-p10", weight = 0, priority = 10, }, -- hit 0% } - local count = get_and_count(answers, 20, utils.get_wrr_ans) + local count = get_and_count(answers, 20, utils.get_weighted_round_robin_answers) assert.same(count, { ["w5-p10-a"] = 10, ["w5-p10-b"] = 10 }) -- weight: 6, 3, 1 @@ -128,7 +128,7 @@ describe("[utils]", function () { target = "w3", weight = 3, priority = 10, }, -- hit 30% { target = "w1", weight = 1, priority = 10, }, -- hit 10% } - local count = get_and_count(answers, 100 * 1000, utils.get_wrr_ans) + local count = get_and_count(answers, 100 * 1000, utils.get_weighted_round_robin_answers) assert.same(count, { ["w6"] = 60000, ["w3"] = 30000, ["w1"] = 10000 }) -- random start 
@@ -146,8 +146,8 @@ describe("[utils]", function () { target = "4", weight = 1, priority = 10, }, } - local a1 = utils.get_wrr_ans(answers1) - local a2 = utils.get_wrr_ans(answers2) + local a1 = utils.get_weighted_round_robin_answers(answers1) + local a2 = utils.get_weighted_round_robin_answers(answers2) assert.not_equal(a1.target, a2.target) -- weight 0 as 0.1 @@ -157,7 +157,7 @@ describe("[utils]", function () { target = "w2", weight = 0, priority = 10, }, { target = "w3", weight = 0, priority = 10, }, } - local count = get_and_count(answers, 100, utils.get_wrr_ans) + local count = get_and_count(answers, 100, utils.get_weighted_round_robin_answers) assert.same(count, { ["w0"] = 7, ["w1"] = 77, ["w2"] = 8, ["w3"] = 8 }) -- weight 0 and lowest priority @@ -167,7 +167,7 @@ describe("[utils]", function () { target = "w0-b", weight = 0, priority = 0, }, { target = "w0-c", weight = 0, priority = 0, }, } - local count = get_and_count(answers, 100, utils.get_wrr_ans) + local count = get_and_count(answers, 100, utils.get_weighted_round_robin_answers) assert.same(count["w1"], nil) -- all weights are 0 @@ -177,7 +177,7 @@ describe("[utils]", function () { target = "3", weight = 0, priority = 10, }, { target = "4", weight = 0, priority = 10, }, } - local count = get_and_count(answers, 100, utils.get_wrr_ans) + local count = get_and_count(answers, 100, utils.get_weighted_round_robin_answers) assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) end) end) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index e4dd497e30b4..7c8f659eed00 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -193,7 +193,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("host") assert.same(answers, nil) - assert.same(err, "dns server error: 101 no available records") + assert.same(err, "dns client error: 101 
empty record received") assert.same({ 'host.one.com:33', 'host.two.com:33', @@ -222,7 +222,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("host") assert.same(answers, nil) - assert.same(err, "dns server error: 101 no available records") + assert.same(err, "dns client error: 101 empty record received") assert.same({ 'host:33', 'host:1', @@ -243,7 +243,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("host") assert.same(answers, nil) - assert.same(err, "dns server error: 101 no available records") + assert.same(err, "dns client error: 101 empty record received") assert.same({ 'host.local.domain.com:33', 'host:33', @@ -720,7 +720,7 @@ describe("[DNS client]", function() }, ["kong-gateway-testing.link:33"] = { query = 1, - ["query_err:no available records"] = 1 + ["query_err:empty record received"] = 1 }, ["smtp.kong-gateway-testing.link"] = { cname = 1, @@ -795,7 +795,7 @@ describe("[DNS client]", function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers, err = cli:resolve(host, { qtype = typ }) assert.is_nil(answers) -- returns nil - assert.equal("dns server error: 101 no available records", err) + assert.equal("dns client error: 101 empty record received", err) end) it("fetching non-existing answerss", function() From a8cc8927bdf1977fd5ec97b0b56b9c2984e4b0be Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 7 Mar 2024 18:07:59 +0800 Subject: [PATCH 024/126] fix shared_dict shm size --- spec/fixtures/shared_dict.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/fixtures/shared_dict.lua b/spec/fixtures/shared_dict.lua index 563066fb088f..26f2c4edecba 100644 --- a/spec/fixtures/shared_dict.lua +++ b/spec/fixtures/shared_dict.lua @@ -13,8 +13,8 @@ local dicts = { "kong_db_cache_2 16m", "kong_db_cache_miss 12m", "kong_db_cache_miss_2 12m", - "kong_dns_cache 10m", - "kong_dns_cache_miss 10m", + "kong_dns_cache 12m", + "kong_dns_cache_miss 5m", 
"kong_mock_upstream_loggers 10m", "kong_secrets 5m", "test_vault 5m", From c7f8cba095b21b14a61a148216ef2c75259292ef Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 7 Mar 2024 18:11:28 +0800 Subject: [PATCH 025/126] fix typo: CACHE_ONLY_MISS_ANSWERS -> CACHE_ONLY_ANSWERS --- kong/resty/dns_client/init.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 380919c99956..240f22725bef 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -418,7 +418,7 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) end if opts.cache_only then - return CACHE_ONLY_MISS_ANSWERS, nil, -1 + return CACHE_ONLY_ANSWERS, nil, -1 end local answers, err, ttl = resolve_query(self, name, qtype, tries) From cb3b499cc104d68e7dde154867c3dbba1d8fdf7b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 7 Mar 2024 20:37:04 +0800 Subject: [PATCH 026/126] fix test case: 01-request-debug_spec.lua: dns cache hit --- kong/resty/dns_client/init.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 240f22725bef..c38085c5dbef 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -468,7 +468,7 @@ local function resolve_name_type(self, name, qtype, opts, tries) local ctx = ngx.ctx if ctx and ctx.has_timing then - req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", + req_dyn_hook_run_hooks("timing", "dns:cache_lookup", (hit_level and hit_level < HIT_L3)) end @@ -581,7 +581,7 @@ local function resolve_all(self, name, opts, tries) else local ctx = ngx.ctx if ctx and ctx.has_timing then - req_dyn_hook_run_hooks(ctx, "timing", "dns:cache_lookup", + req_dyn_hook_run_hooks("timing", "dns:cache_lookup", (hit_level and hit_level < HIT_L3)) end From 50e253c7ab7f8f24bdbb430cc4f79b87dd81e197 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 
Mar 2024 14:58:46 +0800 Subject: [PATCH 027/126] copy the provided opts table with new function copy_options --- kong/resty/dns_client/init.lua | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index c38085c5dbef..1d5b1b2f7997 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -3,12 +3,6 @@ local utils = require("kong.resty.dns_client.utils") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") -local parse_hosts = utils.parse_hosts -local ipv6_bracket = utils.ipv6_bracket -local search_names = utils.search_names -local get_round_robin_answers = utils.get_round_robin_answers -local get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers - local now = ngx.now local log = ngx.log local ERR = ngx.ERR @@ -22,7 +16,14 @@ local ipairs = ipairs local math_min = math.min local table_insert = table.insert +local parse_hosts = utils.parse_hosts +local ipv6_bracket = utils.ipv6_bracket +local search_names = utils.search_names +local get_round_robin_answers = utils.get_round_robin_answers +local get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers + local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks +local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy -- Constants and default values local DEFAULT_ERROR_TTL = 1 -- unit: second @@ -431,12 +432,9 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) end +-- detect circular references in DNS CNAME or SRV records local function detect_recursion(opts, key) local rn = opts.resolved_names - if not rn then - rn = {} - opts.resolved_names = rn - end local detected = rn[key] rn[key] = true return detected @@ -600,6 +598,16 @@ local function resolve_all(self, name, opts, tries) end +local function copy_options(opts) + if opts.resolved_names then + 
return opts + end + opts = cycle_aware_deep_copy(opts) + opts.resolved_names = {} -- for detecting circular references in DNS records + return opts +end + + -- resolve all `name`s and `type`s combinations and return first usable answers -- `name`s: produced by resolv.conf options: `search`, `ndots` and `domain` -- `type`s: SRV, A, AAAA, CNAME @@ -610,7 +618,7 @@ end -- `qtype`: specified query type instead of its own search types function _M:resolve(name, opts, tries) name = name:lower() - opts = opts or {} + opts = copy_options(opts or {}) tries = setmetatable(tries or {}, tries_mt) local answers, err, tries = resolve_all(self, name, opts, tries) From 1812538b531904919e0ca576d18174fe26c2c6d1 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 15:15:07 +0800 Subject: [PATCH 028/126] create timer using a static function instead of recreating closures --- kong/resty/dns_client/init.lua | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 1d5b1b2f7997..e61bcd49b33d 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -383,21 +383,27 @@ local function resolve_query(self, name, qtype, tries) end +local function stale_update_task(premature, self, key, name, qtype) + if premature then + return + end + + local answers = resolve_query(self, name, qtype, {}) + if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then + self.cache:set(key, { ttl = answers.ttl }, + answers.errcode ~= NAME_ERROR_CODE and answers or nil) + insert_last_type(self.cache, name, qtype) + end +end + + local function start_stale_update_task(self, key, name, qtype) stats_count(self.stats, key, "stale") - timer_at(0, function (premature) - if premature then - return - end - - local answers = resolve_query(self, name, qtype, {}) - if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then - 
self.cache:set(key, { ttl = answers.ttl }, - answers.errcode ~= NAME_ERROR_CODE and answers or nil) - insert_last_type(self.cache, name, qtype) - end - end) + local ok, err = timer_at(0, stale_update_task, self, key, name, qtype) + if not ok then + log(ALERT, "failed to start a timer for update a stale DNS record: ", err) + end end From 4a954c9ea2d81a52c617416b6b6683b2cbc2fc29 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 15:34:41 +0800 Subject: [PATCH 029/126] add constant LONG_LASTING_TTL for 10 years ttl value --- kong/resty/dns_client/init.lua | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index e61bcd49b33d..13914faffef7 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -25,10 +25,14 @@ local get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy + -- Constants and default values + local DEFAULT_ERROR_TTL = 1 -- unit: second local DEFAULT_STALE_TTL = 4 local DEFAULT_EMPTY_TTL = 30 +-- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings +local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } @@ -74,6 +78,7 @@ local EMPTY_RECORD_ESTR = "empty record received" -- APIs + local _M = {} local mt = { __index = _M } @@ -135,7 +140,7 @@ local function init_hosts(cache, path, preferred_ip_type) return end - local ttl = 10 * 365 * 24 * 60 * 60 -- 10 years ttl for hosts entries + local ttl = LONG_LASTING_TTL local key = name .. ":" .. 
qtype local answers = { @@ -528,7 +533,7 @@ end local function resolve_names_and_types(self, name, opts, tries) local answers = check_and_get_ip_answers(name) if answers then - answers.ttl = 10 * 365 * 24 * 60 * 60 + answers.ttl = LONG_LASTING_TTL answers.expire = now() + answers.ttl return answers, nil, tries end From 92b40df61dcaf4a27094d956e82dc6c6badf2e9b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 15:40:59 +0800 Subject: [PATCH 030/126] add comment for maximum TTL value: 0xffffffff --- kong/resty/dns_client/init.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 13914faffef7..e11c473fc24b 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -313,7 +313,7 @@ local function process_answers(self, qname, qtype, answers) local processed_answers = {} local cname_answer - local ttl = self.valid_ttl or 0xffffffff + local ttl = self.valid_ttl or 0xffffffff -- 0xffffffff for maximum TTL value for _, answer in ipairs(answers) do answer.name = answer.name:lower() From 7acc0e0a79c22a4ea532731460396e41c4663659 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 15:53:34 +0800 Subject: [PATCH 031/126] fix coding styles and add more comments --- kong/resty/dns_client/utils.lua | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 1aa8b2084d25..a0ff315e88af 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -68,7 +68,7 @@ function _M.parse_hosts(path, enable_ipv6) if part:sub(1, 1) == '#' then break end - table.insert(parts, part:lower()) + table_insert(parts, part:lower()) end -- Check if the line contains an IP address followed by hostnames @@ -115,7 +115,8 @@ function _M.parse_resolv_conf(path, enable_ipv6) for _, address in ipairs(resolv.nameserver) do local ip, port, t = 
utils.parseHostname(address) if t == "ipv4" or - (t == "ipv6" and not ip:find([[%]], nil, true) and enable_ipv6) then + (t == "ipv6" and not ip:find([[%]], nil, true) and enable_ipv6) + then table_insert(nameservers, port and { ip, port } or ip) end end @@ -172,7 +173,8 @@ local function swrr_next(answers) local best = nil -- best answer in answers[] for _, answer in ipairs(answers) do - local w = (answer.weight == 0) and 0.1 or answer.weight -- rfc 2782 + -- 0.1 gives weight 0 record a minimal chance of being chosen (rfc 2782) + local w = (answer.weight == 0) and 0.1 or answer.weight local cw = answer.cw + w answer.cw = cw if not best or cw > best.cw then @@ -208,7 +210,7 @@ local function filter_lowest_priority_answers(answers) l = { answer } elseif answer.priority == lowest_priority then - table.insert(l, answer) + table_insert(l, answer) end end From c57a44938bcfc79fdeb5877a76210e74c16b4259 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 16:10:03 +0800 Subject: [PATCH 032/126] add comment for sleep(0.2) in 04-round_robin_spec.lua --- spec/01-unit/09-balancer/04-round_robin_spec.lua | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index 90b74f99e00b..1a34deda53f5 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -625,7 +625,7 @@ describe("[round robin balancer]", function() wheelSize = 10, }) record.expire = gettime() - 1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration dnsA({ -- create a new record { name = "mashape.test", address = "5.6.7.8" }, }) @@ -1040,7 +1040,7 @@ describe("[round robin balancer]", function() -- expire the existing record record.expire = 0 record.expired = true - sleep(0.2) + sleep(0.2) -- wait for record expiration -- do a lookup to trigger the async lookup 
client.resolve("really.really.really.does.not.exist.hostname.test", {qtype = client.TYPE_A}) sleep(0.5) -- provide time for async lookup to complete @@ -1120,7 +1120,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, @@ -1153,7 +1153,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration dnsAAAA({ -- create a new record (identical) { name = "mashape.test", address = "::1" }, { name = "mashape.test", address = "::2" }, @@ -1185,7 +1185,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration dnsSRV({ -- create a new record (identical) { name = "gelato.test", target = "1.2.3.6", port = 8001, weight = 5 }, { name = "gelato.test", target = "1.2.3.6", port = 8002, weight = 5 }, @@ -1218,7 +1218,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration dnsA({ -- insert an updated record { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.6" }, -- target updated @@ -1260,7 +1260,7 @@ describe("[round robin balancer]", function() }, }) record.expire = gettime() -1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration -- run entire wheel to make sure the expired one is requested, so it can fail for _ = 1, b.wheelSize do b:getPeer() end -- the only indice is now getkong.test @@ -1352,7 +1352,7 @@ describe("[round robin balancer]", function() 
local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record - sleep(0.2) + sleep(0.2) -- wait for record expiration dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.5" }, From 89882ddb648b26521c54c87e2b5193089bba82c7 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 16:16:25 +0800 Subject: [PATCH 033/126] coding style: removed unnecessary blank line in 04-round_robin_spec.lua --- spec/01-unit/09-balancer/04-round_robin_spec.lua | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index 1a34deda53f5..edc5ef811771 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -1121,7 +1121,6 @@ describe("[round robin balancer]", function() local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record sleep(0.2) -- wait for record expiration - dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.5" }, From de35aff3ca3d778a7f032d6a376fde7b9467a47f Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 8 Mar 2024 17:55:03 +0800 Subject: [PATCH 034/126] fixed flakiness of stale updating test case in 03-old_client_cache_spec.lua --- .../30-new-dns-client/03-old_client_cache_spec.lua | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 16dd4dc28560..dc29bca1c6a6 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -221,7 +221,8 @@ describe("[DNS client cache]", function() assert_same_answers(mock_records["myhost5.domain.com:"..resolver.TYPE_CNAME], 
answers) end) - it("ttl in cache is honored for short name entries", function() + it("ttl in cache is honored for short name entries #ttt", function() + local ttl = 0.2 -- in the short name case the same record is inserted again in the cache -- and the lru-ttl has to be calculated, make sure it is correct mock_records = { @@ -230,7 +231,7 @@ describe("[DNS client cache]", function() address = "1.2.3.4", class = 1, name = "myhost6.domain.com", - ttl = 0.1, + ttl = ttl, }} } local mock_copy = utils.cycle_aware_deep_copy(mock_records) @@ -243,7 +244,7 @@ describe("[DNS client cache]", function() mock_records = mock_copy -- wait for expiring - sleep(0.1 + config.stale_ttl / 2) + sleep(ttl + config.stale_ttl / 2) -- fresh result, but it should not affect answers2 mock_records["myhost6.domain.com:"..resolver.TYPE_A][1].tag = "new" @@ -257,8 +258,9 @@ describe("[DNS client cache]", function() assert_same_answers(answers2, answers) answers2.expired = true - -- wait for refresh to complete - sleep(0.1) + -- wait for the refresh to complete. Ensure that the sleeping time is less + -- than ttl, avoiding the updated record from becoming stale again. 
+ sleep(ttl / 2) -- resolve and check whether we got the new record from the mock copy local answers3 = cli:resolve("myhost6") From 49846e36786e90d0266723fe887fa2843f03e66a Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 12 Mar 2024 14:48:54 +0800 Subject: [PATCH 035/126] fix error message and update test case titles --- kong/resty/dns_client/init.lua | 2 +- spec/01-unit/30-new-dns-client/01-utils_spec.lua | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index e11c473fc24b..f232402e0943 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -407,7 +407,7 @@ local function start_stale_update_task(self, key, name, qtype) local ok, err = timer_at(0, stale_update_task, self, key, name, qtype) if not ok then - log(ALERT, "failed to start a timer for update a stale DNS record: ", err) + log(ALERT, "failed to start a timer to update stale DNS records: ", err) end end diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 368adb5f084d..05e843e95342 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -41,7 +41,7 @@ describe("[utils]", function () assert.same(names, { "www.example.com." 
}) end) - it("name dots number >= ndots, not apply the search list", function () + it("dots number in the name >= ndots, not apply the search list", function () local resolv = { ndots = 1, search = { "example.net" }, @@ -53,7 +53,7 @@ describe("[utils]", function () assert.same(names, { "example.com" }) end) - it("name dots number <= ndots, apply the search list", function () + it("dots number in the name < ndots, apply the search list", function () local resolv = { ndots = 2, search = { "example.net" }, @@ -82,8 +82,7 @@ describe("[utils]", function () end) end) - describe("round robin getion", function () - + describe("answer selection", function () local function get_and_count(answers, n, get_ans) local count = {} for _ = 1, n do @@ -93,7 +92,7 @@ describe("[utils]", function () return count end - it("rr", function () + it("round-robin", function () local answers = { { target = "1" }, -- 25% { target = "2" }, -- 25% @@ -104,7 +103,7 @@ describe("[utils]", function () assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) end) - it("swrr", function () + it("slight weight round-robin", function () -- simple one local answers = { { target = "w5-p10-a", weight = 5, priority = 10, }, -- hit 100% From 6e3fcdf99342e4eca4192a6840a46ac95e802176 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 12 Mar 2024 18:12:01 +0800 Subject: [PATCH 036/126] fix bug that stale records will be not updated if querying nameserver failed --- kong/resty/dns_client/init.lua | 52 +++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index f232402e0943..867f2325fd94 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -14,6 +14,7 @@ local type = type local pairs = pairs local ipairs = ipairs local math_min = math.min +local math_random = math.random local table_insert = table.insert local parse_hosts = utils.parse_hosts @@ -28,11 
+29,12 @@ local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy -- Constants and default values -local DEFAULT_ERROR_TTL = 1 -- unit: second -local DEFAULT_STALE_TTL = 4 -local DEFAULT_EMPTY_TTL = 30 +local DEFAULT_ERROR_TTL = 1 -- unit: second +local DEFAULT_STALE_TTL = 4 +local DEFAULT_EMPTY_TTL = 30 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings -local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 +local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 +local STALE_UPDATE_DELAY = 5 local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } @@ -388,24 +390,43 @@ local function resolve_query(self, name, qtype, tries) end -local function stale_update_task(premature, self, key, name, qtype) +local function stale_update_task(premature, self, key, name, qtype, short_key, ttl) if premature then return end local answers = resolve_query(self, name, qtype, {}) - if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then - self.cache:set(key, { ttl = answers.ttl }, - answers.errcode ~= NAME_ERROR_CODE and answers or nil) + if not answers then + -- retry update after failure + local retry_delay = math_random(STALE_UPDATE_DELAY, STALE_UPDATE_DELAY * 2) + ttl = ttl - retry_delay + if ttl < 0 then + return -- no need to retry if it exceeds the stale_ttl + end + local ok, err = timer_at(retry_delay, stale_update_task, self, key, name, + qtype, short_key, ttl) + if not ok then + log(ALERT, "failed to start a timer to re-update stale DNS records: ", err) + end + return + end + + if not answers.errcode or answers.errcode == NAME_ERROR_CODE then + local value = answers.errcode ~= NAME_ERROR_CODE and answers or nil + self.cache:set(key, { ttl = answers.ttl }, value) insert_last_type(self.cache, name, qtype) + + -- simply invalidate it and let the search iteration choose the correct one + self.cache:delete(short_key) end end -local function start_stale_update_task(self, key, name, qtype) +local function 
start_stale_update_task(self, key, name, qtype, short_key, ttl) stats_count(self.stats, key, "stale") - local ok, err = timer_at(0, stale_update_task, self, key, name, qtype) + local ok, err = timer_at(0, stale_update_task, self, key, name, qtype, + short_key, ttl) if not ok then log(ALERT, "failed to start a timer to update stale DNS records: ", err) end @@ -421,7 +442,7 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) if answers and ttl and not answers.expired then ttl = ttl + self.stale_ttl if ttl > 0 then - start_stale_update_task(self, key, name, qtype) + start_stale_update_task(self, key, name, qtype, opts.short_key, ttl) answers.expire = now() + ttl answers.expired = true answers.ttl = ttl @@ -568,6 +589,7 @@ end local function resolve_all(self, name, opts, tries) -- key like "short:example.com:all" or "short:example.com:5" local key = "short:" .. name .. ":" .. (opts.qtype or "all") + opts.short_key = key -- save for later use in the stale update task stats_init(self.stats, name) stats_count(self.stats, name, "runs") @@ -579,12 +601,14 @@ local function resolve_all(self, name, opts, tries) -- quickly lookup with the key "short::all" or "short::" local answers, err, hit_level = self.cache:get(key) - if not answers or answers.expired then + if not answers then stats_count(self.stats, name, "miss") - answers, err, tries = resolve_names_and_types(self, name, opts, tries) if not opts.cache_only and answers then - self.cache:set(key, { ttl = answers.ttl }, answers) + -- insert via the `:get` callback to prevent inter-process communication + self.cache:get(key, nil, function() + return answers, nil, answers.ttl + end) end else From cb1781dd36e3e9e718b8ed45900b427adcd13697 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 13 Mar 2024 15:37:00 +0800 Subject: [PATCH 037/126] compatible with original dns client: skip the SRV record pointing to itself --- kong/resty/dns_client/init.lua | 6 +++++- 
.../30-new-dns-client/02-old_client_spec.lua | 17 ++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 867f2325fd94..2d6c02dcdd71 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -331,7 +331,11 @@ local function process_answers(self, qname, qtype, answers) answer.target = ipv6_bracket(answer.target) end - table_insert(processed_answers, answer) + -- skip the SRV record pointing to itself, + -- see https://github.com/Kong/lua-resty-dns-client/pull/3 + if not (answer.type == TYPE_SRV and answer.target == qname) then + table_insert(processed_answers, answer) + end end if self.valid_ttl then diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 7c8f659eed00..cda0e41c4cf0 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -1170,22 +1170,21 @@ describe("[DNS client]", function() it("recursive SRV pointing to itself",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) - local answers, port, host, err, _ + local ip, answers, port, host, err, _ host = "srvrecurse."..TEST_DOMAIN - -- resolve SRV specific should return the answers including its + -- resolve SRV specific should _not_ return the answers including its -- recursive entry answers, err, _ = cli:resolve(host, { qtype = resolver.TYPE_SRV }) - assert.is_table(answers) - assert.equal(1, #answers) - assert.equal(host, answers[1].target) - assert.equal(host, answers[1].name) - assert.is_nil(err) + assert.same(answers, nil) + assert.same(err, "dns client error: 101 empty record received") -- default order, SRV, A; the recursive SRV answers fails, and it falls -- back to the IP4 address - _, port, _ = cli:resolve(host, { return_random = true }) - assert.same(port, "recursion detected for name: 
srvrecurse.kong-gateway-testing.link") + ip, port, _ = cli:resolve(host, { return_random = true }) + assert.is_string(ip) + assert.is_equal("10.0.0.44", ip) + assert.is_nil(port) end) it("resolving in correct answers-type order",function() From a9916e56f41a9166a702cef4bfef6eb3ec5003f4 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 14 Mar 2024 15:17:56 +0800 Subject: [PATCH 038/126] revert shm_miss feature, which makes source code more complex --- kong/resty/dns_client/init.lua | 13 +-------- kong/templates/nginx_kong.lua | 1 - .../30-new-dns-client/02-old_client_spec.lua | 27 ++++++++----------- .../03-old_client_cache_spec.lua | 4 +-- spec/fixtures/shared_dict.lua | 1 - spec/helpers/dns.lua | 4 +-- 6 files changed, 16 insertions(+), 34 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 2d6c02dcdd71..e3037943d184 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -251,7 +251,6 @@ function _M.new(opts) ipc = ipc, neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, lru_size = opts.cache_size or 10000, - shm_miss = "kong_dns_cache_miss", resty_lock_opts = resty_lock_opts, }) @@ -416,8 +415,7 @@ local function stale_update_task(premature, self, key, name, qtype, short_key, t end if not answers.errcode or answers.errcode == NAME_ERROR_CODE then - local value = answers.errcode ~= NAME_ERROR_CODE and answers or nil - self.cache:set(key, { ttl = answers.ttl }, value) + self.cache:set(key, { ttl = answers.ttl }, answers) insert_last_type(self.cache, name, qtype) -- simply invalidate it and let the search iteration choose the correct one @@ -460,10 +458,6 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) local answers, err, ttl = resolve_query(self, name, qtype, tries) - if answers and answers.errcode == NAME_ERROR_CODE then - return nil -- empty record for shm_miss cache - end - return answers, err, ttl end @@ -495,11 +489,6 @@ local function resolve_name_type(self, 
name, qtype, opts, tries) log(ALERT, err) end - -- restore the nil value in mlcache shm_miss to "name error" answers - if not answers and not err then - answers = NAME_ERROR_ANSWERS - end - local ctx = ngx.ctx if ctx and ctx.has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", diff --git a/kong/templates/nginx_kong.lua b/kong/templates/nginx_kong.lua index 78f2ad8e2fa4..af8035350b24 100644 --- a/kong/templates/nginx_kong.lua +++ b/kong/templates/nginx_kong.lua @@ -25,7 +25,6 @@ lua_shared_dict kong_secrets 5m; > if not legacy_dns_client then lua_shared_dict kong_dns_cache 12m; -lua_shared_dict kong_dns_cache_miss 5m; > end underscores_in_headers on; diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index cda0e41c4cf0..160ea6605ce4 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -1312,31 +1312,26 @@ describe("[DNS client]", function() assert.is_nil(answers1) assert.are.equal(1, call_count) assert.are.equal(NOT_FOUND_ERROR, err1) - answers1, err1 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) - assert.is_nil(answers1) - assert.is_nil(err1) -- nil, nil for cache miss + answers1 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) -- make a second request, result from cache, still called only once answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.is_nil(answers2) assert.are.equal(1, call_count) assert.are.equal(NOT_FOUND_ERROR, err2) - answers2, err2 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) - assert.is_nil(answers2) - assert.is_nil(err2) -- nil, nil for cache miss + answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert.equal(answers1, answers2) + assert.falsy(answers2.expired) - -- wait for expiry of _ttl and retry, still called only once + -- wait for expiry of ttl and retry, still called only once ngx.sleep(empty_ttl+0.5 * stale_ttl) - - -- we cant start stale-updating task for cache missed empty answers answers2, err2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.is_nil(answers2) + assert.are.equal(1, call_count) assert.are.equal(NOT_FOUND_ERROR, err2) - assert.are.equal(2, call_count) - answers2, err2 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) - assert.is_nil(answers2) - assert.is_nil(err2) -- nil, nil for cache miss + answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) + assert.is_true(answers2.expired) -- by now, record is marked as expired -- wait for expiry of stale_ttl and retry, should be called twice now ngx.sleep(0.75 * stale_ttl) @@ -1346,9 +1341,9 @@ describe("[DNS client]", function() assert.are.equal(NOT_FOUND_ERROR, err2) assert.are.equal(2, call_count) - answers2, err2 = cli.cache:get(qname .. ":" .. resolver.TYPE_A) - assert.is_nil(answers2) - assert.is_nil(err2) -- nil, nil for cache miss + answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert.not_equal(answers1, answers2) + assert.falsy(answers2.expired) -- new answers, not expired end) it("verifies ttl and caching of (other) dns errors", function() diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index dc29bca1c6a6..c4040355ea3e 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -413,8 +413,8 @@ describe("[DNS client cache]", function() -- wait again for the background query to complete sleep(0.1) -- background resolve is now complete, check the cache, it should now have been - -- replaced by `nil` value (the name error is saved into mlcache miss_shm) - assert.equal(nil, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) + -- replaced by the name error + assert.equal(rec2, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) end) it("empty records do not replace stale records", function() diff --git a/spec/fixtures/shared_dict.lua b/spec/fixtures/shared_dict.lua index 26f2c4edecba..17ab30ba0b6f 100644 --- a/spec/fixtures/shared_dict.lua +++ b/spec/fixtures/shared_dict.lua @@ -14,7 +14,6 @@ local dicts = { "kong_db_cache_miss 12m", "kong_db_cache_miss_2 12m", "kong_dns_cache 12m", - "kong_dns_cache_miss 5m", "kong_mock_upstream_loggers 10m", "kong_secrets 5m", "test_vault 5m", diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 154f7437b2d3..9a63d02cd65c 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -39,8 +39,8 @@ end -- @param record a DNS record previously created function _M.dnsExpire(client, record) local dnscache = client.getcache() - dnscache:set(record[1].name .. ":" .. record[1].type, nil) - dnscache:set("short:" .. record[1].name .. ":" .. "all", nil) + dnscache:delete(record[1].name .. ":" .. record[1].type) + dnscache:delete("short:" .. record[1].name .. ":" .. 
"all") record.expire = gettime() - 1 end From 4a5516c007ec3918b473909958da66c3ee338554 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 13 Mar 2024 21:50:41 +0800 Subject: [PATCH 039/126] support admin API "/dns" to get statistics --- kong/api/routes/kong.lua | 17 +- kong/resty/dns_client/init.lua | 25 ++- .../30-new-dns-client/02-old_client_spec.lua | 4 +- .../30-new-dns-client/05-client_stat_spec.lua | 154 ++++++++++++++++++ .../04-admin_api/26-dns_client_spec.lua | 44 +++++ 5 files changed, 235 insertions(+), 9 deletions(-) create mode 100644 spec/01-unit/30-new-dns-client/05-client_stat_spec.lua create mode 100644 spec/02-integration/04-admin_api/26-dns_client_spec.lua diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index d2fa8a59443c..8bfb3139b26f 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -269,5 +269,20 @@ return { } return kong.response.exit(200, body) end - } + }, + ["/dns"] = { + GET = function (self, db, helpers) + if not kong.dns.stats then + return kong.response.exit(404, { message = "not support for legacy DNS client" }) + end + local body = { + worker = { + id = ngx.worker.id() or -1, + count = ngx.worker.count(), + }, + stats = kong.dns.stats(), + } + return kong.response.exit(200, body) + end + }, } diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index e3037943d184..36b3a075a7c7 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -380,13 +380,13 @@ local function resolve_query(self, name, qtype, tries) end if not answers then - stats_count(self.stats, key, "query_fail") + stats_count(self.stats, key, "query_fail_nameserver") return nil, "DNS server error: " .. (err or "unknown") end answers = process_answers(self, name, qtype, answers) - stats_count(self.stats, key, answers.errstr and "query_err:" .. answers.errstr + stats_count(self.stats, key, answers.errstr and "query_fail:" .. 
answers.errstr or "query_succ") return answers, nil, answers.ttl @@ -595,7 +595,6 @@ local function resolve_all(self, name, opts, tries) -- quickly lookup with the key "short::all" or "short::" local answers, err, hit_level = self.cache:get(key) if not answers then - stats_count(self.stats, name, "miss") answers, err, tries = resolve_names_and_types(self, name, opts, tries) if not opts.cache_only and answers then -- insert via the `:get` callback to prevent inter-process communication @@ -604,6 +603,7 @@ local function resolve_all(self, name, opts, tries) end) end + stats_count(self.stats, name, answers and "miss" or "fail") else local ctx = ngx.ctx if ctx and ctx.has_timing then @@ -620,8 +620,6 @@ local function resolve_all(self, name, opts, tries) return resolve_all(self, answers[1].cname, opts, tries) end - stats_count(self.stats, name, answers and "succ" or "fail") - return answers, err, tries end @@ -701,6 +699,23 @@ function _M.toip(name, port, cache_only, tries) end +-- for example, "example.com:33" -> "example.com:SRV" +local function format_key(key) + local qname, qtype = key:match("([^:]+):(%d+)") -- match "(qname):(qtype)" + return qtype and qname .. ":" .. 
(typstrs[tonumber(qtype)] or qtype) + or key +end + + +function _M.stats() + local stats = {} + for k, v in pairs(dns_client.stats) do + stats[format_key(k)] = v + end + return stats +end + + -- For testing if package.loaded.busted then diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 160ea6605ce4..19c7887a57db 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -712,7 +712,6 @@ describe("[DNS client]", function() ["kong-gateway-testing.link"] = { miss = 1, runs = 1, - succ = 1 }, ["kong-gateway-testing.link:1"] = { query = 1, @@ -720,7 +719,7 @@ describe("[DNS client]", function() }, ["kong-gateway-testing.link:33"] = { query = 1, - ["query_err:empty record received"] = 1 + ["query_fail:empty record received"] = 1 }, ["smtp.kong-gateway-testing.link"] = { cname = 1, @@ -772,7 +771,6 @@ describe("[DNS client]", function() ["cname2srv.kong-gateway-testing.link"] = { miss = 1, runs = 1, - succ = 1 }, ["cname2srv.kong-gateway-testing.link:33"] = { query = 1, diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua new file mode 100644 index 000000000000..bb1026658c19 --- /dev/null +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -0,0 +1,154 @@ +local helpers = require "spec.helpers" +local sleep = ngx.sleep + +describe("[DNS client stats]", function() + local resolver, client, query_func + + local function client_new(opts) + opts = opts or {} + opts.hosts = {} + opts.nameservers = { "198.51.100.0" } -- placeholder, not used + return client.new(opts) + end + + before_each(function() + -- inject r.query + package.loaded["resty.dns.resolver"] = nil + resolver = require("resty.dns.resolver") + resolver.query = function(...) + if not query_func then + return nil + end + return query_func(...) 
+ end + + -- restore its API overlapped by the compatible layer + package.loaded["kong.resty.dns_client"] = nil + client = require("kong.resty.dns_client") + client.resolve = client._resolve + end) + + after_each(function() + package.loaded["resty.dns.resolver"] = nil + resolver = nil + query_func = nil + + package.loaded["kong.resty.dns.client"] = nil + client = nil + end) + + describe("stats", function() + local cli, mock_records, config + before_each(function() + config = { + order = { "LAST", "A", "CNAME" }, + error_ttl = 0.1, + empty_ttl = 0.1, + stale_ttl = 1, + } + cli = assert(client_new(config)) + + query_func = function(self, qname, opts) + local records = mock_records[qname..":"..opts.qtype] + if type(records) == "string" then + return nil, records -- as error message + end + return records or { errcode = 3, errstr = "name error" } + end + end) + + it("stats", function() + mock_records = { + ["hit.com:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "hit.com", + ttl = 30, + }}, + ["nameserver_fail.com:" .. resolver.TYPE_A] = "nameserver failed", + ["recursion.com:" .. resolver.TYPE_CNAME] = {{ + type = resolver.TYPE_CNAME, + cname = "recursion.com", + class = 1, + name = "recursion.com", + ttl = 30, + }}, + ["stale.com" .. 
resolver.TYPE_A] = {{ + type = resolver.TYPE_CNAME, + address = "stale.com", + class = 1, + name = "stale.com", + ttl = 0.1, + }}, + } + + -- "hit_lru" + cli:resolve("hit.com") + cli:resolve("hit.com") + -- "hit_shm" + cli.cache.lru:delete("short:hit.com:all") + cli:resolve("hit.com") + + -- "query_err:nameserver failed" + cli:resolve("nameserver_fail.com") + + -- "fail_recur" + cli:resolve("recursion.com") + + -- "stale" + cli:resolve("stale.com") + sleep(0.2) + cli:resolve("stale.com") + + assert.same({ + ["hit.com"] = { + ["hit_lru"] = 1, + ["runs"] = 3, + ["miss"] = 1, + ["hit_shm"] = 1 + }, + ["hit.com:1"] = { + ["query"] = 1, + ["query_succ"] = 1 + }, + ["recursion.com"] = { + ["fail_recur"] = 1, + ["runs"] = 2, + ["miss"] = 1, + ["cname"] = 1 + }, + ["recursion.com:1"] = { + ["query"] = 1, + ["query_fail:name error"] = 1 + }, + ["recursion.com:5"] = { + ["query"] = 1, + ["query_succ"] = 1 + }, + ["nameserver_fail.com"] = { + ["fail"] = 1, + ["runs"] = 1 + }, + ["nameserver_fail.com:1"] = { + ["query"] = 1, + ["query_fail_nameserver"] = 1 + }, + ["stale.com"] = { + ["fail"] = 2, + ["runs"] = 2 + }, + ["stale.com:1"] = { + ["query"] = 1, + ["query_fail:name error"] = 1, + ["stale"] = 1 + }, + ["stale.com:5"] = { + ["query"] = 1, + ["query_fail:name error"] = 1, + ["stale"] = 1 + } + }, cli.stats) + end) + end) +end) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua new file mode 100644 index 000000000000..232b1e8c1e27 --- /dev/null +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -0,0 +1,44 @@ +local helpers = require "spec.helpers" +local cjson = require "cjson" + + +for _, strategy in helpers.each_strategy() do + describe("Admin API - DNS client route with [#" .. strategy .. 
"]" , function() + local client + + lazy_setup(function() + helpers.get_db_utils(strategy) + + assert(helpers.start_kong({ + database = strategy, + nginx_conf = "spec/fixtures/custom_nginx.template", + })) + + client = helpers.admin_client() + end) + + teardown(function() + if client then + client:close() + end + helpers.stop_kong() + end) + + it("/dns", function () + local res = assert(client:send { + method = "GET", + path = "/dns", + headers = { ["Content-Type"] = "application/json" } + }) + + local body = assert.res_status(200 , res) + local json = cjson.decode(body) + + assert(type(json.worker.id) == "number") + assert(type(json.worker.count) == "number") + + assert(type(json.stats) == "table") + assert(type(json.stats["127.0.0.1"].runs) == "number") + end) + end) +end From c4eedb7e46458d14a70d80e821163e5301d144e7 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 14 Mar 2024 17:23:10 +0800 Subject: [PATCH 040/126] fix lint error --- kong/resty/dns_client/init.lua | 1 - spec/01-unit/30-new-dns-client/05-client_stat_spec.lua | 1 - 2 files changed, 2 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 36b3a075a7c7..66adc1dd8c71 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -70,7 +70,6 @@ local hitstrs = { -- server replied error from the DNS protocol local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" -local NAME_ERROR_ANSWERS = { errcode = NAME_ERROR_CODE, errstr = "name error" } -- client specific error local CACHE_ONLY_EC = 100 local CACHE_ONLY_ESTR = "cache only lookup failed" diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua index bb1026658c19..602a54084972 100644 --- a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -1,4 +1,3 @@ -local helpers = require "spec.helpers" local sleep = ngx.sleep 
describe("[DNS client stats]", function() From 936a25d7be5c656c35997a3bd0ec0c3eac2d405d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 14 Mar 2024 17:37:12 +0800 Subject: [PATCH 041/126] complete the release file: refactor_dns_client.yml --- changelog/unreleased/kong/refactor_dns_client.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml index c49e03a42d4c..1560db82c9b2 100644 --- a/changelog/unreleased/kong/refactor_dns_client.yml +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -1,3 +1,8 @@ -message: refactor and implement a new DNS client library +message: > + Refactor and implement a new DNS client library + 1. Utilize mlcache as the DNS record cache + 2. Introduce observable statistics and admin API `/dns` to retrieve them + 3. Introduce the `legacy_dns_client` option to toggle whether to use the new library + 4. Deprecate the `dns_no_sync` option; it always uses synchronous queries type: feature scope: Core From ada4b54e9e81e9665a3f177926f0a030d5f5946f Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 15 Mar 2024 10:27:56 +0800 Subject: [PATCH 042/126] chore: assign TYPE_LAST to _M.TYPE_LAST instead of -1 --- kong/resty/dns_client/init.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 66adc1dd8c71..3ad73ead59f9 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -89,7 +89,7 @@ for k,v in pairs(resolver) do _M[k] = v end end -_M.TYPE_LAST = -1 +_M.TYPE_LAST = TYPE_LAST local tries_mt = { __tostring = cjson.encode } From 5d124db8fafcfb59d7354bbfda5c3bc5371697b6 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 15 Mar 2024 16:25:36 +0800 Subject: [PATCH 043/126] Update release file Co-authored-by: Keery Nie --- changelog/unreleased/kong/refactor_dns_client.yml | 10 +++++----- 1 file 
changed, 5 insertions(+), 5 deletions(-) diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml index 1560db82c9b2..ba5e52a575c1 100644 --- a/changelog/unreleased/kong/refactor_dns_client.yml +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -1,8 +1,8 @@ message: > - Refactor and implement a new DNS client library - 1. Utilize mlcache as the DNS record cache - 2. Introduce observable statistics and admin API `/dns` to retrieve them - 3. Introduce the `legacy_dns_client` option to toggle whether to use the new library - 4. Deprecate the `dns_no_sync` option; it always uses synchronous queries + Starting from this version, a new DNS client library has been implemented and added into Kong. The new DNS client library has the following changes + - The new library now leverages `lua-resty-mlcache` for DNS record caching. + - Introduced observable statistics for the new DNS client, and a new Admin API `/dns` to retrieve them. + - Introduced a new option `legacy_dns_client` to toggle whether to use the new DNS client library. + - Deprecated the `dns_no_sync` option in the context of the new DNS client library. With the new library, DNS queries will always be executed synchronously. The `dns_no_sync` option remains functional with the legacy DNS client library. 
type: feature scope: Core From 8f15f13289a5ff2ae1979c64747519b36219de49 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 18 Mar 2024 11:01:22 +0800 Subject: [PATCH 044/126] fix text of `dns_no_sync` option in refactor_dns_client.yml --- changelog/unreleased/kong/refactor_dns_client.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml index ba5e52a575c1..88d031d5e3d8 100644 --- a/changelog/unreleased/kong/refactor_dns_client.yml +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -3,6 +3,6 @@ message: > - The new library now leverages `lua-resty-mlcache` for DNS record caching. - Introduced observable statistics for the new DNS client, and a new Admin API `/dns` to retrieve them. - Introduced a new option `legacy_dns_client` to toggle whether to use the new DNS client library. - - Deprecated the `dns_no_sync` option in the context of the new DNS client library. With the new library, DNS queries will always be executed synchronously. The `dns_no_sync` option remains functional with the legacy DNS client library. + - Deprecated the `dns_no_sync` option in the context of the new DNS client library. With the new library, multiple DNS queries for the same name will always be synchronized (even across workers). The `dns_no_sync` option remains functional with the legacy DNS client library. 
type: feature scope: Core From 6cc8d4e2cf0dee8f83da1f1cd8ba20d841fb9a5d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 18 Mar 2024 13:50:32 +0800 Subject: [PATCH 045/126] process the scenario of timeout=0 in /etc/resolv.conf --- kong/resty/dns_client/utils.lua | 8 ++++++++ spec/01-unit/30-new-dns-client/01-utils_spec.lua | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index a0ff315e88af..94994b78f83d 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -1,5 +1,8 @@ local utils = require("kong.resty.dns.utils") +local log = ngx.log +local NOTICE = ngx.NOTICE + local math_random = math.random local table_insert = table.insert local table_remove = table.remove @@ -101,6 +104,11 @@ function _M.parse_resolv_conf(path, enable_ipv6) resolv.options = resolv.options or {} resolv.ndots = resolv.options.ndots or 1 resolv.search = resolv.search or (resolv.domain and { resolv.domain }) + -- check if timeout is 0s + if resolv.options.timeout and resolv.options.timeout == 0 then + resolv.options.timeout = 2000 -- 2000ms is lua-resty-dns default + log(NOTICE, "A non-positive timeout of 0s is configured in resolv.conf. Setting it to 2000ms.") + end -- remove special domain like "." 
if resolv.search then for i = #resolv.search, 1, -1 do diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 05e843e95342..eef471803acb 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -275,6 +275,12 @@ search domaina.com domainb.com assert.is.same({ "domaina.com", "domainb.com" }, resolv.search) end) + it("tests parsing 'resolv.conf' with 'timeout = 0'", function() + local file = splitlines("options timeout:0") + local resolv = utils.parse_resolv_conf(file) + assert.equal(2000, resolv.options.timeout) + end) + it("tests parsing 'resolv.conf' with max search entries MAXSEARCH", function() local file = splitlines( [[ From 39d18cb411a2b123a1bea6005117c7b1a096419b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 18 Mar 2024 17:54:21 +0800 Subject: [PATCH 046/126] chores(*): fix coding style; add comments; make constant records readonly --- kong/api/routes/kong.lua | 1 + kong/resty/dns_client/init.lua | 37 ++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index 8bfb3139b26f..6e8ca0d2c9a2 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -275,6 +275,7 @@ return { if not kong.dns.stats then return kong.response.exit(404, { message = "not support for legacy DNS client" }) end + local body = { worker = { id = ngx.worker.id() or -1, diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 3ad73ead59f9..84a228b59caa 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -1,5 +1,6 @@ local cjson = require("cjson.safe") local utils = require("kong.resty.dns_client.utils") +local tablex = require("pl.tablex") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") @@ -73,7 +74,7 @@ local NAME_ERROR_CODE = 3 -- response code 3 
as "Name Error" or "NXDOMAIN" -- client specific error local CACHE_ONLY_EC = 100 local CACHE_ONLY_ESTR = "cache only lookup failed" -local CACHE_ONLY_ANSWERS = { errcode = CACHE_ONLY_EC, errstr = CACHE_ONLY_ESTR } +local CACHE_ONLY_ANSWERS = tablex.readonly({ errcode = CACHE_ONLY_EC, errstr = CACHE_ONLY_ESTR }) local EMPTY_RECORD_EC = 101 local EMPTY_RECORD_ESTR = "empty record received" @@ -92,7 +93,7 @@ end _M.TYPE_LAST = TYPE_LAST -local tries_mt = { __tostring = cjson.encode } +local tries_mt = { __tostring = cjson.encode, } local function stats_init(stats, name) @@ -163,10 +164,12 @@ local function init_hosts(cache, path, preferred_ip_type) for name, address in pairs(hosts) do name = name:lower() + if address.ipv4 then insert_answer(name, TYPE_A, address.ipv4) insert_last_type(cache, name, TYPE_A) end + if address.ipv6 then insert_answer(name, TYPE_AAAA, address.ipv6) if not address.ipv4 or preferred_ip_type == TYPE_AAAA then @@ -225,6 +228,7 @@ function _M.new(opts) if not kong or not kong.worker_events then return end + local cwid = ngx.worker.id() for _, ev in pairs(events) do local handler = function(data, event, source, wid) @@ -232,13 +236,16 @@ function _M.new(opts) ev.handler(data) end end + kong.worker_events.register(handler, ipc_source, ev.channel) end end, + broadcast = function(channel, data) if not kong or not kong.worker_events then return end + local ok, err = kong.worker_events.post(ipc_source, channel, data) if not ok then log(ERR, "failed to post event '", ipc_source, "', '", channel, "': ", err) @@ -325,6 +332,7 @@ local function process_answers(self, qname, qtype, answers) -- compatible with balancer, see https://github.com/Kong/kong/pull/3088 if answer.type == TYPE_AAAA then answer.address = ipv6_bracket(answer.address) + elseif answer.type == TYPE_SRV then answer.target = ipv6_bracket(answer.target) end @@ -405,6 +413,7 @@ local function stale_update_task(premature, self, key, name, qtype, short_key, t if ttl < 0 then return -- no need 
to retry if it exceeds the stale_ttl end + local ok, err = timer_at(retry_delay, stale_update_task, self, key, name, qtype, short_key, ttl) if not ok then @@ -504,6 +513,7 @@ local function resolve_name_type(self, name, qtype, opts, tries) local src = answers.errcode < CACHE_ONLY_EC and "server" or "client" err = ("dns %s error: %s %s"):format(src, answers.errcode, answers.errstr) end + table_insert(tries, { name .. ":" .. typstrs[qtype], err }) end @@ -520,6 +530,7 @@ local function get_search_types(self, name, qtype) if qtype == TYPE_LAST then qtype = get_last_type(self.cache, name) end + if qtype and not checked_types[qtype] then table_insert(types, qtype) checked_types[qtype] = true @@ -532,11 +543,15 @@ end local function check_and_get_ip_answers(name) if name:match("^%d+%.%d+%.%d+%.%d+$") then -- IPv4 - return {{ name = name, class = 1, type = TYPE_A, address = name }} + return { + { name = name, class = 1, type = TYPE_A, address = name }, + } end - if name:match(":") then -- IPv6 - return {{ name = name, class = 1, type = TYPE_AAAA, address = ipv6_bracket(name) }} + if name:find(":", 1, true) then -- IPv6 + return { + { name = name, class = 1, type = TYPE_AAAA, address = ipv6_bracket(name) }, + } end return nil @@ -545,7 +560,7 @@ end local function resolve_names_and_types(self, name, opts, tries) local answers = check_and_get_ip_answers(name) - if answers then + if answers then -- domain name is IP literal answers.ttl = LONG_LASTING_TTL answers.expire = now() + answers.ttl return answers, nil, tries @@ -596,6 +611,9 @@ local function resolve_all(self, name, opts, tries) if not answers then answers, err, tries = resolve_names_and_types(self, name, opts, tries) if not opts.cache_only and answers then + -- If another worker resolved the name between these two `:get`, it can + -- work as expected and will not introduce a race condition. 
+ -- -- insert via the `:get` callback to prevent inter-process communication self.cache:get(key, nil, function() return answers, nil, answers.ttl @@ -627,6 +645,7 @@ local function copy_options(opts) if opts.resolved_names then return opts end + opts = cycle_aware_deep_copy(opts) opts.resolved_names = {} -- for detecting circular references in DNS records return opts @@ -721,23 +740,29 @@ if package.loaded.busted then function _M.getobj() return dns_client end + function _M.getcache() return { set = function(self, k, v, ttl) self.cache:set(k, {ttl = ttl or 0}, v) end, + delete = function(self, k) self.cache:delete(k) end, + cache = dns_client.cache, } end + function _M:insert_last_type(name, qtype) insert_last_type(self.cache, name, qtype) end + function _M:get_last_type(name) return get_last_type(self.cache, name) end + _M._init = _M.init function _M.init(opts) opts = opts or {} From 1e19818df9c5694514a2c7a0da1e096a7d4322a0 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 18 Mar 2024 18:10:40 +0800 Subject: [PATCH 047/126] add a comment to explain of the concurrenct control of asynchronous tasks --- kong/resty/dns_client/init.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 84a228b59caa..1047da6a1a51 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -452,6 +452,8 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) if answers and ttl and not answers.expired then ttl = ttl + self.stale_ttl if ttl > 0 then + -- The asynchronous task's concurrent control is ensured by mlcache, + -- which utilizes lua-resty-lock before executing this callback. 
start_stale_update_task(self, key, name, qtype, opts.short_key, ttl) answers.expire = now() + ttl answers.expired = true From 0d631723660767a80d346bfab145cb6ae4f83a53 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 19 Mar 2024 11:10:46 +0800 Subject: [PATCH 048/126] fixed lock_timeout: r:query() has two IO operations send() & receive() --- kong/resty/dns_client/init.lua | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 1047da6a1a51..d001a4635857 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -212,7 +212,10 @@ function _M.new(opts) } -- init the mlcache - local lock_timeout = r_opts.timeout / 1000 * r_opts.retrans + 1 -- s + + -- maximum timeout for the underlying r:query() operation to complete + -- socket timeout * retrans * 2 calls for send and receive + 1s extra delay + local lock_timeout = r_opts.timeout / 1000 * r_opts.retrans * 2 + 1 -- s local resty_lock_opts = { timeout = lock_timeout, From b7a6ccc8c8968cd477260ab942ae4df9a2854bc6 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 19 Mar 2024 17:40:46 +0800 Subject: [PATCH 049/126] automatically refresh stale-but-in-use records every 60s triggered by the upper caller --- kong/resty/dns_client/init.lua | 52 +++++++++++++--------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index d001a4635857..a6fc5d4a90d0 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -15,7 +15,6 @@ local type = type local pairs = pairs local ipairs = ipairs local math_min = math.min -local math_random = math.random local table_insert = table.insert local parse_hosts = utils.parse_hosts @@ -35,7 +34,6 @@ local DEFAULT_STALE_TTL = 4 local DEFAULT_EMPTY_TTL = 30 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings local LONG_LASTING_TTL = 10 * 365 
* 24 * 60 * 60 -local STALE_UPDATE_DELAY = 5 local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } @@ -70,7 +68,7 @@ local hitstrs = { } -- server replied error from the DNS protocol -local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" +local NAME_ERROR_EC = 3 -- response code 3 as "Name Error" or "NXDOMAIN" -- client specific error local CACHE_ONLY_EC = 100 local CACHE_ONLY_ESTR = "cache only lookup failed" @@ -314,7 +312,7 @@ end local function process_answers(self, qname, qtype, answers) local errcode = answers.errcode if errcode then - answers.ttl = errcode == NAME_ERROR_CODE and self.empty_ttl or self.error_ttl + answers.ttl = errcode == NAME_ERROR_EC and self.empty_ttl or self.error_ttl -- compatible with balancer, which needs this field answers.expire = now() + answers.ttl return answers @@ -403,29 +401,13 @@ local function resolve_query(self, name, qtype, tries) end -local function stale_update_task(premature, self, key, name, qtype, short_key, ttl) +local function stale_update_task(premature, self, key, name, qtype, short_key) if premature then return end local answers = resolve_query(self, name, qtype, {}) - if not answers then - -- retry update after failure - local retry_delay = math_random(STALE_UPDATE_DELAY, STALE_UPDATE_DELAY * 2) - ttl = ttl - retry_delay - if ttl < 0 then - return -- no need to retry if it exceeds the stale_ttl - end - - local ok, err = timer_at(retry_delay, stale_update_task, self, key, name, - qtype, short_key, ttl) - if not ok then - log(ALERT, "failed to start a timer to re-update stale DNS records: ", err) - end - return - end - - if not answers.errcode or answers.errcode == NAME_ERROR_CODE then + if answers and (not answers.errcode or answers.errcode == NAME_ERROR_EC) then self.cache:set(key, { ttl = answers.ttl }, answers) insert_last_type(self.cache, name, qtype) @@ -435,11 +417,10 @@ local function stale_update_task(premature, self, key, name, qtype, short_key, t end -local function 
start_stale_update_task(self, key, name, qtype, short_key, ttl) +local function start_stale_update_task(self, key, name, qtype, short_key) stats_count(self.stats, key, "stale") - local ok, err = timer_at(0, stale_update_task, self, key, name, qtype, - short_key, ttl) + local ok, err = timer_at(0, stale_update_task, self, key, name, qtype, short_key) if not ok then log(ALERT, "failed to start a timer to update stale DNS records: ", err) end @@ -452,14 +433,22 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then -- initiates an asynchronous background updating task to refresh it. local ttl, _, answers = self.cache:peek(key, true) - if answers and ttl and not answers.expired then - ttl = ttl + self.stale_ttl - if ttl > 0 then - -- The asynchronous task's concurrent control is ensured by mlcache, - -- which utilizes lua-resty-lock before executing this callback. - start_stale_update_task(self, key, name, qtype, opts.short_key, ttl) + if answers and ttl then + if not answers.expired then answers.expire = now() + ttl answers.expired = true + ttl = ttl + self.stale_ttl + + else + ttl = ttl + (answers.expire - now()) + end + + -- trigger the update task by the upper caller every 60 seconds + ttl = math_min(ttl, 60) + + if ttl > 0 then + -- mlcache's internal lock mechanism ensures concurrent control + start_stale_update_task(self, key, name, qtype, opts.short_key) answers.ttl = ttl return answers, nil, ttl end @@ -470,7 +459,6 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) end local answers, err, ttl = resolve_query(self, name, qtype, tries) - return answers, err, ttl end From e484ecf3f0fe659b9da7a133a2beed760dd7b70c Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 21 Mar 2024 16:35:21 +0800 Subject: [PATCH 050/126] added kong/resty/dns_client/README.md --- kong/resty/dns_client/README.md | 117 ++++++++++++++++++++++++++++++++ 1 file 
changed, 117 insertions(+) create mode 100644 kong/resty/dns_client/README.md diff --git a/kong/resty/dns_client/README.md b/kong/resty/dns_client/README.md new file mode 100644 index 000000000000..06b0ebd05e50 --- /dev/null +++ b/kong/resty/dns_client/README.md @@ -0,0 +1,117 @@ +Name +==== + +The module is currently Kong only, and builds on top of the `lua-resty-dns` and the kong's `lua-resty-mlcache` library. + +Table of Contents +================= + +* [Name](#name) +* [APIs](#apis) + * [new](#new) + * [query](#query) + +# APIs + +The following APIs are for internal development use only within Kong. In the current version, the new DNS library still needs to be compatible with the original DNS library. Therefore, the functions listed below cannot be directly invoked. For example, the `_M:resolve` function in the following APIs will be replaced to ensure compatibility with the previous DNS library API interface specifications `_M.resolve`. + +## new + +**syntax:** *c, err = dns_client.new(opts)* +**context:** any + +** Functionality: ** + +Creates a dns client object. Returns nil and a message string on error. + +Perform a series of initialization operations: + +* parse `host` file +* parse `resolv.conf` file (used by the underlying `lua-resty-dns` library) +* initialize multiple TTL options +* create a mlcache object and initialize it + +** Input paramenters: ** + +`@opts` It accepts a options table argument. The following options are supported: + +* TTL options + * `valid_ttl` + * same to the option `dns_valid_ttl` in kong.conf + * `stale_ttl` + * same to the option `dns_stale_ttl` in kong.conf + * `empty_ttl` + * same to the option `dns_not_found_ttl` in kong.conf + * `bad_ttl` + * same to the option `dns_error_ttl` in kong.conf +* `hosts` (default: `/etc/hosts`) + * the path of `hosts` file +* `resolv_conf` (default: `/etc/resolv.conf`) + * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. 
+* `order` (default: `{ "LAST", "SRV", "A", "AAAA", "CNAME" }`) + * the order in which to resolve different record types, it's similar to the option `dns_order` in kong.conf. + * The `LAST` type means the type of the last successful lookup (for the specified name). +* `enable_ipv6` (default: `true`) + * whether to support IPv6 servers when getting nameservers from `resolv.conf` +* options for the underlying `lua-resty-dns` library + * `retrans` (default: `5`) + * the total number of times of retransmitting the DNS request when receiving a DNS response times out according to the timeout setting. When trying to retransmit the query, the next nameserver according to the round-robin algorithm will be picked up. + * If not given, it is taken from `resolv.conf` option `options attempts:`. + * `timeout` (default: `2000`) + * the time in milliseconds for waiting for the response for a single attempt of request transmission + * If not given, it is taken from `resolv.conf` option `options timeout:`. But note that its unit in `resolv.conf` is second. + * `no_random` (default: `true`) + * a boolean flag controls whether to randomly pick the nameserver to query first, if `true` will always start with the first nameserver listed. + * If not given, it is taken from `resolv.conf` option `rotate` (inverted). + * `nameservers` + * a list of nameservers to be used. Each nameserver entry can be either a single hostname string or a table holding both the hostname string and the port number. For example, `{"8.8.8.8", {"8.8.4.4", 53} }`. + * If not given, it is taken from `resolv.conf` option `nameserver`. + +[Back to TOC](#table-of-contents) + +## resolve + +**syntax:** *answers, err, tries? = resolve(name, opts?, tries?)* +**context:** *rewrite_by_lua\*;, access_by_lua\*;, content_by_lua\*;, ngx.timer.\*;* + +** Functionality: ** + +Performs a DNS resolution + +1. First, use the key `short:<name>:all` to query mlcache to see if there are any results available for quick use. 
If results are found, return them directly. +2. If there are no results available for quick use in the cache, then query all keys (`<name>:<type>`) extended from this domain name. + 1. The method for calculating extended keys is as follows: + 1. The domain `<name>` is extended based on the `ndots`, `search`, and `domain` settings in `resolv.conf`. + 2. The `<type>` is extended based on the `dns_order` parameter. + 2. Loop through all keys to query them. Once a usable result is found, return it. Also, store the DNS record result in mlcache with the key `short:<name>:all`. + 1. Use this key (`<name>:<type>`) to query mlcache. If it is not found, it triggers the L3 callback of `mlcache:get` to query the DNS server and process data that has expired but is still usable (`resolve_name_type_callback`). + 2. Use `mlcache:peek` to check if the missed and expired key still exists in the shared dictionary. If it does, return it directly to mlcache and trigger an asynchronous background task to update the expired data (`start_stale_update_task`). The maximum time that expired data can be reused is `stale_ttl`, but the maximum TTL returned to mlcache cannot exceed 60s. This way, if the expired key is not successfully updated by the background task after 60s, it can still be reused by calling the `resolve` function from the upper layer to trigger the L3 callback to continue executing this logic and initiate another background task for updating. + 1. For example, with a `stale_ttl` of 3600s, if the background task fails to update the record due to network issues during this time, and the upper-level application continues to call resolve to get the domain name result, it will trigger a background task to query the DNS result for that domain name every 60s, resulting in approximately 60 background tasks being triggered (3600s/60s). 
+ + +** Return value: ** + +* Return value `answers, err` + * Return one array-like Lua table contains all the records + * Return one ip address and port from records if `opts.return_random = true` + * In this scenario, `answers` would hold an address, while `err` would contain either a port number or an error message, like `address, port` or `nil, err` + * If the server returns a non-zero error code, it will return `nil` and a string describing the error in this record. + * For example, `nil, "dns server error: name error"`, the server returned a result with error code 3 (NXDOMAIN). + * In case of severe errors, such as a network error or server's malformed DNS record response, it will return `nil` and a string describing the error instead. For example: + * `nil, "recursion detected for name: example.com:5"`, it detected a loop or recursion while attempting to resolve `example.com:CNAME`. + * `nil, "dns server error: failed to send request to UDP server 10.0.0.1:53: timeout"`, there was a network issue. +* Return value and input parameter `@tries?`: + * If provided as an empty table, it will be returned as a third result. This table will be an array containing the error message for each (if any) failed try. + * For example, `[["lambda.ab-cdef-1.amazonaws.com:SRV","dns server error: 3 name error"], ["lambda.ab-cdef-1.amazonaws.com:A","dns server error: 3 name error"]]`, both attempts failed due to a DNS server error with error code 3 (NXDOMAIN), indicating a name error. + +** Input parameters: ** + +* `@name`: the domain name to resolve +* `@opts`: It accepts a options table argument. 
The following options are supported: + * `cache_only` (default: `false`) + * control whether to solely retrieve data from the internal cache without querying to the nameserver + * `return_random` (default: `true`) + * control whether to return either a single randomly selected IP address or all available records +* `@tries?` : see the above section `Return value and input parameter @tries?` + +[Back to TOC](#table-of-contents) From ad2cdb5653234b530777511cd7c0739c1d813f80 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 21 Mar 2024 16:41:13 +0800 Subject: [PATCH 051/126] change statistics API path from /dns to /status/dns --- kong/api/routes/kong.lua | 2 +- spec/02-integration/04-admin_api/26-dns_client_spec.lua | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index 6e8ca0d2c9a2..fd6702c15478 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -270,7 +270,7 @@ return { return kong.response.exit(200, body) end }, - ["/dns"] = { + ["/status/dns"] = { GET = function (self, db, helpers) if not kong.dns.stats then return kong.response.exit(404, { message = "not support for legacy DNS client" }) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index 232b1e8c1e27..641d99d4bae7 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -24,10 +24,10 @@ for _, strategy in helpers.each_strategy() do helpers.stop_kong() end) - it("/dns", function () + it("/status/dns", function () local res = assert(client:send { method = "GET", - path = "/dns", + path = "/status/dns", headers = { ["Content-Type"] = "application/json" } }) From b0afa32a46b6cfd8c029b92d1297f084948f8ffe Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 21 Mar 2024 17:35:43 +0800 Subject: [PATCH 052/126] d11y: add key-value "query_last_time": " " into 
statistics --- kong/resty/dns_client/init.lua | 14 +++++++++++++- .../30-new-dns-client/02-old_client_spec.lua | 10 +++++++++- .../30-new-dns-client/05-client_stat_spec.lua | 10 ++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index a6fc5d4a90d0..838cd45d4d0b 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -106,6 +106,11 @@ local function stats_count(stats, name, key) end +local function stats_set(stats, name, key, value) + stats[name][key] = value +end + + -- lookup or set TYPE_LAST (the DNS record type from the last successful query) local function insert_last_type(cache, name, qtype) local key = "last:" .. name @@ -381,15 +386,22 @@ local function resolve_query(self, name, qtype, tries) return nil, "failed to instantiate the resolver: " .. err end + local start_time = now() + local options = { additional_section = true, qtype = qtype } local answers, err = r:query(name, options) if r.destroy then r:destroy() end + local query_time = now() - start_time -- the time taken for the DNS query + local time_str = ("%.3f %.3f"):format(start_time, query_time) + + stats_set(self.stats, key, "query_last_time", time_str) + if not answers then stats_count(self.stats, key, "query_fail_nameserver") - return nil, "DNS server error: " .. (err or "unknown") + return nil, "DNS server error: " .. err .. ", Query Time: " .. 
time_str end answers = process_answers(self, name, qtype, answers) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 19c7887a57db..317e8aa78175 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -564,7 +564,7 @@ describe("[DNS client]", function() local answers, err = cli:resolve("srv.timeout.com") assert.is_nil(answers) - assert.match("DNS server error: failed to receive reply from UDP server .*: timeout", err) + assert.match("DNS server error: failed to receive reply from UDP server .*: timeout, Query Time: %d+%.%d+ 0.%d+", err) assert.same(receive_count, 3) assert.same(query_count, 1) end) @@ -708,6 +708,10 @@ describe("[DNS client]", function() local entry1 = cli.cache:get(key1) assert.same(nil, entry1) + for k,v in pairs(cli.stats) do + v.query_last_time = nil + end + assert.same({ ["kong-gateway-testing.link"] = { miss = 1, @@ -767,6 +771,10 @@ describe("[DNS client]", function() local entry = cli.cache:get(key) assert.same(nil, entry) + for k,v in pairs(cli.stats) do + v.query_last_time = nil + end + assert.same({ ["cname2srv.kong-gateway-testing.link"] = { miss = 1, diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua index 602a54084972..2bf31e52e8bf 100644 --- a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -100,6 +100,16 @@ describe("[DNS client stats]", function() sleep(0.2) cli:resolve("stale.com") + local query_last_time + for k, v in pairs(cli.stats) do + if v.query_last_time then + query_last_time = v.query_last_time + v.query_last_time = nil + end + end + + assert.match("^%d+%.%d+ 0%.%d+$", query_last_time) + assert.same({ ["hit.com"] = { ["hit_lru"] = 1, From 3e533f783db371c19215bf57a25562b6e3ce72ff Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: 
Thu, 21 Mar 2024 17:50:06 +0800 Subject: [PATCH 053/126] fixed markdown format of kong/resty/dns_client/README.md --- kong/resty/dns_client/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kong/resty/dns_client/README.md b/kong/resty/dns_client/README.md index 06b0ebd05e50..dede9507313b 100644 --- a/kong/resty/dns_client/README.md +++ b/kong/resty/dns_client/README.md @@ -20,7 +20,7 @@ The following APIs are for internal development use only within Kong. In the cur **syntax:** *c, err = dns_client.new(opts)* **context:** any -** Functionality: ** +**Functionality:** Creates a dns client object. Returns nil and a message string on error. @@ -31,7 +31,7 @@ Perform a series of initialization operations: * initialize multiple TTL options * create a mlcache object and initialize it -** Input paramenters: ** +**Input paramenters:** `@opts` It accepts a options table argument. The following options are supported: @@ -72,9 +72,9 @@ Perform a series of initialization operations: ## resolve **syntax:** *answers, err, tries? = resolve(name, opts?, tries?)* -**context:** *rewrite_by_lua\*;, access_by_lua\*;, content_by_lua\*;, ngx.timer.\*;* +**context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\*;* -** Functionality: ** +**Functionality:** Performs a DNS resolution @@ -89,7 +89,7 @@ Performs a DNS resolution 1. For example, with a `stale_ttl` of 3600s, if the background task fails to update the record due to network issues during this time, and the upper-level application continues to call resolve to get the domain name result, it will trigger a background task to query the DNS result for that domain name every 60s, resulting in approximately 60 background tasks being triggered (3600s/60s). 
-** Return value: ** +**Return value:** * Return value `answers, err` * Return one array-like Lua table contains all the records @@ -104,7 +104,7 @@ Performs a DNS resolution * If provided as an empty table, it will be returned as a third result. This table will be an array containing the error message for each (if any) failed try. * For example, `[["lambda.ab-cdef-1.amazonaws.com:SRV","dns server error: 3 name error"], ["lambda.ab-cdef-1.amazonaws.com:A","dns server error: 3 name error"]]`, both attempts failed due to a DNS server error with error code 3 (NXDOMAIN), indicating a name error. -** Input parameters: ** +**Input parameters:** * `@name`: the domain name to resolve * `@opts`: It accepts a options table argument. The following options are supported: From 04e5a72dbfe6c57a75fa8fe659e540e9b5f2b1ee Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 25 Mar 2024 16:09:46 +0800 Subject: [PATCH 054/126] fix format for kong/resty/dns_client/README.md --- kong/resty/dns_client/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kong/resty/dns_client/README.md b/kong/resty/dns_client/README.md index dede9507313b..d5798711a45d 100644 --- a/kong/resty/dns_client/README.md +++ b/kong/resty/dns_client/README.md @@ -9,7 +9,7 @@ Table of Contents * [Name](#name) * [APIs](#apis) * [new](#new) - * [query](#query) + * [resolve](#resolve) # APIs @@ -17,7 +17,7 @@ The following APIs are for internal development use only within Kong. In the cur ## new -**syntax:** *c, err = dns_client.new(opts)* +**syntax:** *c, err = dns_client.new(opts)* **context:** any **Functionality:** @@ -71,8 +71,8 @@ Perform a series of initialization operations: ## resolve -**syntax:** *answers, err, tries? = resolve(name, opts?, tries?)* -**context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\*;* +**syntax:** *answers, err, tries? 
= resolve(name, opts?, tries?)* +**context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\** **Functionality:** From 47bae5f9098bdd137955efa1090be5279ded1c6d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 25 Mar 2024 18:16:01 +0800 Subject: [PATCH 055/126] add debug logs --- kong/resty/dns_client/init.lua | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 838cd45d4d0b..e5621c6694f9 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -8,6 +8,7 @@ local now = ngx.now local log = ngx.log local ERR = ngx.ERR local WARN = ngx.WARN +local DEBUG = ngx.DEBUG local ALERT = ngx.ALERT local timer_at = ngx.timer.at @@ -368,6 +369,11 @@ local function process_answers(self, qname, qtype, answers) end table_insert(processed_answers, cname_answer) + + log(DEBUG, "processed cname:", cname_answer.cname) + + else + log(DEBUG, "processed ans:", #processed_answers) end processed_answers.expire = now() + ttl @@ -399,6 +405,8 @@ local function resolve_query(self, name, qtype, tries) stats_set(self.stats, key, "query_last_time", time_str) + log(DEBUG, "r:query() ans:", answers and #answers or "-", " t:", time_str) + if not answers then stats_count(self.stats, key, "query_fail_nameserver") return nil, "DNS server error: " .. err .. ", Query Time: " .. 
time_str @@ -459,6 +467,8 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) ttl = math_min(ttl, 60) if ttl > 0 then + log(DEBUG, "start stale update task ", key, " ttl:", ttl) + -- mlcache's internal lock mechanism ensures concurrent control start_stale_update_task(self, key, name, qtype, opts.short_key) answers.ttl = ttl @@ -502,6 +512,8 @@ local function resolve_name_type(self, name, qtype, opts, tries) log(ALERT, err) end + log(DEBUG, "cache lookup ", key, " ans:", answers and #answers or "-", " hlv:", hit_level or "-") + local ctx = ngx.ctx if ctx and ctx.has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", @@ -614,6 +626,8 @@ local function resolve_all(self, name, opts, tries) -- quickly lookup with the key "short::all" or "short::" local answers, err, hit_level = self.cache:get(key) if not answers then + log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") + answers, err, tries = resolve_names_and_types(self, name, opts, tries) if not opts.cache_only and answers then -- If another worker resolved the name between these two `:get`, it can @@ -627,6 +641,8 @@ local function resolve_all(self, name, opts, tries) stats_count(self.stats, name, answers and "miss" or "fail") else + log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, " hlv:", hit_level or "-") + local ctx = ngx.ctx if ctx and ctx.has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", From e789fc6e77dcc4187caf97ecb665059a6c92a48d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 26 Mar 2024 11:29:02 +0800 Subject: [PATCH 056/126] fix refactor_dns_client.yml to make it more user-friendly --- changelog/unreleased/kong/refactor_dns_client.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml index 88d031d5e3d8..b561aa7a5e72 100644 --- a/changelog/unreleased/kong/refactor_dns_client.yml +++ 
b/changelog/unreleased/kong/refactor_dns_client.yml @@ -1,8 +1,7 @@ message: > Starting from this version, a new DNS client library has been implemented and added into Kong. The new DNS client library has the following changes - - The new library now leverages `lua-resty-mlcache` for DNS record caching. - - Introduced observable statistics for the new DNS client, and a new Admin API `/dns` to retrieve them. - - Introduced a new option `legacy_dns_client` to toggle whether to use the new DNS client library. + - Introduced global caching for DNS records across workers, significantly reducing the query load on DNS servers. + - Introduced observable statistics for the new DNS client, and a new Admin API `/status/dns` to retrieve them. - Deprecated the `dns_no_sync` option in the context of the new DNS client library. With the new library, multiple DNS queries for the same name will always be synchronized (even across workers). The `dns_no_sync` option remains functional with the legacy DNS client library. 
type: feature scope: Core From 2ccc33debc5371555501658a154a76db2c8368fa Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 26 Mar 2024 19:54:55 +0800 Subject: [PATCH 057/126] chore: use string_lower instead of :lower() for debugging --- kong/resty/dns_client/init.lua | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index e5621c6694f9..bcc4324d0afd 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -16,6 +16,7 @@ local type = type local pairs = pairs local ipairs = ipairs local math_min = math.min +local string_lower = string.lower local table_insert = table.insert local parse_hosts = utils.parse_hosts @@ -167,7 +168,7 @@ local function init_hosts(cache, path, preferred_ip_type) end for name, address in pairs(hosts) do - name = name:lower() + name = string_lower(name) if address.ipv4 then insert_answer(name, TYPE_A, address.ipv4) @@ -330,7 +331,7 @@ local function process_answers(self, qname, qtype, answers) local ttl = self.valid_ttl or 0xffffffff -- 0xffffffff for maximum TTL value for _, answer in ipairs(answers) do - answer.name = answer.name:lower() + answer.name = string_lower(answer.name) if answer.type == TYPE_CNAME then cname_answer = answer -- use the last one as the real cname @@ -409,6 +410,7 @@ local function resolve_query(self, name, qtype, tries) if not answers then stats_count(self.stats, key, "query_fail_nameserver") + err = err or "unknown" return nil, "DNS server error: " .. err .. ", Query Time: " .. 
time_str end @@ -682,7 +684,7 @@ end -- `cache_only`: default `false`, retrieve data only from the internal cache -- `qtype`: specified query type instead of its own search types function _M:resolve(name, opts, tries) - name = name:lower() + name = string_lower(name) opts = copy_options(opts or {}) tries = setmetatable(tries or {}, tries_mt) From c3bcbcf61a2a8ffbb8e2277599d7918ff04c5a4b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 12:37:49 +0800 Subject: [PATCH 058/126] chores: refactor variable names --- kong/resty/dns_client/init.lua | 44 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index bcc4324d0afd..54db2f37f010 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -45,7 +45,7 @@ local TYPE_AAAA = resolver.TYPE_AAAA local TYPE_CNAME = resolver.TYPE_CNAME local TYPE_LAST = -1 -local valid_type_names = { +local NAME_TO_TYPE = { SRV = TYPE_SRV, A = TYPE_A, AAAA = TYPE_AAAA, @@ -53,7 +53,7 @@ local valid_type_names = { LAST = TYPE_LAST, } -local typstrs = { +local TYPE_TO_NAME = { [TYPE_SRV] = "SRV", [TYPE_A] = "A", [TYPE_AAAA] = "AAAA", @@ -62,7 +62,7 @@ local typstrs = { local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale -local hitstrs = { +local HIT_LEVEL_TO_NAME = { [1] = "hit_lru", [2] = "hit_shm", [3] = "hit_cb", @@ -70,13 +70,13 @@ local hitstrs = { } -- server replied error from the DNS protocol -local NAME_ERROR_EC = 3 -- response code 3 as "Name Error" or "NXDOMAIN" +local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" -- client specific error -local CACHE_ONLY_EC = 100 -local CACHE_ONLY_ESTR = "cache only lookup failed" -local CACHE_ONLY_ANSWERS = tablex.readonly({ errcode = CACHE_ONLY_EC, errstr = CACHE_ONLY_ESTR }) -local EMPTY_RECORD_EC = 101 -local EMPTY_RECORD_ESTR = "empty record received" +local CACHE_ONLY_ERROR_CODE = 100 +local 
CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" +local CACHE_ONLY_ANSWERS = tablex.readonly({ errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE }) +local EMPTY_RECORD_ERROR_CODE = 101 +local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" -- APIs @@ -93,7 +93,7 @@ end _M.TYPE_LAST = TYPE_LAST -local tries_mt = { __tostring = cjson.encode, } +local TRIES_MT = { __tostring = cjson.encode, } local function stats_init(stats, name) @@ -116,7 +116,7 @@ end -- lookup or set TYPE_LAST (the DNS record type from the last successful query) local function insert_last_type(cache, name, qtype) local key = "last:" .. name - if typstrs[qtype] and cache:get(key) ~= qtype then + if TYPE_TO_NAME[qtype] and cache:get(key) ~= qtype then cache:set(key, { ttl = 0 }, qtype) end end @@ -281,7 +281,7 @@ function _M.new(opts) local order = opts.order or DEFAULT_ORDER local preferred_ip_type for _, typstr in ipairs(order) do - local qtype = valid_type_names[typstr:upper()] + local qtype = NAME_TO_TYPE[typstr:upper()] if not qtype then return nil, "Invalid dns record type in order array: " .. 
typstr end @@ -319,7 +319,7 @@ end local function process_answers(self, qname, qtype, answers) local errcode = answers.errcode if errcode then - answers.ttl = errcode == NAME_ERROR_EC and self.empty_ttl or self.error_ttl + answers.ttl = errcode == NAME_ERROR_CODE and self.empty_ttl or self.error_ttl -- compatible with balancer, which needs this field answers.expire = now() + answers.ttl return answers @@ -362,8 +362,8 @@ local function process_answers(self, qname, qtype, answers) if #processed_answers == 0 then if not cname_answer then return { - errcode = EMPTY_RECORD_EC, - errstr = EMPTY_RECORD_ESTR, + errcode = EMPTY_RECORD_ERROR_CODE, + errstr = EMPTY_RECORD_ERROR_MESSAGE, ttl = self.empty_ttl, -- expire = now() + self.empty_ttl, } @@ -429,7 +429,7 @@ local function stale_update_task(premature, self, key, name, qtype, short_key) end local answers = resolve_query(self, name, qtype, {}) - if answers and (not answers.errcode or answers.errcode == NAME_ERROR_EC) then + if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then self.cache:set(key, { ttl = answers.ttl }, answers) insert_last_type(self.cache, name, qtype) @@ -524,16 +524,16 @@ local function resolve_name_type(self, name, qtype, opts, tries) -- hit L1 lru or L2 shm if hit_level and hit_level < HIT_L3 then - stats_count(self.stats, key, hitstrs[hit_level]) + stats_count(self.stats, key, HIT_LEVEL_TO_NAME[hit_level]) end if err or answers.errcode then if not err then - local src = answers.errcode < CACHE_ONLY_EC and "server" or "client" + local src = answers.errcode < CACHE_ONLY_ERROR_CODE and "server" or "client" err = ("dns %s error: %s %s"):format(src, answers.errcode, answers.errstr) end - table_insert(tries, { name .. ":" .. typstrs[qtype], err }) + table_insert(tries, { name .. ":" .. 
TYPE_TO_NAME[qtype], err }) end return answers, err @@ -651,7 +651,7 @@ local function resolve_all(self, name, opts, tries) (hit_level and hit_level < HIT_L3)) end - stats_count(self.stats, name, hitstrs[hit_level]) + stats_count(self.stats, name, HIT_LEVEL_TO_NAME[hit_level]) end -- dereference CNAME @@ -686,7 +686,7 @@ end function _M:resolve(name, opts, tries) name = string_lower(name) opts = copy_options(opts or {}) - tries = setmetatable(tries or {}, tries_mt) + tries = setmetatable(tries or {}, TRIES_MT) local answers, err, tries = resolve_all(self, name, opts, tries) if not answers or not opts.return_random then @@ -743,7 +743,7 @@ end -- for example, "example.com:33" -> "example.com:SRV" local function format_key(key) local qname, qtype = key:match("([^:]+):(%d+)") -- match "(qname):(qtype)" - return qtype and qname .. ":" .. (typstrs[tonumber(qtype)] or qtype) + return qtype and qname .. ":" .. (TYPE_TO_NAME[tonumber(qtype)] or qtype) or key end From 223ac014b4c307a0d141e49e054ffda43e0f8ff8 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 13:10:40 +0800 Subject: [PATCH 059/126] fixed coding style(add spaces) and fix resolv.options.timeout checking --- kong/resty/dns_client/utils.lua | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 94994b78f83d..b7e57a0591d2 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -23,9 +23,11 @@ function _M.hostname_type(name) if colons > 1 then return "ipv6" end + if remainder:match("^[%d%.]+$") then return "ipv4" end + return "name" end @@ -40,11 +42,13 @@ function _M.parse_hostname(name) local ip, port = name:match("^([^:]+)%:*(%d*)$") return ip, tonumber(port), t end + -- ipv6 if name:match("%[") then -- brackets, so possibly a port local ip, port = name:match("^%[([^%]]+)%]*%:*(%d*)$") return "[" .. ip .. "]", tonumber(port), t end + return "[" .. name .. 
"]", nil, t -- no brackets also means no port end @@ -90,6 +94,7 @@ function _M.parse_hosts(path, enable_ipv6) end end end + return hosts end @@ -100,15 +105,19 @@ function _M.parse_resolv_conf(path, enable_ipv6) if not resolv then return nil, err end + resolv = utils.applyEnv(resolv) resolv.options = resolv.options or {} resolv.ndots = resolv.options.ndots or 1 resolv.search = resolv.search or (resolv.domain and { resolv.domain }) + -- check if timeout is 0s - if resolv.options.timeout and resolv.options.timeout == 0 then + if resolv.options.timeout and resolv.options.timeout <= 0 then + log(NOTICE, "A non-positive timeout of ", resolv.options.timeout, + "s is configured in resolv.conf. Setting it to 2000ms.") resolv.options.timeout = 2000 -- 2000ms is lua-resty-dns default - log(NOTICE, "A non-positive timeout of 0s is configured in resolv.conf. Setting it to 2000ms.") end + -- remove special domain like "." if resolv.search then for i = #resolv.search, 1, -1 do @@ -117,9 +126,11 @@ function _M.parse_resolv_conf(path, enable_ipv6) end end end + -- nameservers if resolv.nameserver then local nameservers = {} + for _, address in ipairs(resolv.nameserver) do local ip, port, t = utils.parseHostname(address) if t == "ipv4" or @@ -128,6 +139,7 @@ function _M.parse_resolv_conf(path, enable_ipv6) table_insert(nameservers, port and { ip, port } or ip) end end + resolv.nameservers = nameservers end return resolv @@ -150,11 +162,13 @@ function _M.search_names(name, resolv, hosts) for _, suffix in ipairs(resolv.search) do table_insert(names, name .. "." .. 
suffix) end + if hosts and hosts[name] then table_insert(names, 1, name) else table_insert(names, name) end + return names end @@ -200,6 +214,7 @@ local function swrr_init(answers) for _, answer in ipairs(answers) do answer.cw = 0 -- current weight end + -- random start for _ = 1, math_random(#answers) do swrr_next(answers) From e9a648560ac9a81ec81f615ff4cc544292fd5eda Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 13:16:25 +0800 Subject: [PATCH 060/126] move ip address answers generating logic into cache:get callback Co-authored-by: Aapo Talvensaari --- kong/resty/dns_client/init.lua | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 54db2f37f010..8efd668b930e 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -147,23 +147,19 @@ local function init_hosts(cache, path, preferred_ip_type) return end - local ttl = LONG_LASTING_TTL - - local key = name .. ":" .. qtype - local answers = { - ttl = ttl, - expire = now() + ttl, - { - name = name, - type = qtype, - address = address, - class = 1, - ttl = ttl, - }, - } -- insert via the `:get` callback to prevent inter-process communication - cache:get(key, nil, function() - return answers, nil, ttl + cache:get(name .. ":" .. 
qtype, nil, function() + return { + ttl = LONG_LASTING_TTL, + expire = now() + LONG_LASTING_TTL, + { + name = name, + type = qtype, + address = address, + class = 1, + ttl = LONG_LASTING_TTL, + }, + }, nil, LONG_LASTING_TTL end) end From 6668b7c3261e52364acbc93c0bcca0df9dcc7e6b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 15:28:38 +0800 Subject: [PATCH 061/126] modify some table_insert to "t[i] = v" and check order instead of check search_types --- kong/resty/dns_client/init.lua | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 8efd668b930e..f4848303c28e 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -273,26 +273,28 @@ function _M.new(opts) end -- parse order - local search_types = {} + if opts.order and #opts.order == 0 then + return nil, "Invalid order array: empty record types" + end + local order = opts.order or DEFAULT_ORDER + local search_types = {} local preferred_ip_type - for _, typstr in ipairs(order) do + + for i, typstr in ipairs(order) do local qtype = NAME_TO_TYPE[typstr:upper()] if not qtype then return nil, "Invalid dns record type in order array: " .. 
typstr end - table_insert(search_types, qtype) + search_types[i] = qtype if (qtype == TYPE_A or qtype == TYPE_AAAA) and not preferred_ip_type then preferred_ip_type = qtype end end - preferred_ip_type = preferred_ip_type or TYPE_A - if #search_types == 0 then - return nil, "Invalid order array: empty record types" - end + preferred_ip_type = preferred_ip_type or TYPE_A -- parse hosts local hosts = init_hosts(cache, opts.hosts, preferred_ip_type) @@ -365,7 +367,7 @@ local function process_answers(self, qname, qtype, answers) } end - table_insert(processed_answers, cname_answer) + processed_answers[1] = cname_answer log(DEBUG, "processed cname:", cname_answer.cname) From 1ceb711287821b5838433ae034be9c5070e8e00e Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 16:37:39 +0800 Subject: [PATCH 062/126] use empty table for opts as default value in _M.new() --- kong/resty/dns_client/init.lua | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index f4848303c28e..00f5e2553b57 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -187,9 +187,7 @@ end local ipc_counter = 0 function _M.new(opts) - if not opts then - return nil, "no options table specified" - end + opts = opts or {} -- parse resolv.conf local resolv, err = utils.parse_resolv_conf(opts.resolv_conf, opts.enable_ipv6) From 23f0dc5ad5962562e0f9850b269953e33389859c Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 17:21:21 +0800 Subject: [PATCH 063/126] perf: return body directly instead of creating a local variable Co-authored-by: Aapo Talvensaari --- kong/api/routes/kong.lua | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index fd6702c15478..bece016f7c5e 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -276,14 +276,13 @@ return { return kong.response.exit(404, { message = 
"not support for legacy DNS client" }) end - local body = { + return kong.response.exit(200, { worker = { id = ngx.worker.id() or -1, count = ngx.worker.count(), }, stats = kong.dns.stats(), - } - return kong.response.exit(200, body) + }) end }, } From f3cca273d2784f1464c4a59df1b93cead7a6606e Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 17:47:20 +0800 Subject: [PATCH 064/126] fix status code to 501 if dns stats not implemented for API "/status/dns" Co-authored-by: Aapo Talvensaari --- kong/api/routes/kong.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index bece016f7c5e..c4621d8b216b 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -273,7 +273,7 @@ return { ["/status/dns"] = { GET = function (self, db, helpers) if not kong.dns.stats then - return kong.response.exit(404, { message = "not support for legacy DNS client" }) + return kong.response.exit(501, { message = "not implemented with the legacy DNS client" }) end return kong.response.exit(200, { From 22bb75511f80606aa648a816b7108856adb90eb9 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 27 Mar 2024 18:15:40 +0800 Subject: [PATCH 065/126] perf: convert variables (localhosts/empty_answers) to constants --- kong/resty/dns_client/init.lua | 28 ++++++------------- kong/resty/dns_client/utils.lua | 15 +++++++++- .../30-new-dns-client/01-utils_spec.lua | 9 +++--- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 00f5e2553b57..f98b53a12410 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -129,18 +129,7 @@ end -- insert hosts into cache local function init_hosts(cache, path, preferred_ip_type) - local hosts, err = parse_hosts(path) - if not hosts then - log(WARN, "Invalid hosts file: ", err) - hosts = {} - end - - if not hosts.localhost then - hosts.localhost = { - ipv4 = 
"127.0.0.1", - ipv6 = "[::1]", - } - end + local hosts = parse_hosts(path) local function insert_answer(name, qtype, address) if not address then @@ -308,6 +297,12 @@ function _M.new(opts) stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, search_types = search_types, + -- quickly accessible constant empty answers + empty_answers = { + errcode = EMPTY_RECORD_ERROR_CODE, + errstr = EMPTY_RECORD_ERROR_MESSAGE, + ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + }, }, mt) end @@ -316,8 +311,6 @@ local function process_answers(self, qname, qtype, answers) local errcode = answers.errcode if errcode then answers.ttl = errcode == NAME_ERROR_CODE and self.empty_ttl or self.error_ttl - -- compatible with balancer, which needs this field - answers.expire = now() + answers.ttl return answers end @@ -357,12 +350,7 @@ local function process_answers(self, qname, qtype, answers) if #processed_answers == 0 then if not cname_answer then - return { - errcode = EMPTY_RECORD_ERROR_CODE, - errstr = EMPTY_RECORD_ERROR_MESSAGE, - ttl = self.empty_ttl, - -- expire = now() + self.empty_ttl, - } + return self.empty_answers end processed_answers[1] = cname_answer diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index b7e57a0591d2..297177eaa712 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -12,6 +12,13 @@ local readlines = require("pl.utils").readlines local DEFAULT_HOSTS_FILE = "/etc/hosts" local DEFAULT_RESOLV_CONF = "/etc/resolv.conf" +local LOCALHOST = { + ipv4 = "127.0.0.1", + ipv6 = "[::1]", +} + +local DEFAULT_HOSTS = { localhost = LOCALHOST } + local _M = {} @@ -64,10 +71,12 @@ end function _M.parse_hosts(path, enable_ipv6) local lines, err = get_lines(path or DEFAULT_HOSTS_FILE) if not lines then - return nil, err + log(NOTICE, "Invalid hosts file: ", err) + return DEFAULT_HOSTS end local hosts = {} + for _, line in ipairs(lines) do -- Remove leading/trailing whitespaces 
and split by whitespace local parts = {} @@ -95,6 +104,10 @@ function _M.parse_hosts(path, enable_ipv6) end end + if not hosts.localhost then + hosts.localhost = LOCALHOST + end + return hosts end diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index eef471803acb..138b920ce974 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -357,17 +357,16 @@ nameserver [fe80::1%enp0s20f0u1u1] describe("parsing 'hosts':", function() it("tests parsing when the 'hosts' file does not exist", function() - local result, err = utils.parse_hosts("non/existing/file") - assert.is.Nil(result) - assert.is.string(err) + local result = utils.parse_hosts("non/existing/file") + assert.same({ localhost = { ipv4 = "127.0.0.1", ipv6 = "[::1]" } }, result) end) it("tests parsing when the 'hosts' file is empty", function() local filename = tempfilename() writefile(filename, "") - local reverse = utils.parse_hosts(filename) + local result = utils.parse_hosts(filename) os.remove(filename) - assert.is.same({}, reverse) + assert.same({ localhost = { ipv4 = "127.0.0.1", ipv6 = "[::1]" } }, result) end) it("tests parsing 'hosts'", function() From 4ed3f797211359bed92b0e0c16fd561b496accb0 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 10:47:20 +0800 Subject: [PATCH 066/126] perf: firstly check for tailing dot in is_fqdn Co-authored-by: Thijs Schreijer --- kong/resty/dns_client/utils.lua | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 297177eaa712..6aa05e70b85e 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -160,8 +160,11 @@ end function _M.is_fqdn(name, ndots) + if name:sub(-1) == "." 
then + return true + end local _, dot_count = name:gsub("%.", "") - return (dot_count >= ndots) or (name:sub(-1) == ".") + return (dot_count >= ndots) end From 408bbaffdad9298acb62c17e74ffb6cf5ac9d4f3 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 10:47:59 +0800 Subject: [PATCH 067/126] chore: better comment for parseResolvConf-TODO Co-authored-by: Thijs Schreijer --- kong/resty/dns_client/utils.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 6aa05e70b85e..74feb5e6d810 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -112,7 +112,7 @@ function _M.parse_hosts(path, enable_ipv6) end --- TODO: need to rewrite it instead of calling parseResolvConf +-- TODO: need to rewrite it instead of calling parseResolvConf from the old library function _M.parse_resolv_conf(path, enable_ipv6) local resolv, err = utils.parseResolvConf(path or DEFAULT_RESOLV_CONF) if not resolv then From c75e7d64a3113e8f218583582b817c2624b47052 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 11:21:29 +0800 Subject: [PATCH 068/126] ensure valid_ttl doesn't exceed maximum ttl 0xffffffff Co-authored-by: Aapo Talvensaari --- kong/resty/dns_client/init.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index f98b53a12410..ec5c0f0a8fa9 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -317,7 +317,7 @@ local function process_answers(self, qname, qtype, answers) local processed_answers = {} local cname_answer - local ttl = self.valid_ttl or 0xffffffff -- 0xffffffff for maximum TTL value + local ttl = math_min(self.valid_ttl or 0xffffffff, 0xffffffff) -- 0xffffffff for maximum TTL value for _, answer in ipairs(answers) do answer.name = string_lower(answer.name) From 5b758b2258a950bc79b1d8505d075d5caf82c379 Mon Sep 17 00:00:00 
2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 11:23:22 +0800 Subject: [PATCH 069/126] chore: rename get_round_robin_answers to get_next_round_robin_answers --- kong/resty/dns_client/init.lua | 4 ++-- kong/resty/dns_client/utils.lua | 2 +- spec/01-unit/30-new-dns-client/01-utils_spec.lua | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index ec5c0f0a8fa9..51166b0f216e 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -22,7 +22,7 @@ local table_insert = table.insert local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket local search_names = utils.search_names -local get_round_robin_answers = utils.get_round_robin_answers +local get_next_round_robin_answers = utils.get_next_round_robin_answers local get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks @@ -684,7 +684,7 @@ function _M:resolve(name, opts, tries) return self:resolve(answer.target, opts, tries) end - return get_round_robin_answers(answers).address, opts.port, tries + return get_next_round_robin_answers(answers).address, opts.port, tries end diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 74feb5e6d810..9c93df1e917c 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -199,7 +199,7 @@ end -- util APIs to balance @answers -function _M.get_round_robin_answers(answers) +function _M.get_next_round_robin_answers(answers) answers.last = (answers.last or 0) % #answers + 1 return answers[answers.last] end diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 138b920ce974..11f4b95180a0 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -99,7 +99,7 @@ describe("[utils]", function () { 
target = "3" }, -- 25% { target = "4" }, -- 25% } - local count = get_and_count(answers, 100, utils.get_round_robin_answers) + local count = get_and_count(answers, 100, utils.get_next_round_robin_answers) assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) end) From bf5f756ed4235bac79701f8f59e089870868531b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 17:06:16 +0800 Subject: [PATCH 070/126] perf: dont use table as input parameters for APIs and add a new API `resolve_address` --- kong/resty/dns_client/init.lua | 95 ++++++++----------- .../30-new-dns-client/02-old_client_spec.lua | 8 +- .../03-old_client_cache_spec.lua | 8 +- 3 files changed, 54 insertions(+), 57 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 51166b0f216e..1a54704c5d3a 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -26,7 +26,6 @@ local get_next_round_robin_answers = utils.get_next_round_robin_answers local get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks -local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy -- Constants and default values @@ -433,7 +432,7 @@ local function start_stale_update_task(self, key, name, qtype, short_key) end -local function resolve_name_type_callback(self, name, qtype, opts, tries) +local function resolve_name_type_callback(self, name, qtype, cache_only, short_key, tries) local key = name .. ":" .. 
qtype -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then @@ -456,13 +455,13 @@ local function resolve_name_type_callback(self, name, qtype, opts, tries) log(DEBUG, "start stale update task ", key, " ttl:", ttl) -- mlcache's internal lock mechanism ensures concurrent control - start_stale_update_task(self, key, name, qtype, opts.short_key) + start_stale_update_task(self, key, name, qtype, short_key) answers.ttl = ttl return answers, nil, ttl end end - if opts.cache_only then + if cache_only then return CACHE_ONLY_ANSWERS, nil, -1 end @@ -472,27 +471,27 @@ end -- detect circular references in DNS CNAME or SRV records -local function detect_recursion(opts, key) - local rn = opts.resolved_names - local detected = rn[key] - rn[key] = true +local function detect_recursion(resolved_names, key) + local detected = resolved_names[key] + resolved_names[key] = true return detected end -local function resolve_name_type(self, name, qtype, opts, tries) +local function resolve_name_type(self, name, qtype, cache_only, short_key, tries, resolved_names) local key = name .. ":" .. qtype stats_init(self.stats, key) - if detect_recursion(opts, key) then + if detect_recursion(resolved_names, key) then stats_count(self.stats, key, "fail_recur") return nil, "recursion detected for name: " .. 
key end local answers, err, hit_level = self.cache:get(key, nil, resolve_name_type_callback, - self, name, qtype, opts, tries) + self, name, qtype, cache_only, + short_key, tries) -- check for runtime errors in the callback if err and err:sub(1, 8) == "callback" then log(ALERT, err) @@ -561,7 +560,8 @@ local function check_and_get_ip_answers(name) end -local function resolve_names_and_types(self, name, opts, tries) +-- resolve all `name`s and `type`s combinations and return first usable answers +local function resolve_names_and_types(self, name, typ, cache_only, short_key, tries, resolved_names) local answers = check_and_get_ip_answers(name) if answers then -- domain name is IP literal answers.ttl = LONG_LASTING_TTL @@ -571,14 +571,14 @@ local function resolve_names_and_types(self, name, opts, tries) -- TODO: For better performance, it may be necessary to rewrite it as an -- iterative function. - local types = get_search_types(self, name, opts.qtype) + local types = get_search_types(self, name, typ) local names = search_names(name, self.resolv, self.hosts) local err for _, qtype in ipairs(types) do for _, qname in ipairs(names) do - answers, err = resolve_name_type(self, qname, qtype, opts, tries) - + answers, err = resolve_name_type(self, qname, qtype, cache_only, + short_key, tries, resolved_names) -- severe error occurred if not answers then return nil, err, tries @@ -596,15 +596,17 @@ local function resolve_names_and_types(self, name, opts, tries) end -local function resolve_all(self, name, opts, tries) +local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) + name = string_lower(name) + tries = setmetatable(tries or {}, TRIES_MT) + -- key like "short:example.com:all" or "short:example.com:5" - local key = "short:" .. name .. ":" .. (opts.qtype or "all") - opts.short_key = key -- save for later use in the stale update task + local key = "short:" .. name .. ":" .. 
(qtype or "all") stats_init(self.stats, name) stats_count(self.stats, name, "runs") - if detect_recursion(opts, key) then + if detect_recursion(resolved_names, key) then stats_count(self.stats, name, "fail_recur") return nil, "recursion detected for name: " .. name end @@ -614,11 +616,12 @@ local function resolve_all(self, name, opts, tries) if not answers then log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") - answers, err, tries = resolve_names_and_types(self, name, opts, tries) - if not opts.cache_only and answers then + answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, + key, tries, resolved_names) + if not cache_only and answers then -- If another worker resolved the name between these two `:get`, it can -- work as expected and will not introduce a race condition. - -- + -- insert via the `:get` callback to prevent inter-process communication self.cache:get(key, nil, function() return answers, nil, answers.ttl @@ -639,52 +642,36 @@ local function resolve_all(self, name, opts, tries) end -- dereference CNAME - if opts.qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then + if qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then stats_count(self.stats, name, "cname") - return resolve_all(self, answers[1].cname, opts, tries) + return resolve_all(self, answers[1].cname, qtype, cache_only, tries, resolved_names) end return answers, err, tries end -local function copy_options(opts) - if opts.resolved_names then - return opts - end - - opts = cycle_aware_deep_copy(opts) - opts.resolved_names = {} -- for detecting circular references in DNS records - return opts +function _M:resolve(name, qtype, cache_only, tries) + return resolve_all(self, name, qtype, cache_only, tries, {}) end --- resolve all `name`s and `type`s combinations and return first usable answers --- `name`s: produced by resolv.conf options: `search`, `ndots` and `domain` --- `type`s: SRV, A, AAAA, CNAME --- --- @opts: 
--- `return_random`: default `false`, return only one random IP address --- `cache_only`: default `false`, retrieve data only from the internal cache --- `qtype`: specified query type instead of its own search types -function _M:resolve(name, opts, tries) - name = string_lower(name) - opts = copy_options(opts or {}) - tries = setmetatable(tries or {}, TRIES_MT) +function _M:resolve_address(name, port, cache_only, tries, resolved_names) + resolved_names = resolved_names or {} - local answers, err, tries = resolve_all(self, name, opts, tries) - if not answers or not opts.return_random then - return answers, err, tries + local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, resolved_names) + if not answers then + return nil, err, tries end - -- option: return_random + -- non-nil answers and return_random if answers[1].type == TYPE_SRV then local answer = get_weighted_round_robin_answers(answers) - opts.port = answer.port ~= 0 and answer.port or opts.port - return self:resolve(answer.target, opts, tries) + port = (answer.port ~= 0 and answer.port) or port + return self:resolve_address(answer.target, port, cache_only, tries, resolved_names) end - return get_next_round_robin_answers(answers).address, opts.port, tries + return get_next_round_robin_answers(answers).address, port, tries end @@ -713,14 +700,12 @@ end _M._resolve = _M.resolve function _M.resolve(name, r_opts, cache_only, tries) - local opts = { cache_only = cache_only } - return dns_client:_resolve(name, opts, tries) + return dns_client:_resolve(name, r_opts and r_opts.qtype, cache_only, tries) end function _M.toip(name, port, cache_only, tries) - local opts = { cache_only = cache_only, return_random = true , port = port } - return dns_client:_resolve(name, opts, tries) + return dns_client:resolve_address(name, port, cache_only, tries) end diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 
317e8aa78175..d21a8b9d3bd3 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -97,7 +97,13 @@ describe("[DNS client]", function() -- restore its API overlapped by the compatible layer package.loaded["kong.resty.dns_client"] = nil client = require("kong.resty.dns_client") - client.resolve = client._resolve + client.resolve = function (self, name, opts, tries) + if opts and opts.return_random then + return self:resolve_address(name, opts.port, opts.cache_only, tries) + else + return self:_resolve(name, opts and opts.qtype, opts and opts.cache_only, tries) + end + end end) after_each(function() diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index c4040355ea3e..de36e4ffb0f8 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -94,7 +94,13 @@ describe("[DNS client cache]", function() -- restore its API overlapped by the compatible layer package.loaded["kong.resty.dns_client"] = nil client = require("kong.resty.dns_client") - client.resolve = client._resolve + client.resolve = function (self, name, opts, tries) + if opts and opts.return_random then + return self:resolve_address(name, opts.port, opts.cache_only, tries) + else + return self:_resolve(name, opts and opts.qtype, opts and opts.cache_only, tries) + end + end end) after_each(function() From 10be035011e2a2cc3fb2c684744aceeb486194c3 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 17:06:56 +0800 Subject: [PATCH 071/126] README.md: add apis `resolve_address` and and fix format --- kong/resty/dns_client/README.md | 112 +++++++++++++++++++------------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/kong/resty/dns_client/README.md b/kong/resty/dns_client/README.md index d5798711a45d..626594d7d49b 100644 --- 
a/kong/resty/dns_client/README.md +++ b/kong/resty/dns_client/README.md @@ -10,6 +10,7 @@ Table of Contents * [APIs](#apis) * [new](#new) * [resolve](#resolve) + * [resolve_address](#resolve_address) # APIs @@ -22,48 +23,44 @@ The following APIs are for internal development use only within Kong. In the cur **Functionality:** -Creates a dns client object. Returns nil and a message string on error. +Creates a dns client object. Returns `nil` and a message string on error. -Perform a series of initialization operations: +Performs a series of initialization operations: -* parse `host` file -* parse `resolv.conf` file (used by the underlying `lua-resty-dns` library) -* initialize multiple TTL options -* create a mlcache object and initialize it +* parse `host` file, +* parse `resolv.conf` file (used by the underlying `lua-resty-dns` library), +* initialize multiple TTL options, +* create a mlcache object and initialize it. -**Input paramenters:** +**Input parameters:** `@opts` It accepts a options table argument. The following options are supported: -* TTL options - * `valid_ttl` - * same to the option `dns_valid_ttl` in kong.conf - * `stale_ttl` - * same to the option `dns_stale_ttl` in kong.conf - * `empty_ttl` - * same to the option `dns_not_found_ttl` in kong.conf - * `bad_ttl` - * same to the option `dns_error_ttl` in kong.conf -* `hosts` (default: `/etc/hosts`) - * the path of `hosts` file -* `resolv_conf` (default: `/etc/resolv.conf`) +* TTL options: + * `valid_ttl`: same to the option `dns_valid_ttl` in `kong.conf`. + * `stale_ttl`: same to the option `dns_stale_ttl` in `kong.conf`. + * `empty_ttl`: same to the option `dns_not_found_ttl` in `kong.conf`. + * `bad_ttl`: same to the option `dns_error_ttl` in `kong.conf`. +* `hosts`: (default: `/etc/hosts`) + * the path of `hosts` file. +* `resolv_conf`: (default: `/etc/resolv.conf`) * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. 
-* `order` (default: `{ "LAST", "SRV", "A", "AAAA", "CNAME" }`) - * the order in which to resolve different record types, it's similar to the option `dns_order` in kong.conf. +* `order`: (default: `{ "LAST", "SRV", "A", "AAAA", "CNAME" }`) + * the order in which to resolve different record types, it's similar to the option `dns_order` in `kong.conf`. * The `LAST` type means the type of the last successful lookup (for the specified name). -* `enable_ipv6` (default: `ture`) - * whether to support IPv6 servers when when getting nameservers from `resolv.conf` -* options for the underlying `lua-resty-dns` library - * `retrans` (default: `5`) +* `enable_ipv6`: (default: `true`) + * whether to support IPv6 servers when getting nameservers from `resolv.conf`. +* options for the underlying `lua-resty-dns` library: + * `retrans`: (default: `5`) * the total number of times of retransmitting the DNS request when receiving a DNS response times out according to the timeout setting. When trying to retransmit the query, the next nameserver according to the round-robin algorithm will be picked up. * If not given, it is taken from `resolv.conf` option `options attempts:`. - * `timeout` (default: `2000`) - * the time in milliseconds for waiting for the response for a single attempt of request transmission + * `timeout`: (default: `2000`) + * the time in milliseconds for waiting for the response for a single attempt of request transmission. * If not given, it is taken from `resolv.conf` option `options timeout:`. But note that its unit in `resolv.conf` is second. - * `no_random` (default: `true`) - * a boolean flag controls whether to randomly pick the nameserver to query first, if `true` will always start with the first nameserver listed. + * `no_random`: (default: `true`) + * a boolean flag controls whether to randomly pick the nameserver to query first. If `true`, it always starts with the first nameserver listed. 
* If not given, it is taken from `resolv.conf` option `rotate` (inverted). - * `nameservers` + * `nameservers`: * a list of nameservers to be used. Each nameserver entry can be either a single hostname string or a table holding both the hostname string and the port number. For exmaple, `{"8.8.8.8", {"8.8.4.4", 53} }`. * If not given, it is taken from `resolv.conf` option `nameserver`. @@ -71,15 +68,15 @@ Perform a series of initialization operations: ## resolve -**syntax:** *answers, err, tries? = resolve(name, opts?, tries?)* +**syntax:** *answers, err, tries? = resolve(qname, qtype, cache_only, tries?)* **context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\** **Functionality:** -Performs a DNS resolution +Performs a DNS resolution. -1. First, use the key `short::all` to query mlcache to see if there are any results available for quick use. If results are found, return them directly. -2. If there are no results available for quick use in the cache, then query all keys (`:`) extended from this domain name . +1. First, use the key `short::all` (or `short::` if `@qtype` is not `nil`) to query mlcache to see if there are any results available for quick use. If results are found, return them directly. +2. If there are no results available for quick use in the cache, then query all keys (`:`) extended from this domain name. 1. The method for calculating extended keys is as follows: 1. The domain `` is extended based on the `ndots`, `search`, and `domain` settings in `resolv.conf`. 2. The `` is extended based on the `dns_order` parameter. 
@@ -91,10 +88,8 @@ Performs a DNS resolution **Return value:** -* Return value `answers, err` - * Return one array-like Lua table contains all the records - * Return one ip address and port from records if `opts.return_random = true` - * In this scenario, `answers` would hold an address, while `err` would contain either a port number or an error message, like `address, port` or `nil, err` +* Return value `answers, err`: + * Return one array-like Lua table contains all the records. * If the server returns a non-zero error code, it will return `nil` and a string describing the error in this record. * For exmaple, `nil, "dns server error: name error"`, the server returned a result with error code 3 (NXDOMAIN). * In case of severe errors, such network error or server's malformed DNS record response, it will return `nil` and a string describing the error instead. For example: @@ -106,12 +101,39 @@ Performs a DNS resolution **Input parameters:** -* `@name`: the domain name to resolve -* `@opts`: It accepts a options table argument. The following options are supported: - * `cache_only` (default: `false`) - * control whether to solely retrieve data from the internal cache without querying to the nameserver - * `return_random` (default: `true`) - * control whether to return either a single randomly selected IP address or all available records -* `@tries?` : see the above section `Return value and input paramter @tries?` +* `@qname`: the domain name to resolve. +* `@qtype`: (optional: `nil` or DNS TYPE value) + * specify the query type instead of `self.order` types. +* `@cache_only`: (optional: `boolean`) + * control whether to solely retrieve data from the internal cache without querying to the nameserver. +* `@tries?`: see the above section `Return value and input paramter @tries?`. + +[Back to TOC](#table-of-contents) + +## resolve_address + +**syntax:** *ip, port_or_err, tries? 
= resolve_address(name, port, cache_only, tries?)* +**context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\** + +**Functionality:** + +Performs a DNS resolution and returns a single randomly selected address (IP and port number). + +When called multiple times on cached records, it will apply load-balancing based on a round-robin (RR) scheme. For SRV records, this will be a _weighted_ round-robin (WRR) scheme (because of the weights, the selection is randomized). It will apply the round-robin schemes on each level individually. + +**Return value:** + +* Return value `ip, port_or_err`: + * Return one IP address and port number from records. + * Return `nil, err` if errors occur, with `err` containing an error message. +* Return value and input parameter `@tries?`: same as `@tries?` of the `resolve` API. + +**Input parameters:** + +* `@name`: the domain name to resolve. +* `@port`: (optional: `nil` or port number) + * default port number to return if none was found in the lookup chain (only SRV records carry port information; SRV records with `port=0` will be ignored). +* `@cache_only`: (optional: `boolean`) + * control whether to solely retrieve data from the internal cache without querying the nameserver.
[Back to TOC](#table-of-contents) From f28ee49ee31c249db3bea7b28b4676ae175f90df Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 28 Mar 2024 18:02:42 +0800 Subject: [PATCH 072/126] perf: convert some variables local constants --- kong/resty/dns_client/init.lua | 48 ++++++++++--------- .../30-new-dns-client/02-old_client_spec.lua | 2 +- .../03-old_client_cache_spec.lua | 2 +- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 1a54704c5d3a..493240400429 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -1,6 +1,5 @@ local cjson = require("cjson.safe") local utils = require("kong.resty.dns_client.utils") -local tablex = require("pl.tablex") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") @@ -12,12 +11,14 @@ local DEBUG = ngx.DEBUG local ALERT = ngx.ALERT local timer_at = ngx.timer.at -local type = type -local pairs = pairs -local ipairs = ipairs -local math_min = math.min -local string_lower = string.lower -local table_insert = table.insert +local type = type +local pairs = pairs +local ipairs = ipairs +local math_min = math.min +local string_lower = string.lower +local table_insert = table.insert +local table_isempty = require("table.isempty") +local tablex_readonly = require("pl.tablex").readonly local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket @@ -36,6 +37,8 @@ local DEFAULT_EMPTY_TTL = 30 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 +local PERSISTENT_CACHE_TTL = { ttl = 0 } -- used for mlcache:set + local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } local TYPE_SRV = resolver.TYPE_SRV @@ -57,6 +60,7 @@ local TYPE_TO_NAME = { [TYPE_A] = "A", [TYPE_AAAA] = "AAAA", [TYPE_CNAME] = "CNAME", + [TYPE_LAST] = "LAST", } local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale @@ 
-73,7 +77,7 @@ local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXD -- client specific error local CACHE_ONLY_ERROR_CODE = 100 local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" -local CACHE_ONLY_ANSWERS = tablex.readonly({ errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE }) +local CACHE_ONLY_ANSWERS = tablex_readonly({ errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE }) local EMPTY_RECORD_ERROR_CODE = 101 local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" @@ -116,7 +120,7 @@ end local function insert_last_type(cache, name, qtype) local key = "last:" .. name if TYPE_TO_NAME[qtype] and cache:get(key) ~= qtype then - cache:set(key, { ttl = 0 }, qtype) + cache:set(key, PERSISTENT_CACHE_TTL, qtype) end end @@ -185,9 +189,9 @@ function _M.new(opts) end -- init the resolver options for lua-resty-dns - local nameservers = (opts.nameservers and #opts.nameservers > 0) and + local nameservers = (opts.nameservers and not table_isempty(opts.nameservers)) and opts.nameservers or resolv.nameservers - if not nameservers or #nameservers == 0 then + if not nameservers or table_isempty(nameservers) then log(WARN, "Invalid configuration, no nameservers specified") end @@ -259,7 +263,7 @@ function _M.new(opts) end -- parse order - if opts.order and #opts.order == 0 then + if opts.order and table_isempty(opts.order) then return nil, "Invalid order array: empty record types" end @@ -297,11 +301,11 @@ function _M.new(opts) empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, search_types = search_types, -- quickly accessible constant empty answers - empty_answers = { + EMPTY_ANSWERS = tablex_readonly({ errcode = EMPTY_RECORD_ERROR_CODE, errstr = EMPTY_RECORD_ERROR_MESSAGE, ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - }, + }), }, mt) end @@ -321,6 +325,12 @@ local function process_answers(self, qname, qtype, answers) for _, answer in ipairs(answers) do answer.name = string_lower(answer.name) + if self.valid_ttl then + 
answer.ttl = self.valid_ttl + else + ttl = math_min(ttl, answer.ttl) + end + if answer.type == TYPE_CNAME then cname_answer = answer -- use the last one as the real cname @@ -339,17 +349,11 @@ local function process_answers(self, qname, qtype, answers) table_insert(processed_answers, answer) end end - - if self.valid_ttl then - answer.ttl = self.valid_ttl - else - ttl = math_min(ttl, answer.ttl) - end end - if #processed_answers == 0 then + if table_isempty(processed_answers) then if not cname_answer then - return self.empty_answers + return self.EMPTY_ANSWERS end processed_answers[1] = cname_answer diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index d21a8b9d3bd3..ab3d3dd4c043 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -3,7 +3,7 @@ local _writefile = require("pl.utils").writefile local tmpname = require("pl.path").tmpname -local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy +local cycle_aware_deep_copy = require("kong.tools.table").cycle_aware_deep_copy -- hosted in Route53 in the AWS sandbox local TEST_DOMAIN = "kong-gateway-testing.link" diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index de36e4ffb0f8..1a20b8b43de5 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -4,7 +4,7 @@ local utils = require("kong.tools.utils") local _writefile = require("pl.utils").writefile local tmpname = require("pl.path").tmpname -local cycle_aware_deep_copy = require("kong.tools.utils").cycle_aware_deep_copy +local cycle_aware_deep_copy = require("kong.tools.table").cycle_aware_deep_copy -- hosted in Route53 in the AWS sandbox local TEST_NS = "198.51.100.0" From 52420432a7ff533c9cde52d69ec1a7cc73905a7f Mon Sep 17 
00:00:00 2001 From: Xiaochen Wang Date: Fri, 29 Mar 2024 09:39:29 +0800 Subject: [PATCH 073/126] improve readability: list _M.TYPE_XXX value directly Co-authored-by: Thijs Schreijer --- kong/resty/dns_client/init.lua | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 493240400429..a5d6ca4c40ad 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -84,17 +84,15 @@ local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" -- APIs -local _M = {} +local _M = { + TYPE_SRV = TYPE_SRV, + TYPE_A = TYPE_A, + TYPE_AAAA = TYPE_AAAA, + TYPE_CNAME = TYPE_CNAME, + TYPE_LAST = TYPE_LAST, +} local mt = { __index = _M } --- copy TYPE_* -for k,v in pairs(resolver) do - if type(k) == "string" and k:sub(1,5) == "TYPE_" then - _M[k] = v - end -end -_M.TYPE_LAST = TYPE_LAST - local TRIES_MT = { __tostring = cjson.encode, } From e9d570f3b36381d73b60a2785fe8ac2a1281f80b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 29 Mar 2024 09:44:28 +0800 Subject: [PATCH 074/126] refactor function name and fix lint issue --- kong/resty/dns_client/init.lua | 45 +++++++++---------- kong/resty/dns_client/utils.lua | 4 +- .../30-new-dns-client/01-utils_spec.lua | 18 ++++---- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index a5d6ca4c40ad..4c903572cbe2 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -11,7 +11,6 @@ local DEBUG = ngx.DEBUG local ALERT = ngx.ALERT local timer_at = ngx.timer.at -local type = type local pairs = pairs local ipairs = ipairs local math_min = math.min @@ -23,8 +22,8 @@ local tablex_readonly = require("pl.tablex").readonly local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket local search_names = utils.search_names -local get_next_round_robin_answers = utils.get_next_round_robin_answers -local 
get_weighted_round_robin_answers = utils.get_weighted_round_robin_answers +local get_next_round_robin_answer = utils.get_next_round_robin_answer +local get_next_weighted_round_robin_answer = utils.get_next_weighted_round_robin_answer local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks @@ -97,19 +96,19 @@ local mt = { __index = _M } local TRIES_MT = { __tostring = cjson.encode, } -local function stats_init(stats, name) +local function stats_init_name(stats, name) if not stats[name] then stats[name] = {} end end -local function stats_count(stats, name, key) +local function stats_increment(stats, name, key) stats[name][key] = (stats[name][key] or 0) + 1 end -local function stats_set(stats, name, key, value) +local function stats_set_count(stats, name, key, value) stats[name][key] = value end @@ -371,7 +370,7 @@ end local function resolve_query(self, name, qtype, tries) local key = name .. ":" .. qtype - stats_count(self.stats, key, "query") + stats_increment(self.stats, key, "query") local r, err = resolver:new(self.r_opts) if not r then @@ -389,20 +388,20 @@ local function resolve_query(self, name, qtype, tries) local query_time = now() - start_time -- the time taken for the DNS query local time_str = ("%.3f %.3f"):format(start_time, query_time) - stats_set(self.stats, key, "query_last_time", time_str) + stats_set_count(self.stats, key, "query_last_time", time_str) log(DEBUG, "r:query() ans:", answers and #answers or "-", " t:", time_str) if not answers then - stats_count(self.stats, key, "query_fail_nameserver") + stats_increment(self.stats, key, "query_fail_nameserver") err = err or "unknown" return nil, "DNS server error: " .. err .. ", Query Time: " .. time_str end answers = process_answers(self, name, qtype, answers) - stats_count(self.stats, key, answers.errstr and "query_fail:" .. answers.errstr - or "query_succ") + stats_increment(self.stats, key, answers.errstr and "query_fail:" .. 
answers.errstr + or "query_succ") return answers, nil, answers.ttl end @@ -425,7 +424,7 @@ end local function start_stale_update_task(self, key, name, qtype, short_key) - stats_count(self.stats, key, "stale") + stats_increment(self.stats, key, "stale") local ok, err = timer_at(0, stale_update_task, self, key, name, qtype, short_key) if not ok then @@ -483,10 +482,10 @@ end local function resolve_name_type(self, name, qtype, cache_only, short_key, tries, resolved_names) local key = name .. ":" .. qtype - stats_init(self.stats, key) + stats_init_name(self.stats, key) if detect_recursion(resolved_names, key) then - stats_count(self.stats, key, "fail_recur") + stats_increment(self.stats, key, "fail_recur") return nil, "recursion detected for name: " .. key end @@ -509,7 +508,7 @@ local function resolve_name_type(self, name, qtype, cache_only, short_key, tries -- hit L1 lru or L2 shm if hit_level and hit_level < HIT_L3 then - stats_count(self.stats, key, HIT_LEVEL_TO_NAME[hit_level]) + stats_increment(self.stats, key, HIT_LEVEL_TO_NAME[hit_level]) end if err or answers.errcode then @@ -605,11 +604,11 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) -- key like "short:example.com:all" or "short:example.com:5" local key = "short:" .. name .. ":" .. (qtype or "all") - stats_init(self.stats, name) - stats_count(self.stats, name, "runs") + stats_init_name(self.stats, name) + stats_increment(self.stats, name, "runs") if detect_recursion(resolved_names, key) then - stats_count(self.stats, name, "fail_recur") + stats_increment(self.stats, name, "fail_recur") return nil, "recursion detected for name: " .. 
name end @@ -630,7 +629,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) end) end - stats_count(self.stats, name, answers and "miss" or "fail") + stats_increment(self.stats, name, answers and "miss" or "fail") else log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, " hlv:", hit_level or "-") @@ -640,12 +639,12 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) (hit_level and hit_level < HIT_L3)) end - stats_count(self.stats, name, HIT_LEVEL_TO_NAME[hit_level]) + stats_increment(self.stats, name, HIT_LEVEL_TO_NAME[hit_level]) end -- dereference CNAME if qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then - stats_count(self.stats, name, "cname") + stats_increment(self.stats, name, "cname") return resolve_all(self, answers[1].cname, qtype, cache_only, tries, resolved_names) end @@ -668,12 +667,12 @@ function _M:resolve_address(name, port, cache_only, tries, resolved_names) -- non-nil answers and return_random if answers[1].type == TYPE_SRV then - local answer = get_weighted_round_robin_answers(answers) + local answer = get_next_weighted_round_robin_answer(answers) port = (answer.port ~= 0 and answer.port) or port return self:resolve_address(answer.target, port, cache_only, tries, resolved_names) end - return get_next_round_robin_answers(answers).address, port, tries + return get_next_round_robin_answer(answers).address, port, tries end diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 9c93df1e917c..6c71c8807692 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -199,7 +199,7 @@ end -- util APIs to balance @answers -function _M.get_next_round_robin_answers(answers) +function _M.get_next_round_robin_answer(answers) answers.last = (answers.last or 0) % #answers + 1 return answers[answers.last] end @@ -258,7 +258,7 @@ local function filter_lowest_priority_answers(answers) end -function 
_M.get_weighted_round_robin_answers(answers) +function _M.get_next_weighted_round_robin_answer(answers) local l = answers.lowest_prio_records or filter_lowest_priority_answers(answers) -- perform round robin selection on lowest priority answers @l diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 11f4b95180a0..77527f10bb68 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -99,7 +99,7 @@ describe("[utils]", function () { target = "3" }, -- 25% { target = "4" }, -- 25% } - local count = get_and_count(answers, 100, utils.get_next_round_robin_answers) + local count = get_and_count(answers, 100, utils.get_next_round_robin_answer) assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) end) @@ -108,7 +108,7 @@ describe("[utils]", function () local answers = { { target = "w5-p10-a", weight = 5, priority = 10, }, -- hit 100% } - local count = get_and_count(answers, 20, utils.get_weighted_round_robin_answers) + local count = get_and_count(answers, 20, utils.get_next_weighted_round_robin_answer) assert.same(count, { ["w5-p10-a"] = 20 }) -- only get the lowest priority @@ -118,7 +118,7 @@ describe("[utils]", function () { target = "w5-p10-b", weight = 5, priority = 10, }, -- hit 50% { target = "w0-p10", weight = 0, priority = 10, }, -- hit 0% } - local count = get_and_count(answers, 20, utils.get_weighted_round_robin_answers) + local count = get_and_count(answers, 20, utils.get_next_weighted_round_robin_answer) assert.same(count, { ["w5-p10-a"] = 10, ["w5-p10-b"] = 10 }) -- weight: 6, 3, 1 @@ -127,7 +127,7 @@ describe("[utils]", function () { target = "w3", weight = 3, priority = 10, }, -- hit 30% { target = "w1", weight = 1, priority = 10, }, -- hit 10% } - local count = get_and_count(answers, 100 * 1000, utils.get_weighted_round_robin_answers) + local count = get_and_count(answers, 100 * 1000, 
utils.get_next_weighted_round_robin_answer) assert.same(count, { ["w6"] = 60000, ["w3"] = 30000, ["w1"] = 10000 }) -- random start @@ -145,8 +145,8 @@ describe("[utils]", function () { target = "4", weight = 1, priority = 10, }, } - local a1 = utils.get_weighted_round_robin_answers(answers1) - local a2 = utils.get_weighted_round_robin_answers(answers2) + local a1 = utils.get_next_weighted_round_robin_answer(answers1) + local a2 = utils.get_next_weighted_round_robin_answer(answers2) assert.not_equal(a1.target, a2.target) -- weight 0 as 0.1 @@ -156,7 +156,7 @@ describe("[utils]", function () { target = "w2", weight = 0, priority = 10, }, { target = "w3", weight = 0, priority = 10, }, } - local count = get_and_count(answers, 100, utils.get_weighted_round_robin_answers) + local count = get_and_count(answers, 100, utils.get_next_weighted_round_robin_answer) assert.same(count, { ["w0"] = 7, ["w1"] = 77, ["w2"] = 8, ["w3"] = 8 }) -- weight 0 and lowest priority @@ -166,7 +166,7 @@ describe("[utils]", function () { target = "w0-b", weight = 0, priority = 0, }, { target = "w0-c", weight = 0, priority = 0, }, } - local count = get_and_count(answers, 100, utils.get_weighted_round_robin_answers) + local count = get_and_count(answers, 100, utils.get_next_weighted_round_robin_answer) assert.same(count["w1"], nil) -- all weights are 0 @@ -176,7 +176,7 @@ describe("[utils]", function () { target = "3", weight = 0, priority = 10, }, { target = "4", weight = 0, priority = 10, }, } - local count = get_and_count(answers, 100, utils.get_weighted_round_robin_answers) + local count = get_and_count(answers, 100, utils.get_next_weighted_round_robin_answer) assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) end) end) From 14fe836c4df6ae01f3ead616843ae817adab2270 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 29 Mar 2024 12:57:50 +0800 Subject: [PATCH 075/126] refactor function names for better test --- kong/resty/dns_client/init.lua | 11 ++--------- 
kong/resty/dns_client/utils.lua | 2 +- spec/01-unit/09-balancer/01-generic_spec.lua | 1 + .../09-balancer/02-least_connections_spec.lua | 1 + .../09-balancer/03-consistent_hashing_spec.lua | 1 + spec/01-unit/09-balancer/04-round_robin_spec.lua | 3 +++ spec/01-unit/09-balancer/06-latency_spec.lua | 1 + .../30-new-dns-client/02-old_client_spec.lua | 14 +++++++------- .../30-new-dns-client/03-old_client_cache_spec.lua | 4 ++-- spec/helpers/dns.lua | 6 +++--- 10 files changed, 22 insertions(+), 22 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 4c903572cbe2..28d8f0900323 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -748,20 +748,13 @@ if package.loaded.busted then } end - function _M:insert_last_type(name, qtype) + function _M:_insert_last_type(name, qtype) -- export as different name! insert_last_type(self.cache, name, qtype) end - function _M:get_last_type(name) + function _M:_get_last_type(name) -- export as different name! 
return get_last_type(self.cache, name) end - - _M._init = _M.init - function _M.init(opts) - opts = opts or {} - opts.cache_purge = true - return _M._init(opts) - end end diff --git a/kong/resty/dns_client/utils.lua b/kong/resty/dns_client/utils.lua index 6c71c8807692..13aaad08046f 100644 --- a/kong/resty/dns_client/utils.lua +++ b/kong/resty/dns_client/utils.lua @@ -240,7 +240,7 @@ end -- gather records with the lowest priority in SRV record local function filter_lowest_priority_answers(answers) - local lowest_priority = answers[1].priority + local lowest_priority = answers[1].priority -- SRV record MUST have `priority` field local l = {} -- lowest priority records list for _, answer in ipairs(answers) do diff --git a/spec/01-unit/09-balancer/01-generic_spec.lua b/spec/01-unit/09-balancer/01-generic_spec.lua index dc7bf33a940f..b56fb1ad8f5b 100644 --- a/spec/01-unit/09-balancer/01-generic_spec.lua +++ b/spec/01-unit/09-balancer/01-generic_spec.lua @@ -214,6 +214,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro -- so that CI and docker can have reliable results -- but remove `search` and `domain` search = {}, + cache_purge = true, }) snapshot = assert:snapshot() assert:set_parameter("TableFormatLevel", 10) diff --git a/spec/01-unit/09-balancer/02-least_connections_spec.lua b/spec/01-unit/09-balancer/02-least_connections_spec.lua index 3db545dec093..caae6c8bbe07 100644 --- a/spec/01-unit/09-balancer/02-least_connections_spec.lua +++ b/spec/01-unit/09-balancer/02-least_connections_spec.lua @@ -219,6 +219,7 @@ describe("[least-connections]", function() resolvConf = { "nameserver 198.51.100.0" }, + cache_purge = true, }) snapshot = assert:snapshot() end) diff --git a/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua b/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua index 6d1fdc7b1737..aaecbdd4301f 100644 --- a/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua +++ 
b/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua @@ -266,6 +266,7 @@ describe("[consistent_hashing]", function() -- so that CI and docker can have reliable results -- but remove `search` and `domain` search = {}, + cache_purge = true, }) snapshot = assert:snapshot() end) diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index edc5ef811771..4e045685e810 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -304,6 +304,7 @@ describe("[round robin balancer]", function() -- so that CI and docker can have reliable results -- but remove `search` and `domain` search = {}, + cache_purge = true, }) snapshot = assert:snapshot() end) @@ -412,6 +413,7 @@ describe("[round robin balancer]", function() resolvConf = { "nameserver 127.0.0.1:22000" -- make sure dns query fails }, + cache_purge = true, }) -- create balancer local b = check_balancer(new_balancer { @@ -1257,6 +1259,7 @@ describe("[round robin balancer]", function() resolvConf = { "nameserver 127.0.0.1:22000" -- make sure dns query fails }, + cache_purge = true, }) record.expire = gettime() -1 -- expire current dns cache record sleep(0.2) -- wait for record expiration diff --git a/spec/01-unit/09-balancer/06-latency_spec.lua b/spec/01-unit/09-balancer/06-latency_spec.lua index 89def3b45299..be9a23279e78 100644 --- a/spec/01-unit/09-balancer/06-latency_spec.lua +++ b/spec/01-unit/09-balancer/06-latency_spec.lua @@ -218,6 +218,7 @@ describe("[latency]", function() resolvConf = { "nameserver 198.51.100.0" }, + cache_purge = true, }) snapshot = assert:snapshot() end) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index ab3d3dd4c043..8ff93fd19b8d 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -271,7 +271,7 @@ describe("[DNS client]", 
function() local list = hook_query_func_get_list() local cli = assert(client_new()) - cli:insert_last_type("host", resolver.TYPE_CNAME) + cli:_insert_last_type("host", resolver.TYPE_CNAME) cli:resolve("host") @@ -359,7 +359,7 @@ describe("[DNS client]", function() local list = hook_query_func_get_list() local cli = assert(client_new()) - cli:insert_last_type("host.", resolver.TYPE_CNAME) + cli:_insert_last_type("host.", resolver.TYPE_CNAME) cli:resolve("host.") assert.same({ @@ -417,7 +417,7 @@ describe("[DNS client]", function() local list = hook_query_func_get_list() local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type - cli:insert_last_type("host", resolver.TYPE_CNAME) + cli:_insert_last_type("host", resolver.TYPE_CNAME) cli:resolve("host") assert.same({ @@ -470,7 +470,7 @@ describe("[DNS client]", function() local list = hook_query_func_get_list() local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type - cli:insert_last_type("host", resolver.TYPE_CNAME) + cli:_insert_last_type("host", resolver.TYPE_CNAME) cli:resolve("host.") @@ -743,7 +743,7 @@ describe("[DNS client]", function() }, cli.stats) -- check last successful lookup references - local lastsuccess = cli:get_last_type(answers[1].name) + local lastsuccess = cli:_get_last_type(answers[1].name) assert.are.equal(resolver.TYPE_A, lastsuccess) end) @@ -940,13 +940,13 @@ describe("[DNS client]", function() resolv_conf = "/etc/resolv.conf", order = {"SRV", "CNAME", "A", "AAAA"} })) - assert.equal(resolver.TYPE_A, cli:get_last_type("localhost")) -- success set to A as it is the preferred option + assert.equal(resolver.TYPE_A, cli:_get_last_type("localhost")) -- success set to A as it is the preferred option local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"SRV", "CNAME", "AAAA", "A"} })) - assert.equal(resolver.TYPE_AAAA, cli:get_last_type("localhost")) -- success set to AAAA as it is the preferred option + assert.equal(resolver.TYPE_AAAA, 
cli:_get_last_type("localhost")) -- success set to AAAA as it is the preferred option end) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 1a20b8b43de5..ab37574c2dcc 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -625,9 +625,9 @@ describe("[DNS client cache]", function() }, } } - cli:insert_last_type("another.name.consul", resolver.TYPE_AAAA) + cli:_insert_last_type("another.name.consul", resolver.TYPE_AAAA) cli:resolve("demo.service.consul", { return_random = true }) - local success = cli:get_last_type("another.name.consul") + local success = cli:_get_last_type("another.name.consul") assert.equal(resolver.TYPE_AAAA, success) end) diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 9a63d02cd65c..689f8a980c9d 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -85,7 +85,7 @@ function _M.dnsSRV(client, records, staleTtl) local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) -- insert last-succesful lookup type - client.getobj():insert_last_type(records[1].name, records[1].type) + client.getobj():_insert_last_type(records[1].name, records[1].type) return records end @@ -127,7 +127,7 @@ function _M.dnsA(client, records, staleTtl) local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl) -- insert last-succesful lookup type - client.getobj():insert_last_type(records[1].name, records[1].type) + client.getobj():_insert_last_type(records[1].name, records[1].type) return records end @@ -168,7 +168,7 @@ function _M.dnsAAAA(client, records, staleTtl) local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) -- insert last-succesful lookup type - client.getobj():insert_last_type(records[1].name, records[1].type) + 
client.getobj():_insert_last_type(records[1].name, records[1].type) return records end From 6cba090e63ea3be782d6295939c23f645ed34c44 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 29 Mar 2024 13:01:56 +0800 Subject: [PATCH 076/126] chores: do not check for r.destroy before using it --- kong/resty/dns_client/init.lua | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 28d8f0900323..75f48c6cfd5e 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -381,9 +381,7 @@ local function resolve_query(self, name, qtype, tries) local options = { additional_section = true, qtype = qtype } local answers, err = r:query(name, options) - if r.destroy then - r:destroy() - end + r:destroy() local query_time = now() - start_time -- the time taken for the DNS query local time_str = ("%.3f %.3f"):format(start_time, query_time) From 07a2f75f0b884413bfd520f86c61c013247a0333 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 29 Mar 2024 15:36:19 +0800 Subject: [PATCH 077/126] 1 --- kong/resty/dns_client/init.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kong/resty/dns_client/init.lua b/kong/resty/dns_client/init.lua index 75f48c6cfd5e..78ace243504b 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/resty/dns_client/init.lua @@ -232,6 +232,8 @@ function _M.new(opts) end end, + -- @channel: event channel name, such as "mlcache:invalidate:dns_cache" + -- @data: mlcache's key name, such as ":" broadcast = function(channel, data) if not kong or not kong.worker_events then return From 59fa5f3e771881fa39164b1e5af9b642eb71a77b Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 31 Mar 2024 23:37:43 +0800 Subject: [PATCH 078/126] move library path to kong/dns --- kong-3.8.0-0.rockspec | 4 ++-- kong/{resty/dns_client => dns}/README.md | 0 kong/{resty/dns_client/init.lua => dns/client.lua} | 2 +- kong/{resty/dns_client => dns}/utils.lua | 0 
kong/resty/dns/client.lua | 4 ++-- spec/01-unit/30-new-dns-client/01-utils_spec.lua | 2 +- spec/01-unit/30-new-dns-client/02-old_client_spec.lua | 4 ++-- spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua | 4 ++-- spec/01-unit/30-new-dns-client/05-client_stat_spec.lua | 4 ++-- 9 files changed, 12 insertions(+), 12 deletions(-) rename kong/{resty/dns_client => dns}/README.md (100%) rename kong/{resty/dns_client/init.lua => dns/client.lua} (99%) rename kong/{resty/dns_client => dns}/utils.lua (100%) diff --git a/kong-3.8.0-0.rockspec b/kong-3.8.0-0.rockspec index b0df906afd4e..e60441ef32da 100644 --- a/kong-3.8.0-0.rockspec +++ b/kong-3.8.0-0.rockspec @@ -116,8 +116,8 @@ build = { ["kong.resty.dns.client"] = "kong/resty/dns/client.lua", ["kong.resty.dns.utils"] = "kong/resty/dns/utils.lua", - ["kong.resty.dns_client"] = "kong/resty/dns_client/init.lua", - ["kong.resty.dns_client.utils"] = "kong/resty/dns_client/utils.lua", + ["kong.dns.client"] = "kong/dns/client.lua", + ["kong.dns.utils"] = "kong/dns/utils.lua", ["kong.resty.ctx"] = "kong/resty/ctx.lua", diff --git a/kong/resty/dns_client/README.md b/kong/dns/README.md similarity index 100% rename from kong/resty/dns_client/README.md rename to kong/dns/README.md diff --git a/kong/resty/dns_client/init.lua b/kong/dns/client.lua similarity index 99% rename from kong/resty/dns_client/init.lua rename to kong/dns/client.lua index 78ace243504b..783bd49f4f77 100644 --- a/kong/resty/dns_client/init.lua +++ b/kong/dns/client.lua @@ -1,5 +1,5 @@ local cjson = require("cjson.safe") -local utils = require("kong.resty.dns_client.utils") +local utils = require("kong.dns.utils") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") diff --git a/kong/resty/dns_client/utils.lua b/kong/dns/utils.lua similarity index 100% rename from kong/resty/dns_client/utils.lua rename to kong/dns/utils.lua diff --git a/kong/resty/dns/client.lua b/kong/resty/dns/client.lua index 
9121e4c17426..0c7359c54ea1 100644 --- a/kong/resty/dns/client.lua +++ b/kong/resty/dns/client.lua @@ -1,8 +1,8 @@ -- Use the new dns client library instead. If you want to switch to the original -- one, you can set `legacy_dns_client = on` in kong.conf. if ngx.shared.kong_dns_cache and not _G.busted_legacy_dns_client then - package.loaded["kong.resty.dns_client"] = nil - return require("kong.resty.dns_client") + package.loaded["kong.dns.client"] = nil + return require("kong.dns.client") end -------------------------------------------------------------------------- diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 77527f10bb68..43b657dcfff8 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -1,4 +1,4 @@ -local utils = require "kong.resty.dns_client.utils" +local utils = require "kong.dns.utils" local tempfilename = require("pl.path").tmpname local writefile = require("pl.utils").writefile local splitlines = require("pl.stringx").splitlines diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 8ff93fd19b8d..f1fa5b076f7b 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -95,8 +95,8 @@ describe("[DNS client]", function() end -- restore its API overlapped by the compatible layer - package.loaded["kong.resty.dns_client"] = nil - client = require("kong.resty.dns_client") + package.loaded["kong.dns.client"] = nil + client = require("kong.dns.client") client.resolve = function (self, name, opts, tries) if opts and opts.return_random then return self:resolve_address(name, opts.port, opts.cache_only, tries) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index ab37574c2dcc..bed69ec29d60 100644 --- 
a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -92,8 +92,8 @@ describe("[DNS client cache]", function() end -- restore its API overlapped by the compatible layer - package.loaded["kong.resty.dns_client"] = nil - client = require("kong.resty.dns_client") + package.loaded["kong.dns.client"] = nil + client = require("kong.dns.client") client.resolve = function (self, name, opts, tries) if opts and opts.return_random then return self:resolve_address(name, opts.port, opts.cache_only, tries) diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua index 2bf31e52e8bf..fbe458b3568c 100644 --- a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -22,8 +22,8 @@ describe("[DNS client stats]", function() end -- restore its API overlapped by the compatible layer - package.loaded["kong.resty.dns_client"] = nil - client = require("kong.resty.dns_client") + package.loaded["kong.dns.client"] = nil + client = require("kong.dns.client") client.resolve = client._resolve end) From 81746b875c7a1eb1d76ba321cf4cd9232efcb46d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Apr 2024 10:16:51 +0800 Subject: [PATCH 079/126] mark it TODO to convert ipc to a module constant --- kong/dns/client.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 783bd49f4f77..a70490390328 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -210,6 +210,8 @@ function _M.new(opts) exptimeout = lock_timeout + 1, } + -- TODO: convert the ipc a module constant, currently we need to use the + -- ipc_source to distinguish sources of different DNS client events. ipc_counter = ipc_counter + 1 local ipc_source = "dns_client_mlcache#" ..
ipc_counter local ipc = { From bb436a417870150daee75a89534ce063ac315139 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Apr 2024 10:43:16 +0800 Subject: [PATCH 080/126] use do-end block to wrap init_hosts and insert_answer_into_cache --- kong/dns/client.lua | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index a70490390328..0b3a0315b9ea 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -127,15 +127,8 @@ local function get_last_type(cache, name) end --- insert hosts into cache -local function init_hosts(cache, path, preferred_ip_type) - local hosts = parse_hosts(path) - - local function insert_answer(name, qtype, address) - if not address then - return - end - +local init_hosts do + local function insert_answer_into_cache(cache, address, name, qtype) -- insert via the `:get` callback to prevent inter-process communication cache:get(name .. ":" .. qtype, nil, function() return { @@ -152,23 +145,28 @@ local function init_hosts(cache, path, preferred_ip_type) end) end - for name, address in pairs(hosts) do - name = string_lower(name) + -- insert hosts into cache + function init_hosts(cache, path, preferred_ip_type) + local hosts = parse_hosts(path) - if address.ipv4 then - insert_answer(name, TYPE_A, address.ipv4) - insert_last_type(cache, name, TYPE_A) - end + for name, address in pairs(hosts) do + name = string_lower(name) - if address.ipv6 then - insert_answer(name, TYPE_AAAA, address.ipv6) - if not address.ipv4 or preferred_ip_type == TYPE_AAAA then - insert_last_type(cache, name, TYPE_AAAA) + if address.ipv4 then + insert_answer_into_cache(cache, address.ipv4, name, TYPE_A) + insert_last_type(cache, name, TYPE_A) + end + + if address.ipv6 then + insert_answer_into_cache(cache, address.ipv6, name, TYPE_AAAA) + if not address.ipv4 or preferred_ip_type == TYPE_AAAA then + insert_last_type(cache, name, TYPE_AAAA) + end end end - end - return 
hosts + return hosts + end end From 45d6d86f4f34f10f379b181930e5c8b39c7a0d00 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Apr 2024 13:08:08 +0800 Subject: [PATCH 081/126] add comments and test cases for API utils.ipv6_bracket --- kong/dns/utils.lua | 3 ++- .../01-unit/30-new-dns-client/01-utils_spec.lua | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 13aaad08046f..e18146053778 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -189,8 +189,9 @@ function _M.search_names(name, resolv, hosts) end +-- add square brackets around IPv6 addresses if a non-strict check detects them function _M.ipv6_bracket(name) - if name:match("^[^[].*:") then -- not rigorous, but sufficient + if name:match("^[^[].*:") then -- not start with '[' and contains ':' return "[" .. name .. "]" end return name diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index 43b657dcfff8..ba8e663a8370 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -82,6 +82,23 @@ describe("[utils]", function () end) end) + describe("ipv6_bracket()", function () + it("IPv6 address", function () + assert.equal(utils.ipv6_bracket("::1"), "[::1]") + assert.equal(utils.ipv6_bracket("[::1]"), "[::1]") + assert.equal(utils.ipv6_bracket("2001:db8::1"), "[2001:db8::1]") + assert.equal(utils.ipv6_bracket("[2001:db8::1]"), "[2001:db8::1]") + end) + + it("IPv4 address", function () + assert.equal(utils.ipv6_bracket("127.0.0.1"), "127.0.0.1") + end) + + it("host name", function () + assert.equal(utils.ipv6_bracket("example.com"), "example.com") + end) + end) + describe("answer selection", function () local function get_and_count(answers, n, get_ans) local count = {} From 46f68b556de04b66e90e557f5e5289ed5221c8f7 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Apr 2024 13:24:36 +0800 Subject: 
[PATCH 082/126] remove unused kong.tools.utils requirement in test cases --- spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index bed69ec29d60..90f8e1e15e3a 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -1,7 +1,6 @@ -- This test case file originates from the old version of the DNS client and has -- been modified to adapt to the new version of the DNS client. -local utils = require("kong.tools.utils") local _writefile = require("pl.utils").writefile local tmpname = require("pl.path").tmpname local cycle_aware_deep_copy = require("kong.tools.table").cycle_aware_deep_copy @@ -240,7 +239,7 @@ describe("[DNS client cache]", function() ttl = ttl, }} } - local mock_copy = utils.cycle_aware_deep_copy(mock_records) + local mock_copy = cycle_aware_deep_copy(mock_records) -- resolve and check whether we got the mocked record local answers = cli:resolve("myhost6") @@ -480,7 +479,7 @@ describe("[DNS client cache]", function() ttl = 60, } mock_records = setmetatable({ - ["myhost9.domain.com:"..resolver.TYPE_CNAME] = { utils.cycle_aware_deep_copy(CNAME1) }, -- copy to make it different + ["myhost9.domain.com:"..resolver.TYPE_CNAME] = { cycle_aware_deep_copy(CNAME1) }, -- copy to make it different ["myhost9.domain.com:"..resolver.TYPE_A] = { CNAME1, A2 }, -- not there, just a reference and target ["myotherhost.domain.com:"..resolver.TYPE_A] = { A2 }, }, { From d6847bc2fe10ff59e8b183e0d6feaf36357861fc Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Apr 2024 14:32:54 +0800 Subject: [PATCH 083/126] add comments for cwid checking and hosts --- kong/dns/client.lua | 2 +- kong/dns/utils.lua | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kong/dns/client.lua 
b/kong/dns/client.lua index 0b3a0315b9ea..156d3f201220 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -223,7 +223,7 @@ function _M.new(opts) local cwid = ngx.worker.id() for _, ev in pairs(events) do local handler = function(data, event, source, wid) - if cwid ~= wid then + if cwid ~= wid then -- Current worker has handled this event. ev.handler(data) end end diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index e18146053778..564048b4f8d6 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -179,6 +179,8 @@ function _M.search_names(name, resolv, hosts) table_insert(names, name .. "." .. suffix) end + -- Always search for the original name. + -- If found in the hosts file, prioritize its search. if hosts and hosts[name] then table_insert(names, 1, name) else From 011255b7e80ce2d44918b828f1d63f4d7ecfec66 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Apr 2024 14:43:12 +0800 Subject: [PATCH 084/126] chores: fix some coding styles --- kong/dns/client.lua | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 156d3f201220..b8f7b120d097 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -10,6 +10,7 @@ local WARN = ngx.WARN local DEBUG = ngx.DEBUG local ALERT = ngx.ALERT local timer_at = ngx.timer.at +local worker_id = ngx.worker.id local pairs = pairs local ipairs = ipairs @@ -90,7 +91,7 @@ local _M = { TYPE_CNAME = TYPE_CNAME, TYPE_LAST = TYPE_LAST, } -local mt = { __index = _M } +local MT = { __index = _M } local TRIES_MT = { __tostring = cjson.encode, } @@ -220,7 +221,7 @@ function _M.new(opts) return end - local cwid = ngx.worker.id() + local cwid = worker_id() for _, ev in pairs(events) do local handler = function(data, event, source, wid) if cwid ~= wid then -- Current worker has handled this event. 
@@ -305,7 +306,7 @@ function _M.new(opts) errstr = EMPTY_RECORD_ERROR_MESSAGE, ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, }), - }, mt) + }, MT) end @@ -330,21 +331,23 @@ local function process_answers(self, qname, qtype, answers) ttl = math_min(ttl, answer.ttl) end - if answer.type == TYPE_CNAME then + local answer_type = answer.type + + if answer_type == TYPE_CNAME then cname_answer = answer -- use the last one as the real cname - elseif answer.type == qtype then + elseif answer_type == qtype then -- compatible with balancer, see https://github.com/Kong/kong/pull/3088 - if answer.type == TYPE_AAAA then + if answer_type == TYPE_AAAA then answer.address = ipv6_bracket(answer.address) - elseif answer.type == TYPE_SRV then + elseif answer_type == TYPE_SRV then answer.target = ipv6_bracket(answer.target) end -- skip the SRV record pointing to itself, -- see https://github.com/Kong/lua-resty-dns-client/pull/3 - if not (answer.type == TYPE_SRV and answer.target == qname) then + if not (answer_type == TYPE_SRV and answer.target == qname) then table_insert(processed_answers, answer) end end @@ -370,7 +373,7 @@ local function process_answers(self, qname, qtype, answers) end -local function resolve_query(self, name, qtype, tries) +local function resolve_query(self, name, qtype) local key = name .. ":" .. 
qtype stats_increment(self.stats, key, "query") @@ -412,7 +415,7 @@ local function stale_update_task(premature, self, key, name, qtype, short_key) return end - local answers = resolve_query(self, name, qtype, {}) + local answers = resolve_query(self, name, qtype) if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then self.cache:set(key, { ttl = answers.ttl }, answers) insert_last_type(self.cache, name, qtype) @@ -466,7 +469,7 @@ local function resolve_name_type_callback(self, name, qtype, cache_only, short_k return CACHE_ONLY_ANSWERS, nil, -1 end - local answers, err, ttl = resolve_query(self, name, qtype, tries) + local answers, err, ttl = resolve_query(self, name, qtype) return answers, err, ttl end @@ -630,6 +633,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) end stats_increment(self.stats, name, answers and "miss" or "fail") + else log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, " hlv:", hit_level or "-") From 4c224b4b2f4bd880986e0e5b936df45b728c7243 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 2 Apr 2024 16:05:03 +0800 Subject: [PATCH 085/126] Update kong/dns/README.md: remove `the` word Co-authored-by: Thijs Schreijer --- kong/dns/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index 626594d7d49b..5b634140867a 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -1,7 +1,7 @@ Name ==== -The module is currently Kong only, and builds on top of the `lua-resty-dns` and the kong's `lua-resty-mlcache` library. +The module is currently Kong only, and builds on top of the `lua-resty-dns` and kong's `lua-resty-mlcache` library. 
Table of Contents ================= From 7793247f4b6ad03854d2212c18ac72d2742afb61 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 2 Apr 2024 15:48:31 +0800 Subject: [PATCH 086/126] remove use of readonly function for cached DNS records --- kong/dns/client.lua | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index b8f7b120d097..b5a7f2e612ad 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -18,7 +18,6 @@ local math_min = math.min local string_lower = string.lower local table_insert = table.insert local table_isempty = require("table.isempty") -local tablex_readonly = require("pl.tablex").readonly local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket @@ -77,7 +76,7 @@ local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXD -- client specific error local CACHE_ONLY_ERROR_CODE = 100 local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" -local CACHE_ONLY_ANSWERS = tablex_readonly({ errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE }) +local CACHE_ONLY_ANSWERS = { errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE } local EMPTY_RECORD_ERROR_CODE = 101 local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" @@ -300,12 +299,16 @@ function _M.new(opts) stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, search_types = search_types, + + -- TODO: Make the table readonly. But if `string.buffer.encode/decode` and + -- `pl.tablex.readonly` are called on it, it will become empty table. 
+ -- -- quickly accessible constant empty answers - EMPTY_ANSWERS = tablex_readonly({ + EMPTY_ANSWERS = { errcode = EMPTY_RECORD_ERROR_CODE, errstr = EMPTY_RECORD_ERROR_MESSAGE, ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - }), + }, }, MT) end @@ -355,6 +358,7 @@ local function process_answers(self, qname, qtype, answers) if table_isempty(processed_answers) then if not cname_answer then + log(DEBUG, "processed ans:empty") return self.EMPTY_ANSWERS end @@ -393,7 +397,7 @@ local function resolve_query(self, name, qtype) stats_set_count(self.stats, key, "query_last_time", time_str) - log(DEBUG, "r:query() ans:", answers and #answers or "-", " t:", time_str) + log(DEBUG, "r:query(", key, ") ans:", answers and #answers or "-", " t:", time_str) if not answers then stats_increment(self.stats, key, "query_fail_nameserver") From 4eaace2a7c36fdf721e7b654d94282f3d2da9f50 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 2 Apr 2024 16:04:03 +0800 Subject: [PATCH 087/126] fix coding style: localize SWRR logic --- kong/dns/utils.lua | 92 +++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 564048b4f8d6..3fda5284b2ba 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -208,68 +208,70 @@ function _M.get_next_round_robin_answer(answers) end --- based on the Nginx's SWRR algorithm and lua-resty-balancer -local function swrr_next(answers) - local total = 0 - local best = nil -- best answer in answers[] - - for _, answer in ipairs(answers) do - -- 0.1 gives weight 0 record a minimal chance of being chosen (rfc 2782) - local w = (answer.weight == 0) and 0.1 or answer.weight - local cw = answer.cw + w - answer.cw = cw - if not best or cw > best.cw then - best = answer +do + -- based on the Nginx's SWRR algorithm and lua-resty-balancer + local function swrr_next(answers) + local total = 0 + local best = nil -- best answer in answers[] + + for _, answer in ipairs(answers) do 
+ -- 0.1 gives weight 0 record a minimal chance of being chosen (rfc 2782) + local w = (answer.weight == 0) and 0.1 or answer.weight + local cw = answer.cw + w + answer.cw = cw + if not best or cw > best.cw then + best = answer + end + total = total + w end - total = total + w + + best.cw = best.cw - total + return best end - best.cw = best.cw - total - return best -end + local function swrr_init(answers) + for _, answer in ipairs(answers) do + answer.cw = 0 -- current weight + end -local function swrr_init(answers) - for _, answer in ipairs(answers) do - answer.cw = 0 -- current weight + -- random start + for _ = 1, math_random(#answers) do + swrr_next(answers) + end end - -- random start - for _ = 1, math_random(#answers) do - swrr_next(answers) - end -end + -- gather records with the lowest priority in SRV record + local function filter_lowest_priority_answers(answers) + local lowest_priority = answers[1].priority -- SRV record MUST have `priority` field + local l = {} -- lowest priority records list --- gather records with the lowest priority in SRV record -local function filter_lowest_priority_answers(answers) - local lowest_priority = answers[1].priority -- SRV record MUST have `priority` field - local l = {} -- lowest priority records list + for _, answer in ipairs(answers) do + if answer.priority < lowest_priority then + lowest_priority = answer.priority + l = { answer } - for _, answer in ipairs(answers) do - if answer.priority < lowest_priority then - lowest_priority = answer.priority - l = { answer } - - elseif answer.priority == lowest_priority then - table_insert(l, answer) + elseif answer.priority == lowest_priority then + table_insert(l, answer) + end end + + answers.lowest_prio_records = l + return l end - answers.lowest_prio_records = l - return l -end + function _M.get_next_weighted_round_robin_answer(answers) + local l = answers.lowest_prio_records or filter_lowest_priority_answers(answers) -function 
_M.get_next_weighted_round_robin_answer(answers) - local l = answers.lowest_prio_records or filter_lowest_priority_answers(answers) + -- perform round robin selection on lowest priority answers @l + if not l[1].cw then + swrr_init(l) + end - -- perform round robin selection on lowest priority answers @l - if not l[1].cw then - swrr_init(l) + return swrr_next(l) end - - return swrr_next(l) end From 4f31f161baeb6ae2e22417c35380e4e514e75211 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 2 Apr 2024 17:25:51 +0800 Subject: [PATCH 088/126] re-insert hosts entries to cache if it is evicted --- kong/dns/client.lua | 48 ++++++++++++------- kong/dns/utils.lua | 10 +--- .../30-new-dns-client/02-old_client_spec.lua | 20 ++++++++ 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index b5a7f2e612ad..75eccb35cf00 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -128,44 +128,51 @@ end local init_hosts do - local function insert_answer_into_cache(cache, address, name, qtype) - -- insert via the `:get` callback to prevent inter-process communication - cache:get(name .. ":" .. qtype, nil, function() - return { + local function insert_answer_into_cache(cache, hosts_cache, address, name, qtype) + local key = name .. ":" .. 
qtype + local answers = { + ttl = LONG_LASTING_TTL, + expire = now() + LONG_LASTING_TTL, + { + name = name, + type = qtype, + address = address, + class = 1, ttl = LONG_LASTING_TTL, - expire = now() + LONG_LASTING_TTL, - { - name = name, - type = qtype, - address = address, - class = 1, - ttl = LONG_LASTING_TTL, - }, - }, nil, LONG_LASTING_TTL + }, + } + + -- insert via the `:get` callback to prevent inter-process communication + cache:get(key, nil, function() + return answers, nil, LONG_LASTING_TTL end) + + -- used for the host entry eviction + hosts_cache[key] = answers end -- insert hosts into cache function init_hosts(cache, path, preferred_ip_type) local hosts = parse_hosts(path) + local hosts_cache = {} for name, address in pairs(hosts) do name = string_lower(name) if address.ipv4 then - insert_answer_into_cache(cache, address.ipv4, name, TYPE_A) + insert_answer_into_cache(cache, hosts_cache, address.ipv4, name, TYPE_A) insert_last_type(cache, name, TYPE_A) end if address.ipv6 then - insert_answer_into_cache(cache, address.ipv6, name, TYPE_AAAA) + insert_answer_into_cache(cache, hosts_cache, address.ipv6, name, TYPE_AAAA) if not address.ipv4 or preferred_ip_type == TYPE_AAAA then insert_last_type(cache, name, TYPE_AAAA) end end end - return hosts + return hosts, hosts_cache end end @@ -286,7 +293,7 @@ function _M.new(opts) preferred_ip_type = preferred_ip_type or TYPE_A -- parse hosts - local hosts = init_hosts(cache, opts.hosts, preferred_ip_type) + local hosts, hosts_cache = init_hosts(cache, opts.hosts, preferred_ip_type) return setmetatable({ cache = cache, @@ -298,6 +305,7 @@ function _M.new(opts) error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + hosts_cache = hosts_cache, search_types = search_types, -- TODO: Make the table readonly. 
But if `string.buffer.encode/decode` and @@ -443,6 +451,12 @@ end local function resolve_name_type_callback(self, name, qtype, cache_only, short_key, tries) local key = name .. ":" .. qtype + -- check if this key exists in the hosts file (it maybe evicted from cache) + local answers = self.hosts_cache[key] + if answers then + return answers, nil, answers.ttl + end + -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then -- initiates an asynchronous background updating task to refresh it. local ttl, _, answers = self.cache:peek(key, true) diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 3fda5284b2ba..7b2f631951f9 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -170,7 +170,7 @@ end -- construct names from resolv options: search, ndots and domain function _M.search_names(name, resolv, hosts) - if not resolv.search or _M.is_fqdn(name, resolv.ndots) then + if not resolv.search or _M.is_fqdn(name, resolv.ndots) or (hosts and hosts[name]) then return { name } end @@ -179,13 +179,7 @@ function _M.search_names(name, resolv, hosts) table_insert(names, name .. "." .. suffix) end - -- Always search for the original name. - -- If found in the hosts file, prioritize its search. 
- if hosts and hosts[name] then - table_insert(names, 1, name) - else - table_insert(names, name) - end + table_insert(names, name) -- append the original name at last return names end diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index f1fa5b076f7b..fa77b0573244 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -172,6 +172,26 @@ describe("[DNS client]", function() answers = cli.cache:get("localhost:1") assert.is_nil(answers) end) + + it("cache evication", function() + writefile(hosts_path, "::1:2:3:4 localhost") + local cli = assert(client_new()) + + local answers = cli.cache:get("localhost:28") + assert.equal("[::1:2:3:4]", answers[1].address) + + -- evict it + cli.cache:delete("localhost:28") + answers = cli.cache:get("localhost:28") + assert.equal(nil, answers) + + -- resolve and re-insert it into cache + answers = cli:resolve("localhost") + assert.equal("[::1:2:3:4]", answers[1].address) + + answers = cli.cache:get("localhost:28") + assert.equal("[::1:2:3:4]", answers[1].address) + end) end) end) From 75f45c8d9a1852f163ebed8625f55279dfda028e Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 7 Apr 2024 09:57:29 +0800 Subject: [PATCH 089/126] chores: remove = aligning --- kong/dns/client.lua | 132 ++++++++++++++++++++++---------------------- kong/dns/utils.lua | 8 +-- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 75eccb35cf00..36e790100105 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -3,63 +3,63 @@ local utils = require("kong.dns.utils") local mlcache = require("kong.resty.mlcache") local resolver = require("resty.dns.resolver") -local now = ngx.now -local log = ngx.log -local ERR = ngx.ERR -local WARN = ngx.WARN -local DEBUG = ngx.DEBUG -local ALERT = ngx.ALERT -local timer_at = ngx.timer.at +local now = ngx.now +local 
log = ngx.log +local ERR = ngx.ERR +local WARN = ngx.WARN +local DEBUG = ngx.DEBUG +local ALERT = ngx.ALERT +local timer_at = ngx.timer.at local worker_id = ngx.worker.id -local pairs = pairs -local ipairs = ipairs -local math_min = math.min -local string_lower = string.lower -local table_insert = table.insert -local table_isempty = require("table.isempty") +local pairs = pairs +local ipairs = ipairs +local math_min = math.min +local string_lower = string.lower +local table_insert = table.insert +local table_isempty = require("table.isempty") -local parse_hosts = utils.parse_hosts -local ipv6_bracket = utils.ipv6_bracket -local search_names = utils.search_names -local get_next_round_robin_answer = utils.get_next_round_robin_answer -local get_next_weighted_round_robin_answer = utils.get_next_weighted_round_robin_answer +local parse_hosts = utils.parse_hosts +local ipv6_bracket = utils.ipv6_bracket +local search_names = utils.search_names +local get_next_round_robin_answer = utils.get_next_round_robin_answer +local get_next_weighted_round_robin_answer = utils.get_next_weighted_round_robin_answer local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks -- Constants and default values -local DEFAULT_ERROR_TTL = 1 -- unit: second -local DEFAULT_STALE_TTL = 4 -local DEFAULT_EMPTY_TTL = 30 +local DEFAULT_ERROR_TTL = 1 -- unit: second +local DEFAULT_STALE_TTL = 4 +local DEFAULT_EMPTY_TTL = 30 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings -local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 +local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 local PERSISTENT_CACHE_TTL = { ttl = 0 } -- used for mlcache:set local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } -local TYPE_SRV = resolver.TYPE_SRV -local TYPE_A = resolver.TYPE_A -local TYPE_AAAA = resolver.TYPE_AAAA -local TYPE_CNAME = resolver.TYPE_CNAME -local TYPE_LAST = -1 +local TYPE_SRV = resolver.TYPE_SRV +local TYPE_A = resolver.TYPE_A +local TYPE_AAAA = 
resolver.TYPE_AAAA +local TYPE_CNAME = resolver.TYPE_CNAME +local TYPE_LAST = -1 local NAME_TO_TYPE = { - SRV = TYPE_SRV, - A = TYPE_A, - AAAA = TYPE_AAAA, - CNAME = TYPE_CNAME, - LAST = TYPE_LAST, + SRV = TYPE_SRV, + A = TYPE_A, + AAAA = TYPE_AAAA, + CNAME = TYPE_CNAME, + LAST = TYPE_LAST, } local TYPE_TO_NAME = { - [TYPE_SRV] = "SRV", - [TYPE_A] = "A", - [TYPE_AAAA] = "AAAA", - [TYPE_CNAME] = "CNAME", - [TYPE_LAST] = "LAST", + [TYPE_SRV] = "SRV", + [TYPE_A] = "A", + [TYPE_AAAA] = "AAAA", + [TYPE_CNAME] = "CNAME", + [TYPE_LAST] = "LAST", } local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale @@ -72,23 +72,23 @@ local HIT_LEVEL_TO_NAME = { } -- server replied error from the DNS protocol -local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" +local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" -- client specific error -local CACHE_ONLY_ERROR_CODE = 100 -local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" +local CACHE_ONLY_ERROR_CODE = 100 +local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" local CACHE_ONLY_ANSWERS = { errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE } -local EMPTY_RECORD_ERROR_CODE = 101 -local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" +local EMPTY_RECORD_ERROR_CODE = 101 +local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" -- APIs local _M = { - TYPE_SRV = TYPE_SRV, - TYPE_A = TYPE_A, - TYPE_AAAA = TYPE_AAAA, - TYPE_CNAME = TYPE_CNAME, - TYPE_LAST = TYPE_LAST, + TYPE_SRV = TYPE_SRV, + TYPE_A = TYPE_A, + TYPE_AAAA = TYPE_AAAA, + TYPE_CNAME = TYPE_CNAME, + TYPE_LAST = TYPE_LAST, } local MT = { __index = _M } @@ -198,9 +198,9 @@ function _M.new(opts) end local r_opts = { - retrans = opts.retrans or resolv.options.attempts or 5, - timeout = opts.timeout or resolv.options.timeout or 2000, -- ms - no_random = opts.no_random or not resolv.options.rotate, + retrans = opts.retrans or resolv.options.attempts or 5, + timeout = opts.timeout or 
resolv.options.timeout or 2000, -- ms + no_random = opts.no_random or not resolv.options.rotate, nameservers = nameservers, } @@ -254,9 +254,9 @@ function _M.new(opts) } local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { - ipc = ipc, - neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - lru_size = opts.cache_size or 10000, + ipc = ipc, + neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + lru_size = opts.cache_size or 10000, resty_lock_opts = resty_lock_opts, }) @@ -296,17 +296,17 @@ function _M.new(opts) local hosts, hosts_cache = init_hosts(cache, opts.hosts, preferred_ip_type) return setmetatable({ - cache = cache, - stats = {}, - hosts = hosts, - r_opts = r_opts, - resolv = opts._resolv or resolv, - valid_ttl = opts.valid_ttl, - error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, - stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, - empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, - hosts_cache = hosts_cache, - search_types = search_types, + cache = cache, + stats = {}, + hosts = hosts, + r_opts = r_opts, + resolv = opts._resolv or resolv, + valid_ttl = opts.valid_ttl, + error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, + empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + hosts_cache = hosts_cache, + search_types = search_types, -- TODO: Make the table readonly. But if `string.buffer.encode/decode` and -- `pl.tablex.readonly` are called on it, it will become empty table. 
@@ -314,8 +314,8 @@ function _M.new(opts) -- quickly accessible constant empty answers EMPTY_ANSWERS = { errcode = EMPTY_RECORD_ERROR_CODE, - errstr = EMPTY_RECORD_ERROR_MESSAGE, - ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + errstr = EMPTY_RECORD_ERROR_MESSAGE, + ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, }, }, MT) end diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 7b2f631951f9..1847f9ed8950 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -3,13 +3,13 @@ local utils = require("kong.resty.dns.utils") local log = ngx.log local NOTICE = ngx.NOTICE -local math_random = math.random -local table_insert = table.insert -local table_remove = table.remove +local math_random = math.random +local table_insert = table.insert +local table_remove = table.remove local readlines = require("pl.utils").readlines -local DEFAULT_HOSTS_FILE = "/etc/hosts" +local DEFAULT_HOSTS_FILE = "/etc/hosts" local DEFAULT_RESOLV_CONF = "/etc/resolv.conf" local LOCALHOST = { From 47a895d53ae5c14ea846e0aebf09f55c0e1b8a94 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 7 Apr 2024 10:13:14 +0800 Subject: [PATCH 090/126] remove empty table creation in hot code paths --- kong/dns/client.lua | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 36e790100105..53268a8328b1 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -494,6 +494,10 @@ end -- detect circular references in DNS CNAME or SRV records local function detect_recursion(resolved_names, key) + if not resolved_names then + return + end + local detected = resolved_names[key] resolved_names[key] = true return detected @@ -639,7 +643,8 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, - key, tries, resolved_names) + key, tries, resolve_names or { [key] = 
true }) + if not cache_only and answers then -- If another worker resolved the name between these two `:get`, it can -- work as expected and will not introduce a race condition. @@ -667,7 +672,8 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) -- dereference CNAME if qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then stats_increment(self.stats, name, "cname") - return resolve_all(self, answers[1].cname, qtype, cache_only, tries, resolved_names) + return resolve_all(self, answers[1].cname, qtype, cache_only, tries, + resolved_names or { [key] = true }) end return answers, err, tries @@ -675,7 +681,7 @@ end function _M:resolve(name, qtype, cache_only, tries) - return resolve_all(self, name, qtype, cache_only, tries, {}) + return resolve_all(self, name, qtype, cache_only, tries) end From 2cb279c7cf8d7d058978dfbe0b388de7e129fc42 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 7 Apr 2024 11:24:56 +0800 Subject: [PATCH 091/126] fix lint error: resolve_names -> resolved_names --- kong/dns/client.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 53268a8328b1..98898ee73533 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -643,7 +643,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, - key, tries, resolve_names or { [key] = true }) + key, tries, resolved_names or { [key] = true }) if not cache_only and answers then -- If another worker resolved the name between these two `:get`, it can From 4727cf794161f7961f87e792bbbf2cd443e6f92f Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 7 Apr 2024 12:14:00 +0800 Subject: [PATCH 092/126] chores: fix a couple of missing localizations --- kong/dns/client.lua | 3 +++ kong/dns/utils.lua | 3 +++ 2 files 
changed, 6 insertions(+) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 98898ee73533..4f6d9453544a 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -14,6 +14,9 @@ local worker_id = ngx.worker.id local pairs = pairs local ipairs = ipairs +local tonumber = tonumber +local setmetatable = setmetatable + local math_min = math.min local string_lower = string.lower local table_insert = table.insert diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 1847f9ed8950..d3b702cd8f67 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -3,6 +3,9 @@ local utils = require("kong.resty.dns.utils") local log = ngx.log local NOTICE = ngx.NOTICE +local type = type +local ipairs = ipairs +local tonumber = tonumber local math_random = math.random local table_insert = table.insert local table_remove = table.remove From 7c5fd97e8d7623ca31cb17838172a2483c6b9536 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Apr 2024 09:31:23 +0800 Subject: [PATCH 093/126] fix opts initialization in _M.init() --- kong/dns/client.lua | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 4f6d9453544a..8c4b31c58b9f 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -712,11 +712,12 @@ end local dns_client function _M.init(opts) - opts = opts or {} - opts.valid_ttl = opts.validTtl - opts.error_ttl = opts.badTtl - opts.stale_ttl = opts.staleTtl - opts.cache_size = opts.cacheSize + if opts then + opts.valid_ttl = opts.valid_ttl or opts.validTtl + opts.error_ttl = opts.error_ttl or opts.badTtl + opts.stale_ttl = opts.stale_ttl or opts.staleTtl + opts.cache_size = opts.cache_size or opts.cacheSize + end local client, err = _M.new(opts) if not client then From 444f47bc2e3ba7946a98b27e569822ad87e7ea07 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Apr 2024 10:00:12 +0800 Subject: [PATCH 094/126] remove local variable options for r:query --- kong/dns/client.lua | 3 +-- 1 file 
changed, 1 insertion(+), 2 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 8c4b31c58b9f..7e71e9c20945 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -399,8 +399,7 @@ local function resolve_query(self, name, qtype) local start_time = now() - local options = { additional_section = true, qtype = qtype } - local answers, err = r:query(name, options) + local answers, err = r:query(name, { additional_section = true, qtype = qtype }) r:destroy() local query_time = now() - start_time -- the time taken for the DNS query From 96f9329ff130c1eb34023217a008d8573ce8a1ec Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Apr 2024 12:01:22 +0800 Subject: [PATCH 095/126] avoid checking for `ngx.ctx.has_timing` in recursion --- kong/dns/client.lua | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 7e71e9c20945..282d9c1ac9d6 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -334,7 +334,8 @@ local function process_answers(self, qname, qtype, answers) local processed_answers = {} local cname_answer - local ttl = math_min(self.valid_ttl or 0xffffffff, 0xffffffff) -- 0xffffffff for maximum TTL value + -- 0xffffffff for maximum TTL value + local ttl = math_min(self.valid_ttl or 0xffffffff, 0xffffffff) for _, answer in ipairs(answers) do answer.name = string_lower(answer.name) @@ -497,7 +498,7 @@ end -- detect circular references in DNS CNAME or SRV records local function detect_recursion(resolved_names, key) if not resolved_names then - return + return nil end local detected = resolved_names[key] @@ -506,7 +507,7 @@ local function detect_recursion(resolved_names, key) end -local function resolve_name_type(self, name, qtype, cache_only, short_key, tries, resolved_names) +local function resolve_name_type(self, name, qtype, cache_only, short_key, tries, resolved_names, has_timing) local key = name .. ":" .. 
qtype stats_init_name(self.stats, key) @@ -527,8 +528,7 @@ local function resolve_name_type(self, name, qtype, cache_only, short_key, tries log(DEBUG, "cache lookup ", key, " ans:", answers and #answers or "-", " hlv:", hit_level or "-") - local ctx = ngx.ctx - if ctx and ctx.has_timing then + if has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", (hit_level and hit_level < HIT_L3)) end @@ -589,7 +589,7 @@ end -- resolve all `name`s and `type`s combinations and return first usable answers -local function resolve_names_and_types(self, name, typ, cache_only, short_key, tries, resolved_names) +local function resolve_names_and_types(self, name, typ, cache_only, short_key, tries, resolved_names, has_timing) local answers = check_and_get_ip_answers(name) if answers then -- domain name is IP literal answers.ttl = LONG_LASTING_TTL @@ -606,7 +606,7 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, t for _, qtype in ipairs(types) do for _, qname in ipairs(names) do answers, err = resolve_name_type(self, qname, qtype, cache_only, - short_key, tries, resolved_names) + short_key, tries, resolved_names, has_timing) -- severe error occurred if not answers then return nil, err, tries @@ -624,7 +624,7 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, t end -local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) +local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, has_timing) name = string_lower(name) tries = setmetatable(tries or {}, TRIES_MT) @@ -645,7 +645,9 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, - key, tries, resolved_names or { [key] = true }) + key, tries, + resolved_names or { [key] = true }, + has_timing) if not cache_only and answers then -- If 
another worker resolved the name between these two `:get`, it can @@ -662,8 +664,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) else log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, " hlv:", hit_level or "-") - local ctx = ngx.ctx - if ctx and ctx.has_timing then + if has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", (hit_level and hit_level < HIT_L3)) end @@ -675,7 +676,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names) if qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then stats_increment(self.stats, name, "cname") return resolve_all(self, answers[1].cname, qtype, cache_only, tries, - resolved_names or { [key] = true }) + resolved_names or { [key] = true }, has_timing) end return answers, err, tries @@ -683,29 +684,39 @@ end function _M:resolve(name, qtype, cache_only, tries) - return resolve_all(self, name, qtype, cache_only, tries) + return resolve_all(self, name, qtype, cache_only, tries, nil, + ngx.ctx and ngx.ctx.has_timing) end -function _M:resolve_address(name, port, cache_only, tries, resolved_names) +-- Implement `resolve_address` separately as `_resolve_address` with the `has_timing` +-- parameter so that it avoids checking for `ngx.ctx.has_timing` in recursion. 
+local function _resolve_address(self, name, port, cache_only, tries, resolved_names, has_timing) resolved_names = resolved_names or {} - local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, resolved_names) + local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, + resolved_names, has_timing) if not answers then return nil, err, tries end - -- non-nil answers and return_random if answers[1].type == TYPE_SRV then local answer = get_next_weighted_round_robin_answer(answers) port = (answer.port ~= 0 and answer.port) or port - return self:resolve_address(answer.target, port, cache_only, tries, resolved_names) + return _resolve_address(self, answer.target, port, cache_only, tries, + resolved_names, has_timing) end return get_next_round_robin_answer(answers).address, port, tries end +function _M:resolve_address(name, port, cache_only, tries) + return _resolve_address(self, name, port, cache_only, tries, nil, + ngx.ctx and ngx.ctx.has_timing) +end + + -- compatible with original DNS client library -- These APIs will be deprecated if fully replacing the original one. 
local dns_client From afdfcb6ca129d7aeb25fa076bddd201b9c702775 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Apr 2024 14:27:57 +0800 Subject: [PATCH 096/126] use `legacy_dns_client` switch to check if we need to reply 501 in /status/dns --- kong/api/routes/kong.lua | 2 +- .../04-admin_api/26-dns_client_spec.lua | 37 ++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index c4621d8b216b..633083a6d5fb 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -272,7 +272,7 @@ return { }, ["/status/dns"] = { GET = function (self, db, helpers) - if not kong.dns.stats then + if kong.configuration.legacy_dns_client then return kong.response.exit(501, { message = "not implemented with the legacy DNS client" }) end diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index 641d99d4bae7..14bc52e7bc69 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -24,7 +24,7 @@ for _, strategy in helpers.each_strategy() do helpers.stop_kong() end) - it("/status/dns", function () + it("/status/dns - status code 200", function () local res = assert(client:send { method = "GET", path = "/status/dns", @@ -41,4 +41,39 @@ for _, strategy in helpers.each_strategy() do assert(type(json.stats["127.0.0.1"].runs) == "number") end) end) + + describe("Admin API - DNS client route with [#" .. strategy .. 
"]" , function() + local client + + lazy_setup(function() + helpers.get_db_utils(strategy) + + assert(helpers.start_kong({ + database = strategy, + nginx_conf = "spec/fixtures/custom_nginx.template", + legacy_dns_client = true, + })) + + client = helpers.admin_client() + end) + + teardown(function() + if client then + client:close() + end + helpers.stop_kong() + end) + + it("/status/dns - status code 501", function () + local res = assert(client:send { + method = "GET", + path = "/status/dns", + headers = { ["Content-Type"] = "application/json" } + }) + + local body = assert.res_status(501, res) + local json = cjson.decode(body) + assert.same("not implemented with the legacy DNS client", json.message) + end) + end) end From b1fa80ae1e90d54c1c317f96c191773428398733 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Apr 2024 16:08:30 +0800 Subject: [PATCH 097/126] added debug log for EE test cases --- kong/dns/client.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 282d9c1ac9d6..b91eb882c21f 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -722,6 +722,8 @@ end local dns_client function _M.init(opts) + log(DEBUG, "(re)configuring dns client") + if opts then opts.valid_ttl = opts.valid_ttl or opts.validTtl opts.error_ttl = opts.error_ttl or opts.badTtl From 8ebc53812953c06c76e9e730c20fbf7d9ca4c292 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Apr 2024 17:24:35 +0800 Subject: [PATCH 098/126] chores: fixed lines exceeding 80 characters by a large margin --- kong/dns/client.lua | 57 ++++++++++++++++++++++++++++++--------------- kong/dns/utils.lua | 7 ++++-- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index b91eb882c21f..abc7046f174d 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -76,10 +76,15 @@ local HIT_LEVEL_TO_NAME = { -- server replied error from the DNS protocol local NAME_ERROR_CODE = 3 -- response code 
3 as "Name Error" or "NXDOMAIN" + -- client specific error local CACHE_ONLY_ERROR_CODE = 100 local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" -local CACHE_ONLY_ANSWERS = { errcode = CACHE_ONLY_ERROR_CODE, errstr = CACHE_ONLY_ERROR_MESSAGE } +local CACHE_ONLY_ANSWERS = { + errcode = CACHE_ONLY_ERROR_CODE, + errstr = CACHE_ONLY_ERROR_MESSAGE, +} + local EMPTY_RECORD_ERROR_CODE = 101 local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" @@ -194,8 +199,10 @@ function _M.new(opts) end -- init the resolver options for lua-resty-dns - local nameservers = (opts.nameservers and not table_isempty(opts.nameservers)) and - opts.nameservers or resolv.nameservers + local nameservers = (opts.nameservers and not table_isempty(opts.nameservers)) + and opts.nameservers + or resolv.nameservers + if not nameservers or table_isempty(nameservers) then log(WARN, "Invalid configuration, no nameservers specified") end @@ -408,7 +415,8 @@ local function resolve_query(self, name, qtype) stats_set_count(self.stats, key, "query_last_time", time_str) - log(DEBUG, "r:query(", key, ") ans:", answers and #answers or "-", " t:", time_str) + log(DEBUG, "r:query(", key, ") ans:", answers and #answers or "-", + " t:", time_str) if not answers then stats_increment(self.stats, key, "query_fail_nameserver") @@ -418,8 +426,9 @@ local function resolve_query(self, name, qtype) answers = process_answers(self, name, qtype, answers) - stats_increment(self.stats, key, answers.errstr and "query_fail:" .. answers.errstr - or "query_succ") + stats_increment(self.stats, key, answers.errstr and + "query_fail:" .. answers.errstr or + "query_succ") return answers, nil, answers.ttl end @@ -451,7 +460,8 @@ local function start_stale_update_task(self, key, name, qtype, short_key) end -local function resolve_name_type_callback(self, name, qtype, cache_only, short_key, tries) +local function resolve_name_type_callback(self, name, qtype, cache_only, + short_key, tries) local key = name .. ":" .. 
qtype -- check if this key exists in the hosts file (it maybe evicted from cache) @@ -507,7 +517,8 @@ local function detect_recursion(resolved_names, key) end -local function resolve_name_type(self, name, qtype, cache_only, short_key, tries, resolved_names, has_timing) +local function resolve_name_type(self, name, qtype, cache_only, short_key, + tries, resolved_names, has_timing) local key = name .. ":" .. qtype stats_init_name(self.stats, key) @@ -526,7 +537,8 @@ local function resolve_name_type(self, name, qtype, cache_only, short_key, tries log(ALERT, err) end - log(DEBUG, "cache lookup ", key, " ans:", answers and #answers or "-", " hlv:", hit_level or "-") + log(DEBUG, "cache lookup ", key, " ans:", answers and #answers or "-", + " hlv:", hit_level or "-") if has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", @@ -589,7 +601,9 @@ end -- resolve all `name`s and `type`s combinations and return first usable answers -local function resolve_names_and_types(self, name, typ, cache_only, short_key, tries, resolved_names, has_timing) +local function resolve_names_and_types(self, name, typ, cache_only, short_key, + tries, resolved_names, has_timing) + local answers = check_and_get_ip_answers(name) if answers then -- domain name is IP literal answers.ttl = LONG_LASTING_TTL @@ -606,7 +620,8 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, t for _, qtype in ipairs(types) do for _, qname in ipairs(names) do answers, err = resolve_name_type(self, qname, qtype, cache_only, - short_key, tries, resolved_names, has_timing) + short_key, tries, resolved_names, + has_timing) -- severe error occurred if not answers then return nil, err, tries @@ -624,7 +639,8 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, t end -local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, has_timing) +local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, + has_timing) 
name = string_lower(name) tries = setmetatable(tries or {}, TRIES_MT) @@ -645,9 +661,9 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, - key, tries, - resolved_names or { [key] = true }, - has_timing) + key, tries, + resolved_names or { [key] = true }, + has_timing) if not cache_only and answers then -- If another worker resolved the name between these two `:get`, it can @@ -662,7 +678,8 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, stats_increment(self.stats, name, answers and "miss" or "fail") else - log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, " hlv:", hit_level or "-") + log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, + " hlv:", hit_level or "-") if has_timing then req_dyn_hook_run_hooks("timing", "dns:cache_lookup", @@ -689,9 +706,11 @@ function _M:resolve(name, qtype, cache_only, tries) end --- Implement `resolve_address` separately as `_resolve_address` with the `has_timing` --- parameter so that it avoids checking for `ngx.ctx.has_timing` in recursion. -local function _resolve_address(self, name, port, cache_only, tries, resolved_names, has_timing) +-- Implement `resolve_address` separately as `_resolve_address` with the +-- `has_timing` parameter so that it avoids checking for `ngx.ctx.has_timing` +-- in recursion. 
+local function _resolve_address(self, name, port, cache_only, tries, + resolved_names, has_timing) resolved_names = resolved_names or {} local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index d3b702cd8f67..2491fc359b57 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -173,7 +173,9 @@ end -- construct names from resolv options: search, ndots and domain function _M.search_names(name, resolv, hosts) - if not resolv.search or _M.is_fqdn(name, resolv.ndots) or (hosts and hosts[name]) then + if not resolv.search or _M.is_fqdn(name, resolv.ndots) or + (hosts and hosts[name]) + then return { name } end @@ -241,7 +243,8 @@ do -- gather records with the lowest priority in SRV record local function filter_lowest_priority_answers(answers) - local lowest_priority = answers[1].priority -- SRV record MUST have `priority` field + -- SRV record MUST have `priority` field + local lowest_priority = answers[1].priority local l = {} -- lowest priority records list for _, answer in ipairs(answers) do From 95fc78e48f094edb7a8900fe75112e8d9403ed61 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 30 May 2024 17:04:10 +0800 Subject: [PATCH 099/126] compatible with the modified req dyn debug API --- kong/dns/client.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index abc7046f174d..46cf2122a571 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -28,7 +28,7 @@ local search_names = utils.search_names local get_next_round_robin_answer = utils.get_next_round_robin_answer local get_next_weighted_round_robin_answer = utils.get_next_weighted_round_robin_answer -local req_dyn_hook_run_hooks = require("kong.dynamic_hook").run_hooks +local req_dyn_hook_run_hook = require("kong.dynamic_hook").run_hook -- Constants and default values @@ -541,7 +541,7 @@ local function resolve_name_type(self, name, qtype, cache_only, short_key,
" hlv:", hit_level or "-") if has_timing then - req_dyn_hook_run_hooks("timing", "dns:cache_lookup", + req_dyn_hook_run_hook("timing", "dns:cache_lookup", (hit_level and hit_level < HIT_L3)) end @@ -682,7 +682,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, " hlv:", hit_level or "-") if has_timing then - req_dyn_hook_run_hooks("timing", "dns:cache_lookup", + req_dyn_hook_run_hook("timing", "dns:cache_lookup", (hit_level and hit_level < HIT_L3)) end From 22cd510ae92ddf77ebff34e63d0d7ec1964753c3 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 29 May 2024 15:51:36 +0800 Subject: [PATCH 100/126] remove the logic of CNAME and recursive detection --- kong/dns/README.md | 3 +- kong/dns/client.lua | 92 ++++-------- spec/01-unit/14-dns_spec.lua | 1 + .../30-new-dns-client/02-old_client_spec.lua | 139 ++---------------- .../03-old_client_cache_spec.lua | 38 +---- .../30-new-dns-client/05-client_stat_spec.lua | 29 ---- spec/02-integration/05-proxy/05-dns_spec.lua | 2 +- .../01-instrumentations_spec.lua | 4 +- .../01-request-debug_spec.lua | 1 + 9 files changed, 48 insertions(+), 261 deletions(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index 5b634140867a..f41335eac87e 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -45,7 +45,7 @@ Performs a series of initialization operations: * the path of `hosts` file. * `resolv_conf`: (default: `/etc/resolv.conf`) * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. -* `order`: (default: `{ "LAST", "SRV", "A", "AAAA", "CNAME" }`) +* `order`: (default: `{ "LAST", "SRV", "A", "AAAA" }`) * the order in which to resolve different record types, it's similar to the option `dns_order` in `kong.conf`. * The `LAST` type means the type of the last successful lookup (for the specified name). * `enable_ipv6`: (default: `true`) @@ -93,7 +93,6 @@ Performs a DNS resolution. 
* If the server returns a non-zero error code, it will return `nil` and a string describing the error in this record. * For exmaple, `nil, "dns server error: name error"`, the server returned a result with error code 3 (NXDOMAIN). * In case of severe errors, such network error or server's malformed DNS record response, it will return `nil` and a string describing the error instead. For example: - * `nil, "recursion detected for name: example.com:5"`, it detected a loop or recursion while attempting to resolve `example.com:CNAME`. * `nil, "dns server error: failed to send request to UDP server 10.0.0.1:53: timeout"`, there was a network issue. * Return value and input parameter `@tries?`: * If provided as an empty table, it will be returned as a third result. This table will be an array containing the error message for each (if any) failed try. diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 46cf2122a571..64f6c08ff70e 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -41,19 +41,17 @@ local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 local PERSISTENT_CACHE_TTL = { ttl = 0 } -- used for mlcache:set -local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA", "CNAME" } +local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA" } local TYPE_SRV = resolver.TYPE_SRV local TYPE_A = resolver.TYPE_A local TYPE_AAAA = resolver.TYPE_AAAA -local TYPE_CNAME = resolver.TYPE_CNAME local TYPE_LAST = -1 local NAME_TO_TYPE = { SRV = TYPE_SRV, A = TYPE_A, AAAA = TYPE_AAAA, - CNAME = TYPE_CNAME, LAST = TYPE_LAST, } @@ -61,7 +59,6 @@ local TYPE_TO_NAME = { [TYPE_SRV] = "SRV", [TYPE_A] = "A", [TYPE_AAAA] = "AAAA", - [TYPE_CNAME] = "CNAME", [TYPE_LAST] = "LAST", } @@ -95,7 +92,6 @@ local _M = { TYPE_SRV = TYPE_SRV, TYPE_A = TYPE_A, TYPE_AAAA = TYPE_AAAA, - TYPE_CNAME = TYPE_CNAME, TYPE_LAST = TYPE_LAST, } local MT = { __index = _M } @@ -284,10 +280,17 @@ function _M.new(opts) end local order = opts.order or DEFAULT_ORDER + local search_types = {} local preferred_ip_type for i, 
typstr in ipairs(order) do + + -- TODO: delete this compatibility code in subsequent commits + if typstr:upper() == "CNAME" then + goto continue + end + local qtype = NAME_TO_TYPE[typstr:upper()] if not qtype then return nil, "Invalid dns record type in order array: " .. typstr @@ -298,6 +301,8 @@ function _M.new(opts) if (qtype == TYPE_A or qtype == TYPE_AAAA) and not preferred_ip_type then preferred_ip_type = qtype end + + ::continue:: end preferred_ip_type = preferred_ip_type or TYPE_A @@ -339,7 +344,6 @@ local function process_answers(self, qname, qtype, answers) end local processed_answers = {} - local cname_answer -- 0xffffffff for maximum TTL value local ttl = math_min(self.valid_ttl or 0xffffffff, 0xffffffff) @@ -355,10 +359,7 @@ local function process_answers(self, qname, qtype, answers) local answer_type = answer.type - if answer_type == TYPE_CNAME then - cname_answer = answer -- use the last one as the real cname - - elseif answer_type == qtype then + if answer_type == qtype then -- compatible with balancer, see https://github.com/Kong/kong/pull/3088 if answer_type == TYPE_AAAA then answer.address = ipv6_bracket(answer.address) @@ -376,19 +377,12 @@ local function process_answers(self, qname, qtype, answers) end if table_isempty(processed_answers) then - if not cname_answer then - log(DEBUG, "processed ans:empty") - return self.EMPTY_ANSWERS - end - - processed_answers[1] = cname_answer - - log(DEBUG, "processed cname:", cname_answer.cname) - - else - log(DEBUG, "processed ans:", #processed_answers) + log(DEBUG, "processed ans:empty") + return self.EMPTY_ANSWERS end + log(DEBUG, "processed ans:", #processed_answers) + processed_answers.expire = now() + ttl processed_answers.ttl = ttl @@ -505,29 +499,12 @@ local function resolve_name_type_callback(self, name, qtype, cache_only, end --- detect circular references in DNS CNAME or SRV records -local function detect_recursion(resolved_names, key) - if not resolved_names then - return nil - end - - local 
detected = resolved_names[key] - resolved_names[key] = true - return detected -end - - local function resolve_name_type(self, name, qtype, cache_only, short_key, - tries, resolved_names, has_timing) + tries, has_timing) local key = name .. ":" .. qtype stats_init_name(self.stats, key) - if detect_recursion(resolved_names, key) then - stats_increment(self.stats, key, "fail_recur") - return nil, "recursion detected for name: " .. key - end - local answers, err, hit_level = self.cache:get(key, nil, resolve_name_type_callback, self, name, qtype, cache_only, @@ -602,7 +579,7 @@ end -- resolve all `name`s and `type`s combinations and return first usable answers local function resolve_names_and_types(self, name, typ, cache_only, short_key, - tries, resolved_names, has_timing) + tries, has_timing) local answers = check_and_get_ip_answers(name) if answers then -- domain name is IP literal @@ -620,8 +597,7 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, for _, qtype in ipairs(types) do for _, qname in ipairs(names) do answers, err = resolve_name_type(self, qname, qtype, cache_only, - short_key, tries, resolved_names, - has_timing) + short_key, tries, has_timing) -- severe error occurred if not answers then return nil, err, tries @@ -639,8 +615,7 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, end -local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, - has_timing) +local function resolve_all(self, name, qtype, cache_only, tries, has_timing) name = string_lower(name) tries = setmetatable(tries or {}, TRIES_MT) @@ -650,20 +625,13 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, stats_init_name(self.stats, name) stats_increment(self.stats, name, "runs") - if detect_recursion(resolved_names, key) then - stats_increment(self.stats, name, "fail_recur") - return nil, "recursion detected for name: " .. 
name - end - -- quickly lookup with the key "short::all" or "short::" local answers, err, hit_level = self.cache:get(key) if not answers then log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, - key, tries, - resolved_names or { [key] = true }, - has_timing) + key, tries, has_timing) if not cache_only and answers then -- If another worker resolved the name between these two `:get`, it can @@ -689,19 +657,12 @@ local function resolve_all(self, name, qtype, cache_only, tries, resolved_names, stats_increment(self.stats, name, HIT_LEVEL_TO_NAME[hit_level]) end - -- dereference CNAME - if qtype ~= TYPE_CNAME and answers and answers[1].type == TYPE_CNAME then - stats_increment(self.stats, name, "cname") - return resolve_all(self, answers[1].cname, qtype, cache_only, tries, - resolved_names or { [key] = true }, has_timing) - end - return answers, err, tries end function _M:resolve(name, qtype, cache_only, tries) - return resolve_all(self, name, qtype, cache_only, tries, nil, + return resolve_all(self, name, qtype, cache_only, tries, ngx.ctx and ngx.ctx.has_timing) end @@ -709,12 +670,9 @@ end -- Implement `resolve_address` separately as `_resolve_address` with the -- `has_timing` parameter so that it avoids checking for `ngx.ctx.has_timing` -- in recursion. 
-local function _resolve_address(self, name, port, cache_only, tries, - resolved_names, has_timing) - resolved_names = resolved_names or {} - +local function _resolve_address(self, name, port, cache_only, tries, has_timing) local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, - resolved_names, has_timing) + has_timing) if not answers then return nil, err, tries end @@ -723,7 +681,7 @@ local function _resolve_address(self, name, port, cache_only, tries, local answer = get_next_weighted_round_robin_answer(answers) port = (answer.port ~= 0 and answer.port) or port return _resolve_address(self, answer.target, port, cache_only, tries, - resolved_names, has_timing) + has_timing) end return get_next_round_robin_answer(answers).address, port, tries @@ -731,7 +689,7 @@ end function _M:resolve_address(name, port, cache_only, tries) - return _resolve_address(self, name, port, cache_only, tries, nil, + return _resolve_address(self, name, port, cache_only, tries, ngx.ctx and ngx.ctx.has_timing) end diff --git a/spec/01-unit/14-dns_spec.lua b/spec/01-unit/14-dns_spec.lua index fda591d4df64..677977593cf3 100644 --- a/spec/01-unit/14-dns_spec.lua +++ b/spec/01-unit/14-dns_spec.lua @@ -29,6 +29,7 @@ local function setup_it_block() nameservers = { "198.51.100.0" }, enable_ipv6 = true, order = { "LAST", "SRV", "A", "CNAME" }, + cache_purge = true, } end diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index fa77b0573244..8d2ad46cf0af 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -230,9 +230,6 @@ describe("[DNS client]", function() 'host.one.com:28', 'host.two.com:28', 'host:28', - 'host.one.com:5', - 'host.two.com:5', - 'host:5', }, list) end) @@ -253,7 +250,6 @@ describe("[DNS client]", function() 'host:33', 'host:1', 'host:28', - 'host:5', }, list) end) @@ -277,8 +273,6 @@ describe("[DNS client]", 
function() 'host:1', 'host.local.domain.com:28', 'host:28', - 'host.local.domain.com:5', - 'host:5', }, list) end) @@ -296,9 +290,6 @@ describe("[DNS client]", function() cli:resolve("host") assert.same({ - 'host.one.com:5', - 'host.two.com:5', - 'host:5', 'host.one.com:33', 'host.two.com:33', 'host:33', @@ -328,7 +319,6 @@ describe("[DNS client]", function() 'host.:33', 'host.:1', 'host.:28', - 'host.:5', }, list) end) @@ -347,7 +337,6 @@ describe("[DNS client]", function() 'host.:33', 'host.:1', 'host.:28', - 'host.:5', }, list) end) @@ -366,7 +355,6 @@ describe("[DNS client]", function() 'host.:33', 'host.:1', 'host.:28', - 'host.:5', }, list) end) @@ -383,7 +371,6 @@ describe("[DNS client]", function() cli:resolve("host.") assert.same({ - 'host.:5', 'host.:33', 'host.:1', 'host.:28', @@ -515,7 +502,6 @@ describe("[DNS client]", function() 'local.host:33', 'local.host:1', 'local.host:28', - 'local.host:5', }, list) end) @@ -727,7 +713,7 @@ describe("[DNS client]", function() local host = "smtp."..TEST_DOMAIN local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) - local answers = assert(cli:resolve(host)) + assert(cli:resolve(host)) -- check first CNAME local key1 = host .. ":" .. 
resolver.TYPE_CNAME @@ -739,31 +725,21 @@ describe("[DNS client]", function() end assert.same({ - ["kong-gateway-testing.link"] = { - miss = 1, - runs = 1, - }, - ["kong-gateway-testing.link:1"] = { - query = 1, - query_succ = 1 - }, - ["kong-gateway-testing.link:33"] = { - query = 1, - ["query_fail:empty record received"] = 1 - }, ["smtp.kong-gateway-testing.link"] = { - cname = 1, - miss = 1, - runs = 1 + miss = 1, + runs = 1 + }, + ["smtp.kong-gateway-testing.link:1"] = { + query = 1, + query_succ = 1 }, ["smtp.kong-gateway-testing.link:33"] = { - query = 1, - query_succ = 1 - } - }, cli.stats) + query = 1, + ["query_fail:empty record received"] = 1 } + }, cli.stats) -- check last successful lookup references - local lastsuccess = cli:_get_last_type(answers[1].name) + local lastsuccess = cli:_get_last_type(host) assert.are.equal(resolver.TYPE_A, lastsuccess) end) @@ -887,70 +863,6 @@ describe("[DNS client]", function() assert.equal("["..address.."]", answers[1].target) end) - it("recursive lookups failure - single resolve", function() - query_func = function(self, original_query_func, name, opts) - if name ~= "hello.world" and (opts or {}).qtype ~= resolver.TYPE_CNAME then - return original_query_func(self, name, opts) - end - return {{ - type = resolver.TYPE_CNAME, - cname = "hello.world", - class = 1, - name = "hello.world", - ttl = 30, - }} - end - - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - local answers, err, _ = cli:resolve("hello.world") - assert.is_nil(answers) - assert.are.equal("recursion detected for name: hello.world", err) - end) - - it("recursive lookups failure - single", function() - local entry1 = {{ - type = resolver.TYPE_CNAME, - cname = "hello.world", - class = 1, - name = "hello.world", - ttl = 0, - }} - - -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - -- insert in the cache - 
cli.cache:set(entry1[1].name .. ":" .. entry1[1].type, { ttl = 0 }, entry1) - local answers, err, _ = cli:resolve("hello.world", { cache_only = true }) - assert.is_nil(answers) - assert.are.equal("recursion detected for name: hello.world", err) - end) - - it("recursive lookups failure - multi", function() - local entry1 = {{ - type = resolver.TYPE_CNAME, - cname = "bye.bye.world", - class = 1, - name = "hello.world", - ttl = 0, - }} - local entry2 = {{ - type = resolver.TYPE_CNAME, - cname = "hello.world", - class = 1, - name = "bye.bye.world", - ttl = 0, - }} - - -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - -- insert in the cache - cli.cache:set(entry1[1].name .. ":" .. entry1[1].type, { ttl = 0 }, entry1) - cli.cache:set(entry2[1].name .. ":" .. entry2[1].type, { ttl = 0 }, entry2) - local answers, err, _ = cli:resolve("hello.world", { cache_only = true }) - assert.is_nil(answers) - assert.are.equal("recursion detected for name: hello.world", err) - end) - it("resolving from the /etc/hosts file; preferred A or AAAA order", function() writefile(hosts_path, { "127.3.2.1 localhost", @@ -958,13 +870,13 @@ describe("[DNS client]", function() }) local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - order = {"SRV", "CNAME", "A", "AAAA"} + order = {"SRV", "A", "AAAA"} })) assert.equal(resolver.TYPE_A, cli:_get_last_type("localhost")) -- success set to A as it is the preferred option local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - order = {"SRV", "CNAME", "AAAA", "A"} + order = {"SRV", "AAAA", "A"} })) assert.equal(resolver.TYPE_AAAA, cli:_get_last_type("localhost")) -- success set to AAAA as it is the preferred option end) @@ -1264,31 +1176,6 @@ describe("[DNS client]", function() assert.is_nil(ip) assert.is.string(port) -- error message end) - it("recursive lookups failure", function() - local cli = 
assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - local entry1 = {{ - type = resolver.TYPE_CNAME, - cname = "bye.bye.world", - class = 1, - name = "hello.world", - ttl = 10, - }} - local entry2 = {{ - type = resolver.TYPE_CNAME, - cname = "hello.world", - class = 1, - name = "bye.bye.world", - ttl = 10, - }} - -- insert in the cache - cli.cache:set(entry1[1].name..":"..entry1[1].type, { ttl = 0 }, entry1) - cli.cache:set(entry2[1].name..":"..entry2[1].type, { ttl = 0 }, entry2) - - -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table - local ip, port, _ = cli:resolve("hello.world", { return_random = true, port = 123, cache_only = true }) - assert.is_nil(ip) - assert.are.equal("recursion detected for name: hello.world", port) - end) end) it("verifies valid_ttl", function() diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 90f8e1e15e3a..a0578a6ca141 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -123,7 +123,7 @@ describe("[DNS client cache]", function() ndots = 1, search = { "domain.com" }, hosts = {}, - order = { "LAST", "SRV", "A", "AAAA", "CNAME" }, + order = { "LAST", "SRV", "A", "AAAA" }, error_ttl = 0.5, stale_ttl = 0.5, enable_ipv6 = false, @@ -200,33 +200,7 @@ describe("[DNS client cache]", function() assert.equal(answers, cli.cache:get("short:myhost4:" .. 
resolver.TYPE_A)) end) - it("of dereferenced CNAME are stored in cache", function() - mock_records = { - ["myhost5.domain.com:"..resolver.TYPE_CNAME] = {{ - type = resolver.TYPE_CNAME, - class = 1, - name = "myhost5.domain.com", - cname = "mytarget.domain.com", - ttl = 30, - }}, - ["mytarget.domain.com:"..resolver.TYPE_A] = {{ - type = resolver.TYPE_A, - address = "1.2.3.4", - class = 1, - name = "mytarget.domain.com", - ttl = 30, - }} - } - local answers = cli:resolve("myhost5") - assert_same_answers(mock_records["mytarget.domain.com:"..resolver.TYPE_A], answers) -- not the test, intermediate validation - - -- the type un-specificc query was the CNAME, so that should be in the - -- shorname cache - answers = cli.cache:get("short:myhost5:all") - assert_same_answers(mock_records["myhost5.domain.com:"..resolver.TYPE_CNAME], answers) - end) - - it("ttl in cache is honored for short name entries #ttt", function() + it("ttl in cache is honored for short name entries", function() local ttl = 0.2 -- in the short name case the same record is inserted again in the cache -- and the lru-ttl has to be calculated, make sure it is correct @@ -328,7 +302,7 @@ describe("[DNS client cache]", function() search = { "domain.com" }, hosts = {}, resolvConf = {}, - order = { "LAST", "SRV", "A", "AAAA", "CNAME" }, + order = { "LAST", "SRV", "A", "AAAA" }, error_ttl = 0.5, stale_ttl = 0.5, enable_ipv6 = false, @@ -516,7 +490,7 @@ describe("[DNS client cache]", function() search = { "domain.com" }, hosts = {}, resolvConf = {}, - order = { "LAST", "SRV", "A", "AAAA", "CNAME" }, + order = { "LAST", "SRV", "A", "AAAA" }, error_ttl = 0.5, stale_ttl = 0.5, enable_ipv6 = false, @@ -596,10 +570,6 @@ describe("[DNS client cache]", function() "192.168.5.232.node.api_test.consul:AAAA", "dns server error: 3 name error", }, - { - "192.168.5.232.node.api_test.consul:CNAME", - "dns server error: 3 name error", - } }, tries) end) diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua 
b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua index fbe458b3568c..a613ebd1c8f1 100644 --- a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -66,13 +66,6 @@ describe("[DNS client stats]", function() ttl = 30, }}, ["nameserver_fail.com:" .. resolver.TYPE_A] = "nameserver failed", - ["recursion.com:" .. resolver.TYPE_CNAME] = {{ - type = resolver.TYPE_CNAME, - cname = "recursion.com", - class = 1, - name = "recursion.com", - ttl = 30, - }}, ["stale.com" .. resolver.TYPE_A] = {{ type = resolver.TYPE_CNAME, address = "stale.com", @@ -92,9 +85,6 @@ describe("[DNS client stats]", function() -- "query_err:nameserver failed" cli:resolve("nameserver_fail.com") - -- "fail_recur" - cli:resolve("recursion.com") - -- "stale" cli:resolve("stale.com") sleep(0.2) @@ -121,20 +111,6 @@ describe("[DNS client stats]", function() ["query"] = 1, ["query_succ"] = 1 }, - ["recursion.com"] = { - ["fail_recur"] = 1, - ["runs"] = 2, - ["miss"] = 1, - ["cname"] = 1 - }, - ["recursion.com:1"] = { - ["query"] = 1, - ["query_fail:name error"] = 1 - }, - ["recursion.com:5"] = { - ["query"] = 1, - ["query_succ"] = 1 - }, ["nameserver_fail.com"] = { ["fail"] = 1, ["runs"] = 1 @@ -152,11 +128,6 @@ describe("[DNS client stats]", function() ["query_fail:name error"] = 1, ["stale"] = 1 }, - ["stale.com:5"] = { - ["query"] = 1, - ["query_fail:name error"] = 1, - ["stale"] = 1 - } }, cli.stats) end) end) diff --git a/spec/02-integration/05-proxy/05-dns_spec.lua b/spec/02-integration/05-proxy/05-dns_spec.lua index 9607352a26ce..3e2c9475723c 100644 --- a/spec/02-integration/05-proxy/05-dns_spec.lua +++ b/spec/02-integration/05-proxy/05-dns_spec.lua @@ -108,7 +108,7 @@ for _, strategy in helpers.each_strategy() do local service = bp.services:insert { name = "tests-retries", - host = "nowthisdoesnotexistatall", + host = "nowthisdoesnotexistatall.test", path = "/exist", port = 80, protocol = "http" diff --git 
a/spec/02-integration/14-observability/01-instrumentations_spec.lua b/spec/02-integration/14-observability/01-instrumentations_spec.lua index 781c85cd8fb2..0d9af1927995 100644 --- a/spec/02-integration/14-observability/01-instrumentations_spec.lua +++ b/spec/02-integration/14-observability/01-instrumentations_spec.lua @@ -524,7 +524,7 @@ for _, strategy in helpers.each_strategy() do -- intentionally trigger a DNS query error local service = bp.services:insert({ name = "inexist-host-service", - host = "really-inexist-host", + host = "really-inexist-host.test", port = 80, }) @@ -558,7 +558,7 @@ for _, strategy in helpers.each_strategy() do local dns_spans = assert_has_spans("kong.dns", spans) local upstream_dns for _, dns_span in ipairs(dns_spans) do - if dns_span.attributes["dns.record.domain"] == "really-inexist-host" then + if dns_span.attributes["dns.record.domain"] == "really-inexist-host.test" then upstream_dns = dns_span break end diff --git a/spec/02-integration/21-request-debug/01-request-debug_spec.lua b/spec/02-integration/21-request-debug/01-request-debug_spec.lua index 13d626f474cd..ac9047bc32d1 100644 --- a/spec/02-integration/21-request-debug/01-request-debug_spec.lua +++ b/spec/02-integration/21-request-debug/01-request-debug_spec.lua @@ -113,6 +113,7 @@ local function get_token_file_content(deployment) assert(deployment == "hybrid", "unknown deploy mode") path = pl_path.join(DP_PREFIX, TOKEN_FILE) end + print(path) return pl_file.read(path) end From 03aefed1e7007aaba74bd4cb338ce4b4ccd0273e Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 4 Jun 2024 16:57:47 +0800 Subject: [PATCH 101/126] remove LAST type logic --- kong/dns/README.md | 22 +- kong/dns/client.lua | 470 ++++++++---------- kong/dns/utils.lua | 6 + .../09-balancer/04-round_robin_spec.lua | 2 + .../30-new-dns-client/01-utils_spec.lua | 94 ++-- .../30-new-dns-client/02-old_client_spec.lua | 263 ++++------ .../03-old_client_cache_spec.lua | 226 ++------- 
.../30-new-dns-client/04-client_ipc_spec.lua | 14 +- .../30-new-dns-client/05-client_stat_spec.lua | 121 +++-- .../04-admin_api/26-dns_client_spec.lua | 2 +- .../kong/plugins/dns-client-test/handler.lua | 2 +- spec/helpers/dns.lua | 13 +- 12 files changed, 497 insertions(+), 738 deletions(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index f41335eac87e..a838f6518aaa 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -45,9 +45,8 @@ Performs a series of initialization operations: * the path of `hosts` file. * `resolv_conf`: (default: `/etc/resolv.conf`) * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. -* `order`: (default: `{ "LAST", "SRV", "A", "AAAA" }`) +* `order`: (default: `{ "SRV", "A", "AAAA" }`) * the order in which to resolve different record types, it's similar to the option `dns_order` in `kong.conf`. - * The `LAST` type means the type of the last successful lookup (for the specified name). * `enable_ipv6`: (default: `true`) * whether to support IPv6 servers when getting nameservers from `resolv.conf`. * options for the underlying `lua-resty-dns` library: @@ -75,16 +74,15 @@ Performs a series of initialization operations: Performs a DNS resolution. -1. First, use the key `short::all` (or `short::` if `@qtype` is not `nil`) to query mlcache to see if there are any results available for quick use. If results are found, return them directly. -2. If there are no results available for quick use in the cache, then query all keys (`:`) extended from this domain name. - 1. The method for calculating extended keys is as follows: - 1. The domain `` is extended based on the `ndots`, `search`, and `domain` settings in `resolv.conf`. - 2. The `` is extended based on the `dns_order` parameter. - 2. Loop through all keys to query them. Once a usable result is found, return it. Also, store the DNS record result in mlcache with the key `short::all`. - 1. Use this key (`:`) to query mlcache. 
If it is not found, it triggers the L3 callback of `mlcache:get` to query the DNS server and process data that has expired but is still usable (`resolve_name_type_callback`). - 2. Use `mlcache:peek` to check if the missed and expired key still exists in the shared dictionary. If it does, return it directly to mlcache and trigger an asynchronous background task to update the expired data (`start_stale_update_task`). The maximum time that expired data can be reused is `stale_ttl`, but the maximum TTL returned to mlcache cannot exceed 60s. This way, if the expired key is not successfully updated by the background task after 60s, it can still be reused by calling the `resolve` function from the upper layer to trigger the L3 callback to continue executing this logic and initiate another background task for updating. - 1. For example, with a `stale_ttl` of 3600s, if the background task fails to update the record due to network issues during this time, and the upper-level application continues to call resolve to get the domain name result, it will trigger a background task to query the DNS result for that domain name every 60s, resulting in approximately 60 background tasks being triggered (3600s/60s). - +1. First, use the key `<qname>:all` (or `<qname>:<qtype>` if `@qtype` is not `nil`) to query mlcache to see if there are any results available. If results are found, return them directly. +2. If there are no results available in the cache, it triggers the L3 callback of `mlcache:get` to query records from the DNS servers, details are as follows: + 1. Check if `<qname>` has an IP address in the `hosts` file, return if found. + 2. Check if `<qname>` is an IP address itself, return if true. + 3. Use `mlcache:peek` to check if the expired key still exists in the shared dictionary. If it does, return it directly to mlcache and trigger an asynchronous background task to update the expired data (`start_stale_update_task`).
The maximum time that expired data can be reused is `stale_ttl`, but the maximum TTL returned to mlcache cannot exceed 60s. This way, if the expired key is not successfully updated by the background task after 60s, it can still be reused by calling the `resolve` function from the upper layer to trigger the L3 callback to continue executing this logic and initiate another background task for updating. + 1. For example, with a `stale_ttl` of 3600s, if the background task fails to update the record due to network issues during this time, and the upper-level application continues to call resolve to get the domain name result, it will trigger a background task to query the DNS result for that domain name every 60s, resulting in approximately 60 background tasks being triggered (3600s/60s). + 4. Query the DNS server, with `<qname>:<qtype>` combinations: + 1. The `<qname>` is extended according to settings in `resolv.conf`, such as `ndots`, `search`, and `domain`. + 2. The `<qtype>` is extended based on the `dns_order` parameter.
**Return value:** diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 64f6c08ff70e..3fba4c503836 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -7,6 +7,7 @@ local now = ngx.now local log = ngx.log local ERR = ngx.ERR local WARN = ngx.WARN +local NOTICE = ngx.NOTICE local DEBUG = ngx.DEBUG local ALERT = ngx.ALERT local timer_at = ngx.timer.at @@ -18,6 +19,7 @@ local tonumber = tonumber local setmetatable = setmetatable local math_min = math.min +local math_floor = math.floor local string_lower = string.lower local table_insert = table.insert local table_isempty = require("table.isempty") @@ -33,33 +35,26 @@ local req_dyn_hook_run_hook = require("kong.dynamic_hook").run_hook -- Constants and default values +local PREFIX = "[dns_client] " + local DEFAULT_ERROR_TTL = 1 -- unit: second local DEFAULT_STALE_TTL = 4 local DEFAULT_EMPTY_TTL = 30 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 -local PERSISTENT_CACHE_TTL = { ttl = 0 } -- used for mlcache:set - -local DEFAULT_ORDER = { "LAST", "SRV", "A", "AAAA" } +local DEFAULT_ORDER = { "SRV", "A", "AAAA" } local TYPE_SRV = resolver.TYPE_SRV local TYPE_A = resolver.TYPE_A local TYPE_AAAA = resolver.TYPE_AAAA -local TYPE_LAST = -1 - -local NAME_TO_TYPE = { - SRV = TYPE_SRV, - A = TYPE_A, - AAAA = TYPE_AAAA, - LAST = TYPE_LAST, -} +local TYPE_A_AAAA = -1 -- used to resolve IP addresses for SRV targets local TYPE_TO_NAME = { [TYPE_SRV] = "SRV", [TYPE_A] = "A", [TYPE_AAAA] = "AAAA", - [TYPE_LAST] = "LAST", + [TYPE_A_AAAA] = "A/AAAA", } local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale @@ -67,7 +62,7 @@ local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale local HIT_LEVEL_TO_NAME = { [1] = "hit_lru", [2] = "hit_shm", - [3] = "hit_cb", + [3] = "miss", [4] = "hit_stale", } @@ -92,7 +87,6 @@ local _M = { TYPE_SRV = TYPE_SRV, TYPE_A = TYPE_A, TYPE_AAAA = TYPE_AAAA, - TYPE_LAST = TYPE_LAST, } local MT = { __index = 
_M } @@ -117,23 +111,8 @@ local function stats_set_count(stats, name, key, value) end --- lookup or set TYPE_LAST (the DNS record type from the last successful query) -local function insert_last_type(cache, name, qtype) - local key = "last:" .. name - if TYPE_TO_NAME[qtype] and cache:get(key) ~= qtype then - cache:set(key, PERSISTENT_CACHE_TTL, qtype) - end -end - - -local function get_last_type(cache, name) - return cache:get("last:" .. name) -end - - local init_hosts do local function insert_answer_into_cache(cache, hosts_cache, address, name, qtype) - local key = name .. ":" .. qtype local answers = { ttl = LONG_LASTING_TTL, expire = now() + LONG_LASTING_TTL, @@ -146,33 +125,25 @@ local init_hosts do }, } - -- insert via the `:get` callback to prevent inter-process communication - cache:get(key, nil, function() - return answers, nil, LONG_LASTING_TTL - end) - - -- used for the host entry eviction - hosts_cache[key] = answers + hosts_cache[name .. ":" .. qtype] = answers + hosts_cache[name .. ":" .. TYPE_A_AAAA] = answers + hosts_cache[name .. 
":all"] = answers end -- insert hosts into cache - function init_hosts(cache, path, preferred_ip_type) + function init_hosts(cache, path) local hosts = parse_hosts(path) local hosts_cache = {} for name, address in pairs(hosts) do name = string_lower(name) - if address.ipv4 then - insert_answer_into_cache(cache, hosts_cache, address.ipv4, name, TYPE_A) - insert_last_type(cache, name, TYPE_A) - end - if address.ipv6 then insert_answer_into_cache(cache, hosts_cache, address.ipv6, name, TYPE_AAAA) - if not address.ipv4 or preferred_ip_type == TYPE_AAAA then - insert_last_type(cache, name, TYPE_AAAA) - end + end + + if address.ipv4 then + insert_answer_into_cache(cache, hosts_cache, address.ipv4, name, TYPE_A) end end @@ -187,10 +158,34 @@ local ipc_counter = 0 function _M.new(opts) opts = opts or {} + local enable_ipv4, enable_ipv6, enable_srv + + opts.order = opts.order or DEFAULT_ORDER + + for i, typstr in ipairs(opts.order) do + typstr = typstr:upper() + + if typstr == "A" then + enable_ipv4 = true + + elseif typstr == "AAAA" then + enable_ipv6 = true + + elseif typstr == "SRV" then + enable_srv = true + + elseif typstr ~= "LAST" and typstr ~= "CNAME" then + return nil, "Invalid dns record type in order array: " .. 
typstr + end + end + + log(NOTICE, PREFIX, PREFIX, "supported types: ", enable_srv and "srv " or "", + enable_ipv4 and "ipv4 " or "", enable_ipv6 and "ipv6 " or "") + -- parse resolv.conf local resolv, err = utils.parse_resolv_conf(opts.resolv_conf, opts.enable_ipv6) if not resolv then - log(WARN, "Invalid resolv.conf: ", err) + log(WARN, PREFIX, "Invalid resolv.conf: ", err) resolv = { options = {} } end @@ -200,7 +195,7 @@ function _M.new(opts) or resolv.nameservers if not nameservers or table_isempty(nameservers) then - log(WARN, "Invalid configuration, no nameservers specified") + log(WARN, PREFIX, "Invalid configuration, no nameservers specified") end local r_opts = { @@ -254,7 +249,7 @@ function _M.new(opts) local ok, err = kong.worker_events.post(ipc_source, channel, data) if not ok then - log(ERR, "failed to post event '", ipc_source, "', '", channel, "': ", err) + log(ERR, PREFIX, "failed to post event '", ipc_source, "', '", channel, "': ", err) end end, } @@ -263,6 +258,7 @@ function _M.new(opts) ipc = ipc, neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, lru_size = opts.cache_size or 10000, + shm_locks = ngx.shared.kong_locks and "kong_locks", resty_lock_opts = resty_lock_opts, }) @@ -274,41 +270,8 @@ function _M.new(opts) cache:purge(true) end - -- parse order - if opts.order and table_isempty(opts.order) then - return nil, "Invalid order array: empty record types" - end - - local order = opts.order or DEFAULT_ORDER - - local search_types = {} - local preferred_ip_type - - for i, typstr in ipairs(order) do - - -- TODO: delete this compatibility code in subsequent commits - if typstr:upper() == "CNAME" then - goto continue - end - - local qtype = NAME_TO_TYPE[typstr:upper()] - if not qtype then - return nil, "Invalid dns record type in order array: " .. 
typstr - end - - search_types[i] = qtype - - if (qtype == TYPE_A or qtype == TYPE_AAAA) and not preferred_ip_type then - preferred_ip_type = qtype - end - - ::continue:: - end - - preferred_ip_type = preferred_ip_type or TYPE_A - -- parse hosts - local hosts, hosts_cache = init_hosts(cache, opts.hosts, preferred_ip_type) + local hosts, hosts_cache = init_hosts(cache, opts.hosts) return setmetatable({ cache = cache, @@ -320,8 +283,10 @@ function _M.new(opts) error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + enable_srv = enable_srv, + enable_ipv4 = enable_ipv4, + enable_ipv6 = enable_ipv6, hosts_cache = hosts_cache, - search_types = search_types, -- TODO: Make the table readonly. But if `string.buffer.encode/decode` and -- `pl.tablex.readonly` are called on it, it will become empty table. @@ -368,20 +333,16 @@ local function process_answers(self, qname, qtype, answers) answer.target = ipv6_bracket(answer.target) end - -- skip the SRV record pointing to itself, - -- see https://github.com/Kong/lua-resty-dns-client/pull/3 - if not (answer_type == TYPE_SRV and answer.target == qname) then - table_insert(processed_answers, answer) - end + table_insert(processed_answers, answer) end end if table_isempty(processed_answers) then - log(DEBUG, "processed ans:empty") + log(DEBUG, PREFIX, "processed ans:empty") return self.EMPTY_ANSWERS end - log(DEBUG, "processed ans:", #processed_answers) + log(DEBUG, PREFIX, "processed ans:", #processed_answers) processed_answers.expire = now() + ttl processed_answers.ttl = ttl @@ -390,8 +351,10 @@ local function process_answers(self, qname, qtype, answers) end -local function resolve_query(self, name, qtype) +local function resolve_query(self, name, qtype, tries) local key = name .. ":" .. 
qtype + + stats_init_name(self.stats, key) stats_increment(self.stats, key, "query") local r, err = resolver:new(self.r_opts) @@ -399,23 +362,24 @@ local function resolve_query(self, name, qtype) return nil, "failed to instantiate the resolver: " .. err end - local start_time = now() + local start = now() local answers, err = r:query(name, { additional_section = true, qtype = qtype }) r:destroy() - local query_time = now() - start_time -- the time taken for the DNS query - local time_str = ("%.3f %.3f"):format(start_time, query_time) + local duration = math_floor((now() - start) * 1000) - stats_set_count(self.stats, key, "query_last_time", time_str) + stats_set_count(self.stats, key, "query_last_time", duration) - log(DEBUG, "r:query(", key, ") ans:", answers and #answers or "-", - " t:", time_str) + log(DEBUG, PREFIX, "r:query(", key, ") ans:", answers and #answers or "-", + " t:", duration, " ms") + -- network error or malformed DNS response if not answers then stats_increment(self.stats, key, "query_fail_nameserver") - err = err or "unknown" - return nil, "DNS server error: " .. err .. ", Query Time: " .. time_str + err = "DNS server error: " .. tostring(err) .. ", took " .. duration .. " ms" + table_insert(tries, { name .. ":" .. TYPE_TO_NAME[qtype], err }) + return nil, err end answers = process_answers(self, name, qtype, answers) @@ -424,139 +388,100 @@ local function resolve_query(self, name, qtype) "query_fail:" .. answers.errstr or "query_succ") - return answers, nil, answers.ttl -end - - -local function stale_update_task(premature, self, key, name, qtype, short_key) - if premature then - return + -- DNS response error + if answers.errcode then + err = ("dns %s error: %s %s"):format( + answers.errcode < CACHE_ONLY_ERROR_CODE and "server" or "client", + answers.errcode, answers.errstr) + table_insert(tries, { name .. ":" .. 
TYPE_TO_NAME[qtype], err }) end - local answers = resolve_query(self, name, qtype) - if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then - self.cache:set(key, { ttl = answers.ttl }, answers) - insert_last_type(self.cache, name, qtype) - - -- simply invalidate it and let the search iteration choose the correct one - self.cache:delete(short_key) - end + return answers end -local function start_stale_update_task(self, key, name, qtype, short_key) - stats_increment(self.stats, key, "stale") +-- resolve all `name`s and return first usable answers +local function resolve_query_names(self, names, qtype, tries) + local answers, err - local ok, err = timer_at(0, stale_update_task, self, key, name, qtype, short_key) - if not ok then - log(ALERT, "failed to start a timer to update stale DNS records: ", err) + for _, qname in ipairs(names) do + answers, err = resolve_query(self, qname, qtype, tries) + + -- severe error occurred + if not answers then + return nil, err + end + + if not answers.errcode then + return answers, nil, answers.ttl + end end + + -- not found in the search iteration + return answers, nil, answers.ttl end -local function resolve_name_type_callback(self, name, qtype, cache_only, - short_key, tries) - local key = name .. ":" .. qtype +local function resolve_query_types(self, name, qtype, tries) + local names = search_names(name, self.resolv, self.hosts) + local answers, err, ttl - -- check if this key exists in the hosts file (it maybe evicted from cache) - local answers = self.hosts_cache[key] - if answers then - return answers, nil, answers.ttl + -- the specific type + if qtype and qtype ~= TYPE_A_AAAA then + return resolve_query_names(self, names, qtype, tries) end - -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then - -- initiates an asynchronous background updating task to refresh it. 
- local ttl, _, answers = self.cache:peek(key, true) - if answers and ttl then - if not answers.expired then - answers.expire = now() + ttl - answers.expired = true - ttl = ttl + self.stale_ttl - - else - ttl = ttl + (answers.expire - now()) + -- query SRV for nil type + if self.enable_srv and qtype == nil then + answers, err, ttl = resolve_query_names(self, names, TYPE_SRV, tries) + if not answers or not answers.errcode then + return answers, err, ttl end + end - -- trigger the update task by the upper caller every 60 seconds - ttl = math_min(ttl, 60) - - if ttl > 0 then - log(DEBUG, "start stale update task ", key, " ttl:", ttl) - - -- mlcache's internal lock mechanism ensures concurrent control - start_stale_update_task(self, key, name, qtype, short_key) - answers.ttl = ttl - return answers, nil, ttl + -- query A/AAAA for nil or TYPE_A_AAAA type + if self.enable_ipv4 then + answers, err, ttl = resolve_query_names(self, names, TYPE_A, tries) + if not answers or not answers.errcode then + return answers, err, ttl end end - if cache_only then - return CACHE_ONLY_ANSWERS, nil, -1 + if self.enable_ipv6 then + answers, err, ttl = resolve_query_names(self, names, TYPE_AAAA, tries) + if not answers or not answers.errcode then + return answers, err, ttl + end end - local answers, err, ttl = resolve_query(self, name, qtype) return answers, err, ttl end -local function resolve_name_type(self, name, qtype, cache_only, short_key, - tries, has_timing) - local key = name .. ":" .. 
qtype - - stats_init_name(self.stats, key) - - local answers, err, hit_level = self.cache:get(key, nil, - resolve_name_type_callback, - self, name, qtype, cache_only, - short_key, tries) - -- check for runtime errors in the callback - if err and err:sub(1, 8) == "callback" then - log(ALERT, err) - end - - log(DEBUG, "cache lookup ", key, " ans:", answers and #answers or "-", - " hlv:", hit_level or "-") - - if has_timing then - req_dyn_hook_run_hook("timing", "dns:cache_lookup", - (hit_level and hit_level < HIT_L3)) +local function stale_update_task(premature, self, key, name, qtype) + if premature then + return end - -- hit L1 lru or L2 shm - if hit_level and hit_level < HIT_L3 then - stats_increment(self.stats, key, HIT_LEVEL_TO_NAME[hit_level]) + local tries = setmetatable({}, TRIES_MT) + local answers = resolve_query_types(self, name, qtype, tries) + if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then + self.cache:set(key, { ttl = answers.ttl }, answers) end - if err or answers.errcode then - if not err then - local src = answers.errcode < CACHE_ONLY_ERROR_CODE and "server" or "client" - err = ("dns %s error: %s %s"):format(src, answers.errcode, answers.errstr) - end - - table_insert(tries, { name .. ":" .. TYPE_TO_NAME[qtype], err }) + if not answers or answers.errcode then + log(WARN, PREFIX, "Updating stale DNS records failed. 
Tried: ", tostring(tries)) end - - return answers, err end -local function get_search_types(self, name, qtype) - local input_types = qtype and { qtype } or self.search_types - local checked_types = {} - local types = {} - - for _, qtype in ipairs(input_types) do - if qtype == TYPE_LAST then - qtype = get_last_type(self.cache, name) - end +local function start_stale_update_task(self, key, name, qtype) + stats_increment(self.stats, key, "stale") - if qtype and not checked_types[qtype] then - table_insert(types, qtype) - checked_types[qtype] = true - end + local ok, err = timer_at(0, stale_update_task, self, key, name, qtype) + if not ok then + log(ALERT, PREFIX, "failed to start a timer to update stale DNS records: ", err) end - - return types end @@ -577,10 +502,8 @@ local function check_and_get_ip_answers(name) end --- resolve all `name`s and `type`s combinations and return first usable answers -local function resolve_names_and_types(self, name, typ, cache_only, short_key, - tries, has_timing) - +local function resolve_callback(self, name, qtype, cache_only, tries) + -- check if name is ip address local answers = check_and_get_ip_answers(name) if answers then -- domain name is IP literal answers.ttl = LONG_LASTING_TTL @@ -588,73 +511,83 @@ local function resolve_names_and_types(self, name, typ, cache_only, short_key, return answers, nil, tries end - -- TODO: For better performance, it may be necessary to rewrite it as an - -- iterative function. - local types = get_search_types(self, name, typ) - local names = search_names(name, self.resolv, self.hosts) + -- check if this key exists in the hosts file (it maybe evicted from cache) + local key = name .. ":" .. 
(qtype or "all") + local answers = self.hosts_cache[key] + if answers then + return answers, nil, answers.ttl + end - local err - for _, qtype in ipairs(types) do - for _, qname in ipairs(names) do - answers, err = resolve_name_type(self, qname, qtype, cache_only, - short_key, tries, has_timing) - -- severe error occurred - if not answers then - return nil, err, tries - end + -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then + -- initiates an asynchronous background updating task to refresh it. + local ttl, _, answers = self.cache:peek(key, true) + if answers and ttl then + if not answers.expired then + answers.expire = now() + ttl + answers.expired = true + ttl = ttl + self.stale_ttl - if not answers.errcode then - insert_last_type(self.cache, qname, qtype) -- cache TYPE_LAST - return answers, nil, tries - end + else + ttl = ttl + (answers.expire - now()) + end + + -- trigger the update task by the upper caller every 60 seconds + ttl = math_min(ttl, 60) + + if ttl > 0 then + log(DEBUG, PREFIX, "start stale update task ", key, " ttl:", ttl) + + -- mlcache's internal lock mechanism ensures concurrent control + start_stale_update_task(self, key, name, qtype) + answers.ttl = ttl + return answers, nil, ttl end end - -- not found in the search iteration - return nil, err, tries + if cache_only then + return CACHE_ONLY_ANSWERS, nil, -1 + end + + return resolve_query_types(self, name, qtype, tries) end local function resolve_all(self, name, qtype, cache_only, tries, has_timing) name = string_lower(name) - tries = setmetatable(tries or {}, TRIES_MT) - - -- key like "short:example.com:all" or "short:example.com:5" - local key = "short:" .. name .. ":" .. 
(qtype or "all") - - stats_init_name(self.stats, name) - stats_increment(self.stats, name, "runs") - -- quickly lookup with the key "short::all" or "short::" - local answers, err, hit_level = self.cache:get(key) - if not answers then - log(DEBUG, "quickly cache lookup ", key, " ans:- hlvl:", hit_level or "-") + tries = setmetatable(tries or {}, TRIES_MT) - answers, err, tries = resolve_names_and_types(self, name, qtype, cache_only, - key, tries, has_timing) + -- key like "example.com:" + local key = name .. ":" .. (qtype or "all") + log(DEBUG, PREFIX, "resolve_all ", key) - if not cache_only and answers then - -- If another worker resolved the name between these two `:get`, it can - -- work as expected and will not introduce a race condition. + stats_init_name(self.stats, key) + stats_increment(self.stats, key, "runs") - -- insert via the `:get` callback to prevent inter-process communication - self.cache:get(key, nil, function() - return answers, nil, answers.ttl - end) - end + local answers, err, hit_level = self.cache:get(key, nil, resolve_callback, + self, name, qtype, cache_only, + tries) + -- check for runtime errors in the callback + if err and err:sub(1, 8) == "callback" then + log(ALERT, PREFIX, err) + end - stats_increment(self.stats, name, answers and "miss" or "fail") + local hit_str = hit_level and HIT_LEVEL_TO_NAME[hit_level] or "fail" + stats_increment(self.stats, key, hit_str) - else - log(DEBUG, "quickly cache lookup ", key, " ans:", #answers, - " hlv:", hit_level or "-") + log(DEBUG, PREFIX, "cache lookup ", key, " ans:", answers and #answers or "-", + " hlv:", hit_str) - if has_timing then - req_dyn_hook_run_hook("timing", "dns:cache_lookup", - (hit_level and hit_level < HIT_L3)) - end + if has_timing then + req_dyn_hook_run_hook("timing", "dns:cache_lookup", + (hit_level and hit_level < HIT_L3)) + end - stats_increment(self.stats, name, HIT_LEVEL_TO_NAME[hit_level]) + if answers and answers.errcode then + err = ("dns %s error: %s %s"):format( + 
answers.errcode < CACHE_ONLY_ERROR_CODE and "server" or "client", + answers.errcode, answers.errstr) + return nil, err, tries end return answers, err, tries @@ -667,30 +600,27 @@ function _M:resolve(name, qtype, cache_only, tries) end --- Implement `resolve_address` separately as `_resolve_address` with the --- `has_timing` parameter so that it avoids checking for `ngx.ctx.has_timing` --- in recursion. -local function _resolve_address(self, name, port, cache_only, tries, has_timing) +function _M:resolve_address(name, port, cache_only, tries) + local has_timing = ngx.ctx and ngx.ctx.has_timing + local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, has_timing) if not answers then return nil, err, tries end - if answers[1].type == TYPE_SRV then + if answers and answers[1].type == TYPE_SRV then local answer = get_next_weighted_round_robin_answer(answers) port = (answer.port ~= 0 and answer.port) or port - return _resolve_address(self, answer.target, port, cache_only, tries, - has_timing) + answers, err, tries = resolve_all(self, answer.target, TYPE_A_AAAA, + cache_only, tries, has_timing) end - return get_next_round_robin_answer(answers).address, port, tries -end - + if not answers then + return nil, err, tries + end -function _M:resolve_address(name, port, cache_only, tries) - return _resolve_address(self, name, port, cache_only, tries, - ngx.ctx and ngx.ctx.has_timing) + return get_next_round_robin_answer(answers).address, port, tries end @@ -699,7 +629,7 @@ end local dns_client function _M.init(opts) - log(DEBUG, "(re)configuring dns client") + log(DEBUG, PREFIX, "(re)configuring dns client") if opts then opts.valid_ttl = opts.valid_ttl or opts.validTtl @@ -731,7 +661,7 @@ function _M.toip(name, port, cache_only, tries) end --- for example, "example.com:33" -> "example.com:SRV" +-- "_ldap._tcp.example.com:33" -> "_ldap._tcp.example.com:SRV" local function format_key(key) local qname, qtype = key:match("([^:]+):(%d+)") -- match 
"(qname):(qtype)" return qtype and qname .. ":" .. (TYPE_TO_NAME[tonumber(qtype)] or qtype) @@ -768,14 +698,6 @@ if package.loaded.busted then cache = dns_client.cache, } end - - function _M:_insert_last_type(name, qtype) -- export as different name! - insert_last_type(self.cache, name, qtype) - end - - function _M:_get_last_type(name) -- export as different name! - return get_last_type(self.cache, name) - end end diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 2491fc359b57..999dcd3f5f95 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -1,6 +1,7 @@ local utils = require("kong.resty.dns.utils") local log = ngx.log + local NOTICE = ngx.NOTICE local type = type @@ -171,6 +172,11 @@ function _M.is_fqdn(name, ndots) end +function _M.is_srv(name) + return name:sub(1, 1) == "_" and name:find("%._") ~= nil +end + + -- construct names from resolv options: search, ndots and domain function _M.search_names(name, resolv, hosts) if not resolv.search or _M.is_fqdn(name, resolv.ndots) or diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index 4e045685e810..341ec4fe459b 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -19,6 +19,7 @@ local sleep = helpers.sleep local dnsSRV = function(...) return helpers.dnsSRV(client, ...) end local dnsA = function(...) return helpers.dnsA(client, ...) end local dnsAAAA = function(...) return helpers.dnsAAAA(client, ...) 
end +local dnsExpire = helpers.dnsExpire local unset_register = {} @@ -1042,6 +1043,7 @@ describe("[round robin balancer]", function() -- expire the existing record record.expire = 0 record.expired = true + dnsExpire(client, record) sleep(0.2) -- wait for record expiration -- do a lookup to trigger the async lookup client.resolve("really.really.really.does.not.exist.hostname.test", {qtype = client.TYPE_A}) diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua index ba8e663a8370..37c8fcdd7d9e 100644 --- a/spec/01-unit/30-new-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -9,7 +9,7 @@ describe("[utils]", function () it("test @name: end with `.`", function () assert.is_true(utils.is_fqdn("www.", 2)) assert.is_true(utils.is_fqdn("www.example.", 3)) - assert.is_true(utils.is_fqdn("www.example.com.", 4)) + assert.is_true(utils.is_fqdn("www.example.test.", 4)) end) it("test @ndots", function () @@ -17,28 +17,28 @@ describe("[utils]", function () assert.is_false(utils.is_fqdn("www", 1)) assert.is_true(utils.is_fqdn("www.example", 1)) - assert.is_true(utils.is_fqdn("www.example.com", 1)) + assert.is_true(utils.is_fqdn("www.example.test", 1)) assert.is_false(utils.is_fqdn("www", 2)) assert.is_false(utils.is_fqdn("www.example", 2)) - assert.is_true(utils.is_fqdn("www.example.com", 2)) - assert.is_true(utils.is_fqdn("www1.www2.example.com", 2)) + assert.is_true(utils.is_fqdn("www.example.test", 2)) + assert.is_true(utils.is_fqdn("www1.www2.example.test", 2)) end) end) describe("search_names()", function () it("empty resolv, not apply the search list", function () local resolv = {} - local names = utils.search_names("www.example.com", resolv) - assert.same(names, { "www.example.com" }) + local names = utils.search_names("www.example.test", resolv) + assert.same(names, { "www.example.test" }) end) it("FQDN name: end with `.`, not apply the search list", function () - local names = 
utils.search_names("www.example.com.", { ndots = 1 }) - assert.same(names, { "www.example.com." }) + local names = utils.search_names("www.example.test.", { ndots = 1 }) + assert.same(names, { "www.example.test." }) -- name with 3 dots, and ndots=4 > 3 - local names = utils.search_names("www.example.com.", { ndots = 4 }) - assert.same(names, { "www.example.com." }) + local names = utils.search_names("www.example.test.", { ndots = 4 }) + assert.same(names, { "www.example.test." }) end) it("dots number in the name >= ndots, not apply the search list", function () @@ -46,11 +46,11 @@ describe("[utils]", function () ndots = 1, search = { "example.net" }, } - local names = utils.search_names("www.example.com", resolv) - assert.same(names, { "www.example.com" }) + local names = utils.search_names("www.example.test", resolv) + assert.same(names, { "www.example.test" }) - local names = utils.search_names("example.com", resolv) - assert.same(names, { "example.com" }) + local names = utils.search_names("example.test", resolv) + assert.same(names, { "example.test" }) end) it("dots number in the name < ndots, apply the search list", function () @@ -69,13 +69,13 @@ describe("[utils]", function () local resolv = { ndots = 2, - search = { "example.net", "example.com" }, + search = { "example.net", "example.test" }, } local names = utils.search_names("www", resolv) - assert.same(names, { "www.example.net", "www.example.com", "www" }) + assert.same(names, { "www.example.net", "www.example.test", "www" }) local names = utils.search_names("www1.www2", resolv) - assert.same(names, { "www1.www2.example.net", "www1.www2.example.com", "www1.www2" }) + assert.same(names, { "www1.www2.example.net", "www1.www2.example.test", "www1.www2" }) local names = utils.search_names("www1.www2.www3", resolv) assert.same(names, { "www1.www2.www3" }) -- not apply @@ -95,7 +95,7 @@ describe("[utils]", function () end) it("host name", function () - assert.equal(utils.ipv6_bracket("example.com"), 
"example.com") + assert.equal(utils.ipv6_bracket("example.test"), "example.test") end) end) @@ -235,7 +235,7 @@ describe("[utils]", function () [[# this is just a comment line # at the top of the file -domain myservice.com +domain myservice.test nameserver 198.51.100.0 nameserver 2001:db8::1 ; and a comment here @@ -243,7 +243,7 @@ nameserver 198.51.100.0:1234 ; this one has a port number (limited systems suppo nameserver 1.2.3.4 ; this one is 4th, so should be ignored # search is commented out, test below for a mutually exclusive one -#search domaina.com domainb.com +#search domaina.test domainb.test sortlist list1 list2 #list3 is not part of it @@ -267,7 +267,7 @@ options use-vc ]]) local resolv, err = utils.parse_resolv_conf(file) assert.is.Nil(err) - assert.is.equal("myservice.com", resolv.domain) + assert.is.equal("myservice.test", resolv.domain) assert.is.same({ "198.51.100.0", "2001:db8::1", "198.51.100.0:1234" }, resolv.nameserver) assert.is.same({ "list1", "list2" }, resolv.sortlist) assert.is.same({ ndots = 2, timeout = 3, attempts = 4, debug = true, rotate = true, @@ -280,16 +280,16 @@ options use-vc it("tests parsing 'resolv.conf' with mutual exclusive domain vs search", function() local file = splitlines( -[[domain myservice.com +[[domain myservice.test # search is overriding domain above -search domaina.com domainb.com +search domaina.test domainb.test ]]) local resolv, err = utils.parse_resolv_conf(file) assert.is.Nil(err) assert.is.Nil(resolv.domain) - assert.is.same({ "domaina.com", "domainb.com" }, resolv.search) + assert.is.same({ "domaina.test", "domainb.test" }, resolv.search) end) it("tests parsing 'resolv.conf' with 'timeout = 0'", function() @@ -302,33 +302,33 @@ search domaina.com domainb.com local file = splitlines( [[ -search domain1.com domain2.com domain3.com domain4.com domain5.com domain6.com domain7.com +search domain1.test domain2.test domain3.test domain4.test domain5.test domain6.test domain7.test ]]) local resolv, err = 
utils.parse_resolv_conf(file) assert.is.Nil(err) assert.is.Nil(resolv.domain) assert.is.same({ - "domain1.com", - "domain2.com", - "domain3.com", - "domain4.com", - "domain5.com", - "domain6.com", + "domain1.test", + "domain2.test", + "domain3.test", + "domain4.test", + "domain5.test", + "domain6.test", }, resolv.search) end) it("tests parsing 'resolv.conf' with environment variables", function() local file = splitlines( [[# this is just a comment line -domain myservice.com +domain myservice.test nameserver 198.51.100.0 nameserver 198.51.100.1 ; and a comment here options ndots:1 ]]) - envvars.LOCALDOMAIN = "domaina.com domainb.com" + envvars.LOCALDOMAIN = "domaina.test domainb.test" envvars.RES_OPTIONS = "ndots:2 debug" local resolv, err = utils.parse_resolv_conf(file) @@ -336,7 +336,7 @@ options ndots:1 assert.is.Nil(resolv.domain) -- must be nil, mutually exclusive - assert.is.same({ "domaina.com", "domainb.com" }, resolv.search) + assert.is.same({ "domaina.test", "domainb.test" }, resolv.search) assert.is.same({ ndots = 2, debug = true }, resolv.options) end) @@ -344,7 +344,7 @@ options ndots:1 it("tests parsing 'resolv.conf' with non-existing environment variables", function() local file = splitlines( [[# this is just a comment line -domain myservice.com +domain myservice.test nameserver 198.51.100.0 nameserver 198.51.100.1 ; and a comment here @@ -355,7 +355,7 @@ options ndots:2 envvars.RES_OPTIONS = "" local resolv, err = utils.parse_resolv_conf(file) assert.is.Nil(err) - assert.is.equals("myservice.com", resolv.domain) -- must be nil, mutually exclusive + assert.is.equals("myservice.test", resolv.domain) -- must be nil, mutually exclusive assert.is.same({ ndots = 2 }, resolv.options) end) @@ -398,15 +398,15 @@ nameserver [fe80::1%enp0s20f0u1u1] # My test server for the website -192.168.1.2 test.computer.com - 192.168.1.3 ftp.COMPUTER.com alias1 alias2 -192.168.1.4 smtp.computer.com alias3 #alias4 -192.168.1.5 smtp.computer.com alias3 #doubles, first one 
should win +192.168.1.2 test.computer.test + 192.168.1.3 ftp.COMPUTER.test alias1 alias2 +192.168.1.4 smtp.computer.test alias3 #alias4 +192.168.1.5 smtp.computer.test alias3 #doubles, first one should win #Blocking known malicious sites -127.0.0.1 admin.abcsearch.com -127.0.0.2 www3.abcsearch.com #[Browseraid] -127.0.0.3 www.abcsearch.com wwwsearch #[Restricted Zone site] +127.0.0.1 admin.abcsearch.test +127.0.0.2 www3.abcsearch.test #[Browseraid] +127.0.0.3 www.abcsearch.test wwwsearch #[Restricted Zone site] [::1] alsolocalhost #support IPv6 in brackets ]]) @@ -414,16 +414,16 @@ nameserver [fe80::1%enp0s20f0u1u1] assert.is.equal("127.0.0.1", reverse.localhost.ipv4) assert.is.equal("[::1]", reverse.localhost.ipv6) - assert.is.equal("192.168.1.2", reverse["test.computer.com"].ipv4) + assert.is.equal("192.168.1.2", reverse["test.computer.test"].ipv4) - assert.is.equal("192.168.1.3", reverse["ftp.computer.com"].ipv4) + assert.is.equal("192.168.1.3", reverse["ftp.computer.test"].ipv4) assert.is.equal("192.168.1.3", reverse["alias1"].ipv4) assert.is.equal("192.168.1.3", reverse["alias2"].ipv4) - assert.is.equal("192.168.1.4", reverse["smtp.computer.com"].ipv4) + assert.is.equal("192.168.1.4", reverse["smtp.computer.test"].ipv4) assert.is.equal("192.168.1.4", reverse["alias3"].ipv4) - assert.is.equal("192.168.1.4", reverse["smtp.computer.com"].ipv4) -- .1.4; first one wins! + assert.is.equal("192.168.1.4", reverse["smtp.computer.test"].ipv4) -- .1.4; first one wins! assert.is.equal("192.168.1.4", reverse["alias3"].ipv4) -- .1.4; first one wins! 
assert.is.equal("[::1]", reverse["alsolocalhost"].ipv6) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 8d2ad46cf0af..47947e62c3e9 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -137,10 +137,10 @@ describe("[DNS client]", function() writefile(hosts_path, "") -- empty hosts local cli = assert(client_new()) - local answers = cli.cache:get("localhost:28") + local answers = cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) assert.equal("[::1]", answers[1].address) - answers = cli.cache:get("localhost:1") + answers = cli:resolve("localhost", { qtype = resolver.TYPE_A}) assert.equal("127.0.0.1", answers[1].address) answers = cli:resolve("localhost") @@ -152,10 +152,12 @@ describe("[DNS client]", function() local cli = assert(client_new()) -- IPv6 is not defined + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) local answers = cli.cache:get("localhost:28") assert.is_nil(answers) -- IPv4 is not overwritten + cli:resolve("localhost", { qtype = resolver.TYPE_A}) answers = cli.cache:get("localhost:1") assert.equal("1.2.3.4", answers[1].address) end) @@ -165,10 +167,12 @@ describe("[DNS client]", function() local cli = assert(client_new()) -- IPv6 is not overwritten + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) local answers = cli.cache:get("localhost:28") assert.equal("[::1:2:3:4]", answers[1].address) -- IPv4 is not defined + cli:resolve("localhost", { qtype = resolver.TYPE_A}) answers = cli.cache:get("localhost:1") assert.is_nil(answers) end) @@ -177,6 +181,7 @@ describe("[DNS client]", function() writefile(hosts_path, "::1:2:3:4 localhost") local cli = assert(client_new()) + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) local answers = cli.cache:get("localhost:28") assert.equal("[::1:2:3:4]", answers[1].address) @@ -189,6 +194,7 @@ describe("[DNS client]", function() answers = 
cli:resolve("localhost") assert.equal("[::1:2:3:4]", answers[1].address) + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) answers = cli.cache:get("localhost:28") assert.equal("[::1:2:3:4]", answers[1].address) end) @@ -210,7 +216,7 @@ describe("[DNS client]", function() it("works with a 'search' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "search one.com two.com", + "search one.test two.test", "options ndots:1", }) @@ -221,14 +227,14 @@ describe("[DNS client]", function() assert.same(answers, nil) assert.same(err, "dns client error: 101 empty record received") assert.same({ - 'host.one.com:33', - 'host.two.com:33', + 'host.one.test:33', + 'host.two.test:33', 'host:33', - 'host.one.com:1', - 'host.two.com:1', + 'host.one.test:1', + 'host.two.test:1', 'host:1', - 'host.one.com:28', - 'host.two.com:28', + 'host.one.test:28', + 'host.two.test:28', 'host:28', }, list) end) @@ -256,7 +262,7 @@ describe("[DNS client]", function() it("works with a 'domain' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "domain local.domain.com", + "domain local.domain.test", "options ndots:1", }) @@ -267,37 +273,11 @@ describe("[DNS client]", function() assert.same(answers, nil) assert.same(err, "dns client error: 101 empty record received") assert.same({ - 'host.local.domain.com:33', + 'host.local.domain.test:33', 'host:33', - 'host.local.domain.com:1', + 'host.local.domain.test:1', 'host:1', - 'host.local.domain.com:28', - 'host:28', - }, list) - end) - - it("handles last successful type", function() - writefile(resolv_path, { - "nameserver 198.51.100.0", - "search one.com two.com", - "options ndots:1", - }) - - local list = hook_query_func_get_list() - local cli = assert(client_new()) - cli:_insert_last_type("host", resolver.TYPE_CNAME) - - cli:resolve("host") - - assert.same({ - 'host.one.com:33', - 'host.two.com:33', - 'host:33', - 'host.one.com:1', - 'host.two.com:1', - 'host:1', - 'host.one.com:28', - 
'host.two.com:28', + 'host.local.domain.test:28', 'host:28', }, list) end) @@ -307,7 +287,7 @@ describe("[DNS client]", function() it("works with a 'search' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "search one.com two.com", + "search one.test two.test", "options ndots:1", }) @@ -343,7 +323,7 @@ describe("[DNS client]", function() it("works with a 'domain' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "domain local.domain.com", + "domain local.domain.test", "options ndots:1", }) @@ -357,33 +337,13 @@ describe("[DNS client]", function() 'host.:28', }, list) end) - - it("handles last successful type", function() - writefile(resolv_path, { - "nameserver 198.51.100.0", - "search one.com two.com", - "options ndots:1", - }) - - local list = hook_query_func_get_list() - local cli = assert(client_new()) - cli:_insert_last_type("host.", resolver.TYPE_CNAME) - - cli:resolve("host.") - assert.same({ - 'host.:33', - 'host.:1', - 'host.:28', - }, list) - end) - end) describe("with type", function() it("works with a 'search' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "search one.com two.com", + "search one.test two.test", "options ndots:1", }) @@ -392,8 +352,8 @@ describe("[DNS client]", function() cli:resolve("host") assert.same({ - 'host.one.com:28', - 'host.two.com:28', + 'host.one.test:28', + 'host.two.test:28', 'host:28', }, list) end) @@ -401,7 +361,7 @@ describe("[DNS client]", function() it("works with a 'domain' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "domain local.domain.com", + "domain local.domain.test", "options ndots:1", }) @@ -410,37 +370,17 @@ describe("[DNS client]", function() cli:resolve("host") assert.same({ - 'host.local.domain.com:28', + 'host.local.domain.test:28', 'host:28', }, list) end) - - it("ignores last successful type", function() - writefile(resolv_path, { - "nameserver 198.51.100.0", - "search one.com two.com", - "options 
ndots:1", - }) - - local list = hook_query_func_get_list() - local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type - cli:_insert_last_type("host", resolver.TYPE_CNAME) - - cli:resolve("host") - assert.same({ - 'host.one.com:28', - 'host.two.com:28', - 'host:28', - }, list) - end) - end) describe("FQDN with type", function() it("works with a 'search' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "search one.com two.com", + "search one.test two.test", "options ndots:1", }) @@ -455,7 +395,7 @@ describe("[DNS client]", function() it("works with a 'domain' option", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "domain local.domain.com", + "domain local.domain.test", "options ndots:1", }) @@ -467,30 +407,12 @@ describe("[DNS client]", function() 'host.:28', }, list) end) - - it("ignores last successful type", function() - writefile(resolv_path, { - "nameserver 198.51.100.0", - "search one.com two.com", - "options ndots:1", - }) - - local list = hook_query_func_get_list() - local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type - cli:_insert_last_type("host", resolver.TYPE_CNAME) - - cli:resolve("host.") - - assert.same({ - 'host.:28', - }, list) - end) end) it("honours 'ndots'", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "search one.com two.com", + "search one.test two.test", "options ndots:1", }) @@ -508,7 +430,7 @@ describe("[DNS client]", function() it("hosts file always resolves first, overriding `ndots`", function() writefile(resolv_path, { "nameserver 198.51.100.0", - "search one.com two.com", + "search one.test two.test", "options ndots:1", }) writefile(hosts_path, { @@ -524,10 +446,12 @@ describe("[DNS client]", function() assert.same({}, list) -- hit on cache, so no query to the nameserver -- perferred IP type: IPv6 (AAAA takes priority in order) + --[[ local cli = assert(client_new({ order = { "LAST", "SRV", "AAAA", "A" } })) local answers = cli:resolve("host") 
assert.same(answers[1].address, "[::1]") assert.same({}, list) + ]] end) end) @@ -541,7 +465,7 @@ describe("[DNS client]", function() local orig_log = ngx.log _G.ngx.log = function (...) end -- mute ALERT log - local answers, err = cli:resolve("srv.timeout.com") + local answers, err = cli:resolve("srv.timeout.test") _G.ngx.log = orig_log assert.is_nil(answers) assert.match("callback threw an error:.*CALLBACK", err) @@ -549,7 +473,7 @@ describe("[DNS client]", function() describe("timeout", function () it("dont try other types with the low-level error", function() - -- KAG-2300 https://github.com/Kong/kong/issues/10182 + -- KAG-2300 https://github.test/Kong/kong/issues/10182 -- When timed out, don't keep trying with other answers types. writefile(resolv_path, { "nameserver 198.51.100.0", @@ -574,14 +498,14 @@ describe("[DNS client]", function() assert.same(cli.r_opts.retrans, 3) assert.same(cli.r_opts.timeout, 1) - local answers, err = cli:resolve("srv.timeout.com") + local answers, err = cli:resolve("timeout.test") assert.is_nil(answers) - assert.match("DNS server error: failed to receive reply from UDP server .*: timeout, Query Time: %d+%.%d+ 0.%d+", err) + assert.match("DNS server error: failed to receive reply from UDP server .*: timeout, took %d+ ms", err) assert.same(receive_count, 3) assert.same(query_count, 1) end) - -- KAG-2300 - https://github.com/Kong/kong/issues/10182 + -- KAG-2300 - https://github.test/Kong/kong/issues/10182 -- If we encounter a timeout while talking to the DNS server, -- expect the total timeout to be close to timeout * attemps parameters for _, attempts in ipairs({1, 2}) do @@ -601,7 +525,7 @@ describe("[DNS client]", function() assert.same(cli.r_opts.timeout, timeout) local start_time = ngx.now() - local answers = cli:resolve("timeout.com") + local answers = cli:resolve("timeout.test") assert.is.Nil(answers) assert.is("DNS server error: timeout" .. timeout .. 
attempts) local duration = ngx.now() - start_time @@ -663,11 +587,11 @@ describe("[DNS client]", function() local answers2 = assert(cli:resolve(host)) assert.are.equal(answers, answers2) -- same table from L1 cache - local ttl, _, value = cli.cache:peek("short:" .. host .. ":all") + local ttl, _, value = cli.cache:peek(host .. ":all") assert.same(answers, value) local ttl_diff = answers.ttl - ttl assert(math.abs(ttl_diff - wait_time) < 1, - ("ttl diff:%s s should be near to %s s"):format(ttl_diff, wait_time)) + ("ttl diff:%s s should be near to %s s"):format(ttl_diff, wait_time)) end) it("fetching names case insensitive", function() @@ -725,7 +649,7 @@ describe("[DNS client]", function() end assert.same({ - ["smtp.kong-gateway-testing.link"] = { + ["smtp.kong-gateway-testing.link:all"] = { miss = 1, runs = 1 }, @@ -737,10 +661,6 @@ describe("[DNS client]", function() query = 1, ["query_fail:empty record received"] = 1 } }, cli.stats) - - -- check last successful lookup references - local lastsuccess = cli:_get_last_type(host) - assert.are.equal(resolver.TYPE_A, lastsuccess) end) it("fetching multiple SRV answerss (un-typed)", function() @@ -778,7 +698,7 @@ describe("[DNS client]", function() end assert.same({ - ["cname2srv.kong-gateway-testing.link"] = { + ["cname2srv.kong-gateway-testing.link:all"] = { miss = 1, runs = 1, }, @@ -838,7 +758,7 @@ describe("[DNS client]", function() end) it("fetching IPv6 in an SRV answers adds brackets",function() - local host = "hello.world" + local host = "hello.world.test" local address = "::1" local entry = {{ type = resolver.TYPE_SRV, @@ -872,13 +792,13 @@ describe("[DNS client]", function() resolv_conf = "/etc/resolv.conf", order = {"SRV", "A", "AAAA"} })) - assert.equal(resolver.TYPE_A, cli:_get_last_type("localhost")) -- success set to A as it is the preferred option + assert(cli) local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"SRV", "AAAA", "A"} })) - assert.equal(resolver.TYPE_AAAA, 
cli:_get_last_type("localhost")) -- success set to AAAA as it is the preferred option + assert(cli) end) @@ -931,6 +851,7 @@ describe("[DNS client]", function() assert.same(order[n], ip) end end) + it("SRV-answers, round-robin on lowest prio",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) local host = "hello.world.test" @@ -967,11 +888,11 @@ describe("[DNS client]", function() }, } -- insert in the cache - cli.cache:set(entry[1].name .. ":" .. entry[1].type, {ttl=0}, entry) + cli.cache:set(entry[1].name .. ":all", {ttl=0}, entry) local results = {} for _ = 1,20 do - local _, port = cli:resolve(host, { return_random = true }) + local _, port = cli:resolve_address(host) results[port] = (results[port] or 0) + 1 end @@ -980,9 +901,10 @@ describe("[DNS client]", function() assert.equal(10, results[8000] or 0) --priority 10, 50% of hits assert.equal(10, results[8002] or 0) --priority 10, 50% of hits end) + it("SRV-answers with 1 entry, round-robin",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - local host = "hello.world" + local host = "hello.world.test" local entry = {{ type = resolver.TYPE_SRV, target = "1.2.3.4", @@ -994,20 +916,21 @@ describe("[DNS client]", function() ttl = 10, }} -- insert in the cache - cli.cache:set(entry[1].name .. ":" .. entry[1].type, { ttl=0 }, entry) + cli.cache:set(entry[1].name .. 
":all", { ttl=0 }, entry) -- repeated lookups, as the first will simply serve the first entry -- and the only second will setup the round-robin scheme, this is -- specific for the SRV answers type, due to the weights for _ = 1 , 10 do - local ip, port = cli:resolve(host, { return_random = true }) + local ip, port = cli:resolve_address(host) assert.same("1.2.3.4", ip) assert.same(321, port) end end) + it("SRV-answers with 0-weight, round-robin",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) - local host = "hello.world" + local host = "hello.world.test" local entry = { { type = resolver.TYPE_SRV, @@ -1041,19 +964,20 @@ describe("[DNS client]", function() }, } -- insert in the cache - cli.cache:set(entry[1].name .. ":" .. entry[1].type, { ttl = 0 }, entry) + cli.cache:set(entry[1].name .. ":all", { ttl=0 }, entry) -- weight 0 will be weight 1, without any reduction in weight -- of the other ones. local track = {} for _ = 1 , 2002 do --> run around twice - local ip, _ = assert(cli:resolve(host, { return_random = true })) + local ip, _ = assert(cli:resolve_address(host)) track[ip] = (track[ip] or 0) + 1 end assert.equal(1000, track["1.2.3.5"]) assert.equal(1000, track["1.2.3.6"]) assert.equal(2, track["1.2.3.4"]) end) + it("port passing",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local entry_a = {{ @@ -1074,24 +998,25 @@ describe("[DNS client]", function() ttl = 10, }} -- insert in the cache - cli.cache:set(entry_a[1].name..":"..entry_a[1].type, { ttl = 0 }, entry_a) - cli.cache:set(entry_srv[1].name..":"..entry_srv[1].type, { ttl = 0 }, entry_srv) + cli.cache:set(entry_a[1].name..":-1", { ttl = 0 }, entry_a) + cli.cache:set(entry_a[1].name..":all", { ttl = 0 }, entry_a) + cli.cache:set(entry_srv[1].name..":all", { ttl = 0 }, entry_srv) local ip, port local host = "a.answers.test" - ip,port = cli:resolve(host, { return_random = true }) + ip, port = cli:resolve_address(host) assert.is_string(ip) 
assert.is_nil(port) - ip, port = cli:resolve(host, { return_random = true, port = 1234 }) + ip, port = cli:resolve_address(host, 1234) assert.is_string(ip) assert.equal(1234, port) host = "srv.answers.test" - ip, port = cli:resolve(host, { return_random = true }) + ip, port = cli:resolve_address(host) assert.is_string(ip) assert.is_number(port) - ip, port = cli:resolve(host, { return_random = true, port = 0 }) + ip, port = cli:resolve_address(host, 0) assert.is_string(ip) assert.is_number(port) assert.is_not.equal(0, port) @@ -1102,32 +1027,13 @@ describe("[DNS client]", function() local ip, port, host host = "srvport0."..TEST_DOMAIN - ip, port = cli:resolve(host, { return_random = true, port = 10 }) + ip, port = cli:resolve_address(host, 10) assert.is_string(ip) assert.is_number(port) assert.is_equal(10, port) - ip, port = cli:resolve(host, { return_random = true }) - assert.is_string(ip) - assert.is_nil(port) - end) - - it("recursive SRV pointing to itself",function() - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) - local ip, answers, port, host, err, _ - host = "srvrecurse."..TEST_DOMAIN - - -- resolve SRV specific should _not_ return the answers including its - -- recursive entry - answers, err, _ = cli:resolve(host, { qtype = resolver.TYPE_SRV }) - assert.same(answers, nil) - assert.same(err, "dns client error: 101 empty record received") - - -- default order, SRV, A; the recursive SRV answers fails, and it falls - -- back to the IP4 address - ip, port, _ = cli:resolve(host, { return_random = true }) + ip, port = cli:resolve_address(host) assert.is_string(ip) - assert.is_equal("10.0.0.44", ip) assert.is_nil(port) end) @@ -1138,41 +1044,41 @@ describe("[DNS client]", function() type = resolver.TYPE_A, address = "5.6.7.8", class = 1, - name = "hello.world", + name = "hello.world.test", ttl = 10, }} local AAAA_entry = {{ type = resolver.TYPE_AAAA, address = "::1", class = 1, - name = "hello.world", + name = "hello.world.test", ttl = 10, }} 
-- insert in the cache - cli.cache:set(A_entry[1].name..":"..A_entry[1].type, { ttl=0 }, A_entry) - cli.cache:set(AAAA_entry[1].name..":"..AAAA_entry[1].type, { ttl=0 }, AAAA_entry) + cli.cache:set(A_entry[1].name..":all", { ttl=0 }, A_entry) + cli.cache:set(AAAA_entry[1].name..":all", { ttl=0 }, AAAA_entry) end + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"AAAA", "A"} })) config(cli) - local ip,err = cli:resolve("hello.world", { return_random = true }) + local ip, err = cli:resolve_address("hello.world.test") assert.same(err, nil) assert.equals(ip, "::1") + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"A", "AAAA"} })) config(cli) - ip = cli:resolve("hello.world", { return_random = true }) - assert.equals(ip, "5.6.7.8") + ip = cli:resolve_address("hello.world.test") + --assert.equals(ip, "5.6.7.8") + assert.equals(ip, "::1") end) + it("handling of empty responses", function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - local empty_entry = { - touch = 0, - expire = 0, - } - -- insert in the cache - cli.cache[resolver.TYPE_A..":".."hello.world"] = empty_entry + -- insert empty records into cache + cli.cache:set("hello.world.test:all", { ttl=0 }, { errcode = 3 }) -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table - local ip, port = cli:resolve("hello.world", { return_random = true, port = 123, cache_only = true }) + local ip, port = cli:resolve_address("hello.world.test", 123, true) assert.is_nil(ip) assert.is.string(port) -- error message end) @@ -1182,7 +1088,7 @@ describe("[DNS client]", function() local valid_ttl = 0.1 local empty_ttl = 0.1 local stale_ttl = 0.1 - local qname = "konghq.com" + local qname = "konghq.test" local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", empty_ttl = empty_ttl, @@ -1203,7 +1109,7 @@ describe("[DNS client]", function() local answers, _, _ = cli:resolve(qname, { qtype = 
resolver.TYPE_A }) assert.equal(valid_ttl, answers.ttl) - local ttl = cli.cache:peek("short:" .. qname .. ":1") + local ttl = cli.cache:peek(qname .. ":1") assert.is_near(valid_ttl, ttl, 0.1) end) @@ -1269,7 +1175,7 @@ describe("[DNS client]", function() --empty responses should be cached for a configurable time local error_ttl = 0.1 local stale_ttl = 0.1 - local qname = "realname.com" + local qname = "realname.test" local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", error_ttl = error_ttl, @@ -1401,3 +1307,6 @@ describe("[DNS client]", function() end) end) + +-- TODO +-- resolver.new set pper name hostname deadloop ? diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index a0578a6ca141..0a28fc2ed9be 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -117,11 +117,11 @@ describe("[DNS client cache]", function() local cli, mock_records, config before_each(function() - writefile(resolv_path, "search domain.com") + writefile(resolv_path, "search domain.test") config = { nameservers = { "198.51.100.0" }, ndots = 1, - search = { "domain.com" }, + search = { "domain.test" }, hosts = {}, order = { "LAST", "SRV", "A", "AAAA" }, error_ttl = 0.5, @@ -137,41 +137,41 @@ describe("[DNS client cache]", function() it("are stored in cache without type", function() mock_records = { - ["myhost1.domain.com:"..resolver.TYPE_A] = {{ + ["myhost1.domain.test:"..resolver.TYPE_A] = {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost1.domain.com", + name = "myhost1.domain.test", ttl = 30, }} } local answers = cli:resolve("myhost1") - assert.equal(answers, cli.cache:get("short:myhost1:all")) + assert.equal(answers, cli.cache:get("myhost1:all")) end) it("are stored in cache with type", function() mock_records = { - ["myhost2.domain.com:"..resolver.TYPE_A] = {{ + 
["myhost2.domain.test:"..resolver.TYPE_A] = {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost2.domain.com", + name = "myhost2.domain.test", ttl = 30, }} } local answers = cli:resolve("myhost2", { qtype = resolver.TYPE_A }) - assert.equal(answers, cli.cache:get("short:myhost2:" .. resolver.TYPE_A)) + assert.equal(answers, cli.cache:get("myhost2:" .. resolver.TYPE_A)) end) it("are resolved from cache without type", function() mock_records = {} - cli.cache:set("short:myhost3:all", {ttl=30+4}, {{ + cli.cache:set("myhost3:all", {ttl=30+4}, {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost3.domain.com", + name = "myhost3.domain.test", ttl = 30, }, ttl = 30, @@ -179,17 +179,17 @@ describe("[DNS client cache]", function() }) local answers = cli:resolve("myhost3") - assert.same(answers, cli.cache:get("short:myhost3:all")) + assert.same(answers, cli.cache:get("myhost3:all")) end) it("are resolved from cache with type", function() mock_records = {} local cli = client_new() - cli.cache:set("short:myhost4:" .. resolver.TYPE_A, {ttl=30+4}, {{ + cli.cache:set("myhost4:" .. resolver.TYPE_A, {ttl=30+4}, {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost4.domain.com", + name = "myhost4.domain.test", ttl = 30, }, ttl = 30, @@ -197,7 +197,7 @@ describe("[DNS client cache]", function() }) local answers = cli:resolve("myhost4", { qtype = resolver.TYPE_A }) - assert.equal(answers, cli.cache:get("short:myhost4:" .. resolver.TYPE_A)) + assert.equal(answers, cli.cache:get("myhost4:" .. 
resolver.TYPE_A)) end) it("ttl in cache is honored for short name entries", function() @@ -205,11 +205,11 @@ describe("[DNS client cache]", function() -- in the short name case the same record is inserted again in the cache -- and the lru-ttl has to be calculated, make sure it is correct mock_records = { - ["myhost6.domain.com:"..resolver.TYPE_A] = {{ + ["myhost6.domain.test:"..resolver.TYPE_A] = {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost6.domain.com", + name = "myhost6.domain.test", ttl = ttl, }} } @@ -217,7 +217,7 @@ describe("[DNS client cache]", function() -- resolve and check whether we got the mocked record local answers = cli:resolve("myhost6") - assert_same_answers(answers, mock_records["myhost6.domain.com:"..resolver.TYPE_A]) + assert_same_answers(answers, mock_records["myhost6.domain.test:"..resolver.TYPE_A]) -- replace our mocked list with the copy made (new table, so no equality) mock_records = mock_copy @@ -226,7 +226,7 @@ describe("[DNS client cache]", function() sleep(ttl + config.stale_ttl / 2) -- fresh result, but it should not affect answers2 - mock_records["myhost6.domain.com:"..resolver.TYPE_A][1].tag = "new" + mock_records["myhost6.domain.test:"..resolver.TYPE_A][1].tag = "new" -- resolve again, now getting same record, but stale, this will trigger -- background refresh query @@ -246,7 +246,7 @@ describe("[DNS client cache]", function() assert.equal(answers3[1].tag, "new") assert.falsy(answers3.expired) assert.not_equal(answers, answers3) -- must be a different record now - assert_same_answers(answers3, mock_records["myhost6.domain.com:"..resolver.TYPE_A]) + assert_same_answers(answers3, mock_records["myhost6.domain.test:"..resolver.TYPE_A]) -- the 'answers3' resolve call above will also trigger a new background query -- (because the sleep of 0.1 equals the records ttl of 0.1) @@ -262,7 +262,7 @@ describe("[DNS client cache]", function() errstr = "server failure", } mock_records = { - 
["myhost7.domain.com:"..resolver.TYPE_A] = rec, + ["myhost7.domain.test:"..resolver.TYPE_A] = rec, ["myhost7:"..resolver.TYPE_A] = rec, } @@ -278,7 +278,7 @@ describe("[DNS client cache]", function() errstr = "name error", } mock_records = { - ["myhost8.domain.com:"..resolver.TYPE_A] = rec, + ["myhost8.domain.test:"..resolver.TYPE_A] = rec, ["myhost8:"..resolver.TYPE_A] = rec, } @@ -295,11 +295,11 @@ describe("[DNS client cache]", function() local cli, mock_records, config before_each(function() - writefile(resolv_path, "search domain.com") + writefile(resolv_path, "search domain.test") config = { nameservers = { "198.51.100.0" }, ndots = 1, - search = { "domain.com" }, + search = { "domain.test" }, hosts = {}, resolvConf = {}, order = { "LAST", "SRV", "A", "AAAA" }, @@ -319,18 +319,18 @@ describe("[DNS client cache]", function() type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost9.domain.com", + name = "myhost9.domain.test", ttl = 0.1, }} mock_records = { - ["myhost9.domain.com:"..resolver.TYPE_A] = rec1, + ["myhost9.domain.test:"..resolver.TYPE_A] = rec1, } local answers, err = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) assert.is_nil(err) -- check that the cache is properly populated assert_same_answers(rec1, answers) - answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) assert_same_answers(rec1, answers) sleep(0.15) -- make sure we surpass the ttl of 0.1 of the record, so it is now stale. 
@@ -340,7 +340,7 @@ describe("[DNS client cache]", function() errstr = "server failure", } mock_records = { - ["myhost9.domain.com:"..resolver.TYPE_A] = rec2, + ["myhost9.domain.test:"..resolver.TYPE_A] = rec2, ["myhost9:"..resolver.TYPE_A] = rec2, } -- doing a resolve will trigger the background query now @@ -351,7 +351,7 @@ describe("[DNS client cache]", function() -- background resolve is now complete, check the cache, it should still have the -- stale record, and it should not have been replaced by the error -- - answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) assert.is_true(answers.expired) answers.expired = nil assert_same_answers(rec1, answers) @@ -362,18 +362,18 @@ describe("[DNS client cache]", function() type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost9.domain.com", + name = "myhost9.domain.test", ttl = 0.1, }} mock_records = { - ["myhost9.domain.com:"..resolver.TYPE_A] = rec1, + ["myhost9.domain.test:"..resolver.TYPE_A] = rec1, } local answers, err = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) assert.is_nil(err) -- check that the cache is properly populated assert_same_answers(rec1, answers) - answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) assert_same_answers(rec1, answers) sleep(0.15) -- make sure we surpass the ttl of 0.1 of the record, so it is now stale. 
@@ -383,7 +383,7 @@ describe("[DNS client cache]", function() errstr = "name error", } mock_records = { - ["myhost9.domain.com:"..resolver.TYPE_A] = rec2, + ["myhost9.domain.test:"..resolver.TYPE_A] = rec2, ["myhost9:"..resolver.TYPE_A] = rec2, } -- doing a resolve will trigger the background query now @@ -393,7 +393,7 @@ describe("[DNS client cache]", function() sleep(0.1) -- background resolve is now complete, check the cache, it should now have been -- replaced by the name error - assert.equal(rec2, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) + assert.equal(rec2, cli.cache:get("myhost9:" .. resolver.TYPE_A)) end) it("empty records do not replace stale records", function() @@ -401,23 +401,23 @@ describe("[DNS client cache]", function() type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myhost9.domain.com", + name = "myhost9.domain.test", ttl = 0.1, }} mock_records = { - ["myhost9.domain.com:"..resolver.TYPE_A] = rec1, + ["myhost9.domain.test:"..resolver.TYPE_A] = rec1, } local answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) -- check that the cache is properly populated assert_same_answers(rec1, answers) - assert_same_answers(rec1, cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A)) + assert_same_answers(rec1, cli.cache:get("myhost9:" .. resolver.TYPE_A)) sleep(0.15) -- stale -- clear mock records, such that we return name errors instead of records local rec2 = {} mock_records = { - ["myhost9.domain.com:"..resolver.TYPE_A] = rec2, + ["myhost9.domain.test:"..resolver.TYPE_A] = rec2, ["myhost9:"..resolver.TYPE_A] = rec2, } -- doing a resolve will trigger the background query now @@ -427,7 +427,7 @@ describe("[DNS client cache]", function() sleep(0.1) -- background resolve is now complete, check the cache, it should still have the -- stale record, and it should not have been replaced by the empty record - answers = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_A) + answers = cli.cache:get("myhost9:" .. 
resolver.TYPE_A) assert.is_true(answers.expired) -- we get the stale record, now marked as expired answers.expired = nil assert_same_answers(rec1, answers) @@ -440,22 +440,22 @@ describe("[DNS client cache]", function() -- (additional section), but then they must be stored obviously. local CNAME1 = { type = resolver.TYPE_CNAME, - cname = "myotherhost.domain.com", + cname = "myotherhost.domain.test", class = 1, - name = "myhost9.domain.com", + name = "myhost9.domain.test", ttl = 0.1, } local A2 = { type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "myotherhost.domain.com", + name = "myotherhost.domain.test", ttl = 60, } mock_records = setmetatable({ - ["myhost9.domain.com:"..resolver.TYPE_CNAME] = { cycle_aware_deep_copy(CNAME1) }, -- copy to make it different - ["myhost9.domain.com:"..resolver.TYPE_A] = { CNAME1, A2 }, -- not there, just a reference and target - ["myotherhost.domain.com:"..resolver.TYPE_A] = { A2 }, + ["myhost9.domain.test:"..resolver.TYPE_CNAME] = { cycle_aware_deep_copy(CNAME1) }, -- copy to make it different + ["myhost9.domain.test:"..resolver.TYPE_A] = { CNAME1, A2 }, -- not there, just a reference and target + ["myotherhost.domain.test:"..resolver.TYPE_A] = { A2 }, }, { -- do not do lookups, return empty on anything else __index = function(self, key) @@ -468,145 +468,16 @@ describe("[DNS client cache]", function() ngx.sleep(0.2) -- wait for it to become stale assert(cli:resolve("myhost9"), { return_random = true }) - local cached = cli.cache:get("myhost9.domain.com:" .. resolver.TYPE_CNAME) + local cached = cli.cache:get("myhost9.domain.test:" .. 
resolver.TYPE_CNAME) assert.same(nil, cached) end) end) --- ============================================== --- success type caching --- ============================================== - - - describe("success types", function() - - local cli - local mock_records - before_each(function() - writefile(resolv_path, "search domain.com") - local config = { - ndots = 1, - search = { "domain.com" }, - hosts = {}, - resolvConf = {}, - order = { "LAST", "SRV", "A", "AAAA" }, - error_ttl = 0.5, - stale_ttl = 0.5, - enable_ipv6 = false, - } - cli = assert(client_new(config)) - - query_func = function(self, original_query_func, qname, opts) - return mock_records[qname..":"..opts.qtype] or { errcode = 3, errstr = "name error" } - end - end) - - it("in add. section are not stored for non-listed types", function() - mock_records = { - ["demo.service.consul:" .. resolver.TYPE_SRV] = { - { - type = resolver.TYPE_SRV, - class = 1, - name = "demo.service.consul", - target = "192.168.5.232.node.api_test.consul", - priority = 1, - weight = 1, - port = 32776, - ttl = 0, - }, { - type = resolver.TYPE_TXT, -- Not in the `order` as configured ! - class = 1, - name = "192.168.5.232.node.api_test.consul", - txt = "consul-network-segment=", - ttl = 0, - }, - } - } - cli:resolve("demo.service.consul", { return_random = true }) - local success = cli.cache:get("192.168.5.232.node.api_test.consul") - assert.not_equal(resolver.TYPE_TXT, success) - end) - - it("in add. section are stored for listed types", function() - mock_records = { - ["demo.service.consul:" .. resolver.TYPE_SRV] = { - { - type = resolver.TYPE_SRV, - class = 1, - name = "demo.service.consul", - target = "192.168.5.232.node.api_test.consul", - priority = 1, - weight = 1, - port = 32776, - ttl = 0, - }, { - type = resolver.TYPE_A, -- In configured `order` ! - class = 1, - name = "192.168.5.232.node.api_test.consul", - address = "192.168.5.232", - ttl = 0, - }, { - type = resolver.TYPE_TXT, -- Not in the `order` as configured ! 
- class = 1, - name = "192.168.5.232.node.api_test.consul", - txt = "consul-network-segment=", - ttl = 0, - }, - } - } - local _, err, tries = cli:resolve("demo.service.consul", { return_random = true }) - assert.same(err, "dns server error: 3 name error") - assert.same({ - { - "192.168.5.232.node.api_test.consul:SRV", - "dns server error: 3 name error", - }, - { - "192.168.5.232.node.api_test.consul:A", - "dns server error: 3 name error", - }, - { - "192.168.5.232.node.api_test.consul:AAAA", - "dns server error: 3 name error", - }, - }, tries) - end) - - it("are not overwritten by add. section info", function() - mock_records = { - ["demo.service.consul:" .. resolver.TYPE_SRV] = { - { - type = resolver.TYPE_SRV, - class = 1, - name = "demo.service.consul", - target = "192.168.5.232.node.api_test.consul", - priority = 1, - weight = 1, - port = 32776, - ttl = 0, - }, { - type = resolver.TYPE_A, -- In configured `order` ! - class = 1, - name = "another.name.consul", - address = "192.168.5.232", - ttl = 0, - }, - } - } - cli:_insert_last_type("another.name.consul", resolver.TYPE_AAAA) - cli:resolve("demo.service.consul", { return_random = true }) - local success = cli:_get_last_type("another.name.consul") - assert.equal(resolver.TYPE_AAAA, success) - end) - - end) - - describe("hosts entries", function() -- hosts file names are cached for 10 years, verify that -- it is not overwritten with valid_ttl settings. 
- -- Regressions reported in https://github.com/Kong/kong/issues/7444 + -- Regressions reported in https://github.com/Kong/kong/issues/7444 local cli, mock_records, config -- luacheck: ignore writefile(resolv_path, "") writefile(hosts_path, "127.0.0.1 myname.lan") @@ -623,11 +494,12 @@ describe("[DNS client cache]", function() end) it("entries from hosts file ignores valid_ttl overrides, Kong/kong #7444", function() + local record = cli:resolve("myname.lan") + assert.equal("127.0.0.1", record[1].address) ngx.sleep(0.2) -- must be > valid_ttl + stale_ttl - local record = cli.cache:get("myname.lan:1") + record = cli.cache:get("myname.lan:all") assert.equal("127.0.0.1", record[1].address) end) end) - end) diff --git a/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua index 441c9958fb35..a31dafda6bfa 100644 --- a/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua +++ b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua @@ -42,15 +42,21 @@ describe("[dns-client] inter-process communication:",function() return count_log_lines("DNS query completed") == num_workers end, 5) - assert.same(count_log_lines("first:query:ipc.com"), 1) + assert.same(count_log_lines("first:query:ipc.test"), 1) assert.same(count_log_lines("first:answers:1.2.3.4"), num_workers) - assert.same(count_log_lines("stale:query:ipc.com"), 1) + assert.same(count_log_lines("stale:query:ipc.test"), 1) assert.same(count_log_lines("stale:answers:1.2.3.4."), num_workers) - assert.same(count_log_lines("stale:broadcast:ipc.com:33"), 1) + -- wait for background tasks to finish + helpers.wait_until(function() + return count_log_lines("stale:broadcast:ipc.test:all") == 1 + end, 5) + -- "stale:lru ..." means the progress of the two workers is about the same. -- "first:lru ..." means one of the workers is far behind the other.
- assert.same(count_log_lines(":lru delete:ipc.com:33"), 1) + helpers.wait_until(function() + return count_log_lines(":lru delete:ipc.test:all") == 1 + end, 5) end) end) diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua index a613ebd1c8f1..d55c89bbed90 100644 --- a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -37,16 +37,8 @@ describe("[DNS client stats]", function() end) describe("stats", function() - local cli, mock_records, config + local mock_records before_each(function() - config = { - order = { "LAST", "A", "CNAME" }, - error_ttl = 0.1, - empty_ttl = 0.1, - stale_ttl = 1, - } - cli = assert(client_new(config)) - query_func = function(self, qname, opts) local records = mock_records[qname..":"..opts.qtype] if type(records) == "string" then @@ -56,39 +48,91 @@ describe("[DNS client stats]", function() end end) - it("stats", function() + it("resolve SRV", function() mock_records = { - ["hit.com:"..resolver.TYPE_A] = {{ + ["_ldaps._tcp.srv.test:" .. resolver.TYPE_SRV] = {{ + type = resolver.TYPE_SRV, + target = "srv.test", + port = 636, + weight = 10, + priority = 10, + class = 1, + name = "_ldaps._tcp.srv.test", + ttl = 10, + }}, + ["srv.test:" .. resolver.TYPE_A] = {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, - name = "hit.com", + name = "srv.test", ttl = 30, }}, - ["nameserver_fail.com:" .. resolver.TYPE_A] = "nameserver failed", - ["stale.com" .. 
resolver.TYPE_A] = {{ + } + + local cli = assert(client_new()) + cli:resolve("_ldaps._tcp.srv.test") + + local query_last_time + for k, v in pairs(cli.stats) do + if v.query_last_time then + query_last_time = v.query_last_time + v.query_last_time = nil + end + end + assert.match("^%d+$", query_last_time) + + assert.same({ + ["_ldaps._tcp.srv.test:33"] = { + ["query"] = 1, + ["query_succ"] = 1, + }, + ["_ldaps._tcp.srv.test:all"] = { + ["miss"] = 1, + ["runs"] = 1, + }, + }, cli.stats) + end) + + it("resolve all types", function() + mock_records = { + ["hit.test:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "hit.test", + ttl = 30, + }}, + ["nameserver_fail.test:" .. resolver.TYPE_A] = "nameserver failed", + ["stale.test:" .. resolver.TYPE_A] = {{ type = resolver.TYPE_CNAME, - address = "stale.com", + address = "stale.test", class = 1, - name = "stale.com", + name = "stale.test", ttl = 0.1, }}, } + local cli = assert(client_new({ + order = { "A" }, + error_ttl = 0.1, + empty_ttl = 0.1, + stale_ttl = 1, + })) + -- "hit_lru" - cli:resolve("hit.com") - cli:resolve("hit.com") + cli:resolve("hit.test") + cli:resolve("hit.test") -- "hit_shm" - cli.cache.lru:delete("short:hit.com:all") - cli:resolve("hit.com") + cli.cache.lru:delete("hit.test:all") + cli:resolve("hit.test") -- "query_err:nameserver failed" - cli:resolve("nameserver_fail.com") + cli:resolve("nameserver_fail.test") -- "stale" - cli:resolve("stale.com") + cli:resolve("stale.test") sleep(0.2) - cli:resolve("stale.com") + cli:resolve("stale.test") local query_last_time for k, v in pairs(cli.stats) do @@ -97,36 +141,35 @@ describe("[DNS client stats]", function() v.query_last_time = nil end end - - assert.match("^%d+%.%d+ 0%.%d+$", query_last_time) + assert.match("^%d+$", query_last_time) assert.same({ - ["hit.com"] = { + ["hit.test:all"] = { ["hit_lru"] = 1, ["runs"] = 3, ["miss"] = 1, - ["hit_shm"] = 1 + ["hit_shm"] = 1, }, - ["hit.com:1"] = { + ["hit.test:1"] = 
{ ["query"] = 1, - ["query_succ"] = 1 + ["query_succ"] = 1, }, - ["nameserver_fail.com"] = { + ["nameserver_fail.test:all"] = { ["fail"] = 1, - ["runs"] = 1 + ["runs"] = 1, }, - ["nameserver_fail.com:1"] = { + ["nameserver_fail.test:1"] = { ["query"] = 1, - ["query_fail_nameserver"] = 1 + ["query_fail_nameserver"] = 1, }, - ["stale.com"] = { - ["fail"] = 2, - ["runs"] = 2 + ["stale.test:all"] = { + ["miss"] = 2, + ["runs"] = 2, + ["stale"] = 1, }, - ["stale.com:1"] = { + ["stale.test:1"] = { ["query"] = 1, - ["query_fail:name error"] = 1, - ["stale"] = 1 + ["query_fail:empty record received"] = 1, }, }, cli.stats) end) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index 14bc52e7bc69..19c2a143713a 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -38,7 +38,7 @@ for _, strategy in helpers.each_strategy() do assert(type(json.worker.count) == "number") assert(type(json.stats) == "table") - assert(type(json.stats["127.0.0.1"].runs) == "number") + assert(type(json.stats["127.0.0.1:all"].runs) == "number") end) end) diff --git a/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua index 1cdfb2021cd9..ba9d3a4f38f3 100644 --- a/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua +++ b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua @@ -14,7 +14,7 @@ local PRE = "dns-client-test:" local function test() local phase = "" - local host = "ipc.com" + local host = "ipc.test" -- inject resolver.query require("resty.dns.resolver").query = function(self, name, opts) diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 689f8a980c9d..6ddfda8bb55a 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -41,6 +41,7 @@ function _M.dnsExpire(client, record) local dnscache = 
client.getcache() dnscache:delete(record[1].name .. ":" .. record[1].type) dnscache:delete("short:" .. record[1].name .. ":" .. "all") + dnscache:delete(record[1].name .. ":" .. "all") record.expire = gettime() - 1 end @@ -84,8 +85,8 @@ function _M.dnsSRV(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - -- insert last-succesful lookup type - client.getobj():_insert_last_type(records[1].name, records[1].type) + key = records[1].name..":all" + dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) return records end @@ -126,8 +127,8 @@ function _M.dnsA(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl) - -- insert last-succesful lookup type - client.getobj():_insert_last_type(records[1].name, records[1].type) + key = records[1].name..":all" + dnscache:set(key, records, records[1].ttl) return records end @@ -167,8 +168,8 @@ function _M.dnsAAAA(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - -- insert last-succesful lookup type - client.getobj():_insert_last_type(records[1].name, records[1].type) + key = records[1].name..":all" + dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) return records end From 6958eb230e6872e8da889607f72db85df6668d72 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 17 Jun 2024 15:55:54 +0800 Subject: [PATCH 102/126] only use error_ttl, remove empty_ttl logic --- kong/dns/README.md | 3 +- kong/dns/client.lua | 23 +++--- .../30-new-dns-client/02-old_client_spec.lua | 71 ++++++++++--------- .../03-old_client_cache_spec.lua | 39 ---------- .../30-new-dns-client/05-client_stat_spec.lua | 29 ++++++-- 5 files changed, 71 insertions(+), 94 deletions(-) diff --git a/kong/dns/README.md 
b/kong/dns/README.md index a838f6518aaa..344b7a54e292 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -39,8 +39,7 @@ Performs a series of initialization operations: * TTL options: * `valid_ttl`: same to the option `dns_valid_ttl` in `kong.conf`. * `stale_ttl`: same to the option `dns_stale_ttl` in `kong.conf`. - * `empty_ttl`: same to the option `dns_not_found_ttl` in `kong.conf`. - * `bad_ttl`: same to the option `dns_error_ttl` in `kong.conf`. + * `error_ttl`: same to the option `dns_error_ttl` in `kong.conf`. * `hosts`: (default: `/etc/hosts`) * the path of `hosts` file. * `resolv_conf`: (default: `/etc/resolv.conf`) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 3fba4c503836..01e6bb2284b7 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -39,7 +39,6 @@ local PREFIX = "[dns_client] " local DEFAULT_ERROR_TTL = 1 -- unit: second local DEFAULT_STALE_TTL = 4 -local DEFAULT_EMPTY_TTL = 30 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 @@ -66,9 +65,6 @@ local HIT_LEVEL_TO_NAME = { [4] = "hit_stale", } --- server replied error from the DNS protocol -local NAME_ERROR_CODE = 3 -- response code 3 as "Name Error" or "NXDOMAIN" - -- client specific error local CACHE_ONLY_ERROR_CODE = 100 local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" @@ -256,7 +252,7 @@ function _M.new(opts) local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { ipc = ipc, - neg_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + neg_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, lru_size = opts.cache_size or 10000, shm_locks = ngx.shared.kong_locks and "kong_locks", resty_lock_opts = resty_lock_opts, @@ -282,7 +278,6 @@ function _M.new(opts) valid_ttl = opts.valid_ttl, error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, - empty_ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, enable_srv = enable_srv, enable_ipv4 = enable_ipv4, 
enable_ipv6 = enable_ipv6, @@ -295,7 +290,7 @@ function _M.new(opts) EMPTY_ANSWERS = { errcode = EMPTY_RECORD_ERROR_CODE, errstr = EMPTY_RECORD_ERROR_MESSAGE, - ttl = opts.empty_ttl or DEFAULT_EMPTY_TTL, + ttl = opts.error_ttl or DEFAULT_ERROR_TTL, }, }, MT) end @@ -304,7 +299,7 @@ end local function process_answers(self, qname, qtype, answers) local errcode = answers.errcode if errcode then - answers.ttl = errcode == NAME_ERROR_CODE and self.empty_ttl or self.error_ttl + answers.ttl = self.error_ttl return answers end @@ -465,12 +460,12 @@ local function stale_update_task(premature, self, key, name, qtype) local tries = setmetatable({}, TRIES_MT) local answers = resolve_query_types(self, name, qtype, tries) - if answers and (not answers.errcode or answers.errcode == NAME_ERROR_CODE) then + if answers and not answers.errcode then + log(DEBUG, PREFIX, "update stale DNS records: ", #answers) self.cache:set(key, { ttl = answers.ttl }, answers) - end - if not answers or answers.errcode then - log(WARN, PREFIX, "Updating stale DNS records failed. Tried: ", tostring(tries)) + else + log(DEBUG, PREFIX, "failed to update stale DNS records: ", tostring(tries)) end end @@ -521,7 +516,7 @@ local function resolve_callback(self, name, qtype, cache_only, tries) -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then -- initiates an asynchronous background updating task to refresh it. 
local ttl, _, answers = self.cache:peek(key, true) - if answers and ttl then + if answers and not answers.errcode and ttl then if not answers.expired then answers.expire = now() + ttl answers.expired = true @@ -548,6 +543,8 @@ local function resolve_callback(self, name, qtype, cache_only, tries) return CACHE_ONLY_ANSWERS, nil, -1 end + log(DEBUG, PREFIX, "cache miss, try to query ", key) + return resolve_query_types(self, name, qtype, tries) end diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 47947e62c3e9..0ec7ed50e0d2 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -1086,12 +1086,12 @@ describe("[DNS client]", function() it("verifies valid_ttl", function() local valid_ttl = 0.1 - local empty_ttl = 0.1 + local error_ttl = 0.1 local stale_ttl = 0.1 local qname = "konghq.test" local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - empty_ttl = empty_ttl, + error_ttl = error_ttl, stale_ttl = stale_ttl, valid_ttl = valid_ttl, })) @@ -1115,12 +1115,12 @@ describe("[DNS client]", function() it("verifies ttl and caching of empty responses and name errors", function() --empty/error responses should be cached for a configurable time - local empty_ttl = 0.1 + local error_ttl = 0.1 local stale_ttl = 0.1 local qname = "really.really.really.does.not.exist."..TEST_DOMAIN local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - empty_ttl = empty_ttl, + error_ttl = error_ttl, stale_ttl = stale_ttl, })) @@ -1148,15 +1148,16 @@ describe("[DNS client]", function() assert.equal(answers1, answers2) assert.falsy(answers2.expired) - -- wait for expiry of ttl and retry, still called only once - ngx.sleep(empty_ttl+0.5 * stale_ttl) + -- wait for expiry of ttl and retry, it will not use the cached one + -- because the cached one contains no available IP addresses + ngx.sleep(error_ttl+0.5 * stale_ttl)
answers2, err2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.is_nil(answers2) - assert.are.equal(1, call_count) + assert.are.equal(2, call_count) assert.are.equal(NOT_FOUND_ERROR, err2) answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) - assert.is_true(answers2.expired) -- by now, record is marked as expired + assert.falsy(answers2.expired) -- refreshed record -- wait for expiry of stale_ttl and retry, should be called twice now ngx.sleep(0.75 * stale_ttl) @@ -1171,14 +1172,12 @@ describe("[DNS client]", function() assert.falsy(answers2.expired) -- new answers, not expired end) - it("verifies ttl and caching of (other) dns errors", function() - --empty responses should be cached for a configurable time - local error_ttl = 0.1 + it("verifies stale_ttl for available records #ttt", function() local stale_ttl = 0.1 + local ttl = 0.1 local qname = "realname.test" local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - error_ttl = error_ttl, stale_ttl = stale_ttl, })) @@ -1186,32 +1185,32 @@ describe("[DNS client]", function() local call_count = 0 query_func = function(self, original_query_func, name, options) call_count = call_count + 1 - return { errcode = 5, errstr = "refused" } + return {{ + type = resolver.TYPE_A, + address = "1.1.1.1", + class = 1, + name = name, + ttl = ttl, + }} end -- initial request to populate the cache - local answers1, answers2, err1, err2, _ - answers1, err1, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) - assert.is_nil(answers1) + local answers1, answers2 + answers1 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers1[1].address, "1.1.1.1") assert.are.equal(call_count, 1) - assert.are.equal("dns server error: 5 refused", err1) - answers1 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert.falsy(answers1.expired) -- try again, HIT from cache, not stale - answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) - assert.is_nil(answers2) + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.are.equal(call_count, 1) - assert.are.equal(err1, err2) - answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) - assert.are.equal(answers1, answers2) - assert.falsy(answers1.expired) + assert(answers1 == answers2) -- wait for expiry of ttl and retry, HIT and stale - ngx.sleep(error_ttl + 0.5 * stale_ttl) - answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) - assert.is_nil(answers2) + ngx.sleep(ttl + 0.5 * stale_ttl) + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 1) - assert.are.equal(err1, err2) answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) assert.is_true(answers2.expired) @@ -1223,16 +1222,18 @@ describe("[DNS client]", function() ngx.sleep(0.1 * stale_ttl) assert.are.equal(call_count, 2) - -- wait for expiry of stale_ttl and retry, 2 calls, new result - ngx.sleep(0.75 * stale_ttl) + -- hit the cached one that is updated by the stale task + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 2) + assert.falsy(answers2.expired) - answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) - assert.is_nil(answers2) + -- The stale one will be completely eliminated from the cache. + ngx.sleep(ttl + stale_ttl) + + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 3) - assert.are.equal(err1, err2) - answers2 = assert(cli.cache:get(qname .. ":" ..
resolver.TYPE_A)) - assert.are_not.equal(answers1, answers2) -- a new answers assert.falsy(answers2.expired) end) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 0a28fc2ed9be..9fb6bf755f30 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -357,45 +357,6 @@ describe("[DNS client cache]", function() assert_same_answers(rec1, answers) end) - it("name errors do replace stale records", function() - local rec1 = {{ - type = resolver.TYPE_A, - address = "1.2.3.4", - class = 1, - name = "myhost9.domain.test", - ttl = 0.1, - }} - mock_records = { - ["myhost9.domain.test:"..resolver.TYPE_A] = rec1, - } - - local answers, err = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) - assert.is_nil(err) - -- check that the cache is properly populated - assert_same_answers(rec1, answers) - answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) - assert_same_answers(rec1, answers) - - sleep(0.15) -- make sure we surpass the ttl of 0.1 of the record, so it is now stale. - -- clear mock records, such that we return name errors instead of records - local rec2 = { - errcode = 3, - errstr = "name error", - } - mock_records = { - ["myhost9.domain.test:"..resolver.TYPE_A] = rec2, - ["myhost9:"..resolver.TYPE_A] = rec2, - } - -- doing a resolve will trigger the background query now - answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired - -- wait again for the background query to complete - sleep(0.1) - -- background resolve is now complete, check the cache, it should now have been - -- replaced by the name error - assert.equal(rec2, cli.cache:get("myhost9:" .. 
resolver.TYPE_A)) - end) - it("empty records do not replace stale records", function() local rec1 = {{ type = resolver.TYPE_A, diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua index d55c89bbed90..00d3d587c19a 100644 --- a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -95,7 +95,7 @@ describe("[DNS client stats]", function() it("resolve all types", function() mock_records = { - ["hit.test:"..resolver.TYPE_A] = {{ + ["hit.test:" .. resolver.TYPE_A] = {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, @@ -104,12 +104,19 @@ describe("[DNS client stats]", function() }}, ["nameserver_fail.test:" .. resolver.TYPE_A] = "nameserver failed", ["stale.test:" .. resolver.TYPE_A] = {{ - type = resolver.TYPE_CNAME, - address = "stale.test", + type = resolver.TYPE_A, + address = "1.2.3.4", class = 1, name = "stale.test", ttl = 0.1, }}, + ["empty_result_not_stale.test:" .. 
resolver.TYPE_A] = {{ + type = resolver.TYPE_CNAME, -- will be ignored compared to type A + cname = "stale.test", + class = 1, + name = "empty_result_not_stale.test", + ttl = 0.1, + }}, } local cli = assert(client_new({ @@ -134,6 +141,10 @@ describe("[DNS client stats]", function() sleep(0.2) cli:resolve("stale.test") + cli:resolve("empty_result_not_stale.test") + sleep(0.2) + cli:resolve("empty_result_not_stale.test") + local query_last_time for k, v in pairs(cli.stats) do if v.query_last_time then @@ -168,9 +179,17 @@ describe("[DNS client stats]", function() ["stale"] = 1, }, ["stale.test:1"] = { - ["query"] = 1, - ["query_fail:empty record received"] = 1, + ["query"] = 2, + ["query_succ"] = 2, + }, + ["empty_result_not_stale.test:1"] = { + ["query"] = 2, + ["query_fail:empty record received"] = 2, }, + ["empty_result_not_stale.test:all"] = { + ["miss"] = 2, + ["runs"] = 2, + } }, cli.stats) end) end) From f9911ea98aa80f6d2840057c70388a96eaefe6b0 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 17 Jun 2024 17:46:54 +0800 Subject: [PATCH 103/126] fix type in readme.md --- kong/dns/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index 344b7a54e292..554443baca64 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -1,7 +1,7 @@ Name ==== -The module is currently Kong only, and builds on top of the `lua-resty-dns` and kong's `lua-resty-mlcache` library. +Kong DNS client - The module is currently Kong only, and builds on top of the `lua-resty-dns` and `lua-resty-mlcache` libraries. 
Table of Contents ================= From 34783dbcd2c02162e1d7bd1d763ae14f112c34a9 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 18 Jun 2024 11:29:02 +0800 Subject: [PATCH 104/126] change paths of test cases directory --- .../{21-dns-client => 21-legacy-dns-client}/01-utils_spec.lua | 0 .../{21-dns-client => 21-legacy-dns-client}/02-client_spec.lua | 0 .../03-client_cache_spec.lua | 0 .../{30-new-dns-client => 30-dns-client}/01-utils_spec.lua | 0 .../{30-new-dns-client => 30-dns-client}/02-old_client_spec.lua | 0 .../03-old_client_cache_spec.lua | 0 .../{30-new-dns-client => 30-dns-client}/04-client_ipc_spec.lua | 0 .../{30-new-dns-client => 30-dns-client}/05-client_stat_spec.lua | 0 spec/02-integration/21-request-debug/01-request-debug_spec.lua | 1 - 9 files changed, 1 deletion(-) rename spec/01-unit/{21-dns-client => 21-legacy-dns-client}/01-utils_spec.lua (100%) rename spec/01-unit/{21-dns-client => 21-legacy-dns-client}/02-client_spec.lua (100%) rename spec/01-unit/{21-dns-client => 21-legacy-dns-client}/03-client_cache_spec.lua (100%) rename spec/01-unit/{30-new-dns-client => 30-dns-client}/01-utils_spec.lua (100%) rename spec/01-unit/{30-new-dns-client => 30-dns-client}/02-old_client_spec.lua (100%) rename spec/01-unit/{30-new-dns-client => 30-dns-client}/03-old_client_cache_spec.lua (100%) rename spec/01-unit/{30-new-dns-client => 30-dns-client}/04-client_ipc_spec.lua (100%) rename spec/01-unit/{30-new-dns-client => 30-dns-client}/05-client_stat_spec.lua (100%) diff --git a/spec/01-unit/21-dns-client/01-utils_spec.lua b/spec/01-unit/21-legacy-dns-client/01-utils_spec.lua similarity index 100% rename from spec/01-unit/21-dns-client/01-utils_spec.lua rename to spec/01-unit/21-legacy-dns-client/01-utils_spec.lua diff --git a/spec/01-unit/21-dns-client/02-client_spec.lua b/spec/01-unit/21-legacy-dns-client/02-client_spec.lua similarity index 100% rename from spec/01-unit/21-dns-client/02-client_spec.lua rename to 
spec/01-unit/21-legacy-dns-client/02-client_spec.lua diff --git a/spec/01-unit/21-dns-client/03-client_cache_spec.lua b/spec/01-unit/21-legacy-dns-client/03-client_cache_spec.lua similarity index 100% rename from spec/01-unit/21-dns-client/03-client_cache_spec.lua rename to spec/01-unit/21-legacy-dns-client/03-client_cache_spec.lua diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-dns-client/01-utils_spec.lua similarity index 100% rename from spec/01-unit/30-new-dns-client/01-utils_spec.lua rename to spec/01-unit/30-dns-client/01-utils_spec.lua diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-dns-client/02-old_client_spec.lua similarity index 100% rename from spec/01-unit/30-new-dns-client/02-old_client_spec.lua rename to spec/01-unit/30-dns-client/02-old_client_spec.lua diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-dns-client/03-old_client_cache_spec.lua similarity index 100% rename from spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua rename to spec/01-unit/30-dns-client/03-old_client_cache_spec.lua diff --git a/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-dns-client/04-client_ipc_spec.lua similarity index 100% rename from spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua rename to spec/01-unit/30-dns-client/04-client_ipc_spec.lua diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-dns-client/05-client_stat_spec.lua similarity index 100% rename from spec/01-unit/30-new-dns-client/05-client_stat_spec.lua rename to spec/01-unit/30-dns-client/05-client_stat_spec.lua diff --git a/spec/02-integration/21-request-debug/01-request-debug_spec.lua b/spec/02-integration/21-request-debug/01-request-debug_spec.lua index ac9047bc32d1..13d626f474cd 100644 --- a/spec/02-integration/21-request-debug/01-request-debug_spec.lua +++ b/spec/02-integration/21-request-debug/01-request-debug_spec.lua @@ 
-113,7 +113,6 @@ local function get_token_file_content(deployment) assert(deployment == "hybrid", "unknown deploy mode") path = pl_path.join(DP_PREFIX, TOKEN_FILE) end - print(path) return pl_file.read(path) end From a0786a9074baee81127b5c3ddf21bf0f0e908457 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 18 Jun 2024 14:09:48 +0800 Subject: [PATCH 105/126] set legacy_dns_client off for some cases --- spec/01-unit/30-dns-client/04-client_ipc_spec.lua | 1 + spec/02-integration/04-admin_api/26-dns_client_spec.lua | 1 + 2 files changed, 2 insertions(+) diff --git a/spec/01-unit/30-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-dns-client/04-client_ipc_spec.lua index a31dafda6bfa..f4ff557b37d0 100644 --- a/spec/01-unit/30-dns-client/04-client_ipc_spec.lua +++ b/spec/01-unit/30-dns-client/04-client_ipc_spec.lua @@ -30,6 +30,7 @@ describe("[dns-client] inter-process communication:",function() nginx_conf = "spec/fixtures/custom_nginx.template", plugins = "bundled,dns-client-test", nginx_main_worker_processes = num_workers, + legacy_dns_client = "off", })) end) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index 19c2a143713a..a36d62d1e8b2 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -12,6 +12,7 @@ for _, strategy in helpers.each_strategy() do assert(helpers.start_kong({ database = strategy, nginx_conf = "spec/fixtures/custom_nginx.template", + legacy_dns_client = "off", })) client = helpers.admin_client() From 12c63fb250720ec879e2dc8a3a0c2ea3342e1efc Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 18 Jun 2024 17:12:57 +0800 Subject: [PATCH 106/126] update changelog yml --- changelog/unreleased/kong/refactor_dns_client.yml | 4 +++- kong/dns/client.lua | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/changelog/unreleased/kong/refactor_dns_client.yml 
b/changelog/unreleased/kong/refactor_dns_client.yml index b561aa7a5e72..c923230d8d9f 100644 --- a/changelog/unreleased/kong/refactor_dns_client.yml +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -2,6 +2,8 @@ message: > Starting from this version, a new DNS client library has been implemented and added into Kong. The new DNS client library has the following changes - Introduced global caching for DNS records across workers, significantly reducing the query load on DNS servers. - Introduced observable statistics for the new DNS client, and a new Admin API `/status/dns` to retrieve them. - - Deprecated the `dns_no_sync` option in the context of the new DNS client library. With the new library, multiple DNS queries for the same name will always be synchronized (even across workers). The `dns_no_sync` option remains functional with the legacy DNS client library. + - Deprecated the `dns_no_sync` option. Multiple DNS queries for the same name will always be synchronized (even across workers). This remains functional with the legacy DNS client library. + - Deprecated the `dns_not_found_ttl` option. It uses the `dns_error_ttl` option for all error responses. This option remains functional with the legacy DNS client library. + - Deprecated `LAST` and `CNAME` values in the `dns_order` option. It's only used to specify supported types, not to decide their priority anymore. The priority of querying types from high to low is: `SRV`, `A`, `AAAA`. 
type: feature scope: Core diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 01e6bb2284b7..9902df39ad7b 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -175,7 +175,7 @@ function _M.new(opts) end end - log(NOTICE, PREFIX, PREFIX, "supported types: ", enable_srv and "srv " or "", + log(NOTICE, PREFIX, "supported types: ", enable_srv and "srv " or "", enable_ipv4 and "ipv4 " or "", enable_ipv6 and "ipv6 " or "") -- parse resolv.conf From a50282df421edcdec59a235e67253ee39d6816f0 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 25 Jun 2024 14:59:57 +0800 Subject: [PATCH 107/126] disable additional section & add tests --- kong/dns/client.lua | 2 +- .../30-dns-client/02-old_client_spec.lua | 105 +++++++++++++++++- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 9902df39ad7b..cf1f077f81d6 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -359,7 +359,7 @@ local function resolve_query(self, name, qtype, tries) local start = now() - local answers, err = r:query(name, { additional_section = true, qtype = qtype }) + local answers, err = r:query(name, { qtype = qtype }) r:destroy() local duration = math_floor((now() - start) * 1000) diff --git a/spec/01-unit/30-dns-client/02-old_client_spec.lua b/spec/01-unit/30-dns-client/02-old_client_spec.lua index 0ec7ed50e0d2..749994e4e790 100644 --- a/spec/01-unit/30-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-dns-client/02-old_client_spec.lua @@ -1172,7 +1172,7 @@ describe("[DNS client]", function() assert.falsy(answers2.expired) -- new answers, not expired end) - it("verifies stale_ttl for available records #ttt", function() + it("verifies stale_ttl for available records", function() local stale_ttl = 0.1 local ttl = 0.1 local qname = "realname.test" @@ -1210,7 +1210,7 @@ describe("[DNS client]", function() ngx.sleep(ttl + 0.5 * stale_ttl) answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) 
assert.same(answers2[1].address, "1.1.1.1") - assert.are.equal(call_count, 1) + assert.are.equal(call_count, 1) -- todo: flakiness answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) assert.is_true(answers2.expired) @@ -1307,7 +1307,102 @@ describe("[DNS client]", function() end) end) -end) --- TODO --- resolver.new set pper name hostname deadloop ? + it("disable additional section when querying", function() + + local function build_dns_reply(id, name, ip, ns_ip1, ns_ip2) + local function dns_encode_name(name) + local parts = {} + for part in string.gmatch(name, "[^.]+") do + table.insert(parts, string.char(#part) .. part) + end + table.insert(parts, "\0") + return table.concat(parts) + end + + local function ip_to_bytes(ip) + local bytes = { "\x00\x04" } -- RDLENGTH:4bytes (ipv4) + for octet in string.gmatch(ip, "%d+") do + table.insert(bytes, string.char(tonumber(octet))) + end + return table.concat(bytes) + end + + local package = {} + + -- Header + package[#package+1] = id + package[#package+1] = "\x85\x00" -- QR, AA, RD + package[#package+1] = "\x00\x01\x00\x01\x00\x00\x00\x02" -- QD:1 AN:1 NS:0 AR:2 + + -- Question + package[#package+1] = dns_encode_name(name) + package[#package+1] = "\x00\x01\x00\x01" -- QTYPE A; QCLASS IN + + -- Answer + package[#package+1] = dns_encode_name(name) + package[#package+1] = "\x00\x01\x00\x01\x00\x00\x00\x30" -- QTYPE:A; QCLASS:IN TTL:48 + package[#package+1] = ip_to_bytes(ip) + + -- Additional + local function add_additional(name, ip) + package[#package+1] = dns_encode_name(name) + package[#package+1] = "\x00\x01\x00\x01\x00\x00\x00\x30" -- QTYPE:A; QCLASS:IN TTL:48 + package[#package+1] = ip_to_bytes(ip) + end + + add_additional("ns1." .. name, ns_ip1) + add_additional("ns2." .. 
name, ns_ip2) + + return table.concat(package) + end + + local force_enable_additional_section = false + + -- dns client will ignore additional section + query_func = function(self, original_query_func, name, options) + if options.qtype ~= client.TYPE_A then + return { errcode = 5, errstr = "refused" } + end + + if force_enable_additional_section then + options.additional_section = true + end + + self.tcp_sock = nil -- disable TCP query + + local id + local sock = assert(self.socks[1]) + -- hook send to get id + local orig_sock_send = sock.send + sock.send = function (self, query) + id = query[1] .. query[2] + return orig_sock_send(self, query) + end + -- hook receive to reply raw data + sock.receive = function (self, size) + return build_dns_reply(id, name, "1.1.1.1", "2.2.2.2", "3.3.3.3") + end + + return original_query_func(self, name, options) + end + + local name = "additional-section.test" + + -- no additional_section by default + local cli = client.new({ nameservers = TEST_NSS }) + local answers = cli:resolve(name) + assert.equal(#answers, 1) + assert.same(answers[1].address, "1.1.1.1") + + -- test the buggy scenario + force_enable_additional_section = true + cli = client.new({ nameservers = TEST_NSS, cache_purge = true }) + answers = cli:resolve(name) + assert.equal(#answers, 3) + assert.same(answers[1].address, "1.1.1.1") + assert.same(answers[2].address, "2.2.2.2") + assert.same(answers[3].address, "3.3.3.3") + end) + +end) From 91c896c64367d853e45ac7289d3bcb2c984a76ef Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 26 Jun 2024 14:37:35 +0800 Subject: [PATCH 108/126] further simplify code: either query A/AAAA or SRV --- .../unreleased/kong/refactor_dns_client.yml | 2 +- kong/dns/README.md | 11 +- kong/dns/client.lua | 40 ++- kong/dns/utils.lua | 3 +- spec/01-unit/30-dns-client/01-utils_spec.lua | 30 +++ .../30-dns-client/02-old_client_spec.lua | 251 +++++++++++++----- .../03-old_client_cache_spec.lua | 8 +- .../30-dns-client/04-client_ipc_spec.lua 
| 4 +- .../30-dns-client/05-client_stat_spec.lua | 29 +- .../04-admin_api/26-dns_client_spec.lua | 16 +- spec/helpers/dns.lua | 9 +- 11 files changed, 281 insertions(+), 122 deletions(-) diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml index c923230d8d9f..da5cd40f65ca 100644 --- a/changelog/unreleased/kong/refactor_dns_client.yml +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -4,6 +4,6 @@ message: > - Introduced observable statistics for the new DNS client, and a new Admin API `/status/dns` to retrieve them. - Deprecated the `dns_no_sync` option. Multiple DNS queries for the same name will always be synchronized (even across workers). This remains functional with the legacy DNS client library. - Deprecated the `dns_not_found_ttl` option. It uses the `dns_error_ttl` option for all error responses. This option remains functional with the legacy DNS client library. - - Deprecated `LAST` and `CNAME` values in the `dns_order` option. It's only used to specify supported types, not to decide their priority anymore. The priority of querying types from high to low is: `SRV`, `A`, `AAAA`. + - Deprecated the `dns_order` option. By default, SRV, A, and AAAA are supported. Only names in the SRV format (`_service._proto.name`) enable resolving of DNS SRV records. type: feature scope: Core diff --git a/kong/dns/README.md b/kong/dns/README.md index 554443baca64..61fa14132674 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -46,8 +46,6 @@ Performs a series of initialization operations: * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. * `order`: (default: `{ "SRV", "A", "AAAA" }`) * the order in which to resolve different record types, it's similar to the option `dns_order` in `kong.conf`. -* `enable_ipv6`: (default: `true`) - * whether to support IPv6 servers when getting nameservers from `resolv.conf`. 
* options for the underlying `lua-resty-dns` library: * `retrans`: (default: `5`) * the total number of times of retransmitting the DNS request when receiving a DNS response times out according to the timeout setting. When trying to retransmit the query, the next nameserver according to the round-robin algorithm will be picked up. @@ -73,15 +71,14 @@ Performs a series of initialization operations: Performs a DNS resolution. -1. First, use the key `:all` (or `:` if `@qtype` is not `nil`) to query mlcache to see if there are any results available. If results are found, return them directly. +1. Check if the `` matches SRV format (`\_service.\_proto.name`) to determine the `` (SRV or A/AAAA), then use the key `:` to query mlcache. If cached results are found, return them directly. 2. If there are no results available in the cache, it triggers the L3 callback of `mlcache:get` to query records from the DNS servers, details are as follows: 1. Check if `` has an IP address in the `hosts` file, return if found. 2. Check if `` is an IP address itself, return if true. 3. Use `mlcache:peek` to check if the expired key still exists in the shared dictionary. If it does, return it directly to mlcache and trigger an asynchronous background task to update the expired data (`start_stale_update_task`). The maximum time that expired data can be reused is `stale_ttl`, but the maximum TTL returned to mlcache cannot exceed 60s. This way, if the expired key is not successfully updated by the background task after 60s, it can still be reused by calling the `resolve` function from the upper layer to trigger the L3 callback to continue executing this logic and initiate another background task for updating. 1. 
For example, with a `stale_ttl` of 3600s, if the background task fails to update the record due to network issues during this time, and the upper-level application continues to call resolve to get the domain name result, it will trigger a background task to query the DNS result for that domain name every 60s, resulting in approximately 60 background tasks being triggered (3600s/60s). - 4. Query the DNS server, with `:` combinations: - 1. The `` is extended according to settings in `resolv.conf`, such as `ndots`, `search`, and `domain`. - 2. The `` is extended based on the `dns_order` parameter. + 4. Query the DNS server, with `:` combinations: + 1. The `` is extended according to settings in `resolv.conf`, such as `ndots`, `search`, and `domain`. **Return value:** @@ -93,7 +90,7 @@ Performs a DNS resolution. * `nil, "dns server error: failed to send request to UDP server 10.0.0.1:53: timeout"`, there was a network issue. * Return value and input parameter `@tries?`: * If provided as an empty table, it will be returned as a third result. This table will be an array containing the error message for each (if any) failed try. - * For example, `[["lambda.ab-cdef-1.amazonaws.com:SRV","dns server error: 3 name error"], ["lambda.ab-cdef-1.amazonaws.com:A","dns server error: 3 name error"]]`, both attempts failed due to a DNS server error with error code 3 (NXDOMAIN), indicating a name error. + * For example, `[["example.test:A","dns server error: 3 name error"], ["example.test:AAAA","dns server error: 3 name error"]]`, both attempts failed due to a DNS server error with error code 3 (NXDOMAIN), indicating a name error. 
**Input parameters:** diff --git a/kong/dns/client.lua b/kong/dns/client.lua index cf1f077f81d6..33c23ed5574d 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -24,6 +24,7 @@ local string_lower = string.lower local table_insert = table.insert local table_isempty = require("table.isempty") +local is_srv = utils.is_srv local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket local search_names = utils.search_names @@ -42,7 +43,7 @@ local DEFAULT_STALE_TTL = 4 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 -local DEFAULT_ORDER = { "SRV", "A", "AAAA" } +local DEFAULT_FAMILY = { "SRV", "A", "AAAA" } local TYPE_SRV = resolver.TYPE_SRV local TYPE_A = resolver.TYPE_A @@ -123,7 +124,6 @@ local init_hosts do hosts_cache[name .. ":" .. qtype] = answers hosts_cache[name .. ":" .. TYPE_A_AAAA] = answers - hosts_cache[name .. ":all"] = answers end -- insert hosts into cache @@ -156,9 +156,7 @@ function _M.new(opts) local enable_ipv4, enable_ipv6, enable_srv - opts.order = opts.order or DEFAULT_ORDER - - for i, typstr in ipairs(opts.order) do + for i, typstr in ipairs(opts.family or DEFAULT_FAMILY) do typstr = typstr:upper() if typstr == "A" then @@ -170,8 +168,8 @@ function _M.new(opts) elseif typstr == "SRV" then enable_srv = true - elseif typstr ~= "LAST" and typstr ~= "CNAME" then - return nil, "Invalid dns record type in order array: " .. typstr + else + return nil, "Invalid dns type in dns_family array: " .. 
typstr end end @@ -422,19 +420,11 @@ local function resolve_query_types(self, name, qtype, tries) local answers, err, ttl -- the specific type - if qtype and qtype ~= TYPE_A_AAAA then + if qtype ~= TYPE_A_AAAA then return resolve_query_names(self, names, qtype, tries) end - -- query SRV for nil type - if self.enable_srv and qtype == nil then - answers, err, ttl = resolve_query_names(self, names, TYPE_SRV, tries) - if not answers or not answers.errcode then - return answers, err, ttl - end - end - - -- query A/AAAA for nil or TYPE_A_AAAA type + -- query A or AAAA if self.enable_ipv4 then answers, err, ttl = resolve_query_names(self, names, TYPE_A, tries) if not answers or not answers.errcode then @@ -444,9 +434,6 @@ local function resolve_query_types(self, name, qtype, tries) if self.enable_ipv6 then answers, err, ttl = resolve_query_names(self, names, TYPE_AAAA, tries) - if not answers or not answers.errcode then - return answers, err, ttl - end end return answers, err, ttl @@ -507,7 +494,7 @@ local function resolve_callback(self, name, qtype, cache_only, tries) end -- check if this key exists in the hosts file (it maybe evicted from cache) - local key = name .. ":" .. (qtype or "all") + local key = name .. ":" .. qtype local answers = self.hosts_cache[key] if answers then return answers, nil, answers.ttl @@ -551,11 +538,14 @@ end local function resolve_all(self, name, qtype, cache_only, tries, has_timing) name = string_lower(name) - tries = setmetatable(tries or {}, TRIES_MT) - -- key like "example.com:" - local key = name .. ":" .. (qtype or "all") + if not qtype then + qtype = ((self.enable_srv and is_srv(name)) and TYPE_SRV or TYPE_A_AAAA) + end + + local key = name .. ":" .. 
qtype + log(DEBUG, PREFIX, "resolve_all ", key) stats_init_name(self.stats, key) @@ -660,7 +650,7 @@ end -- "_ldap._tcp.example.com:33" -> "_ldap._tcp.example.com:SRV" local function format_key(key) - local qname, qtype = key:match("([^:]+):(%d+)") -- match "(qname):(qtype)" + local qname, qtype = key:match("([^:]+):(%-?%d+)") -- match "(qname):(qtype)" return qtype and qname .. ":" .. (TYPE_TO_NAME[tonumber(qtype)] or qtype) or key end diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 999dcd3f5f95..3158c5ce945e 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -172,8 +172,9 @@ function _M.is_fqdn(name, ndots) end +-- check if it matches the SRV pattern: _service._proto.name function _M.is_srv(name) - return name:sub(1, 1) == "_" and name:find("%._") ~= nil + return name:match("^_[^._]+%._[^._]+%.[^.]+") ~= nil end diff --git a/spec/01-unit/30-dns-client/01-utils_spec.lua b/spec/01-unit/30-dns-client/01-utils_spec.lua index 37c8fcdd7d9e..93fa9e2fed67 100644 --- a/spec/01-unit/30-dns-client/01-utils_spec.lua +++ b/spec/01-unit/30-dns-client/01-utils_spec.lua @@ -26,6 +26,36 @@ describe("[utils]", function () end) end) + describe("is_srv(name)", function () + local test_domains = { + ["_imaps._tcp.example.test"] = true, + ["_http._tcp.example.test"] = true, + ["_imaps._udp.example.test"] = true, + ["_http._udp.example.test"] = true, + ["_ldap._udp.example.test"] = true, + ["_ldap._udp.example"] = true, + ["_ldap._udp."] = false, + ["_ldap._udp"] = false, + ["_ldap._udp._example.test"] = true, + ["_ldap._udp._example"] = true, + ["_ldap._udp._"] = true, + ["_imaps.tcp.example.test"] = false, + ["imaps._tcp.example.test"] = false, + ["imaps.tcp.example.test"] = false, + ["_._tcp.example.test"] = false, + ["_imaps._.example.test"] = false, + ["_._.example.test"] = false, + ["_..example.test"] = false, + ["._.example.test"] = false, + ["www.example.test"] = false, + ["localhost"] = false, + } + + for k,v in pairs(test_domains) do + assert.equal(utils.is_srv(k), v, 
"checking " .. k .. ", " .. tostring(v)) + end + end) + describe("search_names()", function () it("empty resolv, not apply the search list", function () local resolv = {} diff --git a/spec/01-unit/30-dns-client/02-old_client_spec.lua b/spec/01-unit/30-dns-client/02-old_client_spec.lua index 749994e4e790..7ad258541be6 100644 --- a/spec/01-unit/30-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-dns-client/02-old_client_spec.lua @@ -227,9 +227,6 @@ describe("[DNS client]", function() assert.same(answers, nil) assert.same(err, "dns client error: 101 empty record received") assert.same({ - 'host.one.test:33', - 'host.two.test:33', - 'host:33', 'host.one.test:1', 'host.two.test:1', 'host:1', @@ -239,6 +236,24 @@ describe("[DNS client]", function() }, list) end) + it("works with SRV name", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("_imap._tcp.example.test") + + assert.same(answers, nil) + assert.same(err, "dns client error: 101 empty record received") + assert.same({ + '_imap._tcp.example.test:33', + }, list) + end) + it("works with a 'search .' 
option", function() writefile(resolv_path, { "nameserver 198.51.100.0", @@ -253,7 +268,6 @@ describe("[DNS client]", function() assert.same(answers, nil) assert.same(err, "dns client error: 101 empty record received") assert.same({ - 'host:33', 'host:1', 'host:28', }, list) @@ -273,8 +287,6 @@ describe("[DNS client]", function() assert.same(answers, nil) assert.same(err, "dns client error: 101 empty record received") assert.same({ - 'host.local.domain.test:33', - 'host:33', 'host.local.domain.test:1', 'host:1', 'host.local.domain.test:28', @@ -296,7 +308,6 @@ describe("[DNS client]", function() cli:resolve("host.") assert.same({ - 'host.:33', 'host.:1', 'host.:28', }, list) @@ -314,7 +325,6 @@ describe("[DNS client]", function() cli:resolve("host.") assert.same({ - 'host.:33', 'host.:1', 'host.:28', }, list) @@ -332,7 +342,6 @@ describe("[DNS client]", function() cli:resolve("host.") assert.same({ - 'host.:33', 'host.:1', 'host.:28', }, list) @@ -348,7 +357,7 @@ describe("[DNS client]", function() }) local list = hook_query_func_get_list() - local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type cli:resolve("host") assert.same({ @@ -366,7 +375,7 @@ describe("[DNS client]", function() }) local list = hook_query_func_get_list() - local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type cli:resolve("host") assert.same({ @@ -385,7 +394,7 @@ describe("[DNS client]", function() }) local list = hook_query_func_get_list() - local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type cli:resolve("host.") assert.same({ 'host.:28', @@ -400,7 +409,7 @@ describe("[DNS client]", function() }) local list = hook_query_func_get_list() - local cli = assert(client_new({ order = { "AAAA" } })) -- IPv6 type + local cli = assert(client_new({ 
family = { "AAAA" } })) -- IPv6 type cli:resolve("host.") assert.same({ @@ -421,7 +430,6 @@ describe("[DNS client]", function() cli:resolve("local.host") assert.same({ - 'local.host:33', 'local.host:1', 'local.host:28', }, list) @@ -439,15 +447,15 @@ describe("[DNS client]", function() }) local list = hook_query_func_get_list() - -- perferred IP type: IPv4 (A takes priority in order) - local cli = assert(client_new({ order = { "LAST", "SRV", "A", "AAAA" } })) + -- perferred IP type: IPv4 (A takes priority in family) + local cli = assert(client_new({ family = { "SRV", "A", "AAAA" } })) local answers = cli:resolve("host") assert.same(answers[1].address, "127.0.0.1") assert.same({}, list) -- hit on cache, so no query to the nameserver - -- perferred IP type: IPv6 (AAAA takes priority in order) + -- perferred IP type: IPv6 (AAAA takes priority in family) --[[ - local cli = assert(client_new({ order = { "LAST", "SRV", "AAAA", "A" } })) + local cli = assert(client_new({ family = { "SRV", "AAAA", "A" } })) local answers = cli:resolve("host") assert.same(answers[1].address, "[::1]") assert.same({}, list) @@ -483,7 +491,7 @@ describe("[DNS client]", function() local query_count = 0 query_func = function(self, original_query_func, name, options) - assert(options.qtype == resolver.TYPE_SRV) + assert(options.qtype == resolver.TYPE_A) query_count = query_count + 1 return original_query_func(self, name, options) end @@ -587,7 +595,7 @@ describe("[DNS client]", function() local answers2 = assert(cli:resolve(host)) assert.are.equal(answers, answers2) -- same table from L1 cache - local ttl, _, value = cli.cache:peek(host .. ":all") + local ttl, _, value = cli.cache:peek(host .. 
":-1") assert.same(answers, value) local ttl_diff = answers.ttl - ttl assert(math.abs(ttl_diff - wait_time) < 1, @@ -611,7 +619,7 @@ describe("[DNS client]", function() it("fetching multiple A answers", function() local host = "atest."..TEST_DOMAIN - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"LAST", "A"}})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"A"}})) local answers = assert(cli:resolve(host)) assert.are.equal(#answers, 2) assert.are.equal(host, answers[1].name) @@ -622,7 +630,7 @@ describe("[DNS client]", function() it("fetching multiple A answers FQDN", function() local host = "atest."..TEST_DOMAIN - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"LAST", "A"}})) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"A"}})) local answers = assert(cli:resolve(host .. ".")) assert.are.equal(#answers, 2) assert.are.equal(host, answers[1].name) @@ -649,7 +657,7 @@ describe("[DNS client]", function() end assert.same({ - ["smtp.kong-gateway-testing.link:all"] = { + ["smtp.kong-gateway-testing.link:-1"] = { miss = 1, runs = 1 }, @@ -657,16 +665,30 @@ describe("[DNS client]", function() query = 1, query_succ = 1 }, - ["smtp.kong-gateway-testing.link:33"] = { - query = 1, - ["query_fail:empty record received"] = 1 } - }, cli.stats) + }, cli.stats) end) it("fetching multiple SRV answerss (un-typed)", function() - local host = "srvtest."..TEST_DOMAIN + local host = "_ldap._tcp.srv.test" local typ = resolver.TYPE_SRV + query_func = function(self, original_query_func, name, options) + return { + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + } + } + 
end + -- un-typed lookup local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers = assert(cli:resolve(host)) @@ -681,9 +703,30 @@ describe("[DNS client]", function() it("fetching multiple SRV answerss through CNAME (un-typed)", function() writefile(resolv_path, "") -- search {} empty - local host = "cname2srv."..TEST_DOMAIN + local host = "_ldap._tcp.cname2srv.test" local typ = resolver.TYPE_SRV + query_func = function(self, original_query_func, name, options) + return { + { + type = resolver.TYPE_CNAME, cname = host, class = 1, name = host, + ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + } + } + end + -- un-typed lookup local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local answers = assert(cli:resolve(host)) @@ -698,14 +741,12 @@ describe("[DNS client]", function() end assert.same({ - ["cname2srv.kong-gateway-testing.link:all"] = { + ["_ldap._tcp.cname2srv.test:33"] = { miss = 1, runs = 1, - }, - ["cname2srv.kong-gateway-testing.link:33"] = { query = 1, - query_succ = 1 - } + query_succ = 1, + }, }, cli.stats) -- check final target @@ -783,20 +824,20 @@ describe("[DNS client]", function() assert.equal("["..address.."]", answers[1].target) end) - it("resolving from the /etc/hosts file; preferred A or AAAA order", function() + it("resolving from the /etc/hosts file; preferred A or AAAA family", function() writefile(hosts_path, { "127.3.2.1 localhost", "1::2 localhost", }) local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - order = {"SRV", "A", "AAAA"} + family = {"SRV", "A", "AAAA"} })) assert(cli) local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", - order = {"SRV", 
"AAAA", "A"} + family = {"SRV", "AAAA", "A"} })) assert(cli) end) @@ -837,24 +878,24 @@ describe("[DNS client]", function() answers.last = nil -- make sure to clean local ips = {} for _,answers in ipairs(answers) do ips[answers.address] = true end - local order = {} + local family = {} for n = 1, #answers do local ip = cli:resolve(host, { return_random = true }) ips[ip] = nil - order[n] = ip + family[n] = ip end -- this table should be empty again assert.is_nil(next(ips)) - -- do again, and check same order - for n = 1, #order do + -- do again, and check same family + for n = 1, #family do local ip = cli:resolve(host, { return_random = true }) - assert.same(order[n], ip) + assert.same(family[n], ip) end end) it("SRV-answers, round-robin on lowest prio",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - local host = "hello.world.test" + local host = "_service._proto.hello.world.test" local entry = { { type = resolver.TYPE_SRV, @@ -888,7 +929,7 @@ describe("[DNS client]", function() }, } -- insert in the cache - cli.cache:set(entry[1].name .. ":all", {ttl=0}, entry) + cli.cache:set(entry[1].name .. ":" .. resolver.TYPE_SRV, {ttl=0}, entry) local results = {} for _ = 1,20 do @@ -904,7 +945,7 @@ describe("[DNS client]", function() it("SRV-answers with 1 entry, round-robin",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) - local host = "hello.world.test" + local host = "_service._proto.hello.world.test" local entry = {{ type = resolver.TYPE_SRV, target = "1.2.3.4", @@ -916,7 +957,7 @@ describe("[DNS client]", function() ttl = 10, }} -- insert in the cache - cli.cache:set(entry[1].name .. ":all", { ttl=0 }, entry) + cli.cache:set(entry[1].name .. ":" .. 
resolver.TYPE_SRV, { ttl=0 }, entry) -- repeated lookups, as the first will simply serve the first entry -- and the only second will setup the round-robin scheme, this is @@ -930,7 +971,7 @@ describe("[DNS client]", function() it("SRV-answers with 0-weight, round-robin",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) - local host = "hello.world.test" + local host = "_service._proto.hello.world.test" local entry = { { type = resolver.TYPE_SRV, @@ -964,7 +1005,7 @@ describe("[DNS client]", function() }, } -- insert in the cache - cli.cache:set(entry[1].name .. ":all", { ttl=0 }, entry) + cli.cache:set(entry[1].name .. ":" .. resolver.TYPE_SRV, { ttl=0 }, entry) -- weight 0 will be weight 1, without any reduction in weight -- of the other ones. @@ -994,13 +1035,12 @@ describe("[DNS client]", function() weight = 5, priority = 20, class = 1, - name = "srv.answers.test", + name = "_service._proto.srv.answers.test", ttl = 10, }} -- insert in the cache cli.cache:set(entry_a[1].name..":-1", { ttl = 0 }, entry_a) - cli.cache:set(entry_a[1].name..":all", { ttl = 0 }, entry_a) - cli.cache:set(entry_srv[1].name..":all", { ttl = 0 }, entry_srv) + cli.cache:set(entry_srv[1].name..":33", { ttl = 0 }, entry_srv) local ip, port local host = "a.answers.test" ip, port = cli:resolve_address(host) @@ -1011,14 +1051,14 @@ describe("[DNS client]", function() assert.is_string(ip) assert.equal(1234, port) - host = "srv.answers.test" + host = "_service._proto.srv.answers.test" ip, port = cli:resolve_address(host) - assert.is_string(ip) assert.is_number(port) + assert.is_string(ip) ip, port = cli:resolve_address(host, 0) - assert.is_string(ip) assert.is_number(port) + assert.is_string(ip) assert.is_not.equal(0, port) end) @@ -1026,10 +1066,27 @@ describe("[DNS client]", function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local ip, port, host - host = "srvport0."..TEST_DOMAIN + query_func = function(self, original_query_func, name, 
options) + if options.qtype ~= resolver.TYPE_SRV then + return original_query_func(self, name, options) + end + + return {{ + type = resolver.TYPE_SRV, + port = 0, + weight = 10, + priority = 20, + target = "kong-gateway-testing.link", + class = 1, + name = name, + ttl = 300, + }} + end + + host = "_service._proto.srvport0.test" ip, port = cli:resolve_address(host, 10) - assert.is_string(ip) assert.is_number(port) + assert.is_string(ip) assert.is_equal(10, port) ip, port = cli:resolve_address(host) @@ -1037,7 +1094,79 @@ describe("[DNS client]", function() assert.is_nil(port) end) - it("resolving in correct answers-type order",function() + it("SRV whole process: SRV -> A #ttt",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local ip, port, host + + query_func = function(self, original_query_func, name, options) + if options.qtype == resolver.TYPE_A then + return {{ + type = resolver.TYPE_A, + address = "1.1.1.1", + name = name, + ttl = 300, + }} + + elseif options.qtype == resolver.TYPE_SRV then + return {{ + type = resolver.TYPE_SRV, + port = 0, + weight = 10, + priority = 20, + target = "kong-gateway-testing.link", + class = 1, + name = name, + ttl = 300, + }} + + else + return {} + end + end + + host = "_service._proto.srv_a.test" + ip, port = cli:resolve_address(host) + assert.equal(ip, "1.1.1.1") + assert.is_nil(port) + end) + + it("SRV whole process: SRV -> A failed -> AAAA #ttt",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local ip, port, host + + query_func = function(self, original_query_func, name, options) + if options.qtype == resolver.TYPE_A then + return { errcode = 5, errstr = "refused" } + + elseif options.qtype == resolver.TYPE_SRV then + return {{ + type = resolver.TYPE_SRV, + port = 0, + weight = 10, + priority = 20, + target = "kong-gateway-testing.link", + class = 1, + name = name, + ttl = 300, + }} + + else + return {{ + type = resolver.TYPE_AAAA, + address = "::1:2:3:4", 
+ name = name, + ttl = 300, + }} + end + end + + host = "_service._proto.srv_aaaa.test" + ip, port = cli:resolve_address(host) + assert.equal(ip, "[::1:2:3:4]") + assert.is_nil(port) + end) + + it("resolving in correct answers-type family",function() local function config(cli) -- function to insert 2 answerss in the cache local A_entry = {{ @@ -1055,17 +1184,17 @@ describe("[DNS client]", function() ttl = 10, }} -- insert in the cache - cli.cache:set(A_entry[1].name..":all", { ttl=0 }, A_entry) - cli.cache:set(AAAA_entry[1].name..":all", { ttl=0 }, AAAA_entry) + cli.cache:set(A_entry[1].name..":-1", { ttl=0 }, A_entry) + cli.cache:set(AAAA_entry[1].name..":-1", { ttl=0 }, AAAA_entry) end - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"AAAA", "A"} })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"AAAA", "A"} })) config(cli) local ip, err = cli:resolve_address("hello.world.test") assert.same(err, nil) assert.equals(ip, "::1") - local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", order = {"A", "AAAA"} })) + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"A", "AAAA"} })) config(cli) ip = cli:resolve_address("hello.world.test") --assert.equals(ip, "5.6.7.8") diff --git a/spec/01-unit/30-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-dns-client/03-old_client_cache_spec.lua index 9fb6bf755f30..3dd3d773d822 100644 --- a/spec/01-unit/30-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-dns-client/03-old_client_cache_spec.lua @@ -147,7 +147,7 @@ describe("[DNS client cache]", function() } local answers = cli:resolve("myhost1") - assert.equal(answers, cli.cache:get("myhost1:all")) + assert.equal(answers, cli.cache:get("myhost1:-1")) end) it("are stored in cache with type", function() @@ -167,7 +167,7 @@ describe("[DNS client cache]", function() it("are resolved from cache without type", function() mock_records = {} - cli.cache:set("myhost3:all", 
{ttl=30+4}, {{ + cli.cache:set("myhost3:-1", {ttl=30+4}, {{ type = resolver.TYPE_A, address = "1.2.3.4", class = 1, @@ -179,7 +179,7 @@ describe("[DNS client cache]", function() }) local answers = cli:resolve("myhost3") - assert.same(answers, cli.cache:get("myhost3:all")) + assert.same(answers, cli.cache:get("myhost3:-1")) end) it("are resolved from cache with type", function() @@ -459,7 +459,7 @@ describe("[DNS client cache]", function() assert.equal("127.0.0.1", record[1].address) ngx.sleep(0.2) -- must be > valid_ttl + stale_ttl - record = cli.cache:get("myname.lan:all") + record = cli.cache:get("myname.lan:-1") assert.equal("127.0.0.1", record[1].address) end) end) diff --git a/spec/01-unit/30-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-dns-client/04-client_ipc_spec.lua index f4ff557b37d0..5ed287def1df 100644 --- a/spec/01-unit/30-dns-client/04-client_ipc_spec.lua +++ b/spec/01-unit/30-dns-client/04-client_ipc_spec.lua @@ -51,13 +51,13 @@ describe("[dns-client] inter-process communication:",function() -- wait background tasks to finish helpers.wait_until(function() - return count_log_lines("stale:broadcast:ipc.test:all") == 1 + return count_log_lines("stale:broadcast:ipc.test:%-1") == 1 end, 5) -- "stale:lru ..." means the progress of the two workers is about the same. -- "first:lru ..." means one of the workers is far behind the other. 
helpers.wait_until(function() - return count_log_lines(":lru delete:ipc.test:all") == 1 + return count_log_lines(":lru delete:ipc.test:%-1") == 1 end, 5) end) end) diff --git a/spec/01-unit/30-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-dns-client/05-client_stat_spec.lua index 00d3d587c19a..4bf0efd0a46a 100644 --- a/spec/01-unit/30-dns-client/05-client_stat_spec.lua +++ b/spec/01-unit/30-dns-client/05-client_stat_spec.lua @@ -85,8 +85,6 @@ describe("[DNS client stats]", function() ["_ldaps._tcp.srv.test:33"] = { ["query"] = 1, ["query_succ"] = 1, - }, - ["_ldaps._tcp.srv.test:all"] = { ["miss"] = 1, ["runs"] = 1, }, @@ -155,17 +153,16 @@ describe("[DNS client stats]", function() assert.match("^%d+$", query_last_time) assert.same({ - ["hit.test:all"] = { - ["hit_lru"] = 1, - ["runs"] = 3, - ["miss"] = 1, - ["hit_shm"] = 1, - }, ["hit.test:1"] = { ["query"] = 1, ["query_succ"] = 1, }, - ["nameserver_fail.test:all"] = { + ["hit.test:-1"] = { + ["hit_lru"] = 2, + ["miss"] = 1, + ["runs"] = 3, + }, + ["nameserver_fail.test:-1"] = { ["fail"] = 1, ["runs"] = 1, }, @@ -173,7 +170,7 @@ describe("[DNS client stats]", function() ["query"] = 1, ["query_fail_nameserver"] = 1, }, - ["stale.test:all"] = { + ["stale.test:-1"] = { ["miss"] = 2, ["runs"] = 2, ["stale"] = 1, @@ -182,14 +179,18 @@ describe("[DNS client stats]", function() ["query"] = 2, ["query_succ"] = 2, }, + ["empty_result_not_stale.test:-1"] = { + ["miss"] = 2, + ["runs"] = 2, + }, ["empty_result_not_stale.test:1"] = { ["query"] = 2, ["query_fail:empty record received"] = 2, }, - ["empty_result_not_stale.test:all"] = { - ["miss"] = 2, - ["runs"] = 2, - } + ["empty_result_not_stale.test:28"] = { + ["query"] = 2, + ["query_fail:name error"] = 2, + }, }, cli.stats) end) end) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index a36d62d1e8b2..55481cbebea6 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ 
b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -7,7 +7,16 @@ for _, strategy in helpers.each_strategy() do local client lazy_setup(function() - helpers.get_db_utils(strategy) + local bp = helpers.get_db_utils(strategy, { + "upstreams", + "targets", + }) + + local upstream = bp.upstreams:insert() + bp.targets:insert({ + upstream = upstream, + target = "_service._proto.srv.test", + }) assert(helpers.start_kong({ database = strategy, @@ -39,7 +48,10 @@ for _, strategy in helpers.each_strategy() do assert(type(json.worker.count) == "number") assert(type(json.stats) == "table") - assert(type(json.stats["127.0.0.1:all"].runs) == "number") + assert(type(json.stats["127.0.0.1:A/AAAA"].runs) == "number") + + assert(type(json.stats) == "table") + assert(type(json.stats["_service._proto.srv.test:SRV"].runs) == "number") end) end) diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 6ddfda8bb55a..68fdbfbcf2bd 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -40,8 +40,7 @@ end function _M.dnsExpire(client, record) local dnscache = client.getcache() dnscache:delete(record[1].name .. ":" .. record[1].type) - dnscache:delete("short:" .. record[1].name .. ":" .. "all") - dnscache:delete(record[1].name .. ":" .. "all") + dnscache:delete(record[1].name .. 
":-1") -- A/AAAA record.expire = gettime() - 1 end @@ -85,7 +84,7 @@ function _M.dnsSRV(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - key = records[1].name..":all" + key = records[1].name..":-1" -- A/AAAA dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) return records end @@ -127,7 +126,7 @@ function _M.dnsA(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl) - key = records[1].name..":all" + key = records[1].name..":-1" -- A/AAAA dnscache:set(key, records, records[1].ttl) return records end @@ -168,7 +167,7 @@ function _M.dnsAAAA(client, records, staleTtl) -- create key, and insert it local key = records[1].name..":"..records[1].type dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - key = records[1].name..":all" + key = records[1].name..":-1" -- A/AAAA dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) return records end From 3504b2681939d6fa7edf9275e126e3a742e706d5 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 26 Jun 2024 15:37:12 +0800 Subject: [PATCH 109/126] revert pathes modification for conflicts --- .../{21-legacy-dns-client => 21-dns-client}/01-utils_spec.lua | 0 .../{21-legacy-dns-client => 21-dns-client}/02-client_spec.lua | 0 .../03-client_cache_spec.lua | 0 .../{30-dns-client => 30-new-dns-client}/01-utils_spec.lua | 0 .../{30-dns-client => 30-new-dns-client}/02-old_client_spec.lua | 0 .../03-old_client_cache_spec.lua | 0 .../{30-dns-client => 30-new-dns-client}/04-client_ipc_spec.lua | 0 .../{30-dns-client => 30-new-dns-client}/05-client_stat_spec.lua | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename spec/01-unit/{21-legacy-dns-client => 21-dns-client}/01-utils_spec.lua (100%) rename spec/01-unit/{21-legacy-dns-client => 21-dns-client}/02-client_spec.lua (100%) rename 
spec/01-unit/{21-legacy-dns-client => 21-dns-client}/03-client_cache_spec.lua (100%) rename spec/01-unit/{30-dns-client => 30-new-dns-client}/01-utils_spec.lua (100%) rename spec/01-unit/{30-dns-client => 30-new-dns-client}/02-old_client_spec.lua (100%) rename spec/01-unit/{30-dns-client => 30-new-dns-client}/03-old_client_cache_spec.lua (100%) rename spec/01-unit/{30-dns-client => 30-new-dns-client}/04-client_ipc_spec.lua (100%) rename spec/01-unit/{30-dns-client => 30-new-dns-client}/05-client_stat_spec.lua (100%) diff --git a/spec/01-unit/21-legacy-dns-client/01-utils_spec.lua b/spec/01-unit/21-dns-client/01-utils_spec.lua similarity index 100% rename from spec/01-unit/21-legacy-dns-client/01-utils_spec.lua rename to spec/01-unit/21-dns-client/01-utils_spec.lua diff --git a/spec/01-unit/21-legacy-dns-client/02-client_spec.lua b/spec/01-unit/21-dns-client/02-client_spec.lua similarity index 100% rename from spec/01-unit/21-legacy-dns-client/02-client_spec.lua rename to spec/01-unit/21-dns-client/02-client_spec.lua diff --git a/spec/01-unit/21-legacy-dns-client/03-client_cache_spec.lua b/spec/01-unit/21-dns-client/03-client_cache_spec.lua similarity index 100% rename from spec/01-unit/21-legacy-dns-client/03-client_cache_spec.lua rename to spec/01-unit/21-dns-client/03-client_cache_spec.lua diff --git a/spec/01-unit/30-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua similarity index 100% rename from spec/01-unit/30-dns-client/01-utils_spec.lua rename to spec/01-unit/30-new-dns-client/01-utils_spec.lua diff --git a/spec/01-unit/30-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua similarity index 100% rename from spec/01-unit/30-dns-client/02-old_client_spec.lua rename to spec/01-unit/30-new-dns-client/02-old_client_spec.lua diff --git a/spec/01-unit/30-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua similarity index 100% rename from 
spec/01-unit/30-dns-client/03-old_client_cache_spec.lua rename to spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua diff --git a/spec/01-unit/30-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua similarity index 100% rename from spec/01-unit/30-dns-client/04-client_ipc_spec.lua rename to spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua diff --git a/spec/01-unit/30-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua similarity index 100% rename from spec/01-unit/30-dns-client/05-client_stat_spec.lua rename to spec/01-unit/30-new-dns-client/05-client_stat_spec.lua From 602ff4f3ea092050e6757a0627941c3002baf184 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 26 Jun 2024 17:14:47 +0800 Subject: [PATCH 110/126] fix health check tests for SRV --- .../05-proxy/10-balancer/01-healthchecks_spec.lua | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua b/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua index 0d3872c093c8..56769c6f26aa 100644 --- a/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua +++ b/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua @@ -38,7 +38,7 @@ for _, strategy in helpers.each_strategy() do } fixtures.dns_mock:SRV { - name = "my.srv.test.test", + name = "_srv._pro.my.srv.test.test", target = "a.my.srv.test.test", port = 80, -- port should fail to connect } @@ -57,7 +57,7 @@ for _, strategy in helpers.each_strategy() do } fixtures.dns_mock:SRV { - name = "srv-changes-port.test", + name = "_srv._pro.srv-changes-port.test", target = "a-changes-port.test", port = 90, -- port should fail to connect } @@ -114,7 +114,7 @@ for _, strategy in helpers.each_strategy() do }) -- the following port will not be used, will be overwritten by -- the mocked SRV record. 
- bu.add_target(bp, upstream_id, "my.srv.test.test", 80) + bu.add_target(bp, upstream_id, "_srv._pro.my.srv.test.test", 80) local api_host = bu.add_api(bp, upstream_name) bu.end_testcase_setup(strategy, bp) @@ -301,7 +301,7 @@ for _, strategy in helpers.each_strategy() do }) -- the following port will not be used, will be overwritten by -- the mocked SRV record. - bu.add_target(bp, upstream_id, "srv-changes-port.test", 80) + bu.add_target(bp, upstream_id, "_srv._pro.srv-changes-port.test", 80) local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, }) bu.end_testcase_setup(strategy, bp) @@ -328,7 +328,7 @@ for _, strategy in helpers.each_strategy() do assert.equals("UNHEALTHY", health.data[1].health) assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health) - local status = bu.put_target_address_health(upstream_id, "srv-changes-port.test:80", "a-changes-port.test:90", "healthy") + local status = bu.put_target_address_health(upstream_id, "_srv._pro.srv-changes-port.test:80", "a-changes-port.test:90", "healthy") assert.same(204, status) end, 15) @@ -1780,7 +1780,7 @@ for _, strategy in helpers.each_strategy() do for i = 1, 3 do hosts[i] = { - hostname = bu.gen_multi_host(), + hostname = "_srv._pro." .. 
bu.gen_multi_host(), port1 = helpers.get_available_port(), port2 = helpers.get_available_port(), } From 654c776d084512d03ea3b33da3ad5bffd9805b60 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 28 Jun 2024 18:45:03 +0800 Subject: [PATCH 111/126] fix /status/dns test cases --- .../04-admin_api/26-dns_client_spec.lua | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index 55481cbebea6..3ef49556376d 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -50,8 +50,18 @@ for _, strategy in helpers.each_strategy() do assert(type(json.stats) == "table") assert(type(json.stats["127.0.0.1:A/AAAA"].runs) == "number") - assert(type(json.stats) == "table") - assert(type(json.stats["_service._proto.srv.test:SRV"].runs) == "number") + -- Wait for the upstream target to be updated in the background + helpers.wait_until(function () + local res = assert(client:send { + method = "GET", + path = "/status/dns", + headers = { ["Content-Type"] = "application/json" } + }) + + local body = assert.res_status(200 , res) + local json = cjson.decode(body) + return type(json.stats["_service._proto.srv.test:SRV"]) == "table" + end, 5) end) end) From d0d196c99cd9aca4dceb0161fb3d3cf88a539d9c Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Jul 2024 16:20:59 +0800 Subject: [PATCH 112/126] chores(dns): fixed coding style --- kong/dns/client.lua | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 33c23ed5574d..6d3b5c19b568 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -28,6 +28,7 @@ local is_srv = utils.is_srv local parse_hosts = utils.parse_hosts local ipv6_bracket = utils.ipv6_bracket local search_names = utils.search_names +local parse_resolv_conf = 
utils.parse_resolv_conf local get_next_round_robin_answer = utils.get_next_round_robin_answer local get_next_weighted_round_robin_answer = utils.get_next_weighted_round_robin_answer @@ -85,7 +86,7 @@ local _M = { TYPE_A = TYPE_A, TYPE_AAAA = TYPE_AAAA, } -local MT = { __index = _M } +local MT = { __index = _M, } local TRIES_MT = { __tostring = cjson.encode, } @@ -156,7 +157,7 @@ function _M.new(opts) local enable_ipv4, enable_ipv6, enable_srv - for i, typstr in ipairs(opts.family or DEFAULT_FAMILY) do + for _, typstr in ipairs(opts.family or DEFAULT_FAMILY) do typstr = typstr:upper() if typstr == "A" then @@ -177,7 +178,7 @@ function _M.new(opts) enable_ipv4 and "ipv4 " or "", enable_ipv6 and "ipv6 " or "") -- parse resolv.conf - local resolv, err = utils.parse_resolv_conf(opts.resolv_conf, opts.enable_ipv6) + local resolv, err = parse_resolv_conf(opts.resolv_conf, opts.enable_ipv6) if not resolv then log(WARN, PREFIX, "Invalid resolv.conf: ", err) resolv = { options = {} } @@ -222,7 +223,7 @@ function _M.new(opts) return end - local cwid = worker_id() + local cwid = worker_id() or -1 for _, ev in pairs(events) do local handler = function(data, event, source, wid) if cwid ~= wid then -- Current worker has handled this event. @@ -251,6 +252,7 @@ function _M.new(opts) local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { ipc = ipc, neg_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + -- 10000 is a reliable and tested value from the original library. 
lru_size = opts.cache_size or 10000, shm_locks = ngx.shared.kong_locks and "kong_locks", resty_lock_opts = resty_lock_opts, @@ -365,12 +367,12 @@ local function resolve_query(self, name, qtype, tries) stats_set_count(self.stats, key, "query_last_time", duration) log(DEBUG, PREFIX, "r:query(", key, ") ans:", answers and #answers or "-", - " t:", duration, " ms") + " t:", duration, " ms") -- network error or malformed DNS response if not answers then stats_increment(self.stats, key, "query_fail_nameserver") - err = "DNS server error: " .. tostring(err) .. ", took " .. duration .. " ms" + err = "DNS server error: " .. tostring(err) .. ", took " .. duration .. " ms" table_insert(tries, { name .. ":" .. TYPE_TO_NAME[qtype], err }) return nil, err end @@ -468,6 +470,7 @@ end local function check_and_get_ip_answers(name) + -- TODO: use is_valid_ipv4 from kong/tools/ip.lua instead if name:match("^%d+%.%d+%.%d+%.%d+$") then -- IPv4 return { { name = name, class = 1, type = TYPE_A, address = name }, @@ -563,7 +566,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, has_timing) stats_increment(self.stats, key, hit_str) log(DEBUG, PREFIX, "cache lookup ", key, " ans:", answers and #answers or "-", - " hlv:", hit_str) + " hlv:", hit_str) if has_timing then req_dyn_hook_run_hook("timing", "dns:cache_lookup", @@ -592,11 +595,8 @@ function _M:resolve_address(name, port, cache_only, tries) local answers, err, tries = resolve_all(self, name, nil, cache_only, tries, has_timing) - if not answers then - return nil, err, tries - end - if answers and answers[1].type == TYPE_SRV then + if answers and answers[1] and answers[1].type == TYPE_SRV then local answer = get_next_weighted_round_robin_answer(answers) port = (answer.port ~= 0 and answer.port) or port answers, err, tries = resolve_all(self, answer.target, TYPE_A_AAAA, From 350f2bd68e4afd38d49b5d194a7f3569362a4261 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 1 Jul 2024 17:10:49 +0800 Subject: [PATCH 
113/126] chores(dns): fixed coding style: MT -> _MT --- kong/dns/client.lua | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 6d3b5c19b568..78048418342a 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -86,10 +86,10 @@ local _M = { TYPE_A = TYPE_A, TYPE_AAAA = TYPE_AAAA, } -local MT = { __index = _M, } +local _MT = { __index = _M, } -local TRIES_MT = { __tostring = cjson.encode, } +local _TRIES_MT = { __tostring = cjson.encode, } local function stats_init_name(stats, name) @@ -292,7 +292,7 @@ function _M.new(opts) errstr = EMPTY_RECORD_ERROR_MESSAGE, ttl = opts.error_ttl or DEFAULT_ERROR_TTL, }, - }, MT) + }, _MT) end @@ -447,7 +447,7 @@ local function stale_update_task(premature, self, key, name, qtype) return end - local tries = setmetatable({}, TRIES_MT) + local tries = setmetatable({}, _TRIES_MT) local answers = resolve_query_types(self, name, qtype, tries) if answers and not answers.errcode then log(DEBUG, PREFIX, "update stale DNS records: ", #answers) @@ -541,7 +541,7 @@ end local function resolve_all(self, name, qtype, cache_only, tries, has_timing) name = string_lower(name) - tries = setmetatable(tries or {}, TRIES_MT) + tries = setmetatable(tries or {}, _TRIES_MT) if not qtype then qtype = ((self.enable_srv and is_srv(name)) and TYPE_SRV or TYPE_A_AAAA) From 91b1f2d8fe8f6766c14881a473f5bb00664f8740 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Tue, 2 Jul 2024 14:07:01 +0800 Subject: [PATCH 114/126] @chobits chores(dns): fixed coding style: remove () from srv port Co-authored-by: Chrono --- kong/dns/client.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 78048418342a..b9234732c0fd 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -598,7 +598,7 @@ function _M:resolve_address(name, port, cache_only, tries) if answers and answers[1] and answers[1].type == TYPE_SRV then local answer = 
get_next_weighted_round_robin_answer(answers) - port = (answer.port ~= 0 and answer.port) or port + port = answer.port ~= 0 and answer.port or port answers, err, tries = resolve_all(self, answer.target, TYPE_A_AAAA, cache_only, tries, has_timing) end From b6955f0970a52de691c9597632c8aa73b28b7ea4 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 5 Jul 2024 15:05:15 +0800 Subject: [PATCH 115/126] chores(dns): fix coding style Co-authored-by: Qi --- kong/dns/client.lua | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index b9234732c0fd..e6cc4a918db9 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -449,13 +449,13 @@ local function stale_update_task(premature, self, key, name, qtype) local tries = setmetatable({}, _TRIES_MT) local answers = resolve_query_types(self, name, qtype, tries) - if answers and not answers.errcode then - log(DEBUG, PREFIX, "update stale DNS records: ", #answers) - self.cache:set(key, { ttl = answers.ttl }, answers) - - else + if not answers or answers.errcode then log(DEBUG, PREFIX, "failed to update stale DNS records: ", tostring(tries)) + return end + + log(DEBUG, PREFIX, "update stale DNS records: ", #answers) + self.cache:set(key, { ttl = answers.ttl }, answers) end From 67813e2ee1bf0b27dfb4dbcb046d980554f2460d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 5 Jul 2024 15:55:34 +0800 Subject: [PATCH 116/126] chores(test): fix typo, return `ttl` instead of `tries` --- kong/dns/client.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index e6cc4a918db9..27f806ce205c 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -493,7 +493,7 @@ local function resolve_callback(self, name, qtype, cache_only, tries) if answers then -- domain name is IP literal answers.ttl = LONG_LASTING_TTL answers.expire = now() + answers.ttl - return answers, nil, tries + return answers, nil, answers.ttl 
end -- check if this key exists in the hosts file (it maybe evicted from cache) From dd26cf2634a1cd9ace3ed063848e11f090c1028a Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 7 Jul 2024 18:30:10 +0800 Subject: [PATCH 117/126] fix conflicts: remove modification in test: 01-instrumentations_spec.lua --- .../14-observability/01-instrumentations_spec.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/02-integration/14-observability/01-instrumentations_spec.lua b/spec/02-integration/14-observability/01-instrumentations_spec.lua index 0d9af1927995..781c85cd8fb2 100644 --- a/spec/02-integration/14-observability/01-instrumentations_spec.lua +++ b/spec/02-integration/14-observability/01-instrumentations_spec.lua @@ -524,7 +524,7 @@ for _, strategy in helpers.each_strategy() do -- intentionally trigger a DNS query error local service = bp.services:insert({ name = "inexist-host-service", - host = "really-inexist-host.test", + host = "really-inexist-host", port = 80, }) @@ -558,7 +558,7 @@ for _, strategy in helpers.each_strategy() do local dns_spans = assert_has_spans("kong.dns", spans) local upstream_dns for _, dns_span in ipairs(dns_spans) do - if dns_span.attributes["dns.record.domain"] == "really-inexist-host.test" then + if dns_span.attributes["dns.record.domain"] == "really-inexist-host" then upstream_dns = dns_span break end From b69ec7fede8c35a71421a038d0c8ebd881021435 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Sun, 7 Jul 2024 20:01:41 +0800 Subject: [PATCH 118/126] fix conflicts and its tests --- .../14-observability/01-instrumentations_spec.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/02-integration/14-observability/01-instrumentations_spec.lua b/spec/02-integration/14-observability/01-instrumentations_spec.lua index 781c85cd8fb2..0d9af1927995 100644 --- a/spec/02-integration/14-observability/01-instrumentations_spec.lua +++ 
b/spec/02-integration/14-observability/01-instrumentations_spec.lua @@ -524,7 +524,7 @@ for _, strategy in helpers.each_strategy() do -- intentionally trigger a DNS query error local service = bp.services:insert({ name = "inexist-host-service", - host = "really-inexist-host", + host = "really-inexist-host.test", port = 80, }) @@ -558,7 +558,7 @@ for _, strategy in helpers.each_strategy() do local dns_spans = assert_has_spans("kong.dns", spans) local upstream_dns for _, dns_span in ipairs(dns_spans) do - if dns_span.attributes["dns.record.domain"] == "really-inexist-host" then + if dns_span.attributes["dns.record.domain"] == "really-inexist-host.test" then upstream_dns = dns_span break end From 97705b9c9a9c6b8093f61a7f5bfa16c63f0b3648 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Mon, 8 Jul 2024 10:58:51 +0800 Subject: [PATCH 119/126] chores(dns/README.md): fixed types --- kong/dns/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index 61fa14132674..90126aebceb6 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -44,8 +44,8 @@ Performs a series of initialization operations: * the path of `hosts` file. * `resolv_conf`: (default: `/etc/resolv.conf`) * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. -* `order`: (default: `{ "SRV", "A", "AAAA" }`) - * the order in which to resolve different record types, it's similar to the option `dns_order` in `kong.conf`. +* `family`: (default: `{ "SRV", "A", "AAAA" }`) + * the types of DNS records that the library should query, it is taken from `kong.conf` option `dns_family`. * options for the underlying `lua-resty-dns` library: * `retrans`: (default: `5`) * the total number of times of retransmitting the DNS request when receiving a DNS response times out according to the timeout setting. 
When trying to retransmit the query, the next nameserver according to the round-robin algorithm will be picked up. From 28770c01187e405d6bbcb27d5cb4d6adc537c370 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 11 Jul 2024 14:40:04 +0800 Subject: [PATCH 120/126] perf(dns): reduce table creation --- kong/dns/utils.lua | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 3158c5ce945e..1eb8ec9d357e 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -8,6 +8,7 @@ local type = type local ipairs = ipairs local tonumber = tonumber local math_random = math.random +local table_clear = require("table.clear") local table_insert = table.insert local table_remove = table.remove @@ -257,7 +258,8 @@ do for _, answer in ipairs(answers) do if answer.priority < lowest_priority then lowest_priority = answer.priority - l = { answer } + table_clear(l) + l[1] = answer elseif answer.priority == lowest_priority then table_insert(l, answer) From 593f4ed1ebb463b3c75cbee5e9c3f75ee2eb8135 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 12:43:57 +0800 Subject: [PATCH 121/126] fixed coding styles: add more blanks and rename some variables --- kong/dns/client.lua | 13 +++++++------ kong/dns/utils.lua | 20 ++++++++++++++++++-- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 27f806ce205c..d7145f416603 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -49,13 +49,13 @@ local DEFAULT_FAMILY = { "SRV", "A", "AAAA" } local TYPE_SRV = resolver.TYPE_SRV local TYPE_A = resolver.TYPE_A local TYPE_AAAA = resolver.TYPE_AAAA -local TYPE_A_AAAA = -1 -- used to resolve IP addresses for SRV targets +local TYPE_A_OR_AAAA = -1 -- used to resolve IP addresses for SRV targets local TYPE_TO_NAME = { [TYPE_SRV] = "SRV", [TYPE_A] = "A", [TYPE_AAAA] = "AAAA", - [TYPE_A_AAAA] = "A/AAAA", + [TYPE_A_OR_AAAA] = "A/AAAA", } local HIT_L3 = 3 -- L1 lru, L2 
shm, L3 callback, L4 stale @@ -124,7 +124,7 @@ local init_hosts do } hosts_cache[name .. ":" .. qtype] = answers - hosts_cache[name .. ":" .. TYPE_A_AAAA] = answers + hosts_cache[name .. ":" .. TYPE_A_OR_AAAA] = answers end -- insert hosts into cache @@ -422,7 +422,7 @@ local function resolve_query_types(self, name, qtype, tries) local answers, err, ttl -- the specific type - if qtype ~= TYPE_A_AAAA then + if qtype ~= TYPE_A_OR_AAAA then return resolve_query_names(self, names, qtype, tries) end @@ -525,6 +525,7 @@ local function resolve_callback(self, name, qtype, cache_only, tries) -- mlcache's internal lock mechanism ensures concurrent control start_stale_update_task(self, key, name, qtype) answers.ttl = ttl + return answers, nil, ttl end end @@ -544,7 +545,7 @@ local function resolve_all(self, name, qtype, cache_only, tries, has_timing) tries = setmetatable(tries or {}, _TRIES_MT) if not qtype then - qtype = ((self.enable_srv and is_srv(name)) and TYPE_SRV or TYPE_A_AAAA) + qtype = ((self.enable_srv and is_srv(name)) and TYPE_SRV or TYPE_A_OR_AAAA) end local key = name .. ":" .. 
qtype @@ -599,7 +600,7 @@ function _M:resolve_address(name, port, cache_only, tries) if answers and answers[1] and answers[1].type == TYPE_SRV then local answer = get_next_weighted_round_robin_answer(answers) port = answer.port ~= 0 and answer.port or port - answers, err, tries = resolve_all(self, answer.target, TYPE_A_AAAA, + answers, err, tries = resolve_all(self, answer.target, TYPE_A_OR_AAAA, cache_only, tries, has_timing) end diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua index 1eb8ec9d357e..32a67c805fea 100644 --- a/kong/dns/utils.lua +++ b/kong/dns/utils.lua @@ -40,7 +40,7 @@ function _M.hostname_type(name) return "ipv4" end - return "name" + return "domain" end @@ -50,7 +50,7 @@ end -- @return `name/ip` + `port (or nil)` + `type ("ipv4", "ipv6" or "name")` function _M.parse_hostname(name) local t = _M.hostname_type(name) - if t == "ipv4" or t == "name" then + if t == "ipv4" or t == "domain" then local ip, port = name:match("^([^:]+)%:*(%d*)$") return ip, tonumber(port), t end @@ -69,6 +69,7 @@ local function get_lines(path) if type(path) == "table" then return path end + return readlines(path) end @@ -89,20 +90,24 @@ function _M.parse_hosts(path, enable_ipv6) if part:sub(1, 1) == '#' then break end + table_insert(parts, part:lower()) end -- Check if the line contains an IP address followed by hostnames if #parts >= 2 then local ip, _, family = _M.parse_hostname(parts[1]) + if family ~= "name" then -- ipv4/ipv6 for i = 2, #parts do local host = parts[i] local v = hosts[host] + if not v then v = {} hosts[host] = v end + v[family] = v[family] or ip -- prefer to use the first ip end end @@ -160,6 +165,7 @@ function _M.parse_resolv_conf(path, enable_ipv6) resolv.nameservers = nameservers end + return resolv end @@ -168,7 +174,9 @@ function _M.is_fqdn(name, ndots) if name:sub(-1) == "." 
then return true end + local _, dot_count = name:gsub("%.", "") + return (dot_count >= ndots) end @@ -188,6 +196,7 @@ function _M.search_names(name, resolv, hosts) end local names = {} + for _, suffix in ipairs(resolv.search) do table_insert(names, name .. "." .. suffix) end @@ -203,6 +212,7 @@ function _M.ipv6_bracket(name) if name:match("^[^[].*:") then -- not start with '[' and contains ':' return "[" .. name .. "]" end + return name end @@ -211,6 +221,7 @@ end function _M.get_next_round_robin_answer(answers) answers.last = (answers.last or 0) % #answers + 1 + return answers[answers.last] end @@ -225,14 +236,18 @@ do -- 0.1 gives weight 0 record a minimal chance of being chosen (rfc 2782) local w = (answer.weight == 0) and 0.1 or answer.weight local cw = answer.cw + w + answer.cw = cw + if not best or cw > best.cw then best = answer end + total = total + w end best.cw = best.cw - total + return best end @@ -267,6 +282,7 @@ do end answers.lowest_prio_records = l + return l end From b0d545514900807a9a98232340efc4c44f411b9d Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 14:48:03 +0800 Subject: [PATCH 122/126] add option:random_resolver and fixed docs --- kong/dns/README.md | 26 ++++++++++++------- kong/dns/client.lua | 12 +++++++-- .../30-new-dns-client/02-old_client_spec.lua | 21 +++++++++++++-- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index 90126aebceb6..b77fc735acb5 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -1,7 +1,7 @@ Name ==== -Kong DNS client - The module is currently Kong only, and builds on top of the `lua-resty-dns` and `lua-resty-mlcache` libraries. +Kong DNS client - The module is currently only used by Kong, and builds on top of the `lua-resty-dns` and `lua-resty-mlcache` libraries. Table of Contents ================= @@ -37,9 +37,13 @@ Performs a series of initialization operations: `@opts` It accepts a options table argument. 
The following options are supported: * TTL options: - * `valid_ttl`: same to the option `dns_valid_ttl` in `kong.conf`. - * `stale_ttl`: same to the option `dns_stale_ttl` in `kong.conf`. - * `error_ttl`: same to the option `dns_error_ttl` in `kong.conf`. + * `valid_ttl`: (default: `nil`) + * By default, it caches answers using the TTL value of a response. This optional parameter (in seconds) allows overriding it. + * `stale_ttl`: (default: `3600`) + * the time in seconds for keeping expired DNS records. + * Stale data remains in use from when a record expires until either the background refresh query completes or until `stale_ttl` seconds have passed. This helps Kong stay resilient if the DNS server is temporarily unavailable. + * `error_ttl`: (default: `1`) + * the time in seconds for caching DNS error responses. * `hosts`: (default: `/etc/hosts`) * the path of `hosts` file. * `resolv_conf`: (default: `/etc/resolv.conf`) @@ -53,12 +57,14 @@ Performs a series of initialization operations: * `timeout`: (default: `2000`) * the time in milliseconds for waiting for the response for a single attempt of request transmission. * If not given, it is taken from `resolv.conf` option `options timeout:`. But note that its unit in `resolv.conf` is second. - * `no_random`: (default: `true`) - * a boolean flag controls whether to randomly pick the nameserver to query first. If `true`, it always starts with the first nameserver listed. - * If not given, it is taken from `resolv.conf` option `rotate` (inverted). + * `random_resolver`: (default: `false`) + * a boolean flag controls whether to randomly pick the nameserver to query first. If `true`, it will always start with the random nameserver. + * If not given, it is taken from `resolv.conf` option `rotate`. * `nameservers`: - * a list of nameservers to be used. Each nameserver entry can be either a single hostname string or a table holding both the hostname string and the port number. For exmaple, `{"8.8.8.8", {"8.8.4.4", 53} }`. 
+ * a list of nameservers to be used. Each nameserver entry can be either a single hostname string or a table holding both the hostname string and the port number. For example, `{"8.8.8.8", {"8.8.4.4", 53} }`. * If not given, it is taken from `resolv.conf` option `nameserver`. +* `cache_purge`: (default: `false`) + * a boolean flag controls whether to clear the internal cache shared by other DNS client instances across workers. [Back to TOC](#table-of-contents) @@ -84,8 +90,10 @@ Performs a DNS resolution. * Return value `answers, err`: * Return one array-like Lua table contains all the records. + * For example, `{{"address":"[2001:db8:3333:4444:5555:6666:7777:8888]","class":1,"name":"example.test","ttl":30,"type":28},{"address":"192.168.1.1","class":1,"name":"example.test","ttl":30,"type":1},"expire":1720765379,"ttl":30}`. + * IPv6 addresses are enclosed in brackets (`[]`). * If the server returns a non-zero error code, it will return `nil` and a string describing the error in this record. - * For exmaple, `nil, "dns server error: name error"`, the server returned a result with error code 3 (NXDOMAIN). + * For example, `nil, "dns server error: name error"`, the server returned a result with error code 3 (NXDOMAIN). * In case of severe errors, such network error or server's malformed DNS record response, it will return `nil` and a string describing the error instead. For example: * `nil, "dns server error: failed to send request to UDP server 10.0.0.1:53: timeout"`, there was a network issue. 
* Return value and input parameter `@tries?`: diff --git a/kong/dns/client.lua b/kong/dns/client.lua index d7145f416603..83286599cc76 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -40,7 +40,7 @@ local req_dyn_hook_run_hook = require("kong.dynamic_hook").run_hook local PREFIX = "[dns_client] " local DEFAULT_ERROR_TTL = 1 -- unit: second -local DEFAULT_STALE_TTL = 4 +local DEFAULT_STALE_TTL = 3600 -- long-lasting TTL of 10 years for hosts or static IP addresses in cache settings local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 @@ -193,10 +193,18 @@ function _M.new(opts) log(WARN, PREFIX, "Invalid configuration, no nameservers specified") end + local no_random + + if opts.random_resolver == nil then + no_random = not resolv.options.rotate + else + no_random = not opts.random_resolver + end + local r_opts = { retrans = opts.retrans or resolv.options.attempts or 5, timeout = opts.timeout or resolv.options.timeout or 2000, -- ms - no_random = opts.no_random or not resolv.options.rotate, + no_random = no_random, nameservers = nameservers, } diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 7ad258541be6..9df473116950 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -119,6 +119,23 @@ describe("[DNS client]", function() describe("initialization", function() + it("check special opts", function() + local opts = { + hosts = "non/existent/hosts", + resolv_conf = "non/exitent/resolv.conf", + retrans = 4, + timeout = 5000, + random_resolver = true, + nameservers = {"1.1.1.1", {"2.2.2.2", 53}}, + } + + local cli = assert(client.new(opts)) + + assert.same(opts.retrans, cli.r_opts.retrans) + assert.same(opts.timeout, cli.r_opts.timeout) + assert.same(not opts.random_resolver, cli.r_opts.no_random) + assert.same(opts.nameservers, cli.r_opts.nameservers) + end) it("succeeds if hosts/resolv.conf fails", function() 
local cli, err = client.new({ @@ -1094,7 +1111,7 @@ describe("[DNS client]", function() assert.is_nil(port) end) - it("SRV whole process: SRV -> A #ttt",function() + it("SRV whole process: SRV -> A",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local ip, port, host @@ -1130,7 +1147,7 @@ describe("[DNS client]", function() assert.is_nil(port) end) - it("SRV whole process: SRV -> A failed -> AAAA #ttt",function() + it("SRV whole process: SRV -> A failed -> AAAA",function() local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) local ip, port, host From 7ce9599e1f1bec5d3905105098189042ccd81fee Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 15:21:22 +0800 Subject: [PATCH 123/126] change seperator from `:` to `|` in the output of API /status/dns --- kong/dns/client.lua | 6 +++--- spec/02-integration/04-admin_api/26-dns_client_spec.lua | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 83286599cc76..05eb0d1b84ad 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -657,10 +657,10 @@ function _M.toip(name, port, cache_only, tries) end --- "_ldap._tcp.example.com:33" -> "_ldap._tcp.example.com:SRV" +-- "_ldap._tcp.example.com:33" -> "_ldap._tcp.example.com|SRV" local function format_key(key) - local qname, qtype = key:match("([^:]+):(%-?%d+)") -- match "(qname):(qtype)" - return qtype and qname .. ":" .. (TYPE_TO_NAME[tonumber(qtype)] or qtype) + local qname, qtype = key:match("^(.+):(%-?%d+)$") -- match "(qname):(qtype)" + return qtype and qname .. "|" .. 
(TYPE_TO_NAME[tonumber(qtype)] or qtype) or key end diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua index 3ef49556376d..036671732a8a 100644 --- a/spec/02-integration/04-admin_api/26-dns_client_spec.lua +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -48,7 +48,7 @@ for _, strategy in helpers.each_strategy() do assert(type(json.worker.count) == "number") assert(type(json.stats) == "table") - assert(type(json.stats["127.0.0.1:A/AAAA"].runs) == "number") + assert(type(json.stats["127.0.0.1|A/AAAA"].runs) == "number") -- Wait for the upstream target to be updated in the background helpers.wait_until(function () @@ -60,7 +60,7 @@ for _, strategy in helpers.each_strategy() do local body = assert.res_status(200 , res) local json = cjson.decode(body) - return type(json.stats["_service._proto.srv.test:SRV"]) == "table" + return type(json.stats["_service._proto.srv.test|SRV"]) == "table" end, 5) end) end) From 37cf30ded68c9e8c6a95e32e12e1b007d07b5c89 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 16:09:57 +0800 Subject: [PATCH 124/126] add a TODO for more structured `tries` --- kong/dns/client.lua | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 05eb0d1b84ad..64d8b1be4e46 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -381,7 +381,11 @@ local function resolve_query(self, name, qtype, tries) if not answers then stats_increment(self.stats, key, "query_fail_nameserver") err = "DNS server error: " .. tostring(err) .. ", took " .. duration .. " ms" + + -- TODO: make the error more structured, like: + -- { qname = name, qtype = qtype, error = err, } or something similar table_insert(tries, { name .. ":" .. 
TYPE_TO_NAME[qtype], err }) + return nil, err end From 48994bc9354603e42a646a2884f60bbd4a8def97 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 17:48:48 +0800 Subject: [PATCH 125/126] doc: perf test for memory consumption --- kong/dns/README.md | 34 ++++++++++++++++++++++++++++++++++ kong/templates/nginx_kong.lua | 2 +- spec/fixtures/shared_dict.lua | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/kong/dns/README.md b/kong/dns/README.md index b77fc735acb5..8597627bda23 100644 --- a/kong/dns/README.md +++ b/kong/dns/README.md @@ -11,6 +11,8 @@ Table of Contents * [new](#new) * [resolve](#resolve) * [resolve_address](#resolve_address) +* [Performance characteristics](#performance-characteristics) + * [Memory](#memory) # APIs @@ -138,3 +140,35 @@ When calling multiple times on cached records, it will apply load-balancing base * control whether to solely retrieve data from the internal cache without querying to the nameserver. [Back to TOC](#table-of-contents) + +# Performance characteristics + +## Memory + +We evaluated the capacity of DNS records using the following resources: + +* Shared memory size: + * 5 MB (by default): `lua_shared_dict kong_dns_cache 5m`. + * 10 MB: `lua_shared_dict kong_dns_cache 10m`. +* DNS response: + * Each DNS resolution response contains some number of A type records. + * Record: ~80 bytes json string, e.g., `{address = "127.0.0.1", name = <domain>, ttl = 3600, class = 1, type = 1}`. + * Domain: ~36 bytes string, e.g., `example.long.long.long.long.test`. Domain names with lengths between 10 and 36 bytes yield similar results.
+ +The results are as follows: + +| shared memory size | number of records per response | number of loaded responses | +|--------------------|-------------------|----------| +| 5 MB | 1 | 20224 | +| 5 MB | 2 ~ 3 | 10081 | +| 5 MB | 4 ~ 9 | 5041 | +| 5 MB | 10 ~ 20 | 5041 | +| 5 MB | 21 ~ 32 | 1261 | +| 10 MB | 1 | 40704 | +| 10 MB | 2 ~ 3 | 20321 | +| 10 MB | 4 ~ 9 | 10161 | +| 10 MB | 10 ~ 20 | 5081 | +| 10 MB | 21 ~ 32 | 2541 | + + +[Back to TOC](#table-of-contents) diff --git a/kong/templates/nginx_kong.lua b/kong/templates/nginx_kong.lua index af8035350b24..c42868b2139e 100644 --- a/kong/templates/nginx_kong.lua +++ b/kong/templates/nginx_kong.lua @@ -24,7 +24,7 @@ lua_shared_dict kong_db_cache_miss 12m; lua_shared_dict kong_secrets 5m; > if not legacy_dns_client then -lua_shared_dict kong_dns_cache 12m; +lua_shared_dict kong_dns_cache 5m; > end underscores_in_headers on; diff --git a/spec/fixtures/shared_dict.lua b/spec/fixtures/shared_dict.lua index 17ab30ba0b6f..fe0691d0a138 100644 --- a/spec/fixtures/shared_dict.lua +++ b/spec/fixtures/shared_dict.lua @@ -13,7 +13,7 @@ local dicts = { "kong_db_cache_2 16m", "kong_db_cache_miss 12m", "kong_db_cache_miss_2 12m", - "kong_dns_cache 12m", + "kong_dns_cache 5m", "kong_mock_upstream_loggers 10m", "kong_secrets 5m", "test_vault 5m", From 1f0bc17dc388a8285cb6e4993e2d6ce0533dfe6f Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 19:56:32 +0800 Subject: [PATCH 126/126] stale_ttl: fix expired time calculation --- kong/dns/client.lua | 24 +++++++++---------- .../30-new-dns-client/02-old_client_spec.lua | 17 +++++++------ .../03-old_client_cache_spec.lua | 19 +++++++-------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 64d8b1be4e46..35f21a3eba34 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -518,27 +518,27 @@ local function resolve_callback(self, name, qtype, cache_only, tries) -- `:peek(stale=true)` verifies if the
expired key remains in L2 shm, then -- initiates an asynchronous background updating task to refresh it. local ttl, _, answers = self.cache:peek(key, true) - if answers and not answers.errcode and ttl then - if not answers.expired then - answers.expire = now() + ttl - answers.expired = true - ttl = ttl + self.stale_ttl - else - ttl = ttl + (answers.expire - now()) + if answers and not answers.errcode and self.stale_ttl and ttl then + + -- `_expire_at` means the final expiration time of stale records + if not answers._expire_at then + answers._expire_at = answers.expire + self.stale_ttl end -- trigger the update task by the upper caller every 60 seconds - ttl = math_min(ttl, 60) + local remaining_stale_ttl = math_min(answers._expire_at - now(), 60) - if ttl > 0 then - log(DEBUG, PREFIX, "start stale update task ", key, " ttl:", ttl) + if remaining_stale_ttl > 0 then + log(DEBUG, PREFIX, "start stale update task ", key, + " remaining_stale_ttl:", remaining_stale_ttl) -- mlcache's internal lock mechanism ensures concurrent control start_stale_update_task(self, key, name, qtype) - answers.ttl = ttl + answers.ttl = remaining_stale_ttl + answers.expire = remaining_stale_ttl + now() - return answers, nil, ttl + return answers, nil, remaining_stale_ttl end end diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 9df473116950..b91319564fa4 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -1292,7 +1292,7 @@ describe("[DNS client]", function() assert.are.equal(NOT_FOUND_ERROR, err2) answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) assert.equal(answers1, answers2) - assert.falsy(answers2.expired) + assert.falsy(answers2._expire_at) -- wait for expiry of ttl and retry, it will not use the cached one -- because the cached one contains no avaible IP addresses @@ -1303,7 +1303,7 @@ describe("[DNS client]", function() assert.are.equal(NOT_FOUND_ERROR, err2) answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) - assert.falsy(answers2.expired) -- refreshed record + assert.falsy(answers2._expire_at) -- refreshed record -- wait for expiry of stale_ttl and retry, should be called twice now ngx.sleep(0.75 * stale_ttl) @@ -1315,7 +1315,7 @@ describe("[DNS client]", function() answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) assert.not_equal(answers1, answers2) - assert.falsy(answers2.expired) -- new answers, not expired + assert.falsy(answers2._expire_at) -- new answers, not expired end) it("verifies stale_ttl for available records", function() @@ -1345,7 +1345,7 @@ describe("[DNS client]", function() answers1 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.same(answers1[1].address, "1.1.1.1") assert.are.equal(call_count, 1) - assert.falsy(answers1.expired) + assert.falsy(answers1._expire_at) -- try again, HIT from cache, not stale answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) @@ -1359,10 +1359,9 @@ describe("[DNS client]", function() assert.are.equal(call_count, 1) -- todo: flakiness answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) - assert.is_true(answers2.expired) - answers2.expired = nil -- clear to be same with answers1 + assert(answers2._expire_at) + answers2._expire_at = nil -- clear to be same with answers1 assert_same_answers(answers1, answers2) - answers2.expired = true -- async stale updating task ngx.sleep(0.1 * stale_ttl) @@ -1372,7 +1371,7 @@ describe("[DNS client]", function() answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 2) - assert.falsy(answers2.expired) + assert.falsy(answers2._expire_at) -- The stale one will be completely eliminated from the cache. ngx.sleep(ttl + stale_ttl) @@ -1380,7 +1379,7 @@ describe("[DNS client]", function() answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 3) - assert.falsy(answers2.expired) + assert.falsy(answers2._expire_at) end) describe("verifies the polling of dns queries, retries, and wait times", function() diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 3dd3d773d822..eac3c53e55c8 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -232,10 +232,9 @@ describe("[DNS client cache]", function() -- background refresh query local answers2 = cli:resolve("myhost6") assert.falsy(answers2[1].tag) - assert.is_true(answers2.expired) -- stale; marked as expired - answers2.expired = nil + assert.is_number(answers2._expire_at) -- stale; marked as expired + answers2._expire_at = nil assert_same_answers(answers2, answers) - answers2.expired = true -- wait for the refresh to complete. Ensure that the sleeping time is less -- than ttl, avoiding the updated record from becoming stale again. 
@@ -244,7 +243,7 @@ describe("[DNS client cache]", function() -- resolve and check whether we got the new record from the mock copy local answers3 = cli:resolve("myhost6") assert.equal(answers3[1].tag, "new") - assert.falsy(answers3.expired) + assert.falsy(answers3._expire_at) assert.not_equal(answers, answers3) -- must be a different record now assert_same_answers(answers3, mock_records["myhost6.domain.test:"..resolver.TYPE_A]) @@ -345,15 +344,15 @@ describe("[DNS client cache]", function() } -- doing a resolve will trigger the background query now answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired -- wait again for the background query to complete sleep(0.1) -- background resolve is now complete, check the cache, it should still have the -- stale record, and it should not have been replaced by the error -- answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) - assert.is_true(answers.expired) - answers.expired = nil + assert.is_number(answers._expire_at) + answers._expire_at = nil assert_same_answers(rec1, answers) end) @@ -383,14 +382,14 @@ describe("[DNS client cache]", function() } -- doing a resolve will trigger the background query now answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired -- wait again for the background query to complete sleep(0.1) -- background resolve is now complete, check the cache, it should still have the -- stale record, and it should not have been replaced by the empty record answers = cli.cache:get("myhost9:" .. 
resolver.TYPE_A) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired - answers.expired = nil + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired + answers._expire_at = nil assert_same_answers(rec1, answers) end)