From fd10d6ee27ea77039fa2d9740431c4c6f4708930 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Wed, 17 Jul 2024 06:24:10 +0900 Subject: [PATCH] feat(dns): new DNS client (#12305) This commit introduces a brand new implementation of the DNS client for Kong. Key features and improvements: - Designed for easy maintenance and troubleshooting - Follows industry norms - Utilizes `lua-resty-mlcache` for cross-worker DNS result caching - Implements stale-while-updating and stale-if-error behaviors - Improves responsiveness and reduces resolver traffic, especially with many workers The new DNS client is enabled by default. To use the old client, set `legacy_dns_client = on` in `kong.conf`. KAG-3220 --------- Co-authored-by: Keery Nie Co-authored-by: Aapo Talvensaari Co-authored-by: Thijs Schreijer Co-authored-by: Chrono Co-authored-by: Qi Co-authored-by: Thibault Charbonnier Co-authored-by: Datong Sun --- .../unreleased/kong/refactor_dns_client.yml | 9 + kong-3.8.0-0.rockspec | 4 + kong/api/routes/kong.lua | 17 +- kong/conf_loader/constants.lua | 1 + kong/dns/README.md | 174 ++ kong/dns/client.lua | 704 ++++++++ kong/dns/utils.lua | 303 ++++ kong/globalpatches.lua | 4 + kong/resty/dns/client.lua | 7 + kong/templates/kong_defaults.lua | 1 + kong/templates/nginx_kong.lua | 4 + spec/01-unit/09-balancer/01-generic_spec.lua | 6 +- .../09-balancer/02-least_connections_spec.lua | 1 + .../03-consistent_hashing_spec.lua | 3 + .../09-balancer/04-round_robin_spec.lua | 43 +- spec/01-unit/09-balancer/06-latency_spec.lua | 1 + spec/01-unit/14-dns_spec.lua | 1 + spec/01-unit/21-dns-client/02-client_spec.lua | 2 + .../21-dns-client/03-client_cache_spec.lua | 2 + .../30-new-dns-client/01-utils_spec.lua | 462 +++++ .../30-new-dns-client/02-old_client_spec.lua | 1553 +++++++++++++++++ .../03-old_client_cache_spec.lua | 465 +++++ .../30-new-dns-client/04-client_ipc_spec.lua | 63 + .../30-new-dns-client/05-client_stat_spec.lua | 197 +++ .../04-admin_api/26-dns_client_spec.lua | 102 ++ 
spec/02-integration/05-proxy/05-dns_spec.lua | 2 +- .../10-balancer/01-healthchecks_spec.lua | 12 +- .../01-instrumentations_spec.lua | 4 +- .../kong/plugins/dns-client-test/handler.lua | 74 + .../kong/plugins/dns-client-test/schema.lua | 12 + spec/fixtures/shared_dict.lua | 1 + spec/helpers/dns.lua | 28 +- 32 files changed, 4225 insertions(+), 37 deletions(-) create mode 100644 changelog/unreleased/kong/refactor_dns_client.yml create mode 100644 kong/dns/README.md create mode 100644 kong/dns/client.lua create mode 100644 kong/dns/utils.lua create mode 100644 spec/01-unit/30-new-dns-client/01-utils_spec.lua create mode 100644 spec/01-unit/30-new-dns-client/02-old_client_spec.lua create mode 100644 spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua create mode 100644 spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua create mode 100644 spec/01-unit/30-new-dns-client/05-client_stat_spec.lua create mode 100644 spec/02-integration/04-admin_api/26-dns_client_spec.lua create mode 100644 spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua create mode 100644 spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua diff --git a/changelog/unreleased/kong/refactor_dns_client.yml b/changelog/unreleased/kong/refactor_dns_client.yml new file mode 100644 index 000000000000..da5cd40f65ca --- /dev/null +++ b/changelog/unreleased/kong/refactor_dns_client.yml @@ -0,0 +1,9 @@ +message: > + Starting from this version, a new DNS client library has been implemented and added into Kong. The new DNS client library has the following changes + - Introduced global caching for DNS records across workers, significantly reducing the query load on DNS servers. + - Introduced observable statistics for the new DNS client, and a new Admin API `/status/dns` to retrieve them. + - Deprecated the `dns_no_sync` option. Multiple DNS queries for the same name will always be synchronized (even across workers). 
This remains functional with the legacy DNS client library. + - Deprecated the `dns_not_found_ttl` option. It uses the `dns_error_ttl` option for all error responses. This option remains functional with the legacy DNS client library. + - Deprecated the `dns_order` option. By default, SRV, A, and AAAA are supported. Only names in the SRV format (`_service._proto.name`) enable resolving of DNS SRV records. +type: feature +scope: Core diff --git a/kong-3.8.0-0.rockspec b/kong-3.8.0-0.rockspec index 22e1a2b937e7..e60441ef32da 100644 --- a/kong-3.8.0-0.rockspec +++ b/kong-3.8.0-0.rockspec @@ -115,6 +115,10 @@ build = { ["kong.resty.dns.client"] = "kong/resty/dns/client.lua", ["kong.resty.dns.utils"] = "kong/resty/dns/utils.lua", + + ["kong.dns.client"] = "kong/dns/client.lua", + ["kong.dns.utils"] = "kong/dns/utils.lua", + ["kong.resty.ctx"] = "kong/resty/ctx.lua", ["kong.resty.mlcache"] = "kong/resty/mlcache/init.lua", diff --git a/kong/api/routes/kong.lua b/kong/api/routes/kong.lua index d2fa8a59443c..633083a6d5fb 100644 --- a/kong/api/routes/kong.lua +++ b/kong/api/routes/kong.lua @@ -269,5 +269,20 @@ return { } return kong.response.exit(200, body) end - } + }, + ["/status/dns"] = { + GET = function (self, db, helpers) + if kong.configuration.legacy_dns_client then + return kong.response.exit(501, { message = "not implemented with the legacy DNS client" }) + end + + return kong.response.exit(200, { + worker = { + id = ngx.worker.id() or -1, + count = ngx.worker.count(), + }, + stats = kong.dns.stats(), + }) + end + }, } diff --git a/kong/conf_loader/constants.lua b/kong/conf_loader/constants.lua index cda8a9a9ccdb..dbf0cb6def91 100644 --- a/kong/conf_loader/constants.lua +++ b/kong/conf_loader/constants.lua @@ -370,6 +370,7 @@ local CONF_PARSERS = { dns_not_found_ttl = { typ = "number" }, dns_error_ttl = { typ = "number" }, dns_no_sync = { typ = "boolean" }, + legacy_dns_client = { typ = "boolean" }, privileged_worker = { typ = "boolean", deprecated = { diff --git 
a/kong/dns/README.md b/kong/dns/README.md new file mode 100644 index 000000000000..8597627bda23 --- /dev/null +++ b/kong/dns/README.md @@ -0,0 +1,174 @@ +Name +==== + +Kong DNS client - The module is currently only used by Kong, and builds on top of the `lua-resty-dns` and `lua-resty-mlcache` libraries. + +Table of Contents +================= + +* [Name](#name) +* [APIs](#apis) + * [new](#new) + * [resolve](#resolve) + * [resolve_address](#resolve_address) +* [Performance characteristics](#performance-characteristics) + * [Memory](#memory) + +# APIs + +The following APIs are for internal development use only within Kong. In the current version, the new DNS library still needs to be compatible with the original DNS library. Therefore, the functions listed below cannot be directly invoked. For example, the `_M:resolve` function in the following APIs will be replaced to ensure compatibility with the previous DNS library API interface specifications `_M.resolve`. + +## new + +**syntax:** *c, err = dns_client.new(opts)* +**context:** any + +**Functionality:** + +Creates a DNS client object. Returns `nil` and a message string on error. + +Performs a series of initialization operations: + +* parse the `hosts` file, +* parse the `resolv.conf` file (used by the underlying `lua-resty-dns` library), +* initialize multiple TTL options, +* create a mlcache object and initialize it. + +**Input parameters:** + +`@opts` It accepts an options table argument. The following options are supported: + +* TTL options: + * `valid_ttl`: (default: `nil`) + * By default, it caches answers using the TTL value of a response. This optional parameter (in seconds) allows overriding it. + * `stale_ttl`: (default: `3600`) + * the time in seconds for keeping expired DNS records. + * Stale data remains in use from when a record expires until either the background refresh query completes or until `stale_ttl` seconds have passed. This helps Kong stay resilient if the DNS server is temporarily unavailable.
+ * `error_ttl`: (default: `1`) + * the time in seconds for caching DNS error responses. +* `hosts`: (default: `/etc/hosts`) + * the path of `hosts` file. +* `resolv_conf`: (default: `/etc/resolv.conf`) + * the path of `resolv.conf` file, it will be parsed and passed into the underlying `lua-resty-dns` library. +* `family`: (default: `{ "SRV", "A", "AAAA" }`) + * the types of DNS records that the library should query, it is taken from `kong.conf` option `dns_family`. +* options for the underlying `lua-resty-dns` library: + * `retrans`: (default: `5`) + * the total number of times of retransmitting the DNS request when receiving a DNS response times out according to the timeout setting. When trying to retransmit the query, the next nameserver according to the round-robin algorithm will be picked up. + * If not given, it is taken from `resolv.conf` option `options attempts:`. + * `timeout`: (default: `2000`) + * the time in milliseconds for waiting for the response for a single attempt of request transmission. + * If not given, it is taken from `resolv.conf` option `options timeout:`. But note that its unit in `resolv.conf` is second. + * `random_resolver`: (default: `false`) + * a boolean flag controls whether to randomly pick the nameserver to query first. If `true`, it will always start with the random nameserver. + * If not given, it is taken from `resolv.conf` option `rotate`. + * `nameservers`: + * a list of nameservers to be used. Each nameserver entry can be either a single hostname string or a table holding both the hostname string and the port number. For example, `{"8.8.8.8", {"8.8.4.4", 53} }`. + * If not given, it is taken from `resolv.conf` option `nameserver`. +* `cache_purge`: (default: `false`) + * a boolean flag controls whether to clear the internal cache shared by other DNS client instances across workers. + +[Back to TOC](#table-of-contents) + +## resolve + +**syntax:** *answers, err, tries? 
= resolve(qname, qtype, cache_only, tries?)* +**context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\** + +**Functionality:** + +Performs a DNS resolution. + +1. Check if the `<qname>` matches SRV format (`_service._proto.name`) to determine the `<qtype>` (SRV or A/AAAA), then use the key `<qname>:<qtype>` to query mlcache. If cached results are found, return them directly. +2. If there are no results available in the cache, it triggers the L3 callback of `mlcache:get` to query records from the DNS servers, details are as follows: + 1. Check if `<qname>` has an IP address in the `hosts` file, return if found. + 2. Check if `<qname>` is an IP address itself, return if true. + 3. Use `mlcache:peek` to check if the expired key still exists in the shared dictionary. If it does, return it directly to mlcache and trigger an asynchronous background task to update the expired data (`start_stale_update_task`). The maximum time that expired data can be reused is `stale_ttl`, but the maximum TTL returned to mlcache cannot exceed 60s. This way, if the expired key is not successfully updated by the background task after 60s, it can still be reused by calling the `resolve` function from the upper layer to trigger the L3 callback to continue executing this logic and initiate another background task for updating. + 1. For example, with a `stale_ttl` of 3600s, if the background task fails to update the record due to network issues during this time, and the upper-level application continues to call resolve to get the domain name result, it will trigger a background task to query the DNS result for that domain name every 60s, resulting in approximately 60 background tasks being triggered (3600s/60s). + 4. Query the DNS server, with `<name>:<type>` combinations: + 1. The `<name>` is extended according to settings in `resolv.conf`, such as `ndots`, `search`, and `domain`. + +**Return value:** + +* Return value `answers, err`: + * Return one array-like Lua table containing all the records.
+ * For example, `{{"address":"[2001:db8:3333:4444:5555:6666:7777:8888]","class":1,"name":"example.test","ttl":30,"type":28},{"address":"192.168.1.1","class":1,"name":"example.test","ttl":30,"type":1},"expire":1720765379,"ttl":30}`. + * IPv6 addresses are enclosed in brackets (`[]`). + * If the server returns a non-zero error code, it will return `nil` and a string describing the error in this record. + * For example, `nil, "dns server error: 3 name error"`, the server returned a result with error code 3 (NXDOMAIN). + * In case of severe errors, such as a network error or a malformed DNS response from the server, it will return `nil` and a string describing the error instead. For example: + * `nil, "dns server error: failed to send request to UDP server 10.0.0.1:53: timeout"`, there was a network issue. +* Return value and input parameter `@tries?`: + * If provided as an empty table, it will be returned as a third result. This table will be an array containing the error message for each (if any) failed try. + * For example, `[["example.test:A","dns server error: 3 name error"], ["example.test:AAAA","dns server error: 3 name error"]]`, both attempts failed due to a DNS server error with error code 3 (NXDOMAIN), indicating a name error. + +**Input parameters:** + +* `@qname`: the domain name to resolve. +* `@qtype`: (optional: `nil` or DNS TYPE value) + * specify the query type instead of the default one derived from the name (SRV for names in SRV format when SRV is enabled, A/AAAA otherwise). +* `@cache_only`: (optional: `boolean`) + * control whether to solely retrieve data from the internal cache without querying the nameserver. +* `@tries?`: see the above section `Return value and input parameter @tries?`. + +[Back to TOC](#table-of-contents) + +## resolve_address + +**syntax:** *ip, port_or_err, tries? = resolve_address(name, port, cache_only, tries?)* +**context:** *rewrite_by_lua\*, access_by_lua\*, content_by_lua\*, ngx.timer.\** + +**Functionality:** + +Performs a DNS resolution, and returns a single randomly selected address (IP and port number).
+ +When calling multiple times on cached records, it will apply load-balancing based on a round-robin (RR) scheme. For SRV records, this will be a _weighted_ round-robin (WRR) scheme (because of the weights it will be randomized). It will apply the round-robin schemes on each level individually. + +**Return value:** + +* Return value `ip, port_or_err`: + * Return one IP address and port number from records. + * Return `nil, err` if errors occur, with `err` containing an error message. +* Return value and input parameter `@tries?`: same as `@tries?` of `resolve` API. + +**Input parameters:** + +* `@name`: the domain name to resolve. +* `@port`: (optional: `nil` or port number) + * default port number to return if none was found in the lookup chain (only SRV records carry port information, SRV with `port=0` will be ignored). +* `@cache_only`: (optional: `boolean`) + * control whether to solely retrieve data from the internal cache without querying the nameserver. + +[Back to TOC](#table-of-contents) + +# Performance characteristics + +## Memory + +We evaluated the capacity of DNS records using the following resources: + +* Shared memory size: + * 5 MB (by default): `lua_shared_dict kong_dns_cache 5m`. + * 10 MB: `lua_shared_dict kong_dns_cache 10m`. +* DNS response: + * Each DNS resolution response contains some number of A type records. + * Record: ~80 bytes json string, e.g., `{address = "127.0.0.1", name = <domain>, ttl = 3600, class = 1, type = 1}`. + * Domain: ~36 bytes string, e.g., `example.long.long.long.long.test`. Domain names with lengths between 10 and 36 bytes yield similar results.
+ +The results of the evaluation are as follows: + +| shared memory size | number of records per response | number of loaded responses | +|--------------------|-------------------|----------| +| 5 MB | 1 | 20224 | +| 5 MB | 2 ~ 3 | 10081 | +| 5 MB | 4 ~ 9 | 5041 | +| 5 MB | 10 ~ 20 | 5041 | +| 5 MB | 21 ~ 32 | 1261 | +| 10 MB | 1 | 40704 | +| 10 MB | 2 ~ 3 | 20321 | +| 10 MB | 4 ~ 9 | 10161 | +| 10 MB | 10 ~ 20 | 5081 | +| 10 MB | 21 ~ 32 | 2541 | + + +[Back to TOC](#table-of-contents) diff --git a/kong/dns/client.lua b/kong/dns/client.lua new file mode 100644 index 000000000000..35f21a3eba34 --- /dev/null +++ b/kong/dns/client.lua @@ -0,0 +1,704 @@ +local cjson = require("cjson.safe") +local utils = require("kong.dns.utils") +local mlcache = require("kong.resty.mlcache") +local resolver = require("resty.dns.resolver") + +local now = ngx.now +local log = ngx.log +local ERR = ngx.ERR +local WARN = ngx.WARN +local NOTICE = ngx.NOTICE +local DEBUG = ngx.DEBUG +local ALERT = ngx.ALERT +local timer_at = ngx.timer.at +local worker_id = ngx.worker.id + +local pairs = pairs +local ipairs = ipairs +local tonumber = tonumber +local setmetatable = setmetatable + +local math_min = math.min +local math_floor = math.floor +local string_lower = string.lower +local table_insert = table.insert +local table_isempty = require("table.isempty") + +local is_srv = utils.is_srv +local parse_hosts = utils.parse_hosts +local ipv6_bracket = utils.ipv6_bracket +local search_names = utils.search_names +local parse_resolv_conf = utils.parse_resolv_conf +local get_next_round_robin_answer = utils.get_next_round_robin_answer +local get_next_weighted_round_robin_answer = utils.get_next_weighted_round_robin_answer + +local req_dyn_hook_run_hook = require("kong.dynamic_hook").run_hook + + +-- Constants and default values + +local PREFIX = "[dns_client] " + +local DEFAULT_ERROR_TTL = 1 -- unit: second +local DEFAULT_STALE_TTL = 3600 +-- long-lasting TTL of 10 years for hosts or static IP addresses in cache
settings +local LONG_LASTING_TTL = 10 * 365 * 24 * 60 * 60 + +local DEFAULT_FAMILY = { "SRV", "A", "AAAA" } + +local TYPE_SRV = resolver.TYPE_SRV +local TYPE_A = resolver.TYPE_A +local TYPE_AAAA = resolver.TYPE_AAAA +local TYPE_A_OR_AAAA = -1 -- used to resolve IP addresses for SRV targets + +local TYPE_TO_NAME = { + [TYPE_SRV] = "SRV", + [TYPE_A] = "A", + [TYPE_AAAA] = "AAAA", + [TYPE_A_OR_AAAA] = "A/AAAA", +} + +local HIT_L3 = 3 -- L1 lru, L2 shm, L3 callback, L4 stale + +local HIT_LEVEL_TO_NAME = { + [1] = "hit_lru", + [2] = "hit_shm", + [3] = "miss", + [4] = "hit_stale", +} + +-- client specific error +local CACHE_ONLY_ERROR_CODE = 100 +local CACHE_ONLY_ERROR_MESSAGE = "cache only lookup failed" +local CACHE_ONLY_ANSWERS = { + errcode = CACHE_ONLY_ERROR_CODE, + errstr = CACHE_ONLY_ERROR_MESSAGE, +} + +local EMPTY_RECORD_ERROR_CODE = 101 +local EMPTY_RECORD_ERROR_MESSAGE = "empty record received" + + +-- APIs + +local _M = { + TYPE_SRV = TYPE_SRV, + TYPE_A = TYPE_A, + TYPE_AAAA = TYPE_AAAA, +} +local _MT = { __index = _M, } + + +local _TRIES_MT = { __tostring = cjson.encode, } + + +-- Ensures that `stats[name]` exists (an empty table is created when missing). +local function stats_init_name(stats, name) + if not stats[name] then + stats[name] = {} + end +end + + +-- Increments counter `key` of the `name` entry by one. +-- The entry is created on demand, so a caller that did not go through +-- `stats_init_name()` first cannot hit "attempt to index a nil value". +local function stats_increment(stats, name, key) + local entry = stats[name] + if not entry then + entry = {} + stats[name] = entry + end + + entry[key] = (entry[key] or 0) + 1 +end + + +-- Stores `value` under `key` of the `name` entry, creating the entry on demand. +local function stats_set_count(stats, name, key, value) + local entry = stats[name] + if not entry then + entry = {} + stats[name] = entry + end + + entry[key] = value +end + + +local init_hosts do + local function insert_answer_into_cache(cache, hosts_cache, address, name, qtype) + local answers = { + ttl = LONG_LASTING_TTL, + expire = now() + LONG_LASTING_TTL, + { + name = name, + type = qtype, + address = address, + class = 1, + ttl = LONG_LASTING_TTL, + }, + } + + hosts_cache[name .. ":" .. qtype] = answers + hosts_cache[name .. ":" ..
TYPE_A_OR_AAAA] = answers + end + + -- Builds the hosts lookup table from the hosts file at `path`. + -- Each lowercased name gets one entry per address family, keyed by + -- "<name>:<qtype>", plus a "<name>:-1" (A/AAAA) entry; when a name has both + -- an IPv6 and an IPv4 address, the A/AAAA entry ends up holding the IPv4 one. + -- NOTE: the `cache` argument is currently unused by this function. + -- @return the parsed hosts table and the lookup table + function init_hosts(cache, path) + local hosts = parse_hosts(path) + local hosts_cache = {} + + for name, address in pairs(hosts) do + name = string_lower(name) + + if address.ipv6 then + insert_answer_into_cache(cache, hosts_cache, address.ipv6, name, TYPE_AAAA) + end + + if address.ipv4 then + insert_answer_into_cache(cache, hosts_cache, address.ipv4, name, TYPE_A) + end + end + + return hosts, hosts_cache + end +end + + +-- distinguish the worker_events sources registered by different new() instances +local ipc_counter = 0 + +-- Creates a DNS client instance. +-- @param opts (optional) options table; fields read here: `family`, +-- `resolv_conf`, `enable_ipv6`, `nameservers`, `random_resolver`, `retrans`, +-- `timeout`, `error_ttl`, `valid_ttl`, `stale_ttl`, `cache_size`, +-- `cache_purge`, `hosts` and `_resolv` +-- @return the client object, or `nil` + error message when `family` holds an +-- unsupported record type or the mlcache instance cannot be created +function _M.new(opts) + opts = opts or {} + + local enable_ipv4, enable_ipv6, enable_srv + + for _, typstr in ipairs(opts.family or DEFAULT_FAMILY) do + typstr = typstr:upper() + + if typstr == "A" then + enable_ipv4 = true + + elseif typstr == "AAAA" then + enable_ipv6 = true + + elseif typstr == "SRV" then + enable_srv = true + + else + return nil, "Invalid dns type in dns_family array: " .. typstr + end + end + + log(NOTICE, PREFIX, "supported types: ", enable_srv and "srv " or "", + enable_ipv4 and "ipv4 " or "", enable_ipv6 and "ipv6 " or "") + + -- parse resolv.conf + local resolv, err = parse_resolv_conf(opts.resolv_conf, opts.enable_ipv6) + if not resolv then + log(WARN, PREFIX, "Invalid resolv.conf: ", err) + resolv = { options = {} } + end + + -- init the resolver options for lua-resty-dns + local nameservers = (opts.nameservers and not table_isempty(opts.nameservers)) + and opts.nameservers + or resolv.nameservers + + if not nameservers or table_isempty(nameservers) then + log(WARN, PREFIX, "Invalid configuration, no nameservers specified") + end + + local no_random + + if opts.random_resolver == nil then + no_random = not resolv.options.rotate + else + no_random = not opts.random_resolver + end + + local r_opts = { + retrans = opts.retrans or resolv.options.attempts or 5, + timeout = opts.timeout or resolv.options.timeout or 2000, -- ms + no_random = no_random, +
nameservers = nameservers, + } + + -- init the mlcache + + -- maximum timeout for the underlying r:query() operation to complete + -- socket timeout * retrans * 2 calls for send and receive + 1s extra delay + local lock_timeout = r_opts.timeout / 1000 * r_opts.retrans * 2 + 1 -- s + + local resty_lock_opts = { + timeout = lock_timeout, + exptimeout = lock_timeout + 1, + } + + -- TODO: convert the ipc to a module constant, currently we need to use the + -- ipc_source to distinguish sources of different DNS client events. + ipc_counter = ipc_counter + 1 + local ipc_source = "dns_client_mlcache#" .. ipc_counter + local ipc = { + register_listeners = function(events) + -- The DNS client library will be required in globalpatches before Kong + -- initializes worker_events. + if not kong or not kong.worker_events then + return + end + + local cwid = worker_id() or -1 + for _, ev in pairs(events) do + local handler = function(data, event, source, wid) + if cwid ~= wid then -- skip events posted by this worker: it has already handled them + ev.handler(data) + end + end + + kong.worker_events.register(handler, ipc_source, ev.channel) + end + end, + + -- @channel: event channel name, such as "mlcache:invalidate:dns_cache" + -- @data: mlcache's key name, such as "<qname>:<qtype>" + broadcast = function(channel, data) + if not kong or not kong.worker_events then + return + end + + local ok, err = kong.worker_events.post(ipc_source, channel, data) + if not ok then + log(ERR, PREFIX, "failed to post event '", ipc_source, "', '", channel, "': ", err) + end + end, + } + + local cache, err = mlcache.new("dns_cache", "kong_dns_cache", { + ipc = ipc, + neg_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + -- 10000 is a reliable and tested value from the original library. + lru_size = opts.cache_size or 10000, + shm_locks = ngx.shared.kong_locks and "kong_locks", + resty_lock_opts = resty_lock_opts, + }) + + if not cache then + return nil, "could not create mlcache: " ..
err + end + + if opts.cache_purge then + cache:purge(true) + end + + -- parse hosts + local hosts, hosts_cache = init_hosts(cache, opts.hosts) + + return setmetatable({ + cache = cache, + stats = {}, + hosts = hosts, + r_opts = r_opts, + resolv = opts._resolv or resolv, + valid_ttl = opts.valid_ttl, + error_ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + stale_ttl = opts.stale_ttl or DEFAULT_STALE_TTL, + enable_srv = enable_srv, + enable_ipv4 = enable_ipv4, + enable_ipv6 = enable_ipv6, + hosts_cache = hosts_cache, + + -- TODO: Make the table readonly. But if `string.buffer.encode/decode` and + -- `pl.tablex.readonly` are called on it, it will become empty table. + -- + -- quickly accessible constant empty answers + EMPTY_ANSWERS = { + errcode = EMPTY_RECORD_ERROR_CODE, + errstr = EMPTY_RECORD_ERROR_MESSAGE, + ttl = opts.error_ttl or DEFAULT_ERROR_TTL, + }, + }, _MT) +end + + +-- Post-processes a resolver response before it is cached. +-- * error responses (`errcode` set) are returned as-is with `ttl = self.error_ttl` +-- * record names are lowercased; only records whose type equals `qtype` are kept +-- * AAAA addresses and SRV targets are passed through `ipv6_bracket` +-- * the result's `ttl` is `self.valid_ttl` when configured, otherwise the +-- smallest TTL among ALL records of the response (not only the kept ones) +-- * when no record is kept, the shared `self.EMPTY_ANSWERS` table is returned +-- NOTE: the records inside `answers` are modified in place; `qname` is unused. +local function process_answers(self, qname, qtype, answers) + local errcode = answers.errcode + if errcode then + answers.ttl = self.error_ttl + return answers + end + + local processed_answers = {} + + -- 0xffffffff for maximum TTL value + local ttl = math_min(self.valid_ttl or 0xffffffff, 0xffffffff) + + for _, answer in ipairs(answers) do + answer.name = string_lower(answer.name) + + if self.valid_ttl then + answer.ttl = self.valid_ttl + else + ttl = math_min(ttl, answer.ttl) + end + + local answer_type = answer.type + + if answer_type == qtype then + -- compatible with balancer, see https://github.com/Kong/kong/pull/3088 + if answer_type == TYPE_AAAA then + answer.address = ipv6_bracket(answer.address) + + elseif answer_type == TYPE_SRV then + answer.target = ipv6_bracket(answer.target) + end + + table_insert(processed_answers, answer) + end + end + + if table_isempty(processed_answers) then + log(DEBUG, PREFIX, "processed ans:empty") + return self.EMPTY_ANSWERS + end + + log(DEBUG, PREFIX, "processed ans:", #processed_answers) + + processed_answers.expire = now() + ttl +
processed_answers.ttl = ttl + + return processed_answers +end + + +-- Sends one query for `name`/`qtype` to the nameservers through a fresh +-- lua-resty-dns resolver and updates the "<name>:<qtype>" statistics. +-- @param tries array that receives a `{ "<name>:<type>", err }` entry per failure +-- @return processed answers (possibly an error answer carrying `errcode`), +-- or `nil` + error message on a network error / malformed response +local function resolve_query(self, name, qtype, tries) + local key = name .. ":" .. qtype + + stats_init_name(self.stats, key) + stats_increment(self.stats, key, "query") + + local r, err = resolver:new(self.r_opts) + if not r then + return nil, "failed to instantiate the resolver: " .. err + end + + local start = now() + + local answers, err = r:query(name, { qtype = qtype }) + r:destroy() + + local duration = math_floor((now() - start) * 1000) + + stats_set_count(self.stats, key, "query_last_time", duration) + + log(DEBUG, PREFIX, "r:query(", key, ") ans:", answers and #answers or "-", + " t:", duration, " ms") + + -- network error or malformed DNS response + if not answers then + stats_increment(self.stats, key, "query_fail_nameserver") + err = "DNS server error: " .. tostring(err) .. ", took " .. duration .. " ms" + + -- TODO: make the error more structured, like: + -- { qname = name, qtype = qtype, error = err, } or something similar + -- fall back to the numeric type: `qtype` may be a type without a name in + -- TYPE_TO_NAME, and concatenating nil would raise an error + table_insert(tries, { name .. ":" .. (TYPE_TO_NAME[qtype] or qtype), err }) + + return nil, err + end + + answers = process_answers(self, name, qtype, answers) + + stats_increment(self.stats, key, answers.errstr and + "query_fail:" .. answers.errstr or + "query_succ") + + -- DNS response error + if answers.errcode then + err = ("dns %s error: %s %s"):format( + answers.errcode < CACHE_ONLY_ERROR_CODE and "server" or "client", + answers.errcode, answers.errstr) + table_insert(tries, { name .. ":" ..
(TYPE_TO_NAME[qtype] or qtype), err }) + end + + return answers +end + + +-- resolve all `name`s and return first usable answers +-- @return answers, nil, ttl; or `nil` + error message on a severe error +local function resolve_query_names(self, names, qtype, tries) + local answers, err + + for _, qname in ipairs(names) do + answers, err = resolve_query(self, qname, qtype, tries) + + -- severe error occurred + if not answers then + return nil, err + end + + if not answers.errcode then + return answers, nil, answers.ttl + end + end + + -- `names` was empty: nothing was queried, so there is no answer to index + if not answers then + return nil, "no names to query" + end + + -- not found in the search iteration + return answers, nil, answers.ttl +end + + +-- Queries the names derived from `name` by `search_names` for `qtype`. +-- For TYPE_A_OR_AAAA, A is tried first (when IPv4 is enabled) and AAAA is +-- only tried when A ended with an error answer or IPv4 is disabled. +local function resolve_query_types(self, name, qtype, tries) + local names = search_names(name, self.resolv, self.hosts) + local answers, err, ttl + + -- the specific type + if qtype ~= TYPE_A_OR_AAAA then + return resolve_query_names(self, names, qtype, tries) + end + + -- query A or AAAA + if self.enable_ipv4 then + answers, err, ttl = resolve_query_names(self, names, TYPE_A, tries) + if not answers or not answers.errcode then + return answers, err, ttl + end + end + + if self.enable_ipv6 then + answers, err, ttl = resolve_query_names(self, names, TYPE_AAAA, tries) + end + + return answers, err, ttl +end + + +-- Timer handler: re-queries a stale key and stores successful answers in the cache. +local function stale_update_task(premature, self, key, name, qtype) + if premature then + return + end + + local tries = setmetatable({}, _TRIES_MT) + local answers = resolve_query_types(self, name, qtype, tries) + if not answers or answers.errcode then + log(DEBUG, PREFIX, "failed to update stale DNS records: ", tostring(tries)) + return + end + + log(DEBUG, PREFIX, "update stale DNS records: ", #answers) + + -- do not drop a failed cache write silently, the stale record would stay in use + local ok, err = self.cache:set(key, { ttl = answers.ttl }, answers) + if not ok then + log(ERR, PREFIX, "failed to cache updated DNS records for ", key, ": ", err) + end +end + + +-- Counts a stale hit for `key` and schedules `stale_update_task` on a zero-delay timer. +local function start_stale_update_task(self, key, name, qtype) + stats_increment(self.stats, key, "stale") + + local ok, err = timer_at(0, stale_update_task, self, key, name, qtype) + if not ok then + log(ALERT, PREFIX, "failed to start a timer to update stale DNS records: ", err) + end +end + + +local function check_and_get_ip_answers(name) + -- TODO: use
is_valid_ipv4 from kong/tools/ip.lua instead + if name:match("^%d+%.%d+%.%d+%.%d+$") then -- IPv4 + return { + { name = name, class = 1, type = TYPE_A, address = name }, + } + end + + if name:find(":", 1, true) then -- IPv6 + return { + { name = name, class = 1, type = TYPE_AAAA, address = ipv6_bracket(name) }, + } + end + + return nil +end + + +-- Callback handed to `self.cache:get()` by `resolve_all()`. +-- It tries, in this order: IP literals, the hosts lookup table, stale records +-- still present in the cache (a background refresh is started for them) and +-- finally the nameservers (skipped when `cache_only` is set). +-- @return answers, err, ttl +local function resolve_callback(self, name, qtype, cache_only, tries) + -- check if name is ip address + local answers = check_and_get_ip_answers(name) + if answers then -- domain name is IP literal + answers.ttl = LONG_LASTING_TTL + answers.expire = now() + answers.ttl + return answers, nil, answers.ttl + end + + -- check if this key exists in the hosts file (it may be evicted from cache) + local key = name .. ":" .. qtype + local answers = self.hosts_cache[key] + if answers then + return answers, nil, answers.ttl + end + + -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then + -- initiates an asynchronous background updating task to refresh it.
+ local ttl, _, answers = self.cache:peek(key, true) + + if answers and not answers.errcode and self.stale_ttl and ttl then + + -- `_expire_at` means the final expiration time of stale records + if not answers._expire_at then + answers._expire_at = answers.expire + self.stale_ttl + end + + -- trigger the update task by the upper caller every 60 seconds + local remaining_stale_ttl = math_min(answers._expire_at - now(), 60) + + if remaining_stale_ttl > 0 then + log(DEBUG, PREFIX, "start stale update task ", key, + " remaining_stale_ttl:", remaining_stale_ttl) + + -- mlcache's internal lock mechanism ensures concurrent control + start_stale_update_task(self, key, name, qtype) + answers.ttl = remaining_stale_ttl + answers.expire = remaining_stale_ttl + now() + + return answers, nil, remaining_stale_ttl + end + end + + if cache_only then + return CACHE_ONLY_ANSWERS, nil, -1 + end + + log(DEBUG, PREFIX, "cache miss, try to query ", key) + + return resolve_query_types(self, name, qtype, tries) +end + + +-- Resolves `name` through the cache, with `resolve_callback` as the callback +-- given to `self.cache:get()`. +-- When `qtype` is nil it is TYPE_SRV for names accepted by `is_srv` (and SRV +-- enabled), TYPE_A_OR_AAAA otherwise. Updates the "<name>:<qtype>" statistics. +-- @return answers (nil when an error answer was cached/returned), err, tries +local function resolve_all(self, name, qtype, cache_only, tries, has_timing) + name = string_lower(name) + tries = setmetatable(tries or {}, _TRIES_MT) + + if not qtype then + qtype = ((self.enable_srv and is_srv(name)) and TYPE_SRV or TYPE_A_OR_AAAA) + end + + local key = name .. ":" ..
qtype + + log(DEBUG, PREFIX, "resolve_all ", key) + + stats_init_name(self.stats, key) + stats_increment(self.stats, key, "runs") + + local answers, err, hit_level = self.cache:get(key, nil, resolve_callback, + self, name, qtype, cache_only, + tries) + -- check for runtime errors in the callback + if err and err:sub(1, 8) == "callback" then + log(ALERT, PREFIX, err) + end + + local hit_str = hit_level and HIT_LEVEL_TO_NAME[hit_level] or "fail" + stats_increment(self.stats, key, hit_str) + + log(DEBUG, PREFIX, "cache lookup ", key, " ans:", answers and #answers or "-", + " hlv:", hit_str) + + if has_timing then + req_dyn_hook_run_hook("timing", "dns:cache_lookup", + (hit_level and hit_level < HIT_L3)) + end + + if answers and answers.errcode then + err = ("dns %s error: %s %s"):format( + answers.errcode < CACHE_ONLY_ERROR_CODE and "server" or "client", + answers.errcode, answers.errstr) + return nil, err, tries + end + + return answers, err, tries +end + + +-- Resolves `name`; see `resolve_all` for the meaning of the arguments. +-- @return answers, err, tries +function _M:resolve(name, qtype, cache_only, tries) + local timing = ngx.ctx and ngx.ctx.has_timing + + return resolve_all(self, name, qtype, cache_only, tries, timing) +end + + +-- Resolves `name` to a single address. +-- When the answers are SRV records, one record is picked with the weighted +-- round-robin helper, its non-zero port replaces `port`, and its target is +-- resolved again as A/AAAA. +-- @return address, port, tries; or nil, err, tries +function _M:resolve_address(name, port, cache_only, tries) + local timing = ngx.ctx and ngx.ctx.has_timing + local records, err + + records, err, tries = resolve_all(self, name, nil, cache_only, tries, timing) + + local first = records and records[1] + if first and first.type == TYPE_SRV then + local srv = get_next_weighted_round_robin_answer(records) + + -- a zero (or missing) SRV port keeps the caller supplied default + if srv.port and srv.port ~= 0 then + port = srv.port + end + + records, err, tries = resolve_all(self, srv.target, TYPE_A_OR_AAAA, + cache_only, tries, timing) + end + + if records then + return get_next_round_robin_answer(records).address, port, tries + end + + return nil, err, tries +end + + +-- compatible with original DNS client library +-- These APIs will be deprecated if fully replacing the original one.
+local dns_client
+
+-- (Re)creates the module-level client used by the compatibility functions
+-- below. Legacy camelCase option names (`validTtl`, `badTtl`, `staleTtl`,
+-- `cacheSize`) are copied onto their new names when those are not set; note
+-- that @opts is modified in place.
+-- @return `true` on success, or `nil` + `err` when `_M.new` fails
+function _M.init(opts)
+  log(DEBUG, PREFIX, "(re)configuring dns client")
+
+  if opts then
+    opts.valid_ttl = opts.valid_ttl or opts.validTtl
+    opts.error_ttl = opts.error_ttl or opts.badTtl
+    opts.stale_ttl = opts.stale_ttl or opts.staleTtl
+    opts.cache_size = opts.cache_size or opts.cacheSize
+  end
+
+  local client, err = _M.new(opts)
+  if not client then
+    return nil, err
+  end
+
+  dns_client = client
+  return true
+end
+
+
+-- New and old libraries have the same function name.
+-- Keep the instance method reachable as `_resolve` before `_M.resolve` is
+-- replaced by the module-level function below.
+_M._resolve = _M.resolve
+
+-- Module-level resolve: forwards to the client created by `_M.init()`, taking
+-- the query type from `r_opts.qtype`. `dns_client` is nil until `_M.init()`
+-- has succeeded.
+function _M.resolve(name, r_opts, cache_only, tries)
+  return dns_client:_resolve(name, r_opts and r_opts.qtype, cache_only, tries)
+end
+
+
+-- Module-level address lookup: forwards to `resolve_address` of the client
+-- created by `_M.init()`.
+function _M.toip(name, port, cache_only, tries)
+  return dns_client:resolve_address(name, port, cache_only, tries)
+end
+
+
+-- "_ldap._tcp.example.com:33" -> "_ldap._tcp.example.com|SRV"
+-- Keys that do not end in ":<number>" are returned unchanged; a number without
+-- an entry in TYPE_TO_NAME is kept as is.
+local function format_key(key)
+  local qname, qtype = key:match("^(.+):(%-?%d+)$")  -- match "(qname):(qtype)"
+  return qtype and qname .. "|" .. (TYPE_TO_NAME[tonumber(qtype)] or qtype)
+         or key
+end
+
+
+-- Returns a new table holding the client's statistics, re-keyed by
+-- `format_key`. The values are the same objects as in `dns_client.stats`.
+function _M.stats()
+  local stats = {}
+  for k, v in pairs(dns_client.stats) do
+    stats[format_key(k)] = v
+  end
+  return stats
+end
+
+
+-- For testing
+
+-- These helpers are only defined when `package.loaded.busted` is set.
+if package.loaded.busted then
+  function _M.getobj()
+    return dns_client
+  end
+
+  -- Returns a small wrapper around the cache of the current client; the cache
+  -- is captured when `getcache()` is called.
+  function _M.getcache()
+    return {
+      set = function(self, k, v, ttl)
+        self.cache:set(k, {ttl = ttl or 0}, v)
+      end,
+
+      delete = function(self, k)
+        self.cache:delete(k)
+      end,
+
+      cache = dns_client.cache,
+    }
+  end
+end
+
+
+return _M
diff --git a/kong/dns/utils.lua b/kong/dns/utils.lua
new file mode 100644
index 000000000000..32a67c805fea
--- /dev/null
+++ b/kong/dns/utils.lua
@@ -0,0 +1,303 @@
+local utils = require("kong.resty.dns.utils")
+
+local log = ngx.log
+
+local NOTICE = ngx.NOTICE
+
+local type = type
+local ipairs = ipairs
+local tonumber = tonumber
+local math_random = math.random
+local table_clear = require("table.clear")
+local table_insert = table.insert
+local table_remove = table.remove
+
+local readlines = require("pl.utils").readlines
+
+local DEFAULT_HOSTS_FILE = "/etc/hosts"
+local DEFAULT_RESOLV_CONF = "/etc/resolv.conf"
+
+local LOCALHOST = {
+  ipv4 = "127.0.0.1",
+  ipv6 = "[::1]",
+}
+
+local DEFAULT_HOSTS = { localhost = LOCALHOST }
+
+
+local _M = {}
+
+
+-- checks the hostname type
+-- More than one colon means "ipv6"; otherwise a name made only of digits and
+-- dots (colons ignored) is "ipv4"; anything else is "domain".
+-- @return "ipv4", "ipv6", or "domain"
+function _M.hostname_type(name)
+  local remainder, colons = name:gsub(":", "")
+  if colons > 1 then
+    return "ipv6"
+  end
+
+  if remainder:match("^[%d%.]+$") then
+    return "ipv4"
+  end
+
+  return "domain"
+end
+
+
+-- parses a hostname with an optional port
+-- IPv6 addresses are always returned in square brackets
+-- @param name the string to check (this may contain a port number)
+-- @return `name/ip` + `port (or nil)` + `type ("ipv4", "ipv6" or "domain")`
+function _M.parse_hostname(name)
+  local t = _M.hostname_type(name)
+  if t == "ipv4" or t == "domain" then
+    local ip, port = name:match("^([^:]+)%:*(%d*)$")
+    return ip, tonumber(port), t
+  end
+
+  -- ipv6
+  if name:match("%[") then  -- brackets, so possibly a port
+    local ip, port = name:match("^%[([^%]]+)%]*%:*(%d*)$")
+    return "[" .. ip .. "]", tonumber(port), t
+  end
+
+  return "[" .. name .. "]", nil, t  -- no brackets also means no port
+end
+
+
+-- Returns @path itself when it is already a table (of lines), otherwise the
+-- result of `readlines(path)`.
+local function get_lines(path)
+  if type(path) == "table" then
+    return path
+  end
+
+  return readlines(path)
+end
+
+
+-- Parses a hosts file into a lookup table.
+-- @param path file name or table of lines (defaults to "/etc/hosts")
+-- @param enable_ipv6 currently not used by this function
+-- @return table mapping each lowercased hostname to `{ ipv4 = ip, ipv6 = ip }`
+--         (only the families found are set, and the first address per family
+--         wins); a `localhost` entry is always present. When the lines cannot
+--         be obtained, the shared DEFAULT_HOSTS table is returned.
+function _M.parse_hosts(path, enable_ipv6)
+  local lines, err = get_lines(path or DEFAULT_HOSTS_FILE)
+  if not lines then
+    log(NOTICE, "Invalid hosts file: ", err)
+    return DEFAULT_HOSTS
+  end
+
+  local hosts = {}
+
+  for _, line in ipairs(lines) do
+    -- Remove leading/trailing whitespaces and split by whitespace
+    local parts = {}
+    for part in line:gmatch("%S+") do
+      if part:sub(1, 1) == '#' then
+        break
+      end
+
+      table_insert(parts, part:lower())
+    end
+
+    -- Check if the line contains an IP address followed by hostnames
+    if #parts >= 2 then
+      local ip, _, family = _M.parse_hostname(parts[1])
+
+      -- `hostname_type` reports non-IP names as "domain"; skip lines whose
+      -- first field is not an IP address
+      if family ~= "domain" then  -- ipv4/ipv6
+        for i = 2, #parts do
+          local host = parts[i]
+          local v = hosts[host]
+
+          if not v then
+            v = {}
+            hosts[host] = v
+          end
+
+          v[family] = v[family] or ip  -- prefer to use the first ip
+        end
+      end
+    end
+  end
+
+  if not hosts.localhost then
+    hosts.localhost = LOCALHOST
+  end
+
+  return hosts
+end
+
+
+-- Parses resolv.conf through the old library and normalizes the result:
+-- environment overrides are applied, `options`, `ndots` and `search` get
+-- defaults, a non-positive timeout is replaced, "." is dropped from the search
+-- list, and `nameservers` is built from the IPv4 entries plus, when
+-- @enable_ipv6 is set, the IPv6 entries that contain no "%".
+-- @param path file name or table of lines (defaults to "/etc/resolv.conf")
+-- @param enable_ipv6 whether IPv6 nameservers are kept
+-- @return the `resolv` table, or `nil` + `err` when parsing fails
+-- TODO: need to rewrite it instead of calling parseResolvConf from the old library
+function _M.parse_resolv_conf(path, enable_ipv6)
+  local resolv, err = utils.parseResolvConf(path or DEFAULT_RESOLV_CONF)
+  if not resolv then
+    return nil, err
+  end
+
+  resolv = utils.applyEnv(resolv)
+  resolv.options = resolv.options or {}
+  resolv.ndots = resolv.options.ndots or 1
+  resolv.search = resolv.search or (resolv.domain and { resolv.domain })
+
+  -- check if timeout is 0s
+  if resolv.options.timeout and resolv.options.timeout <= 0 then
+    log(NOTICE, "A non-positive timeout of ", resolv.options.timeout,
+                "s is configured in resolv.conf. Setting it to 2000ms.")
+    resolv.options.timeout = 2000 -- 2000ms is lua-resty-dns default
+  end
+
+  -- remove special domain like "."
+  if resolv.search then
+    -- iterate backwards so that removals do not skip entries
+    for i = #resolv.search, 1, -1 do
+      if resolv.search[i] == "." then
+        table_remove(resolv.search, i)
+      end
+    end
+  end
+
+  -- nameservers
+  if resolv.nameserver then
+    local nameservers = {}
+
+    for _, address in ipairs(resolv.nameserver) do
+      local ip, port, t = utils.parseHostname(address)
+      if t == "ipv4" or
+        (t == "ipv6" and not ip:find([[%]], nil, true) and enable_ipv6)
+      then
+        table_insert(nameservers, port and { ip, port } or ip)
+      end
+    end
+
+    resolv.nameservers = nameservers
+  end
+
+  return resolv
+end
+
+
+-- Tells whether @name is to be queried as is: true when it ends with a dot or
+-- contains at least @ndots dots.
+function _M.is_fqdn(name, ndots)
+  if name:sub(-1) == "." then
+    return true
+  end
+
+  local _, dot_count = name:gsub("%.", "")
+
+  return (dot_count >= ndots)
+end
+
+
+-- check if it matches the SRV pattern: _service._proto.name
+-- (e.g. "_ldap._tcp.example.com")
+function _M.is_srv(name)
+  return name:match("^_[^._]+%._[^._]+%.[^.]+") ~= nil
+end
+
+
+-- construct names from resolv options: search, ndots and domain
+-- @return `{ name }` when there is no search list, @name passes `is_fqdn`, or
+--         @name is a key of @hosts; otherwise @name with each search suffix
+--         appended, followed by @name itself
+function _M.search_names(name, resolv, hosts)
+  if not resolv.search or _M.is_fqdn(name, resolv.ndots) or
+    (hosts and hosts[name])
+  then
+    return { name }
+  end
+
+  local names = {}
+
+  for _, suffix in ipairs(resolv.search) do
+    table_insert(names, name .. "." .. suffix)
+  end
+
+  table_insert(names, name) -- append the original name at last
+
+  return names
+end
+
+
+-- add square brackets around IPv6 addresses if a non-strict check detects them
+function _M.ipv6_bracket(name)
+  if name:match("^[^[].*:") then  -- not start with '[' and contains ':'
+    return "[" .. name .. "]"
+  end
+
+  return name
+end
+
+
+-- util APIs to balance @answers
+
+-- Returns the next answer in turn; the cursor is kept in `answers.last`.
+function _M.get_next_round_robin_answer(answers)
+  answers.last = (answers.last or 0) % #answers + 1
+
+  return answers[answers.last]
+end
+
+
+do
+  -- based on the Nginx's SWRR algorithm and lua-resty-balancer
+  -- Every answer's current weight `cw` grows by its weight, the answer with
+  -- the highest `cw` is chosen and has the sum of all weights subtracted.
+  local function swrr_next(answers)
+    local total = 0
+    local best = nil    -- best answer in answers[]
+
+    for _, answer in ipairs(answers) do
+      -- 0.1 gives weight 0 record a minimal chance of being chosen (rfc 2782)
+      local w = (answer.weight == 0) and 0.1 or answer.weight
+      local cw = answer.cw + w
+
+      answer.cw = cw
+
+      if not best or cw > best.cw then
+        best = answer
+      end
+
+      total = total + w
+    end
+
+    best.cw = best.cw - total
+
+    return best
+  end
+
+
+  -- Resets every `cw` to 0, then advances the selection a random number of
+  -- steps (between 1 and #answers).
+  local function swrr_init(answers)
+    for _, answer in ipairs(answers) do
+      answer.cw = 0   -- current weight
+    end
+
+    -- random start
+    for _ = 1, math_random(#answers) do
+      swrr_next(answers)
+    end
+  end
+
+
+  -- gather records with the lowest priority in SRV record
+  -- The resulting list is also stored in `answers.lowest_prio_records`.
+  local function filter_lowest_priority_answers(answers)
+    -- SRV record MUST have `priority` field
+    local lowest_priority = answers[1].priority
+    local l = {}    -- lowest priority records list
+
+    for _, answer in ipairs(answers) do
+      if answer.priority < lowest_priority then
+        lowest_priority = answer.priority
+        table_clear(l)
+        l[1] = answer
+
+      elseif answer.priority == lowest_priority then
+        table_insert(l, answer)
+      end
+    end
+
+    answers.lowest_prio_records = l
+
+    return l
+  end
+
+
+  -- Picks the next SRV answer by smooth weighted round robin among the answers
+  -- with the lowest `priority` value. The filtered list is kept on @answers and
+  -- the weights are initialized on first use.
+  function _M.get_next_weighted_round_robin_answer(answers)
+    local l = answers.lowest_prio_records or filter_lowest_priority_answers(answers)
+
+    -- perform round robin selection on lowest priority answers @l
+    if not l[1].cw then
+      swrr_init(l)
+    end
+
+    return swrr_next(l)
+  end
+end
+
+
+return _M
diff --git a/kong/globalpatches.lua b/kong/globalpatches.lua
index 397c4fc7c4e8..8d2a318568e3 100644
--- a/kong/globalpatches.lua
+++ b/kong/globalpatches.lua
@@ -409,6 +409,10 @@ return 
function(options) local seeded = {} local randomseed = math.randomseed + if options.rbusted then + _G.math.native_randomseed = randomseed + end + _G.math.randomseed = function() local pid = ngx.worker.pid() local id diff --git a/kong/resty/dns/client.lua b/kong/resty/dns/client.lua index 03625790ee58..0c7359c54ea1 100644 --- a/kong/resty/dns/client.lua +++ b/kong/resty/dns/client.lua @@ -1,3 +1,10 @@ +-- Use the new dns client library instead. If you want to switch to the original +-- one, you can set `legacy_dns_client = on` in kong.conf. +if ngx.shared.kong_dns_cache and not _G.busted_legacy_dns_client then + package.loaded["kong.dns.client"] = nil + return require("kong.dns.client") +end + -------------------------------------------------------------------------- -- DNS client. -- diff --git a/kong/templates/kong_defaults.lua b/kong/templates/kong_defaults.lua index ce532fd4b7ca..6a33c351d3a0 100644 --- a/kong/templates/kong_defaults.lua +++ b/kong/templates/kong_defaults.lua @@ -168,6 +168,7 @@ dns_cache_size = 10000 dns_not_found_ttl = 30 dns_error_ttl = 1 dns_no_sync = off +legacy_dns_client = off dedicated_config_processing = on worker_consistency = eventual diff --git a/kong/templates/nginx_kong.lua b/kong/templates/nginx_kong.lua index 6eca6ef9c6ad..5692d040b422 100644 --- a/kong/templates/nginx_kong.lua +++ b/kong/templates/nginx_kong.lua @@ -23,6 +23,10 @@ lua_shared_dict kong_db_cache ${{MEM_CACHE_SIZE}}; lua_shared_dict kong_db_cache_miss 12m; lua_shared_dict kong_secrets 5m; +> if not legacy_dns_client then +lua_shared_dict kong_dns_cache 5m; +> end + underscores_in_headers on; > if ssl_cipher_suite == 'old' then lua_ssl_conf_command CipherString DEFAULT:@SECLEVEL=0; diff --git a/spec/01-unit/09-balancer/01-generic_spec.lua b/spec/01-unit/09-balancer/01-generic_spec.lua index ec4c58f1c60c..b56fb1ad8f5b 100644 --- a/spec/01-unit/09-balancer/01-generic_spec.lua +++ b/spec/01-unit/09-balancer/01-generic_spec.lua @@ -214,6 +214,7 @@ for _, algorithm in 
ipairs{ "consistent-hashing", "least-connections", "round-ro -- so that CI and docker can have reliable results -- but remove `search` and `domain` search = {}, + cache_purge = true, }) snapshot = assert:snapshot() assert:set_parameter("TableFormatLevel", 10) @@ -1198,7 +1199,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro }, }, b:getStatus()) - dnsExpire(record) + dnsExpire(client, record) dnsSRV({ { name = "srvrecord.test", target = "1.1.1.1", port = 9000, weight = 20 }, { name = "srvrecord.test", target = "2.2.2.2", port = 9001, weight = 20 }, @@ -1382,7 +1383,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro }, b:getStatus()) -- update weight, through dns renewal - dnsExpire(record) + dnsExpire(client, record) dnsSRV({ { name = "srvrecord.test", target = "1.1.1.1", port = 9000, weight = 20 }, { name = "srvrecord.test", target = "2.2.2.2", port = 9001, weight = 20 }, @@ -1695,6 +1696,7 @@ for _, algorithm in ipairs{ "consistent-hashing", "least-connections", "round-ro -- update DNS with a new backend IP -- balancer should now recover since a new healthy backend is available record.expire = 0 + dnsExpire(client, record) dnsA({ { name = "getkong.test", address = "5.6.7.8", ttl = 60 }, }) diff --git a/spec/01-unit/09-balancer/02-least_connections_spec.lua b/spec/01-unit/09-balancer/02-least_connections_spec.lua index 3db545dec093..caae6c8bbe07 100644 --- a/spec/01-unit/09-balancer/02-least_connections_spec.lua +++ b/spec/01-unit/09-balancer/02-least_connections_spec.lua @@ -219,6 +219,7 @@ describe("[least-connections]", function() resolvConf = { "nameserver 198.51.100.0" }, + cache_purge = true, }) snapshot = assert:snapshot() end) diff --git a/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua b/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua index 17f46f46fa5b..aaecbdd4301f 100644 --- a/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua +++ 
b/spec/01-unit/09-balancer/03-consistent_hashing_spec.lua @@ -21,6 +21,7 @@ local sleep = helpers.sleep local dnsSRV = function(...) return helpers.dnsSRV(client, ...) end local dnsA = function(...) return helpers.dnsA(client, ...) end local dnsAAAA = function(...) return helpers.dnsAAAA(client, ...) end +local dnsExpire = helpers.dnsExpire @@ -265,6 +266,7 @@ describe("[consistent_hashing]", function() -- so that CI and docker can have reliable results -- but remove `search` and `domain` search = {}, + cache_purge = true, }) snapshot = assert:snapshot() end) @@ -844,6 +846,7 @@ describe("[consistent_hashing]", function() -- expire the existing record record.expire = 0 record.expired = true + dnsExpire(client, record) -- do a lookup to trigger the async lookup client.resolve("really.really.really.does.not.exist.host.test", {qtype = client.TYPE_A}) sleep(1) -- provide time for async lookup to complete diff --git a/spec/01-unit/09-balancer/04-round_robin_spec.lua b/spec/01-unit/09-balancer/04-round_robin_spec.lua index 35f63f2c4522..341ec4fe459b 100644 --- a/spec/01-unit/09-balancer/04-round_robin_spec.lua +++ b/spec/01-unit/09-balancer/04-round_robin_spec.lua @@ -19,6 +19,7 @@ local sleep = helpers.sleep local dnsSRV = function(...) return helpers.dnsSRV(client, ...) end local dnsA = function(...) return helpers.dnsA(client, ...) end local dnsAAAA = function(...) return helpers.dnsAAAA(client, ...) 
end +local dnsExpire = helpers.dnsExpire local unset_register = {} @@ -304,6 +305,7 @@ describe("[round robin balancer]", function() -- so that CI and docker can have reliable results -- but remove `search` and `domain` search = {}, + cache_purge = true, }) snapshot = assert:snapshot() end) @@ -412,6 +414,7 @@ describe("[round robin balancer]", function() resolvConf = { "nameserver 127.0.0.1:22000" -- make sure dns query fails }, + cache_purge = true, }) -- create balancer local b = check_balancer(new_balancer { @@ -617,7 +620,7 @@ describe("[round robin balancer]", function() end) it("does not hit the resolver when 'cache_only' is set", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, }) local b = check_balancer(new_balancer { hosts = { { name = "mashape.test", port = 80, weight = 5 } }, @@ -625,6 +628,7 @@ describe("[round robin balancer]", function() wheelSize = 10, }) record.expire = gettime() - 1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration dnsA({ -- create a new record { name = "mashape.test", address = "5.6.7.8" }, }) @@ -1018,7 +1022,7 @@ describe("[round robin balancer]", function() end) it("weight change for unresolved record, updates properly", function() local record = dnsA({ - { name = "really.really.really.does.not.exist.hostname.test", address = "1.2.3.4" }, + { name = "really.really.really.does.not.exist.hostname.test", address = "1.2.3.4", ttl = 0.1 }, }) dnsAAAA({ { name = "getkong.test", address = "::1" }, @@ -1039,6 +1043,8 @@ describe("[round robin balancer]", function() -- expire the existing record record.expire = 0 record.expired = true + dnsExpire(client, record) + sleep(0.2) -- wait for record expiration -- do a lookup to trigger the async lookup client.resolve("really.really.really.does.not.exist.hostname.test", {qtype = client.TYPE_A}) sleep(0.5) -- provide time for async lookup to complete @@ -1102,8 +1108,8 @@ 
describe("[round robin balancer]", function() end) it("renewed DNS A record; no changes", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, - { name = "mashape.test", address = "1.2.3.5" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, + { name = "mashape.test", address = "1.2.3.5", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1118,6 +1124,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.5" }, @@ -1133,8 +1140,8 @@ describe("[round robin balancer]", function() it("renewed DNS AAAA record; no changes", function() local record = dnsAAAA({ - { name = "mashape.test", address = "::1" }, - { name = "mashape.test", address = "::2" }, + { name = "mashape.test", address = "::1" , ttl = 0.1 }, + { name = "mashape.test", address = "::2" , ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1149,6 +1156,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration dnsAAAA({ -- create a new record (identical) { name = "mashape.test", address = "::1" }, { name = "mashape.test", address = "::2" }, @@ -1163,9 +1171,9 @@ describe("[round robin balancer]", function() end) it("renewed DNS SRV record; no changes", function() local record = dnsSRV({ - { name = "gelato.test", target = "1.2.3.6", port = 8001, weight = 5 }, - { name = "gelato.test", target = "1.2.3.6", port = 8002, weight = 5 }, - { name = "gelato.test", target = "1.2.3.6", port = 8003, weight = 5 }, + { name = "gelato.test", target = "1.2.3.6", port = 8001, weight = 5, ttl = 0.1 }, + { name = "gelato.test", target = "1.2.3.6", port 
= 8002, weight = 5, ttl = 0.1 }, + { name = "gelato.test", target = "1.2.3.6", port = 8003, weight = 5, ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1180,6 +1188,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration dnsSRV({ -- create a new record (identical) { name = "gelato.test", target = "1.2.3.6", port = 8001, weight = 5 }, { name = "gelato.test", target = "1.2.3.6", port = 8002, weight = 5 }, @@ -1195,8 +1204,8 @@ describe("[round robin balancer]", function() end) it("renewed DNS A record; address changes", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, - { name = "mashape.test", address = "1.2.3.5" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, + { name = "mashape.test", address = "1.2.3.5", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1212,6 +1221,7 @@ describe("[round robin balancer]", function() }) local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration dnsA({ -- insert an updated record { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.6" }, -- target updated @@ -1229,7 +1239,7 @@ describe("[round robin balancer]", function() -- 2016/11/07 16:48:33 [error] 81932#0: *2 recv() failed (61: Connection refused), context: ngx.timer local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1251,8 +1261,10 @@ describe("[round robin balancer]", function() resolvConf = { "nameserver 127.0.0.1:22000" -- make sure dns query fails }, + cache_purge = true, }) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration -- run 
entire wheel to make sure the expired one is requested, so it can fail for _ = 1, b.wheelSize do b:getPeer() end -- the only indice is now getkong.test @@ -1282,6 +1294,7 @@ describe("[round robin balancer]", function() local test_name = "really.really.really.does.not.exist.hostname.test" local ttl = 0.1 local staleTtl = 0 -- stale ttl = 0, force lookup upon expiring + client.getobj().stale_ttl = 0 local record = dnsA({ { name = test_name, address = "1.2.3.4", ttl = ttl }, }, staleTtl) @@ -1304,11 +1317,12 @@ describe("[round robin balancer]", function() assert.is_nil(ip) assert.equal(port, "Balancer is unhealthy") end + client.getobj().stale_ttl = 4 end) it("renewed DNS A record; unhealthy entries remain unhealthy after renewal", function() local record = dnsA({ - { name = "mashape.test", address = "1.2.3.4" }, - { name = "mashape.test", address = "1.2.3.5" }, + { name = "mashape.test", address = "1.2.3.4", ttl = 0.1 }, + { name = "mashape.test", address = "1.2.3.5", ttl = 0.1 }, }) dnsA({ { name = "getkong.test", address = "9.9.9.9" }, @@ -1342,6 +1356,7 @@ describe("[round robin balancer]", function() local state = copyWheel(b) record.expire = gettime() -1 -- expire current dns cache record + sleep(0.2) -- wait for record expiration dnsA({ -- create a new record (identical) { name = "mashape.test", address = "1.2.3.4" }, { name = "mashape.test", address = "1.2.3.5" }, diff --git a/spec/01-unit/09-balancer/06-latency_spec.lua b/spec/01-unit/09-balancer/06-latency_spec.lua index 89def3b45299..be9a23279e78 100644 --- a/spec/01-unit/09-balancer/06-latency_spec.lua +++ b/spec/01-unit/09-balancer/06-latency_spec.lua @@ -218,6 +218,7 @@ describe("[latency]", function() resolvConf = { "nameserver 198.51.100.0" }, + cache_purge = true, }) snapshot = assert:snapshot() end) diff --git a/spec/01-unit/14-dns_spec.lua b/spec/01-unit/14-dns_spec.lua index fda591d4df64..677977593cf3 100644 --- a/spec/01-unit/14-dns_spec.lua +++ b/spec/01-unit/14-dns_spec.lua @@ -29,6 +29,7 @@ 
local function setup_it_block() nameservers = { "198.51.100.0" }, enable_ipv6 = true, order = { "LAST", "SRV", "A", "CNAME" }, + cache_purge = true, } end diff --git a/spec/01-unit/21-dns-client/02-client_spec.lua b/spec/01-unit/21-dns-client/02-client_spec.lua index acd597ec2ec2..e5a88c8e8d9c 100644 --- a/spec/01-unit/21-dns-client/02-client_spec.lua +++ b/spec/01-unit/21-dns-client/02-client_spec.lua @@ -39,6 +39,7 @@ describe("[DNS client]", function() local client, resolver before_each(function() + _G.busted_legacy_dns_client = true client = require("kong.resty.dns.client") resolver = require("resty.dns.resolver") @@ -71,6 +72,7 @@ describe("[DNS client]", function() end) after_each(function() + _G.busted_legacy_dns_client = nil package.loaded["kong.resty.dns.client"] = nil package.loaded["resty.dns.resolver"] = nil client = nil diff --git a/spec/01-unit/21-dns-client/03-client_cache_spec.lua b/spec/01-unit/21-dns-client/03-client_cache_spec.lua index eb57d1ec2a24..448bd8b8a923 100644 --- a/spec/01-unit/21-dns-client/03-client_cache_spec.lua +++ b/spec/01-unit/21-dns-client/03-client_cache_spec.lua @@ -22,6 +22,7 @@ describe("[DNS client cache]", function() local client, resolver before_each(function() + _G.busted_legacy_dns_client = true client = require("kong.resty.dns.client") resolver = require("resty.dns.resolver") @@ -55,6 +56,7 @@ describe("[DNS client cache]", function() end) after_each(function() + _G.busted_legacy_dns_client = nil package.loaded["kong.resty.dns.client"] = nil package.loaded["resty.dns.resolver"] = nil client = nil diff --git a/spec/01-unit/30-new-dns-client/01-utils_spec.lua b/spec/01-unit/30-new-dns-client/01-utils_spec.lua new file mode 100644 index 000000000000..93fa9e2fed67 --- /dev/null +++ b/spec/01-unit/30-new-dns-client/01-utils_spec.lua @@ -0,0 +1,462 @@ +local utils = require "kong.dns.utils" +local tempfilename = require("pl.path").tmpname +local writefile = require("pl.utils").writefile +local splitlines = 
require("pl.stringx").splitlines + +describe("[utils]", function () + + describe("is_fqdn(name, ndots)", function () + it("test @name: end with `.`", function () + assert.is_true(utils.is_fqdn("www.", 2)) + assert.is_true(utils.is_fqdn("www.example.", 3)) + assert.is_true(utils.is_fqdn("www.example.test.", 4)) + end) + + it("test @ndots", function () + assert.is_true(utils.is_fqdn("www", 0)) + + assert.is_false(utils.is_fqdn("www", 1)) + assert.is_true(utils.is_fqdn("www.example", 1)) + assert.is_true(utils.is_fqdn("www.example.test", 1)) + + assert.is_false(utils.is_fqdn("www", 2)) + assert.is_false(utils.is_fqdn("www.example", 2)) + assert.is_true(utils.is_fqdn("www.example.test", 2)) + assert.is_true(utils.is_fqdn("www1.www2.example.test", 2)) + end) + end) + + describe("is_srv(name)", function () + local test_domains = { + ["_imaps._tcp.example.test"] = true, + ["_http._tcp.example.test"] = true, + ["_imaps._udp.example.test"] = true, + ["_http._udp.example.test"] = true, + ["_ldap._udp.example.test"] = true, + ["_ldap._udp.example"] = true, + ["_ldap._udp."] = false, + ["_ldap._udp"] = false, + ["_ldap._udp._example.test"] = true, + ["_ldap._udp._example"] = true, + ["_ldap._udp._"] = true, + ["_imaps.tcp.example.test"] = false, + ["imaps._tcp.example.test"] = false, + ["imaps.tcp.example.test"] = false, + ["_._tcp.example.test"] = false, + ["_imaps._.example.test"] = false, + ["_._.example.test"] = false, + ["_..example.test"] = false, + ["._.example.test"] = false, + ["www.example.test"] = false, + ["localhost"] = false, + } + + for k,v in pairs(test_domains) do + assert.equal(utils.is_srv(k), v, "checking " .. k .. ", " .. 
tostring(v)) + end + end) + + describe("search_names()", function () + it("empty resolv, not apply the search list", function () + local resolv = {} + local names = utils.search_names("www.example.test", resolv) + assert.same(names, { "www.example.test" }) + end) + + it("FQDN name: end with `.`, not apply the search list", function () + local names = utils.search_names("www.example.test.", { ndots = 1 }) + assert.same(names, { "www.example.test." }) + -- name with 3 dots, and ndots=4 > 3 + local names = utils.search_names("www.example.test.", { ndots = 4 }) + assert.same(names, { "www.example.test." }) + end) + + it("dots number in the name >= ndots, not apply the search list", function () + local resolv = { + ndots = 1, + search = { "example.net" }, + } + local names = utils.search_names("www.example.test", resolv) + assert.same(names, { "www.example.test" }) + + local names = utils.search_names("example.test", resolv) + assert.same(names, { "example.test" }) + end) + + it("dots number in the name < ndots, apply the search list", function () + local resolv = { + ndots = 2, + search = { "example.net" }, + } + local names = utils.search_names("www", resolv) + assert.same(names, { "www.example.net", "www" }) + + local names = utils.search_names("www1.www2", resolv) + assert.same(names, { "www1.www2.example.net", "www1.www2" }) + + local names = utils.search_names("www1.www2.www3", resolv) + assert.same(names, { "www1.www2.www3" }) -- not apply + + local resolv = { + ndots = 2, + search = { "example.net", "example.test" }, + } + local names = utils.search_names("www", resolv) + assert.same(names, { "www.example.net", "www.example.test", "www" }) + + local names = utils.search_names("www1.www2", resolv) + assert.same(names, { "www1.www2.example.net", "www1.www2.example.test", "www1.www2" }) + + local names = utils.search_names("www1.www2.www3", resolv) + assert.same(names, { "www1.www2.www3" }) -- not apply + end) + end) + + describe("ipv6_bracket()", function () + 
it("IPv6 address", function () + assert.equal(utils.ipv6_bracket("::1"), "[::1]") + assert.equal(utils.ipv6_bracket("[::1]"), "[::1]") + assert.equal(utils.ipv6_bracket("2001:db8::1"), "[2001:db8::1]") + assert.equal(utils.ipv6_bracket("[2001:db8::1]"), "[2001:db8::1]") + end) + + it("IPv4 address", function () + assert.equal(utils.ipv6_bracket("127.0.0.1"), "127.0.0.1") + end) + + it("host name", function () + assert.equal(utils.ipv6_bracket("example.test"), "example.test") + end) + end) + + describe("answer selection", function () + local function get_and_count(answers, n, get_ans) + local count = {} + for _ = 1, n do + local answer = get_ans(answers) + count[answer.target] = (count[answer.target] or 0) + 1 + end + return count + end + + it("round-robin", function () + local answers = { + { target = "1" }, -- 25% + { target = "2" }, -- 25% + { target = "3" }, -- 25% + { target = "4" }, -- 25% + } + local count = get_and_count(answers, 100, utils.get_next_round_robin_answer) + assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) + end) + + it("slight weight round-robin", function () + -- simple one + local answers = { + { target = "w5-p10-a", weight = 5, priority = 10, }, -- hit 100% + } + local count = get_and_count(answers, 20, utils.get_next_weighted_round_robin_answer) + assert.same(count, { ["w5-p10-a"] = 20 }) + + -- only get the lowest priority + local answers = { + { target = "w5-p10-a", weight = 5, priority = 10, }, -- hit 50% + { target = "w5-p20", weight = 5, priority = 20, }, -- hit 0% + { target = "w5-p10-b", weight = 5, priority = 10, }, -- hit 50% + { target = "w0-p10", weight = 0, priority = 10, }, -- hit 0% + } + local count = get_and_count(answers, 20, utils.get_next_weighted_round_robin_answer) + assert.same(count, { ["w5-p10-a"] = 10, ["w5-p10-b"] = 10 }) + + -- weight: 6, 3, 1 + local answers = { + { target = "w6", weight = 6, priority = 10, }, -- hit 60% + { target = "w3", weight = 3, priority = 10, }, -- hit 30% + { target 
= "w1", weight = 1, priority = 10, }, -- hit 10% + } + local count = get_and_count(answers, 100 * 1000, utils.get_next_weighted_round_robin_answer) + assert.same(count, { ["w6"] = 60000, ["w3"] = 30000, ["w1"] = 10000 }) + + -- random start + _G.math.native_randomseed(9975098) -- math.randomseed() ignores @seed + local answers1 = { + { target = "1", weight = 1, priority = 10, }, + { target = "2", weight = 1, priority = 10, }, + { target = "3", weight = 1, priority = 10, }, + { target = "4", weight = 1, priority = 10, }, + } + local answers2 = { + { target = "1", weight = 1, priority = 10, }, + { target = "2", weight = 1, priority = 10, }, + { target = "3", weight = 1, priority = 10, }, + { target = "4", weight = 1, priority = 10, }, + } + + local a1 = utils.get_next_weighted_round_robin_answer(answers1) + local a2 = utils.get_next_weighted_round_robin_answer(answers2) + assert.not_equal(a1.target, a2.target) + + -- weight 0 as 0.1 + local answers = { + { target = "w0", weight = 0, priority = 10, }, + { target = "w1", weight = 1, priority = 10, }, + { target = "w2", weight = 0, priority = 10, }, + { target = "w3", weight = 0, priority = 10, }, + } + local count = get_and_count(answers, 100, utils.get_next_weighted_round_robin_answer) + assert.same(count, { ["w0"] = 7, ["w1"] = 77, ["w2"] = 8, ["w3"] = 8 }) + + -- weight 0 and lowest priority + local answers = { + { target = "w0-a", weight = 0, priority = 0, }, + { target = "w1", weight = 1, priority = 10, }, -- hit 0% + { target = "w0-b", weight = 0, priority = 0, }, + { target = "w0-c", weight = 0, priority = 0, }, + } + local count = get_and_count(answers, 100, utils.get_next_weighted_round_robin_answer) + assert.same(count["w1"], nil) + + -- all weights are 0 + local answers = { + { target = "1", weight = 0, priority = 10, }, + { target = "2", weight = 0, priority = 10, }, + { target = "3", weight = 0, priority = 10, }, + { target = "4", weight = 0, priority = 10, }, + } + local count = get_and_count(answers, 
100, utils.get_next_weighted_round_robin_answer) + assert.same(count, { ["1"] = 25, ["2"] = 25, ["3"] = 25, ["4"] = 25 }) + end) + end) + + describe("parsing 'resolv.conf':", function() + + -- override os.getenv to insert env variables + local old_getenv = os.getenv + local envvars -- whatever is in this table, gets served first + before_each(function() + envvars = {} + os.getenv = function(name) -- luacheck: ignore + return envvars[name] or old_getenv(name) + end + end) + + after_each(function() + os.getenv = old_getenv -- luacheck: ignore + envvars = nil + end) + + it("tests parsing when the 'resolv.conf' file does not exist", function() + local result, err = utils.parse_resolv_conf("non/existing/file") + assert.is.Nil(result) + assert.is.string(err) + end) + + it("tests parsing when the 'resolv.conf' file is empty", function() + local filename = tempfilename() + writefile(filename, "") + local resolv, err = utils.parse_resolv_conf(filename) + os.remove(filename) + assert.is.same({ ndots = 1, options = {} }, resolv) + assert.is.Nil(err) + end) + + it("tests parsing 'resolv.conf' with multiple comment types", function() + local file = splitlines( +[[# this is just a comment line +# at the top of the file + +domain myservice.test + +nameserver 198.51.100.0 +nameserver 2001:db8::1 ; and a comment here +nameserver 198.51.100.0:1234 ; this one has a port number (limited systems support this) +nameserver 1.2.3.4 ; this one is 4th, so should be ignored + +# search is commented out, test below for a mutually exclusive one +#search domaina.test domainb.test + +sortlist list1 list2 #list3 is not part of it + +options ndots:2 +options timeout:3 +options attempts:4 + +options debug +options rotate ; let's see about a comment here +options no-check-names +options inet6 +; here's annother comment +options ip6-bytestring +options ip6-dotint +options no-ip6-dotint +options edns0 +options single-request +options single-request-reopen +options no-tld-query +options use-vc +]]) + 
local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.equal("myservice.test", resolv.domain) + assert.is.same({ "198.51.100.0", "2001:db8::1", "198.51.100.0:1234" }, resolv.nameserver) + assert.is.same({ "list1", "list2" }, resolv.sortlist) + assert.is.same({ ndots = 2, timeout = 3, attempts = 4, debug = true, rotate = true, + ["no-check-names"] = true, inet6 = true, ["ip6-bytestring"] = true, + ["ip6-dotint"] = nil, -- overridden by the next one, mutually exclusive + ["no-ip6-dotint"] = true, edns0 = true, ["single-request"] = true, + ["single-request-reopen"] = true, ["no-tld-query"] = true, ["use-vc"] = true}, + resolv.options) + end) + + it("tests parsing 'resolv.conf' with mutual exclusive domain vs search", function() + local file = splitlines( +[[domain myservice.test + +# search is overriding domain above +search domaina.test domainb.test + +]]) + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.Nil(resolv.domain) + assert.is.same({ "domaina.test", "domainb.test" }, resolv.search) + end) + + it("tests parsing 'resolv.conf' with 'timeout = 0'", function() + local file = splitlines("options timeout:0") + local resolv = utils.parse_resolv_conf(file) + assert.equal(2000, resolv.options.timeout) + end) + + it("tests parsing 'resolv.conf' with max search entries MAXSEARCH", function() + local file = splitlines( +[[ + +search domain1.test domain2.test domain3.test domain4.test domain5.test domain6.test domain7.test + +]]) + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.Nil(resolv.domain) + assert.is.same({ + "domain1.test", + "domain2.test", + "domain3.test", + "domain4.test", + "domain5.test", + "domain6.test", + }, resolv.search) + end) + + it("tests parsing 'resolv.conf' with environment variables", function() + local file = splitlines( +[[# this is just a comment line +domain myservice.test + +nameserver 198.51.100.0 +nameserver 198.51.100.1 ; and a comment here 
+ +options ndots:1 +]]) + envvars.LOCALDOMAIN = "domaina.test domainb.test" + envvars.RES_OPTIONS = "ndots:2 debug" + + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + + + assert.is.Nil(resolv.domain) -- must be nil, mutually exclusive + assert.is.same({ "domaina.test", "domainb.test" }, resolv.search) + + assert.is.same({ ndots = 2, debug = true }, resolv.options) + end) + + it("tests parsing 'resolv.conf' with non-existing environment variables", function() + local file = splitlines( +[[# this is just a comment line +domain myservice.test + +nameserver 198.51.100.0 +nameserver 198.51.100.1 ; and a comment here + +options ndots:2 +]]) + envvars.LOCALDOMAIN = "" + envvars.RES_OPTIONS = "" + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.equals("myservice.test", resolv.domain) -- empty LOCALDOMAIN is ignored, the file's domain is kept + assert.is.same({ ndots = 2 }, resolv.options) + end) + + it("skip ipv6 nameservers with scopes", function() + local file = splitlines( +[[# this is just a comment line +nameserver [fe80::1%enp0s20f0u1u1] +]]) + local resolv, err = utils.parse_resolv_conf(file) + assert.is.Nil(err) + assert.is.same({}, resolv.nameservers) + end) + + end) + + describe("parsing 'hosts':", function() + + it("tests parsing when the 'hosts' file does not exist", function() + local result = utils.parse_hosts("non/existing/file") + assert.same({ localhost = { ipv4 = "127.0.0.1", ipv6 = "[::1]" } }, result) + end) + + it("tests parsing when the 'hosts' file is empty", function() + local filename = tempfilename() + writefile(filename, "") + local result = utils.parse_hosts(filename) + os.remove(filename) + assert.same({ localhost = { ipv4 = "127.0.0.1", ipv6 = "[::1]" } }, result) + end) + + it("tests parsing 'hosts'", function() + local hostsfile = splitlines( +[[# The localhost entry should be in every HOSTS file and is used +# to point back to yourself. 
+ +127.0.0.1 # only ip address, this one will be ignored + +127.0.0.1 localhost +::1 localhost + +# My test server for the website + +192.168.1.2 test.computer.test + 192.168.1.3 ftp.COMPUTER.test alias1 alias2 +192.168.1.4 smtp.computer.test alias3 #alias4 +192.168.1.5 smtp.computer.test alias3 #doubles, first one should win + +#Blocking known malicious sites +127.0.0.1 admin.abcsearch.test +127.0.0.2 www3.abcsearch.test #[Browseraid] +127.0.0.3 www.abcsearch.test wwwsearch #[Restricted Zone site] + +[::1] alsolocalhost #support IPv6 in brackets +]]) + local reverse = utils.parse_hosts(hostsfile) + assert.is.equal("127.0.0.1", reverse.localhost.ipv4) + assert.is.equal("[::1]", reverse.localhost.ipv6) + + assert.is.equal("192.168.1.2", reverse["test.computer.test"].ipv4) + + assert.is.equal("192.168.1.3", reverse["ftp.computer.test"].ipv4) + assert.is.equal("192.168.1.3", reverse["alias1"].ipv4) + assert.is.equal("192.168.1.3", reverse["alias2"].ipv4) + + assert.is.equal("192.168.1.4", reverse["smtp.computer.test"].ipv4) + assert.is.equal("192.168.1.4", reverse["alias3"].ipv4) + + assert.is.equal("192.168.1.4", reverse["smtp.computer.test"].ipv4) -- .1.4; first one wins! + assert.is.equal("192.168.1.4", reverse["alias3"].ipv4) -- .1.4; first one wins! + + assert.is.equal("[::1]", reverse["alsolocalhost"].ipv6) + end) + end) +end) diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua new file mode 100644 index 000000000000..b91319564fa4 --- /dev/null +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -0,0 +1,1553 @@ +-- This test case file originates from the old version of the DNS client and has +-- been modified to adapt to the new version of the DNS client. 
+ +local _writefile = require("pl.utils").writefile +local tmpname = require("pl.path").tmpname +local cycle_aware_deep_copy = require("kong.tools.table").cycle_aware_deep_copy + +-- hosted in Route53 in the AWS sandbox +local TEST_DOMAIN = "kong-gateway-testing.link" +local TEST_NS = "192.51.100.0" + +local TEST_NSS = { TEST_NS } + +local NOT_FOUND_ERROR = 'dns server error: 3 name error' + +local function assert_same_answers(a1, a2) + a1 = cycle_aware_deep_copy(a1) + a1.ttl = nil + a1.expire = nil + + a2 = cycle_aware_deep_copy(a2) + a2.ttl = nil + a2.expire = nil + + assert.same(a1, a2) +end + +describe("[DNS client]", function() + + local resolver, client, query_func, old_udp, receive_func + + local resolv_path, hosts_path + + local function writefile(path, text) + _writefile(path, type(text) == "table" and table.concat(text, "\n") or text) + end + + local function client_new(opts) + opts = opts or {} + opts.resolv_conf = opts.resolv_conf or resolv_path + opts.hosts = hosts_path + opts.cache_purge = true + return client.new(opts) + end + + lazy_setup(function() + -- create temp resolv.conf and hosts + resolv_path = tmpname() + hosts_path = tmpname() + ngx.log(ngx.DEBUG, "create temp resolv.conf:", resolv_path, + " hosts:", hosts_path) + + -- hook sock:receive to do timeout test + old_udp = ngx.socket.udp + + _G.ngx.socket.udp = function (...) + local sock = old_udp(...) + + local old_receive = sock.receive + + sock.receive = function (...) + if receive_func then + receive_func(...) + end + return old_receive(...) 
+ end + + return sock + end + + end) + + lazy_teardown(function() + if resolv_path then + os.remove(resolv_path) + end + if hosts_path then + os.remove(hosts_path) + end + + _G.ngx.socket.udp = old_udp + end) + + before_each(function() + -- inject r.query + package.loaded["resty.dns.resolver"] = nil + resolver = require("resty.dns.resolver") + + local original_query_func = resolver.query + query_func = function(self, original_query_func, name, options) + return original_query_func(self, name, options) + end + resolver.query = function(self, ...) + return query_func(self, original_query_func, ...) + end + + -- restore its API overlapped by the compatible layer + package.loaded["kong.dns.client"] = nil + client = require("kong.dns.client") + client.resolve = function (self, name, opts, tries) + if opts and opts.return_random then + return self:resolve_address(name, opts.port, opts.cache_only, tries) + else + return self:_resolve(name, opts and opts.qtype, opts and opts.cache_only, tries) + end + end + end) + + after_each(function() + package.loaded["resty.dns.resolver"] = nil + resolver = nil + query_func = nil + + package.loaded["kong.resty.dns.client"] = nil + client = nil + + receive_func = nil + end) + + + describe("initialization", function() + it("check special opts", function() + local opts = { + hosts = "non/existent/hosts", + resolv_conf = "non/exitent/resolv.conf", + retrans = 4, + timeout = 5000, + random_resolver = true, + nameservers = {"1.1.1.1", {"2.2.2.2", 53}}, + } + + local cli = assert(client.new(opts)) + + assert.same(opts.retrans, cli.r_opts.retrans) + assert.same(opts.timeout, cli.r_opts.timeout) + assert.same(not opts.random_resolver, cli.r_opts.no_random) + assert.same(opts.nameservers, cli.r_opts.nameservers) + end) + + it("succeeds if hosts/resolv.conf fails", function() + local cli, err = client.new({ + nameservers = TEST_NSS, + hosts = "non/existent/file", + resolv_conf = "non/exitent/file", + }) + assert.is.Nil(err) + 
assert.same(cli.r_opts.nameservers, TEST_NSS) + end) + + describe("inject localhost", function() + + it("if absent", function() + writefile(resolv_path, "") + writefile(hosts_path, "") -- empty hosts + + local cli = assert(client_new()) + local answers = cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) + assert.equal("[::1]", answers[1].address) + + answers = cli:resolve("localhost", { qtype = resolver.TYPE_A}) + assert.equal("127.0.0.1", answers[1].address) + + answers = cli:resolve("localhost") + assert.equal("127.0.0.1", answers[1].address) + end) + + it("not if ipv4 exists", function() + writefile(hosts_path, "1.2.3.4 localhost") + local cli = assert(client_new()) + + -- IPv6 is not defined + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) + local answers = cli.cache:get("localhost:28") + assert.is_nil(answers) + + -- IPv4 is not overwritten + cli:resolve("localhost", { qtype = resolver.TYPE_A}) + answers = cli.cache:get("localhost:1") + assert.equal("1.2.3.4", answers[1].address) + end) + + it("not if ipv6 exists", function() + writefile(hosts_path, "::1:2:3:4 localhost") + local cli = assert(client_new()) + + -- IPv6 is not overwritten + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) + local answers = cli.cache:get("localhost:28") + assert.equal("[::1:2:3:4]", answers[1].address) + + -- IPv4 is not defined + cli:resolve("localhost", { qtype = resolver.TYPE_A}) + answers = cli.cache:get("localhost:1") + assert.is_nil(answers) + end) + + it("cache evication", function() + writefile(hosts_path, "::1:2:3:4 localhost") + local cli = assert(client_new()) + + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) + local answers = cli.cache:get("localhost:28") + assert.equal("[::1:2:3:4]", answers[1].address) + + -- evict it + cli.cache:delete("localhost:28") + answers = cli.cache:get("localhost:28") + assert.equal(nil, answers) + + -- resolve and re-insert it into cache + answers = cli:resolve("localhost") + assert.equal("[::1:2:3:4]", 
answers[1].address) + + cli:resolve("localhost", { qtype = resolver.TYPE_AAAA}) + answers = cli.cache:get("localhost:28") + assert.equal("[::1:2:3:4]", answers[1].address) + end) + end) + end) + + + describe("iterating searches", function() + local function hook_query_func_get_list() + local list = {} + query_func = function(self, original_query_func, name, options) + table.insert(list, name .. ":" .. options.qtype) + return {} -- empty answers + end + return list + end + + describe("without type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("host") + + assert.same(answers, nil) + assert.same(err, "dns client error: 101 empty record received") + assert.same({ + 'host.one.test:1', + 'host.two.test:1', + 'host:1', + 'host.one.test:28', + 'host.two.test:28', + 'host:28', + }, list) + end) + + it("works with SRV name", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("_imap._tcp.example.test") + + assert.same(answers, nil) + assert.same(err, "dns client error: 101 empty record received") + assert.same({ + '_imap._tcp.example.test:33', + }, list) + end) + + it("works with a 'search .' 
option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search .", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("host") + + assert.same(answers, nil) + assert.same(err, "dns client error: 101 empty record received") + assert.same({ + 'host:1', + 'host:28', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + local answers, err = cli:resolve("host") + + assert.same(answers, nil) + assert.same(err, "dns client error: 101 empty record received") + assert.same({ + 'host.local.domain.test:1', + 'host:1', + 'host.local.domain.test:28', + 'host:28', + }, list) + end) + end) + + describe("FQDN without type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("host.") + + assert.same({ + 'host.:1', + 'host.:28', + }, list) + end) + + it("works with a 'search .' 
option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search .", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("host.") + + assert.same({ + 'host.:1', + 'host.:28', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("host.") + + assert.same({ + 'host.:1', + 'host.:28', + }, list) + end) + end) + + describe("with type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type + cli:resolve("host") + + assert.same({ + 'host.one.test:28', + 'host.two.test:28', + 'host:28', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type + cli:resolve("host") + + assert.same({ + 'host.local.domain.test:28', + 'host:28', + }, list) + end) + end) + + describe("FQDN with type", function() + it("works with a 'search' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type + cli:resolve("host.") + assert.same({ + 'host.:28', + }, list) + end) + + it("works with a 'domain' option", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "domain local.domain.test", + "options ndots:1", + }) + + local list = 
hook_query_func_get_list() + local cli = assert(client_new({ family = { "AAAA" } })) -- IPv6 type + cli:resolve("host.") + + assert.same({ + 'host.:28', + }, list) + end) + end) + + it("honours 'ndots'", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + + local list = hook_query_func_get_list() + local cli = assert(client_new()) + cli:resolve("local.host") + + assert.same({ + 'local.host:1', + 'local.host:28', + }, list) + end) + + it("hosts file always resolves first, overriding `ndots`", function() + writefile(resolv_path, { + "nameserver 198.51.100.0", + "search one.test two.test", + "options ndots:1", + }) + writefile(hosts_path, { + "127.0.0.1 host", + "::1 host", + }) + + local list = hook_query_func_get_list() + -- preferred IP type: IPv4 (A takes priority in family) + local cli = assert(client_new({ family = { "SRV", "A", "AAAA" } })) + local answers = cli:resolve("host") + assert.same(answers[1].address, "127.0.0.1") + assert.same({}, list) -- hit on cache, so no query to the nameserver + + -- preferred IP type: IPv6 (AAAA takes priority in family) + --[[ + local cli = assert(client_new({ family = { "SRV", "AAAA", "A" } })) + local answers = cli:resolve("host") + assert.same(answers[1].address, "[::1]") + assert.same({}, list) + ]] + end) + end) + + -- This test will report an alert-level error message, ignore it. + it("low-level callback error", function() + receive_func = function(...) + error("CALLBACK") + end + + local cli = assert(client_new()) + + local orig_log = ngx.log + _G.ngx.log = function (...) 
end -- mute ALERT log + local answers, err = cli:resolve("srv.timeout.test") + _G.ngx.log = orig_log + assert.is_nil(answers) + assert.match("callback threw an error:.*CALLBACK", err) + end) + + describe("timeout", function () + it("dont try other types with the low-level error", function() + -- KAG-2300 https://github.test/Kong/kong/issues/10182 + -- When timed out, don't keep trying with other answers types. + writefile(resolv_path, { + "nameserver 198.51.100.0", + "options timeout:1", + "options attempts:3", + }) + + local query_count = 0 + query_func = function(self, original_query_func, name, options) + assert(options.qtype == resolver.TYPE_A) + query_count = query_count + 1 + return original_query_func(self, name, options) + end + + local receive_count = 0 + receive_func = function(...) + receive_count = receive_count + 1 + return nil, "timeout" + end + + local cli = assert(client_new()) + assert.same(cli.r_opts.retrans, 3) + assert.same(cli.r_opts.timeout, 1) + + local answers, err = cli:resolve("timeout.test") + assert.is_nil(answers) + assert.match("DNS server error: failed to receive reply from UDP server .*: timeout, took %d+ ms", err) + assert.same(receive_count, 3) + assert.same(query_count, 1) + end) + + -- KAG-2300 - https://github.test/Kong/kong/issues/10182 + -- If we encounter a timeout while talking to the DNS server, + -- expect the total timeout to be close to timeout * attempts parameters + for _, attempts in ipairs({1, 2}) do + for _, timeout in ipairs({1, 2}) do + it("options: timeout: " .. timeout .. " seconds, attempts: " .. attempts .. " times", function() + query_func = function(self, original_query_func, name, options) + ngx.sleep(math.min(timeout, 5)) + return nil, "timeout" .. timeout .. attempts + end + writefile(resolv_path, { + "nameserver 198.51.100.0", + "options timeout:" .. timeout, + "options attempts:" .. 
attempts, + }) + local cli = assert(client_new()) + assert.same(cli.r_opts.retrans, attempts) + assert.same(cli.r_opts.timeout, timeout) + + local start_time = ngx.now() + local answers, err = cli:resolve("timeout.test") + assert.is.Nil(answers) + assert.match("DNS server error: timeout" .. timeout .. attempts, err) -- the stubbed query error must reach the caller + local duration = ngx.now() - start_time + assert.truthy(duration < (timeout * attempts + 1)) + end) + end + end + end) + + it("fetching answers without nameservers errors", function() + writefile(resolv_path, "") + local host = TEST_DOMAIN + local typ = resolver.TYPE_A + + local cli = assert(client_new()) + local answers, err = cli:resolve(host, { qtype = typ }) + assert.is_nil(answers) + assert.same(err, "failed to instantiate the resolver: no nameservers specified") + end) + + it("fetching CNAME answers", function() + local host = "smtp."..TEST_DOMAIN + local typ = resolver.TYPE_CNAME + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local answers = cli:resolve(host, { qtype = typ }) + + assert.are.equal(host, answers[1].name) + assert.are.equal(typ, answers[1].type) + assert.are.equal(#answers, 1) + end) + + it("fetching CNAME answers FQDN", function() + local host = "smtp."..TEST_DOMAIN + local typ = resolver.TYPE_CNAME + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local answers = cli:resolve(host .. ".", { qtype = typ }) + + assert.are.equal(host, answers[1].name) -- answers name does not contain "." 
+ assert.are.equal(typ, answers[1].type) + assert.are.equal(#answers, 1) + end) + + it("cache hit and ttl", function() + -- TODO: The special 0-ttl record may cause this test to fail + -- [{"name":"kong-gateway-testing.link","class":1,"address":"198.51.100.0", + -- "ttl":0,"type":1,"section":1}] + local host = TEST_DOMAIN + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local answers = cli:resolve(host) + assert.are.equal(host, answers[1].name) + + local wait_time = 1 + ngx.sleep(wait_time) + + -- fetch again, now from cache + local answers2 = assert(cli:resolve(host)) + assert.are.equal(answers, answers2) -- same table from L1 cache + + local ttl, _, value = cli.cache:peek(host .. ":-1") + assert.same(answers, value) + local ttl_diff = answers.ttl - ttl + assert(math.abs(ttl_diff - wait_time) < 1, + ("ttl diff:%s s should be near to %s s"):format(ttl_diff, wait_time)) + end) + + it("fetching names case insensitive", function() + query_func = function(self, original_query_func, name, options) + return {{ + name = "some.UPPER.case", + type = resolver.TYPE_A, + ttl = 30, + }} + end + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local answers = cli:resolve("some.upper.CASE") + + assert.equal(1, #answers) + assert.equal("some.upper.case", answers[1].name) + end) + + it("fetching multiple A answers", function() + local host = "atest."..TEST_DOMAIN + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"A"}})) + local answers = assert(cli:resolve(host)) + assert.are.equal(#answers, 2) + assert.are.equal(host, answers[1].name) + assert.are.equal(resolver.TYPE_A, answers[1].type) + assert.are.equal(host, answers[2].name) + assert.are.equal(resolver.TYPE_A, answers[2].type) + end) + + it("fetching multiple A answers FQDN", function() + local host = "atest."..TEST_DOMAIN + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"A"}})) + local answers = assert(cli:resolve(host .. 
".")) + assert.are.equal(#answers, 2) + assert.are.equal(host, answers[1].name) + assert.are.equal(resolver.TYPE_A, answers[1].type) + assert.are.equal(host, answers[2].name) + assert.are.equal(resolver.TYPE_A, answers[2].type) + end) + + it("fetching A answers redirected through 2 CNAME answerss (un-typed)", function() + writefile(resolv_path, "") -- search {} empty + + local host = "smtp."..TEST_DOMAIN + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + assert(cli:resolve(host)) + + -- check first CNAME + local key1 = host .. ":" .. resolver.TYPE_CNAME + local entry1 = cli.cache:get(key1) + assert.same(nil, entry1) + + for k,v in pairs(cli.stats) do + v.query_last_time = nil + end + + assert.same({ + ["smtp.kong-gateway-testing.link:-1"] = { + miss = 1, + runs = 1 + }, + ["smtp.kong-gateway-testing.link:1"] = { + query = 1, + query_succ = 1 + }, + }, cli.stats) + end) + + it("fetching multiple SRV answerss (un-typed)", function() + local host = "_ldap._tcp.srv.test" + local typ = resolver.TYPE_SRV + + query_func = function(self, original_query_func, name, options) + return { + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + } + } + end + + -- un-typed lookup + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local answers = assert(cli:resolve(host)) + assert.are.equal(host, answers[1].name) + assert.are.equal(typ, answers[1].type) + assert.are.equal(host, answers[2].name) + assert.are.equal(typ, answers[2].type) + assert.are.equal(host, answers[3].name) + assert.are.equal(typ, answers[3].type) + assert.are.equal(#answers, 3) + end) + + it("fetching multiple SRV answerss through CNAME (un-typed)", function() + 
writefile(resolv_path, "") -- search {} empty + local host = "_ldap._tcp.cname2srv.test" + local typ = resolver.TYPE_SRV + + query_func = function(self, original_query_func, name, options) + return { + { + type = resolver.TYPE_CNAME, cname = host, class = 1, name = host, + ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + }, + { + type = typ, target = "srv.test", port = 8002, weight = 10, + priority = 5, class = 1, name = host, ttl = 300, + } + } + end + + -- un-typed lookup + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local answers = assert(cli:resolve(host)) + + -- first check CNAME + local key = host .. ":" .. resolver.TYPE_CNAME + local entry = cli.cache:get(key) + assert.same(nil, entry) + + for k,v in pairs(cli.stats) do + v.query_last_time = nil + end + + assert.same({ + ["_ldap._tcp.cname2srv.test:33"] = { + miss = 1, + runs = 1, + query = 1, + query_succ = 1, + }, + }, cli.stats) + + -- check final target + assert.are.equal(typ, answers[1].type) + assert.are.equal(typ, answers[2].type) + assert.are.equal(typ, answers[3].type) + assert.are.equal(#answers, 3) + end) + + it("fetching non-type-matching answerss", function() + local host = "srvtest."..TEST_DOMAIN + local typ = resolver.TYPE_A --> the entry is SRV not A + + writefile(resolv_path, "") -- search {} empty + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local answers, err = cli:resolve(host, { qtype = typ }) + assert.is_nil(answers) -- returns nil + assert.equal("dns client error: 101 empty record received", err) + end) + + it("fetching non-existing answerss", function() + local host = "IsNotHere."..TEST_DOMAIN + + writefile(resolv_path, "") -- search {} empty + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local answers, 
err = cli:resolve(host) + assert.is_nil(answers) + assert.equal("dns server error: 3 name error", err) + end) + + it("fetching IP address", function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + + local host = "1.2.3.4" + local answers = cli:resolve(host) + assert.same(answers[1].address, host) + + local host = "[1:2::3:4]" + local answers = cli:resolve(host) + assert.same(answers[1].address, host) + + local host = "1:2::3:4" + local answers = cli:resolve(host) + assert.same(answers[1].address, "[" .. host .. "]") + + -- ignore ipv6 format error, it only check ':' + local host = "[invalid ipv6 address:::]" + local answers = cli:resolve(host) + assert.same(answers[1].address, host) + end) + + it("fetching IPv6 in an SRV answers adds brackets",function() + local host = "hello.world.test" + local address = "::1" + local entry = {{ + type = resolver.TYPE_SRV, + target = address, + port = 321, + weight = 10, + priority = 10, + class = 1, + name = host, + ttl = 10, + }} + + query_func = function(self, original_query_func, name, options) + if name == host and options.qtype == resolver.TYPE_SRV then + return entry + end + return original_query_func(self, name, options) + end + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local answers = cli:resolve( host, { qtype = resolver.TYPE_SRV }) + assert.equal("["..address.."]", answers[1].target) + end) + + it("resolving from the /etc/hosts file; preferred A or AAAA family", function() + writefile(hosts_path, { + "127.3.2.1 localhost", + "1::2 localhost", + }) + local cli = assert(client_new({ + resolv_conf = "/etc/resolv.conf", + family = {"SRV", "A", "AAAA"} + })) + assert(cli) + + local cli = assert(client_new({ + resolv_conf = "/etc/resolv.conf", + family = {"SRV", "AAAA", "A"} + })) + assert(cli) + end) + + + it("resolving from the /etc/hosts file", function() + writefile(hosts_path, { + "127.3.2.1 localhost", + "1::2 localhost", + "123.123.123.123 mashape", + "1234::1234 
kong.for.president", + }) + + local cli = assert(client_new({ nameservers = TEST_NSS })) + + local answers, err = cli:resolve("localhost", {qtype = resolver.TYPE_A}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "127.3.2.1") + + answers, err = cli:resolve("localhost", {qtype = resolver.TYPE_AAAA}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "[1::2]") + + answers, err = cli:resolve("mashape", {qtype = resolver.TYPE_A}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "123.123.123.123") + + answers, err = cli:resolve("kong.for.president", {qtype = resolver.TYPE_AAAA}) + assert.is.Nil(err) + assert.are.equal(answers[1].address, "[1234::1234]") + end) + + describe("toip() function", function() + it("A/AAAA-answers, round-robin",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local host = "atest."..TEST_DOMAIN + local answers = assert(cli:resolve(host)) + answers.last = nil -- make sure to clean + local ips = {} + for _,answers in ipairs(answers) do ips[answers.address] = true end + local family = {} + for n = 1, #answers do + local ip = cli:resolve(host, { return_random = true }) + ips[ip] = nil + family[n] = ip + end + -- this table should be empty again + assert.is_nil(next(ips)) + -- do again, and check same family + for n = 1, #family do + local ip = cli:resolve(host, { return_random = true }) + assert.same(family[n], ip) + end + end) + + it("SRV-answers, round-robin on lowest prio",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local host = "_service._proto.hello.world.test" + local entry = { + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 8000, + weight = 5, + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 8001, + weight = 5, + priority = 20, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 8002, + 
weight = 5, + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + } + -- insert in the cache + cli.cache:set(entry[1].name .. ":" .. resolver.TYPE_SRV, {ttl=0}, entry) + + local results = {} + for _ = 1,20 do + local _, port = cli:resolve_address(host) + results[port] = (results[port] or 0) + 1 + end + + -- 20 passes, each should get 10 + assert.equal(0, results[8001] or 0) --priority 20, no hits + assert.equal(10, results[8000] or 0) --priority 10, 50% of hits + assert.equal(10, results[8002] or 0) --priority 10, 50% of hits + end) + + it("SRV-answers with 1 entry, round-robin",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + local host = "_service._proto.hello.world.test" + local entry = {{ + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 321, + weight = 10, + priority = 10, + class = 1, + name = host, + ttl = 10, + }} + -- insert in the cache + cli.cache:set(entry[1].name .. ":" .. resolver.TYPE_SRV, { ttl=0 }, entry) + + -- repeated lookups, as the first will simply serve the first entry + -- and the only second will setup the round-robin scheme, this is + -- specific for the SRV answers type, due to the weights + for _ = 1 , 10 do + local ip, port = cli:resolve_address(host) + assert.same("1.2.3.4", ip) + assert.same(321, port) + end + end) + + it("SRV-answers with 0-weight, round-robin",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local host = "_service._proto.hello.world.test" + local entry = { + { + type = resolver.TYPE_SRV, + target = "1.2.3.4", + port = 321, + weight = 0, --> weight 0 + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.5", + port = 321, + weight = 50, --> weight 50 + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + { + type = resolver.TYPE_SRV, + target = "1.2.3.6", + port = 321, + weight = 50, --> weight 50 + priority = 10, + class = 1, + name = host, + ttl = 10, + }, + } + 
-- insert in the cache + cli.cache:set(entry[1].name .. ":" .. resolver.TYPE_SRV, { ttl=0 }, entry) + + -- weight 0 will be weight 1, without any reduction in weight + -- of the other ones. + local track = {} + for _ = 1 , 2002 do --> run around twice + local ip, _ = assert(cli:resolve_address(host)) + track[ip] = (track[ip] or 0) + 1 + end + assert.equal(1000, track["1.2.3.5"]) + assert.equal(1000, track["1.2.3.6"]) + assert.equal(2, track["1.2.3.4"]) + end) + + it("port passing",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local entry_a = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "a.answers.test", + ttl = 10, + }} + local entry_srv = {{ + type = resolver.TYPE_SRV, + target = "a.answers.test", + port = 8001, + weight = 5, + priority = 20, + class = 1, + name = "_service._proto.srv.answers.test", + ttl = 10, + }} + -- insert in the cache + cli.cache:set(entry_a[1].name..":-1", { ttl = 0 }, entry_a) + cli.cache:set(entry_srv[1].name..":33", { ttl = 0 }, entry_srv) + local ip, port + local host = "a.answers.test" + ip, port = cli:resolve_address(host) + assert.is_string(ip) + assert.is_nil(port) + + ip, port = cli:resolve_address(host, 1234) + assert.is_string(ip) + assert.equal(1234, port) + + host = "_service._proto.srv.answers.test" + ip, port = cli:resolve_address(host) + assert.is_number(port) + assert.is_string(ip) + + ip, port = cli:resolve_address(host, 0) + assert.is_number(port) + assert.is_string(ip) + assert.is_not.equal(0, port) + end) + + it("port passing if SRV port=0",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local ip, port, host + + query_func = function(self, original_query_func, name, options) + if options.qtype ~= resolver.TYPE_SRV then + return original_query_func(self, name, options) + end + + return {{ + type = resolver.TYPE_SRV, + port = 0, + weight = 10, + priority = 20, + target = "kong-gateway-testing.link", + class = 1, + name = 
name, + ttl = 300, + }} + end + + host = "_service._proto.srvport0.test" + ip, port = cli:resolve_address(host, 10) + assert.is_number(port) + assert.is_string(ip) + assert.is_equal(10, port) + + ip, port = cli:resolve_address(host) + assert.is_string(ip) + assert.is_nil(port) + end) + + it("SRV whole process: SRV -> A",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local ip, port, host + + query_func = function(self, original_query_func, name, options) + if options.qtype == resolver.TYPE_A then + return {{ + type = resolver.TYPE_A, + address = "1.1.1.1", + name = name, + ttl = 300, + }} + + elseif options.qtype == resolver.TYPE_SRV then + return {{ + type = resolver.TYPE_SRV, + port = 0, + weight = 10, + priority = 20, + target = "kong-gateway-testing.link", + class = 1, + name = name, + ttl = 300, + }} + + else + return {} + end + end + + host = "_service._proto.srv_a.test" + ip, port = cli:resolve_address(host) + assert.equal(ip, "1.1.1.1") + assert.is_nil(port) + end) + + it("SRV whole process: SRV -> A failed -> AAAA",function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf"})) + local ip, port, host + + query_func = function(self, original_query_func, name, options) + if options.qtype == resolver.TYPE_A then + return { errcode = 5, errstr = "refused" } + + elseif options.qtype == resolver.TYPE_SRV then + return {{ + type = resolver.TYPE_SRV, + port = 0, + weight = 10, + priority = 20, + target = "kong-gateway-testing.link", + class = 1, + name = name, + ttl = 300, + }} + + else + return {{ + type = resolver.TYPE_AAAA, + address = "::1:2:3:4", + name = name, + ttl = 300, + }} + end + end + + host = "_service._proto.srv_aaaa.test" + ip, port = cli:resolve_address(host) + assert.equal(ip, "[::1:2:3:4]") + assert.is_nil(port) + end) + + it("resolving in correct answers-type family",function() + local function config(cli) + -- function to insert 2 answers in the cache + local A_entry = {{ + type = 
resolver.TYPE_A, + address = "5.6.7.8", + class = 1, + name = "hello.world.test", + ttl = 10, + }} + local AAAA_entry = {{ + type = resolver.TYPE_AAAA, + address = "::1", + class = 1, + name = "hello.world.test", + ttl = 10, + }} + -- insert in the cache + cli.cache:set(A_entry[1].name..":-1", { ttl=0 }, A_entry) + cli.cache:set(AAAA_entry[1].name..":-1", { ttl=0 }, AAAA_entry) + end + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"AAAA", "A"} })) + config(cli) + local ip, err = cli:resolve_address("hello.world.test") + assert.same(err, nil) + assert.equals(ip, "::1") + + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf", family = {"A", "AAAA"} })) + config(cli) + ip = cli:resolve_address("hello.world.test") + --assert.equals(ip, "5.6.7.8") + assert.equals(ip, "::1") + end) + + it("handling of empty responses", function() + local cli = assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + -- insert empty records into cache + cli.cache:set("hello.world.test:all", { ttl=0 }, { errcode = 3 }) + + -- Note: the bad case would be that the below lookup would hang due to round-robin on an empty table + local ip, port = cli:resolve_address("hello.world.test", 123, true) + assert.is_nil(ip) + assert.is.string(port) -- error message + end) + end) + + it("verifies valid_ttl", function() + local valid_ttl = 0.1 + local error_ttl = 0.1 + local stale_ttl = 0.1 + local qname = "konghq.test" + local cli = assert(client_new({ + resolv_conf = "/etc/resolv.conf", + error_ttl = error_ttl, + stale_ttl = stale_ttl, + valid_ttl = valid_ttl, + })) + -- mock query function to return a default answers + query_func = function(self, original_query_func, name, options) + return {{ + type = resolver.TYPE_A, + address = "5.6.7.8", + class = 1, + name = qname, + ttl = 10, + }} -- will add new field .ttl = valid_ttl + end + + local answers, _, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.equal(valid_ttl, answers.ttl) + + local ttl 
= cli.cache:peek(qname .. ":1") + assert.is_near(valid_ttl, ttl, 0.1) + end) + + it("verifies ttl and caching of empty responses and name errors", function() + --empty/error responses should be cached for a configurable time + local error_ttl = 0.1 + local stale_ttl = 0.1 + local qname = "really.really.really.does.not.exist."..TEST_DOMAIN + local cli = assert(client_new({ + resolv_conf = "/etc/resolv.conf", + error_ttl = error_ttl, + stale_ttl = stale_ttl, + })) + + -- mock query function to count calls + local call_count = 0 + query_func = function(self, original_query_func, name, options) + call_count = call_count + 1 + return original_query_func(self, name, options) + end + + -- make a first request, populating the cache + local answers1, answers2, err1, err2, _ + answers1, err1, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers1) + assert.are.equal(1, call_count) + assert.are.equal(NOT_FOUND_ERROR, err1) + answers1 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) + + -- make a second request, result from cache, still called only once + answers2, err2, _ = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(1, call_count) + assert.are.equal(NOT_FOUND_ERROR, err2) + answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) + assert.equal(answers1, answers2) + assert.falsy(answers2._expire_at) + + -- wait for expiry of ttl and retry, it will not use the cached one + -- because the cached one contains no available IP addresses + ngx.sleep(error_ttl+0.5 * stale_ttl) + answers2, err2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(2, call_count) + assert.are.equal(NOT_FOUND_ERROR, err2) + + answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert.falsy(answers2._expire_at) -- refreshed record + + -- wait for expiry of stale_ttl and retry, should be called twice now + ngx.sleep(0.75 * stale_ttl) + assert.are.equal(2, call_count) + answers2, err2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.is_nil(answers2) + assert.are.equal(NOT_FOUND_ERROR, err2) + assert.are.equal(2, call_count) + + answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) + assert.not_equal(answers1, answers2) + assert.falsy(answers2._expire_at) -- new answers, not expired + end) + + it("verifies stale_ttl for available records", function() + local stale_ttl = 0.1 + local ttl = 0.1 + local qname = "realname.test" + local cli = assert(client_new({ + resolv_conf = "/etc/resolv.conf", + stale_ttl = stale_ttl, + })) + + -- mock query function to count calls, and return a fixed A record + local call_count = 0 + query_func = function(self, original_query_func, name, options) + call_count = call_count + 1 + return {{ + type = resolver.TYPE_A, + address = "1.1.1.1", + class = 1, + name = name, + ttl = ttl, + }} + end + + -- initial request to populate the cache + local answers1, answers2 + answers1 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers1[1].address, "1.1.1.1") + assert.are.equal(call_count, 1) + assert.falsy(answers1._expire_at) + + -- try again, HIT from cache, not stale + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.are.equal(call_count, 1) + assert(answers1 == answers2) + + -- wait for expiry of ttl and retry, HIT and stale + ngx.sleep(ttl + 0.5 * stale_ttl) + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers2[1].address, "1.1.1.1") + assert.are.equal(call_count, 1) -- todo: flakiness + + answers2 = assert(cli.cache:get(qname .. ":" .. 
resolver.TYPE_A)) + assert(answers2._expire_at) + answers2._expire_at = nil -- clear to be same with answers1 + assert_same_answers(answers1, answers2) + + -- async stale updating task + ngx.sleep(0.1 * stale_ttl) + assert.are.equal(call_count, 2) + + -- hit the cached one that is updated by the stale task + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers2[1].address, "1.1.1.1") + assert.are.equal(call_count, 2) + assert.falsy(answers2._expire_at) + + -- The stale one will be completely eliminated from the cache. + ngx.sleep(ttl + stale_ttl) + + answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) + assert.same(answers2[1].address, "1.1.1.1") + assert.are.equal(call_count, 3) + assert.falsy(answers2._expire_at) + end) + + describe("verifies the polling of dns queries, retries, and wait times", function() + local function threads_resolve(nthreads, name, cli) + cli = cli or assert(client_new({ resolv_conf = "/etc/resolv.conf" })) + -- we're going to schedule a whole bunch of queries (lookup & stores answers) + local coros = {} + local answers_list = {} + for _ = 1, nthreads do + local co = ngx.thread.spawn(function () + coroutine.yield(coroutine.running()) + local answers, err = cli:resolve(name, { qtype = resolver.TYPE_A }) + table.insert(answers_list, (answers or err)) + end) + table.insert(coros, co) + end + for _, co in ipairs(coros) do + ngx.thread.wait(co) + end + return answers_list + end + + it("simultaneous lookups are synchronized to 1 lookup", function() + local call_count = 0 + query_func = function(self, original_query_func, name, options) + call_count = call_count + 1 + ngx.sleep(0.5) -- block all other threads + return original_query_func(self, name, options) + end + + local answers_list = threads_resolve(10, TEST_DOMAIN) + + assert(call_count == 1) + for _, answers in ipairs(answers_list) do + assert.same(answers_list[1], answers) + end + end) + + it("timeout while waiting", function() + + local ip = "1.4.2.3" + 
local timeout = 500 -- ms + local name = TEST_DOMAIN + -- insert a stub that waits and returns a fixed answer + query_func = function() + -- `+ 2` s ensures that the resty-lock expires + ngx.sleep(timeout / 1000 + 2) + return {{ + type = resolver.TYPE_A, + address = ip, + class = 1, + name = name, + ttl = 10, + }} + end + + local cli = assert(client_new({ + resolv_conf = "/etc/resolv.conf", + timeout = timeout, + retrans = 1, + })) + local answers_list = threads_resolve(10, name, cli) + + -- answers[1~9] are equal, as they all will wait for the first response + for i = 1, 9 do + assert.equal("could not acquire callback lock: timeout", answers_list[i]) + end + -- answers[10] comes from synchronous DNS access of the first request + assert.equal(ip, answers_list[10][1]["address"]) + end) + end) + + + it("disable additional section when querying", function() + + local function build_dns_reply(id, name, ip, ns_ip1, ns_ip2) + local function dns_encode_name(name) + local parts = {} + for part in string.gmatch(name, "[^.]+") do + table.insert(parts, string.char(#part) .. 
part) + end + table.insert(parts, "\0") + return table.concat(parts) + end + + local function ip_to_bytes(ip) + local bytes = { "\x00\x04" } -- RDLENGTH:4bytes (ipv4) + for octet in string.gmatch(ip, "%d+") do + table.insert(bytes, string.char(tonumber(octet))) + end + return table.concat(bytes) + end + + local package = {} + + -- Header + package[#package+1] = id + package[#package+1] = "\x85\x00" -- QR, AA, RD + package[#package+1] = "\x00\x01\x00\x01\x00\x00\x00\x02" -- QD:1 AN:1 NS:0 AR:2 + + -- Question + package[#package+1] = dns_encode_name(name) + package[#package+1] = "\x00\x01\x00\x01" -- QTYPE A; QCLASS IN + + -- Answer + package[#package+1] = dns_encode_name(name) + package[#package+1] = "\x00\x01\x00\x01\x00\x00\x00\x30" -- QTYPE:A; QCLASS:IN TTL:48 + package[#package+1] = ip_to_bytes(ip) + + -- Additional + local function add_additional(name, ip) + package[#package+1] = dns_encode_name(name) + package[#package+1] = "\x00\x01\x00\x01\x00\x00\x00\x30" -- QTYPE:A; QCLASS:IN TTL:48 + package[#package+1] = ip_to_bytes(ip) + end + + add_additional("ns1." .. name, ns_ip1) + add_additional("ns2." .. name, ns_ip2) + + return table.concat(package) + end + + local force_enable_additional_section = false + + -- dns client will ignore additional section + query_func = function(self, original_query_func, name, options) + if options.qtype ~= client.TYPE_A then + return { errcode = 5, errstr = "refused" } + end + + if force_enable_additional_section then + options.additional_section = true + end + + self.tcp_sock = nil -- disable TCP query + + local id + local sock = assert(self.socks[1]) + -- hook send to get id + local orig_sock_send = sock.send + sock.send = function (self, query) + id = query[1] .. 
query[2] + return orig_sock_send(self, query) + end + -- hook receive to reply raw data + sock.receive = function (self, size) + return build_dns_reply(id, name, "1.1.1.1", "2.2.2.2", "3.3.3.3") + end + + return original_query_func(self, name, options) + end + + local name = "additional-section.test" + + -- no additional_section by default + local cli = client.new({ nameservers = TEST_NSS }) + local answers = cli:resolve(name) + assert.equal(#answers, 1) + assert.same(answers[1].address, "1.1.1.1") + + -- test the buggy scenario + force_enable_additional_section = true + cli = client.new({ nameservers = TEST_NSS, cache_purge = true }) + answers = cli:resolve(name) + assert.equal(#answers, 3) + assert.same(answers[1].address, "1.1.1.1") + assert.same(answers[2].address, "2.2.2.2") + assert.same(answers[3].address, "3.3.3.3") + end) + +end) diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua new file mode 100644 index 000000000000..eac3c53e55c8 --- /dev/null +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -0,0 +1,465 @@ +-- This test case file originates from the old version of the DNS client and has +-- been modified to adapt to the new version of the DNS client. 
+ +local _writefile = require("pl.utils").writefile +local tmpname = require("pl.path").tmpname +local cycle_aware_deep_copy = require("kong.tools.table").cycle_aware_deep_copy + +-- hosted in Route53 in the AWS sandbox +local TEST_NS = "198.51.100.0" + +local TEST_NSS = { TEST_NS } + +local gettime = ngx.now +local sleep = ngx.sleep + +local function assert_same_answers(a1, a2) + a1 = cycle_aware_deep_copy(a1) + a1.ttl = nil + a1.expire = nil + + a2 = cycle_aware_deep_copy(a2) + a2.ttl = nil + a2.expire = nil + + assert.same(a1, a2) +end + +describe("[DNS client cache]", function() + local resolver, client, query_func, old_udp, receive_func + + local resolv_path, hosts_path + + local function writefile(path, text) + _writefile(path, type(text) == "table" and table.concat(text, "\n") or text) + end + + local function client_new(opts) + opts = opts or {} + opts.resolv_conf = resolv_path + opts.hosts = hosts_path + opts.nameservers = opts.nameservers or TEST_NSS + opts.cache_purge = true + return client.new(opts) + end + + lazy_setup(function() + -- create temp resolv.conf and hosts + resolv_path = tmpname() + hosts_path = tmpname() + ngx.log(ngx.DEBUG, "create temp resolv.conf:", resolv_path, + " hosts:", hosts_path) + + -- hook sock:receive to do timeout test + old_udp = ngx.socket.udp + + _G.ngx.socket.udp = function (...) + local sock = old_udp(...) + + local old_receive = sock.receive + + sock.receive = function (...) + if receive_func then + receive_func(...) + end + return old_receive(...) + end + + return sock + end + + end) + + lazy_teardown(function() + if resolv_path then + os.remove(resolv_path) + end + if hosts_path then + os.remove(hosts_path) + end + + _G.ngx.socket.udp = old_udp + end) + + before_each(function() + -- inject r.query + package.loaded["resty.dns.resolver"] = nil + resolver = require("resty.dns.resolver") + local original_query_func = resolver.query + resolver.query = function(self, ...) + return query_func(self, original_query_func, ...) 
+ end + + -- restore its API overlapped by the compatible layer + package.loaded["kong.dns.client"] = nil + client = require("kong.dns.client") + client.resolve = function (self, name, opts, tries) + if opts and opts.return_random then + return self:resolve_address(name, opts.port, opts.cache_only, tries) + else + return self:_resolve(name, opts and opts.qtype, opts and opts.cache_only, tries) + end + end + end) + + after_each(function() + package.loaded["resty.dns.resolver"] = nil + resolver = nil + query_func = nil + + package.loaded["kong.resty.dns.client"] = nil + client = nil + + receive_func = nil + end) + + describe("shortnames caching", function() + + local cli, mock_records, config + before_each(function() + writefile(resolv_path, "search domain.test") + config = { + nameservers = { "198.51.100.0" }, + ndots = 1, + search = { "domain.test" }, + hosts = {}, + order = { "LAST", "SRV", "A", "AAAA" }, + error_ttl = 0.5, + stale_ttl = 0.5, + enable_ipv6 = false, + } + cli = assert(client_new(config)) + + query_func = function(self, original_query_func, qname, opts) + return mock_records[qname..":"..opts.qtype] or { errcode = 3, errstr = "name error" } + end + end) + + it("are stored in cache without type", function() + mock_records = { + ["myhost1.domain.test:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost1.domain.test", + ttl = 30, + }} + } + + local answers = cli:resolve("myhost1") + assert.equal(answers, cli.cache:get("myhost1:-1")) + end) + + it("are stored in cache with type", function() + mock_records = { + ["myhost2.domain.test:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost2.domain.test", + ttl = 30, + }} + } + + local answers = cli:resolve("myhost2", { qtype = resolver.TYPE_A }) + assert.equal(answers, cli.cache:get("myhost2:" .. 
resolver.TYPE_A)) + end) + + it("are resolved from cache without type", function() + mock_records = {} + cli.cache:set("myhost3:-1", {ttl=30+4}, {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost3.domain.test", + ttl = 30, + }, + ttl = 30, + expire = gettime() + 30, + }) + + local answers = cli:resolve("myhost3") + assert.same(answers, cli.cache:get("myhost3:-1")) + end) + + it("are resolved from cache with type", function() + mock_records = {} + local cli = client_new() + cli.cache:set("myhost4:" .. resolver.TYPE_A, {ttl=30+4}, {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost4.domain.test", + ttl = 30, + }, + ttl = 30, + expire = gettime() + 30, + }) + + local answers = cli:resolve("myhost4", { qtype = resolver.TYPE_A }) + assert.equal(answers, cli.cache:get("myhost4:" .. resolver.TYPE_A)) + end) + + it("ttl in cache is honored for short name entries", function() + local ttl = 0.2 + -- in the short name case the same record is inserted again in the cache + -- and the lru-ttl has to be calculated, make sure it is correct + mock_records = { + ["myhost6.domain.test:"..resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost6.domain.test", + ttl = ttl, + }} + } + local mock_copy = cycle_aware_deep_copy(mock_records) + + -- resolve and check whether we got the mocked record + local answers = cli:resolve("myhost6") + assert_same_answers(answers, mock_records["myhost6.domain.test:"..resolver.TYPE_A]) + + -- replace our mocked list with the copy made (new table, so no equality) + mock_records = mock_copy + + -- wait for expiring + sleep(ttl + config.stale_ttl / 2) + + -- fresh result, but it should not affect answers2 + mock_records["myhost6.domain.test:"..resolver.TYPE_A][1].tag = "new" + + -- resolve again, now getting same record, but stale, this will trigger + -- background refresh query + local answers2 = cli:resolve("myhost6") + assert.falsy(answers2[1].tag) + 
assert.is_number(answers2._expire_at) -- stale; marked as expired + answers2._expire_at = nil + assert_same_answers(answers2, answers) + + -- wait for the refresh to complete. Ensure that the sleeping time is less + -- than ttl, avoiding the updated record from becoming stale again. + sleep(ttl / 2) + + -- resolve and check whether we got the new record from the mock copy + local answers3 = cli:resolve("myhost6") + assert.equal(answers3[1].tag, "new") + assert.falsy(answers3._expire_at) -- refreshed record, must not carry the stale marker + assert.not_equal(answers, answers3) -- must be a different record now + assert_same_answers(answers3, mock_records["myhost6.domain.test:"..resolver.TYPE_A]) + + -- the 'answers3' resolve call above will also trigger a new background query + -- (because the sleep of 0.1 equals the records ttl of 0.1) + -- so let's yield to activate that background thread now. If not done so, + -- the `after_each` will clear `query_func` and an error will appear on the + -- next test after this one that will yield. + sleep(0.1) + end) + + it("errors are not stored", function() + local rec = { + errcode = 4, + errstr = "server failure", + } + mock_records = { + ["myhost7.domain.test:"..resolver.TYPE_A] = rec, + ["myhost7:"..resolver.TYPE_A] = rec, + } + + local answers, err = cli:resolve("myhost7", { qtype = resolver.TYPE_A }) + assert.is_nil(answers) + assert.equal("dns server error: 4 server failure", err) + assert.is_nil(cli.cache:get("short:myhost7:" .. resolver.TYPE_A)) + end) + + it("name errors are not stored", function() + local rec = { + errcode = 3, + errstr = "name error", + } + mock_records = { + ["myhost8.domain.test:"..resolver.TYPE_A] = rec, + ["myhost8:"..resolver.TYPE_A] = rec, + } + + local answers, err = cli:resolve("myhost8", { qtype = resolver.TYPE_A }) + assert.is_nil(answers) + assert.equal("dns server error: 3 name error", err) + assert.is_nil(cli.cache:get("short:myhost8:" .. 
resolver.TYPE_A)) + end) + + end) + + + describe("fqdn caching", function() + + local cli, mock_records, config + before_each(function() + writefile(resolv_path, "search domain.test") + config = { + nameservers = { "198.51.100.0" }, + ndots = 1, + search = { "domain.test" }, + hosts = {}, + resolvConf = {}, + order = { "LAST", "SRV", "A", "AAAA" }, + error_ttl = 0.5, + stale_ttl = 0.5, + enable_ipv6 = false, + } + cli = assert(client_new(config)) + + query_func = function(self, original_query_func, qname, opts) + return mock_records[qname..":"..opts.qtype] or { errcode = 3, errstr = "name error" } + end + end) + + it("errors do not replace stale records", function() + local rec1 = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost9.domain.test", + ttl = 0.1, + }} + mock_records = { + ["myhost9.domain.test:"..resolver.TYPE_A] = rec1, + } + + local answers, err = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_nil(err) + -- check that the cache is properly populated + assert_same_answers(rec1, answers) + answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) + assert_same_answers(rec1, answers) + + sleep(0.15) -- make sure we surpass the ttl of 0.1 of the record, so it is now stale. + -- new mock records, such that we return server failures instead of records + local rec2 = { + errcode = 4, + errstr = "server failure", + } + mock_records = { + ["myhost9.domain.test:"..resolver.TYPE_A] = rec2, + ["myhost9:"..resolver.TYPE_A] = rec2, + } + -- doing a resolve will trigger the background query now + answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired + -- wait again for the background query to complete + sleep(0.1) + -- background resolve is now complete, check the cache, it should still have the + -- stale record, and it should not have been replaced by the error + -- + answers = cli.cache:get("myhost9:" .. 
resolver.TYPE_A) + assert.is_number(answers._expire_at) + answers._expire_at = nil + assert_same_answers(rec1, answers) + end) + + it("empty records do not replace stale records", function() + local rec1 = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myhost9.domain.test", + ttl = 0.1, + }} + mock_records = { + ["myhost9.domain.test:"..resolver.TYPE_A] = rec1, + } + + local answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + -- check that the cache is properly populated + assert_same_answers(rec1, answers) + assert_same_answers(rec1, cli.cache:get("myhost9:" .. resolver.TYPE_A)) + + sleep(0.15) -- stale + -- replace mock records, such that we return empty results instead of records + local rec2 = {} + mock_records = { + ["myhost9.domain.test:"..resolver.TYPE_A] = rec2, + ["myhost9:"..resolver.TYPE_A] = rec2, + } + -- doing a resolve will trigger the background query now + answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired + -- wait again for the background query to complete + sleep(0.1) + -- background resolve is now complete, check the cache, it should still have the + -- stale record, and it should not have been replaced by the empty record + answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired + answers._expire_at = nil + assert_same_answers(rec1, answers) + end) + + it("AS records do replace stale records", function() + -- when the additional section provides records, they should be stored + -- in the cache, as in some cases lookups of certain types (eg. CNAME) are + -- blocked, and then we rely on the A record to get them in the AS + -- (additional section), but then they must be stored obviously. 
+ local CNAME1 = { + type = resolver.TYPE_CNAME, + cname = "myotherhost.domain.test", + class = 1, + name = "myhost9.domain.test", + ttl = 0.1, + } + local A2 = { + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "myotherhost.domain.test", + ttl = 60, + } + mock_records = setmetatable({ + ["myhost9.domain.test:"..resolver.TYPE_CNAME] = { cycle_aware_deep_copy(CNAME1) }, -- copy to make it different + ["myhost9.domain.test:"..resolver.TYPE_A] = { CNAME1, A2 }, -- not there, just a reference and target + ["myotherhost.domain.test:"..resolver.TYPE_A] = { A2 }, + }, { + -- do not do lookups, return empty on anything else + __index = function(self, key) + --print("looking for ",key) + return {} + end, + }) + + assert(cli:resolve("myhost9", { qtype = resolver.TYPE_CNAME })) + ngx.sleep(0.2) -- wait for it to become stale + assert(cli:resolve("myhost9", { return_random = true })) -- options belong to resolve(), not to assert() + + local cached = cli.cache:get("myhost9.domain.test:" .. resolver.TYPE_CNAME) + assert.same(nil, cached) + end) + + end) + + describe("hosts entries", function() + -- hosts file names are cached for 10 years, verify that + -- it is not overwritten with valid_ttl settings. 
+ -- Regressions reported in https://github.test/Kong/kong/issues/7444 + local cli, mock_records, config -- luacheck: ignore + writefile(resolv_path, "") + writefile(hosts_path, "127.0.0.1 myname.lan") + before_each(function() + config = { + nameservers = { "198.51.100.0" }, + --hosts = {"127.0.0.1 myname.lan"}, + --resolvConf = {}, + valid_ttl = 0.1, + stale_ttl = 0, + } + + cli = assert(client_new(config)) + end) + + it("entries from hosts file ignores valid_ttl overrides, Kong/kong #7444", function() + local record = cli:resolve("myname.lan") + assert.equal("127.0.0.1", record[1].address) + ngx.sleep(0.2) -- must be > valid_ttl + stale_ttl + + record = cli.cache:get("myname.lan:-1") + assert.equal("127.0.0.1", record[1].address) + end) + end) +end) diff --git a/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua new file mode 100644 index 000000000000..5ed287def1df --- /dev/null +++ b/spec/01-unit/30-new-dns-client/04-client_ipc_spec.lua @@ -0,0 +1,63 @@ +local helpers = require "spec.helpers" +local pl_file = require "pl.file" + + +local function count_log_lines(pattern) + local cfg = helpers.test_conf + local logs = pl_file.read(cfg.prefix .. "/" .. 
cfg.proxy_error_log) + local _, count = logs:gsub(pattern, "") + return count +end + + +describe("[dns-client] inter-process communication:",function() + local num_workers = 2 + + setup(function() + local bp = helpers.get_db_utils("postgres", { + "routes", + "services", + "plugins", + }, { + "dns-client-test", + }) + + bp.plugins:insert { + name = "dns-client-test", + } + + assert(helpers.start_kong({ + nginx_conf = "spec/fixtures/custom_nginx.template", + plugins = "bundled,dns-client-test", + nginx_main_worker_processes = num_workers, + legacy_dns_client = "off", + })) + end) + + teardown(function() + helpers.stop_kong() + end) + + it("stale updating task broadcast events", function() + helpers.wait_until(function() + return count_log_lines("DNS query completed") == num_workers + end, 5) + + assert.same(count_log_lines("first:query:ipc.test"), 1) + assert.same(count_log_lines("first:answers:1.2.3.4"), num_workers) + + assert.same(count_log_lines("stale:query:ipc.test"), 1) + assert.same(count_log_lines("stale:answers:1.2.3.4."), num_workers) + + -- wait background tasks to finish + helpers.wait_until(function() + return count_log_lines("stale:broadcast:ipc.test:%-1") == 1 + end, 5) + + -- "stale:lru ..." means the progress of the two workers is about the same. + -- "first:lru ..." means one of the workers is far behind the other. 
+ helpers.wait_until(function() + return count_log_lines(":lru delete:ipc.test:%-1") == 1 + end, 5) + end) +end) diff --git a/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua new file mode 100644 index 000000000000..4bf0efd0a46a --- /dev/null +++ b/spec/01-unit/30-new-dns-client/05-client_stat_spec.lua @@ -0,0 +1,197 @@ +local sleep = ngx.sleep + +describe("[DNS client stats]", function() + local resolver, client, query_func + + local function client_new(opts) + opts = opts or {} + opts.hosts = {} + opts.nameservers = { "198.51.100.0" } -- placeholder, not used + return client.new(opts) + end + + before_each(function() + -- inject r.query + package.loaded["resty.dns.resolver"] = nil + resolver = require("resty.dns.resolver") + resolver.query = function(...) + if not query_func then + return nil + end + return query_func(...) + end + + -- restore its API overlapped by the compatible layer + package.loaded["kong.dns.client"] = nil + client = require("kong.dns.client") + client.resolve = client._resolve + end) + + after_each(function() + package.loaded["resty.dns.resolver"] = nil + resolver = nil + query_func = nil + + package.loaded["kong.resty.dns.client"] = nil + client = nil + end) + + describe("stats", function() + local mock_records + before_each(function() + query_func = function(self, qname, opts) + local records = mock_records[qname..":"..opts.qtype] + if type(records) == "string" then + return nil, records -- as error message + end + return records or { errcode = 3, errstr = "name error" } + end + end) + + it("resolve SRV", function() + mock_records = { + ["_ldaps._tcp.srv.test:" .. resolver.TYPE_SRV] = {{ + type = resolver.TYPE_SRV, + target = "srv.test", + port = 636, + weight = 10, + priority = 10, + class = 1, + name = "_ldaps._tcp.srv.test", + ttl = 10, + }}, + ["srv.test:" .. 
resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "srv.test", + ttl = 30, + }}, + } + + local cli = assert(client_new()) + cli:resolve("_ldaps._tcp.srv.test") + + local query_last_time + for k, v in pairs(cli.stats) do + if v.query_last_time then + query_last_time = v.query_last_time + v.query_last_time = nil + end + end + assert.match("^%d+$", query_last_time) + + assert.same({ + ["_ldaps._tcp.srv.test:33"] = { + ["query"] = 1, + ["query_succ"] = 1, + ["miss"] = 1, + ["runs"] = 1, + }, + }, cli.stats) + end) + + it("resolve all types", function() + mock_records = { + ["hit.test:" .. resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "hit.test", + ttl = 30, + }}, + ["nameserver_fail.test:" .. resolver.TYPE_A] = "nameserver failed", + ["stale.test:" .. resolver.TYPE_A] = {{ + type = resolver.TYPE_A, + address = "1.2.3.4", + class = 1, + name = "stale.test", + ttl = 0.1, + }}, + ["empty_result_not_stale.test:" .. 
resolver.TYPE_A] = {{ + type = resolver.TYPE_CNAME, -- will be ignored compared to type A + cname = "stale.test", + class = 1, + name = "empty_result_not_stale.test", + ttl = 0.1, + }}, + } + + local cli = assert(client_new({ + order = { "A" }, + error_ttl = 0.1, + empty_ttl = 0.1, + stale_ttl = 1, + })) + + -- "hit_lru" + cli:resolve("hit.test") + cli:resolve("hit.test") + -- "hit_shm" + cli.cache.lru:delete("hit.test:all") + cli:resolve("hit.test") + + -- "query_err:nameserver failed" + cli:resolve("nameserver_fail.test") + + -- "stale" + cli:resolve("stale.test") + sleep(0.2) + cli:resolve("stale.test") + + cli:resolve("empty_result_not_stale.test") + sleep(0.2) + cli:resolve("empty_result_not_stale.test") + + local query_last_time + for k, v in pairs(cli.stats) do + if v.query_last_time then + query_last_time = v.query_last_time + v.query_last_time = nil + end + end + assert.match("^%d+$", query_last_time) + + assert.same({ + ["hit.test:1"] = { + ["query"] = 1, + ["query_succ"] = 1, + }, + ["hit.test:-1"] = { + ["hit_lru"] = 2, + ["miss"] = 1, + ["runs"] = 3, + }, + ["nameserver_fail.test:-1"] = { + ["fail"] = 1, + ["runs"] = 1, + }, + ["nameserver_fail.test:1"] = { + ["query"] = 1, + ["query_fail_nameserver"] = 1, + }, + ["stale.test:-1"] = { + ["miss"] = 2, + ["runs"] = 2, + ["stale"] = 1, + }, + ["stale.test:1"] = { + ["query"] = 2, + ["query_succ"] = 2, + }, + ["empty_result_not_stale.test:-1"] = { + ["miss"] = 2, + ["runs"] = 2, + }, + ["empty_result_not_stale.test:1"] = { + ["query"] = 2, + ["query_fail:empty record received"] = 2, + }, + ["empty_result_not_stale.test:28"] = { + ["query"] = 2, + ["query_fail:name error"] = 2, + }, + }, cli.stats) + end) + end) +end) diff --git a/spec/02-integration/04-admin_api/26-dns_client_spec.lua b/spec/02-integration/04-admin_api/26-dns_client_spec.lua new file mode 100644 index 000000000000..036671732a8a --- /dev/null +++ b/spec/02-integration/04-admin_api/26-dns_client_spec.lua @@ -0,0 +1,102 @@ +local helpers = 
require "spec.helpers" +local cjson = require "cjson" + + +for _, strategy in helpers.each_strategy() do + describe("Admin API - DNS client route with [#" .. strategy .. "]" , function() + local client + + lazy_setup(function() + local bp = helpers.get_db_utils(strategy, { + "upstreams", + "targets", + }) + + local upstream = bp.upstreams:insert() + bp.targets:insert({ + upstream = upstream, + target = "_service._proto.srv.test", + }) + + assert(helpers.start_kong({ + database = strategy, + nginx_conf = "spec/fixtures/custom_nginx.template", + legacy_dns_client = "off", + })) + + client = helpers.admin_client() + end) + + teardown(function() + if client then + client:close() + end + helpers.stop_kong() + end) + + it("/status/dns - status code 200", function () + local res = assert(client:send { + method = "GET", + path = "/status/dns", + headers = { ["Content-Type"] = "application/json" } + }) + + local body = assert.res_status(200 , res) + local json = cjson.decode(body) + + assert(type(json.worker.id) == "number") + assert(type(json.worker.count) == "number") + + assert(type(json.stats) == "table") + assert(type(json.stats["127.0.0.1|A/AAAA"].runs) == "number") + + -- Wait for the upstream target to be updated in the background + helpers.wait_until(function () + local res = assert(client:send { + method = "GET", + path = "/status/dns", + headers = { ["Content-Type"] = "application/json" } + }) + + local body = assert.res_status(200 , res) + local json = cjson.decode(body) + return type(json.stats["_service._proto.srv.test|SRV"]) == "table" + end, 5) + end) + end) + + describe("Admin API - DNS client route with [#" .. strategy .. 
"]" , function() + local client + + lazy_setup(function() + helpers.get_db_utils(strategy) + + assert(helpers.start_kong({ + database = strategy, + nginx_conf = "spec/fixtures/custom_nginx.template", + legacy_dns_client = true, + })) + + client = helpers.admin_client() + end) + + teardown(function() + if client then + client:close() + end + helpers.stop_kong() + end) + + it("/status/dns - status code 501", function () + local res = assert(client:send { + method = "GET", + path = "/status/dns", + headers = { ["Content-Type"] = "application/json" } + }) + + local body = assert.res_status(501, res) + local json = cjson.decode(body) + assert.same("not implemented with the legacy DNS client", json.message) + end) + end) +end diff --git a/spec/02-integration/05-proxy/05-dns_spec.lua b/spec/02-integration/05-proxy/05-dns_spec.lua index 9607352a26ce..3e2c9475723c 100644 --- a/spec/02-integration/05-proxy/05-dns_spec.lua +++ b/spec/02-integration/05-proxy/05-dns_spec.lua @@ -108,7 +108,7 @@ for _, strategy in helpers.each_strategy() do local service = bp.services:insert { name = "tests-retries", - host = "nowthisdoesnotexistatall", + host = "nowthisdoesnotexistatall.test", path = "/exist", port = 80, protocol = "http" diff --git a/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua b/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua index 0d3872c093c8..56769c6f26aa 100644 --- a/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua +++ b/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua @@ -38,7 +38,7 @@ for _, strategy in helpers.each_strategy() do } fixtures.dns_mock:SRV { - name = "my.srv.test.test", + name = "_srv._pro.my.srv.test.test", target = "a.my.srv.test.test", port = 80, -- port should fail to connect } @@ -57,7 +57,7 @@ for _, strategy in helpers.each_strategy() do } fixtures.dns_mock:SRV { - name = "srv-changes-port.test", + name = "_srv._pro.srv-changes-port.test", target = "a-changes-port.test", port = 
90, -- port should fail to connect } @@ -114,7 +114,7 @@ for _, strategy in helpers.each_strategy() do }) -- the following port will not be used, will be overwritten by -- the mocked SRV record. - bu.add_target(bp, upstream_id, "my.srv.test.test", 80) + bu.add_target(bp, upstream_id, "_srv._pro.my.srv.test.test", 80) local api_host = bu.add_api(bp, upstream_name) bu.end_testcase_setup(strategy, bp) @@ -301,7 +301,7 @@ for _, strategy in helpers.each_strategy() do }) -- the following port will not be used, will be overwritten by -- the mocked SRV record. - bu.add_target(bp, upstream_id, "srv-changes-port.test", 80) + bu.add_target(bp, upstream_id, "_srv._pro.srv-changes-port.test", 80) local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, }) bu.end_testcase_setup(strategy, bp) @@ -328,7 +328,7 @@ for _, strategy in helpers.each_strategy() do assert.equals("UNHEALTHY", health.data[1].health) assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health) - local status = bu.put_target_address_health(upstream_id, "srv-changes-port.test:80", "a-changes-port.test:90", "healthy") + local status = bu.put_target_address_health(upstream_id, "_srv._pro.srv-changes-port.test:80", "a-changes-port.test:90", "healthy") assert.same(204, status) end, 15) @@ -1780,7 +1780,7 @@ for _, strategy in helpers.each_strategy() do for i = 1, 3 do hosts[i] = { - hostname = bu.gen_multi_host(), + hostname = "_srv._pro." .. 
bu.gen_multi_host(), port1 = helpers.get_available_port(), port2 = helpers.get_available_port(), } diff --git a/spec/02-integration/14-observability/01-instrumentations_spec.lua b/spec/02-integration/14-observability/01-instrumentations_spec.lua index 781c85cd8fb2..0d9af1927995 100644 --- a/spec/02-integration/14-observability/01-instrumentations_spec.lua +++ b/spec/02-integration/14-observability/01-instrumentations_spec.lua @@ -524,7 +524,7 @@ for _, strategy in helpers.each_strategy() do -- intentionally trigger a DNS query error local service = bp.services:insert({ name = "inexist-host-service", - host = "really-inexist-host", + host = "really-inexist-host.test", port = 80, }) @@ -558,7 +558,7 @@ for _, strategy in helpers.each_strategy() do local dns_spans = assert_has_spans("kong.dns", spans) local upstream_dns for _, dns_span in ipairs(dns_spans) do - if dns_span.attributes["dns.record.domain"] == "really-inexist-host" then + if dns_span.attributes["dns.record.domain"] == "really-inexist-host.test" then upstream_dns = dns_span break end diff --git a/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua new file mode 100644 index 000000000000..ba9d3a4f38f3 --- /dev/null +++ b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/handler.lua @@ -0,0 +1,74 @@ +-- The test case 04-client_ipc_spec.lua will load this plugin and check its +-- generated error logs. 
+ +local DnsClientTestHandler = { + VERSION = "1.0", + PRIORITY = 1000, +} + + +local log = ngx.log +local ERR = ngx.ERR +local PRE = "dns-client-test:" + + +local function test() + local phase = "" + local host = "ipc.test" + + -- inject resolver.query + require("resty.dns.resolver").query = function(self, name, opts) + log(ERR, PRE, phase, "query:", name) + return {{ + type = opts.qtype, + address = "1.2.3.4", + target = "1.2.3.4", + class = 1, + name = name, + ttl = 0.1, + }} + end + + local dns_client = require("kong.tools.dns")() + local cli = dns_client.new({}) + + -- inject broadcast + local orig_broadcast = cli.cache.broadcast + cli.cache.broadcast = function(channel, data) + log(ERR, PRE, phase, "broadcast:", data) + orig_broadcast(channel, data) + end + + -- inject lrucahce.delete + local orig_delete = cli.cache.lru.delete + cli.cache.lru.delete = function(self, key) + log(ERR, PRE, phase, "lru delete:", key) + orig_delete(self, key) + end + + -- phase 1: two processes try to get answers and trigger only one query + phase = "first:" + local answers = cli:_resolve(host) + log(ERR, PRE, phase, "answers:", answers[1].address) + + -- wait records to be stale + ngx.sleep(0.5) + + -- phase 2: get the stale record and trigger only one stale-updating task, + -- the stale-updating task will update the record and broadcast + -- the lru cache invalidation event to other workers + phase = "stale:" + local answers = cli:_resolve(host) + log(ERR, PRE, phase, "answers:", answers[1].address) + + -- tests end + log(ERR, PRE, "DNS query completed") +end + + +function DnsClientTestHandler:init_worker() + ngx.timer.at(0, test) +end + + +return DnsClientTestHandler diff --git a/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua new file mode 100644 index 000000000000..8b6c80ad59e7 --- /dev/null +++ b/spec/fixtures/custom_plugins/kong/plugins/dns-client-test/schema.lua @@ -0,0 +1,12 @@ 
+return { + name = "dns-client-test", + fields = { + { + config = { + type = "record", + fields = { + }, + }, + }, + }, +} diff --git a/spec/fixtures/shared_dict.lua b/spec/fixtures/shared_dict.lua index c552376ecaff..fe0691d0a138 100644 --- a/spec/fixtures/shared_dict.lua +++ b/spec/fixtures/shared_dict.lua @@ -13,6 +13,7 @@ local dicts = { "kong_db_cache_2 16m", "kong_db_cache_miss 12m", "kong_db_cache_miss_2 12m", + "kong_dns_cache 5m", "kong_mock_upstream_loggers 10m", "kong_secrets 5m", "test_vault 5m", diff --git a/spec/helpers/dns.lua b/spec/helpers/dns.lua index 4f8bf45333ec..68fdbfbcf2bd 100644 --- a/spec/helpers/dns.lua +++ b/spec/helpers/dns.lua @@ -37,7 +37,10 @@ end --- Expires a record now. -- @param record a DNS record previously created -function _M.dnsExpire(record) +function _M.dnsExpire(client, record) + local dnscache = client.getcache() + dnscache:delete(record[1].name .. ":" .. record[1].type) + dnscache:delete(record[1].name .. ":-1") -- A/AAAA record.expire = gettime() - 1 end @@ -76,12 +79,13 @@ function _M.dnsSRV(client, records, staleTtl) -- set timeouts records.touch = gettime() records.expire = gettime() + records[1].ttl + records.ttl = records[1].ttl -- create key, and insert it - local key = records[1].type..":"..records[1].name + local key = records[1].name..":"..records[1].type + dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) + key = records[1].name..":-1" -- A/AAAA dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - -- insert last-succesful lookup type - dnscache:set(records[1].name, records[1].type) return records end @@ -117,12 +121,13 @@ function _M.dnsA(client, records, staleTtl) -- set timeouts records.touch = gettime() records.expire = gettime() + records[1].ttl + records.ttl = records[1].ttl -- create key, and insert it - local key = records[1].type..":"..records[1].name - dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - -- insert last-succesful lookup type - 
dnscache:set(records[1].name, records[1].type) + local key = records[1].name..":"..records[1].type + dnscache:set(key, records, records[1].ttl) + key = records[1].name..":-1" -- A/AAAA + dnscache:set(key, records, records[1].ttl) return records end @@ -157,12 +162,13 @@ function _M.dnsAAAA(client, records, staleTtl) -- set timeouts records.touch = gettime() records.expire = gettime() + records[1].ttl + records.ttl = records[1].ttl -- create key, and insert it - local key = records[1].type..":"..records[1].name + local key = records[1].name..":"..records[1].type + dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) + key = records[1].name..":-1" -- A/AAAA dnscache:set(key, records, records[1].ttl + (staleTtl or 4)) - -- insert last-succesful lookup type - dnscache:set(records[1].name, records[1].type) return records end