From 1f0bc17dc388a8285cb6e4993e2d6ce0533dfe6f Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 12 Jul 2024 19:56:32 +0800 Subject: [PATCH] stale_ttl: fix expired time caculation --- kong/dns/client.lua | 24 +++++++++---------- .../30-new-dns-client/02-old_client_spec.lua | 17 +++++++------ .../03-old_client_cache_spec.lua | 19 +++++++-------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/kong/dns/client.lua b/kong/dns/client.lua index 64d8b1be4e46..35f21a3eba34 100644 --- a/kong/dns/client.lua +++ b/kong/dns/client.lua @@ -518,27 +518,27 @@ local function resolve_callback(self, name, qtype, cache_only, tries) -- `:peek(stale=true)` verifies if the expired key remains in L2 shm, then -- initiates an asynchronous background updating task to refresh it. local ttl, _, answers = self.cache:peek(key, true) - if answers and not answers.errcode and ttl then - if not answers.expired then - answers.expire = now() + ttl - answers.expired = true - ttl = ttl + self.stale_ttl - else - ttl = ttl + (answers.expire - now()) + if answers and not answers.errcode and self.stale_ttl and ttl then + + -- `_expire_at` means the final expiration time of stale records + if not answers._expire_at then + answers._expire_at = answers.expire + self.stale_ttl end -- trigger the update task by the upper caller every 60 seconds - ttl = math_min(ttl, 60) + local remaining_stale_ttl = math_min(answers._expire_at - now(), 60) - if ttl > 0 then - log(DEBUG, PREFIX, "start stale update task ", key, " ttl:", ttl) + if remaining_stale_ttl > 0 then + log(DEBUG, PREFIX, "start stale update task ", key, + " remaining_stale_ttl:", remaining_stale_ttl) -- mlcache's internal lock mechanism ensures concurrent control start_stale_update_task(self, key, name, qtype) - answers.ttl = ttl + answers.ttl = remaining_stale_ttl + answers.expire = remaining_stale_ttl + now() - return answers, nil, ttl + return answers, nil, remaining_stale_ttl end end diff --git a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua index 9df473116950..b91319564fa4 100644 --- a/spec/01-unit/30-new-dns-client/02-old_client_spec.lua +++ b/spec/01-unit/30-new-dns-client/02-old_client_spec.lua @@ -1292,7 +1292,7 @@ describe("[DNS client]", function() assert.are.equal(NOT_FOUND_ERROR, err2) answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) assert.equal(answers1, answers2) - assert.falsy(answers2.expired) + assert.falsy(answers2._expire_at) -- wait for expiry of ttl and retry, it will not use the cached one -- because the cached one contains no avaible IP addresses @@ -1303,7 +1303,7 @@ describe("[DNS client]", function() assert.are.equal(NOT_FOUND_ERROR, err2) answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) - assert.falsy(answers2.expired) -- refreshed record + assert.falsy(answers2._expire_at) -- refreshed record -- wait for expiry of stale_ttl and retry, should be called twice now ngx.sleep(0.75 * stale_ttl) @@ -1315,7 +1315,7 @@ describe("[DNS client]", function() answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) assert.not_equal(answers1, answers2) - assert.falsy(answers2.expired) -- new answers, not expired + assert.falsy(answers2._expire_at) -- new answers, not expired end) it("verifies stale_ttl for available records", function() @@ -1345,7 +1345,7 @@ describe("[DNS client]", function() answers1 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.same(answers1[1].address, "1.1.1.1") assert.are.equal(call_count, 1) - assert.falsy(answers1.expired) + assert.falsy(answers1._expire_at) -- try again, HIT from cache, not stale answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) @@ -1359,10 +1359,9 @@ describe("[DNS client]", function() assert.are.equal(call_count, 1) -- todo: flakiness answers2 = assert(cli.cache:get(qname .. ":" .. resolver.TYPE_A)) - assert.is_true(answers2.expired) - answers2.expired = nil -- clear to be same with answers1 + assert(answers2._expire_at) + answers2._expire_at = nil -- clear to be same with answers1 assert_same_answers(answers1, answers2) - answers2.expired = true -- async stale updating task ngx.sleep(0.1 * stale_ttl) @@ -1372,7 +1371,7 @@ describe("[DNS client]", function() answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 2) - assert.falsy(answers2.expired) + assert.falsy(answers2._expire_at) -- The stale one will be completely eliminated from the cache. ngx.sleep(ttl + stale_ttl) @@ -1380,7 +1379,7 @@ describe("[DNS client]", function() answers2 = cli:resolve(qname, { qtype = resolver.TYPE_A }) assert.same(answers2[1].address, "1.1.1.1") assert.are.equal(call_count, 3) - assert.falsy(answers2.expired) + assert.falsy(answers2._expire_at) end) describe("verifies the polling of dns queries, retries, and wait times", function() diff --git a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua index 3dd3d773d822..eac3c53e55c8 100644 --- a/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua +++ b/spec/01-unit/30-new-dns-client/03-old_client_cache_spec.lua @@ -232,10 +232,9 @@ describe("[DNS client cache]", function() -- background refresh query local answers2 = cli:resolve("myhost6") assert.falsy(answers2[1].tag) - assert.is_true(answers2.expired) -- stale; marked as expired - answers2.expired = nil + assert.is_number(answers2._expire_at) -- stale; marked as expired + answers2._expire_at = nil assert_same_answers(answers2, answers) - answers2.expired = true -- wait for the refresh to complete. Ensure that the sleeping time is less -- than ttl, avoiding the updated record from becoming stale again. @@ -244,7 +243,7 @@ describe("[DNS client cache]", function() -- resolve and check whether we got the new record from the mock copy local answers3 = cli:resolve("myhost6") assert.equal(answers3[1].tag, "new") - assert.falsy(answers3.expired) + assert.falsy(answers3._expired_at) assert.not_equal(answers, answers3) -- must be a different record now assert_same_answers(answers3, mock_records["myhost6.domain.test:"..resolver.TYPE_A]) @@ -345,15 +344,15 @@ describe("[DNS client cache]", function() } -- doing a resolve will trigger the background query now answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired -- wait again for the background query to complete sleep(0.1) -- background resolve is now complete, check the cache, it should still have the -- stale record, and it should not have been replaced by the error -- answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) - assert.is_true(answers.expired) - answers.expired = nil + assert.is_number(answers._expire_at) + answers._expire_at = nil assert_same_answers(rec1, answers) end) @@ -383,14 +382,14 @@ describe("[DNS client cache]", function() } -- doing a resolve will trigger the background query now answers = cli:resolve("myhost9", { qtype = resolver.TYPE_A }) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired -- wait again for the background query to complete sleep(0.1) -- background resolve is now complete, check the cache, it should still have the -- stale record, and it should not have been replaced by the empty record answers = cli.cache:get("myhost9:" .. resolver.TYPE_A) - assert.is_true(answers.expired) -- we get the stale record, now marked as expired - answers.expired = nil + assert.is_number(answers._expire_at) -- we get the stale record, now marked as expired + answers._expire_at = nil assert_same_answers(rec1, answers) end)