From 9487620d7bd06c441bbb95f5653ded1442d1b2ff Mon Sep 17 00:00:00 2001 From: Kong Team Gateway Bot <98048765+team-gateway-bot@users.noreply.github.com> Date: Wed, 13 Sep 2023 03:55:21 -0500 Subject: [PATCH] fix(tracing): span start/end time precision (#11484) (#11556) * fix(tracing): span start/end time precision Ensure the span lifetime is consistent with the hierarchy by using the same time source for the root span's end time as the balancer span. Before this fix, an approximation (causing a precision error) could result in the root span to end before its children. * docs(changelog): add issue Co-authored-by: Xumin <100666470+StarlightIbuki@users.noreply.github.com> --------- Co-authored-by: Xumin <100666470+StarlightIbuki@users.noreply.github.com> (cherry picked from commit 37a17eb491a8b85b2dd01f994bd48a3757ad216a) Co-authored-by: Samuele --- CHANGELOG/unreleased/kong/11484.yaml | 9 ++++ kong/init.lua | 2 + kong/tracing/instrumentation.lua | 3 +- .../14-tracing/03-tracer-pdk_spec.lua | 42 +++++++++++++++++++ 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG/unreleased/kong/11484.yaml diff --git a/CHANGELOG/unreleased/kong/11484.yaml b/CHANGELOG/unreleased/kong/11484.yaml new file mode 100644 index 000000000000..4a76743c834c --- /dev/null +++ b/CHANGELOG/unreleased/kong/11484.yaml @@ -0,0 +1,9 @@ +message: "Tracing: fix an issue that resulted in some parent spans to end before their children due to different precision of their timestamps" +type: bugfix +scope: PDK +prs: + - 11484 +jiras: + - "KAG-2336" +issues: + - 11294 diff --git a/kong/init.lua b/kong/init.lua index 811d16ebd25e..db22a18dfb6b 100644 --- a/kong/init.lua +++ b/kong/init.lua @@ -1503,6 +1503,7 @@ function Kong.log() local ctx = ngx.ctx if not ctx.KONG_LOG_START then ctx.KONG_LOG_START = now() * 1000 + ctx.KONG_LOG_START_NS = time_ns() if is_stream_module then if not ctx.KONG_PROCESSING_START then ctx.KONG_PROCESSING_START = start_time() * 1000 @@ -1571,6 +1572,7 @@ function Kong.log() if ctx.KONG_BODY_FILTER_START and not ctx.KONG_BODY_FILTER_ENDED_AT then ctx.KONG_BODY_FILTER_ENDED_AT = ctx.KONG_LOG_START + ctx.KONG_BODY_FILTER_ENDED_AT_NS = ctx.KONG_LOG_START_NS ctx.KONG_BODY_FILTER_TIME = ctx.KONG_BODY_FILTER_ENDED_AT - ctx.KONG_BODY_FILTER_START end diff --git a/kong/tracing/instrumentation.lua b/kong/tracing/instrumentation.lua index c26553446a74..786b5896fcd3 100644 --- a/kong/tracing/instrumentation.lua +++ b/kong/tracing/instrumentation.lua @@ -313,8 +313,7 @@ function _M.runloop_log_before(ctx) local active_span = tracer.active_span() -- check root span type to avoid encounter error if active_span and type(active_span.finish) == "function" then - local end_time = ctx.KONG_BODY_FILTER_ENDED_AT - and ctx.KONG_BODY_FILTER_ENDED_AT * 1e6 + local end_time = ctx.KONG_BODY_FILTER_ENDED_AT_NS active_span:finish(end_time) end end diff --git a/spec/02-integration/14-tracing/03-tracer-pdk_spec.lua b/spec/02-integration/14-tracing/03-tracer-pdk_spec.lua index fa4c4b10cab9..4c245c29aba1 100644 --- a/spec/02-integration/14-tracing/03-tracer-pdk_spec.lua +++ b/spec/02-integration/14-tracing/03-tracer-pdk_spec.lua @@ -7,6 +7,14 @@ local TCP_PORT = helpers.get_available_port() local tcp_trace_plugin_name = "tcp-trace-exporter" +local function get_parent(span, spans) + for _, s in ipairs(spans) do + if s.span_id == span.parent_id then + return s + end + end +end + for _, strategy in helpers.each_strategy() do local proxy_client @@ -77,5 +85,39 @@ for _, strategy in helpers.each_strategy() do end end) end) + + describe("spans start/end times are consistent with their hierarchy", function () + lazy_setup(function() + setup_instrumentations("all", false, 1) + end) + + lazy_teardown(function() + helpers.stop_kong() + end) + + it("sets child lifespan within parent's lifespan", function () + for _ = 1, 100 do + local thread = helpers.tcp_server(TCP_PORT) + local r = assert(proxy_client:send { + method = "GET", + path = "/", + }) + assert.res_status(200, r) + + local ok, res = thread:join() + assert.True(ok) + assert.is_string(res) + + local spans = cjson.decode(res) + for i = 2, #spans do -- skip the root span (no parent) + local span = spans[i] + local parent = get_parent(span, spans) + assert.is_not_nil(parent) + assert.True(span.start_time_ns >= parent.start_time_ns) + assert.True(span.end_time_ns <= parent.end_time_ns) + end + end + end) + end) end) end