Skip to content

Commit

Permalink
fix(tracing): ensure sampling decisions apply to correct trace ID
Browse files Browse the repository at this point in the history
Kong generates a temporary trace ID when spans are created, this trace
ID can be overwritten if tracing headers are passed in the request.
Before this change, the global sampling rate from kong.conf was applied
to the temporary trace ID, which was only valid for traces created by
Kong, and not in distributed tracing scenarios.

This commit ensures the sampling decision is only taken once, after
headers have been read, when the trace id is final. This ensures Kong's
sampling decision matches that of other tracing systems and therefore
guarantees that traces are not broken when sampling rates are set to
the same values.
  • Loading branch information
samugi committed Jun 21, 2024
1 parent 8e86dba commit 2100b0f
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 19 deletions.
3 changes: 3 additions & 0 deletions changelog/unreleased/kong/fix-tracing-sampling-rate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
message: "**OpenTelemetry:** improved accuracy of sampling decisions."
type: bugfix
scope: Plugin
23 changes: 4 additions & 19 deletions kong/pdk/tracing.lua
Original file line number Diff line number Diff line change
Expand Up @@ -186,18 +186,7 @@ local function create_span(tracer, options)
sampled = options.should_sample

else
if not tracer then
sampled = false

else
local err
sampled, err = tracer.sampler(trace_id)

if err then
sampled = false
ngx_log(ngx_ERR, "sampler failure: ", err)
end
end
sampled = false
end

span.parent_id = span.parent and span.parent.span_id
Expand Down Expand Up @@ -575,12 +564,13 @@ local function new_tracer(name, options)
-- @tparam number sampling_rate the sampling rate to apply for the
-- probability sampler
-- @treturn bool sampled value of sampled for this trace
function self:get_sampling_decision(parent_should_sample, sampling_rate)
function self:get_sampling_decision(parent_should_sample, plugin_sampling_rate)
local ctx = ngx.ctx

local sampled
local root_span = ctx.KONG_SPANS and ctx.KONG_SPANS[1]
local trace_id = tracing_context.get_raw_trace_id(ctx)
local sampling_rate = plugin_sampling_rate or kong.configuration.tracing_sampling_rate

if not root_span or root_span.attributes["kong.propagation_only"] then
-- should not sample if there is no root span or if the root span is
Expand All @@ -592,12 +582,7 @@ local function new_tracer(name, options)
-- and Kong is configured to only do headers propagation
sampled = parent_should_sample

elseif not sampling_rate then
-- no custom sampling_rate was passed:
-- reuse the sampling result of the root_span
sampled = root_span.should_sample == true

else
elseif sampling_rate then
-- use probability-based sampler
local err
sampled, err = self.sampler(trace_id, sampling_rate)
Expand Down
74 changes: 74 additions & 0 deletions spec/03-plugins/37-opentelemetry/04-exporter_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,80 @@ for _, strategy in helpers.each_strategy() do
end)
end


describe("With config.sampling_rate unset, using global sampling rate: 0.5", function ()
local mock
local sampling_rate = 0.5
-- this trace_id is always sampled with 0.5 rate
local sampled_trace_id = "92a54b3e1a7c4f2da9e44b8a6f3e1dab"
-- this trace_id is never sampled with 0.5 rate
local non_sampled_trace_id = "4bf92f3577b34da6a3ce929d0e0e4736"

lazy_setup(function()
bp, _ = assert(helpers.get_db_utils(strategy, {
"services",
"routes",
"plugins",
}, { "opentelemetry" }))

setup_instrumentations("all", {}, nil, nil, nil, nil, sampling_rate)
mock = helpers.http_mock(HTTP_SERVER_PORT, { timeout = HTTP_MOCK_TIMEOUT })
end)

lazy_teardown(function()
helpers.stop_kong()
if mock then
mock("close", true)
end
end)

it("does not sample spans when trace_id == non_sampled_trace_id", function()
local cli = helpers.proxy_client(7000, PROXY_PORT)
local r = assert(cli:send {
method = "GET",
path = "/",
headers = {
traceparent = "00-" .. non_sampled_trace_id .. "-0123456789abcdef-01"
}
})
assert.res_status(200, r)

cli:close()

ngx.sleep(2)
local lines = mock()
assert.is_falsy(lines)
end)

it("samples spans when trace_id == sampled_trace_id", function ()
for _ = 1, 10 do
local body
helpers.wait_until(function()
local cli = helpers.proxy_client(7000, PROXY_PORT)
local r = assert(cli:send {
method = "GET",
path = "/",
headers = {
traceparent = "00-" .. sampled_trace_id .. "-0123456789abcdef-01"
}
})
assert.res_status(200, r)

cli:close()

local lines
lines, body = mock()
return lines
end, 10)

local decoded = assert(pb.decode("opentelemetry.proto.collector.trace.v1.ExportTraceServiceRequest", body))
assert.not_nil(decoded)
local scope_spans = decoded.resource_spans[1].scope_spans
assert.is_true(#scope_spans > 0, scope_spans)
end
end)
end)

for _, case in ipairs{
{true, true, true},
{true, true, nil},
Expand Down

0 comments on commit 2100b0f

Please sign in to comment.