Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(opentelemetry): sampling rate configuration option #12054

Merged
merged 2 commits into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog/unreleased/kong/tracing-sampling-rate-scope.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
message: >
Tracing Sampling Rate can now be set via the `config.sampling_rate` property
of the OpenTelemetry plugin instead of it just being a global setting for the gateway.
type: feature
scope: Plugin
7 changes: 7 additions & 0 deletions kong/clustering/compat/removed_fields.lua
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,11 @@ return {
"read_body_for_logout",
},
},

-- Any dataplane older than 3.6.0
[3006000000] = {
opentelemetry = {
"sampling_rate",
},
},
}
98 changes: 70 additions & 28 deletions kong/pdk/tracing.lua
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ local tablepool = require "tablepool"
local new_tab = require "table.new"
local utils = require "kong.tools.utils"
local phase_checker = require "kong.pdk.private.phases"
local tracing_context = require "kong.tracing.tracing_context"

local ngx = ngx
local type = type
Expand Down Expand Up @@ -63,34 +64,29 @@ local function generate_span_id()
return rand_bytes(8)
end

--- Built-in sampler
local function always_on_sampler()
return true
end

local function always_off_sampler()
return false
end

-- Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
-- spec: https://github.com/c24t/opentelemetry-specification/blob/3b3d321865cf46364bdfb292c179b6444dc96bf9/specification/sdk-tracing.md#probability-sampler-algorithm
local function get_trace_id_based_sampler(rate)
if type(rate) ~= "number" then
error("invalid fraction", 2)
end
local function get_trace_id_based_sampler(options_sampling_rate)
return function(trace_id, sampling_rate)
sampling_rate = sampling_rate or options_sampling_rate

if rate >= 1 then
return always_on_sampler
end
if type(sampling_rate) ~= "number" then
error("invalid fraction", 2)
end

if rate <= 0 then
return always_off_sampler
end
-- always on sampler
if sampling_rate >= 1 then
return true
end

-- always off sampler
if sampling_rate <= 0 then
return false
end

local bound = rate * BOUND_MAX
-- probability sampler
local bound = sampling_rate * BOUND_MAX

-- TODO: is this a sound method to sample?
return function(trace_id)
if #trace_id < SAMPLING_BYTE then
error(TOO_SHORT_MESSAGE, 2)
end
Expand Down Expand Up @@ -200,17 +196,17 @@ local function create_span(tracer, options)
span.span_id = generate_span_id()
span.trace_id = trace_id
span.kind = options.span_kind or SPAN_KIND.INTERNAL
-- get_sampling_decision() can be used to dynamically run the sampler's logic
-- and obtain the sampling decision for the span. This way plugins can apply
-- their configured sampling rate dynamically. The sampled flag can then be
-- overwritten by set_should_sample.
span.should_sample = sampled

setmetatable(span, span_mt)
return span
end

local function link_span(tracer, span, name, options)
if not span.should_sample then
kong.log.debug("skipping non-sampled span")
return
end
if tracer and type(tracer) ~= "table" then
error("invalid tracer", 2)
end
Expand Down Expand Up @@ -270,8 +266,8 @@ end
-- local time = ngx.now()
-- span:finish(time * 1000000000)
function span_mt:finish(end_time_ns)
if self.end_time_ns ~= nil or not self.should_sample then
-- span is finished, and already processed or not sampled
if self.end_time_ns ~= nil then
-- span is finished, and already processed
return
end

Expand Down Expand Up @@ -426,6 +422,7 @@ noop_tracer.active_span = NOOP
noop_tracer.set_active_span = NOOP
noop_tracer.process_span = NOOP
noop_tracer.set_should_sample = NOOP
noop_tracer.get_sampling_decision = NOOP

local VALID_TRACING_PHASES = {
rewrite = true,
Expand Down Expand Up @@ -554,6 +551,51 @@ local function new_tracer(name, options)
end
end

--- Get the sampling decision result
--
-- The decision is taken by the first rule that matches:
--
-- 1. There is no root span, or the root span carries the
--    `kong.propagation_only` attribute (a dummy span created only to
--    propagate headers): never sample.
-- 2. The parent has sampled flag == false, or no trace_id is available:
--    behave as a parent-based sampler and inherit `parent_should_sample`.
-- 3. No `sampling_rate` was passed: reuse the sampling result already
--    stored on the root span.
-- 4. Otherwise: apply the probability-based sampler with `sampling_rate`.
--
-- @function kong.tracing:get_sampling_decision
-- @tparam bool parent_should_sample value of the parent span sampled flag
-- extracted from the incoming tracing headers
-- @tparam number sampling_rate the sampling rate to apply for the
-- probability sampler
-- @treturn bool sampled value of sampled for this trace (always a boolean)
function self:get_sampling_decision(parent_should_sample, sampling_rate)
  local ctx = ngx.ctx

  local sampled
  local root_span = ctx.KONG_SPANS and ctx.KONG_SPANS[1]
  local trace_id = tracing_context.get_raw_trace_id(ctx)

  -- guard the attribute lookup so that a root span without an attributes
  -- table does not raise an "attempt to index a nil value" error
  local root_attributes = root_span and root_span.attributes
  local propagation_only = root_attributes
                           and root_attributes["kong.propagation_only"]

  if not root_span or propagation_only then
    -- should not sample if there is no root span or if the root span is
    -- a dummy created only to propagate headers
    sampled = false

  elseif parent_should_sample == false or not trace_id then
    -- trace_id can be nil when tracing instrumentations are disabled
    -- and Kong is configured to only do headers propagation
    sampled = parent_should_sample

  elseif not sampling_rate then
    -- no custom sampling_rate was passed:
    -- reuse the sampling result of the root_span
    sampled = root_span.should_sample == true

  else
    -- use probability-based sampler
    sampled = self.sampler(trace_id, sampling_rate)
  end

  -- enforce boolean: a nil decision (e.g. nil parent_should_sample with no
  -- trace_id) is reported as false
  return not not sampled
end

tracer_memo[name] = setmetatable(self, tracer_mt)
return tracer_memo[name]
end
Expand Down
41 changes: 29 additions & 12 deletions kong/plugins/opentelemetry/handler.lua
Original file line number Diff line number Diff line change
Expand Up @@ -94,34 +94,32 @@ end
function OpenTelemetryHandler:access(conf)
local headers = ngx_get_headers()
local root_span = ngx.ctx.KONG_SPANS and ngx.ctx.KONG_SPANS[1]
local tracer = kong.tracing.new("otel")

-- make propagation work when tracing instrumentation is not enabled
-- get the global tracer when available, or instantiate a new one
local tracer = kong.tracing.name == "noop" and kong.tracing.new("otel")
or kong.tracing

-- make propagation work with tracing disabled
if not root_span then
root_span = tracer.start_span("root")
root_span:set_attribute("kong.propagation_only", true)

-- this span is created only for propagation and will be bypassed by the exporter
-- since tracing is disabled, turn off sampling entirely for this trace
kong.ctx.plugin.should_sample = false
end

local injected_parent_span = tracing_context.get_unlinked_span("balancer") or root_span
local header_type, trace_id, span_id, parent_id, parent_sampled, _ = propagation_parse(headers, conf.header_type)

local header_type, trace_id, span_id, parent_id, should_sample, _ = propagation_parse(headers, conf.header_type)
if should_sample == false then
tracer:set_should_sample(should_sample)
injected_parent_span.should_sample = should_sample
end

-- overwrite trace id
-- as we are in a chain of existing trace
-- Overwrite trace ids
-- with the value extracted from incoming tracing headers
if trace_id then
-- to propagate the correct trace ID we have to set it here
-- before passing this span to propagation.set()
injected_parent_span.trace_id = trace_id
-- update the Tracing Context with the trace ID extracted from headers
tracing_context.set_raw_trace_id(trace_id)
end

-- overwrite root span's parent_id
if span_id then
root_span.parent_id = span_id
Expand All @@ -130,6 +128,25 @@ function OpenTelemetryHandler:access(conf)
root_span.parent_id = parent_id
end

-- Configure the sampled flags
local sampled
if kong.ctx.plugin.should_sample == false then
sampled = false

else
-- Sampling decision for the current trace.
local err
-- get_sampling_decision() depends on the value of the trace id: call it
-- after the trace_id is updated
sampled, err = tracer:get_sampling_decision(parent_sampled, conf.sampling_rate)
if err then
ngx_log(ngx_ERR, _log_prefix, "sampler failure: ", err)
end
end
tracer:set_should_sample(sampled)
-- Set the sampled flag for the outgoing header's span
injected_parent_span.should_sample = sampled

propagation_set(conf.header_type, header_type, injected_parent_span, "w3c")
end

Expand Down
7 changes: 7 additions & 0 deletions kong/plugins/opentelemetry/schema.lua
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ return {
required = false,
default = "preserve",
one_of = { "preserve", "ignore", "b3", "b3-single", "w3c", "jaeger", "ot", "aws", "gcp" } } },
{ sampling_rate = {
description = "Tracing sampling rate for configuring the probability-based sampler. When set, this value supersedes the global `tracing_sampling_rate` setting from kong.conf.",
type = "number",
between = {0, 1},
required = false,
default = nil,
} },
},
entity_checks = {
{ custom_entity_check = {
Expand Down
2 changes: 2 additions & 0 deletions spec/02-integration/09-hybrid_mode/09-config-compat_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ describe("CP/DP config compat transformations #" .. strategy, function()

local expected_otel_prior_35 = utils.cycle_aware_deep_copy(opentelemetry)
expected_otel_prior_35.config.header_type = "preserve"
expected_otel_prior_35.config.sampling_rate = nil
do_assert(utils.uuid(), "3.4.0", expected_otel_prior_35)

-- cleanup
Expand All @@ -231,6 +232,7 @@ describe("CP/DP config compat transformations #" .. strategy, function()

local expected_otel_prior_34 = utils.cycle_aware_deep_copy(opentelemetry)
expected_otel_prior_34.config.header_type = "preserve"
expected_otel_prior_34.config.sampling_rate = nil
do_assert(utils.uuid(), "3.3.0", expected_otel_prior_34)

-- cleanup
Expand Down
Loading
Loading