Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update TraceIdRatioBasedSampler to calculate sampling threshold according to OTEP 235 #1391

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 65 additions & 17 deletions src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,20 @@

namespace OpenTelemetry\SDK\Trace\Sampler;

use function assert;
use function bin2hex;
use InvalidArgumentException;
use OpenTelemetry\Context\ContextInterface;
use OpenTelemetry\SDK\Common\Attribute\AttributesInterface;
use OpenTelemetry\SDK\Trace\SamplerInterface;
use OpenTelemetry\SDK\Trace\SamplingResult;
use OpenTelemetry\SDK\Trace\Span;
use function pack;
use function rtrim;
use function sprintf;
use function substr;
use function substr_compare;
use function unpack;

/**
* This implementation of the SamplerInterface records with given probability.
Expand All @@ -22,22 +30,22 @@
class TraceIdRatioBasedSampler implements SamplerInterface
{
private readonly float $probability;
private readonly string $tv;

/**
* @param float $probability Probability float value between 0.0 and 1.0.
* @param int<1, 14> $precision threshold precision in hexadecimal digits
*/
public function __construct(float $probability)
public function __construct(float $probability, int $precision = 4)
{
if ($probability < 0.0 || $probability > 1.0) {
if (!($probability >= 0 && $probability <= 1)) {
throw new InvalidArgumentException('probability should be be between 0.0 and 1.0.');
}

$this->probability = $probability;
$this->tv = rtrim(bin2hex(substr(pack('J', self::computeTValue($probability, $precision, 4)), 1)), '0') ?: '0';
}

/**
* Returns `SamplingResult` based on probability. Respects the parent `SampleFlag`
* {@inheritdoc}
*/
public function shouldSample(
ContextInterface $parentContext,
string $traceId,
Expand All @@ -46,22 +54,62 @@ public function shouldSample(
AttributesInterface $attributes,
array $links,
): SamplingResult {
// TODO: Add config to adjust which spans get sampled (only default from specification is implemented)
$parentSpan = Span::fromContext($parentContext);
$parentSpanContext = $parentSpan->getContext();
$traceState = $parentSpanContext->getTraceState();
$traceState = Span::fromContext($parentContext)->getContext()->getTraceState();

/**
* Since php can only store up to 63 bit positive integers
*/
$traceIdLimit = (1 << 60) - 1;
$lowerOrderBytes = hexdec(substr($traceId, strlen($traceId) - 15, 15));
$traceIdCondition = $lowerOrderBytes < round($this->probability * $traceIdLimit);
$decision = $traceIdCondition ? SamplingResult::RECORD_AND_SAMPLE : SamplingResult::DROP;
$decision = $this->probability >= 2 ** -56 && substr_compare($traceId, $this->tv, -14) >= 0
? SamplingResult::RECORD_AND_SAMPLE
: SamplingResult::DROP;

return new SamplingResult($decision, [], $traceState);
}

/**
 * Derives the rejection threshold (T-value) that corresponds to a sampling probability.
 *
 * The threshold is `2**56*(1-$probability)`, rounded to the nearest multiple of
 * `2**(56-$wordSize*⌈-log2($probability)/$wordSize+$precision-1⌉)`, i.e. the number of
 * retained fractional bits grows with smaller probabilities and is always a whole
 * number of words.
 *
 * For probabilities below `2**-56` the returned integer is `1 << 56`: its low 56 bits
 * are all zero, so the `th` conversion shown below produces `'0'` for such inputs.
 *
 * Worked example:
 * ```
 * 1/3 w/ precision=3, wordSize=4
 *   => 1 - 1/3
 *   => 2/3
 *   => 2730.666../4096
 *   => 2731/4096
 *   => 0xaab
 * ```
 *
 * Turning the result into the hexadecimal `th` value:
 * ```
 * $th = rtrim(bin2hex(substr(pack('J', $t), 1)), '0') ?: '0';
 * ```
 *
 * @param float $probability sampling probability, must be between 0 and 1
 * @param int $precision precision in words, must be at least 1
 * @param int $wordSize word size in bits, must be a power of two
 * @return int T-value scaled to 56 bits
 *
 * @internal
 */
public static function computeTValue(float $probability, int $precision, int $wordSize = 1): int
{
    assert($probability >= 0 && $probability <= 1);
    assert($precision >= 1);
    assert($wordSize >= 1 && ($wordSize & ($wordSize - 1)) === 0);

    // Reinterpret the big-endian IEEE 754 double as a 64-bit integer to reach its raw fields.
    $bits = unpack('J', pack('E', $probability))[1];
    $biasedExponent = ($bits >> 52) & ((1 << 11) - 1);
    $significand = $bits & ((1 << 52) - 1);
    if ($biasedExponent) {
        // A non-zero exponent field means the significand has an implicit leading one.
        $significand |= 1 << 52;
    }
    $exponent = $biasedExponent - 1023;

    // Scale the probability to 57 bits: 56 threshold bits plus one extra bit used for rounding.
    $scaleShift = $exponent - 52 + 57;
    $scaledProbability = $scaleShift < 0
        ? $significand >> -$scaleShift
        : $significand << $scaleShift;
    $rejection = (1 << 57) - $scaledProbability;

    // Number of leading threshold bits to keep, rounded down to a multiple of the word size.
    $keptBits = (-($exponent + 1) + $precision * $wordSize) & -$wordSize;
    // Arithmetic right shift keeps the high bits set: a mask with only the discarded low bits cleared.
    $mask = (-1 << 56) >> $keptBits;

    // Subtracting the mask adds one unit of the kept precision; halving drops the rounding bit
    // and the final mask truncates, which together rounds to nearest.
    return (($rejection - $mask) >> 1) & $mask;
}

public function getDescription(): string
{
return sprintf('%s{%.6F}', 'TraceIdRatioBasedSampler', $this->probability);
Expand Down
4 changes: 2 additions & 2 deletions tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public function test_failing_trace_id_ratio_based_sampler_decision(): void
$sampler = new TraceIdRatioBasedSampler(0.99);
$decision = $sampler->shouldSample(
Context::getRoot(),
'4bf92f3577b34da6afffffffffffffff',
'4bf92f3577b34da6a000000000000000',
'test.opentelemetry.io',
API\SpanKind::KIND_INTERNAL,
Attributes::create([]),
Expand All @@ -65,7 +65,7 @@ public function test_passing_trace_id_ratio_based_sampler_decision(): void
$sampler = new TraceIdRatioBasedSampler(0.01);
$decision = $sampler->shouldSample(
Context::getRoot(),
'4bf92f3577b34da6a000000000000000',
'4bf92f3577b34da6afffffffffffffff',
'test.opentelemetry.io',
API\SpanKind::KIND_INTERNAL,
Attributes::create([]),
Expand Down
59 changes: 55 additions & 4 deletions tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,81 @@

namespace OpenTelemetry\Tests\SDK\Unit\Trace\Sampler;

use function bin2hex;
use InvalidArgumentException;
use OpenTelemetry\API\Trace as API;
use OpenTelemetry\Context\Context;
use OpenTelemetry\SDK\Common\Attribute\Attributes;
use OpenTelemetry\SDK\Trace\Sampler\TraceIdRatioBasedSampler;
use OpenTelemetry\SDK\Trace\SamplingResult;
use function pack;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;
use function rtrim;
use function substr;

#[CoversClass(TraceIdRatioBasedSampler::class)]
class TraceIdRatioBasedSamplerTest extends TestCase
{
public function test_should_sample(): void
#[DataProvider('shouldSampleProvider')]
public function test_should_sample(string $traceId, float $probability, int $result): void
{
$sampler = new TraceIdRatioBasedSampler(1.0);
$sampler = new TraceIdRatioBasedSampler($probability);
$decision = $sampler->shouldSample(
Context::getRoot(),
'4bf92f3577b34da6a3ce929d0e0e4736',
$traceId,
'test.opentelemetry.io',
API\SpanKind::KIND_INTERNAL,
Attributes::create([]),
[],
);
$this->assertEquals(SamplingResult::RECORD_AND_SAMPLE, $decision->getDecision());
$this->assertEquals($result, $decision->getDecision());
}

/**
 * Data sets for test_should_sample().
 *
 * Each set is `[trace id, sampling probability, expected decision]`. Every set carries a
 * descriptive key so that a failure names the case instead of a numeric index. The
 * `tv=…` keys state the hexadecimal threshold expected for the given probability; the
 * boundary cases vary the trailing 14 hexadecimal digits of the trace id around it.
 *
 * @return iterable<string, array{string, float, int}>
 */
public static function shouldSampleProvider(): iterable
{
    yield 'otep-0235' => ['123456789123456789d29d6a7215ced0', 0.25, SamplingResult::RECORD_AND_SAMPLE];

    yield 'tv=0' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1.0, SamplingResult::RECORD_AND_SAMPLE];
    yield 'tv=8' => ['4bf92f3577b34da6a3ce929d0e0e4736', 0.5, SamplingResult::RECORD_AND_SAMPLE];
    yield 'tv=cccd' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1 / 5, SamplingResult::RECORD_AND_SAMPLE];
    yield 'tv=d' => ['4bf92f3577b34da6a3ce929d0e0e4736', 3 / 16, SamplingResult::DROP];

    yield 'p=1/2, randomness at threshold' => ['4bf92f3577b34da6a380000000000000', 0.5, SamplingResult::RECORD_AND_SAMPLE];
    yield 'p=1/2, randomness just below threshold' => ['4bf92f3577b34da6a37fffffffffffff', 0.5, SamplingResult::DROP];
    yield 'p=1/24, randomness above threshold' => ['4bf92f3577b34da6a3f5560000000000', 1 / 24, SamplingResult::RECORD_AND_SAMPLE];
    yield 'p=1/24, randomness below threshold' => ['4bf92f3577b34da6a3f554ffffffffff', 1 / 24, SamplingResult::DROP];
    yield 'p=2**-52, randomness at threshold' => ['4bf92f3577b34da6a3fffffffffffff0', 2 ** -52, SamplingResult::RECORD_AND_SAMPLE];
    yield 'p=2**-52, randomness just below threshold' => ['4bf92f3577b34da6a3ffffffffffffef', 2 ** -52, SamplingResult::DROP];
    yield 'p=2**-56, randomness at threshold' => ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -56, SamplingResult::RECORD_AND_SAMPLE];
    yield 'p=2**-56, randomness just below threshold' => ['4bf92f3577b34da6a3fffffffffffffe', 2 ** -56, SamplingResult::DROP];
    yield 'p=2**-57, probability too small to sample' => ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -57, SamplingResult::DROP];
}

/**
 * Checks computeTValue() against known hexadecimal `th` strings, using 4-bit words
 * so that the precision is counted in hexadecimal digits.
 */
#[DataProvider('computeTValueProvider')]
public function test_compute_t_value(string $expected, float $probability, int $precision): void
{
    $threshold = TraceIdRatioBasedSampler::computeTValue($probability, $precision, 4);

    // Encode as 8 big-endian bytes and drop the first one, leaving 14 hexadecimal digits.
    $digits = bin2hex(substr(pack('J', $threshold), 1));
    // Trailing zeros are stripped; an all-zero threshold is written as a single '0'.
    $th = rtrim($digits, '0') ?: '0';

    $this->assertSame($expected, $th);
}

/**
 * Data sets for test_compute_t_value(): `[expected th string, probability, precision]`.
 *
 * Each probability is checked at precisions 3, 4 and 5. Sets are yielded under unique
 * descriptive keys; the previous `yield from [...]` form re-emitted the integer keys
 * 0, 1, 2 for every row, which left the sets unnamed and made them collide whenever the
 * generator was collected with its keys preserved.
 *
 * @return iterable<string, array{string, int|float, int}>
 */
public static function computeTValueProvider(): iterable
{
    // see https://github.com/open-telemetry/opentelemetry-specification/pull/4166
    // [label, probability, expected th for precision 3, 4 and 5]
    $table = [
        ['1', 1, ['0', '0', '0']],
        ['1/2', 1/2, ['8', '8', '8']],
        ['1/3', 1/3, ['aab', 'aaab', 'aaaab']],
        ['1/4', 1/4, ['c', 'c', 'c']],
        ['1/5', 1/5, ['ccd', 'cccd', 'ccccd']],
        ['1/8', 1/8, ['e', 'e', 'e']],
        ['1/10', 1/10, ['e66', 'e666', 'e6666']],
        ['1/16', 1/16, ['f', 'f', 'f']],
        ['1/100', 1/100, ['fd71', 'fd70a', 'fd70a4']],
        ['1/1000', 1/1000, ['ffbe7', 'ffbe77', 'ffbe76d']],
        ['1/10000', 1/10000, ['fff972', 'fff9724', 'fff97247']],
        ['1/100000', 1/100000, ['ffff584', 'ffff583a', 'ffff583a5']],
        ['1/1000000', 1/1000000, ['ffffef4', 'ffffef39', 'ffffef391']],
    ];

    foreach ($table as [$label, $probability, $thresholds]) {
        foreach ($thresholds as $offset => $expected) {
            $precision = 3 + $offset;

            yield "p={$label}, precision={$precision}" => [$expected, $probability, $precision];
        }
    }
}

#[DataProvider('invalidProbabilityProvider')]
Expand All @@ -43,6 +93,7 @@ public static function invalidProbabilityProvider(): array
return [
'negative' => [-0.05],
'greater than one' => [1.5],
'NaN' => [NAN],
];
}

Expand Down
Loading