diff --git a/src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php b/src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php
index 4f49e5865..10d58eeb6 100644
--- a/src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php
+++ b/src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php
@@ -4,12 +4,20 @@
 
 namespace OpenTelemetry\SDK\Trace\Sampler;
 
+use function assert;
+use function bin2hex;
 use InvalidArgumentException;
 use OpenTelemetry\Context\ContextInterface;
 use OpenTelemetry\SDK\Common\Attribute\AttributesInterface;
 use OpenTelemetry\SDK\Trace\SamplerInterface;
 use OpenTelemetry\SDK\Trace\SamplingResult;
 use OpenTelemetry\SDK\Trace\Span;
+use function pack;
+use function rtrim;
+use function sprintf;
+use function substr;
+use function substr_compare;
+use function unpack;
 
 /**
  * This implementation of the SamplerInterface records with given probability.
@@ -22,22 +30,29 @@
 class TraceIdRatioBasedSampler implements SamplerInterface
 {
     private readonly float $probability;
+    private readonly string $tv;
 
     /**
      * @param float $probability Probability float value between 0.0 and 1.0.
+     * @param int<1, 14> $precision threshold precision in hexadecimal digits
+     *
+     * @throws InvalidArgumentException if $probability is not within [0.0, 1.0] or $precision is not within [1, 14]
      */
-    public function __construct(float $probability)
+    public function __construct(float $probability, int $precision = 4)
     {
-        if ($probability < 0.0 || $probability > 1.0) {
+        // negated range check so that NAN (for which every comparison is false) is rejected as well
+        if (!($probability >= 0 && $probability <= 1)) {
             throw new InvalidArgumentException('probability should be be between 0.0 and 1.0.');
         }
+        // reject precisions outside the documented range before they reach the bit arithmetic in computeTValue()
+        if ($precision < 1 || $precision > 14) {
+            throw new InvalidArgumentException('precision should be between 1 and 14.');
+        }
+
         $this->probability = $probability;
+        $this->tv = rtrim(bin2hex(substr(pack('J', self::computeTValue($probability, $precision, 4)), 1)), '0') ?: '0';
     }
 
-    /**
-     * Returns `SamplingResult` based on probability. Respects the parent `SampleFlag`
-     * {@inheritdoc}
-     */
     public function shouldSample(
         ContextInterface $parentContext,
         string $traceId,
@@ -46,22 +61,64 @@ public function shouldSample(
         AttributesInterface $attributes,
         array $links,
     ): SamplingResult {
-        // TODO: Add config to adjust which spans get sampled (only default from specification is implemented)
-        $parentSpan = Span::fromContext($parentContext);
-        $parentSpanContext = $parentSpan->getContext();
-        $traceState = $parentSpanContext->getTraceState();
+        $traceState = Span::fromContext($parentContext)->getContext()->getTraceState();
 
-        /**
-         * Since php can only store up to 63 bit positive integers
-         */
-        $traceIdLimit = (1 << 60) - 1;
-        $lowerOrderBytes = hexdec(substr($traceId, strlen($traceId) - 15, 15));
-        $traceIdCondition = $lowerOrderBytes < round($this->probability * $traceIdLimit);
-        $decision = $traceIdCondition ? SamplingResult::RECORD_AND_SAMPLE : SamplingResult::DROP;
+        // Sample when the last 14 characters of the trace id compare at or above the rejection threshold;
+        // probabilities below 2**-56 are never sampled.
+        $decision = $this->probability >= 2 ** -56 && substr_compare($traceId, $this->tv, -14) >= 0
+            ? SamplingResult::RECORD_AND_SAMPLE
+            : SamplingResult::DROP;
 
         return new SamplingResult($decision, [], $traceState);
     }
 
+    /**
+     * Computes the 56-bit rejection threshold (T-value) for a given probability.
+     *
+     * The T-value is computed as `2**56*(1-$probability)` with a precision of
+     * `2**-($wordSize*⌈-log2($probability)/$wordSize+$precision-1⌉)`.
+     *
+     * Values below `2**-56` will return `0`.
+     *
+     * ```
+     * 1/3 w/ precision=3, wordSize=4
+     * => 1 - 1/3
+     * => 2/3
+     * => 2730.666../4096
+     * => 2731/4096
+     * => 0xaab
+     * ```
+     *
+     * Converting the result into `th` hexadecimal value:
+     * ```
+     * $th = rtrim(bin2hex(substr(pack('J', $t), 1)), '0') ?: '0';
+     * ```
+     *
+     * @param float $probability sampling probability, must be between 0 and 1
+     * @param int $precision precision in words
+     * @param int $wordSize word size to use, must be a power of two
+     * @return int 56bit T-value
+     *
+     * @internal
+     */
+    public static function computeTValue(float $probability, int $precision, int $wordSize = 1): int
+    {
+        assert($probability >= 0 && $probability <= 1);
+        assert($precision >= 1);
+        assert($wordSize >= 1 && ($wordSize & $wordSize - 1) === 0);
+
+        $b = unpack('J', pack('E', $probability))[1];
+        $e = $b >> 52 & (1 << 11) - 1;
+        $f = $b & (1 << 52) - 1 | ($e ? 1 << 52 : 0);
+
+        // 56+1bit for rounding
+        $s = $e - 1023 - 52 + 57;
+        $t = (1 << 57) - ($s < 0 ? $f >> -$s : $f << $s);
+        $m = -1 << 56 >> (-($e - 1023 + 1) + $precision * $wordSize & -$wordSize);
+
+        return $t - $m >> 1 & $m;
+    }
+
     public function getDescription(): string
     {
         return sprintf('%s{%.6F}', 'TraceIdRatioBasedSampler', $this->probability);
diff --git a/tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php b/tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php
index 0f1bc58e4..ae759f2d6 100644
--- a/tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php
+++ b/tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php
@@ -51,7 +51,7 @@ public function test_failing_trace_id_ratio_based_sampler_decision(): void
         $sampler = new TraceIdRatioBasedSampler(0.99);
         $decision = $sampler->shouldSample(
             Context::getRoot(),
-            '4bf92f3577b34da6afffffffffffffff',
+            '4bf92f3577b34da6a000000000000000',
             'test.opentelemetry.io',
             API\SpanKind::KIND_INTERNAL,
             Attributes::create([]),
@@ -65,7 +65,7 @@ public function test_passing_trace_id_ratio_based_sampler_decision(): void
         $sampler = new TraceIdRatioBasedSampler(0.01);
         $decision = $sampler->shouldSample(
             Context::getRoot(),
-            '4bf92f3577b34da6a000000000000000',
+            '4bf92f3577b34da6afffffffffffffff',
             'test.opentelemetry.io',
             API\SpanKind::KIND_INTERNAL,
             Attributes::create([]),
diff --git a/tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php b/tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php
index bf43fc723..916ad8ff4 100644
--- a/tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php
+++ b/tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php
@@ -4,31 +4,95 @@
 
 namespace OpenTelemetry\Tests\SDK\Unit\Trace\Sampler;
 
+use function bin2hex;
 use InvalidArgumentException;
 use OpenTelemetry\API\Trace as API;
 use OpenTelemetry\Context\Context;
 use OpenTelemetry\SDK\Common\Attribute\Attributes;
 use OpenTelemetry\SDK\Trace\Sampler\TraceIdRatioBasedSampler;
 use OpenTelemetry\SDK\Trace\SamplingResult;
+use function pack;
 use PHPUnit\Framework\Attributes\CoversClass;
 use PHPUnit\Framework\Attributes\DataProvider;
 use PHPUnit\Framework\TestCase;
+use function rtrim;
+use function substr;
 
 #[CoversClass(TraceIdRatioBasedSampler::class)]
 class TraceIdRatioBasedSamplerTest extends TestCase
 {
-    public function test_should_sample(): void
+    #[DataProvider('shouldSampleProvider')]
+    public function test_should_sample(string $traceId, float $probability, int $result): void
     {
-        $sampler = new TraceIdRatioBasedSampler(1.0);
+        $sampler = new TraceIdRatioBasedSampler($probability);
         $decision = $sampler->shouldSample(
             Context::getRoot(),
-            '4bf92f3577b34da6a3ce929d0e0e4736',
+            $traceId,
             'test.opentelemetry.io',
             API\SpanKind::KIND_INTERNAL,
             Attributes::create([]),
             [],
         );
-        $this->assertEquals(SamplingResult::RECORD_AND_SAMPLE, $decision->getDecision());
+        $this->assertEquals($result, $decision->getDecision());
+    }
+
+    public static function shouldSampleProvider(): iterable
+    {
+        yield 'otep-0235' => ['123456789123456789d29d6a7215ced0', 0.25, SamplingResult::RECORD_AND_SAMPLE];
+
+        yield 'tv=0' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1.0, SamplingResult::RECORD_AND_SAMPLE];
+        yield 'tv=8' => ['4bf92f3577b34da6a3ce929d0e0e4736', 0.5, SamplingResult::RECORD_AND_SAMPLE];
+        yield 'tv=cccd' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1 / 5, SamplingResult::RECORD_AND_SAMPLE];
+        yield 'tv=d' => ['4bf92f3577b34da6a3ce929d0e0e4736', 3 / 16, SamplingResult::DROP];
+
+        yield ['4bf92f3577b34da6a380000000000000', 0.5, SamplingResult::RECORD_AND_SAMPLE];
+        yield ['4bf92f3577b34da6a37fffffffffffff', 0.5, SamplingResult::DROP];
+        yield ['4bf92f3577b34da6a3f5560000000000', 1 / 24, SamplingResult::RECORD_AND_SAMPLE];
+        yield ['4bf92f3577b34da6a3f554ffffffffff', 1 / 24, SamplingResult::DROP];
+        yield ['4bf92f3577b34da6a3fffffffffffff0', 2 ** -52, SamplingResult::RECORD_AND_SAMPLE];
+        yield ['4bf92f3577b34da6a3ffffffffffffef', 2 ** -52, SamplingResult::DROP];
+        yield ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -56, SamplingResult::RECORD_AND_SAMPLE];
+        yield ['4bf92f3577b34da6a3fffffffffffffe', 2 ** -56, SamplingResult::DROP];
+        yield ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -57, SamplingResult::DROP];
+    }
+
+    #[DataProvider('computeTValueProvider')]
+    public function test_compute_t_value(string $expected, float $probability, int $precision): void
+    {
+        $tv = TraceIdRatioBasedSampler::computeTValue($probability, $precision, 4);
+        $this->assertSame($expected, rtrim(bin2hex(substr(pack('J', $tv), 1)), '0') ?: '0');
+    }
+
+    public static function computeTValueProvider(): iterable
+    {
+        // see https://github.com/open-telemetry/opentelemetry-specification/pull/4166
+        yield from [['0', 1, 3], ['0', 1, 4], ['0', 1, 5]];
+        yield from [['8', 1/2, 3], ['8', 1/2, 4], ['8', 1/2, 5]];
+        yield from [['aab', 1/3, 3], ['aaab', 1/3, 4], ['aaaab', 1/3, 5]];
+        yield from [['c', 1/4, 3], ['c', 1/4, 4], ['c', 1/4, 5]];
+        yield from [['ccd', 1/5, 3], ['cccd', 1/5, 4], ['ccccd', 1/5, 5]];
+        yield from [['e', 1/8, 3], ['e', 1/8, 4], ['e', 1/8, 5]];
+        yield from [['e66', 1/10, 3], ['e666', 1/10, 4], ['e6666', 1/10, 5]];
+        yield from [['f', 1/16, 3], ['f', 1/16, 4], ['f', 1/16, 5]];
+        yield from [['fd71', 1/100, 3], ['fd70a', 1/100, 4], ['fd70a4', 1/100, 5]];
+        yield from [['ffbe7', 1/1000, 3], ['ffbe77', 1/1000, 4], ['ffbe76d', 1/1000, 5]];
+        yield from [['fff972', 1/10000, 3], ['fff9724', 1/10000, 4], ['fff97247', 1/10000, 5]];
+        yield from [['ffff584', 1/100000, 3], ['ffff583a', 1/100000, 4], ['ffff583a5', 1/100000, 5]];
+        yield from [['ffffef4', 1/1000000, 3], ['ffffef39', 1/1000000, 4], ['ffffef391', 1/1000000, 5]];
+    }
+
+    #[DataProvider('invalidPrecisionProvider')]
+    public function test_invalid_precision_trace_id_ratio_based_sampler(int $precision): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        new TraceIdRatioBasedSampler(0.5, $precision);
+    }
+
+    public static function invalidPrecisionProvider(): iterable
+    {
+        yield 'zero' => [0];
+        yield 'negative' => [-1];
+        yield 'greater than fourteen' => [15];
     }
 
     #[DataProvider('invalidProbabilityProvider')]
@@ -43,6 +107,7 @@ public static function invalidProbabilityProvider(): array
         return [
             'negative' => [-0.05],
             'greater than one' => [1.5],
+            'NaN' => [NAN],
         ];
     }
 