From 5844eb1d196d6cfe2cb652310d1ba6e15d0a0e3b Mon Sep 17 00:00:00 2001 From: "Daniel (dB.) Doubrovkine" Date: Fri, 24 Jun 2022 13:19:16 -0400 Subject: [PATCH] Upgraded to t-digest 3.3. (#3634) --- .../180_percentiles_tdigest_metric.yml | 149 ++++++++++++++---- server/build.gradle | 2 +- server/licenses/t-digest-3.2.jar.sha1 | 1 - server/licenses/t-digest-3.3.jar.sha1 | 1 + .../InternalTDigestPercentilesRanksTests.java | 3 +- .../InternalTDigestPercentilesTests.java | 3 +- .../TDigestPercentilesAggregatorTests.java | 14 +- 7 files changed, 133 insertions(+), 40 deletions(-) delete mode 100644 server/licenses/t-digest-3.2.jar.sha1 create mode 100644 server/licenses/t-digest-3.3.jar.sha1 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml index 9ed414f6b8439..53d0ed1b2d05f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/180_percentiles_tdigest_metric.yml @@ -44,9 +44,16 @@ setup: string_field: foo --- -"Basic test": +"Basic 2.x test": + + - skip: + version: "3.0.0 -" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" - do: + node_selector: + version: "- 2.9.99" search: rest_total_hits_as_int: true body: @@ -77,7 +84,58 @@ setup: - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } +--- +"Basic 3.x test": + + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" + search: + rest_total_hits_as_int: true + body: + aggs: + percentiles_int: + percentiles: + field: int_field + percentiles_double: + percentiles: + field: double_field + + - match: { hits.total: 4 } + - length: { hits.hits: 4 } + + - match: { aggregations.percentiles_int.values.1\.0: 1.0 } + - match: { aggregations.percentiles_int.values.5\.0: 1.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } + - match: { aggregations.percentiles_int.values.95\.0: 151.0 } + - match: { aggregations.percentiles_int.values.99\.0: 151.0 } + + - match: { aggregations.percentiles_double.values.1\.0: 1.0 } + - match: { aggregations.percentiles_double.values.5\.0: 1.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } + - match: { aggregations.percentiles_double.values.95\.0: 151.0 } + - match: { aggregations.percentiles_double.values.99\.0: 151.0 } + +--- +"Compression test": + + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -93,31 +151,36 @@ setup: tdigest: compression: 200 - - match: { hits.total: 4 } - length: { hits.hits: 4 } - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 1.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } - - match: { aggregations.percentiles_double.values.75\.0: 126.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } - --- "Only aggs test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -135,26 +198,31 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 1.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } - - match: { aggregations.percentiles_double.values.75\.0: 126.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } - - --- "Filtered test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -177,17 +245,17 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 51.0 } - match: { aggregations.percentiles_int.values.5\.0: 51.0 } - - match: { aggregations.percentiles_int.values.25\.0: 63.5 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } - match: { aggregations.percentiles_int.values.50\.0: 101.0 } - - match: { aggregations.percentiles_int.values.75\.0: 138.5 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } - match: { aggregations.percentiles_double.values.1\.0: 51.0 } - match: { aggregations.percentiles_double.values.5\.0: 51.0 } - - match: { aggregations.percentiles_double.values.25\.0: 63.5 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } - match: { aggregations.percentiles_double.values.50\.0: 101.0 } - - match: { aggregations.percentiles_double.values.75\.0: 138.5 } + - match: { aggregations.percentiles_double.values.75\.0: 151.0 } - match: { aggregations.percentiles_double.values.95\.0: 151.0 } - match: { aggregations.percentiles_double.values.99\.0: 151.0 } @@ -234,7 +302,14 @@ setup: --- "Metadata test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -252,9 +327,9 @@ setup: - match: { aggregations.percentiles_int.values.1\.0: 1.0 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } - - match: { aggregations.percentiles_int.values.75\.0: 126.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } + - match: { aggregations.percentiles_int.values.75\.0: 151.0 } - match: { aggregations.percentiles_int.values.95\.0: 151.0 } - match: { aggregations.percentiles_int.values.99\.0: 151.0 } @@ -319,7 +394,14 @@ setup: --- "Explicit Percents test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -338,17 +420,24 @@ setup: - length: { hits.hits: 4 } - match: { aggregations.percentiles_int.values.5\.0: 1.0 } - - match: { aggregations.percentiles_int.values.25\.0: 26.0 } - - match: { aggregations.percentiles_int.values.50\.0: 76.0 } + - match: { aggregations.percentiles_int.values.25\.0: 51.0 } + - match: { aggregations.percentiles_int.values.50\.0: 101.0 } - match: { aggregations.percentiles_double.values.5\.0: 1.0 } - - match: { aggregations.percentiles_double.values.25\.0: 26.0 } - - match: { aggregations.percentiles_double.values.50\.0: 76.0 } + - match: { aggregations.percentiles_double.values.25\.0: 51.0 } + - match: { aggregations.percentiles_double.values.50\.0: 101.0 } --- "Non-keyed test": + - skip: + version: "- 2.9.99" + features: node_selector + reason: "t-digest 3.2 was interpolating leading to incorrect percentiles" + - do: + node_selector: + version: "3.0.0 -" search: rest_total_hits_as_int: true body: @@ -366,6 +455,6 @@ setup: - match: { aggregations.percentiles_int.values.0.key: 5.0 } - match: { aggregations.percentiles_int.values.0.value: 1.0 } - match: { aggregations.percentiles_int.values.1.key: 25.0 } - - match: { aggregations.percentiles_int.values.1.value: 26.0 } + - match: { aggregations.percentiles_int.values.1.value: 51.0 } - match: { aggregations.percentiles_int.values.2.key: 50.0 } - - match: { aggregations.percentiles_int.values.2.value: 76.0 } + - match: { aggregations.percentiles_int.values.2.value: 101.0 } diff --git a/server/build.gradle b/server/build.gradle index 4490b2ea170cf..9d9d12e798eab 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -119,7 +119,7 @@ dependencies { api "joda-time:joda-time:${versions.joda}" // percentiles aggregation - api 'com.tdunning:t-digest:3.2' + api 'com.tdunning:t-digest:3.3' // precentil ranks aggregation api 'org.hdrhistogram:HdrHistogram:2.1.12' diff --git a/server/licenses/t-digest-3.2.jar.sha1 b/server/licenses/t-digest-3.2.jar.sha1 deleted file mode 100644 index de6e848545f38..0000000000000 --- a/server/licenses/t-digest-3.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -2ab94758b0276a8a26102adf8d528cf6d0567b9a \ No newline at end of file diff --git a/server/licenses/t-digest-3.3.jar.sha1 b/server/licenses/t-digest-3.3.jar.sha1 new file mode 100644 index 0000000000000..79319da60ead6 --- /dev/null +++ b/server/licenses/t-digest-3.3.jar.sha1 @@ -0,0 +1 @@ +5e96c4fd7d63b05828cf5ef41da20649195b1b78 \ No newline at end of file diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java index aea81fd5d1c78..78296eddbdc2c 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesRanksTests.java @@ -53,7 +53,8 @@ protected InternalTDigestPercentileRanks createTestInstance( final TDigestState state = new TDigestState(100); Arrays.stream(values).forEach(state::add); - assertEquals(state.centroidCount(), values.length); + // the number of centroids is defined as <= the number of samples inserted + assertTrue(state.centroidCount() <= values.length); return new InternalTDigestPercentileRanks(name, percents, state, keyed, format, metadata); } diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java index 4d88f8fecd709..101583f1f37c9 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/InternalTDigestPercentilesTests.java @@ -53,7 +53,8 @@ protected InternalTDigestPercentiles createTestInstance( final TDigestState state = new TDigestState(100); Arrays.stream(values).forEach(state::add); - assertEquals(state.centroidCount(), values.length); + // the number of centroids is defined as <= the number of samples inserted + assertTrue(state.centroidCount() <= values.length); return new InternalTDigestPercentiles(name, percents, state, keyed, format, metadata); } diff --git a/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java index 50415dc10df7e..fd98a090367b2 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/metrics/TDigestPercentilesAggregatorTests.java @@ -105,8 +105,10 @@ public void testSomeMatchesSortedNumericDocValues() throws IOException { }, tdigest -> { assertEquals(7L, tdigest.state.size()); assertEquals(7L, tdigest.state.centroidCount()); - assertEquals(4.5d, tdigest.percentile(75), 0.0d); - assertEquals("4.5", tdigest.percentileAsString(75)); + assertEquals(5.0d, tdigest.percentile(75), 0.0d); + assertEquals("5.0", tdigest.percentileAsString(75)); + assertEquals(3.0d, tdigest.percentile(71), 0.0d); + assertEquals("3.0", tdigest.percentileAsString(71)); assertEquals(2.0d, tdigest.percentile(50), 0.0d); assertEquals("2.0", tdigest.percentileAsString(50)); assertEquals(1.0d, tdigest.percentile(22), 0.0d); @@ -126,11 +128,11 @@ public void testSomeMatchesNumericDocValues() throws IOException { iw.addDocument(singleton(new NumericDocValuesField("number", 0))); }, tdigest -> { assertEquals(tdigest.state.size(), 7L); - assertEquals(tdigest.state.centroidCount(), 7L); + assertTrue(tdigest.state.centroidCount() <= 7L); assertEquals(8.0d, tdigest.percentile(100), 0.0d); assertEquals("8.0", tdigest.percentileAsString(100)); - assertEquals(6.98d, tdigest.percentile(88), 0.0d); - assertEquals("6.98", tdigest.percentileAsString(88)); + assertEquals(8.0d, tdigest.percentile(88), 0.0d); + assertEquals("8.0", tdigest.percentileAsString(88)); assertEquals(1.0d, tdigest.percentile(33), 0.0d); assertEquals("1.0", tdigest.percentileAsString(33)); assertEquals(1.0d, tdigest.percentile(25), 0.0d); @@ -157,7 +159,7 @@ public void testQueryFiltering() throws IOException { assertEquals(4L, tdigest.state.centroidCount()); assertEquals(2.0d, tdigest.percentile(100), 0.0d); assertEquals(1.0d, tdigest.percentile(50), 0.0d); - assertEquals(0.5d, tdigest.percentile(25), 0.0d); + assertEquals(1.0d, tdigest.percentile(25), 0.0d); assertTrue(AggregationInspectionHelper.hasValue(tdigest)); });