Skip to content

Commit

Permalink
Upgraded to t-digest 3.3. (opensearch-project#3634)
Browse files Browse the repository at this point in the history
  • Loading branch information
dblock authored and imRishN committed Jul 3, 2022
1 parent 561f5d5 commit 5844eb1
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ setup:
string_field: foo

---
"Basic test":
"Basic 2.x test":

- skip:
version: "3.0.0 -"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "- 2.9.99"
search:
rest_total_hits_as_int: true
body:
Expand Down Expand Up @@ -77,7 +84,58 @@ setup:
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }

---
"Basic 3.x test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
aggs:
percentiles_int:
percentiles:
field: int_field
percentiles_double:
percentiles:
field: double_field

- match: { hits.total: 4 }
- length: { hits.hits: 4 }

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 1.0 }
- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }

---
"Compression test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -93,31 +151,36 @@ setup:
tdigest:
compression: 200


- match: { hits.total: 4 }
- length: { hits.hits: 4 }

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.75\.0: 126.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 1.0 }
- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 26.0 }
- match: { aggregations.percentiles_double.values.50\.0: 76.0 }
- match: { aggregations.percentiles_double.values.75\.0: 126.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }


---
"Only aggs test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -135,26 +198,31 @@ setup:

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.75\.0: 126.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 1.0 }
- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 26.0 }
- match: { aggregations.percentiles_double.values.50\.0: 76.0 }
- match: { aggregations.percentiles_double.values.75\.0: 126.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }



---
"Filtered test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -177,17 +245,17 @@ setup:

- match: { aggregations.percentiles_int.values.1\.0: 51.0 }
- match: { aggregations.percentiles_int.values.5\.0: 51.0 }
- match: { aggregations.percentiles_int.values.25\.0: 63.5 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 138.5 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

- match: { aggregations.percentiles_double.values.1\.0: 51.0 }
- match: { aggregations.percentiles_double.values.5\.0: 51.0 }
- match: { aggregations.percentiles_double.values.25\.0: 63.5 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }
- match: { aggregations.percentiles_double.values.75\.0: 138.5 }
- match: { aggregations.percentiles_double.values.75\.0: 151.0 }
- match: { aggregations.percentiles_double.values.95\.0: 151.0 }
- match: { aggregations.percentiles_double.values.99\.0: 151.0 }

Expand Down Expand Up @@ -234,7 +302,14 @@ setup:
---
"Metadata test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -252,9 +327,9 @@ setup:

- match: { aggregations.percentiles_int.values.1\.0: 1.0 }
- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.75\.0: 126.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }
- match: { aggregations.percentiles_int.values.75\.0: 151.0 }
- match: { aggregations.percentiles_int.values.95\.0: 151.0 }
- match: { aggregations.percentiles_int.values.99\.0: 151.0 }

Expand Down Expand Up @@ -319,7 +394,14 @@ setup:
---
"Explicit Percents test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -338,17 +420,24 @@ setup:
- length: { hits.hits: 4 }

- match: { aggregations.percentiles_int.values.5\.0: 1.0 }
- match: { aggregations.percentiles_int.values.25\.0: 26.0 }
- match: { aggregations.percentiles_int.values.50\.0: 76.0 }
- match: { aggregations.percentiles_int.values.25\.0: 51.0 }
- match: { aggregations.percentiles_int.values.50\.0: 101.0 }

- match: { aggregations.percentiles_double.values.5\.0: 1.0 }
- match: { aggregations.percentiles_double.values.25\.0: 26.0 }
- match: { aggregations.percentiles_double.values.50\.0: 76.0 }
- match: { aggregations.percentiles_double.values.25\.0: 51.0 }
- match: { aggregations.percentiles_double.values.50\.0: 101.0 }

---
"Non-keyed test":

- skip:
version: "- 2.9.99"
features: node_selector
reason: "t-digest 3.2 was interpolating leading to incorrect percentiles"

- do:
node_selector:
version: "3.0.0 -"
search:
rest_total_hits_as_int: true
body:
Expand All @@ -366,6 +455,6 @@ setup:
- match: { aggregations.percentiles_int.values.0.key: 5.0 }
- match: { aggregations.percentiles_int.values.0.value: 1.0 }
- match: { aggregations.percentiles_int.values.1.key: 25.0 }
- match: { aggregations.percentiles_int.values.1.value: 26.0 }
- match: { aggregations.percentiles_int.values.1.value: 51.0 }
- match: { aggregations.percentiles_int.values.2.key: 50.0 }
- match: { aggregations.percentiles_int.values.2.value: 76.0 }
- match: { aggregations.percentiles_int.values.2.value: 101.0 }
2 changes: 1 addition & 1 deletion server/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ dependencies {
api "joda-time:joda-time:${versions.joda}"

// percentiles aggregation
api 'com.tdunning:t-digest:3.2'
api 'com.tdunning:t-digest:3.3'
// precentil ranks aggregation
api 'org.hdrhistogram:HdrHistogram:2.1.12'

Expand Down
1 change: 0 additions & 1 deletion server/licenses/t-digest-3.2.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/t-digest-3.3.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5e96c4fd7d63b05828cf5ef41da20649195b1b78
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ protected InternalTDigestPercentileRanks createTestInstance(
final TDigestState state = new TDigestState(100);
Arrays.stream(values).forEach(state::add);

assertEquals(state.centroidCount(), values.length);
// the number of centroids is defined as <= the number of samples inserted
assertTrue(state.centroidCount() <= values.length);
return new InternalTDigestPercentileRanks(name, percents, state, keyed, format, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ protected InternalTDigestPercentiles createTestInstance(
final TDigestState state = new TDigestState(100);
Arrays.stream(values).forEach(state::add);

assertEquals(state.centroidCount(), values.length);
// the number of centroids is defined as <= the number of samples inserted
assertTrue(state.centroidCount() <= values.length);
return new InternalTDigestPercentiles(name, percents, state, keyed, format, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@ public void testSomeMatchesSortedNumericDocValues() throws IOException {
}, tdigest -> {
assertEquals(7L, tdigest.state.size());
assertEquals(7L, tdigest.state.centroidCount());
assertEquals(4.5d, tdigest.percentile(75), 0.0d);
assertEquals("4.5", tdigest.percentileAsString(75));
assertEquals(5.0d, tdigest.percentile(75), 0.0d);
assertEquals("5.0", tdigest.percentileAsString(75));
assertEquals(3.0d, tdigest.percentile(71), 0.0d);
assertEquals("3.0", tdigest.percentileAsString(71));
assertEquals(2.0d, tdigest.percentile(50), 0.0d);
assertEquals("2.0", tdigest.percentileAsString(50));
assertEquals(1.0d, tdigest.percentile(22), 0.0d);
Expand All @@ -126,11 +128,11 @@ public void testSomeMatchesNumericDocValues() throws IOException {
iw.addDocument(singleton(new NumericDocValuesField("number", 0)));
}, tdigest -> {
assertEquals(tdigest.state.size(), 7L);
assertEquals(tdigest.state.centroidCount(), 7L);
assertTrue(tdigest.state.centroidCount() <= 7L);
assertEquals(8.0d, tdigest.percentile(100), 0.0d);
assertEquals("8.0", tdigest.percentileAsString(100));
assertEquals(6.98d, tdigest.percentile(88), 0.0d);
assertEquals("6.98", tdigest.percentileAsString(88));
assertEquals(8.0d, tdigest.percentile(88), 0.0d);
assertEquals("8.0", tdigest.percentileAsString(88));
assertEquals(1.0d, tdigest.percentile(33), 0.0d);
assertEquals("1.0", tdigest.percentileAsString(33));
assertEquals(1.0d, tdigest.percentile(25), 0.0d);
Expand All @@ -157,7 +159,7 @@ public void testQueryFiltering() throws IOException {
assertEquals(4L, tdigest.state.centroidCount());
assertEquals(2.0d, tdigest.percentile(100), 0.0d);
assertEquals(1.0d, tdigest.percentile(50), 0.0d);
assertEquals(0.5d, tdigest.percentile(25), 0.0d);
assertEquals(1.0d, tdigest.percentile(25), 0.0d);
assertTrue(AggregationInspectionHelper.hasValue(tdigest));
});

Expand Down

0 comments on commit 5844eb1

Please sign in to comment.