Skip to content

Commit

Permalink
Correctly calculate doc_count_error at the slice level for concurrent…
Browse files Browse the repository at this point in the history
… segment search. Change slice_size heuristic to be equal to shard_size.

Signed-off-by: Jay Deng <[email protected]>
  • Loading branch information
jed326 authored and Jay Deng committed Jan 11, 2024
1 parent 10be2ef commit 7571351
Show file tree
Hide file tree
Showing 7 changed files with 399 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Restore support for Java 8 for RestClient ([#11562](https://github.com/opensearch-project/OpenSearch/pull/11562))
- Add deleted doc count in _cat/shards ([#11678](https://github.com/opensearch-project/OpenSearch/pull/11678))
- Capture information for additional query types and aggregation types ([#11582](https://github.com/opensearch-project/OpenSearch/pull/11582))
- Use slice_size == shard_size heuristic in terms aggs for concurrent segment search and properly calculate the doc_count_error ([#11732](https://github.com/opensearch-project/OpenSearch/pull/11732))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ public void testShardSizeEqualsSizeString() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -98,8 +99,11 @@ public void testShardSizeEqualsSizeString() throws Exception {
expected.put("1", 8L);
expected.put("3", 8L);
expected.put("2", 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsString())));
expectedDocCount = expected.get(bucket.getKeyAsString());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down Expand Up @@ -221,6 +225,7 @@ public void testShardSizeEqualsSizeLong() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -233,8 +238,11 @@ public void testShardSizeEqualsSizeLong() throws Exception {
expected.put(1, 8L);
expected.put(3, 8L);
expected.put(2, 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down Expand Up @@ -355,6 +363,7 @@ public void testShardSizeEqualsSizeDouble() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
Expand All @@ -367,8 +376,11 @@ public void testShardSizeEqualsSizeDouble() throws Exception {
expected.put(1, 8L);
expected.put(3, 8L);
expected.put(2, 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
// Doc count can vary when using concurrent segment search. See https://github.com/opensearch-project/OpenSearch/issues/11680
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,11 @@ public void testDoubleValueFieldSubAggDesc() throws Exception {
* 3 one-shard indices.
*/
public void testFixedDocs() throws Exception {
// Force merge each shard down to 1 segment to verify results are the same between concurrent and non-concurrent search paths
// Else for concurrent segment search there will be additional error introduced during the slice level reduce and thus different
// buckets may be returned. See https://github.com/opensearch-project/OpenSearch/issues/11680",
client().admin().indices().prepareForceMerge("idx_fixed_docs_0", "idx_fixed_docs_1", "idx_fixed_docs_2").setMaxNumSegments(1).get();

SearchResponse response = client().prepareSearch("idx_fixed_docs_0", "idx_fixed_docs_1", "idx_fixed_docs_2")
.addAggregation(
terms("terms").executionHint(randomExecutionHint())
Expand Down
Loading

0 comments on commit 7571351

Please sign in to comment.