Correctly calculate doc_count_error at the slice level for concurrent segment search. Change slice_size heuristic to be equal to shard_size.

Signed-off-by: Jay Deng <[email protected]>
jed326 authored and Jay Deng committed Jan 9, 2024
1 parent 2de44a7 commit 1194280
Showing 7 changed files with 393 additions and 19 deletions.
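For context on the heuristic named in the commit title: when shard_size is not set explicitly, the terms aggregation collects more candidate buckets per shard than the requested size so the reduce phase can bound doc_count_error. This commit makes each concurrent-segment-search slice collect shard_size buckets instead of applying a separate slice-level heuristic, and calculates doc_count_error during the slice-level reduce. A minimal sketch of the idea, assuming a size heuristic of the same shape as BucketUtils.suggestShardSideQueueSize (the formula and names below are illustrative, not quoted from the OpenSearch source):

final class SliceSizeSketch {
    // Illustrative: expand the requested bucket count so the reduce phase
    // has enough candidates to bound doc_count_error.
    static int suggestShardSideQueueSize(int size) {
        return Math.max(size, (int) (size * 1.5 + 10));
    }

    // Conceptually, this commit's heuristic: each slice collects shard_size
    // buckets rather than inflating the count again per slice.
    static int sliceSize(int shardSize) {
        return shardSize;
    }
}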
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -187,6 +187,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Restore support for Java 8 for RestClient ([#11562](https://github.com/opensearch-project/OpenSearch/pull/11562))
- Add deleted doc count in _cat/shards ([#11678](https://github.com/opensearch-project/OpenSearch/pull/11678))
- Capture information for additional query types and aggregation types ([#11582](https://github.com/opensearch-project/OpenSearch/pull/11582))
+ - Use slice_size == shard_size heuristic in terms aggs for concurrent segment search and properly calculate the doc_count_error ([#11732](https://github.com/opensearch-project/OpenSearch/pull/11732))

### Deprecated

@@ -86,20 +86,23 @@ public void testShardSizeEqualsSizeString() throws Exception {
terms("keys").field("key")
.size(3)
.shardSize(3)
.showTermDocCountError(true)
.collectMode(randomFrom(SubAggCollectionMode.values()))
.order(BucketOrder.count(false))
)
.get();

Terms terms = response.getAggregations().get("keys");
List<? extends Terms.Bucket> buckets = terms.getBuckets();
assertThat(buckets.size(), equalTo(3));
assertEquals(3, buckets.size());
Map<String, Long> expected = new HashMap<>();
expected.put("1", 8L);
expected.put("3", 8L);
expected.put("2", 4L);
Long expectedDocCount;
for (Terms.Bucket bucket : buckets) {
assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsString())));
expectedDocCount = expected.get(bucket.getKeyAsString());
assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
}
}
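A note on the assertion change above (the same pattern repeats in the two hunks below): with slice-level reduces, a bucket's document count is only guaranteed up to its reported error bound, so the test accepts either an exact match or a count whose upper bound covers the expected value. A hypothetical helper, not part of this diff, expressing the same check:

// Hypothetical helper (not in the commit); assumes the test class's existing
// imports (Terms, assertTrue). The count is either exact, or count + error
// bound must reach the expected count.
static void assertDocCountWithinError(Terms.Bucket bucket, long expected) {
    assertTrue(bucket.getDocCount() == expected
        || bucket.getDocCount() + bucket.getDocCountError() >= expected);
}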

@@ -228,13 +231,15 @@ public void testShardSizeEqualsSizeLong() throws Exception {

        Terms terms = response.getAggregations().get("keys");
        List<? extends Terms.Bucket> buckets = terms.getBuckets();
-       assertThat(buckets.size(), equalTo(3));
+       assertEquals(3, buckets.size());
        Map<Integer, Long> expected = new HashMap<>();
        expected.put(1, 8L);
        expected.put(3, 8L);
        expected.put(2, 4L);
+       Long expectedDocCount;
        for (Terms.Bucket bucket : buckets) {
-           assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
+           expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
+           assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
        }
    }

@@ -362,13 +367,15 @@ public void testShardSizeEqualsSizeDouble() throws Exception {

        Terms terms = response.getAggregations().get("keys");
        List<? extends Terms.Bucket> buckets = terms.getBuckets();
-       assertThat(buckets.size(), equalTo(3));
+       assertEquals(3, buckets.size());
        Map<Integer, Long> expected = new HashMap<>();
        expected.put(1, 8L);
        expected.put(3, 8L);
        expected.put(2, 4L);
+       Long expectedDocCount;
        for (Terms.Bucket bucket : buckets) {
-           assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
+           expectedDocCount = expected.get(bucket.getKeyAsNumber().intValue());
+           assertTrue((bucket.getDocCount() == expectedDocCount) || bucket.getDocCount() + bucket.getDocCountError() >= expectedDocCount);
        }
    }

@@ -1106,6 +1106,11 @@ public void testDoubleValueFieldSubAggDesc() throws Exception {
     * 3 one-shard indices.
     */
    public void testFixedDocs() throws Exception {
+       // Force merge each shard down to 1 segment to verify results are the same between concurrent and non-concurrent search paths.
+       // Otherwise, concurrent segment search introduces additional error during the slice-level reduce, so different
+       // buckets may be returned. See https://github.com/opensearch-project/OpenSearch/issues/11680
+       client().admin().indices().prepareForceMerge("idx_fixed_docs_0", "idx_fixed_docs_1", "idx_fixed_docs_2").setMaxNumSegments(1).get();

        SearchResponse response = client().prepareSearch("idx_fixed_docs_0", "idx_fixed_docs_1", "idx_fixed_docs_2")
            .addAggregation(
                terms("terms").executionHint(randomExecutionHint())
