From 10959cb357f61f1d271c6065e32998409edae0c4 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 16 Aug 2024 10:23:25 -0700 Subject: [PATCH 1/6] Backport rest + unit tests Signed-off-by: Finn Carroll --- CHANGELOG.md | 2 + .../search.aggregation/360_date_histogram.yml | 91 ++++++++++++ .../test/search.aggregation/40_range.yml | 79 +++++++++++ .../bucket/range/RangeAggregatorTests.java | 129 +++++++++++++++++- 4 files changed, 299 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90ef2781705db..0b05fb087d80a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed +- Fix range aggregation optimization ignoring top level queries ([#15194](https://github.com/opensearch-project/OpenSearch/pull/15194)) + ### Security [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/57cd81da11e5cb831029719f0394e40aff68ced2...2.16 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml index 0ea9d3de00926..8c8a98b2db22c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml @@ -61,3 +61,94 @@ setup: - match: { aggregations.histo.buckets.8.doc_count: 1 } - match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" } - match: { aggregations.histo.buckets.12.doc_count: 1 } + +--- +"Date histogram aggregation w/ filter query test": + - skip: + version: " - 2.99.99" + reason: Backport fix to 2.16 + + - do: + bulk: + refresh: true + index: dhisto-agg-w-query + body: + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-12", "dow": "monday"}' + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-14", "dow": "wednesday"}' + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-19", "dow": "monday"}' + - '{"index": {}}' + - '{"routing": "route2", "date": "2024-08-13", "dow": "tuesday"}' + - '{"index": {}}' + - '{"routing": "route2", "date": "2024-08-15", "dow": "thursday"}' + + - do: + search: + index: dhisto-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + weekHisto: + date_histogram: + field: date + calendar_interval: week + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.weekHisto.buckets.0.doc_count: 2 } + - match: { aggregations.weekHisto.buckets.1.doc_count: 1 } + +--- +"Date histogram aggregation w/ shared field range test": + - do: + bulk: + refresh: true + index: dhisto-agg-w-query + body: + - '{"index": {}}' + - '{"date": "2024-10-31"}' + - '{"index": {}}' + - '{"date": "2024-11-11"}' + - '{"index": {}}' + - '{"date": "2024-11-28"}' + - '{"index": {}}' + - '{"date": "2024-12-25"}' + - '{"index": {}}' + - '{"date": "2025-01-01"}' + - '{"index": {}}' + - '{"date": "2025-02-14"}' + + - do: + search: + index: dhisto-agg-w-query + body: + profile: true + query: + range: + date: + gte: "2024-01-01" + lt: "2025-01-01" + aggregations: + monthHisto: + date_histogram: + field: date + calendar_interval: month + _source: false + + - match: { hits.total.value: 4 } + - match: { aggregations.monthHisto.buckets.0.doc_count: 1 } + - match: { aggregations.monthHisto.buckets.1.doc_count: 2 } + - match: { aggregations.monthHisto.buckets.2.doc_count: 1 } + - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 } + - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } + - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 0 } + - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml index 80aad96ce1f6b..1e1d2b0706d6b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -673,3 +673,82 @@ setup: - match: { aggregations.my_range.buckets.3.from: 1.5 } - is_false: aggregations.my_range.buckets.3.to - match: { aggregations.my_range.buckets.3.doc_count: 2 } + +--- +"Filter query w/ aggregation test": + - skip: + version: " - 2.99.99" + reason: Backport fix to 2.16 + + - do: + bulk: + refresh: true + index: range-agg-w-query + body: + - '{"index": {}}' + - '{"routing": "route1", "v": -10, "date": "2024-10-29"}' + - '{"index": {}}' + - '{"routing": "route1", "v": -5, "date": "2024-10-30"}' + - '{"index": {}}' + - '{"routing": "route1", "v": 10, "date": "2024-10-31"}' + - '{"index": {}}' + - '{"routing": "route2", "v": 15, "date": "2024-11-01"}' + - '{"index": {}}' + - '{"routing": "route2", "v": 20, "date": "2024-11-02"}' + + - do: + search: + index: range-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + NegPosAgg: + range: + field: v + keyed: true + ranges: + - to: 0 + key: "0" + - from: 0 + key: "1" + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.NegPosAgg.buckets.0.doc_count: 2 } + - match: { aggregations.NegPosAgg.buckets.1.doc_count: 1 } + + - do: + search: + index: range-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + HalloweenAgg: + date_range: + field: date + format: "yyyy-MM-dd" + keyed: true + ranges: + - to: "2024-11-01" + key: "to-october" + - from: "2024-11-01" + key: "from-september" + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.HalloweenAgg.buckets.to-october.doc_count: 3 } + - match: { aggregations.HalloweenAgg.buckets.from-september.doc_count: 0 } diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 7e796b684e869..630326967aae1 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -32,6 +32,9 @@ package org.opensearch.search.aggregations.bucket.range; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -39,9 +42,13 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.TestUtil; @@ -54,6 +61,7 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; import org.opensearch.index.mapper.NumberFieldMapper.NumberType; +import org.opensearch.index.mapper.ParseContext.Document; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregatorTestCase; import org.opensearch.search.aggregations.CardinalityUpperBound; @@ -65,6 +73,7 @@ import java.io.IOException; import java.time.ZoneOffset; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedList; @@ -457,6 +466,29 @@ public void testFloatType() throws IOException { ); } + public void testTopLevelRangeQuery() throws IOException { + NumberFieldType fieldType = new NumberFieldType(NumberType.INTEGER.typeName(), NumberType.INTEGER); + String fieldName = fieldType.numberType().typeName(); + Query query = IntPoint.newRangeQuery(fieldName, 5, 20); + + testRewriteOptimizationCase( + fieldType, + new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, + query, + new Number[] { 0.1, 4.0, 9, 11, 12, 19 }, + range -> { + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(3, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + false + ); + } + public void testUnsignedLongType() throws IOException { testRewriteOptimizationCase( new NumberFieldType(NumberType.UNSIGNED_LONG.typeName(), NumberType.UNSIGNED_LONG), @@ -493,6 +525,76 @@ public void testUnsignedLongType() throws IOException { ); } + /** + * Expect no optimization as top level query excludes documents. + */ + public void testTopLevelFilterTermQuery() throws IOException { + final String KEYWORD_FIELD_NAME = "route"; + final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE); + final NumberType numType = NUM_FIELD_TYPE.numberType(); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(0); + builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST); + Query boolQuery = builder.build(); + + List docList = new ArrayList<>(); + for (int i = 0; i < 3; i++) + docList.add(new Document()); + + docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false)); + docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false)); + docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false)); + docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route2", Field.Store.NO)); + + testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> { + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(1, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, false); + } + + /** + * Expect optimization as top level query is effective match all. + */ + public void testTopLevelEffectiveMatchAll() throws IOException { + final String KEYWORD_FIELD_NAME = "route"; + final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE); + final NumberType numType = NUM_FIELD_TYPE.numberType(); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(0); + builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST); + Query boolQuery = builder.build(); + + List docList = new ArrayList<>(); + for (int i = 0; i < 3; i++) + docList.add(new Document()); + + docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false)); + docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false)); + docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false)); + docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + + testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> { + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(2, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, true); + } + private void testCase( Query query, CheckedConsumer buildIndex, @@ -556,11 +658,34 @@ private void testRewriteOptimizationCase( ) throws IOException { NumberType numberType = fieldType.numberType(); String fieldName = numberType.typeName(); + List docs = new ArrayList<>(); + + for (Number dataPoint : dataPoints) { + Document doc = new Document(); + List fieldList = numberType.createFields(fieldName, dataPoint, true, true, false); + for (Field fld : fieldList) + doc.add(fld); + docs.add(doc); + } + + testRewriteOptimizationCase(fieldType, ranges, query, docs, verify, optimized); + } + + private void testRewriteOptimizationCase( + NumberFieldType fieldType, + double[][] ranges, + Query query, + List documents, + Consumer> verify, + boolean optimized + ) throws IOException { + NumberType numberType = fieldType.numberType(); + String fieldName = numberType.typeName(); try (Directory directory = newDirectory()) { try (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) { - for (Number dataPoint : dataPoints) { - indexWriter.addDocument(numberType.createFields(fieldName, dataPoint, true, true, false)); + for (Document doc : documents) { + indexWriter.addDocument(doc); } } From 8a90ea943ce96879ac4d393ec04e8e6eb94360d6 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 16 Aug 2024 11:12:18 -0700 Subject: [PATCH 2/6] segmentMatchAll() public Signed-off-by: Finn Carroll --- .../search/aggregations/bucket/FastFilterRewriteHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java index 2ab003fb94e33..97865269ae923 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java @@ -543,7 +543,7 @@ public static boolean isCompositeAggRewriteable(CompositeValuesSourceConfig[] so return sourceConfigs.length == 1 && sourceConfigs[0].valuesSource() instanceof RoundingValuesSource; } - private static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException { + public static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException { Weight weight = ctx.searcher().createWeight(ctx.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs(); } From 0977795dea0c3fa0d3b2b8972f8053a53485bfd3 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 16 Aug 2024 11:52:51 -0700 Subject: [PATCH 3/6] Remove rest test skip per backport Signed-off-by: Finn Carroll --- .../test/search.aggregation/360_date_histogram.yml | 4 ---- .../rest-api-spec/test/search.aggregation/40_range.yml | 4 ---- 2 files changed, 8 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml index 8c8a98b2db22c..00499f3e9def8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml @@ -64,10 +64,6 @@ setup: --- "Date histogram aggregation w/ filter query test": - - skip: - version: " - 2.99.99" - reason: Backport fix to 2.16 - - do: bulk: refresh: true diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml index 1e1d2b0706d6b..7a93a61aa8d1f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -676,10 +676,6 @@ setup: --- "Filter query w/ aggregation test": - - skip: - version: " - 2.99.99" - reason: Backport fix to 2.16 - - do: bulk: refresh: true From a762cb161072f04aa6e625a2087629c82e67c601 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 16 Aug 2024 11:53:50 -0700 Subject: [PATCH 4/6] Add segment match all guard for range aggregator Signed-off-by: Finn Carroll --- .../aggregations/bucket/range/RangeAggregator.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 2ba2b06514de1..ab71a382211ca 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -68,6 +68,7 @@ import java.util.function.BiConsumer; import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg; +import static org.opensearch.search.aggregations.bucket.FastFilterRewriteHelper.segmentMatchAll; /** * Aggregate all docs that match given ranges. @@ -298,12 +299,12 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - boolean optimized = fastFilterContext.tryFastFilterAggregation( + if (segmentMatchAll(context, ctx) && fastFilterContext.tryFastFilterAggregation( ctx, this::incrementBucketDocCount, - (activeIndex) -> subBucketOrdinal(0, (int) activeIndex) - ); - if (optimized) throw new CollectionTerminatedException(); + (activeIndex) -> subBucketOrdinal(0, (int) activeIndex))) { + throw new CollectionTerminatedException(); + } final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); return new LeafBucketCollectorBase(sub, values) { From e22e4c4a4d6733ae18a0c0b71d689ef65ad8528d Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 16 Aug 2024 11:59:48 -0700 Subject: [PATCH 5/6] Changelog Signed-off-by: Finn Carroll --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b05fb087d80a..bf74ab8f5c71c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed -- Fix range aggregation optimization ignoring top level queries ([#15194](https://github.com/opensearch-project/OpenSearch/pull/15194)) +- Fix range aggregation optimization ignoring top level queries ([#15286](https://github.com/opensearch-project/OpenSearch/pull/15286)) ### Security From f6f9c4e156da20cb5e5c9336369edf1b37352a9f Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 16 Aug 2024 12:04:05 -0700 Subject: [PATCH 6/6] Spotless apply Signed-off-by: Finn Carroll --- .../aggregations/bucket/range/RangeAggregator.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index ab71a382211ca..620f1e602b53c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -299,10 +299,12 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - if (segmentMatchAll(context, ctx) && fastFilterContext.tryFastFilterAggregation( - ctx, - this::incrementBucketDocCount, - (activeIndex) -> subBucketOrdinal(0, (int) activeIndex))) { + if (segmentMatchAll(context, ctx) + && fastFilterContext.tryFastFilterAggregation( + ctx, + this::incrementBucketDocCount, + (activeIndex) -> subBucketOrdinal(0, (int) activeIndex) + )) { throw new CollectionTerminatedException(); }