From 7c44da1b423cbb66b0a7650004daaec5ec8f3180 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 18 Jul 2024 14:34:58 -0700 Subject: [PATCH 01/29] Add sub agg support for fast filter optmization - RangeAgg + DateHistoAgg + AutoDateHistoAgg + CompositAgg Signed-off-by: Finn Carroll --- .../bucket/composite/CompositeAggregator.java | 4 +- .../AutoDateHistogramAggregator.java | 4 +- .../histogram/DateHistogramAggregator.java | 4 +- .../bucket/range/RangeAggregator.java | 4 +- .../filterrewrite/AggregatorBridge.java | 3 +- .../DateHistogramAggregatorBridge.java | 20 +++++--- .../filterrewrite/OptimizationContext.java | 10 ++-- .../filterrewrite/RangeAggregatorBridge.java | 20 +++++--- .../filterrewrite/TreeTraversal.java | 50 +++++++++---------- 9 files changed, 68 insertions(+), 51 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index b51bea511e067..33fa0cf0e4ca8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -222,7 +222,7 @@ protected Function bucketOrdProducer() { return (key) -> bucketOrds.add(0, getRoundingPrepared().round((long) key)); } }); - if (optimizationContext.canOptimize(parent, subAggregators.length, context)) { + if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); } } @@ -559,7 +559,7 @@ private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) t @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - boolean optimized = optimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = optimizationContext.tryOptimize(ctx, sub, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); if (optimized) throw new CollectionTerminatedException(); finishLeaf(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index 9263d3935538d..dd6a4be8fbf7b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -205,7 +205,7 @@ protected Function bucketOrdProducer() { } }); - if (optimizationContext.canOptimize(parent, subAggregators.length, context)) { + if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); } } @@ -239,7 +239,7 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBuc return LeafBucketCollector.NO_OP_COLLECTOR; } - boolean optimized = optimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = optimizationContext.tryOptimize(ctx, sub, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); if (optimized) throw new CollectionTerminatedException(); final SortedNumericDocValues values = valuesSource.longValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 20f62f4d6e3f8..3be7646518dc4 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -151,7 +151,7 @@ protected Function bucketOrdProducer() { } }); - if (optimizationContext.canOptimize(parent, subAggregators.length, context)) { + if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); } } @@ -170,7 +170,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol return LeafBucketCollector.NO_OP_COLLECTOR; } - boolean optimized = optimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = optimizationContext.tryOptimize(ctx, sub, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); if (optimized) throw new CollectionTerminatedException(); SortedNumericDocValues values = valuesSource.longValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index c206d1e522e01..312ba60dcfb83 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -297,7 +297,7 @@ protected Function bucketOrdProducer() { return (activeIndex) -> subBucketOrdinal(0, (int) activeIndex); } }); - if (optimizationContext.canOptimize(parent, subAggregators.length, context)) { + if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); } } @@ -312,7 +312,7 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - boolean optimized = optimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false); + boolean optimized = optimizationContext.tryOptimize(ctx, sub, this::incrementBucketDocCount, false); if (optimized) throw new CollectionTerminatedException(); final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java index 9e1c75e659989..17b8c9db9a782 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java @@ -11,6 +11,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.LeafBucketCollector; import java.io.IOException; import java.util.function.BiConsumer; @@ -72,5 +73,5 @@ void setOptimizationContext(OptimizationContext context) { * @param values the point values (index structure for numeric values) for a segment * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket */ - public abstract void tryOptimize(PointValues values, BiConsumer incrementDocCount) throws IOException; + public abstract void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException; } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index da53e4aa73684..705e16b93264c 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -16,11 +16,13 @@ import org.opensearch.common.Rounding; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.bucket.histogram.LongBounds; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; import java.io.IOException; +import java.util.List; import java.util.OptionalLong; import java.util.function.BiConsumer; import java.util.function.Function; @@ -123,20 +125,26 @@ protected int getSize() { } @Override - public final void tryOptimize(PointValues values, BiConsumer incrementDocCount) throws IOException { + public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { int size = getSize(); DateFieldMapper.DateFieldType fieldType = getFieldType(); - BiConsumer incrementFunc = (activeIndex, docCount) -> { + BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); rangeStart = fieldType.convertNanosToMillis(rangeStart); long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docCount); + incrementDocCount.accept(ord, (long) docIDs.size()); + + try { + for (int docID : docIDs) { + sub.collect(docID, ord); + } + } catch ( IOException ioe) { + throw new RuntimeException(ioe); + } }; - optimizationContext.consumeDebugInfo( - multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), incrementFunc, size) - ); + optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, size)); } private static long getBucketOrd(long bucketOrd) { diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java index d4d5880b37ce1..92485911d3daf 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java @@ -15,6 +15,7 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.DocCountFieldMapper; +import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -54,10 +55,9 @@ public OptimizationContext(AggregatorBridge aggregatorBridge) { this.aggregatorBridge = aggregatorBridge; } - public boolean canOptimize(final Object parent, final int subAggLength, SearchContext context) { + public boolean canOptimize(final Object parent, SearchContext context) { if (context.maxAggRewriteFilters() == 0) return false; - - if (parent != null || subAggLength != 0) return false; + if (parent != null) return false; this.canOptimize = aggregatorBridge.canOptimize(); if (canOptimize) { @@ -102,7 +102,7 @@ Ranges getRanges() { * @param incrementDocCount consume the doc_count results for certain ordinal * @param segmentMatchAll if your optimization can prepareFromSegment, you should pass in this flag to decide whether to prepareFromSegment */ - public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer incrementDocCount, boolean segmentMatchAll) + public boolean tryOptimize(final LeafReaderContext leafCtx, LeafBucketCollector sub, final BiConsumer incrementDocCount, boolean segmentMatchAll) throws IOException { segments++; if (!canOptimize) { @@ -129,7 +129,7 @@ public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer incrementDocCount) throws IOException { + public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { int size = Integer.MAX_VALUE; - BiConsumer incrementFunc = (activeIndex, docCount) -> { + BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { long ord = bucketOrdProducer().apply(activeIndex); - incrementDocCount.accept(ord, (long) docCount); + incrementDocCount.accept(ord, (long) docIDs.size()); + + try { + for (int docID : docIDs) { + sub.collect(docID, activeIndex); + } + } catch ( IOException ioe) { + throw new RuntimeException(ioe); + } }; - optimizationContext.consumeDebugInfo( - multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), incrementFunc, size) - ); + optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, size)); } /** diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index aad833324a841..bf3d1a408eb42 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -16,6 +16,8 @@ import org.opensearch.common.CheckedRunnable; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.function.BiConsumer; import static org.opensearch.search.optimization.filterrewrite.Helper.loggerName; @@ -42,14 +44,14 @@ private TreeTraversal() {} * * @param tree the point tree to traverse * @param ranges the set of ranges to intersect with - * @param incrementDocCount a callback to increment the document count for a range bucket + * @param collectRangeIDs a callback to collect the doc ids for a range bucket * @param maxNumNonZeroRanges the maximum number of non-zero ranges to collect * @return a {@link OptimizationContext.DebugInfo} object containing debug information about the traversal */ static OptimizationContext.DebugInfo multiRangesTraverse( final PointValues.PointTree tree, final Ranges ranges, - final BiConsumer incrementDocCount, + final BiConsumer> collectRangeIDs, final int maxNumNonZeroRanges ) throws IOException { OptimizationContext.DebugInfo debugInfo = new OptimizationContext.DebugInfo(); @@ -58,7 +60,7 @@ static OptimizationContext.DebugInfo multiRangesTraverse( logger.debug("No ranges match the query, skip the fast filter optimization"); return debugInfo; } - RangeCollectorForPointTree collector = new RangeCollectorForPointTree(incrementDocCount, maxNumNonZeroRanges, ranges, activeIndex); + RangeCollectorForPointTree collector = new RangeCollectorForPointTree(collectRangeIDs, maxNumNonZeroRanges, ranges, activeIndex); PointValues.IntersectVisitor visitor = getIntersectVisitor(collector); try { intersectWithRanges(visitor, tree, collector, debugInfo); @@ -80,7 +82,7 @@ private static void intersectWithRanges( switch (r) { case CELL_INSIDE_QUERY: - collector.countNode((int) pointTree.size()); + pointTree.visitDocIDs(visitor); debug.visitInner(); break; case CELL_CROSSES_QUERY: @@ -110,19 +112,21 @@ public void visit(int docID) { @Override public void visit(int docID, byte[] packedValue) throws IOException { - visitPoints(packedValue, collector::count); + if (canCollect(packedValue)) { + collector.count(docID); + } } @Override public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { - visitPoints(packedValue, () -> { + if (canCollect(packedValue)) { for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { - collector.count(); + collector.count(iterator.docID()); } - }); + } } - private void visitPoints(byte[] packedValue, CheckedRunnable collect) throws IOException { + private boolean canCollect(byte[] packedValue) { if (!collector.withinUpperBound(packedValue)) { collector.finalizePreviousRange(); if (collector.iterateRangeEnd(packedValue)) { @@ -130,9 +134,7 @@ private void visitPoints(byte[] packedValue, CheckedRunnable collec } } - if (collector.withinRange(packedValue)) { - collect.run(); - } + return collector.withinRange(packedValue); } @Override @@ -158,39 +160,37 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue } private static class RangeCollectorForPointTree { - private final BiConsumer incrementRangeDocCount; - private int counter = 0; - + private final BiConsumer> collectRangeIDs; + private final List DIDList = new ArrayList<>(); private final Ranges ranges; private int activeIndex; - private int visitedRange = 0; private final int maxNumNonZeroRange; public RangeCollectorForPointTree( - BiConsumer incrementRangeDocCount, + BiConsumer> collectRangeIDs, int maxNumNonZeroRange, Ranges ranges, int activeIndex ) { - this.incrementRangeDocCount = incrementRangeDocCount; + this.collectRangeIDs = collectRangeIDs; this.maxNumNonZeroRange = maxNumNonZeroRange; this.ranges = ranges; this.activeIndex = activeIndex; } - private void count() { - counter++; + private void count(int docID) { + DIDList.add(docID); } - private void countNode(int count) { - counter += count; + private void countNode(List docIDs) { + DIDList.addAll(docIDs); } private void finalizePreviousRange() { - if (counter > 0) { - incrementRangeDocCount.accept(activeIndex, counter); - counter = 0; + if (!DIDList.isEmpty()) { + collectRangeIDs.accept(activeIndex, DIDList); + DIDList.clear(); } } From adda80a888d806f97897bb84271d3a3187c32382 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 18 Jul 2024 14:35:26 -0700 Subject: [PATCH 02/29] Add yaml rest test for data histo sub agg Signed-off-by: Finn Carroll --- .../test/search.aggregation/10_histogram.yml | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index a75b1d0eac793..0bcf1345dcd17 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -706,3 +706,77 @@ setup: - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 } - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } + +--- +"date_histogram with range sub aggregation": + - do: + indices.create: + index: test_date_hist_range_sub_agg + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + date: + type: date + - do: + bulk: + refresh: true + index: test + body: + - '{"index": {}}' + - '{"date": "2020-03-01", "v": 1}' + - '{"index": {}}' + - '{"date": "2020-03-01", "v": 11}' + - '{"index": {}}' + - '{"date": "2020-03-02", "v": 12}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 23}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 28}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 28}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 39}' + - '{"index": {}}' + - '{"date": "2020-03-09", "v": 4}' + - do: + search: + body: + size: 0 + aggs: + histo: + date_histogram: + field: date + calendar_interval: day + aggs: + my_range: + range: + field: v + ranges: + - to: 10 + - from: 10 + to: 20 + - from: 20 + to: 30 + - from: 30 + to: 40 + + - match: { hits.total.value: 9 } + - length: { aggregations.histo.buckets: 9 } + + - match: { aggregations.histo.buckets.0.key_as_string: "2020-03-01T00:00:00.000Z" } + - match: { aggregations.histo.buckets.1.key_as_string: "2020-03-02T00:00:00.000Z" } + - match: { aggregations.histo.buckets.7.key_as_string: "2020-03-08T00:00:00.000Z" } + - match: { aggregations.histo.buckets.8.key_as_string: "2020-03-09T00:00:00.000Z" } + + - match: { aggregations.histo.buckets.0.doc_count: 2 } + - match: { aggregations.histo.buckets.1.doc_count: 2 } + - match: { aggregations.histo.buckets.2.doc_count: 0 } + - match: { aggregations.histo.buckets.7.doc_count: 4 } + - match: { aggregations.histo.buckets.8.doc_count: 1 } + + - match: { aggregations.histo.buckets.0.my_range.buckets.0.doc_count: 1 } + - match: { aggregations.histo.buckets.7.my_range.buckets.2.doc_count: 2 } + - match: { aggregations.histo.buckets.7.my_range.buckets.1.doc_count: 1 } From b3f9066c404c4c9e8e2769f240029c32aaf55994 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Mon, 22 Jul 2024 09:32:05 -0700 Subject: [PATCH 03/29] Avoid visiting individual docIDs Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/TreeTraversal.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index bf3d1a408eb42..603c3ea448cea 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -82,7 +82,7 @@ private static void intersectWithRanges( switch (r) { case CELL_INSIDE_QUERY: - pointTree.visitDocIDs(visitor); + pointTree.visitDocValues(visitor); debug.visitInner(); break; case CELL_CROSSES_QUERY: From 942c4e757ad0a1f5de94aef9a2e518ddfbd5c1f8 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Mon, 22 Jul 2024 15:22:01 -0700 Subject: [PATCH 04/29] Fix histogram - range yaml rest test Signed-off-by: Finn Carroll --- .../test/search.aggregation/10_histogram.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index 0bcf1345dcd17..2d7a7db1d740e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -763,7 +763,7 @@ setup: - from: 30 to: 40 - - match: { hits.total.value: 9 } + - match: { hits.total.value: 8 } - length: { aggregations.histo.buckets: 9 } - match: { aggregations.histo.buckets.0.key_as_string: "2020-03-01T00:00:00.000Z" } @@ -772,11 +772,14 @@ setup: - match: { aggregations.histo.buckets.8.key_as_string: "2020-03-09T00:00:00.000Z" } - match: { aggregations.histo.buckets.0.doc_count: 2 } - - match: { aggregations.histo.buckets.1.doc_count: 2 } + - match: { aggregations.histo.buckets.1.doc_count: 1 } - match: { aggregations.histo.buckets.2.doc_count: 0 } - match: { aggregations.histo.buckets.7.doc_count: 4 } - match: { aggregations.histo.buckets.8.doc_count: 1 } - match: { aggregations.histo.buckets.0.my_range.buckets.0.doc_count: 1 } - - match: { aggregations.histo.buckets.7.my_range.buckets.2.doc_count: 2 } - - match: { aggregations.histo.buckets.7.my_range.buckets.1.doc_count: 1 } + + - match: { aggregations.histo.buckets.7.my_range.buckets.0.doc_count: 0 } + - match: { aggregations.histo.buckets.7.my_range.buckets.1.doc_count: 0 } + - match: { aggregations.histo.buckets.7.my_range.buckets.2.doc_count: 3 } + - match: { aggregations.histo.buckets.7.my_range.buckets.3.doc_count: 1 } From 2a69ed30467d04c5269e911236279d519b00459d Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Tue, 23 Jul 2024 15:36:20 -0700 Subject: [PATCH 05/29] Disable sub aggs on composite date histo Signed-off-by: Finn Carroll --- .../bucket/composite/CompositeAggregator.java | 1 + .../CompositeAggregatorBridge.java | 22 +++++++++++++++++++ .../DateHistogramAggregatorBridge.java | 6 ++--- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index 33fa0cf0e4ca8..9ca27a3459ecf 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -173,6 +173,7 @@ public final class CompositeAggregator extends BucketsAggregator { @Override public boolean canOptimize() { + if (parent != null || subAggregators.length != 0) return false; if (canOptimize(sourceConfigs)) { this.valuesSource = (RoundingValuesSource) sourceConfigs[0].valuesSource(); if (rawAfterKey != null) { diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java index 1982793332605..9cd6c35ad9541 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java @@ -8,11 +8,20 @@ package org.opensearch.search.optimization.filterrewrite; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceConfig; import org.opensearch.search.aggregations.bucket.composite.RoundingValuesSource; +import java.io.IOException; +import java.util.List; +import java.util.function.BiConsumer; + +import static org.opensearch.search.optimization.filterrewrite.TreeTraversal.multiRangesTraverse; + /** * For composite aggregation to do optimization when it only has a single date histogram source */ @@ -33,4 +42,17 @@ private boolean canOptimize(boolean missing, boolean hasScript, MappedFieldType } return false; } + + @Override + public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { + DateFieldMapper.DateFieldType fieldType = getFieldType(); + BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + incrementDocCount.accept(ord, (long) docIDs.size()); + }; + + optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, getSize())); + } } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index 705e16b93264c..bff63137f8575 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -115,7 +115,7 @@ protected long[] processHardBounds(long[] bounds, LongBounds hardBounds) { return bounds; } - private DateFieldMapper.DateFieldType getFieldType() { + protected DateFieldMapper.DateFieldType getFieldType() { assert fieldType instanceof DateFieldMapper.DateFieldType; return (DateFieldMapper.DateFieldType) fieldType; } @@ -125,7 +125,7 @@ protected int getSize() { } @Override - public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { + public void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { int size = getSize(); DateFieldMapper.DateFieldType fieldType = getFieldType(); @@ -147,7 +147,7 @@ public final void tryOptimize(PointValues values, BiConsumer increme optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, size)); } - private static long getBucketOrd(long bucketOrd) { + protected static long getBucketOrd(long bucketOrd) { if (bucketOrd < 0) { // already seen bucketOrd = -1 - bucketOrd; } From 2290c12c3fc76718ea3c8c8749319a27b2813594 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Mon, 29 Jul 2024 19:07:37 +0000 Subject: [PATCH 06/29] No need to scrutinize doc values when CELL_INSIDE_QUERY Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/TreeTraversal.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 603c3ea448cea..bf3d1a408eb42 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -82,7 +82,7 @@ private static void intersectWithRanges( switch (r) { case CELL_INSIDE_QUERY: - pointTree.visitDocValues(visitor); + pointTree.visitDocIDs(visitor); debug.visitInner(); break; case CELL_CROSSES_QUERY: From aa36c326e82cd98275dfc89ace3955b06772c4f7 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Mon, 29 Jul 2024 19:08:10 +0000 Subject: [PATCH 07/29] histo and range sub agg rest api tests Signed-off-by: Finn Carroll --- .../test/search.aggregation/10_histogram.yml | 2 +- .../test/search.aggregation/40_range.yml | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index 2d7a7db1d740e..940e5adc6468f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -723,7 +723,7 @@ setup: - do: bulk: refresh: true - index: test + index: test_date_hist_range_sub_agg body: - '{"index": {}}' - '{"date": "2020-03-01", "v": 1}' diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml index 80aad96ce1f6b..807117bee81ea 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -673,3 +673,71 @@ setup: - match: { aggregations.my_range.buckets.3.from: 1.5 } - is_false: aggregations.my_range.buckets.3.to - match: { aggregations.my_range.buckets.3.doc_count: 2 } + +--- +"range with auto date sub aggregation": + - do: + indices.create: + index: test_range_auto_date + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + date: + type: date + - do: + bulk: + refresh: true + index: test_range_auto_date + body: + - '{"index": {}}' + - '{"date": "2020-03-01", "v": 1}' + - '{"index": {}}' + - '{"date": "2020-03-01", "v": 11}' + - '{"index": {}}' + - '{"date": "2020-03-02", "v": 12}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 23}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 28}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 28}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 39}' + - '{"index": {}}' + - '{"date": "2020-03-09", "v": 4}' + - do: + search: + body: + size: 0 + aggs: + my_range: + range: + field: v + ranges: + - to: 10 + - from: 10 + to: 20 + - from: 20 + aggs: + histo: + date_histogram: + field: date + calendar_interval: day + + - match: { hits.total.value: 8 } + - length: { aggregations.my_range.buckets: 3 } + + - match: { aggregations.my_range.buckets.0.key: "*-10.0" } + - match: { aggregations.my_range.buckets.1.key: "10.0-20.0" } + - match: { aggregations.my_range.buckets.2.key: "20.0-*" } + + - match: { aggregations.my_range.buckets.0.doc_count: 2 } + - match: { aggregations.my_range.buckets.1.doc_count: 2 } + - match: { aggregations.my_range.buckets.2.doc_count: 4 } + + - match: { aggregations.my_range.buckets.0.histo.buckets.0.doc_count: 1 } + - match: { aggregations.my_range.buckets.0.histo.buckets.1.doc_count: 0 } + - match: { aggregations.my_range.buckets.2.histo.buckets.0.doc_count: 4 } From fbbdc45b5ead1aa93ac96f230439f3635fd066c0 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 26 Jul 2024 11:31:54 -0700 Subject: [PATCH 08/29] Range with auto date sub agg rest test Signed-off-by: Finn Carroll --- .../330_auto_date_histogram.yml | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml index 0897e0bdd894b..593083cb384ef 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml @@ -158,3 +158,75 @@ setup: - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 } - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } + +--- +"Range aggregation with auto_date_histogram sub-aggregation": + - do: + indices.create: + index: sub_agg_profile + body: + mappings: + properties: + "@timestamp": + type: date + metrics.size: + type: long + + - do: + bulk: + refresh: true + index: sub_agg_profile + body: + - '{"index": {}}' + - '{"date": "2020-03-01", "v": 1}' + - '{"index": {}}' + - '{"date": "2020-03-02", "v": 2}' + - '{"index": {}}' + - '{"date": "2020-03-03", "v": 3}' + - '{"index": {}}' + - '{"date": "2020-04-09", "v": 4}' + - '{"index": {}}' + - '{"date": "2020-03-08", "v": 13}' + - '{"index": {}}' + - '{"date": "2020-03-09", "v": 14}' + - '{"index": {}}' + - '{"date": "2020-03-09", "v": 15}' + - '{"index": {}}' + - '{"date": "2020-04-11", "v": 19}' + + - do: + search: + index: sub_agg_profile + body: + size: 0 + aggs: + range_histo: + range: + field: v + ranges: + - to: 0 + - from: 0 + to: 10 + - from: 10 + aggs: + date: + auto_date_histogram: + field: "date" + buckets: 3 + + - match: { hits.total.value: 8 } + - length: { aggregations.range_histo.buckets: 3 } + + - match: { aggregations.range_histo.buckets.0.key: "*-0.0" } + - match: { aggregations.range_histo.buckets.1.key: "0.0-10.0" } + - match: { aggregations.range_histo.buckets.2.key: "10.0-*" } + + - match: { aggregations.range_histo.buckets.0.doc_count: 0 } + - match: { aggregations.range_histo.buckets.1.doc_count: 4 } + - match: { aggregations.range_histo.buckets.2.doc_count: 4 } + + - match: { aggregations.range_histo.buckets.1.date.buckets.0.doc_count: 3 } + - match: { aggregations.range_histo.buckets.1.date.buckets.1.doc_count: 1 } + + - match: { aggregations.range_histo.buckets.2.date.buckets.0.doc_count: 3 } + - match: { aggregations.range_histo.buckets.2.date.buckets.1.doc_count: 1 } From 556861892eaac8d28d481a49d52915218427aa11 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Wed, 31 Jul 2024 10:04:32 -0700 Subject: [PATCH 09/29] Re-apply must visit every leaf to collect doc id Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/TreeTraversal.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index bf3d1a408eb42..2f57e981baf7f 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -104,10 +104,7 @@ private static PointValues.IntersectVisitor getIntersectVisitor(RangeCollectorFo return new PointValues.IntersectVisitor() { @Override public void visit(int docID) { - // this branch should be unreachable - throw new UnsupportedOperationException( - "This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited" - ); + collector.count(docID); } @Override From ad27f81f6af5673f058d6ddaae9871ad2314693d Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Wed, 31 Jul 2024 10:05:33 -0700 Subject: [PATCH 10/29] Micro benchmark for collect doc ids with multiRangeTraverseTree Signed-off-by: Finn Carroll --- .../BKDTreeMultiRangesTraverseBenchmark.java | 157 ++++++++++++++++++ .../optimization/filterrewrite/Ranges.java | 4 +- .../filterrewrite/TreeTraversal.java | 4 +- 3 files changed, 161 insertions(+), 4 deletions(-) create mode 100644 benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java new file mode 100644 index 0000000000000..0b45ea1fe59ac --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java @@ -0,0 +1,157 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.benchmark.search.aggregations; + +import org.openjdk.jmh.annotations.*; + +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.NumericPointEncoder; +import org.opensearch.search.optimization.filterrewrite.Ranges; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; + +import static org.opensearch.search.optimization.filterrewrite.TreeTraversal.multiRangesTraverse; + +@Warmup(iterations = 10) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Fork(value = 1) +public class BKDTreeMultiRangesTraverseBenchmark { + @State(Scope.Benchmark) + public static class treeState { + @Param({ "10000", "10000000" }) + int treeSize; + + @Param({ "10000", "10000000" }) + int valMax; + + @Param({ "10", "100" }) + int buckets; + + @Param({ "12345" }) + int seed; + + private Random random; + + Path tmpDir; + Directory directory; + IndexWriter writer; + IndexReader reader; + + // multiRangesTraverse params + PointValues.PointTree pointTree; + Ranges ranges; + BiConsumer> collectRangeIDs; + int maxNumNonZeroRanges = Integer.MAX_VALUE; + + @Setup + public void setup() throws IOException { + random = new Random(seed); + tmpDir = Files.createTempDirectory("tree-test"); + directory = FSDirectory.open(tmpDir); + writer = new IndexWriter(directory, new IndexWriterConfig()); + + for (int i = 0; i < treeSize; i++) { + writer.addDocument(List.of(new IntField("val", random.nextInt(valMax), Field.Store.NO))); + } + + reader = DirectoryReader.open(writer); + + // should only contain single segment + for (LeafReaderContext lrc : reader.leaves()) { + pointTree = lrc.reader().getPointValues("val").getPointTree(); + } + + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType("val", NumberFieldMapper.NumberType.INTEGER); + NumericPointEncoder numericPointEncoder = (NumericPointEncoder) fieldType; + + int bucketWidth = valMax/buckets; + byte[][] lowers = new byte[buckets][]; + byte[][] uppers = new byte[buckets][]; + for (int i = 0; i < buckets; i++) { + lowers[i] = numericPointEncoder.encodePoint(i * bucketWidth); + uppers[i] = numericPointEncoder.encodePoint(i * bucketWidth); + } + + ranges = new Ranges(lowers, uppers); + } + + @TearDown + public void tearDown() throws IOException { + for (String indexFile : FSDirectory.listAll(tmpDir)) { + Files.deleteIfExists(tmpDir.resolve(indexFile)); + } + Files.deleteIfExists(tmpDir); + } + } + + @Benchmark + public Map> multiRangeTraverseTree(treeState state) throws Exception { + Map> mockIDCollect = new HashMap<>(); + + BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { + if (mockIDCollect.containsKey(activeIndex)) { + mockIDCollect.get(activeIndex).addAll(docIDs); + } else { + mockIDCollect.put(activeIndex, docIDs); + } + }; + + multiRangesTraverse(state.pointTree, state.ranges, collectRangeIDs, state.maxNumNonZeroRanges); + + return mockIDCollect; + } +} diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java index ebf4b5c9b2b9c..9cb6cc6b52ff3 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java @@ -13,14 +13,14 @@ /** * Internal ranges representation for the filter rewrite optimization */ -final class Ranges { +public final class Ranges { byte[][] lowers; // inclusive byte[][] uppers; // exclusive int size; int byteLen; static ArrayUtil.ByteArrayComparator comparator; - Ranges(byte[][] lowers, byte[][] uppers) { + public Ranges(byte[][] lowers, byte[][] uppers) { this.lowers = lowers; this.uppers = uppers; assert lowers.length == uppers.length; diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 2f57e981baf7f..6ee3b7831b36e 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -34,7 +34,7 @@ * PointValues.IntersectVisitor} implementation is responsible for the actual visitation and * document count collection. */ -final class TreeTraversal { +public final class TreeTraversal { private TreeTraversal() {} private static final Logger logger = LogManager.getLogger(loggerName); @@ -48,7 +48,7 @@ private TreeTraversal() {} * @param maxNumNonZeroRanges the maximum number of non-zero ranges to collect * @return a {@link OptimizationContext.DebugInfo} object containing debug information about the traversal */ - static OptimizationContext.DebugInfo multiRangesTraverse( + public static OptimizationContext.DebugInfo multiRangesTraverse( final PointValues.PointTree tree, final Ranges ranges, final BiConsumer> collectRangeIDs, From a0aabe2e8c207770379bbeede2b0bc6a5ce575ef Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Wed, 31 Jul 2024 10:28:51 -0700 Subject: [PATCH 11/29] Set node name in log config to avoid console barf Signed-off-by: Finn Carroll --- .../aggregations/BKDTreeMultiRangesTraverseBenchmark.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java index 0b45ea1fe59ac..134dcdcb7266f 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java @@ -34,9 +34,6 @@ import org.openjdk.jmh.annotations.*; import org.apache.lucene.index.PointValues; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TotalHits; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.index.DirectoryReader; @@ -52,6 +49,7 @@ import java.nio.file.Path; import java.util.List; +import org.opensearch.common.logging.LogConfigurator; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.NumericPointEncoder; @@ -99,6 +97,7 @@ public static class treeState { @Setup public void setup() throws IOException { + LogConfigurator.setNodeName("sample-name"); random = new Random(seed); tmpDir = Files.createTempDirectory("tree-test"); directory = FSDirectory.open(tmpDir); From d5dfbed8f692be0b6dcfa5b37f132b0ff5a0d05f Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Wed, 31 Jul 2024 10:44:15 -0700 Subject: [PATCH 12/29] Remove collector from intersectWithRanges - not used Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/TreeTraversal.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 6ee3b7831b36e..240d81f36e01f 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -63,7 +63,7 @@ public static OptimizationContext.DebugInfo multiRangesTraverse( RangeCollectorForPointTree collector = new RangeCollectorForPointTree(collectRangeIDs, maxNumNonZeroRanges, ranges, activeIndex); PointValues.IntersectVisitor visitor = getIntersectVisitor(collector); try { - intersectWithRanges(visitor, tree, collector, debugInfo); + intersectWithRanges(visitor, tree, debugInfo); } catch (CollectionTerminatedException e) { logger.debug("Early terminate since no more range to collect"); } @@ -75,7 +75,6 @@ public static OptimizationContext.DebugInfo multiRangesTraverse( private static void intersectWithRanges( PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, - RangeCollectorForPointTree collector, OptimizationContext.DebugInfo debug ) throws IOException { PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); @@ -88,7 +87,7 @@ private static void intersectWithRanges( case CELL_CROSSES_QUERY: if (pointTree.moveToChild()) { do { - intersectWithRanges(visitor, pointTree, collector, debug); + intersectWithRanges(visitor, pointTree, debug); } while (pointTree.moveToSibling()); pointTree.moveToParent(); } else { From 9bd609412762852b2583bcec9e0d86bc8cfa8761 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Wed, 31 Jul 2024 11:44:09 -0700 Subject: [PATCH 13/29] Remove un used import/var Signed-off-by: Finn Carroll --- .../optimization/filterrewrite/RangeAggregatorBridge.java | 4 ++-- .../search/optimization/filterrewrite/TreeTraversal.java | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index 5dac4b41b43f8..3bda6c349de86 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -77,7 +77,7 @@ public void prepareFromSegment(LeafReaderContext leaf) { @Override public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { - int size = Integer.MAX_VALUE; + BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { long ord = bucketOrdProducer().apply(activeIndex); @@ -92,7 +92,7 @@ public final void tryOptimize(PointValues values, BiConsumer increme } }; - optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, size)); + optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, Integer.MAX_VALUE)); } /** diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 240d81f36e01f..f00b2392c44da 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -13,7 +13,6 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.DocIdSetIterator; -import org.opensearch.common.CheckedRunnable; import java.io.IOException; import java.util.ArrayList; From cae98649a520e2a222894fd404143b0b5852bdf3 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 1 Aug 2024 10:54:48 -0700 Subject: [PATCH 14/29] Move all range checking functionality to Ranges Signed-off-by: Finn Carroll --- .../optimization/filterrewrite/Ranges.java | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java index 9cb6cc6b52ff3..18c88a4af2e67 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java @@ -14,11 +14,11 @@ * Internal ranges representation for the filter rewrite optimization */ public final class Ranges { + static ArrayUtil.ByteArrayComparator comparator; byte[][] lowers; // inclusive byte[][] uppers; // exclusive int size; int byteLen; - static ArrayUtil.ByteArrayComparator comparator; public Ranges(byte[][] lowers, byte[][] uppers) { this.lowers = lowers; @@ -29,6 +29,13 @@ public Ranges(byte[][] lowers, byte[][] uppers) { comparator = ArrayUtil.getUnsignedComparator(byteLen); } + public static int compareByteValue(byte[] value1, byte[] value2) { return comparator.compare(value1, 0, value2, 0); } + public static boolean withinLowerBound(byte[] value, byte[] lowerBound) { return compareByteValue(value, lowerBound) >= 0; } + public static boolean withinUpperBound(byte[] value, byte[] upperBound) { return compareByteValue(value, upperBound) < 0; } + public boolean withinLowerBound(byte[] value, int idx) { return Ranges.withinLowerBound(value, lowers[idx]); } + public boolean withinUpperBound(byte[] value, int idx) { return Ranges.withinUpperBound(value, uppers[idx]); } + public boolean withinRange(byte[] value, int idx) { return withinLowerBound(value, idx) && withinUpperBound(value, idx); } + public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { if (compareByteValue(lowers[0], globalMax) > 0) { return -1; @@ -42,16 +49,4 @@ public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { } return i; } - - public static int compareByteValue(byte[] value1, byte[] value2) { - return comparator.compare(value1, 0, value2, 0); - } - - public static boolean withinLowerBound(byte[] value, byte[] lowerBound) { - return compareByteValue(value, lowerBound) >= 0; - } - - public static boolean withinUpperBound(byte[] value, byte[] upperBound) { - return compareByteValue(value, upperBound) < 0; - } } From b340a55f91abaa0bd7317a15d7b824eec55d3f79 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 1 Aug 2024 10:55:34 -0700 Subject: [PATCH 15/29] Move tree traversal functionality into single IntersectVisitor child class Signed-off-by: Finn Carroll --- .../filterrewrite/TreeTraversal.java | 319 ++++++++++-------- 1 file changed, 182 insertions(+), 137 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index f00b2392c44da..1e6418e081186 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -15,8 +15,6 @@ import org.apache.lucene.search.DocIdSetIterator; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.function.BiConsumer; import static org.opensearch.search.optimization.filterrewrite.Helper.loggerName; @@ -25,195 +23,242 @@ /** * Utility class for traversing a {@link PointValues.PointTree} and collecting document counts for the ranges. * - *

The main entry point is the {@link #multiRangesTraverse(PointValues.PointTree, Ranges, - * BiConsumer, int)} method + *

The main entry point is the {@link #multiRangesTraverse(RangeAwareIntersectVisitor)} method * - *

The class uses a {@link RangeCollectorForPointTree} to keep track of the active ranges and - * determine which parts of the tree to visit. The {@link - * PointValues.IntersectVisitor} implementation is responsible for the actual visitation and - * document count collection. + *

The class uses a {@link RangeAwareIntersectVisitor} to keep track of the active ranges, traverse the tree, and + * consume documents. */ public final class TreeTraversal { - private TreeTraversal() {} - private static final Logger logger = LogManager.getLogger(loggerName); /** - * Traverses the given {@link PointValues.PointTree} and collects document counts for the intersecting ranges. - * - * @param tree the point tree to traverse - * @param ranges the set of ranges to intersect with - * @param collectRangeIDs a callback to collect the doc ids for a range bucket - * @param maxNumNonZeroRanges the maximum number of non-zero ranges to collect + * Traverse the RangeAwareIntersectVisitor PointTree. + * Collects and returns DebugInfo from traversal + * @param visitor the maximum number of non-zero ranges to collect * @return a {@link OptimizationContext.DebugInfo} object containing debug information about the traversal */ - public static OptimizationContext.DebugInfo multiRangesTraverse( - final PointValues.PointTree tree, - final Ranges ranges, - final BiConsumer> collectRangeIDs, - final int maxNumNonZeroRanges - ) throws IOException { + public static OptimizationContext.DebugInfo multiRangesTraverse(RangeAwareIntersectVisitor visitor) throws IOException { OptimizationContext.DebugInfo debugInfo = new OptimizationContext.DebugInfo(); - int activeIndex = ranges.firstRangeIndex(tree.getMinPackedValue(), tree.getMaxPackedValue()); - if (activeIndex < 0) { + + if (visitor.getActiveIndex() < 0) { logger.debug("No ranges match the query, skip the fast filter optimization"); return debugInfo; } - RangeCollectorForPointTree collector = new RangeCollectorForPointTree(collectRangeIDs, maxNumNonZeroRanges, ranges, activeIndex); - PointValues.IntersectVisitor visitor = getIntersectVisitor(collector); + try { - intersectWithRanges(visitor, tree, debugInfo); + visitor.traverse(debugInfo); } catch (CollectionTerminatedException e) { logger.debug("Early terminate since no more range to collect"); } - collector.finalizePreviousRange(); return debugInfo; } - private static void intersectWithRanges( - PointValues.IntersectVisitor visitor, - PointValues.PointTree pointTree, - OptimizationContext.DebugInfo debug - ) throws IOException { - PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); - - switch (r) { - case CELL_INSIDE_QUERY: - pointTree.visitDocIDs(visitor); - debug.visitInner(); - break; - case CELL_CROSSES_QUERY: - if (pointTree.moveToChild()) { - do { - intersectWithRanges(visitor, pointTree, debug); - } while (pointTree.moveToSibling()); - pointTree.moveToParent(); - } else { - pointTree.visitDocValues(visitor); - debug.visitLeaf(); - } - break; - case CELL_OUTSIDE_QUERY: + /** + * This IntersectVisitor contains a packed value representation of Ranges + * as well as the current activeIndex being considered for collection. + */ + public static abstract class RangeAwareIntersectVisitor implements PointValues.IntersectVisitor { + private final PointValues.PointTree pointTree; + private final Ranges ranges; + private final int maxNumNonZeroRange; + protected int visitedRange = 0; + protected int activeIndex; + + public RangeAwareIntersectVisitor( + PointValues.PointTree pointTree, + Ranges ranges, + int maxNumNonZeroRange + ) { + this.ranges = ranges; + this.pointTree = pointTree; + this.maxNumNonZeroRange = maxNumNonZeroRange; + this.activeIndex = ranges.firstRangeIndex(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); } - } - private static PointValues.IntersectVisitor getIntersectVisitor(RangeCollectorForPointTree collector) { - return new PointValues.IntersectVisitor() { - @Override - public void visit(int docID) { - collector.count(docID); + public int getActiveIndex() { + return activeIndex; + } + + public abstract void visit(int docID); + public abstract void visit(int docID, byte[] packedValue); + public abstract void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException; + protected abstract void consumeContainedNode(PointValues.PointTree pointTree) throws IOException; + protected abstract void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException; + + public void traverse(OptimizationContext.DebugInfo debug) throws IOException { + PointValues.Relation r = compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + switch (r) { + case CELL_INSIDE_QUERY: + consumeContainedNode(pointTree); + debug.visitInner(); + break; + case CELL_CROSSES_QUERY: + if (pointTree.moveToChild()) { + do { + traverse(debug); + } while (pointTree.moveToSibling()); + pointTree.moveToParent(); + } else { + consumeCrossedNode(pointTree); + debug.visitLeaf(); + } + break; + case CELL_OUTSIDE_QUERY: } + } - @Override - public void visit(int docID, byte[] packedValue) throws IOException { - if (canCollect(packedValue)) { - collector.count(docID); - } + /** + * increment activeIndex until we run out of ranges or find a valid range that contains maxPackedValue + * else throw CollectionTerminatedException if we run out of ranges to check + * @param minPackedValue lower bound of PointValues.PointTree node + * @param maxPackedValue upper bound of PointValues.PointTree node + * @return the min/max values of the PointValues.PointTree node can be one of: + * 1.) Completely outside the activeIndex range + * 2.) Completely inside the activeIndex range + * 3.) Overlapping with the activeIndex range + */ + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + // try to find the first range that may collect values from this cell + if (!ranges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { + throw new CollectionTerminatedException(); } - @Override - public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { - if (canCollect(packedValue)) { - for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { - collector.count(iterator.docID()); - } - } + // DOES THIS CONDITION EVER RUN? + + // after the loop, min < upper + // cell could be outside [min max] lower + if (!ranges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { + return PointValues.Relation.CELL_OUTSIDE_QUERY; } - private boolean canCollect(byte[] packedValue) { - if (!collector.withinUpperBound(packedValue)) { - collector.finalizePreviousRange(); - if (collector.iterateRangeEnd(packedValue)) { - throw new CollectionTerminatedException(); - } - } + if (ranges.withinRange(minPackedValue, activeIndex) && ranges.withinRange(maxPackedValue, activeIndex)) { + return PointValues.Relation.CELL_INSIDE_QUERY; + } + return PointValues.Relation.CELL_CROSSES_QUERY; + } - return collector.withinRange(packedValue); + /** + * throws CollectionTerminatedException if we have reached our last range, and it does not contain packedValue + * @param packedValue determine if packedValue falls within the range at activeIndex + * @return true when packedValue falls within the activeIndex range + */ + protected boolean canCollect(byte[] packedValue) { + if (!ranges.withinUpperBound(packedValue, activeIndex) && iterateRangeEnd(packedValue)) { + throw new CollectionTerminatedException(); } + return ranges.withinRange(packedValue, activeIndex); + } - @Override - public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - // try to find the first range that may collect values from this cell - if (!collector.withinUpperBound(minPackedValue)) { - collector.finalizePreviousRange(); - if (collector.iterateRangeEnd(minPackedValue)) { - throw new CollectionTerminatedException(); - } - } - // after the loop, min < upper - // cell could be outside [min max] lower - if (!collector.withinLowerBound(maxPackedValue)) { - return PointValues.Relation.CELL_OUTSIDE_QUERY; - } - if (collector.withinRange(minPackedValue) && collector.withinRange(maxPackedValue)) { - return PointValues.Relation.CELL_INSIDE_QUERY; + /** + * @param packedValue increment active index until we reach a range containing value + * @return true when we've exhausted all available ranges or visited maxNumNonZeroRange and can stop early + */ + protected boolean iterateRangeEnd(byte[] packedValue) { + // the new value may not be contiguous to the previous one + // so try to find the first next range that cross the new value + while (!ranges.withinUpperBound(packedValue, activeIndex)) { + if (++activeIndex >= ranges.size) { + return true; } - return PointValues.Relation.CELL_CROSSES_QUERY; } - }; + visitedRange++; + return visitedRange > maxNumNonZeroRange; + } } - private static class RangeCollectorForPointTree { - private final BiConsumer> collectRangeIDs; - private final List DIDList = new ArrayList<>(); - private final Ranges ranges; - private int activeIndex; - private int visitedRange = 0; - private final int maxNumNonZeroRange; + /** + * Traverse PointTree with countDocs callback where countDock inputs are + * 1.) activeIndex for range in which document(s) reside + * 2.) total documents counted + */ + private static abstract class DocCountRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + BiConsumer countDocs; - public RangeCollectorForPointTree( - BiConsumer> collectRangeIDs, - int maxNumNonZeroRange, + public DocCountRangeAwareIntersectVisitor( + PointValues.PointTree pointTree, Ranges ranges, - int activeIndex + int maxNumNonZeroRange, + BiConsumer countDocs ) { - this.collectRangeIDs = collectRangeIDs; - this.maxNumNonZeroRange = maxNumNonZeroRange; - this.ranges = ranges; - this.activeIndex = activeIndex; + super(pointTree, ranges, maxNumNonZeroRange); + this.countDocs = countDocs; } - private void count(int docID) { - DIDList.add(docID); + @Override + public void visit(int docID) { + countDocs.accept(activeIndex, 1); } - private void countNode(List docIDs) { - DIDList.addAll(docIDs); + @Override + public void visit(int docID, byte[] packedValue) { + if (canCollect(packedValue)) { + countDocs.accept(activeIndex, 1); + } } - private void finalizePreviousRange() { - if (!DIDList.isEmpty()) { - collectRangeIDs.accept(activeIndex, DIDList); - DIDList.clear(); + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (canCollect(packedValue)) { + for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { + countDocs.accept(activeIndex, 1); + } } } - /** - * @return true when iterator exhausted or collect enough non-zero ranges - */ - private boolean iterateRangeEnd(byte[] value) { - // the new value may not be contiguous to the previous one - // so try to find the first next range that cross the new value - while (!withinUpperBound(value)) { - if (++activeIndex >= ranges.size) { - return true; - } + protected void consumeContainedNode(PointValues.PointTree pointTree) throws IOException { + countDocs.accept(activeIndex, (int) pointTree.size()); + } + + protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocValues(this); + } + } + + /** + * Traverse PointTree with collectDocs callback where collectDocs inputs are + * 1.) activeIndex for range in which document(s) reside + * 2.) document id to collect + */ + private static abstract class DocCollectRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + BiConsumer collectDocs; + + public DocCollectRangeAwareIntersectVisitor( + PointValues.PointTree pointTree, + Ranges ranges, + int maxNumNonZeroRange, + BiConsumer collectDocs + ) { + super(pointTree, ranges, maxNumNonZeroRange); + this.collectDocs = collectDocs; + } + + @Override + public void visit(int docID) { + collectDocs.accept(activeIndex, docID); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (canCollect(packedValue)) { + collectDocs.accept(activeIndex, docID); } - visitedRange++; - return visitedRange > maxNumNonZeroRange; } - private boolean withinLowerBound(byte[] value) { - return Ranges.withinLowerBound(value, ranges.lowers[activeIndex]); + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (canCollect(packedValue)) { + for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { + collectDocs.accept(activeIndex, iterator.docID()); + } + } } - private boolean withinUpperBound(byte[] value) { - return Ranges.withinUpperBound(value, ranges.uppers[activeIndex]); + protected void consumeContainedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocIDs(this); } - private boolean withinRange(byte[] value) { - return withinLowerBound(value) && withinUpperBound(value); + protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocValues(this); } } } From 7b1f8e4a820e5ed35afcc6ae3d3738a854bc170e Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 1 Aug 2024 10:56:21 -0700 Subject: [PATCH 16/29] Expose debug info Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/OptimizationContext.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java index 92485911d3daf..896867ebafd53 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java @@ -159,7 +159,7 @@ private Ranges tryBuildRangesFromSegment(LeafReaderContext leafCtx, boolean segm /** * Contains debug info of BKD traversal to show in profile */ - static class DebugInfo { + public static class DebugInfo { private int leaf = 0; // leaf node visited private int inner = 0; // inner node visited From 9ab1447c1c33565406c9dfa206a26626f8bcac46 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 1 Aug 2024 15:09:01 -0700 Subject: [PATCH 17/29] Modify TreeTraversal usage to match new format Signed-off-by: Finn Carroll --- .../BKDTreeMultiRangesTraverseBenchmark.java | 12 +++--- .../CompositeAggregatorBridge.java | 32 ++++++++++++---- .../DateHistogramAggregatorBridge.java | 38 +++++++++++-------- .../filterrewrite/RangeAggregatorBridge.java | 29 +++++++------- .../filterrewrite/TreeTraversal.java | 4 +- 5 files changed, 70 insertions(+), 45 deletions(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java index 134dcdcb7266f..0af2a3eb98adb 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java @@ -54,6 +54,7 @@ import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.NumericPointEncoder; import org.opensearch.search.optimization.filterrewrite.Ranges; +import org.opensearch.search.optimization.filterrewrite.TreeTraversal; import java.util.*; import java.util.concurrent.TimeUnit; @@ -141,16 +142,15 @@ public void tearDown() throws IOException { public Map> multiRangeTraverseTree(treeState state) throws Exception { Map> mockIDCollect = new HashMap<>(); - BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { + TreeTraversal.RangeAwareIntersectVisitor treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(state.pointTree, state.ranges, state.maxNumNonZeroRanges, (activeIndex, docID) -> { if (mockIDCollect.containsKey(activeIndex)) { - mockIDCollect.get(activeIndex).addAll(docIDs); + mockIDCollect.get(activeIndex).add(docID); } else { - mockIDCollect.put(activeIndex, docIDs); + mockIDCollect.put(activeIndex, List.of(docID)); } - }; - - multiRangesTraverse(state.pointTree, state.ranges, collectRangeIDs, state.maxNumNonZeroRanges); + }); + multiRangesTraverse(treeVisitor); return mockIDCollect; } } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java index 9cd6c35ad9541..86c695ea4dcdc 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java @@ -46,13 +46,29 @@ private boolean canOptimize(boolean missing, boolean hasScript, MappedFieldType @Override public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { DateFieldMapper.DateFieldType fieldType = getFieldType(); - BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docIDs.size()); - }; - - optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, getSize())); + TreeTraversal.RangeAwareIntersectVisitor treeVisitor; + if (sub != null) { + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docID) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, activeIndex); + } catch ( IOException ioe) { + throw new RuntimeException(ioe); + } + }); + } else { + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docCount) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + incrementDocCount.accept(ord, (long) docCount); + }); + } + + optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); } } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index bff63137f8575..ecba0ee0c3566 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -126,25 +126,31 @@ protected int getSize() { @Override public void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { - int size = getSize(); - DateFieldMapper.DateFieldType fieldType = getFieldType(); - BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docIDs.size()); - - try { - for (int docID : docIDs) { - sub.collect(docID, ord); + TreeTraversal.RangeAwareIntersectVisitor treeVisitor; + if (sub != null) { + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docID) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, activeIndex); + } catch ( IOException ioe) { + throw new RuntimeException(ioe); } - } catch ( IOException ioe) { - throw new RuntimeException(ioe); - } - }; + }); + } else { + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docCount) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + incrementDocCount.accept(ord, (long) docCount); + }); + } - optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, size)); + optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); } protected static long getBucketOrd(long bucketOrd) { diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index 3bda6c349de86..a6b456d6971bd 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.function.BiConsumer; import java.util.function.Function; -import java.util.List; import static org.opensearch.search.optimization.filterrewrite.TreeTraversal.multiRangesTraverse; @@ -77,22 +76,26 @@ public void prepareFromSegment(LeafReaderContext leaf) { @Override public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { + TreeTraversal.RangeAwareIntersectVisitor treeVisitor; + if (sub != null) { + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), Integer.MAX_VALUE, (activeIndex, docID) -> { + long ord = bucketOrdProducer().apply(activeIndex); - - BiConsumer> collectRangeIDs = (activeIndex, docIDs) -> { - long ord = bucketOrdProducer().apply(activeIndex); - incrementDocCount.accept(ord, (long) docIDs.size()); - - try { - for (int docID : docIDs) { + try { + incrementDocCount.accept(ord, (long) 1); sub.collect(docID, activeIndex); + } catch ( IOException ioe) { + throw new RuntimeException(ioe); } - } catch ( IOException ioe) { - throw new RuntimeException(ioe); - } - }; + }); + } else { + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), Integer.MAX_VALUE, (activeIndex, docCount) -> { + long ord = bucketOrdProducer().apply(activeIndex); + incrementDocCount.accept(ord, (long) docCount); + }); + } - optimizationContext.consumeDebugInfo(multiRangesTraverse(values.getPointTree(), optimizationContext.getRanges(), collectRangeIDs, Integer.MAX_VALUE)); + optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); } /** diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 1e6418e081186..8ff3821f45dc8 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -173,7 +173,7 @@ protected boolean iterateRangeEnd(byte[] packedValue) { * 1.) activeIndex for range in which document(s) reside * 2.) total documents counted */ - private static abstract class DocCountRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + public static class DocCountRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { BiConsumer countDocs; public DocCountRangeAwareIntersectVisitor( @@ -220,7 +220,7 @@ protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOExce * 1.) activeIndex for range in which document(s) reside * 2.) document id to collect */ - private static abstract class DocCollectRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + public static class DocCollectRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { BiConsumer collectDocs; public DocCollectRangeAwareIntersectVisitor( From 5c713c6ec64aeab1173e74ea8d3e8cfed43f1855 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 1 Aug 2024 15:54:59 -0700 Subject: [PATCH 18/29] In sub agg path collect by converted ordinal, not active index Signed-off-by: Finn Carroll --- .../optimization/filterrewrite/CompositeAggregatorBridge.java | 2 +- .../filterrewrite/DateHistogramAggregatorBridge.java | 2 +- .../optimization/filterrewrite/RangeAggregatorBridge.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java index 86c695ea4dcdc..74fe30720b85d 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java @@ -55,7 +55,7 @@ public final void tryOptimize(PointValues values, BiConsumer increme try { incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, activeIndex); + sub.collect(docID, ord); } catch ( IOException ioe) { throw new RuntimeException(ioe); } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index ecba0ee0c3566..06d0e251a4105 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -136,7 +136,7 @@ public void tryOptimize(PointValues values, BiConsumer incrementDocC try { incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, activeIndex); + sub.collect(docID, ord); } catch ( IOException ioe) { throw new RuntimeException(ioe); } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index a6b456d6971bd..0c276891f345d 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -83,7 +83,7 @@ public final void tryOptimize(PointValues values, BiConsumer increme try { incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, activeIndex); + sub.collect(docID, ord); } catch ( IOException ioe) { throw new RuntimeException(ioe); } From 554c8ef3312fc3fa18bc927ffba8fbb43d488301 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 2 Aug 2024 14:59:20 -0700 Subject: [PATCH 19/29] Remove wild card import Signed-off-by: Finn Carroll --- .../BKDTreeMultiRangesTraverseBenchmark.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java index 0af2a3eb98adb..cefc388059ef0 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java @@ -31,7 +31,18 @@ package org.opensearch.benchmark.search.aggregations; -import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; import org.apache.lucene.index.PointValues; import org.apache.lucene.document.Field; @@ -47,6 +58,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; import java.util.List; import org.opensearch.common.logging.LogConfigurator; @@ -56,7 +68,8 @@ import org.opensearch.search.optimization.filterrewrite.Ranges; import org.opensearch.search.optimization.filterrewrite.TreeTraversal; -import java.util.*; +import java.util.Map; +import java.util.Random; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; From d7772ac766bb07f5f7486bb9deffe2ad2a929a54 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 2 Aug 2024 15:00:45 -0700 Subject: [PATCH 20/29] Spotless apply Signed-off-by: Finn Carroll --- .../BKDTreeMultiRangesTraverseBenchmark.java | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java index cefc388059ef0..0ef0a261cecd0 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java @@ -31,6 +31,22 @@ package org.opensearch.benchmark.search.aggregations; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.opensearch.common.logging.LogConfigurator; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.NumericPointEncoder; +import org.opensearch.search.optimization.filterrewrite.Ranges; +import org.opensearch.search.optimization.filterrewrite.TreeTraversal; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -40,34 +56,15 @@ import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.TearDown; - -import org.apache.lucene.index.PointValues; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.IntField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; +import org.openjdk.jmh.annotations.Warmup; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.List; - -import org.opensearch.common.logging.LogConfigurator; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; -import org.opensearch.index.mapper.NumericPointEncoder; -import org.opensearch.search.optimization.filterrewrite.Ranges; -import org.opensearch.search.optimization.filterrewrite.TreeTraversal; - import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -131,7 +128,7 @@ public void setup() throws IOException { MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType("val", NumberFieldMapper.NumberType.INTEGER); NumericPointEncoder numericPointEncoder = (NumericPointEncoder) fieldType; - int bucketWidth = valMax/buckets; + int bucketWidth = valMax / buckets; byte[][] lowers = new byte[buckets][]; byte[][] uppers = new byte[buckets][]; for (int i = 0; i < buckets; i++) { @@ -155,13 +152,18 @@ public void tearDown() throws IOException { public Map> multiRangeTraverseTree(treeState state) throws Exception { Map> mockIDCollect = new HashMap<>(); - TreeTraversal.RangeAwareIntersectVisitor treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(state.pointTree, state.ranges, state.maxNumNonZeroRanges, (activeIndex, docID) -> { - if (mockIDCollect.containsKey(activeIndex)) { - mockIDCollect.get(activeIndex).add(docID); - } else { - mockIDCollect.put(activeIndex, List.of(docID)); + TreeTraversal.RangeAwareIntersectVisitor treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( + state.pointTree, + state.ranges, + state.maxNumNonZeroRanges, + (activeIndex, docID) -> { + if (mockIDCollect.containsKey(activeIndex)) { + mockIDCollect.get(activeIndex).add(docID); + } else { + mockIDCollect.put(activeIndex, List.of(docID)); + } } - }); + ); multiRangesTraverse(treeVisitor); return mockIDCollect; From 50b20a8bab25ca51e772be65e70f353c79e80741 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 2 Aug 2024 15:04:11 -0700 Subject: [PATCH 21/29] Spotless apply Signed-off-by: Finn Carroll --- .../filterrewrite/AggregatorBridge.java | 3 +- .../CompositeAggregatorBridge.java | 46 +++++++++++------- .../DateHistogramAggregatorBridge.java | 48 +++++++++++-------- .../filterrewrite/OptimizationContext.java | 8 +++- .../filterrewrite/RangeAggregatorBridge.java | 39 +++++++++------ .../optimization/filterrewrite/Ranges.java | 29 ++++++++--- .../filterrewrite/TreeTraversal.java | 10 ++-- 7 files changed, 118 insertions(+), 65 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java index 17b8c9db9a782..f57dab3e5313d 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java @@ -73,5 +73,6 @@ void setOptimizationContext(OptimizationContext context) { * @param values the point values (index structure for numeric values) for a segment * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket */ - public abstract void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException; + public abstract void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) + throws IOException; } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java index 74fe30720b85d..b84897f793ea4 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java @@ -17,7 +17,6 @@ import org.opensearch.search.aggregations.bucket.composite.RoundingValuesSource; import java.io.IOException; -import java.util.List; import java.util.function.BiConsumer; import static org.opensearch.search.optimization.filterrewrite.TreeTraversal.multiRangesTraverse; @@ -44,29 +43,40 @@ private boolean canOptimize(boolean missing, boolean hasScript, MappedFieldType } @Override - public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { + public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) + throws IOException { DateFieldMapper.DateFieldType fieldType = getFieldType(); TreeTraversal.RangeAwareIntersectVisitor treeVisitor; if (sub != null) { - treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docID) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + getSize(), + (activeIndex, docID) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - try { - incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, ord); - } catch ( IOException ioe) { - throw new RuntimeException(ioe); + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, ord); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } } - }); + ); } else { - treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docCount) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docCount); - }); + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + getSize(), + (activeIndex, docCount) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + incrementDocCount.accept(ord, (long) docCount); + } + ); } optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index 06d0e251a4105..164e6a2c9aefd 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -22,7 +22,6 @@ import org.opensearch.search.internal.SearchContext; import java.io.IOException; -import java.util.List; import java.util.OptionalLong; import java.util.function.BiConsumer; import java.util.function.Function; @@ -125,29 +124,40 @@ protected int getSize() { } @Override - public void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { + public void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) + throws IOException { DateFieldMapper.DateFieldType fieldType = getFieldType(); TreeTraversal.RangeAwareIntersectVisitor treeVisitor; if (sub != null) { - treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docID) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - - try { - incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, ord); - } catch ( IOException ioe) { - throw new RuntimeException(ioe); + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + getSize(), + (activeIndex, docID) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, ord); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } } - }); + ); } else { - treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), getSize(), (activeIndex, docCount) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docCount); - }); + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + getSize(), + (activeIndex, docCount) -> { + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + incrementDocCount.accept(ord, (long) docCount); + } + ); } optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java index 896867ebafd53..98c42f3aea049 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java @@ -102,8 +102,12 @@ Ranges getRanges() { * @param incrementDocCount consume the doc_count results for certain ordinal * @param segmentMatchAll if your optimization can prepareFromSegment, you should pass in this flag to decide whether to prepareFromSegment */ - public boolean tryOptimize(final LeafReaderContext leafCtx, LeafBucketCollector sub, final BiConsumer incrementDocCount, boolean segmentMatchAll) - throws IOException { + public boolean tryOptimize( + final LeafReaderContext leafCtx, + LeafBucketCollector sub, + final BiConsumer incrementDocCount, + boolean segmentMatchAll + ) throws IOException { segments++; if (!canOptimize) { return false; diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index 0c276891f345d..6fda3b4ea6951 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -75,27 +75,38 @@ public void prepareFromSegment(LeafReaderContext leaf) { } @Override - public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { + public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) + throws IOException { TreeTraversal.RangeAwareIntersectVisitor treeVisitor; if (sub != null) { - treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), Integer.MAX_VALUE, (activeIndex, docID) -> { - long ord = bucketOrdProducer().apply(activeIndex); + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + Integer.MAX_VALUE, + (activeIndex, docID) -> { + long ord = bucketOrdProducer().apply(activeIndex); - try { - incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, ord); - } catch ( IOException ioe) { - throw new RuntimeException(ioe); + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, ord); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } } - }); + ); } else { - treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor(values.getPointTree(), optimizationContext.getRanges(), Integer.MAX_VALUE, (activeIndex, docCount) -> { - long ord = bucketOrdProducer().apply(activeIndex); - incrementDocCount.accept(ord, (long) docCount); - }); + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + Integer.MAX_VALUE, + (activeIndex, docCount) -> { + long ord = bucketOrdProducer().apply(activeIndex); + incrementDocCount.accept(ord, (long) docCount); + } + ); } - optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); + optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); } /** diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java index 18c88a4af2e67..c9aaf0b39f9c9 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java @@ -29,12 +29,29 @@ public Ranges(byte[][] lowers, byte[][] uppers) { comparator = ArrayUtil.getUnsignedComparator(byteLen); } - public static int compareByteValue(byte[] value1, byte[] value2) { return comparator.compare(value1, 0, value2, 0); } - public static boolean withinLowerBound(byte[] value, byte[] lowerBound) { return compareByteValue(value, lowerBound) >= 0; } - public static boolean withinUpperBound(byte[] value, byte[] upperBound) { return compareByteValue(value, upperBound) < 0; } - public boolean withinLowerBound(byte[] value, int idx) { return Ranges.withinLowerBound(value, lowers[idx]); } - public boolean withinUpperBound(byte[] value, int idx) { return Ranges.withinUpperBound(value, uppers[idx]); } - public boolean withinRange(byte[] value, int idx) { return withinLowerBound(value, idx) && withinUpperBound(value, idx); } + public static int compareByteValue(byte[] value1, byte[] value2) { + return comparator.compare(value1, 0, value2, 0); + } + + public static boolean withinLowerBound(byte[] value, byte[] lowerBound) { + return compareByteValue(value, lowerBound) >= 0; + } + + public static boolean withinUpperBound(byte[] value, byte[] upperBound) { + return compareByteValue(value, upperBound) < 0; + } + + public boolean withinLowerBound(byte[] value, int idx) { + return Ranges.withinLowerBound(value, lowers[idx]); + } + + public boolean withinUpperBound(byte[] value, int idx) { + return Ranges.withinUpperBound(value, uppers[idx]); + } + + public boolean withinRange(byte[] value, int idx) { + return withinLowerBound(value, idx) && withinUpperBound(value, idx); + } public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { if (compareByteValue(lowers[0], globalMax) > 0) { diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 8ff3821f45dc8..65a72bbbc89fd 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -65,11 +65,7 @@ public static abstract class RangeAwareIntersectVisitor implements PointValues.I protected int visitedRange = 0; protected int activeIndex; - public RangeAwareIntersectVisitor( - PointValues.PointTree pointTree, - Ranges ranges, - int maxNumNonZeroRange - ) { + public RangeAwareIntersectVisitor(PointValues.PointTree pointTree, Ranges ranges, int maxNumNonZeroRange) { this.ranges = ranges; this.pointTree = pointTree; this.maxNumNonZeroRange = maxNumNonZeroRange; @@ -81,9 +77,13 @@ public int getActiveIndex() { } public abstract void visit(int docID); + public abstract void visit(int docID, byte[] packedValue); + public abstract void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException; + protected abstract void consumeContainedNode(PointValues.PointTree pointTree) throws IOException; + protected abstract void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException; public void traverse(OptimizationContext.DebugInfo debug) throws IOException { From 71e189fd032a3610f65b31347ec1b1d308db8012 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 2 Aug 2024 15:05:43 -0700 Subject: [PATCH 22/29] Remove comment Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/TreeTraversal.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 65a72bbbc89fd..f6415a43e995b 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -124,9 +124,7 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue if (!ranges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { throw new CollectionTerminatedException(); } - - // DOES THIS CONDITION EVER RUN? - + // after the loop, min < upper // cell could be outside [min max] lower if (!ranges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { From 3709e57a9c6bacbeb4f02968b5fd73542ba778db Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 2 Aug 2024 15:07:58 -0700 Subject: [PATCH 23/29] Spotless apply Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/TreeTraversal.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index f6415a43e995b..3b837cf161c3c 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -124,7 +124,7 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue if (!ranges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { throw new CollectionTerminatedException(); } - + // after the loop, min < upper // cell could be outside [min max] lower if (!ranges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { From 10f77f49b7b96d3c846c872dd01c0f3a665a2e20 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 8 Aug 2024 14:05:35 -0700 Subject: [PATCH 24/29] Rename Ranges -> PackedValueRanges Signed-off-by: Finn Carroll --- .../BKDTreeMultiRangesTraverseBenchmark.java | 8 ++--- .../opensearch.release-notes-2.1.0.md | 2 +- .../opensearch.release-notes-2.14.0.md | 2 +- .../DateHistogramAggregatorBridge.java | 2 +- .../optimization/filterrewrite/Helper.java | 4 +-- .../filterrewrite/OptimizationContext.java | 36 +++++++++---------- .../{Ranges.java => PackedValueRanges.java} | 8 ++--- .../filterrewrite/RangeAggregatorBridge.java | 2 +- .../filterrewrite/TreeTraversal.java | 30 ++++++++-------- 9 files changed, 47 insertions(+), 47 deletions(-) rename server/src/main/java/org/opensearch/search/optimization/filterrewrite/{Ranges.java => PackedValueRanges.java} (88%) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java index 0ef0a261cecd0..c29808cb3ae4f 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java @@ -45,7 +45,7 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.NumericPointEncoder; -import org.opensearch.search.optimization.filterrewrite.Ranges; +import org.opensearch.search.optimization.filterrewrite.PackedValueRanges; import org.opensearch.search.optimization.filterrewrite.TreeTraversal; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -102,7 +102,7 @@ public static class treeState { // multiRangesTraverse params PointValues.PointTree pointTree; - Ranges ranges; + PackedValueRanges packedValueRanges; BiConsumer> collectRangeIDs; int maxNumNonZeroRanges = Integer.MAX_VALUE; @@ -136,7 +136,7 @@ public void setup() throws IOException { uppers[i] = numericPointEncoder.encodePoint(i * bucketWidth); } - ranges = new Ranges(lowers, uppers); + packedValueRanges = new PackedValueRanges(lowers, uppers); } @TearDown @@ -154,7 +154,7 @@ public Map> multiRangeTraverseTree(treeState state) throw TreeTraversal.RangeAwareIntersectVisitor treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( state.pointTree, - state.ranges, + state.packedValueRanges, state.maxNumNonZeroRanges, (activeIndex, docID) -> { if (mockIDCollect.containsKey(activeIndex)) { diff --git a/release-notes/opensearch.release-notes-2.1.0.md b/release-notes/opensearch.release-notes-2.1.0.md index b32864990b82e..e55aab4810c2c 100644 --- a/release-notes/opensearch.release-notes-2.1.0.md +++ b/release-notes/opensearch.release-notes-2.1.0.md @@ -61,7 +61,7 @@ * Update github action gradle-check to use pull_request_target for accessing token (#3728) ([#3731](https://github.com/opensearch-project/opensearch/pull/3731)) * Add gradle check test for github workflows (#3717) ([#3723](https://github.com/opensearch-project/opensearch/pull/3723)) * Used set to make shell scripts more strict (#3278) ([#3344](https://github.com/opensearch-project/opensearch/pull/3344)) -* Bootstrap should implement a denylist of Java versions (ranges) (#3164) ([#3292](https://github.com/opensearch-project/opensearch/pull/3292)) +* Bootstrap should implement a denylist of Java versions (packedValueRanges) (#3164) ([#3292](https://github.com/opensearch-project/opensearch/pull/3292)) * Add Github Workflow to build and publish lucene snapshots. (#2906) ([#3038](https://github.com/opensearch-project/opensearch/pull/3038)) * Remove JavaVersion in favour of standard Runtime.Version (java-version-checker) (#3027) ([#3034](https://github.com/opensearch-project/opensearch/pull/3034)) * Remove JavaVersion, use builtin Runtime.Version to deal with runtime versions (#3006) ([#3013](https://github.com/opensearch-project/opensearch/pull/3013)) diff --git a/release-notes/opensearch.release-notes-2.14.0.md b/release-notes/opensearch.release-notes-2.14.0.md index c5fc3e895c45d..c55e0a6c27196 100644 --- a/release-notes/opensearch.release-notes-2.14.0.md +++ b/release-notes/opensearch.release-notes-2.14.0.md @@ -34,7 +34,7 @@ - [Search Pipeline] Handle default pipeline for multiple indices ([#13276](https://github.com/opensearch-project/OpenSearch/pull/13276)) - [Batch Ingestion] Add `batch_size` to `_bulk` API. ([#12457](https://github.com/opensearch-project/OpenSearch/issues/12457)) - [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992)) -- Support multi ranges traversal when doing date histogram rewrite optimization. ([#13317](https://github.com/opensearch-project/OpenSearch/pull/13317)) +- Support multi packedValueRanges traversal when doing date histogram rewrite optimization. ([#13317](https://github.com/opensearch-project/OpenSearch/pull/13317)) ### Dependencies - Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896)) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index 164e6a2c9aefd..b73e3f5442117 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -57,7 +57,7 @@ public void prepareFromSegment(LeafReaderContext leaf) throws IOException { optimizationContext.setRangesFromSegment(buildRanges(bounds)); } - private Ranges buildRanges(long[] bounds) { + private PackedValueRanges buildRanges(long[] bounds) { bounds = processHardBounds(bounds); if (bounds == null) { return null; diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Helper.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Helper.java index eb57cd90b9ad9..a79a7d5d0b1a0 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Helper.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Helper.java @@ -155,7 +155,7 @@ private static long[] getBoundsWithRangeQuery(PointRangeQuery prq, String fieldN * Creates the date ranges from date histo aggregations using its interval, * and min/max boundaries */ - static Ranges createRangesFromAgg( + static PackedValueRanges createRangesFromAgg( final DateFieldMapper.DateFieldType fieldType, final long interval, final Rounding.Prepared preparedRounding, @@ -208,6 +208,6 @@ static Ranges createRangesFromAgg( uppers[i] = max; } - return new Ranges(lowers, uppers); + return new PackedValueRanges(lowers, uppers); } } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java index 98c42f3aea049..8611b0b1fd201 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java @@ -42,8 +42,8 @@ public final class OptimizationContext { int maxAggRewriteFilters; String shardId; - private Ranges ranges; - private Ranges rangesFromSegment; + private PackedValueRanges packedValueRanges; + private PackedValueRanges packedValueRangesFromSegment; // debug info related fields private int leaf; @@ -70,9 +70,9 @@ public boolean canOptimize(final Object parent, SearchContext context) { } public void prepare() throws IOException { - assert ranges == null : "Ranges should only be built once at shard level, but they are already built"; + assert packedValueRanges == null : "Ranges should only be built once at shard level, but they are already built"; aggregatorBridge.prepare(); - if (ranges != null) { + if (packedValueRanges != null) { preparedAtShardLevel = true; } } @@ -81,17 +81,17 @@ public void prepareFromSegment(LeafReaderContext leaf) throws IOException { aggregatorBridge.prepareFromSegment(leaf); } - void setRanges(Ranges ranges) { - this.ranges = ranges; + void setRanges(PackedValueRanges packedValueRanges) { + this.packedValueRanges = packedValueRanges; } - void setRangesFromSegment(Ranges ranges) { - this.rangesFromSegment = ranges; + void setRangesFromSegment(PackedValueRanges packedValueRanges) { + this.packedValueRangesFromSegment = packedValueRanges; } - Ranges getRanges() { - if (rangesFromSegment != null) return rangesFromSegment; - return ranges; + PackedValueRanges getRanges() { + if (packedValueRangesFromSegment != null) return packedValueRangesFromSegment; + return packedValueRanges; } /** @@ -130,8 +130,8 @@ public boolean tryOptimize( return false; } - Ranges ranges = tryBuildRangesFromSegment(leafCtx, segmentMatchAll); - if (ranges == null) return false; + PackedValueRanges packedValueRanges = tryBuildRangesFromSegment(leafCtx, segmentMatchAll); + if (packedValueRanges == null) return false; aggregatorBridge.tryOptimize(values, incrementDocCount, sub); @@ -139,7 +139,7 @@ public boolean tryOptimize( logger.debug("Fast filter optimization applied to shard {} segment {}", shardId, leafCtx.ord); logger.debug("crossed leaf nodes: {}, inner nodes: {}", leaf, inner); - rangesFromSegment = null; + packedValueRangesFromSegment = null; return true; } @@ -147,17 +147,17 @@ public boolean tryOptimize( * Even when ranges cannot be built at shard level, we can still build ranges * at segment level when it's functionally match-all at segment level */ - private Ranges tryBuildRangesFromSegment(LeafReaderContext leafCtx, boolean segmentMatchAll) throws IOException { + private PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leafCtx, boolean segmentMatchAll) throws IOException { if (!preparedAtShardLevel && !segmentMatchAll) { return null; } - if (ranges == null) { // not built at shard level but segment match all + if (packedValueRanges == null) { // not built at shard level but segment match all logger.debug("Shard {} segment {} functionally match all documents. Build the fast filter", shardId, leafCtx.ord); prepareFromSegment(leafCtx); - return rangesFromSegment; + return packedValueRangesFromSegment; } - return ranges; + return packedValueRanges; } /** diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java similarity index 88% rename from server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java rename to server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java index c9aaf0b39f9c9..4e839fe20e5bd 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/Ranges.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java @@ -13,14 +13,14 @@ /** * Internal ranges representation for the filter rewrite optimization */ -public final class Ranges { +public final class PackedValueRanges { static ArrayUtil.ByteArrayComparator comparator; byte[][] lowers; // inclusive byte[][] uppers; // exclusive int size; int byteLen; - public Ranges(byte[][] lowers, byte[][] uppers) { + public PackedValueRanges(byte[][] lowers, byte[][] uppers) { this.lowers = lowers; this.uppers = uppers; assert lowers.length == uppers.length; @@ -42,11 +42,11 @@ public static boolean withinUpperBound(byte[] value, byte[] upperBound) { } public boolean withinLowerBound(byte[] value, int idx) { - return Ranges.withinLowerBound(value, lowers[idx]); + return PackedValueRanges.withinLowerBound(value, lowers[idx]); } public boolean withinUpperBound(byte[] value, int idx) { - return Ranges.withinUpperBound(value, uppers[idx]); + return PackedValueRanges.withinUpperBound(value, uppers[idx]); } public boolean withinRange(byte[] value, int idx) { diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index 6fda3b4ea6951..016711684f85f 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -66,7 +66,7 @@ protected void buildRanges(RangeAggregator.Range[] ranges) { uppers[i] = upper; } - optimizationContext.setRanges(new Ranges(lowers, uppers)); + optimizationContext.setRanges(new PackedValueRanges(lowers, uppers)); } @Override diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 3b837cf161c3c..3924724566e4c 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -60,16 +60,16 @@ public static OptimizationContext.DebugInfo multiRangesTraverse(RangeAwareInters */ public static abstract class RangeAwareIntersectVisitor implements PointValues.IntersectVisitor { private final PointValues.PointTree pointTree; - private final Ranges ranges; + private final PackedValueRanges packedValueRanges; private final int maxNumNonZeroRange; protected int visitedRange = 0; protected int activeIndex; - public RangeAwareIntersectVisitor(PointValues.PointTree pointTree, Ranges ranges, int maxNumNonZeroRange) { - this.ranges = ranges; + public RangeAwareIntersectVisitor(PointValues.PointTree pointTree, PackedValueRanges packedValueRanges, int maxNumNonZeroRange) { + this.packedValueRanges = packedValueRanges; this.pointTree = pointTree; this.maxNumNonZeroRange = maxNumNonZeroRange; - this.activeIndex = ranges.firstRangeIndex(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + this.activeIndex = packedValueRanges.firstRangeIndex(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); } public int getActiveIndex() { @@ -121,17 +121,17 @@ public void traverse(OptimizationContext.DebugInfo debug) throws IOException { @Override public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { // try to find the first range that may collect values from this cell - if (!ranges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { + if (!packedValueRanges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { throw new CollectionTerminatedException(); } // after the loop, min < upper // cell could be outside [min max] lower - if (!ranges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { + if (!packedValueRanges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { return PointValues.Relation.CELL_OUTSIDE_QUERY; } - if (ranges.withinRange(minPackedValue, activeIndex) && ranges.withinRange(maxPackedValue, activeIndex)) { + if (packedValueRanges.withinRange(minPackedValue, activeIndex) && packedValueRanges.withinRange(maxPackedValue, activeIndex)) { return PointValues.Relation.CELL_INSIDE_QUERY; } return PointValues.Relation.CELL_CROSSES_QUERY; @@ -143,10 +143,10 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue * @return true when packedValue falls within the activeIndex range */ protected boolean canCollect(byte[] packedValue) { - if (!ranges.withinUpperBound(packedValue, activeIndex) && iterateRangeEnd(packedValue)) { + if (!packedValueRanges.withinUpperBound(packedValue, activeIndex) && iterateRangeEnd(packedValue)) { throw new CollectionTerminatedException(); } - return ranges.withinRange(packedValue, activeIndex); + return packedValueRanges.withinRange(packedValue, activeIndex); } /** @@ -156,8 +156,8 @@ protected boolean canCollect(byte[] packedValue) { protected boolean iterateRangeEnd(byte[] packedValue) { // the new value may not be contiguous to the previous one // so try to find the first next range that cross the new value - while (!ranges.withinUpperBound(packedValue, activeIndex)) { - if (++activeIndex >= ranges.size) { + while (!packedValueRanges.withinUpperBound(packedValue, activeIndex)) { + if (++activeIndex >= packedValueRanges.size) { return true; } } @@ -176,11 +176,11 @@ public static class DocCountRangeAwareIntersectVisitor extends RangeAwareInterse public DocCountRangeAwareIntersectVisitor( PointValues.PointTree pointTree, - Ranges ranges, + PackedValueRanges packedValueRanges, int maxNumNonZeroRange, BiConsumer countDocs ) { - super(pointTree, ranges, maxNumNonZeroRange); + super(pointTree, packedValueRanges, maxNumNonZeroRange); this.countDocs = countDocs; } @@ -223,11 +223,11 @@ public static class DocCollectRangeAwareIntersectVisitor extends RangeAwareInter public DocCollectRangeAwareIntersectVisitor( PointValues.PointTree pointTree, - Ranges ranges, + PackedValueRanges packedValueRanges, int maxNumNonZeroRange, BiConsumer collectDocs ) { - super(pointTree, ranges, maxNumNonZeroRange); + super(pointTree, packedValueRanges, maxNumNonZeroRange); this.collectDocs = collectDocs; } From bac2062267455ae999ad97693b26d344c54465c6 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 8 Aug 2024 17:03:25 -0700 Subject: [PATCH 25/29] Update ranges comment Signed-off-by: Finn Carroll --- .../search/optimization/filterrewrite/PackedValueRanges.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java index 4e839fe20e5bd..bfee822d1ce85 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java @@ -11,7 +11,8 @@ import org.apache.lucene.util.ArrayUtil; /** - * Internal ranges representation for the filter rewrite optimization + * Packed value representation of aggregation buckets for the filter rewrite optimization. + * It is convenient to represent our buckets in this way while traversing PointValues.PointTree. */ public final class PackedValueRanges { static ArrayUtil.ByteArrayComparator comparator; From ba89da58cfcf1661cfd819c062b497be342cc968 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Thu, 8 Aug 2024 18:01:25 -0700 Subject: [PATCH 26/29] Replace generic object lambda in range agg with long primitive Signed-off-by: Finn Carroll --- .../bucket/composite/CompositeAggregator.java | 3 ++- .../bucket/histogram/AutoDateHistogramAggregator.java | 3 ++- .../bucket/histogram/DateHistogramAggregator.java | 3 ++- .../aggregations/bucket/range/RangeAggregator.java | 3 ++- .../optimization/filterrewrite/AggregatorBridge.java | 7 +++++++ .../filterrewrite/DateHistogramAggregatorBridge.java | 5 ----- .../filterrewrite/RangeAggregatorBridge.java | 9 ++------- .../search/optimization/filterrewrite/TreeTraversal.java | 2 +- 8 files changed, 18 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index 9ca27a3459ecf..c1efd0a20fbea 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -90,6 +90,7 @@ import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.function.LongFunction; import java.util.function.LongUnaryOperator; import java.util.stream.Collectors; @@ -219,7 +220,7 @@ protected int getSize() { } @Override - protected Function bucketOrdProducer() { + protected LongFunction bucketOrdProducer() { return (key) -> bucketOrds.add(0, getRoundingPrepared().round((long) key)); } }); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index dd6a4be8fbf7b..44ab5e35fac9d 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -66,6 +66,7 @@ import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.function.LongFunction; import java.util.function.LongToIntFunction; import static org.opensearch.search.optimization.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; @@ -200,7 +201,7 @@ protected Prepared getRoundingPrepared() { } @Override - protected Function bucketOrdProducer() { + protected LongFunction bucketOrdProducer() { return (key) -> getBucketOrds().add(0, preparedRounding.round((long) key)); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 3be7646518dc4..5c15794432a2c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -60,6 +60,7 @@ import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.function.LongFunction; import static org.opensearch.search.optimization.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; @@ -146,7 +147,7 @@ protected long[] processHardBounds(long[] bounds) { } @Override - protected Function bucketOrdProducer() { + protected LongFunction bucketOrdProducer() { return (key) -> bucketOrds.add(0, preparedRounding.round((long) key)); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 312ba60dcfb83..3161a5cfbfd59 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -68,6 +68,7 @@ import java.util.Objects; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.function.LongFunction; import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -293,7 +294,7 @@ public void prepare() { } @Override - protected Function bucketOrdProducer() { + protected LongFunction bucketOrdProducer() { return (activeIndex) -> subBucketOrdinal(0, (int) activeIndex); } }); diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java index f57dab3e5313d..0974133ea175c 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java @@ -15,6 +15,8 @@ import java.io.IOException; import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.function.LongFunction; /** * This interface provides a bridge between an aggregator and the optimization context, allowing @@ -75,4 +77,9 @@ void setOptimizationContext(OptimizationContext context) { */ public abstract void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException; + + /** + * Provides a function to produce bucket ordinals from index of the corresponding range in the range array + */ + protected abstract LongFunction bucketOrdProducer(); } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index b73e3f5442117..2d64d32a46303 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -171,11 +171,6 @@ protected static long getBucketOrd(long bucketOrd) { return bucketOrd; } - /** - * Provides a function to produce bucket ordinals from the lower bound of the range - */ - protected abstract Function bucketOrdProducer(); - /** * Checks whether the top level query matches all documents on the segment * diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index 016711684f85f..a955ecd88078d 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -84,7 +84,7 @@ public final void tryOptimize(PointValues values, BiConsumer increme optimizationContext.getRanges(), Integer.MAX_VALUE, (activeIndex, docID) -> { - long ord = bucketOrdProducer().apply(activeIndex); + long ord = bucketOrdProducer().apply((long) activeIndex); try { incrementDocCount.accept(ord, (long) 1); @@ -100,7 +100,7 @@ public final void tryOptimize(PointValues values, BiConsumer increme optimizationContext.getRanges(), Integer.MAX_VALUE, (activeIndex, docCount) -> { - long ord = bucketOrdProducer().apply(activeIndex); + long ord = bucketOrdProducer().apply((long) activeIndex); incrementDocCount.accept(ord, (long) docCount); } ); @@ -108,9 +108,4 @@ public final void tryOptimize(PointValues values, BiConsumer increme optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); } - - /** - * Provides a function to produce bucket ordinals from index of the corresponding range in the range array - */ - protected abstract Function bucketOrdProducer(); } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java index 3924724566e4c..02ed661c99265 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/TreeTraversal.java @@ -72,7 +72,7 @@ public RangeAwareIntersectVisitor(PointValues.PointTree pointTree, PackedValueRa this.activeIndex = packedValueRanges.firstRangeIndex(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); } - public int getActiveIndex() { + public long getActiveIndex() { return activeIndex; } From ed84ebb6a3008ad0b8a08ac3d0fbf702b335d599 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 9 Aug 2024 00:46:37 -0700 Subject: [PATCH 27/29] Replace bucket ord producer lambda with object Signed-off-by: Finn Carroll --- .../bucket/composite/CompositeAggregator.java | 13 +-- .../AutoDateHistogramAggregator.java | 5 ++ .../histogram/DateHistogramAggregator.java | 14 ++-- .../bucket/range/RangeAggregator.java | 6 +- .../filterrewrite/AggregatorBridge.java | 65 +++++++++++++-- .../CompositeAggregatorBridge.java | 40 --------- .../DateHistogramAggregatorBridge.java | 82 +++++++------------ .../filterrewrite/OptimizationContext.java | 2 +- .../filterrewrite/RangeAggregatorBridge.java | 41 ++-------- 9 files changed, 116 insertions(+), 152 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index c1efd0a20fbea..d803bca5d47f5 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -77,6 +77,7 @@ import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.opensearch.search.internal.SearchContext; import org.opensearch.search.optimization.filterrewrite.CompositeAggregatorBridge; +import org.opensearch.search.optimization.filterrewrite.DateHistogramAggregatorBridge; import org.opensearch.search.optimization.filterrewrite.OptimizationContext; import org.opensearch.search.searchafter.SearchAfterBuilder; import org.opensearch.search.sort.SortAndFormats; @@ -194,6 +195,11 @@ public boolean canOptimize() { @Override public void prepare() throws IOException { buildRanges(context); + this.ordProducer = new DateHistogramAggregatorBridge.DateHistoOrdProducer( + getFieldType(), + optimizationContext.getRanges(), + bucketOrds, + getRoundingPrepared()); } protected Rounding getRounding(final long low, final long high) { @@ -215,14 +221,9 @@ protected long[] processAfterKey(long[] bounds, long interval) { } @Override - protected int getSize() { + protected int rangeMax() { return size; } - - @Override - protected LongFunction bucketOrdProducer() { - return (key) -> bucketOrds.add(0, getRoundingPrepared().round((long) key)); - } }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index 44ab5e35fac9d..15023faed9171 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -169,6 +169,11 @@ public boolean canOptimize() { @Override public void prepare() throws IOException { buildRanges(context); + this.ordProducer = new DateHistogramAggregatorBridge.DateHistoOrdProducer( + getFieldType(), + optimizationContext.getRanges(), + getBucketOrds(), + getRoundingPrepared()); } @Override diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 5c15794432a2c..b9877a24b7d14 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -39,6 +39,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.Rounding; import org.opensearch.common.lease.Releasables; +import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -54,6 +55,8 @@ import org.opensearch.search.internal.SearchContext; import org.opensearch.search.optimization.filterrewrite.DateHistogramAggregatorBridge; import org.opensearch.search.optimization.filterrewrite.OptimizationContext; +import org.opensearch.search.optimization.filterrewrite.PackedValueRanges; +import org.opensearch.search.optimization.filterrewrite.RangeAggregatorBridge; import java.io.IOException; import java.util.Collections; @@ -129,6 +132,11 @@ public boolean canOptimize() { @Override public void prepare() throws IOException { buildRanges(context); + this.ordProducer = new DateHistogramAggregatorBridge.DateHistoOrdProducer( + getFieldType(), + optimizationContext.getRanges(), + bucketOrds, + preparedRounding); } @Override @@ -145,12 +153,6 @@ protected Rounding.Prepared getRoundingPrepared() { protected long[] processHardBounds(long[] bounds) { return super.processHardBounds(bounds, hardBounds); } - - @Override - protected LongFunction bucketOrdProducer() { - return (key) -> bucketOrds.add(0, preparedRounding.round((long) key)); - } - }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 3161a5cfbfd59..536c19c1e95a8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -291,11 +291,7 @@ public boolean canOptimize() { @Override public void prepare() { buildRanges(ranges); - } - - @Override - protected LongFunction bucketOrdProducer() { - return (activeIndex) -> subBucketOrdinal(0, (int) activeIndex); + this.ordProducer = new RangeOrdProducer(); } }); if (optimizationContext.canOptimize(parent, context)) { diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java index 0974133ea175c..c0e9d1254482f 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java @@ -18,6 +18,8 @@ import java.util.function.Function; import java.util.function.LongFunction; +import static org.opensearch.search.optimization.filterrewrite.TreeTraversal.multiRangesTraverse; + /** * This interface provides a bridge between an aggregator and the optimization context, allowing * the aggregator to provide data and optimize the aggregation process. @@ -39,6 +41,20 @@ public abstract class AggregatorBridge { */ OptimizationContext optimizationContext; + /** + * Produce bucket ordinals from index of the corresponding range in the range array + */ + public abstract static class OrdProducer { + abstract long get(int idx); + } + + protected OrdProducer ordProducer; + + public long getOrd(int rangeIdx) { + return ordProducer.get(rangeIdx); + } + + /** * The field type associated with this aggregator bridge. */ @@ -70,16 +86,51 @@ void setOptimizationContext(OptimizationContext context) { public abstract void prepareFromSegment(LeafReaderContext leaf) throws IOException; /** - * Attempts to build aggregation results for a segment + * @return max range to stop collecting at. + * Utilized by aggs which stop early + */ + protected int rangeMax() { + return Integer.MAX_VALUE; + } + + /** + * Attempts to build aggregation results for a segment. + * With no sub agg count docs and avoid iterating docIds. + * If a sub agg is present we must iterate through and collect docIds to support it. * * @param values the point values (index structure for numeric values) for a segment * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket */ - public abstract void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) - throws IOException; + public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) + throws IOException { + TreeTraversal.RangeAwareIntersectVisitor treeVisitor; + if (sub != null) { + treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + rangeMax(), + (activeIndex, docID) -> { + long ord = this.getOrd(activeIndex); + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, ord); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + ); + } else { + treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( + values.getPointTree(), + optimizationContext.getRanges(), + rangeMax(), + (activeIndex, docCount) -> { + long ord = this.getOrd(activeIndex); + incrementDocCount.accept(ord, (long) docCount); + } + ); + } - /** - * Provides a function to produce bucket ordinals from index of the corresponding range in the range array - */ - protected abstract LongFunction bucketOrdProducer(); + optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); + } } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java index b84897f793ea4..1d7d381128a13 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/CompositeAggregatorBridge.java @@ -41,44 +41,4 @@ private boolean canOptimize(boolean missing, boolean hasScript, MappedFieldType } return false; } - - @Override - public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) - throws IOException { - DateFieldMapper.DateFieldType fieldType = getFieldType(); - TreeTraversal.RangeAwareIntersectVisitor treeVisitor; - if (sub != null) { - treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( - values.getPointTree(), - optimizationContext.getRanges(), - getSize(), - (activeIndex, docID) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - - try { - incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, ord); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - ); - } else { - treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( - values.getPointTree(), - optimizationContext.getRanges(), - getSize(), - (activeIndex, docCount) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docCount); - } - ); - } - - optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); - } } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index 2d64d32a46303..3330cb971e78d 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -18,6 +18,7 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.bucket.histogram.LongBounds; +import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -33,6 +34,35 @@ */ public abstract class DateHistogramAggregatorBridge extends AggregatorBridge { + public static class DateHistoOrdProducer extends OrdProducer { + DateFieldMapper.DateFieldType fieldType; + PackedValueRanges ranges; + LongKeyedBucketOrds bucketOrds; + Rounding.Prepared rounding; + + public DateHistoOrdProducer(DateFieldMapper.DateFieldType fieldType, + PackedValueRanges ranges, + LongKeyedBucketOrds bucketOrds, + Rounding.Prepared rounding) { + this.fieldType = fieldType; + this.ranges = ranges; + this.bucketOrds = bucketOrds; + this.rounding = rounding; + } + + long get(int idx) { + long rangeStart = LongPoint.decodeDimension(ranges.lowers[idx], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, rounding.round((long) rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; + } + } + protected boolean canOptimize(ValuesSourceConfig config) { if (config.script() == null && config.missing() == null) { MappedFieldType fieldType = config.fieldType(); @@ -119,58 +149,6 @@ protected DateFieldMapper.DateFieldType getFieldType() { return (DateFieldMapper.DateFieldType) fieldType; } - protected int getSize() { - return Integer.MAX_VALUE; - } - - @Override - public void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) - throws IOException { - DateFieldMapper.DateFieldType fieldType = getFieldType(); - TreeTraversal.RangeAwareIntersectVisitor treeVisitor; - if (sub != null) { - treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( - values.getPointTree(), - optimizationContext.getRanges(), - getSize(), - (activeIndex, docID) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - - try { - incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, ord); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - ); - } else { - treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( - values.getPointTree(), - optimizationContext.getRanges(), - getSize(), - (activeIndex, docCount) -> { - long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(ord, (long) docCount); - } - ); - } - - optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); - } - - protected static long getBucketOrd(long bucketOrd) { - if (bucketOrd < 0) { // already seen - bucketOrd = -1 - bucketOrd; - } - - return bucketOrd; - } - /** * Checks whether the top level query matches all documents on the segment * diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java index 8611b0b1fd201..96e6e6b652c1a 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/OptimizationContext.java @@ -89,7 +89,7 @@ void setRangesFromSegment(PackedValueRanges packedValueRanges) { this.packedValueRangesFromSegment = packedValueRanges; } - PackedValueRanges getRanges() { + public PackedValueRanges getRanges() { if (packedValueRangesFromSegment != null) return packedValueRangesFromSegment; return packedValueRanges; } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index a955ecd88078d..639f7de33857c 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -28,6 +28,12 @@ */ public abstract class RangeAggregatorBridge extends AggregatorBridge { + public static class RangeOrdProducer extends OrdProducer { + long get(int idx) { + return idx; + } + } + protected boolean canOptimize(ValuesSourceConfig config, RangeAggregator.Range[] ranges) { if (config.fieldType() == null) return false; MappedFieldType fieldType = config.fieldType(); @@ -73,39 +79,4 @@ protected void buildRanges(RangeAggregator.Range[] ranges) { public void prepareFromSegment(LeafReaderContext leaf) { throw new UnsupportedOperationException("Range aggregation should not build ranges at segment level"); } - - @Override - public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) - throws IOException { - TreeTraversal.RangeAwareIntersectVisitor treeVisitor; - if (sub != null) { - treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( - values.getPointTree(), - optimizationContext.getRanges(), - Integer.MAX_VALUE, - (activeIndex, docID) -> { - long ord = bucketOrdProducer().apply((long) activeIndex); - - try { - incrementDocCount.accept(ord, (long) 1); - sub.collect(docID, ord); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - ); - } else { - treeVisitor = new TreeTraversal.DocCountRangeAwareIntersectVisitor( - values.getPointTree(), - optimizationContext.getRanges(), - Integer.MAX_VALUE, - (activeIndex, docCount) -> { - long ord = bucketOrdProducer().apply((long) activeIndex); - incrementDocCount.accept(ord, (long) docCount); - } - ); - } - - optimizationContext.consumeDebugInfo(multiRangesTraverse(treeVisitor)); - } } From 8a53ce5f37780735c01ff1acc83ec9307d3e1ac6 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 9 Aug 2024 09:04:41 -0700 Subject: [PATCH 28/29] Remove unused method Signed-off-by: Finn Carroll --- .../bucket/histogram/AutoDateHistogramAggregator.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index 15023faed9171..bc0db49ba1cb0 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -204,12 +204,6 @@ protected Rounding getRounding(final long low, final long high) { protected Prepared getRoundingPrepared() { return preparedRounding; } - - @Override - protected LongFunction bucketOrdProducer() { - return (key) -> getBucketOrds().add(0, preparedRounding.round((long) key)); - } - }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); From ccc4d7b7698275d839e62f8a028267cd14757c75 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 9 Aug 2024 10:52:05 -0700 Subject: [PATCH 29/29] Move ordProducer delegate to agg bridge function. Ranges need to be produced dynamically to support auto date histograms. Signed-off-by: Finn Carroll --- .../bucket/composite/CompositeAggregator.java | 23 +++++++++----- .../AutoDateHistogramAggregator.java | 23 +++++++++----- .../histogram/DateHistogramAggregator.java | 23 +++++++++----- .../bucket/range/RangeAggregator.java | 5 +--- .../filterrewrite/AggregatorBridge.java | 24 ++++++--------- .../DateHistogramAggregatorBridge.java | 30 ------------------- .../filterrewrite/PackedValueRanges.java | 8 +++++ .../filterrewrite/RangeAggregatorBridge.java | 6 ---- 8 files changed, 63 insertions(+), 79 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index d803bca5d47f5..c8ecb0d9dc68c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -32,6 +32,7 @@ package org.opensearch.search.aggregations.bucket.composite; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; @@ -193,14 +194,7 @@ public boolean canOptimize() { } @Override - public void prepare() throws IOException { - buildRanges(context); - this.ordProducer = new DateHistogramAggregatorBridge.DateHistoOrdProducer( - getFieldType(), - optimizationContext.getRanges(), - bucketOrds, - getRoundingPrepared()); - } + public void prepare() throws IOException { buildRanges(context); } protected Rounding getRounding(final long low, final long high) { return valuesSource.getRounding(); @@ -224,6 +218,19 @@ protected long[] processAfterKey(long[] bounds, long interval) { protected int rangeMax() { return size; } + + @Override + protected long getOrd(int rangeIdx){ + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; + } }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index bc0db49ba1cb0..ee036c8c837aa 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -31,6 +31,7 @@ package org.opensearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; @@ -167,14 +168,7 @@ public boolean canOptimize() { } @Override - public void prepare() throws IOException { - buildRanges(context); - this.ordProducer = new DateHistogramAggregatorBridge.DateHistoOrdProducer( - getFieldType(), - optimizationContext.getRanges(), - getBucketOrds(), - getRoundingPrepared()); - } + public void prepare() throws IOException { buildRanges(context); } @Override protected Rounding getRounding(final long low, final long high) { @@ -204,6 +198,19 @@ protected Rounding getRounding(final long low, final long high) { protected Prepared getRoundingPrepared() { return preparedRounding; } + + @Override + protected long getOrd(int rangeIdx){ + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = getBucketOrds().add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; + } }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index b9877a24b7d14..99d8d44393ecf 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -31,6 +31,7 @@ package org.opensearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; @@ -130,14 +131,7 @@ public boolean canOptimize() { } @Override - public void prepare() throws IOException { - buildRanges(context); - this.ordProducer = new DateHistogramAggregatorBridge.DateHistoOrdProducer( - getFieldType(), - optimizationContext.getRanges(), - bucketOrds, - preparedRounding); - } + public void prepare() throws IOException { buildRanges(context); } @Override protected Rounding getRounding(long low, long high) { @@ -153,6 +147,19 @@ protected Rounding.Prepared getRoundingPrepared() { protected long[] processHardBounds(long[] bounds) { return super.processHardBounds(bounds, hardBounds); } + + @Override + protected long getOrd(int rangeIdx){ + long rangeStart = LongPoint.decodeDimension(optimizationContext.getRanges().getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; + } }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 536c19c1e95a8..74ce313803c8b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -289,10 +289,7 @@ public boolean canOptimize() { } @Override - public void prepare() { - buildRanges(ranges); - this.ordProducer = new RangeOrdProducer(); - } + public void prepare() { buildRanges(ranges); } }); if (optimizationContext.canOptimize(parent, context)) { optimizationContext.prepare(); diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java index c0e9d1254482f..ed8c86747698c 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/AggregatorBridge.java @@ -41,20 +41,6 @@ public abstract class AggregatorBridge { */ OptimizationContext optimizationContext; - /** - * Produce bucket ordinals from index of the corresponding range in the range array - */ - public abstract static class OrdProducer { - abstract long get(int idx); - } - - protected OrdProducer ordProducer; - - public long getOrd(int rangeIdx) { - return ordProducer.get(rangeIdx); - } - - /** * The field type associated with this aggregator bridge. */ @@ -87,12 +73,19 @@ void setOptimizationContext(OptimizationContext context) { /** * @return max range to stop collecting at. - * Utilized by aggs which stop early + * Utilized by aggs which stop early. */ protected int rangeMax() { return Integer.MAX_VALUE; } + /** + * Translate an index of the packed value range array to an agg bucket ordinal. + */ + protected long getOrd(int rangeIdx){ + return rangeIdx; + } + /** * Attempts to build aggregation results for a segment. * With no sub agg count docs and avoid iterating docIds. @@ -104,6 +97,7 @@ protected int rangeMax() { public final void tryOptimize(PointValues values, BiConsumer incrementDocCount, final LeafBucketCollector sub) throws IOException { TreeTraversal.RangeAwareIntersectVisitor treeVisitor; + if (sub != null) { treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor( values.getPointTree(), diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java index 3330cb971e78d..1e7008134ad32 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/DateHistogramAggregatorBridge.java @@ -33,36 +33,6 @@ * For date histogram aggregation */ public abstract class DateHistogramAggregatorBridge extends AggregatorBridge { - - public static class DateHistoOrdProducer extends OrdProducer { - DateFieldMapper.DateFieldType fieldType; - PackedValueRanges ranges; - LongKeyedBucketOrds bucketOrds; - Rounding.Prepared rounding; - - public DateHistoOrdProducer(DateFieldMapper.DateFieldType fieldType, - PackedValueRanges ranges, - LongKeyedBucketOrds bucketOrds, - Rounding.Prepared rounding) { - this.fieldType = fieldType; - this.ranges = ranges; - this.bucketOrds = bucketOrds; - this.rounding = rounding; - } - - long get(int idx) { - long rangeStart = LongPoint.decodeDimension(ranges.lowers[idx], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long ord = bucketOrds.add(0, rounding.round((long) rangeStart)); - - if (ord < 0) { // already seen - ord = -1 - ord; - } - - return ord; - } - } - protected boolean canOptimize(ValuesSourceConfig config) { if (config.script() == null && config.missing() == null) { MappedFieldType fieldType = config.fieldType(); diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java index bfee822d1ce85..aec893aff6e99 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/PackedValueRanges.java @@ -42,6 +42,14 @@ public static boolean withinUpperBound(byte[] value, byte[] upperBound) { return compareByteValue(value, upperBound) < 0; } + public byte[] getLower(int idx){ + return lowers[idx]; + } + + public byte[] getUpper(int idx){ + return uppers[idx]; + } + public boolean withinLowerBound(byte[] value, int idx) { return PackedValueRanges.withinLowerBound(value, lowers[idx]); } diff --git a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java index 639f7de33857c..2adddbbf535b1 100644 --- a/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/optimization/filterrewrite/RangeAggregatorBridge.java @@ -28,12 +28,6 @@ */ public abstract class RangeAggregatorBridge extends AggregatorBridge { - public static class RangeOrdProducer extends OrdProducer { - long get(int idx) { - return idx; - } - } - protected boolean canOptimize(ValuesSourceConfig config, RangeAggregator.Range[] ranges) { if (config.fieldType() == null) return false; MappedFieldType fieldType = config.fieldType();