From 93f4a51d4a744d97242164006bc908887743d8dc Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Fri, 9 Aug 2024 23:39:58 -0700 Subject: [PATCH] Sub agg support for fast filter optimization Signed-off-by: Finn Carroll --- .../bucket/composite/CompositeAggregator.java | 22 +- .../filterrewrite/AggregatorBridge.java | 71 +++- .../DateHistogramAggregatorBridge.java | 49 +-- .../FilterRewriteOptimizationContext.java | 21 +- .../bucket/filterrewrite/Helper.java | 4 +- .../{Ranges.java => PackedValueRanges.java} | 53 ++- .../filterrewrite/PointTreeTraversal.java | 326 ++++++++++-------- .../filterrewrite/RangeAggregatorBridge.java | 32 +- .../AutoDateHistogramAggregator.java | 19 +- .../histogram/DateHistogramAggregator.java | 20 +- .../bucket/range/RangeAggregator.java | 7 +- 11 files changed, 340 insertions(+), 284 deletions(-) rename server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/{Ranges.java => PackedValueRanges.java} (63%) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index cfe716eb57ca8..8cd5b99c05cc5 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -32,6 +32,7 @@ package org.opensearch.search.aggregations.bucket.composite; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; @@ -89,7 +90,6 @@ import java.util.List; import java.util.Map; import java.util.function.BiConsumer; -import java.util.function.Function; import java.util.function.LongUnaryOperator; import java.util.stream.Collectors; @@ -190,9 +190,7 @@ protected boolean canOptimize() { } @Override - protected void prepare() throws IOException { - buildRanges(context); - } + protected void prepare() throws IOException { buildRanges(context); } protected Rounding getRounding(final long low, final long high) { return valuesSource.getRounding(); @@ -213,13 +211,21 @@ protected long[] processAfterKey(long[] bounds, long interval) { } @Override - protected int getSize() { + protected int rangeMax() { return size; } @Override - protected Function bucketOrdProducer() { - return (key) -> bucketOrds.add(0, getRoundingPrepared().round((long) key)); + protected long getOrd(int rangeIdx){ + long rangeStart = LongPoint.decodeDimension(filterRewriteOptimizationContext.getRanges().getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; } }; filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); @@ -557,7 +563,7 @@ private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) t @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, sub, segmentMatchAll(context, ctx)); if (optimized) throw new CollectionTerminatedException(); finishLeaf(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java index 6b34582b259ea..a425a06ee5b43 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java @@ -11,11 +11,14 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.LeafBucketCollector; import java.io.IOException; import java.util.function.BiConsumer; import java.util.function.Consumer; +import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; + /** * This interface provides a bridge between an aggregator and the optimization context, allowing * the aggregator to provide data and optimize the aggregation process. @@ -37,9 +40,9 @@ public abstract class AggregatorBridge { */ MappedFieldType fieldType; - Consumer setRanges; + Consumer setRanges; - void setRangesConsumer(Consumer setRanges) { + void setRangesConsumer(Consumer setRanges) { this.setRanges = setRanges; } @@ -67,18 +70,62 @@ void setRangesConsumer(Consumer setRanges) { * * @param leaf the leaf reader context for the segment */ - abstract Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException; + abstract PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException; + + /** + * @return max range to stop collecting at. + * Utilized by aggs which stop early. + */ + protected int rangeMax() { + return Integer.MAX_VALUE; + } + + /** + * Translate an index of the packed value range array to an agg bucket ordinal. + */ + protected long getOrd(int rangeIdx){ + return rangeIdx; + } /** - * Attempts to build aggregation results for a segment + * Attempts to build aggregation results for a segment. + * With no sub agg count docs and avoid iterating docIds. + * If a sub agg is present we must iterate through and collect docIds to support it. * - * @param values the point values (index structure for numeric values) for a segment - * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket - * @param ranges + * @param values the point values (index structure for numeric values) for a segment + * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket */ - abstract FilterRewriteOptimizationContext.DebugInfo tryOptimize( - PointValues values, - BiConsumer incrementDocCount, - Ranges ranges - ) throws IOException; + public final FilterRewriteOptimizationContext.DebugInfo tryOptimize(PointValues values, BiConsumer incrementDocCount, PackedValueRanges ranges, final LeafBucketCollector sub) + throws IOException { + PointTreeTraversal.RangeAwareIntersectVisitor treeVisitor; + + if (sub != null) { + treeVisitor = new PointTreeTraversal.DocCollectRangeAwareIntersectVisitor( + values.getPointTree(), + ranges, + rangeMax(), + (activeIndex, docID) -> { + long ord = this.getOrd(activeIndex); + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, ord); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + ); + } else { + treeVisitor = new PointTreeTraversal.DocCountRangeAwareIntersectVisitor( + values.getPointTree(), + ranges, + rangeMax(), + (activeIndex, docCount) -> { + long ord = this.getOrd(activeIndex); + incrementDocCount.accept(ord, (long) docCount); + } + ); + } + + return multiRangesTraverse(treeVisitor); + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java index 8bff3fdc5fefb..d7d932aa4afc0 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java @@ -8,11 +8,10 @@ package org.opensearch.search.aggregations.bucket.filterrewrite; -import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; + import org.opensearch.common.Rounding; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.MappedFieldType; @@ -22,10 +21,6 @@ import java.io.IOException; import java.util.OptionalLong; -import java.util.function.BiConsumer; -import java.util.function.Function; - -import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; /** * For date histogram aggregation @@ -54,12 +49,12 @@ protected void buildRanges(SearchContext context) throws IOException { } @Override - final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException { + final PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException { long[] bounds = Helper.getSegmentBounds(leaf, fieldType.name()); return buildRanges(bounds, maxRewriteFilters); } - private Ranges buildRanges(long[] bounds, int maxRewriteFilters) { + private PackedValueRanges buildRanges(long[] bounds, int maxRewriteFilters) { bounds = processHardBounds(bounds); if (bounds == null) { return null; @@ -116,47 +111,11 @@ protected long[] processHardBounds(long[] bounds, LongBounds hardBounds) { return bounds; } - private DateFieldMapper.DateFieldType getFieldType() { + public DateFieldMapper.DateFieldType getFieldType() { assert fieldType instanceof DateFieldMapper.DateFieldType; return (DateFieldMapper.DateFieldType) fieldType; } - protected int getSize() { - return Integer.MAX_VALUE; - } - - @Override - final FilterRewriteOptimizationContext.DebugInfo tryOptimize( - PointValues values, - BiConsumer incrementDocCount, - Ranges ranges - ) throws IOException { - int size = getSize(); - - DateFieldMapper.DateFieldType fieldType = getFieldType(); - BiConsumer incrementFunc = (activeIndex, docCount) -> { - long rangeStart = LongPoint.decodeDimension(ranges.lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long bucketOrd = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(bucketOrd, (long) docCount); - }; - - return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size); - } - - private static long getBucketOrd(long bucketOrd) { - if (bucketOrd < 0) { // already seen - bucketOrd = -1 - bucketOrd; - } - - return bucketOrd; - } - - /** - * Provides a function to produce bucket ordinals from the lower bound of the range - */ - protected abstract Function bucketOrdProducer(); - /** * Checks whether the top level query matches all documents on the segment * diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java index 87faafe4526de..a780d16354cd7 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java @@ -14,6 +14,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; +import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.search.internal.SearchContext; @@ -40,7 +41,7 @@ public final class FilterRewriteOptimizationContext { private final AggregatorBridge aggregatorBridge; private String shardId; - private Ranges ranges; // built at shard level + private PackedValueRanges ranges; // built at shard level // debug info related fields private final AtomicInteger leafNodeVisited = new AtomicInteger(); @@ -84,10 +85,14 @@ private boolean canOptimize(final Object parent, final int subAggLength, SearchC return canOptimize; } - void setRanges(Ranges ranges) { + public void setRanges(PackedValueRanges ranges) { this.ranges = ranges; } + public PackedValueRanges getRanges() { + return this.ranges; + } + /** * Try to populate the bucket doc counts for aggregation *

@@ -96,7 +101,7 @@ void setRanges(Ranges ranges) { * @param incrementDocCount consume the doc_count results for certain ordinal * @param segmentMatchAll if your optimization can prepareFromSegment, you should pass in this flag to decide whether to prepareFromSegment */ - public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer incrementDocCount, boolean segmentMatchAll) + public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer incrementDocCount, LeafBucketCollector sub, boolean segmentMatchAll) throws IOException { segments.incrementAndGet(); if (!canOptimize) { @@ -120,10 +125,10 @@ public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer= 0; + } + + public static boolean withinUpperBound(byte[] value, byte[] upperBound) { + return compareByteValue(value, upperBound) < 0; + } + + public byte[] getLower(int idx){ + return lowers[idx]; + } + + public byte[] getUpper(int idx){ + return uppers[idx]; + } + + public boolean withinLowerBound(byte[] value, int idx) { + return PackedValueRanges.withinLowerBound(value, lowers[idx]); + } + + public boolean withinUpperBound(byte[] value, int idx) { + return PackedValueRanges.withinUpperBound(value, uppers[idx]); + } + + public boolean withinRange(byte[] value, int idx) { + return withinLowerBound(value, idx) && withinUpperBound(value, idx); + } + public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { if (compareByteValue(lowers[0], globalMax) > 0) { return -1; @@ -42,16 +75,4 @@ public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { } return i; } - - public static int compareByteValue(byte[] value1, byte[] value2) { - return comparator.compare(value1, 0, value2, 0); - } - - public static boolean withinLowerBound(byte[] value, byte[] lowerBound) { - return compareByteValue(value, lowerBound) >= 0; - } - - public static boolean withinUpperBound(byte[] value, byte[] upperBound) { - return compareByteValue(value, upperBound) < 0; - } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java index 581ecc416f486..331893ea8af49 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java @@ -13,7 +13,6 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.DocIdSetIterator; -import org.opensearch.common.CheckedRunnable; import java.io.IOException; import java.util.function.BiConsumer; @@ -23,201 +22,240 @@ /** * Utility class for traversing a {@link PointValues.PointTree} and collecting document counts for the ranges. * - *

The main entry point is the {@link #multiRangesTraverse(PointValues.PointTree, Ranges, - * BiConsumer, int)} method + *

The main entry point is the {@link #multiRangesTraverse(RangeAwareIntersectVisitor)} method * - *

The class uses a {@link RangeCollectorForPointTree} to keep track of the active ranges and - * determine which parts of the tree to visit. The {@link - * PointValues.IntersectVisitor} implementation is responsible for the actual visitation and - * document count collection. + *

The class uses a {@link RangeAwareIntersectVisitor} to keep track of the active ranges, traverse the tree, and + * consume documents. */ -final class PointTreeTraversal { - private PointTreeTraversal() {} - +public final class PointTreeTraversal { private static final Logger logger = LogManager.getLogger(Helper.loggerName); /** - * Traverses the given {@link PointValues.PointTree} and collects document counts for the intersecting ranges. - * - * @param tree the point tree to traverse - * @param ranges the set of ranges to intersect with - * @param incrementDocCount a callback to increment the document count for a range bucket - * @param maxNumNonZeroRanges the maximum number of non-zero ranges to collect - * @return a {@link FilterRewriteOptimizationContext.DebugInfo} object containing debug information about the traversal + * Traverse the RangeAwareIntersectVisitor PointTree. + * Collects and returns DebugInfo from traversal + * @param visitor the maximum number of non-zero ranges to collect + * @return a {@link FilterRewriteOptimizationContext.DebugInfo} object containing debug information about the traversal */ - static FilterRewriteOptimizationContext.DebugInfo multiRangesTraverse( - final PointValues.PointTree tree, - final Ranges ranges, - final BiConsumer incrementDocCount, - final int maxNumNonZeroRanges - ) throws IOException { + public static FilterRewriteOptimizationContext.DebugInfo multiRangesTraverse(RangeAwareIntersectVisitor visitor) throws IOException { FilterRewriteOptimizationContext.DebugInfo debugInfo = new FilterRewriteOptimizationContext.DebugInfo(); - int activeIndex = ranges.firstRangeIndex(tree.getMinPackedValue(), tree.getMaxPackedValue()); - if (activeIndex < 0) { + + if (visitor.getActiveIndex() < 0) { logger.debug("No ranges match the query, skip the fast filter optimization"); return debugInfo; } - RangeCollectorForPointTree collector = new RangeCollectorForPointTree(incrementDocCount, maxNumNonZeroRanges, ranges, activeIndex); - PointValues.IntersectVisitor visitor = getIntersectVisitor(collector); + try { - intersectWithRanges(visitor, tree, collector, debugInfo); + visitor.traverse(debugInfo); } catch (CollectionTerminatedException e) { logger.debug("Early terminate since no more range to collect"); } - collector.finalizePreviousRange(); return debugInfo; } - private static void intersectWithRanges( - PointValues.IntersectVisitor visitor, - PointValues.PointTree pointTree, - RangeCollectorForPointTree collector, - FilterRewriteOptimizationContext.DebugInfo debug - ) throws IOException { - PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); - - switch (r) { - case CELL_INSIDE_QUERY: - collector.countNode((int) pointTree.size()); - debug.visitInner(); - break; - case CELL_CROSSES_QUERY: - if (pointTree.moveToChild()) { - do { - intersectWithRanges(visitor, pointTree, collector, debug); - } while (pointTree.moveToSibling()); - pointTree.moveToParent(); - } else { - pointTree.visitDocValues(visitor); - debug.visitLeaf(); - } - break; - case CELL_OUTSIDE_QUERY: + /** + * This IntersectVisitor contains a packed value representation of Ranges + * as well as the current activeIndex being considered for collection. + */ + public static abstract class RangeAwareIntersectVisitor implements PointValues.IntersectVisitor { + private final PointValues.PointTree pointTree; + private final PackedValueRanges packedValueRanges; + private final int maxNumNonZeroRange; + protected int visitedRange = 0; + protected int activeIndex; + + public RangeAwareIntersectVisitor(PointValues.PointTree pointTree, PackedValueRanges packedValueRanges, int maxNumNonZeroRange) { + this.packedValueRanges = packedValueRanges; + this.pointTree = pointTree; + this.maxNumNonZeroRange = maxNumNonZeroRange; + this.activeIndex = packedValueRanges.firstRangeIndex(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); } - } - private static PointValues.IntersectVisitor getIntersectVisitor(RangeCollectorForPointTree collector) { - return new PointValues.IntersectVisitor() { - @Override - public void visit(int docID) { - // this branch should be unreachable - throw new UnsupportedOperationException( - "This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited" - ); + public long getActiveIndex() { + return activeIndex; + } + + public abstract void visit(int docID); + + public abstract void visit(int docID, byte[] packedValue); + + public abstract void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException; + + protected abstract void consumeContainedNode(PointValues.PointTree pointTree) throws IOException; + + protected abstract void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException; + + public void traverse(FilterRewriteOptimizationContext.DebugInfo debug) throws IOException { + PointValues.Relation r = compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + switch (r) { + case CELL_INSIDE_QUERY: + consumeContainedNode(pointTree); + debug.visitInner(); + break; + case CELL_CROSSES_QUERY: + if (pointTree.moveToChild()) { + do { + traverse(debug); + } while (pointTree.moveToSibling()); + pointTree.moveToParent(); + } else { + consumeCrossedNode(pointTree); + debug.visitLeaf(); + } + break; + case CELL_OUTSIDE_QUERY: } + } - @Override - public void visit(int docID, byte[] packedValue) throws IOException { - visitPoints(packedValue, collector::count); + /** + * increment activeIndex until we run out of ranges or find a valid range that contains maxPackedValue + * else throw CollectionTerminatedException if we run out of ranges to check + * @param minPackedValue lower bound of PointValues.PointTree node + * @param maxPackedValue upper bound of PointValues.PointTree node + * @return the min/max values of the PointValues.PointTree node can be one of: + * 1.) Completely outside the activeIndex range + * 2.) Completely inside the activeIndex range + * 3.) Overlapping with the activeIndex range + */ + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + // try to find the first range that may collect values from this cell + if (!packedValueRanges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { + throw new CollectionTerminatedException(); } - @Override - public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { - visitPoints(packedValue, () -> { - for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { - collector.count(); - } - }); + // after the loop, min < upper + // cell could be outside [min max] lower + if (!packedValueRanges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { + return PointValues.Relation.CELL_OUTSIDE_QUERY; } - private void visitPoints(byte[] packedValue, CheckedRunnable collect) throws IOException { - if (!collector.withinUpperBound(packedValue)) { - collector.finalizePreviousRange(); - if (collector.iterateRangeEnd(packedValue)) { - throw new CollectionTerminatedException(); - } - } + if (packedValueRanges.withinRange(minPackedValue, activeIndex) && packedValueRanges.withinRange(maxPackedValue, activeIndex)) { + return PointValues.Relation.CELL_INSIDE_QUERY; + } + return PointValues.Relation.CELL_CROSSES_QUERY; + } - if (collector.withinRange(packedValue)) { - collect.run(); - } + /** + * throws CollectionTerminatedException if we have reached our last range, and it does not contain packedValue + * @param packedValue determine if packedValue falls within the range at activeIndex + * @return true when packedValue falls within the activeIndex range + */ + protected boolean canCollect(byte[] packedValue) { + if (!packedValueRanges.withinUpperBound(packedValue, activeIndex) && iterateRangeEnd(packedValue)) { + throw new CollectionTerminatedException(); } + return packedValueRanges.withinRange(packedValue, activeIndex); + } - @Override - public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - // try to find the first range that may collect values from this cell - if (!collector.withinUpperBound(minPackedValue)) { - collector.finalizePreviousRange(); - if (collector.iterateRangeEnd(minPackedValue)) { - throw new CollectionTerminatedException(); - } - } - // after the loop, min < upper - // cell could be outside [min max] lower - if (!collector.withinLowerBound(maxPackedValue)) { - return PointValues.Relation.CELL_OUTSIDE_QUERY; - } - if (collector.withinRange(minPackedValue) && collector.withinRange(maxPackedValue)) { - return PointValues.Relation.CELL_INSIDE_QUERY; + /** + * @param packedValue increment active index until we reach a range containing value + * @return true when we've exhausted all available ranges or visited maxNumNonZeroRange and can stop early + */ + protected boolean iterateRangeEnd(byte[] packedValue) { + // the new value may not be contiguous to the previous one + // so try to find the first next range that cross the new value + while (!packedValueRanges.withinUpperBound(packedValue, activeIndex)) { + if (++activeIndex >= packedValueRanges.size) { + return true; } - return PointValues.Relation.CELL_CROSSES_QUERY; } - }; + visitedRange++; + return visitedRange > maxNumNonZeroRange; + } } - private static class RangeCollectorForPointTree { - private final BiConsumer incrementRangeDocCount; - private int counter = 0; - - private final Ranges ranges; - private int activeIndex; - - private int visitedRange = 0; - private final int maxNumNonZeroRange; + /** + * Traverse PointTree with countDocs callback where countDock inputs are + * 1.) activeIndex for range in which document(s) reside + * 2.) total documents counted + */ + public static class DocCountRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + BiConsumer countDocs; - public RangeCollectorForPointTree( - BiConsumer incrementRangeDocCount, + public DocCountRangeAwareIntersectVisitor( + PointValues.PointTree pointTree, + PackedValueRanges packedValueRanges, int maxNumNonZeroRange, - Ranges ranges, - int activeIndex + BiConsumer countDocs ) { - this.incrementRangeDocCount = incrementRangeDocCount; - this.maxNumNonZeroRange = maxNumNonZeroRange; - this.ranges = ranges; - this.activeIndex = activeIndex; + super(pointTree, packedValueRanges, maxNumNonZeroRange); + this.countDocs = countDocs; } - private void count() { - counter++; + @Override + public void visit(int docID) { + countDocs.accept(activeIndex, 1); } - private void countNode(int count) { - counter += count; + @Override + public void visit(int docID, byte[] packedValue) { + if (canCollect(packedValue)) { + countDocs.accept(activeIndex, 1); + } } - private void finalizePreviousRange() { - if (counter > 0) { - incrementRangeDocCount.accept(activeIndex, counter); - counter = 0; + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (canCollect(packedValue)) { + for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { + countDocs.accept(activeIndex, 1); + } } } - /** - * @return true when iterator exhausted or collect enough non-zero ranges - */ - private boolean iterateRangeEnd(byte[] value) { - // the new value may not be contiguous to the previous one - // so try to find the first next range that cross the new value - while (!withinUpperBound(value)) { - if (++activeIndex >= ranges.size) { - return true; - } + protected void consumeContainedNode(PointValues.PointTree pointTree) throws IOException { + countDocs.accept(activeIndex, (int) pointTree.size()); + } + + protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocValues(this); + } + } + + /** + * Traverse PointTree with collectDocs callback where collectDocs inputs are + * 1.) activeIndex for range in which document(s) reside + * 2.) document id to collect + */ + public static class DocCollectRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + BiConsumer collectDocs; + + public DocCollectRangeAwareIntersectVisitor( + PointValues.PointTree pointTree, + PackedValueRanges packedValueRanges, + int maxNumNonZeroRange, + BiConsumer collectDocs + ) { + super(pointTree, packedValueRanges, maxNumNonZeroRange); + this.collectDocs = collectDocs; + } + + @Override + public void visit(int docID) { + collectDocs.accept(activeIndex, docID); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (canCollect(packedValue)) { + collectDocs.accept(activeIndex, docID); } - visitedRange++; - return visitedRange > maxNumNonZeroRange; } - private boolean withinLowerBound(byte[] value) { - return Ranges.withinLowerBound(value, ranges.lowers[activeIndex]); + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (canCollect(packedValue)) { + for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { + collectDocs.accept(activeIndex, iterator.docID()); + } + } } - private boolean withinUpperBound(byte[] value) { - return Ranges.withinUpperBound(value, ranges.uppers[activeIndex]); + protected void consumeContainedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocIDs(this); } - private boolean withinRange(byte[] value) { - return withinLowerBound(value) && withinUpperBound(value); + protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocValues(this); } } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java index b590a444c8b04..77ef6b231755b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java @@ -9,19 +9,12 @@ package org.opensearch.search.aggregations.bucket.filterrewrite; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumericPointEncoder; import org.opensearch.search.aggregations.bucket.range.RangeAggregator; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; -import java.io.IOException; -import java.util.function.BiConsumer; -import java.util.function.Function; - -import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; - /** * For range aggregation */ @@ -65,32 +58,11 @@ protected void buildRanges(RangeAggregator.Range[] ranges) { uppers[i] = upper; } - setRanges.accept(new Ranges(lowers, uppers)); + setRanges.accept(new PackedValueRanges(lowers, uppers)); } @Override - final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) { + final PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leaf) { throw new UnsupportedOperationException("Range aggregation should not build ranges at segment level"); } - - @Override - final FilterRewriteOptimizationContext.DebugInfo tryOptimize( - PointValues values, - BiConsumer incrementDocCount, - Ranges ranges - ) throws IOException { - int size = Integer.MAX_VALUE; - - BiConsumer incrementFunc = (activeIndex, docCount) -> { - long bucketOrd = bucketOrdProducer().apply(activeIndex); - incrementDocCount.accept(bucketOrd, (long) docCount); - }; - - return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size); - } - - /** - * Provides a function to produce bucket ordinals from index of the corresponding range in the range array - */ - protected abstract Function bucketOrdProducer(); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index f3a36b4882d19..80df7a5100343 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -31,6 +31,7 @@ package org.opensearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; @@ -166,9 +167,7 @@ protected boolean canOptimize() { } @Override - protected void prepare() throws IOException { - buildRanges(context); - } + protected void prepare() throws IOException { buildRanges(context); } @Override protected Rounding getRounding(final long low, final long high) { @@ -200,8 +199,16 @@ protected Prepared getRoundingPrepared() { } @Override - protected Function bucketOrdProducer() { - return (key) -> getBucketOrds().add(0, preparedRounding.round((long) key)); + protected long getOrd(int rangeIdx){ + long rangeStart = LongPoint.decodeDimension(filterRewriteOptimizationContext.getRanges().getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = getBucketOrds().add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; } }; filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); @@ -236,7 +243,7 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBuc return LeafBucketCollector.NO_OP_COLLECTOR; } - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, sub, segmentMatchAll(context, ctx)); if (optimized) throw new CollectionTerminatedException(); final SortedNumericDocValues values = valuesSource.longValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 96a49bc3fd5f6..22f0e2c63b0b6 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -31,6 +31,7 @@ package org.opensearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; @@ -59,7 +60,6 @@ import java.util.Collections; import java.util.Map; import java.util.function.BiConsumer; -import java.util.function.Function; import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; @@ -126,9 +126,7 @@ protected boolean canOptimize() { } @Override - protected void prepare() throws IOException { - buildRanges(context); - } + protected void prepare() throws IOException { buildRanges(context); } @Override protected Rounding getRounding(long low, long high) { @@ -146,8 +144,16 @@ protected long[] processHardBounds(long[] bounds) { } @Override - protected Function bucketOrdProducer() { - return (key) -> bucketOrds.add(0, preparedRounding.round((long) key)); + protected long getOrd(int rangeIdx){ + long rangeStart = LongPoint.decodeDimension(filterRewriteOptimizationContext.getRanges().getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; } }; filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); @@ -167,7 +173,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol return LeafBucketCollector.NO_OP_COLLECTOR; } - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, sub, segmentMatchAll(context, ctx)); if (optimized) throw new CollectionTerminatedException(); SortedNumericDocValues values = valuesSource.longValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 17461f228e993..6e3df042bfeec 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -291,11 +291,6 @@ protected boolean canOptimize() { protected void prepare() { buildRanges(ranges); } - - @Override - protected Function bucketOrdProducer() { - return (activeIndex) -> subBucketOrdinal(0, (int) activeIndex); - } }; filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); } @@ -310,7 +305,7 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false); + boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, sub,false); if (optimized) throw new CollectionTerminatedException(); final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);