diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
index 870c6be2e037c..7adcf6230b54e 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
@@ -75,7 +75,6 @@
 import java.util.function.Function;
 import java.util.function.LongPredicate;
 import java.util.function.LongUnaryOperator;
-import java.util.logging.Logger;
 
 import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder;
 import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
@@ -87,9 +86,6 @@
  * @opensearch.internal
  */
 public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
-
-    // testing only - will remove
-    protected Logger logger = Logger.getLogger(GlobalOrdinalsStringTermsAggregator.class.getName());
     protected final ResultStrategy resultStrategy;
     protected final ValuesSource.Bytes.WithOrdinals valuesSource;
 
@@ -164,58 +160,52 @@ public void setWeight(Weight weight) {
     /**
      Collects term frequencies for a given field from a LeafReaderContext.
      @param ctx The LeafReaderContext to collect terms from
-     @param ords The SortedSetDocValues for the field's ordinals
+     @param globalOrds The SortedSetDocValues for the field's ordinals
      @param ordCountConsumer A consumer to accept collected term frequencies
-     @return A LeafBucketCollector implementation that throws an exception, since collection is complete
-     @throws IOException If an I/O error occurs during reading */
-    LeafBucketCollector termDocFreqCollector(LeafReaderContext ctx, SortedSetDocValues ords, BiConsumer ordCountConsumer)
-        throws IOException {
-        // long n0 = System.nanoTime(), n1, n2, n3, n4, n5 = 0;
+     @return {@link LeafBucketCollector#NO_OP_COLLECTOR} once term frequencies have been read from the index, or {@code null} when this shortcut does not apply to the segment
+     @throws IOException If an I/O error occurs during reading
+     */
+    LeafBucketCollector termDocFreqCollector(
+        LeafReaderContext ctx,
+        SortedSetDocValues globalOrds,
+        BiConsumer ordCountConsumer
+    ) throws IOException {
         if (weight.count(ctx) != ctx.reader().maxDoc()) {
             // Top-level query does not match all docs in this segment.
             return null;
         }
-        // n1 = System.nanoTime();
-        Terms aggTerms = ctx.reader().terms(this.fieldName);
-        if (aggTerms == null) {
+        Terms segmentTerms = ctx.reader().terms(this.fieldName);
+        if (segmentTerms == null) {
             // Field is not indexed.
             return null;
         }
-        // n2 = System.nanoTime();
+
         NumericDocValues docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME);
         if (docCountValues.nextDoc() != NO_MORE_DOCS) {
             // This segment has at least one document with the _doc_count field.
             return null;
         }
-        // n3 = System.nanoTime();
-        TermsEnum indexTermsEnum = aggTerms.iterator();
+        TermsEnum indexTermsEnum = segmentTerms.iterator();
         BytesRef indexTerm = indexTermsEnum.next();
-        TermsEnum ordinalTermsEnum = ords.termsEnum();
-        BytesRef ordinalTerm = ordinalTermsEnum.next();
-        // n4 = System.nanoTime();
+        TermsEnum globalOrdinalTermsEnum = globalOrds.termsEnum();
+        BytesRef ordinalTerm = globalOrdinalTermsEnum.next();
+
         while (indexTerm != null && ordinalTerm != null) {
             int compare = indexTerm.compareTo(ordinalTerm);
             if (compare == 0) {
-                if (acceptedGlobalOrdinals.test(ordinalTermsEnum.ord())) {
-                    ordCountConsumer.accept(ordinalTermsEnum.ord(), indexTermsEnum.docFreq());
+                if (acceptedGlobalOrdinals.test(globalOrdinalTermsEnum.ord())) {
+                    ordCountConsumer.accept(globalOrdinalTermsEnum.ord(), indexTermsEnum.docFreq());
                 }
                 indexTerm = indexTermsEnum.next();
-                ordinalTerm = ordinalTermsEnum.next();
+                ordinalTerm = globalOrdinalTermsEnum.next();
             } else if (compare < 0) {
                 indexTerm = indexTermsEnum.next();
             } else {
-                ordinalTerm = ordinalTermsEnum.next();
+                ordinalTerm = globalOrdinalTermsEnum.next();
             }
-            // n5 = System.nanoTime();
         }
-        // logger.info((n1 - n0) + " " + (n2 - n1) + " " + (n3 - n2) + " " + (n4 - n3) + " " + (n5 - n4));
-        // return new LeafBucketCollector() {
-        // @Override
-        // public void collect(int doc, long owningBucketOrd) {
-        // throw new CollectionTerminatedException();
-        // }
-        // };
         return LeafBucketCollector.NO_OP_COLLECTOR;
     }
 
@@ -228,10 +218,10 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol
             LeafBucketCollector termDocFreqCollector = termDocFreqCollector(
                 ctx,
                 globalOrds,
-                (o, c) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, o), c)
+                (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, ord), docCount)
             );
             if (termDocFreqCollector != null) {
                 return termDocFreqCollector;
             }
         }
 
@@ -436,7 +426,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol
                 LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector(
                     ctx,
                     segmentOrds,
-                    (o, c) -> segmentDocCounts.increment(o + 1, c)
+                    (ord, docCount) -> segmentDocCounts.increment(ord + 1, docCount)
                 );
                 if (termDocFreqCollector != null) {
                     return termDocFreqCollector;
diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java
index cc4ff0e9cd301..4ac30834b743f 100644
--- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java
+++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java
@@ -308,6 +308,9 @@ private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collecto
             // See please https://github.com/apache/lucene/pull/964
             collector.setWeight(weight);
             leafCollector = collector.getLeafCollector(ctx);
+            if (leafCollector == null) {
+                return;
+            }
         } catch (CollectionTerminatedException e) {
             // there is no doc of interest in this reader context
             // continue with the following leaf