From b7645d286cfffb6c8f27506228afb47379201c12 Mon Sep 17 00:00:00 2001
From: Sandesh Kumar
Date: Mon, 26 Feb 2024 15:44:18 -0800
Subject: [PATCH] Increase code coverage and minor refactoring

Signed-off-by: Sandesh Kumar
---
 .../GlobalOrdinalsStringTermsAggregator.java  | 23 +++--
 .../terms/KeywordTermsAggregatorTests.java    | 95 ++++++++++++++-----
 .../bucket/terms/TermsAggregatorTests.java    | 30 ++----
 .../aggregations/AggregatorTestCase.java      | 50 +++++++++-
 4 files changed, 137 insertions(+), 61 deletions(-)

diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
index 39aae29a36e5f..2a83710117e74 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
@@ -158,10 +158,10 @@ public void setWeight(Weight weight) {
     }
 
     /**
-     Collects term frequencies for a given field from a LeafReaderContext.
+     Collects term frequencies for a given field from a LeafReaderContext directly from the stored segment terms.
      @param ctx The LeafReaderContext to collect terms from
      @param globalOrds The SortedSetDocValues for the field's ordinals
      @param ordCountConsumer A consumer to accept collected term frequencies
      @return A LeafBucketCollector implementation with collection termination, since collection is complete
      @throws IOException If an I/O error occurs during reading
      */
@@ -170,15 +170,18 @@ LeafBucketCollector termDocFreqCollector(
         SortedSetDocValues globalOrds,
         BiConsumer<Long, Integer> ordCountConsumer
     ) throws IOException {
-        if (weight == null || weight.count(ctx) != ctx.reader().maxDoc()) {
-            // weight.count(ctx) == ctx.reader().maxDoc() implies there are no deleted documents and
-            // top-level query matches all docs in the segment
-            return null;
-        }
-
-        if (weight.count(ctx) == 0) {
-            // No documents matches top level query on this segment, we can skip the segment
-            return LeafBucketCollector.NO_OP_COLLECTOR;
+        if (weight == null) {
+            // Weight not assigned - cannot use this optimization
+            return null;
+        } else {
+            if (weight.count(ctx) == 0) {
+                // No documents match the top-level query on this segment, so the segment can be skipped entirely
+                return LeafBucketCollector.NO_OP_COLLECTOR;
+            } else if (weight.count(ctx) != ctx.reader().maxDoc()) {
+                // weight.count(ctx) == ctx.reader().maxDoc() implies there are no deleted documents and
+                // the top-level query matches all docs in the segment
+                return null;
+            }
         }
 
         Terms segmentTerms = ctx.reader().terms(this.fieldName);
@@ -198,6 +201,8 @@ LeafBucketCollector termDocFreqCollector(
         TermsEnum globalOrdinalTermsEnum = globalOrds.termsEnum();
         BytesRef ordinalTerm = globalOrdinalTermsEnum.next();
 
+        // Iterate over the terms in the segment, look for matches in the global ordinal terms,
+        // and increment the bucket count when segment terms match global ordinal terms.
         while (indexTerm != null && ordinalTerm != null) {
             int compare = indexTerm.compareTo(ordinalTerm);
             if (compare == 0) {
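Note for readers: the loop annotated by the last hunk above is a sorted merge join between two term streams, the segment's own TermsEnum and the global-ordinals TermsEnum, accumulating the segment-level docFreq for every term that also exists in the global ordinal dictionary. A minimal, self-contained sketch of the same merge pattern follows; it is illustrative only (plain sorted lists stand in for Lucene's TermsEnum/BytesRef, and the class and method names are hypothetical):

    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;
    import java.util.function.BiConsumer;

    public class TermMergeJoinSketch {
        // Both term lists must be sorted, mirroring TermsEnum iteration order.
        static void mergeJoin(
            List<String> segmentTerms,
            List<Integer> docFreqs,
            List<String> globalTerms,
            BiConsumer<Long, Integer> ordCountConsumer
        ) {
            int i = 0, j = 0;
            while (i < segmentTerms.size() && j < globalTerms.size()) {
                int compare = segmentTerms.get(i).compareTo(globalTerms.get(j));
                if (compare == 0) {
                    // Match: global ordinal j receives the segment-level doc frequency.
                    ordCountConsumer.accept((long) j, docFreqs.get(i));
                    i++;
                    j++;
                } else if (compare < 0) {
                    i++; // advance the segment side
                } else {
                    j++; // global term absent from this segment
                }
            }
        }

        public static void main(String[] args) {
            Map<Long, Integer> counts = new TreeMap<>();
            mergeJoin(List.of("apple", "kiwi"), List.of(3, 5), List.of("apple", "banana", "kiwi"), counts::put);
            System.out.println(counts); // prints {0=3, 2=5}
        }
    }

Because every bucket count comes from docFreq rather than per-document collection, the collector returned after this loop can terminate collection immediately, which is why the tests below can skip postCollection() on this path.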
diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java
index 4229361aa7f46..5d1e02116f189 100644
--- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java
+++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java
@@ -42,8 +42,10 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.util.BytesRef;
 
+import org.opensearch.common.TriConsumer;
 import org.opensearch.index.mapper.KeywordFieldMapper;
 import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.search.aggregations.Aggregator;
 import org.opensearch.search.aggregations.AggregatorTestCase;
 import org.opensearch.search.aggregations.support.ValueType;
 
@@ -68,61 +70,103 @@ public class KeywordTermsAggregatorTests extends AggregatorTestCase {
         dataset = d;
     }
 
+    private static final Consumer<InternalMappedTerms> VERIFY_MATCH_ALL_DOCS = agg -> {
+        assertEquals(9, agg.getBuckets().size());
+        for (int i = 0; i < 9; i++) {
+            StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i);
+            assertThat(bucket.getKey(), equalTo(String.valueOf(9L - i)));
+            assertThat(bucket.getDocCount(), equalTo(9L - i));
+        }
+    };
+
+    private static final Query MATCH_ALL_DOCS_QUERY = new MatchAllDocsQuery();
+
+    private static final Query MATCH_NO_DOCS_QUERY = new MatchNoDocsQuery();
+
     public void testMatchNoDocs() throws IOException {
         testSearchCase(
-            new MatchNoDocsQuery(),
+            ADD_SORTED_FIELD_NO_STORE,
+            MATCH_NO_DOCS_QUERY,
             dataset,
             aggregation -> aggregation.field(KEYWORD_FIELD),
             agg -> assertEquals(0, agg.getBuckets().size()),
-            null // without type hint
+            null, // without type hint
+            DEFAULT_POST_COLLECTION
         );
 
         testSearchCase(
-            new MatchNoDocsQuery(),
+            ADD_SORTED_FIELD_NO_STORE,
+            MATCH_NO_DOCS_QUERY,
             dataset,
             aggregation -> aggregation.field(KEYWORD_FIELD),
             agg -> assertEquals(0, agg.getBuckets().size()),
-            ValueType.STRING // with type hint
+            ValueType.STRING, // with type hint
+            DEFAULT_POST_COLLECTION
         );
     }
 
     public void testMatchAllDocs() throws IOException {
-        Query query = new MatchAllDocsQuery();
-
-        testSearchCase(query, dataset, aggregation -> aggregation.field(KEYWORD_FIELD), agg -> {
-            assertEquals(9, agg.getBuckets().size());
-            for (int i = 0; i < 9; i++) {
-                StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i);
-                assertThat(bucket.getKey(), equalTo(String.valueOf(9L - i)));
-                assertThat(bucket.getDocCount(), equalTo(9L - i));
-            }
-        },
-            null // without type hint
+        testSearchCase(
+            ADD_SORTED_FIELD_NO_STORE,
+            MATCH_ALL_DOCS_QUERY,
+            dataset,
+            aggregation -> aggregation.field(KEYWORD_FIELD),
+            VERIFY_MATCH_ALL_DOCS,
+            null, // without type hint
+            DEFAULT_POST_COLLECTION
         );
 
-        testSearchCase(query, dataset, aggregation -> aggregation.field(KEYWORD_FIELD), agg -> {
-            assertEquals(9, agg.getBuckets().size());
-            for (int i = 0; i < 9; i++) {
-                StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i);
-                assertThat(bucket.getKey(), equalTo(String.valueOf(9L - i)));
-                assertThat(bucket.getDocCount(), equalTo(9L - i));
-            }
-        },
-            ValueType.STRING // with type hint
+        testSearchCase(
+            ADD_SORTED_FIELD_NO_STORE,
+            MATCH_ALL_DOCS_QUERY,
+            dataset,
+            aggregation -> aggregation.field(KEYWORD_FIELD),
+            VERIFY_MATCH_ALL_DOCS,
+            ValueType.STRING, // with type hint
+            DEFAULT_POST_COLLECTION
         );
     }
 
+    public void testMatchAllDocsWithStoredValues() throws IOException {
+        // aggregator.postCollection() is not required when the LeafBucketCollector#termDocFreqCollector optimization is used,
+        // therefore NOOP_POST_COLLECTION is passed here.
+        // This also verifies that the bucket counts are complete without running postCollection().
+        testSearchCase(
+            ADD_SORTED_FIELD_STORE,
+            MATCH_ALL_DOCS_QUERY,
+            dataset,
+            aggregation -> aggregation.field(KEYWORD_FIELD),
+            VERIFY_MATCH_ALL_DOCS,
+            null, // without type hint
+            NOOP_POST_COLLECTION
+        );
+
+        testSearchCase(
+            ADD_SORTED_FIELD_STORE,
+            MATCH_ALL_DOCS_QUERY,
+            dataset,
+            aggregation -> aggregation.field(KEYWORD_FIELD),
+            VERIFY_MATCH_ALL_DOCS,
+            ValueType.STRING, // with type hint
+            NOOP_POST_COLLECTION
+        );
+    }
+
     private void testSearchCase(
+        TriConsumer<Document, String, String> addField,
         Query query,
         List<String> dataset,
         Consumer<TermsAggregationBuilder> configure,
         Consumer<InternalMappedTerms> verify,
-        ValueType valueType
+        ValueType valueType,
+        Consumer<Aggregator> postCollectionConsumer
     ) throws IOException {
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
                 Document document = new Document();
                 for (String value : dataset) {
-                    document.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(value)));
+                    addField.apply(document, KEYWORD_FIELD, value);
                     indexWriter.addDocument(document);
                     document.clear();
@@ -147,5 +191,4 @@ private void testSearchCase(
             }
         }
     }
-
 }
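A side note on why these keyword tests distinguish the two indexing modes: Lucene only exposes a segment-level term dictionary (LeafReader#terms) for fields indexed with postings, which is what the StringField in ADD_SORTED_FIELD_STORE provides; a field carrying only SortedSetDocValues has no postings, so termDocFreqCollector returns null and the aggregator falls back to per-document collection. A standalone probe, not part of the patch, demonstrating this (class and field names are made up; assumes lucene-core on the classpath):

    import java.io.IOException;
    import java.nio.file.Files;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.SortedSetDocValuesField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.BytesRef;

    public class StoredTermsProbe {
        public static void main(String[] args) throws IOException {
            try (FSDirectory dir = FSDirectory.open(Files.createTempDirectory("terms-probe"));
                 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                Document doc = new Document();
                // Doc values only: no postings, so LeafReader#terms returns null for this field.
                doc.add(new SortedSetDocValuesField("dv_only", new BytesRef("apple")));
                // Doc values plus an indexed keyword: segment terms can be walked directly.
                doc.add(new SortedSetDocValuesField("dv_and_postings", new BytesRef("apple")));
                doc.add(new StringField("dv_and_postings", "apple", Field.Store.NO));
                writer.addDocument(doc);
                writer.commit();

                try (DirectoryReader reader = DirectoryReader.open(dir)) {
                    LeafReader leaf = reader.leaves().get(0).reader();
                    System.out.println(leaf.terms("dv_only") != null);         // false
                    System.out.println(leaf.terms("dv_and_postings") != null); // true
                }
            }
        }
    }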
diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java
index 91fc3b54faa3f..93939657b6981 100644
--- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java
+++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java
@@ -144,18 +144,6 @@ public class TermsAggregatorTests extends AggregatorTestCase {
     private static final String STRING_SCRIPT_NAME = "string_script";
     private static final String STRING_SCRIPT_OUTPUT = "Orange";
 
-    private static final Consumer<TermsAggregator> DEFAULT_POST_COLLECTION = termsAggregator -> {
-        try {
-            termsAggregator.postCollection();
-        } catch (IOException e) {
-            throw new RuntimeException(e);
-        }
-    };
-
-    // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used.
-    // using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection()
-    private static final Consumer<TermsAggregator> NOOP_POST_COLLECTION = termsAggregator -> {};
-
     @Override
     protected MapperService mapperServiceMock() {
         MapperService mapperService = mock(MapperService.class);
@@ -275,10 +263,7 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception {
      * In this case, the segment terms will not get initialized and will run without LeafBucketCollector#termDocFreqCollector optimization
      */
     public void testSimpleAggregation() throws Exception {
-        testSimple(
-            (document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))),
-            DEFAULT_POST_COLLECTION
-        );
+        testSimple(ADD_SORTED_FIELD_NO_STORE, DEFAULT_POST_COLLECTION);
     }
 
     /**
@@ -286,20 +271,17 @@ public void testSimpleAggregation() throws Exception {
      * In this case, the segment terms will get initialized and will use LeafBucketCollector#termDocFreqCollector optimization
      */
     public void testSimpleAggregationWithStoredValues() throws Exception {
-        // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used.
+        // aggregator.postCollection() is not required when the LeafBucketCollector#termDocFreqCollector optimization is used,
+        // therefore NOOP_POST_COLLECTION is passed here.
         // This also verifies that the bucket count is completed without running postCollection()
-        testSimple((document, field, value) -> {
-            document.add(new SortedSetDocValuesField(field, new BytesRef(value)));
-            document.add(new StringField(field, value, Field.Store.NO));
-        }, NOOP_POST_COLLECTION);
-
+        testSimple(ADD_SORTED_FIELD_STORE, NOOP_POST_COLLECTION);
     }
 
     /**
      * This is a utility method to test out string terms aggregation
      * @param addFieldConsumer a function that determines how a field is added to the document
      */
-    private void testSimple(TriConsumer<Document, String, String> addFieldConsumer, Consumer<TermsAggregator> postCollectionConsumer)
+    private void testSimple(TriConsumer<Document, String, String> addFieldConsumer, Consumer<Aggregator> postCollectionConsumer)
         throws Exception {
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
@@ -374,7 +356,7 @@ public void testStringIncludeExcludeWithStoredValues() throws Exception {
         }, NOOP_POST_COLLECTION);
     }
 
-    private void testStringIncludeExclude(TriConsumer<Document, String, String> addField, Consumer<TermsAggregator> postCollectionConsumer)
+    private void testStringIncludeExclude(TriConsumer<Document, String, String> addField, Consumer<Aggregator> postCollectionConsumer)
         throws Exception {
         try (Directory directory = newDirectory()) {
             try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
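The two consumers deleted here reappear in AggregatorTestCase below; the pattern itself is an injectable post-collection hook, so each test can choose whether finishing the aggregation requires postCollection(). A compact sketch of that pattern in isolation (the Collector interface is a made-up stand-in for the real Aggregator type):

    import java.io.IOException;
    import java.util.function.Consumer;

    class PostCollectionHookSketch {
        interface Collector {
            void postCollection() throws IOException;
        }

        // Default hook: flush any state buffered during collection, wrapping the checked exception.
        static final Consumer<Collector> DEFAULT_POST_COLLECTION = collector -> {
            try {
                collector.postCollection();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        };

        // No-op hook: asserts, by omission, that results are already complete before postCollection().
        static final Consumer<Collector> NOOP_POST_COLLECTION = collector -> {};

        static void runSearch(Collector collector, Consumer<Collector> postCollectionHook) {
            // ... the collect phase would run here ...
            postCollectionHook.accept(collector);
        }
    }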
diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java
index ac0447dbebf7e..ec928e1122bb3 100644
--- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java
@@ -34,11 +34,13 @@
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.InetAddressPoint;
 import org.apache.lucene.document.LatLonDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.CompositeReaderContext;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -62,6 +64,7 @@
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.CheckedConsumer;
+import org.opensearch.common.TriConsumer;
 import org.opensearch.common.TriFunction;
 import org.opensearch.common.lease.Releasable;
 import org.opensearch.common.lease.Releasables;
@@ -121,6 +124,7 @@
 import org.opensearch.search.aggregations.AggregatorFactories.Builder;
 import org.opensearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer;
 import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder;
+import org.opensearch.search.aggregations.bucket.terms.TermsAggregator;
 import org.opensearch.search.aggregations.metrics.MetricsAggregator;
 import org.opensearch.search.aggregations.pipeline.PipelineAggregator;
 import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree;
@@ -178,6 +182,26 @@ public abstract class AggregatorTestCase extends OpenSearchTestCase {
     // A list of field types that should not be tested, or are not currently supported
     private static List<String> TYPE_TEST_DENYLIST;
 
+    protected static final Consumer<Aggregator> DEFAULT_POST_COLLECTION = termsAggregator -> {
+        try {
+            termsAggregator.postCollection();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    };
+
+    // aggregator.postCollection() is not required when the LeafBucketCollector#termDocFreqCollector optimization is used.
+    // Using NOOP_POST_COLLECTION ensures that the bucket counts in the aggregation are complete before/without running postCollection()
+    protected static final Consumer<Aggregator> NOOP_POST_COLLECTION = termsAggregator -> {};
+
+    protected static final TriConsumer<Document, String, String> ADD_SORTED_FIELD_NO_STORE = (document, field, value) -> document
+        .add(new SortedSetDocValuesField(field, new BytesRef(value)));
+
+    protected static final TriConsumer<Document, String, String> ADD_SORTED_FIELD_STORE = (document, field, value) -> {
+        document.add(new SortedSetDocValuesField(field, new BytesRef(value)));
+        document.add(new StringField(field, value, Field.Store.NO));
+    };
+
     static {
         List<String> denylist = new ArrayList<>();
         denylist.add(ObjectMapper.CONTENT_TYPE); // Cannot aggregate objects
@@ -484,6 +508,16 @@ protected <A extends InternalAggregation> A searchAndReduce(
         return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes);
     }
 
+    protected <A extends InternalAggregation> A searchAndReduce(
+        IndexSearcher searcher,
+        Query query,
+        AggregationBuilder builder,
+        Consumer<Aggregator> postCollectionConsumer,
+        MappedFieldType... fieldTypes
+    ) throws IOException {
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, postCollectionConsumer, fieldTypes);
+    }
+
     protected <A extends InternalAggregation> A searchAndReduce(
         IndexSettings indexSettings,
         IndexSearcher searcher,
@@ -504,6 +538,17 @@ protected <A extends InternalAggregation> A searchAndReduce(
         return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, fieldTypes);
     }
 
+    protected <A extends InternalAggregation> A searchAndReduce(
+        IndexSettings indexSettings,
+        IndexSearcher searcher,
+        Query query,
+        AggregationBuilder builder,
+        int maxBucket,
+        MappedFieldType... fieldTypes
+    ) throws IOException {
+        return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, DEFAULT_POST_COLLECTION, fieldTypes);
+    }
+
     /**
      * Collects all documents that match the provided query {@link Query} and
      * returns the reduced {@link InternalAggregation}.
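One caveat on naming in the constants added above: the STORE/NO_STORE suffixes refer to whether the term is indexed into the segment's term dictionary (postings), not to Lucene stored fields; note that ADD_SORTED_FIELD_STORE passes Field.Store.NO. A toy usage of the same hook outside the test framework (TriConsumer is inlined so the snippet is self-contained; names are illustrative):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.SortedSetDocValuesField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.util.BytesRef;

    public class FieldWriterSketch {
        // Local stand-in for org.opensearch.common.TriConsumer.
        interface TriConsumer<A, B, C> {
            void apply(A a, B b, C c);
        }

        static final TriConsumer<Document, String, String> ADD_SORTED_FIELD_NO_STORE = (document, field, value) -> document
            .add(new SortedSetDocValuesField(field, new BytesRef(value)));

        static final TriConsumer<Document, String, String> ADD_SORTED_FIELD_STORE = (document, field, value) -> {
            document.add(new SortedSetDocValuesField(field, new BytesRef(value)));
            // Indexed into postings, but not stored for retrieval.
            document.add(new StringField(field, value, Field.Store.NO));
        };

        public static void main(String[] args) {
            Document doc = new Document();
            ADD_SORTED_FIELD_STORE.apply(doc, "keyword_field", "apple");
            System.out.println(doc.getFields().size()); // 2: doc values field + indexed keyword
        }
    }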
@@ -518,6 +563,7 @@ protected <A extends InternalAggregation> A searchAndReduce(
         Query query,
         AggregationBuilder builder,
         int maxBucket,
+        Consumer<Aggregator> postCollectionConsumer,
         MappedFieldType... fieldTypes
     ) throws IOException {
         final IndexReaderContext ctx = searcher.getTopReaderContext();
@@ -548,13 +594,13 @@ protected <A extends InternalAggregation> A searchAndReduce(
                     a.preCollection();
                     Weight weight = subSearcher.createWeight(rewritten, ScoreMode.COMPLETE, 1f);
                     subSearcher.search(weight, a);
-                    a.postCollection();
+                    postCollectionConsumer.accept(a);
                     aggs.add(a.buildTopLevel());
                 }
             } else {
                 root.preCollection();
                 searcher.search(rewritten, root);
-                root.postCollection();
+                postCollectionConsumer.accept(root);
                 aggs.add(root.buildTopLevel());
             }
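For context on the fast path all of this plumbing tests: the aggregator's choice hinges on Weight#count(LeafReaderContext), which returns the exact number of matching documents only when it can be computed cheaply, and -1 otherwise. A standalone probe, not part of the patch, showing the segment-level count the aggregator compares against maxDoc (assumes lucene-core; names are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreMode;
    import org.apache.lucene.search.Weight;
    import org.apache.lucene.store.ByteBuffersDirectory;

    public class WeightCountProbe {
        public static void main(String[] args) throws Exception {
            try (ByteBuffersDirectory dir = new ByteBuffersDirectory();
                 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
                Document doc = new Document();
                doc.add(new StringField("k", "v", Field.Store.NO));
                writer.addDocument(doc);
                writer.commit();

                try (DirectoryReader reader = DirectoryReader.open(dir)) {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    Query rewritten = searcher.rewrite(new MatchAllDocsQuery());
                    Weight weight = searcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1f);
                    for (LeafReaderContext leaf : reader.leaves()) {
                        // count == maxDoc means the query matches every live document in the
                        // segment, which is the precondition for termDocFreqCollector.
                        System.out.println(weight.count(leaf) + " / " + leaf.reader().maxDoc());
                    }
                }
            }
        }
    }

When the count is 0 the segment is skipped outright, and when it is neither 0 nor maxDoc the aggregator falls back to ordinary per-document collection, which is exactly the branch structure in the first hunk of this patch.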