diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index f090532aecfc2..75df90e75392b 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -5,8 +5,9 @@
You should use {@link #minValue} and {@link #maxValue} to guide/terminate iteration before
+     * calling this.
+     */
+    public boolean contains(long l) {
+        if (l == MISSING) { // MISSING also marks empty table slots (see below), so its membership is kept in a flag
+            return hasMissingValue;
+        }
+        final int slot = Long.hashCode(l) & mask; // starting slot for l
+        for (int i = slot;; i = (i + 1) & mask) { // step to the next slot, masked with `mask`; no other exit than the returns below
+            if (table[i] == MISSING) { // empty slot reached before finding l: not in the set
+                return false; // NOTE(review): termination presumably relies on the table always having a MISSING slot - confirm in the constructor
+            } else if (table[i] == l) {
+                return true;
+            }
+        }
+    }
+
+    /**
+     * returns a stream of all values contained in this set; when the set holds {@code MISSING} it is
+     * emitted first, followed by every table slot that is not {@code MISSING}, in table order
+     */
+    LongStream stream() {
+        LongStream stream = Arrays.stream(table).filter(v -> v != MISSING); // drop empty slots
+        if (hasMissingValue) {
+            stream = LongStream.concat(LongStream.of(MISSING), stream);
+        }
+        return stream;
+    }
+
+    @Override // hashes the same fields that equals() compares
+    public int hashCode() {
+        return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table));
+    }
+
+    @Override // equal only to another LongHashSet whose scalar fields and table contents are all identical
+    public boolean equals(Object obj) {
+        if (obj != null && obj instanceof LongHashSet) {
+            LongHashSet that = (LongHashSet) obj;
+            return size == that.size
+                && minValue == that.minValue
+                && maxValue == that.maxValue
+                && mask == that.mask
+                && hasMissingValue == that.hasMissingValue
+                && Arrays.equals(table, that.table); // slot-by-slot comparison of the backing arrays
+        }
+        return false;
+    }
+
+    @Override // renders the values of stream() as "[v1, v2, ...]", each formatted with String.valueOf(long)
+    public String toString() {
+        return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]"));
+    }
+
+    /** number of elements in the set */
+    public int size() {
+        return size;
+    }
+
+    @Override // BASE_RAM_BYTES plus the estimated size of the table array
+    public long ramBytesUsed() {
+        return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java
new file mode 100644
index 0000000000000..1cfbbd5ebf2a8
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * 
this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.document; + +import org.apache.lucene.document.LongHashSet; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Objects; + +/** + * The {@link org.apache.lucene.document.SortedNumericDocValuesSetQuery} implementation for unsigned long numeric data type. 
+ * + * @opensearch.internal + */ +public abstract class SortedUnsignedLongDocValuesSetQuery extends Query { + + private final String field; + private final LongHashSet numbers; + + SortedUnsignedLongDocValuesSetQuery(String field, BigInteger[] numbers) { + this.field = Objects.requireNonNull(field); + Arrays.sort(numbers); + this.numbers = new LongHashSet(Arrays.stream(numbers).mapToLong(n -> n.longValue()).toArray()); + } + + @Override + public String toString(String field) { + return new StringBuilder().append(field).append(": ").append(numbers.toString()).toString(); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (numbers.size() == 0) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + SortedUnsignedLongDocValuesSetQuery that = (SortedUnsignedLongDocValuesSetQuery) other; + return field.equals(that.field) && numbers.equals(that.numbers); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, numbers); + } + + abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return DocValues.isCacheable(ctx, field); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + SortedNumericDocValues values = getValues(context.reader(), field); + if (values == null) { + return null; + } + final NumericDocValues singleton = DocValues.unwrapSingleton(values); + final TwoPhaseIterator iterator; + if (singleton != null) { + 
iterator = new TwoPhaseIterator(singleton) { + @Override + public boolean matches() throws IOException { + long value = singleton.longValue(); + return Long.compareUnsigned(value, numbers.minValue) >= 0 + && Long.compareUnsigned(value, numbers.maxValue) <= 0 + && numbers.contains(value); + } + + @Override + public float matchCost() { + return 5; // 2 comparisions, possible lookup in the set + } + }; + } else { + iterator = new TwoPhaseIterator(values) { + @Override + public boolean matches() throws IOException { + int count = values.docValueCount(); + for (int i = 0; i < count; i++) { + final long value = values.nextValue(); + if (Long.compareUnsigned(value, numbers.minValue) < 0) { + continue; + } else if (Long.compareUnsigned(value, numbers.maxValue) > 0) { + return false; // values are sorted, terminate + } else if (numbers.contains(value)) { + return true; + } + } + return false; + } + + @Override + public float matchCost() { + return 5; // 2 comparisons, possible lookup in the set + } + }; + } + return new ConstantScoreScorer(this, score(), scoreMode, iterator); + } + }; + } + + public static Query newSlowSetQuery(String field, BigInteger... 
values) { + return new SortedUnsignedLongDocValuesSetQuery(field, values) { + @Override + SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException { + FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + // Queries have some optimizations when one sub scorer returns null rather + // than a scorer that does not match any documents + return null; + } + return DocValues.getSortedNumeric(reader, field); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index e77cbb816b9c4..7e35e561b898e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -62,6 +62,7 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; +import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData.NumericType; import org.opensearch.index.fielddata.plain.SortedNumericIndexFieldData; @@ -1018,11 +1019,11 @@ public Query termsQuery(String field, List