diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml index f090532aecfc2..75df90e75392b 100644 --- a/.idea/inspectionProfiles/Project_Default.xml +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -5,8 +5,9 @@ + - + \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_number_field_term.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_number_field_term.yml index e49f1f17d8abd..2bd60d606b5dd 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_number_field_term.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/350_number_field_term.yml @@ -58,7 +58,7 @@ setup: integer: 1 long: 1 short: 1 - unsigned_long: 1 + unsigned_long: 10223372036854775807 - do: headers: @@ -74,7 +74,7 @@ setup: integer: 1 long: 1 short: 1 - unsigned_long: 1 + unsigned_long: 10223372036854775807 - do: @@ -91,7 +91,7 @@ setup: integer: 1 long: 1 short: 1 - unsigned_long: 1 + unsigned_long: 10223372036854775807 - do: diff --git a/server/src/main/java/org/apache/lucene/document/LongHashSet.java b/server/src/main/java/org/apache/lucene/document/LongHashSet.java new file mode 100644 index 0000000000000..fde20466042cd --- /dev/null +++ b/server/src/main/java/org/apache/lucene/document/LongHashSet.java @@ -0,0 +1,138 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.document; + +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +/** Set of longs, optimized for docvalues usage */ +public final class LongHashSet implements Accountable { + private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class); + + private static final long MISSING = Long.MIN_VALUE; + + final long[] table; + final int mask; + final boolean hasMissingValue; + final int size; + /** minimum value in the set, or Long.MAX_VALUE for an empty set */ + public final long minValue; + /** maximum value in the set, or Long.MIN_VALUE for an empty set */ + public final long maxValue; + + /** Construct a set. Values must be in sorted order. */ + public LongHashSet(long[] values) { + int tableSize = Math.toIntExact(values.length * 3L / 2); + tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2 + assert tableSize >= values.length * 3L / 2; + table = new long[tableSize]; + Arrays.fill(table, MISSING); + mask = tableSize - 1; + boolean hasMissingValue = false; + int size = 0; + long previousValue = Long.MIN_VALUE; // for assert + for (long value : values) { + if (value == MISSING) { + size += hasMissingValue ? 0 : 1; + hasMissingValue = true; + } else if (add(value)) { + ++size; + } + assert value >= previousValue : "values must be provided in sorted order"; + previousValue = value; + } + this.hasMissingValue = hasMissingValue; + this.size = size; + this.minValue = values.length == 0 ? Long.MAX_VALUE : values[0]; + this.maxValue = values.length == 0 ? Long.MIN_VALUE : values[values.length - 1]; + } + + private boolean add(long l) { + assert l != MISSING; + final int slot = Long.hashCode(l) & mask; + for (int i = slot;; i = (i + 1) & mask) { + if (table[i] == MISSING) { + table[i] = l; + return true; + } else if (table[i] == l) { + // already added + return false; + } + } + } + + /** + * check for membership in the set. + * + *

You should use {@link #minValue} and {@link #maxValue} to guide/terminate iteration before + * calling this. + */ + public boolean contains(long l) { + if (l == MISSING) { + return hasMissingValue; + } + final int slot = Long.hashCode(l) & mask; + for (int i = slot;; i = (i + 1) & mask) { + if (table[i] == MISSING) { + return false; + } else if (table[i] == l) { + return true; + } + } + } + + /** returns a stream of all values contained in this set */ + LongStream stream() { + LongStream stream = Arrays.stream(table).filter(v -> v != MISSING); + if (hasMissingValue) { + stream = LongStream.concat(LongStream.of(MISSING), stream); + } + return stream; + } + + @Override + public int hashCode() { + return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table)); + } + + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof LongHashSet) { + LongHashSet that = (LongHashSet) obj; + return size == that.size + && minValue == that.minValue + && maxValue == that.maxValue + && mask == that.mask + && hasMissingValue == that.hasMissingValue + && Arrays.equals(table, that.table); + } + return false; + } + + @Override + public String toString() { + return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]")); + } + + /** number of elements in the set */ + public int size() { + return size; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table); + } +} diff --git a/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java new file mode 100644 index 0000000000000..1cfbbd5ebf2a8 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.document; + +import org.apache.lucene.document.LongHashSet; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Objects; + +/** + * The {@link org.apache.lucene.document.SortedNumericDocValuesSetQuery} implementation for unsigned long numeric data type. + * + * @opensearch.internal + */ +public abstract class SortedUnsignedLongDocValuesSetQuery extends Query { + + private final String field; + private final LongHashSet numbers; + + SortedUnsignedLongDocValuesSetQuery(String field, BigInteger[] numbers) { + this.field = Objects.requireNonNull(field); + Arrays.sort(numbers); + this.numbers = new LongHashSet(Arrays.stream(numbers).mapToLong(n -> n.longValue()).toArray()); + } + + @Override + public String toString(String field) { + return new StringBuilder().append(field).append(": ").append(numbers.toString()).toString(); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (numbers.size() == 0) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + SortedUnsignedLongDocValuesSetQuery that = (SortedUnsignedLongDocValuesSetQuery) other; + return field.equals(that.field) && numbers.equals(that.numbers); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, numbers); + } + + abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return DocValues.isCacheable(ctx, field); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + SortedNumericDocValues values = getValues(context.reader(), field); + if (values == null) { + return null; + } + final NumericDocValues singleton = DocValues.unwrapSingleton(values); + final TwoPhaseIterator iterator; + if (singleton != null) { + iterator = new TwoPhaseIterator(singleton) { + @Override + public boolean matches() throws IOException { + long value = singleton.longValue(); + return Long.compareUnsigned(value, numbers.minValue) >= 0 + && Long.compareUnsigned(value, numbers.maxValue) <= 0 + && numbers.contains(value); + } + + @Override + public float matchCost() { + return 5; // 2 comparisions, possible lookup in the set + } + }; + } else { + iterator = new TwoPhaseIterator(values) { + @Override + public boolean matches() throws IOException { + int count = values.docValueCount(); + for (int i = 0; i < count; i++) { + final long value = values.nextValue(); + if (Long.compareUnsigned(value, numbers.minValue) < 0) { + continue; + } else if (Long.compareUnsigned(value, numbers.maxValue) > 0) { + return false; // values are sorted, terminate + } else if (numbers.contains(value)) { + return true; + } + } + return false; + } + + @Override + public float matchCost() { + return 5; // 2 comparisons, possible lookup in the set + } + }; + } + return new ConstantScoreScorer(this, score(), scoreMode, iterator); + } + }; + } + + public static Query newSlowSetQuery(String field, BigInteger... values) { + return new SortedUnsignedLongDocValuesSetQuery(field, values) { + @Override + SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException { + FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + // Queries have some optimizations when one sub scorer returns null rather + // than a scorer that does not match any documents + return null; + } + return DocValues.getSortedNumeric(reader, field); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index e77cbb816b9c4..7e35e561b898e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -62,6 +62,7 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; +import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData.NumericType; import org.opensearch.index.fielddata.plain.SortedNumericIndexFieldData; @@ -1018,11 +1019,11 @@ public Query termsQuery(String field, List values, boolean hasDocvalues, if (isSearchable && hasDocvalues) { Query query = BigIntegerPoint.newSetQuery(field, v); - Query dvQuery = SortedNumericDocValuesField.newSlowSetQuery(field, points); + Query dvQuery = SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery(field, v); return new IndexOrDocValuesQuery(query, dvQuery); } if (hasDocvalues) { - return SortedNumericDocValuesField.newSlowSetQuery(field, points); + return SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery(field, v); } return BigIntegerPoint.newSetQuery(field, v); } diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index a6d01adf18865..af852b12e7a30 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -66,6 +66,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; +import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.MappedFieldType.Relation; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; @@ -413,6 +414,22 @@ public void testUnsignedLongRangeQuery() { assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } + public void testUnsignedLongTermsQuery() { + MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.UNSIGNED_LONG); + Query expected = new IndexOrDocValuesQuery( + BigIntegerPoint.newSetQuery("field", BigInteger.valueOf(1), BigInteger.valueOf(3)), + SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery("field", BigInteger.valueOf(1), BigInteger.valueOf(3)) + ); + assertEquals(expected, ft.termsQuery(List.of("1", "3"), MOCK_QSC)); + + MappedFieldType unsearchable = unsearchable(); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.termsQuery(List.of("1", "3"), MOCK_QSC) + ); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); + } + public void testDoubleRangeQuery() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE); Query expected = new IndexOrDocValuesQuery(