forked from opensearch-project/OpenSearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding new SortedUnsignedLongDocValuesSetQuery to allow for BigInteger Terms query
Signed-off-by: Harsha Vamsi Kalluri <[email protected]>
- Loading branch information
1 parent
670afb4
commit 2cdeb08
Showing
6 changed files
with
324 additions
and
6 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
server/src/main/java/org/apache/lucene/document/LongHashSet.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.apache.lucene.document; | ||
|
||
import org.apache.lucene.util.Accountable; | ||
import org.apache.lucene.util.RamUsageEstimator; | ||
import org.apache.lucene.util.packed.PackedInts; | ||
|
||
import java.util.Arrays; | ||
import java.util.Objects; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.LongStream; | ||
|
||
/** Set of longs, optimized for docvalues usage */ | ||
public final class LongHashSet implements Accountable { | ||
private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class); | ||
|
||
private static final long MISSING = Long.MIN_VALUE; | ||
|
||
final long[] table; | ||
final int mask; | ||
final boolean hasMissingValue; | ||
final int size; | ||
/** minimum value in the set, or Long.MAX_VALUE for an empty set */ | ||
public final long minValue; | ||
/** maximum value in the set, or Long.MIN_VALUE for an empty set */ | ||
public final long maxValue; | ||
|
||
/** Construct a set. Values must be in sorted order. */ | ||
public LongHashSet(long[] values) { | ||
int tableSize = Math.toIntExact(values.length * 3L / 2); | ||
tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2 | ||
assert tableSize >= values.length * 3L / 2; | ||
table = new long[tableSize]; | ||
Arrays.fill(table, MISSING); | ||
mask = tableSize - 1; | ||
boolean hasMissingValue = false; | ||
int size = 0; | ||
long previousValue = Long.MIN_VALUE; // for assert | ||
for (long value : values) { | ||
if (value == MISSING) { | ||
size += hasMissingValue ? 0 : 1; | ||
hasMissingValue = true; | ||
} else if (add(value)) { | ||
++size; | ||
} | ||
assert value >= previousValue : "values must be provided in sorted order"; | ||
previousValue = value; | ||
} | ||
this.hasMissingValue = hasMissingValue; | ||
this.size = size; | ||
this.minValue = values.length == 0 ? Long.MAX_VALUE : values[0]; | ||
this.maxValue = values.length == 0 ? Long.MIN_VALUE : values[values.length - 1]; | ||
} | ||
|
||
private boolean add(long l) { | ||
assert l != MISSING; | ||
final int slot = Long.hashCode(l) & mask; | ||
for (int i = slot;; i = (i + 1) & mask) { | ||
if (table[i] == MISSING) { | ||
table[i] = l; | ||
return true; | ||
} else if (table[i] == l) { | ||
// already added | ||
return false; | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* check for membership in the set. | ||
* | ||
* <p>You should use {@link #minValue} and {@link #maxValue} to guide/terminate iteration before | ||
* calling this. | ||
*/ | ||
public boolean contains(long l) { | ||
if (l == MISSING) { | ||
return hasMissingValue; | ||
} | ||
final int slot = Long.hashCode(l) & mask; | ||
for (int i = slot;; i = (i + 1) & mask) { | ||
if (table[i] == MISSING) { | ||
return false; | ||
} else if (table[i] == l) { | ||
return true; | ||
} | ||
} | ||
} | ||
|
||
/** returns a stream of all values contained in this set */ | ||
LongStream stream() { | ||
LongStream stream = Arrays.stream(table).filter(v -> v != MISSING); | ||
if (hasMissingValue) { | ||
stream = LongStream.concat(LongStream.of(MISSING), stream); | ||
} | ||
return stream; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table)); | ||
} | ||
|
||
@Override | ||
public boolean equals(Object obj) { | ||
if (obj != null && obj instanceof LongHashSet) { | ||
LongHashSet that = (LongHashSet) obj; | ||
return size == that.size | ||
&& minValue == that.minValue | ||
&& maxValue == that.maxValue | ||
&& mask == that.mask | ||
&& hasMissingValue == that.hasMissingValue | ||
&& Arrays.equals(table, that.table); | ||
} | ||
return false; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]")); | ||
} | ||
|
||
/** number of elements in the set */ | ||
public int size() { | ||
return size; | ||
} | ||
|
||
@Override | ||
public long ramBytesUsed() { | ||
return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table); | ||
} | ||
} |
161 changes: 161 additions & 0 deletions
161
server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.document; | ||
|
||
import org.apache.lucene.document.LongHashSet; | ||
import org.apache.lucene.index.DocValues; | ||
import org.apache.lucene.index.FieldInfo; | ||
import org.apache.lucene.index.LeafReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.NumericDocValues; | ||
import org.apache.lucene.index.SortedNumericDocValues; | ||
import org.apache.lucene.search.ConstantScoreScorer; | ||
import org.apache.lucene.search.ConstantScoreWeight; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.MatchNoDocsQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.QueryVisitor; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.search.TwoPhaseIterator; | ||
import org.apache.lucene.search.Weight; | ||
|
||
import java.io.IOException; | ||
import java.math.BigInteger; | ||
import java.util.Arrays; | ||
import java.util.Objects; | ||
|
||
/** | ||
* The {@link org.apache.lucene.document.SortedNumericDocValuesSetQuery} implementation for unsigned long numeric data type. | ||
* | ||
* @opensearch.internal | ||
*/ | ||
public abstract class SortedUnsignedLongDocValuesSetQuery extends Query { | ||
|
||
private final String field; | ||
private final LongHashSet numbers; | ||
|
||
SortedUnsignedLongDocValuesSetQuery(String field, BigInteger[] numbers) { | ||
this.field = Objects.requireNonNull(field); | ||
Arrays.sort(numbers); | ||
this.numbers = new LongHashSet(Arrays.stream(numbers).mapToLong(n -> n.longValue()).toArray()); | ||
} | ||
|
||
@Override | ||
public String toString(String field) { | ||
return new StringBuilder().append(field).append(": ").append(numbers.toString()).toString(); | ||
} | ||
|
||
@Override | ||
public void visit(QueryVisitor visitor) { | ||
if (visitor.acceptField(field)) { | ||
visitor.visitLeaf(this); | ||
} | ||
} | ||
|
||
@Override | ||
public Query rewrite(IndexSearcher indexSearcher) throws IOException { | ||
if (numbers.size() == 0) { | ||
return new MatchNoDocsQuery(); | ||
} | ||
return super.rewrite(indexSearcher); | ||
} | ||
|
||
@Override | ||
public boolean equals(Object other) { | ||
if (sameClassAs(other) == false) { | ||
return false; | ||
} | ||
SortedUnsignedLongDocValuesSetQuery that = (SortedUnsignedLongDocValuesSetQuery) other; | ||
return field.equals(that.field) && numbers.equals(that.numbers); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(classHash(), field, numbers); | ||
} | ||
|
||
abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; | ||
|
||
@Override | ||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { | ||
return new ConstantScoreWeight(this, boost) { | ||
|
||
@Override | ||
public boolean isCacheable(LeafReaderContext ctx) { | ||
return DocValues.isCacheable(ctx, field); | ||
} | ||
|
||
@Override | ||
public Scorer scorer(LeafReaderContext context) throws IOException { | ||
SortedNumericDocValues values = getValues(context.reader(), field); | ||
if (values == null) { | ||
return null; | ||
} | ||
final NumericDocValues singleton = DocValues.unwrapSingleton(values); | ||
final TwoPhaseIterator iterator; | ||
if (singleton != null) { | ||
iterator = new TwoPhaseIterator(singleton) { | ||
@Override | ||
public boolean matches() throws IOException { | ||
long value = singleton.longValue(); | ||
return Long.compareUnsigned(value, numbers.minValue) >= 0 | ||
&& Long.compareUnsigned(value, numbers.maxValue) <= 0 | ||
&& numbers.contains(value); | ||
} | ||
|
||
@Override | ||
public float matchCost() { | ||
return 5; // 2 comparisions, possible lookup in the set | ||
} | ||
}; | ||
} else { | ||
iterator = new TwoPhaseIterator(values) { | ||
@Override | ||
public boolean matches() throws IOException { | ||
int count = values.docValueCount(); | ||
for (int i = 0; i < count; i++) { | ||
final long value = values.nextValue(); | ||
if (Long.compareUnsigned(value, numbers.minValue) < 0) { | ||
continue; | ||
} else if (Long.compareUnsigned(value, numbers.maxValue) > 0) { | ||
return false; // values are sorted, terminate | ||
} else if (numbers.contains(value)) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
@Override | ||
public float matchCost() { | ||
return 5; // 2 comparisons, possible lookup in the set | ||
} | ||
}; | ||
} | ||
return new ConstantScoreScorer(this, score(), scoreMode, iterator); | ||
} | ||
}; | ||
} | ||
|
||
public static Query newSlowSetQuery(String field, BigInteger... values) { | ||
return new SortedUnsignedLongDocValuesSetQuery(field, values) { | ||
@Override | ||
SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException { | ||
FieldInfo info = reader.getFieldInfos().fieldInfo(field); | ||
if (info == null) { | ||
// Queries have some optimizations when one sub scorer returns null rather | ||
// than a scorer that does not match any documents | ||
return null; | ||
} | ||
return DocValues.getSortedNumeric(reader, field); | ||
} | ||
}; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters