Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Sort] Sending missing values in comparator instead sending null #11196

Merged
merged 2 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,341 @@
- length: { hits.hits: 1 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.population: null }

---
"numeric skipping logic with competitive missing value":
- skip:
version: " - 2.12.99"
reason: newly added test, supported from 3.0.0

# This test checks if skipping logic is skipped in case missing values are competitive
# for all numeric type int, long, float, half_float, double, unsigned_long.
# We are inserting 24 documents with some missing values and giving search after parameter
# as missing value. The secondary sort field is on id which doesn't have missing value.
# In case skipping logic is applied in Lucene, it will skipp all documents with primary sort field
# missing value even though it should list sort by secondary field id with missing value primary field.
# This test is addressing bugs like here https://github.com/opensearch-project/OpenSearch/issues/9537

- do:
indices.create:
index: test
body:
mappings:
properties:
halffloat:
type: half_float
long:
type: long
int:
type: integer
float:
type: float
double:
type: double
unsignedlong:
type: unsigned_long
- do:
bulk:
refresh: true
index: test
body: |
{"index":{}}
{"id": 1, "halffloat": 1, "long": 1, "int": 1, "float": 1, "double": 1, "unsignedlong": 1}
{"index":{}}
{"id": 2, "halffloat": 2, "long": 2, "int": 2, "float": 2, "double": 2, "unsignedlong": 2}
{"index":{}}
{"id": 3, "halffloat": 3, "long": 3, "int": 3, "float": 3, "double": 3, "unsignedlong": 3}
{"index":{}}
{"id": 4, "halffloat": 4, "long": 4, "int": 4, "float": 4, "double": 4, "unsignedlong": 4}
{"index":{}}
{"id": 5, "halffloat": 5, "long": 5, "int": 5, "float": 5, "double": 5, "unsignedlong": 5}
{"index":{}}
{"id": 6, "halffloat": 6, "long": 6, "int": 6, "float": 6, "double": 6, "unsignedlong": 6}
{"index":{}}
{"id": 7, "halffloat": 7, "long": 7, "int": 7, "float": 7, "double": 7, "unsignedlong": 7}
{"index":{}}
{"id": 8, "halffloat": 8, "long": 8, "int": 8, "float": 8, "double": 8, "unsignedlong": 8}
{"index":{}}
{"id": 9, "halffloat": 9, "long": 9, "int": 9, "float": 9, "double": 9, "unsignedlong": 9}
{"index":{}}
{"id": 10, "halffloat": 10, "long": 10, "int": 10, "float": 10, "double": 10, "unsignedlong": 10}
{"index":{}}
{"id": 11, "halffloat": 11, "long": 11, "int": 11, "float": 11, "double": 11, "unsignedlong": 11}
{"index":{}}
{"id": 12, "halffloat": 12, "long": 12, "int": 12, "float": 12, "double": 12, "unsignedlong": 12}
{"index":{}}
{"id": 13, "halffloat": 13, "long": 13, "int": 13, "float": 13, "double": 13, "unsignedlong": 13}
{"index":{}}
{"id": 14, "halffloat": 14, "long": 14, "int": 14, "float": 14, "double": 14, "unsignedlong": 14}
{"index":{}}
{"id": 15, "halffloat": 15, "long": 15, "int": 15, "float": 15, "double": 15, "unsignedlong": 15}
{"index":{}}
{"id": 16, "halffloat": 16, "long": 16, "int": 16, "float": 16, "double": 16, "unsignedlong": 16}
{"index":{}}
{"id": 17, "halffloat": 17, "long": 17, "int": 17, "float": 17, "double": 17, "unsignedlong": 17}
{"index":{}}
{"id": 18, "halffloat": 18, "long": 18, "int": 18, "float": 18, "double": 18, "unsignedlong": 18}
{"index":{}}
{"id": 19, "halffloat": 19, "long": 19, "int": 19, "float": 19, "double": 19, "unsignedlong": 19}
{"index":{}}
{"id": 20, "halffloat": 20, "long": 20, "int": 20, "float": 20, "double": 20, "unsignedlong": 20}
{"index":{}}
{"id": 21, "halffloat": null, "long": null, "int": null, "float": null, "double": null, "unsignedlong": null}
{"index":{}}
{"id": 22, "halffloat": null, "long": null, "int": null, "float": null, "double": null, "unsignedlong": null}
{"index":{}}
{"id": 23, "halffloat": null, "long": null, "int": null, "float": null, "double": null, "unsignedlong": null}
{"index":{}}
{"id": 24, "halffloat": null, "long": null, "int": null, "float": null, "double": null, "unsignedlong": null}

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "halffloat": { "order": "asc" } }, { "id": { "order": "asc" } } ]
search_after: [ 200, 0 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.halffloat: null }
- match: { hits.hits.0._source.id: 21 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.halffloat: null }
- match: { hits.hits.1._source.id: 22 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.halffloat: null }
- match: { hits.hits.2._source.id: 23 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "halffloat": { "order": "desc" } }, { "id": { "order": "desc" } } ]
search_after: [ 0, 25 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.halffloat: null }
- match: { hits.hits.0._source.id: 24 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.halffloat: null }
- match: { hits.hits.1._source.id: 23 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.halffloat: null }
- match: { hits.hits.2._source.id: 22 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "long": { "order": "asc" } }, { "id": { "order": "asc" } } ]
search_after: [ 200, 0 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.long: null }
- match: { hits.hits.0._source.id: 21 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.long: null }
- match: { hits.hits.1._source.id: 22 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.long: null }
- match: { hits.hits.2._source.id: 23 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "long": { "order": "desc" } }, { "id": { "order": "desc" } } ]
search_after: [ 0, 25 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.long: null }
- match: { hits.hits.0._source.id: 24 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.long: null }
- match: { hits.hits.1._source.id: 23 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.long: null }
- match: { hits.hits.2._source.id: 22 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "int": { "order": "asc" } }, { "id": { "order": "asc" } } ]
search_after: [ 200, 0 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.int: null }
- match: { hits.hits.0._source.id: 21 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.int: null }
- match: { hits.hits.1._source.id: 22 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.int: null }
- match: { hits.hits.2._source.id: 23 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "int": { "order": "desc" } }, { "id": { "order": "desc" } } ]
search_after: [ 0, 25 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.int: null }
- match: { hits.hits.0._source.id: 24 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.int: null }
- match: { hits.hits.1._source.id: 23 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.int: null }
- match: { hits.hits.2._source.id: 22 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "float": { "order": "asc" } }, { "id": { "order": "asc" } } ]
search_after: [ 200, 0 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.float: null }
- match: { hits.hits.0._source.id: 21 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.float: null }
- match: { hits.hits.1._source.id: 22 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.float: null }
- match: { hits.hits.2._source.id: 23 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "float": { "order": "desc" } }, { "id": { "order": "desc" } } ]
search_after: [ 0, 25 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.float: null }
- match: { hits.hits.0._source.id: 24 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.float: null }
- match: { hits.hits.1._source.id: 23 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.float: null }
- match: { hits.hits.2._source.id: 22 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "double": { "order": "asc" } }, { "id": { "order": "asc" } } ]
search_after: [ 200, 0 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.double: null }
- match: { hits.hits.0._source.id: 21 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.double: null }
- match: { hits.hits.1._source.id: 22 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.double: null }
- match: { hits.hits.2._source.id: 23 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "double": { "order": "desc" } }, { "id": { "order": "desc" } } ]
search_after: [ 0, 25 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.double: null }
- match: { hits.hits.0._source.id: 24 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.double: null }
- match: { hits.hits.1._source.id: 23 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.double: null }
- match: { hits.hits.2._source.id: 22 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "unsignedlong": { "order": "asc" } }, { "id": { "order": "asc" } } ]
search_after: [ 200, 0 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.unsignedlong: null }
- match: { hits.hits.0._source.id: 21 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.unsignedlong: null }
- match: { hits.hits.1._source.id: 22 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.unsignedlong: null }
- match: { hits.hits.2._source.id: 23 }

- do:
search:
index: test
rest_total_hits_as_int: true
body:
"size": 3
"sort": [ { "unsignedlong": { "order": "desc" } }, { "id": { "order": "desc" } } ]
search_after: [ 0, 25 ] # making it out of min/max so only missing value hit is qualified

- match: { hits.total: 24 }
- length: { hits.hits: 3 }
- match: { hits.hits.0._index: test }
- match: { hits.hits.0._source.unsignedlong: null }
- match: { hits.hits.0._source.id: 24 }
- match: { hits.hits.1._index: test }
- match: { hits.hits.1._source.unsignedlong: null }
- match: { hits.hits.1._source.id: 23 }
- match: { hits.hits.2._index: test }
- match: { hits.hits.2._source.unsignedlong: null }
- match: { hits.hits.2._source.id: 22 }
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,7 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, boolean e
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final double dMissingValue = (Double) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new DoubleComparator(numHits, fieldname, null, reversed, enableSkipping && this.enableSkipping) {
return new DoubleComparator(numHits, fieldname, dMissingValue, reversed, enableSkipping && this.enableSkipping) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new DoubleLeafComparator(context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,7 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, boolean e
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final float fMissingValue = (Float) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new FloatComparator(numHits, fieldname, null, reversed, enableSkipping && this.enableSkipping) {
return new FloatComparator(numHits, fieldname, fMissingValue, reversed, enableSkipping && this.enableSkipping) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new FloatLeafComparator(context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,14 @@
public FieldComparator<?> newComparator(String fieldname, int numHits, boolean enableSkipping, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final float fMissingValue = (Float) missingObject(missingValue, reversed);

Check warning on line 48 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L48

Added line #L48 was not covered by tests
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new HalfFloatComparator(numHits, fieldname, null, reversed, enableSkipping && this.enableSkipping) {
return new HalfFloatComparator(numHits, fieldname, fMissingValue, reversed, enableSkipping && this.enableSkipping) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new HalfFloatLeafComparator(context) {

Check warning on line 52 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L52

Added line #L52 was not covered by tests
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
return HalfFloatValuesComparatorSource.this.getNumericDocValues(context, fMissingValue).getRawFloatValues();

Check warning on line 55 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L55

Added line #L55 was not covered by tests
}
};
}
Expand All @@ -62,14 +60,14 @@
}

private NumericDoubleValues getNumericDocValues(LeafReaderContext context, float missingValue) throws IOException {
final SortedNumericDoubleValues values = indexFieldData.load(context).getDoubleValues();

Check warning on line 63 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L63

Added line #L63 was not covered by tests
if (nested == null) {
return FieldData.replaceMissing(sortMode.select(values), missingValue);

Check warning on line 65 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L65

Added line #L65 was not covered by tests
} else {
final BitSet rootDocs = nested.rootDocs(context);
final DocIdSetIterator innerDocs = nested.innerDocs(context);

Check warning on line 68 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L67-L68

Added lines #L67 - L68 were not covered by tests
final int maxChildren = nested.getNestedSort() != null ? nested.getNestedSort().getMaxChildren() : Integer.MAX_VALUE;
return sortMode.select(values, missingValue, rootDocs, innerDocs, context.reader().maxDoc(), maxChildren);

Check warning on line 70 in server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/HalfFloatValuesComparatorSource.java#L70

Added line #L70 was not covered by tests
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,7 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, boolean e
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final int iMissingValue = (Integer) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new IntComparator(numHits, fieldname, null, reversed, enableSkipping && this.enableSkipping) {
return new IntComparator(numHits, fieldname, iMissingValue, reversed, enableSkipping && this.enableSkipping) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new IntLeafComparator(context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, boolean e
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final long lMissingValue = (Long) missingObject(missingValue, reversed);
// NOTE: it's important to pass null as a missing value in the constructor so that
// the comparator doesn't check docsWithField since we replace missing values in select()
return new LongComparator(numHits, fieldname, null, reversed, enableSkipping && this.enableSkipping) {
return new LongComparator(numHits, fieldname, lMissingValue, reversed, enableSkipping && this.enableSkipping) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
return new LongLeafComparator(context) {
Expand Down
Loading
Loading