Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable numeric sort optimisation for few numerical sort types #6321

Merged
merged 10 commits into from
Feb 15, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Segment Replication] Fix for peer recovery ([#5344](https://github.com/opensearch-project/OpenSearch/pull/5344))
- Fix weighted shard routing state across search requests([#6004](https://github.com/opensearch-project/OpenSearch/pull/6004))
- [Segment Replication] Fix bug where inaccurate sequence numbers are sent during replication ([#6122](https://github.com/opensearch-project/OpenSearch/pull/6122))
- Enable numeric sort optimisation for few numerical sort types ([#6321](https://github.com/opensearch-project/OpenSearch/pull/6321))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader, executor), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -379,7 +380,6 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size, executor));
QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher);
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
reader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader, executor), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -331,10 +332,10 @@ public void testInOrderScrollOptimization() throws Exception {
assertNull(context.queryResult().terminatedEarly());
assertThat(context.terminateAfter(), equalTo(0));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertProfileData(context, "MatchAllDocsQuery", query -> {
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
Expand All @@ -346,25 +347,22 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size, executor));
QueryPhase.executeInternal(context.withCleanQueryResult().withProfilers(), queryPhaseSearcher);
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
assertThat(collector.getReason(), equalTo("search_terminate_after_count"));
assertThat(collector.getReason(), equalTo("search_top_hits"));
assertThat(collector.getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(1));
assertThat(collector.getProfiledChildren().get(0).getReason(), equalTo("search_top_hits"));
assertThat(collector.getProfiledChildren().get(0).getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(0));
});

reader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,76 @@ public abstract class IndexNumericFieldData implements IndexFieldData<LeafNumeri
* @opensearch.internal
*/
public enum NumericType {
BOOLEAN(false, SortField.Type.LONG, CoreValuesSourceType.BOOLEAN),
BYTE(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
SHORT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
INT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC);
BOOLEAN(false, SortField.Type.LONG, CoreValuesSourceType.BOOLEAN) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
BYTE(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
SHORT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
INT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE) {
@Deprecated
@Override
public PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
};

private final boolean floatingPoint;
private final ValuesSourceType valuesSourceType;
Expand All @@ -93,6 +153,24 @@ public final boolean isFloatingPoint() {
public final ValuesSourceType getValuesSourceType() {
return valuesSourceType;
}

@Deprecated
protected abstract PointSortOptimization applyPointSortOptimization();
}

/**
* Controls whether to apply sort optimization to skip non-competitive docs
* based on the BKD index.
*
* @deprecated this control will be removed in a future version of OpenSearch
*
* @opensearch.internal
* @opensearch.experimental
*/
@Deprecated
private enum PointSortOptimization {
ENABLED,
DISABLED
}

/**
Expand Down Expand Up @@ -133,8 +211,21 @@ public final SortField sortField(
: SortedNumericSelector.Type.MIN;
SortField sortField = new SortedNumericSortField(getFieldName(), getNumericType().sortFieldType, reverse, selectorType);
sortField.setMissingValue(source.missingObject(missingValue, reverse));
// todo: remove since deprecated
sortField.setOptimizeSortWithPoints(false);

// LUCENE-9280 added the ability for collectors to skip non-competitive
// documents when top docs are sorted by other fields different from the _score.
// However, from Lucene 9 onwards, numeric sort optimisation requires the byte size
// for points (BKD index) and doc values (columnar) and SortField.Type to be matched.
// NumericType violates this requirement
// (see: https://github.com/opensearch-project/OpenSearch/issues/2063#issuecomment-1069358826 test failure)
// because it uses the largest byte size (LONG) for the SortField of most types. The section below disables
// the BKD based sort optimization for numeric types whose encoded BYTE size does not match the comparator (LONG)/
// So as of now, we can only enable for DATE, DATE_NANOSECONDS, LONG, DOUBLE.
// BOOLEAN, BYTE, SHORT, INT, HALF_FLOAT, FLOAT (use long for doc values, but fewer for BKD Points)
// todo : Enable other SortField.Type as well, that will require wider change
if (getNumericType().applyPointSortOptimization() == PointSortOptimization.DISABLED) {
sortField.setOptimizeSortWithPoints(false);
}
return sortField;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,11 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.lucene.queries.MinDocQuery;
import org.opensearch.lucene.queries.SearchAfterSortedDocQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
Expand Down Expand Up @@ -201,17 +198,7 @@ static boolean executeInternal(SearchContext searchContext, QueryPhaseSearcher q

} else {
final ScoreDoc after = scrollContext.lastEmittedDoc;
if (returnsDocsInOrder(query, searchContext.sort())) {
// now this gets interesting: since we sort in index-order, we can directly
// skip to the desired doc
if (after != null) {
query = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST)
.add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER)
.build();
}
// ... and stop collecting after ${size} matches
searchContext.terminateAfter(searchContext.size());
} else if (canEarlyTerminate(reader, searchContext.sort())) {
if (canEarlyTerminate(reader, searchContext.sort())) {
// now this gets interesting: since the search sort is a prefix of the index sort, we can directly
// skip to the desired doc
if (after != null) {
Expand Down Expand Up @@ -366,22 +353,6 @@ private static boolean searchWithCollector(
return topDocsFactory.shouldRescore();
}

/**
* Returns true if the provided <code>query</code> returns docs in index order (internal doc ids).
* @param query The query to execute
* @param sf The query sort
*/
private static boolean returnsDocsInOrder(Query query, SortAndFormats sf) {
if (sf == null || Sort.RELEVANCE.equals(sf.sort)) {
// sort by score
// queries that return constant scores will return docs in index
// order since Lucene tie-breaks on the doc id
return query.getClass() == ConstantScoreQuery.class || query.getClass() == MatchAllDocsQuery.class;
} else {
return Sort.INDEXORDER.equals(sf.sort);
}
}

/**
* Returns whether collection within the provided <code>reader</code> can be early-terminated if it sorts
* with <code>sortAndFormats</code>.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -358,7 +359,6 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size));
QueryPhase.executeInternal(context.withCleanQueryResult());
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
reader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ public void testInOrderScrollOptimization() throws Exception {
ScrollContext scrollContext = new ScrollContext();
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader), scrollContext);
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
context.sort(new SortAndFormats(sort, new DocValueFormat[] { DocValueFormat.RAW }));
scrollContext.lastEmittedDoc = null;
scrollContext.maxScore = Float.NaN;
scrollContext.totalHits = null;
Expand All @@ -299,12 +300,11 @@ public void testInOrderScrollOptimization() throws Exception {
QueryPhase.executeInternal(context.withCleanQueryResult().withProfilers());
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertNull(context.queryResult().terminatedEarly());
assertThat(context.terminateAfter(), equalTo(0));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertProfileData(context, "MatchAllDocsQuery", query -> {
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
Expand All @@ -316,25 +316,22 @@ public void testInOrderScrollOptimization() throws Exception {
context.setSearcher(newEarlyTerminationContextSearcher(reader, size));
QueryPhase.executeInternal(context.withCleanQueryResult().withProfilers());
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) numDocs));
assertThat(context.terminateAfter(), equalTo(size));
assertThat(context.queryResult().getTotalHits().value, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().topDocs.scoreDocs[0].doc, greaterThanOrEqualTo(size));
assertProfileData(context, "ConstantScoreQuery", query -> {
assertThat(query.getTimeBreakdown().keySet(), not(empty()));
assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score_count"), equalTo(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight"), greaterThan(0L));
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight_count"), equalTo(1L));
}, collector -> {
assertThat(collector.getReason(), equalTo("search_terminate_after_count"));
assertThat(collector.getReason(), equalTo("search_top_hits"));
assertThat(collector.getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(1));
assertThat(collector.getProfiledChildren().get(0).getReason(), equalTo("search_top_hits"));
assertThat(collector.getProfiledChildren().get(0).getTime(), greaterThan(0L));
assertThat(collector.getProfiledChildren(), hasSize(0));
});

reader.close();
Expand Down