Skip to content

Commit

Permalink
search_after query optimization with shard/segment level short cutting (
Browse files Browse the repository at this point in the history
#7453)

* search_after query optimization with shard/segment level short cutting

Signed-off-by: Chaitanya Gohel <[email protected]>

* Correcting logical && operator

Signed-off-by: Chaitanya Gohel <[email protected]>

* Addressing review comments

Signed-off-by: Chaitanya Gohel <[email protected]>

* Correcting NPE

Signed-off-by: gashutos <[email protected]>

* Fixing failed integ

Signed-off-by: gashutos <[email protected]>

* Addressing review comments and fixing some more integ

Signed-off-by: Chaitanya Gohel <[email protected]>

* Addressing review comments and fixing some more integ

Signed-off-by: Chaitanya Gohel <[email protected]>

* Refactoring a bit to add aggressive null checks

Signed-off-by: Chaitanya Gohel <[email protected]>

* Adding unit tests for MinAndMax methods

Signed-off-by: gashutos <[email protected]>

---------

Signed-off-by: Chaitanya Gohel <[email protected]>
Signed-off-by: gashutos <[email protected]>
Signed-off-by: Chaitanya Gohel <[email protected]>
  • Loading branch information
gashutos authored May 23, 2023
1 parent f640c40 commit c659d04
Show file tree
Hide file tree
Showing 8 changed files with 289 additions and 35 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add task cancellation timestamp in task API ([#7455](https://github.com/opensearch-project/OpenSearch/pull/7455))
- Adds ExtensionsManager.lookupExtensionSettingsById ([#7466](https://github.com/opensearch-project/OpenSearch/pull/7466))
- SegRep with Remote: Add hook for publishing checkpoint notifications after segment upload to remote store ([#7394](https://github.com/opensearch-project/OpenSearch/pull/7394))
- Add search_after query optimizations with shard/segment short cutting ([#7453](https://github.com/opensearch-project/OpenSearch/pull/7453))
- Provide mechanism to configure XContent parsing constraints (after update to Jackson 2.15.0 and above) ([#7550](https://github.com/opensearch-project/OpenSearch/pull/7550))
- Support to clear filecache using clear indices cache API ([#7498](https://github.com/opensearch-project/OpenSearch/pull/7498))
- Create NamedRoute to map extension routes to a shortened name ([#6870](https://github.com/opensearch-project/OpenSearch/pull/6870))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@
import org.opensearch.search.rescore.RescoreContext;
import org.opensearch.search.slice.SliceBuilder;
import org.opensearch.search.sort.SortAndFormats;
import org.opensearch.search.sort.SortOrder;
import org.opensearch.search.suggest.SuggestionSearchContext;

import java.io.IOException;
Expand Down Expand Up @@ -212,7 +211,7 @@ final class DefaultSearchContext extends SearchContext {
engineSearcher.getQueryCachingPolicy(),
lowLevelCancellation,
executor,
shouldReverseLeafReaderContexts()
this
);
this.relativeTimeSupplier = relativeTimeSupplier;
this.timeout = timeout;
Expand Down Expand Up @@ -887,22 +886,4 @@ public boolean isCancelled() {
public ReaderContext readerContext() {
return readerContext;
}

/**
 * Decides whether leaf readers should be visited in reverse (newest-to-oldest reversed) order.
 * Time series based workloads by default traverse segments in descending order, i.e. latest to
 * oldest, which benefits descending timestamp sorts but slows down ascending ones. For an
 * ascending primary sort on a time series index we therefore reverse the leaf reader order.
 *
 * @return true when this is a time series index and the primary sort is ascending
 */
private boolean shouldReverseLeafReaderContexts() {
    return this.indexShard.isTimeSeriesIndex()
        && request != null
        && request.source() != null
        && request.source().sorts() != null
        && request.source().sorts().isEmpty() == false
        && request.source().sorts().get(0).order() == SortOrder.ASC;
}
}
36 changes: 35 additions & 1 deletion server/src/main/java/org/opensearch/search/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@
import org.opensearch.search.sort.MinAndMax;
import org.opensearch.search.sort.SortAndFormats;
import org.opensearch.search.sort.SortBuilder;
import org.opensearch.search.sort.SortOrder;
import org.opensearch.search.suggest.Suggest;
import org.opensearch.search.suggest.completion.CompletionSuggestion;
import org.opensearch.threadpool.Scheduler.Cancellable;
Expand Down Expand Up @@ -1525,19 +1526,52 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
final boolean aliasFilterCanMatch = request.getAliasFilter().getQueryBuilder() instanceof MatchNoneQueryBuilder == false;
FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source());
MinAndMax<?> minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null;
final boolean canMatch;
boolean canMatch;
if (canRewriteToMatchNone(request.source())) {
QueryBuilder queryBuilder = request.source().query();
canMatch = aliasFilterCanMatch && queryBuilder instanceof MatchNoneQueryBuilder == false;
} else {
// null query means match_all
canMatch = aliasFilterCanMatch;
}
final FieldDoc searchAfterFieldDoc = getSearchAfterFieldDoc(request, context);
canMatch = canMatch && canMatchSearchAfter(searchAfterFieldDoc, minMax, sortBuilder);

return new CanMatchResponse(canMatch || hasRefreshPending, minMax);
}
}
}

/**
 * Tells whether a shard/segment can still hold documents competitive with the given
 * search_after cursor, judged only by the primary sort field's indexed min/max values.
 *
 * @param searchAfter      the search_after cursor as a {@link FieldDoc}; only field 0 (primary sort) is inspected
 * @param minMax           min/max indexed values of the primary sort field for the shard/segment, or null if unknown
 * @param primarySortField the primary sort definition, or null if there is none
 * @return false only when the shard/segment provably cannot be competitive; true whenever any input is missing
 */
public static boolean canMatchSearchAfter(FieldDoc searchAfter, MinAndMax<?> minMax, FieldSortBuilder primarySortField) {
    // Without a cursor, range statistics, or a primary sort we cannot prove anything — stay conservative.
    if (searchAfter == null || minMax == null || primarySortField == null) {
        return true;
    }
    final Object cursorValue = searchAfter.fields[0];
    if (primarySortField.order() == SortOrder.DESC) {
        // Descending order: if even the minimum exceeds the cursor, no document here can be competitive.
        return minMax.compareMin(cursorValue) <= 0;
    }
    // Ascending order: if even the maximum falls short of the cursor, no document here can be competitive.
    return minMax.compareMax(cursorValue) >= 0;
}

/**
 * Rebuilds the search_after cursor as a {@link FieldDoc} from the request's sort
 * definitions and search_after values.
 *
 * @param request the shard search request, may be null
 * @param context shard query context used to build the sort, may be null
 * @return the cursor, or null when the request carries no usable sort/search_after
 * @throws IOException if building the sort fails
 */
private static FieldDoc getSearchAfterFieldDoc(ShardSearchRequest request, QueryShardContext context) throws IOException {
    // Bail out early when any piece needed to rebuild the cursor is missing.
    if (context == null || request == null || request.source() == null || request.source().sorts() == null) {
        return null;
    }
    // Build the sort first (it may throw), matching the original evaluation order.
    final Optional<SortAndFormats> builtSort = SortBuilder.buildSort(request.source().sorts(), context);
    final Object[] searchAfterValues = request.source().searchAfter();
    if (builtSort.isPresent() && CollectionUtils.isEmpty(searchAfterValues) == false) {
        return SearchAfterBuilder.buildFieldDoc(builtSort.get(), searchAfterValues);
    }
    return null;
}

/**
* Returns true iff the given search source builder can be early terminated by rewriting to a match none query. Or in other words
* if the execution of the search request can be early terminated without executing it. This is for instance not possible if
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,17 @@
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.SearchService;
import org.opensearch.search.dfs.AggregatedDfs;
import org.opensearch.search.profile.ContextualProfileBreakdown;
import org.opensearch.search.profile.Timer;
import org.opensearch.search.profile.query.ProfileWeight;
import org.opensearch.search.profile.query.QueryProfiler;
import org.opensearch.search.profile.query.QueryTimingType;
import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.MinAndMax;
import org.opensearch.search.sort.SortOrder;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -97,12 +101,7 @@ public class ContextIndexSearcher extends IndexSearcher implements Releasable {
private AggregatedDfs aggregatedDfs;
private QueryProfiler profiler;
private MutableQueryTimeout cancellable;

/**
* Certain queries can benefit if we reverse the segment read order,
* for example time series based queries if searched for desc sort order
*/
private final boolean reverseLeafReaderContexts;
private SearchContext searchContext;

public ContextIndexSearcher(
IndexReader reader,
Expand All @@ -120,7 +119,7 @@ public ContextIndexSearcher(
new MutableQueryTimeout(),
wrapWithExitableDirectoryReader,
executor,
false
null
);
}

Expand All @@ -131,7 +130,7 @@ public ContextIndexSearcher(
QueryCachingPolicy queryCachingPolicy,
boolean wrapWithExitableDirectoryReader,
Executor executor,
boolean reverseLeafReaderContexts
SearchContext searchContext
) throws IOException {
this(
reader,
Expand All @@ -141,7 +140,7 @@ public ContextIndexSearcher(
new MutableQueryTimeout(),
wrapWithExitableDirectoryReader,
executor,
reverseLeafReaderContexts
searchContext
);
}

Expand All @@ -153,14 +152,14 @@ private ContextIndexSearcher(
MutableQueryTimeout cancellable,
boolean wrapWithExitableDirectoryReader,
Executor executor,
boolean reverseLeafReaderContexts
SearchContext searchContext
) throws IOException {
super(wrapWithExitableDirectoryReader ? new ExitableDirectoryReader((DirectoryReader) reader, cancellable) : reader, executor);
setSimilarity(similarity);
setQueryCache(queryCache);
setQueryCachingPolicy(queryCachingPolicy);
this.cancellable = cancellable;
this.reverseLeafReaderContexts = reverseLeafReaderContexts;
this.searchContext = searchContext;
}

public void setProfiler(QueryProfiler profiler) {
Expand Down Expand Up @@ -284,8 +283,10 @@ public void search(

@Override
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
if (reverseLeafReaderContexts) {
if (shouldReverseLeafReaderContexts()) {
// reverse the segment search order if this flag is true.
// Certain queries can benefit if we reverse the segment read order,
// for example time series based queries if searched for desc sort order.
for (int i = leaves.size() - 1; i >= 0; i--) {
searchLeaf(leaves.get(i), weight, collector);
}
Expand All @@ -303,6 +304,12 @@ protected void search(List<LeafReaderContext> leaves, Weight weight, Collector c
* the provided <code>ctx</code>.
*/
private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {

// Check if at all we need to call this leaf for collecting results.
if (canMatch(ctx) == false) {
return;
}

cancellable.checkCancelled();
weight = wrapWeight(weight);
// See please https://github.com/apache/lucene/pull/964
Expand Down Expand Up @@ -478,4 +485,43 @@ public void clear() {
runnables.clear();
}
}

/**
 * Pre-filter invoked per leaf before collecting: returns {@code false} when this
 * segment provably cannot contribute competitive hits, so the caller may skip it.
 */
private boolean canMatch(LeafReaderContext ctx) throws IOException {
    // Skip segments for search_after if their min/max values don't qualify as competitive.
    // This is currently the only pruning rule applied at the segment level.
    return canMatchSearchAfter(ctx);
}

/**
 * Checks whether the given segment can contain hits competitive with the request's
 * search_after cursor, using the segment-level min/max of the primary sort field.
 *
 * @param ctx the segment to test
 * @return false only when the segment is provably non-competitive; true otherwise
 * @throws IOException if reading segment-level min/max fails
 */
private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException {
    // Without a request source there is no search_after cursor to compare against.
    if (searchContext == null || searchContext.request() == null || searchContext.request().source() == null) {
        return true;
    }
    // The shortcut only considers the primary sort field and primary search_after value.
    final FieldSortBuilder primarySort = FieldSortBuilder.getPrimaryFieldSortOrNull(searchContext.request().source());
    if (primarySort == null) {
        return true;
    }
    final MinAndMax<?> segmentMinMax = FieldSortBuilder.getMinMaxOrNullForSegment(
        searchContext.getQueryShardContext(),
        ctx,
        primarySort
    );
    return SearchService.canMatchSearchAfter(searchContext.searchAfter(), segmentMinMax, primarySort);
}

/**
 * Decides whether the leaf readers should be searched in reverse order.
 * Time series based workloads by default traverse segments in descending order (latest to
 * oldest), which already favours descending timestamp sorts but slows down ascending ones.
 * For an ascending primary sort on a time series index we therefore reverse the leaf order.
 *
 * @return true when the shard is a time series index and the primary sort is ascending
 */
private boolean shouldReverseLeafReaderContexts() {
    return searchContext != null
        && searchContext.indexShard().isTimeSeriesIndex()
        && searchContext.request() != null
        && searchContext.request().source() != null
        && searchContext.request().source().sorts() != null
        && searchContext.request().source().sorts().isEmpty() == false
        && searchContext.request().source().sorts().get(0).order() == SortOrder.ASC;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Terms;
Expand Down Expand Up @@ -605,17 +606,31 @@ public static FieldSortBuilder getPrimaryFieldSortOrNull(SearchSourceBuilder sou
}

/**
 * Return the {@link MinAndMax} indexed value for the whole shard from the provided
 * {@link FieldSortBuilder}, or <code>null</code> if unknown.
 * The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date;
 * other fields and configurations return <code>null</code>.
 */
public static MinAndMax<?> getMinMaxOrNull(QueryShardContext context, FieldSortBuilder sortBuilder) throws IOException {
    return getMinMaxOrNullInternal(context.getIndexReader(), context, sortBuilder);
}

/**
 * Return the {@link MinAndMax} indexed value for a single segment from the provided
 * {@link FieldSortBuilder}, or <code>null</code> if unknown.
 * The value can be extracted on non-nested indexed mapped fields of type keyword, numeric or date;
 * other fields and configurations return <code>null</code>.
 */
public static MinAndMax<?> getMinMaxOrNullForSegment(QueryShardContext context, LeafReaderContext ctx, FieldSortBuilder sortBuilder)
    throws IOException {
    return getMinMaxOrNullInternal(ctx.reader(), context, sortBuilder);
}

private static MinAndMax<?> getMinMaxOrNullInternal(IndexReader reader, QueryShardContext context, FieldSortBuilder sortBuilder)
throws IOException {
SortAndFormats sort = SortBuilder.buildSort(Collections.singletonList(sortBuilder), context).get();
SortField sortField = sort.sort.getSort()[0];
if (sortField.getField() == null) {
return null;
}
IndexReader reader = context.getIndexReader();
MappedFieldType fieldType = context.fieldMapper(sortField.getField());
if (reader == null || (fieldType == null || fieldType.isSearchable() == false)) {
return null;
Expand Down
34 changes: 34 additions & 0 deletions server/src/main/java/org/opensearch/search/sort/MinAndMax.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@

package org.opensearch.search.sort;

import org.apache.lucene.util.BytesRef;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
import org.opensearch.common.io.stream.Writeable;
import org.opensearch.common.lucene.Lucene;

import java.io.IOException;
import java.math.BigInteger;
import java.util.Comparator;
import java.util.Objects;

Expand Down Expand Up @@ -93,4 +95,36 @@ public static Comparator<MinAndMax<?>> getComparator(SortOrder order) {
}
return Comparator.nullsLast(cmp);
}

/**
 * Compares the recorded minimum against the given value.
 *
 * @param object value to compare with; must be of the same type as the stored min
 *               (see {@code compare} for the supported types)
 * @return negative, zero, or positive as min is less than, equal to, or greater than {@code object}
 */
public int compareMin(Object object) {
    final T minimum = getMin();
    return compare(minimum, object);
}

/**
 * Compares the recorded maximum against the given value.
 *
 * @param object value to compare with; must be of the same type as the stored max
 *               (see {@code compare} for the supported types)
 * @return negative, zero, or positive as max is less than, equal to, or greater than {@code object}
 */
public int compareMax(Object object) {
    final T maximum = getMax();
    return compare(maximum, object);
}

/**
 * Compares {@code one} against {@code two} for the supported sort value types:
 * Long, Integer, Float, Double, BigInteger and BytesRef. Each supported type's
 * {@code compareTo} matches the corresponding static {@code compare} helper, so a
 * single {@link Comparable} dispatch is equivalent to a per-type cast chain.
 * A mismatched type for {@code two} surfaces as a {@link ClassCastException}, and
 * an unsupported type for {@code one} as an {@link UnsupportedOperationException} —
 * the same failure modes as per-type casting.
 */
@SuppressWarnings("unchecked")
private int compare(T one, Object two) {
    if (one instanceof Long
        || one instanceof Integer
        || one instanceof Float
        || one instanceof Double
        || one instanceof BigInteger
        || one instanceof BytesRef) {
        return ((Comparable<Object>) one).compareTo(two);
    }
    throw new UnsupportedOperationException("compare type not supported : " + one.getClass());
}
}
Loading

0 comments on commit c659d04

Please sign in to comment.