forked from opensearch-project/neural-search
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for missing HybridQuery results when concurrent segment search is…
… enabled (opensearch-project#800) * Adding merge logic for multiple collector result case Signed-off-by: Martin Gaievski <[email protected]>
- Loading branch information
1 parent
3e4a2ef
commit 25d2e82
Showing
17 changed files
with
1,015 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryScoreDocsMerger.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.search.query; | ||
|
||
import lombok.AccessLevel; | ||
import lombok.NoArgsConstructor; | ||
import org.apache.lucene.search.ScoreDoc; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
import static org.opensearch.neuralsearch.search.util.HybridSearchResultFormatUtil.isHybridQueryScoreDocElement; | ||
|
||
/** | ||
* Merges two ScoreDoc arrays into one | ||
*/ | ||
@NoArgsConstructor(access = AccessLevel.PACKAGE) | ||
class HybridQueryScoreDocsMerger<T extends ScoreDoc> { | ||
|
||
private static final int MIN_NUMBER_OF_ELEMENTS_IN_SCORE_DOC = 3; | ||
|
||
/** | ||
* Merge two score docs objects, result ScoreDocs[] object will have all hits per sub-query from both original objects. | ||
* Input and output ScoreDocs are in format that is specific to Hybrid Query. This method should not be used for ScoreDocs from | ||
* other query types. | ||
* Logic is based on assumption that hits of every sub-query are sorted by score. | ||
* Method returns new object and doesn't mutate original ScoreDocs arrays. | ||
* @param sourceScoreDocs original score docs from query result | ||
* @param newScoreDocs new score docs that we need to merge into existing scores | ||
* @return merged array of ScoreDocs objects | ||
*/ | ||
public T[] merge(final T[] sourceScoreDocs, final T[] newScoreDocs, final Comparator<T> comparator) { | ||
if (Objects.requireNonNull(sourceScoreDocs, "score docs cannot be null").length < MIN_NUMBER_OF_ELEMENTS_IN_SCORE_DOC | ||
|| Objects.requireNonNull(newScoreDocs, "score docs cannot be null").length < MIN_NUMBER_OF_ELEMENTS_IN_SCORE_DOC) { | ||
throw new IllegalArgumentException("cannot merge top docs because it does not have enough elements"); | ||
} | ||
// we overshoot and preallocate more than we need - length of both top docs combined. | ||
// we will take only portion of the array at the end | ||
List<T> mergedScoreDocs = new ArrayList<>(sourceScoreDocs.length + newScoreDocs.length); | ||
int sourcePointer = 0; | ||
// mark beginning of hybrid query results by start element | ||
mergedScoreDocs.add(sourceScoreDocs[sourcePointer]); | ||
sourcePointer++; | ||
// new pointer is set to 1 as we don't care about it start-stop element | ||
int newPointer = 1; | ||
|
||
while (sourcePointer < sourceScoreDocs.length - 1 && newPointer < newScoreDocs.length - 1) { | ||
// every iteration is for results of one sub-query | ||
mergedScoreDocs.add(sourceScoreDocs[sourcePointer]); | ||
sourcePointer++; | ||
newPointer++; | ||
// simplest case when both arrays have results for sub-query | ||
while (sourcePointer < sourceScoreDocs.length | ||
&& isHybridQueryScoreDocElement(sourceScoreDocs[sourcePointer]) | ||
&& newPointer < newScoreDocs.length | ||
&& isHybridQueryScoreDocElement(newScoreDocs[newPointer])) { | ||
if (comparator.compare(sourceScoreDocs[sourcePointer], newScoreDocs[newPointer]) >= 0) { | ||
mergedScoreDocs.add(sourceScoreDocs[sourcePointer]); | ||
sourcePointer++; | ||
} else { | ||
mergedScoreDocs.add(newScoreDocs[newPointer]); | ||
newPointer++; | ||
} | ||
} | ||
// at least one object got exhausted at this point, now merge all elements from object that's left | ||
while (sourcePointer < sourceScoreDocs.length && isHybridQueryScoreDocElement(sourceScoreDocs[sourcePointer])) { | ||
mergedScoreDocs.add(sourceScoreDocs[sourcePointer]); | ||
sourcePointer++; | ||
} | ||
while (newPointer < newScoreDocs.length && isHybridQueryScoreDocElement(newScoreDocs[newPointer])) { | ||
mergedScoreDocs.add(newScoreDocs[newPointer]); | ||
newPointer++; | ||
} | ||
} | ||
// mark end of hybrid query results by end element | ||
mergedScoreDocs.add(sourceScoreDocs[sourceScoreDocs.length - 1]); | ||
return mergedScoreDocs.toArray((T[]) new ScoreDoc[0]); | ||
} | ||
} |
74 changes: 74 additions & 0 deletions
74
src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.search.query; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import lombok.AccessLevel; | ||
import lombok.RequiredArgsConstructor; | ||
import org.apache.lucene.search.ScoreDoc; | ||
import org.apache.lucene.search.TopDocs; | ||
import org.apache.lucene.search.TotalHits; | ||
import org.opensearch.common.lucene.search.TopDocsAndMaxScore; | ||
|
||
import java.util.Comparator; | ||
import java.util.Objects; | ||
|
||
/** | ||
* Utility class for merging TopDocs and MaxScore across multiple search queries | ||
*/ | ||
@RequiredArgsConstructor(access = AccessLevel.PACKAGE) | ||
class TopDocsMerger { | ||
|
||
private final HybridQueryScoreDocsMerger<ScoreDoc> scoreDocsMerger; | ||
@VisibleForTesting | ||
protected static final Comparator<ScoreDoc> SCORE_DOC_BY_SCORE_COMPARATOR = Comparator.comparing((scoreDoc) -> scoreDoc.score); | ||
/** | ||
* Uses hybrid query score docs merger to merge internal score docs | ||
*/ | ||
static final TopDocsMerger TOP_DOCS_MERGER_TOP_SCORES = new TopDocsMerger(new HybridQueryScoreDocsMerger<>()); | ||
|
||
/** | ||
* Merge TopDocs and MaxScore from multiple search queries into a single TopDocsAndMaxScore object. | ||
* @param source TopDocsAndMaxScore for the original query | ||
* @param newTopDocs TopDocsAndMaxScore for the new query | ||
* @return merged TopDocsAndMaxScore object | ||
*/ | ||
public TopDocsAndMaxScore merge(TopDocsAndMaxScore source, TopDocsAndMaxScore newTopDocs) { | ||
if (Objects.isNull(newTopDocs) || Objects.isNull(newTopDocs.topDocs) || newTopDocs.topDocs.totalHits.value == 0) { | ||
return source; | ||
} | ||
// we need to merge hits per individual sub-query | ||
// format of results in both new and source TopDocs is following | ||
// doc_id | magic_number_1 | ||
// doc_id | magic_number_2 | ||
// ... | ||
// doc_id | magic_number_2 | ||
// ... | ||
// doc_id | magic_number_2 | ||
// ... | ||
// doc_id | magic_number_1 | ||
ScoreDoc[] mergedScoreDocs = scoreDocsMerger.merge( | ||
source.topDocs.scoreDocs, | ||
newTopDocs.topDocs.scoreDocs, | ||
SCORE_DOC_BY_SCORE_COMPARATOR | ||
); | ||
TotalHits mergedTotalHits = getMergedTotalHits(source, newTopDocs); | ||
TopDocsAndMaxScore result = new TopDocsAndMaxScore( | ||
new TopDocs(mergedTotalHits, mergedScoreDocs), | ||
Math.max(source.maxScore, newTopDocs.maxScore) | ||
); | ||
return result; | ||
} | ||
|
||
private TotalHits getMergedTotalHits(TopDocsAndMaxScore source, TopDocsAndMaxScore newTopDocs) { | ||
// merged value is a lower bound - if both are equal_to than merged will also be equal_to, | ||
// otherwise assign greater_than_or_equal | ||
TotalHits.Relation mergedHitsRelation = source.topDocs.totalHits.relation == TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO | ||
|| newTopDocs.topDocs.totalHits.relation == TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO | ||
? TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO | ||
: TotalHits.Relation.EQUAL_TO; | ||
return new TotalHits(source.topDocs.totalHits.value + newTopDocs.topDocs.totalHits.value, mergedHitsRelation); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.