Skip to content

Commit

Permalink
Add an additional tiebreaker to RRF (elastic#101847)
Browse files Browse the repository at this point in the history
This change adds an additional tiebreaker for RRF. When two documents have the same RRF 
"score" (for example, ranks of (3,4) and (4,3), or (1,-) and (-,1)), the ordering falls back to the 
highest score from query 1, then query 2, and so on. If all scores are equal, the tiebreaker is 
shard index followed by doc id, but these are not necessarily stable. This should resolve most of the 
stability issues outlined in elastic#101232.

Closes elastic#101232
  • Loading branch information
jdconrad authored Nov 7, 2023
1 parent d25435e commit 99b6518
Show file tree
Hide file tree
Showing 4 changed files with 349 additions and 9 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/101847.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 101847
summary: Add an additional tiebreaker to RRF
area: Ranking
type: bug
issues:
- 101232
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,25 @@ protected boolean lessThan(RRFRankDoc a, RRFRankDoc b) {
}
}

// sort the results based on rrf score, tiebreaker based on smaller shard then smaller doc id
// sort the results based on rrf score, tiebreaker based on
// larger individual query score from 1 to n, smaller shard then smaller doc id
RRFRankDoc[] sortedResults = results.values().toArray(RRFRankDoc[]::new);
Arrays.sort(sortedResults, (RRFRankDoc rrf1, RRFRankDoc rrf2) -> {
if (rrf1.score != rrf2.score) {
return rrf1.score < rrf2.score ? 1 : -1;
}
assert rrf1.positions.length == rrf2.positions.length;
for (int qi = 0; qi < rrf1.positions.length; ++qi) {
if (rrf1.positions[qi] != NO_RANK && rrf2.positions[qi] != NO_RANK) {
if (rrf1.scores[qi] != rrf2.scores[qi]) {
return rrf1.scores[qi] < rrf2.scores[qi] ? 1 : -1;
}
} else if (rrf1.positions[qi] != NO_RANK) {
return -1;
} else if (rrf2.positions[qi] != NO_RANK) {
return 1;
}
}
if (rrf1.shardIndex != rrf2.shardIndex) {
return rrf1.shardIndex < rrf2.shardIndex ? -1 : 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import java.util.List;
import java.util.Map;

import static org.elasticsearch.search.rank.RankDoc.NO_RANK;

/**
* Executes queries and generates results on the shard for RRF.
*/
Expand Down Expand Up @@ -74,6 +76,18 @@ public RRFRankShardResult combine(List<TopDocs> rankResults) {
if (rrf1.score != rrf2.score) {
return rrf1.score < rrf2.score ? 1 : -1;
}
assert rrf1.positions.length == rrf2.positions.length;
for (int qi = 0; qi < rrf1.positions.length; ++qi) {
if (rrf1.positions[qi] != NO_RANK && rrf2.positions[qi] != NO_RANK) {
if (rrf1.scores[qi] != rrf2.scores[qi]) {
return rrf1.scores[qi] < rrf2.scores[qi] ? 1 : -1;
}
} else if (rrf1.positions[qi] != NO_RANK) {
return -1;
} else if (rrf2.positions[qi] != NO_RANK) {
return 1;
}
}
return rrf1.doc < rrf2.doc ? -1 : 1;
});
// trim the results to window size
Expand Down
Loading

0 comments on commit 99b6518

Please sign in to comment.