Skip to content

Commit

Permalink
Remove HitsThresholdChecker. (#13943)
Browse files Browse the repository at this point in the history
`TopScoreDocCollectorManager` has a dependency on `HitsThresholdChecker`, which
is essentially a shared counter that is incremented until it reaches the total
hits threshold, when the scorer can start dynamically pruning hits.

A consequence of this removal is that dynamic pruning may start later, as soon
as:
 - either the current slice collected `totalHitsThreshold` hits,
 - or another slice collected `totalHitsThreshold` hits and the current slice
   collected enough hits (up to 1,024) to check the shared
   `MaxScoreAccumulator`.

So in short, it exchanges a bit more work globally in favor of a bit less
contention. A longer-term goal of mine is to stop specializing our
`CollectorManager`s based on whether they are going to be used concurrently or
not.
  • Loading branch information
jpountz authored Oct 28, 2024
1 parent 81ab3b9 commit 937432a
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 216 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ Optimizations

* GITHUB#13899: Check ahead if we can get the count. (Lu Xugang)

* GITHUB#13943: Removed shared `HitsThresholdChecker`, which reduces overhead
but may delay a bit when dynamic pruning kicks in. (Adrien Grand)

Bug Fixes
---------------------
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,14 @@ private abstract class TopFieldLeafCollector implements LeafCollector {
}

void countHit(int doc) throws IOException {
++totalHits;
hitsThresholdChecker.incrementHitCount();
int hitCountSoFar = ++totalHits;

if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (scoreMode.isExhaustive() == false
&& totalHitsRelation == TotalHits.Relation.EQUAL_TO
&& hitsThresholdChecker.isThresholdReached()) {
&& totalHits > totalHitsThreshold) {
// for the first time hitsThreshold is reached, notify comparator about this
comparator.setHitsThresholdReached();
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
Expand All @@ -92,7 +91,7 @@ boolean thresholdCheck(int doc) throws IOException {
// this document is larger than anything else in the queue, and
// therefore not competitive.
if (searchSortPartOfIndexSort) {
if (hitsThresholdChecker.isThresholdReached()) {
if (totalHits > totalHitsThreshold) {
totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
throw new CollectionTerminatedException();
} else {
Expand Down Expand Up @@ -180,9 +179,9 @@ public SimpleFieldCollector(
Sort sort,
FieldValueHitQueue<Entry> queue,
int numHits,
HitsThresholdChecker hitsThresholdChecker,
int totalHitsThreshold,
MaxScoreAccumulator minScoreAcc) {
super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
super(queue, numHits, totalHitsThreshold, sort.needsScores(), minScoreAcc);
this.sort = sort;
this.queue = queue;
}
Expand Down Expand Up @@ -235,9 +234,9 @@ public PagingFieldCollector(
FieldValueHitQueue<Entry> queue,
FieldDoc after,
int numHits,
HitsThresholdChecker hitsThresholdChecker,
int totalHitsThreshold,
MaxScoreAccumulator minScoreAcc) {
super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
super(queue, numHits, totalHitsThreshold, sort.needsScores(), minScoreAcc);
this.sort = sort;
this.queue = queue;
this.after = after;
Expand Down Expand Up @@ -301,7 +300,7 @@ public void collect(int doc) throws IOException {
private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0];

final int numHits;
final HitsThresholdChecker hitsThresholdChecker;
final int totalHitsThreshold;
final FieldComparator<?> firstComparator;
final boolean canSetMinScore;

Expand All @@ -327,25 +326,25 @@ public void collect(int doc) throws IOException {
private TopFieldCollector(
FieldValueHitQueue<Entry> pq,
int numHits,
HitsThresholdChecker hitsThresholdChecker,
int totalHitsThreshold,
boolean needsScores,
MaxScoreAccumulator minScoreAcc) {
super(pq);
this.needsScores = needsScores;
this.numHits = numHits;
this.hitsThresholdChecker = hitsThresholdChecker;
this.totalHitsThreshold = Math.max(totalHitsThreshold, numHits);
this.numComparators = pq.getComparators().length;
this.firstComparator = pq.getComparators()[0];
int reverseMul = pq.reverseMul[0];

if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
&& totalHitsThreshold != Integer.MAX_VALUE) {
scoreMode = ScoreMode.TOP_SCORES;
canSetMinScore = true;
} else {
canSetMinScore = false;
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
if (totalHitsThreshold != Integer.MAX_VALUE) {
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
} else {
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
Expand All @@ -361,10 +360,10 @@ public ScoreMode scoreMode() {

protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) {
// we can start checking the global maximum score even
// if the local queue is not full because the threshold
// is reached.
if (canSetMinScore) {
// we can start checking the global maximum score even if the local queue is not full or if
// the threshold is not reached on the local competitor: the fact that there is a shared min
// competitive score implies that one of the collectors hit its totalHitsThreshold already
long maxMinScore = minScoreAcc.getRaw();
float score;
if (maxMinScore != Long.MIN_VALUE
Expand All @@ -377,7 +376,7 @@ protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOExcepti
}

protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached()) {
if (canSetMinScore && queueFull && totalHits > totalHitsThreshold) {
assert bottom != null;
float minScore = (float) firstComparator.value(bottom.slot);
if (minScore > minCompetitiveScore) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class TopFieldCollectorManager implements CollectorManager<TopFieldCollec
private final Sort sort;
private final int numHits;
private final FieldDoc after;
private final HitsThresholdChecker hitsThresholdChecker;
private final int totalHitsThreshold;
private final MaxScoreAccumulator minScoreAcc;
private final List<TopFieldCollector> collectors;
private final boolean supportsConcurrency;
Expand Down Expand Up @@ -89,10 +89,7 @@ public TopFieldCollectorManager(
this.numHits = numHits;
this.after = after;
this.supportsConcurrency = supportsConcurrency;
this.hitsThresholdChecker =
supportsConcurrency
? HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits))
: HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits));
this.totalHitsThreshold = totalHitsThreshold;
this.minScoreAcc =
supportsConcurrency && totalHitsThreshold != Integer.MAX_VALUE
? new MaxScoreAccumulator()
Expand Down Expand Up @@ -162,7 +159,7 @@ public TopFieldCollector newCollector() {
}
collector =
new TopFieldCollector.SimpleFieldCollector(
sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
sort, queue, numHits, totalHitsThreshold, minScoreAcc);
} else {
if (after.fields == null) {
throw new IllegalArgumentException(
Expand All @@ -178,7 +175,7 @@ public TopFieldCollector newCollector() {
}
collector =
new TopFieldCollector.PagingFieldCollector(
sort, queue, after, numHits, hitsThresholdChecker, minScoreAcc);
sort, queue, after, numHits, totalHitsThreshold, minScoreAcc);
}

collectors.add(collector);
Expand Down
Loading

0 comments on commit 937432a

Please sign in to comment.