Skip to content

Commit

Permalink
Use Max WAND optimizations with ToParentBlockJoinQuery when using Sco…
Browse files Browse the repository at this point in the history
…reMode.Max (apache#13587)
  • Loading branch information
Mikep86 authored Aug 26, 2024
1 parent e850cd1 commit 4e3945e
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 9 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ Optimizations
* GITHUB#13644: Improve NumericComparator competitive iterator logic by comparing the missing value with the top
value even after the hit queue is full (Pan Guixin)

* GITHUB#13587: Use Max WAND optimizations with ToParentBlockJoinQuery when using ScoreMode.Max (Mike Pellegrini)

Changes in runtime behavior
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,17 @@ public Weight createWeight(
.rewrite(new ConstantScoreQuery(childQuery))
.createWeight(searcher, weightScoreMode, 0f);
} else {
// if the score is needed we force the collection mode to COMPLETE because the child query
// cannot skip
// non-competitive documents.
// if the score is needed and the score mode is not max, we force the collection mode to
// COMPLETE because the child query cannot skip non-competitive documents.
// weightScoreMode.needsScores() will always be true here, but keep the check to make the
// logic clearer.
childWeight =
childQuery.createWeight(
searcher, weightScoreMode.needsScores() ? COMPLETE : weightScoreMode, boost);
searcher,
weightScoreMode.needsScores() && childScoreMode != ScoreMode.Max
? COMPLETE
: weightScoreMode,
boost);
}
return new BlockJoinWeight(this, childWeight, parentsFilter, childScoreMode);
}
Expand Down Expand Up @@ -155,6 +160,13 @@ public Scorer get(long leadCost) throws IOException {
public long cost() {
// The parent-level supplier reports the child scorer supplier's cost unchanged.
return childScorerSupplier.cost();
}

/*
 * Passes the "top-level scoring clause" notification down to the child scorer supplier,
 * but only when this join scores with ScoreMode.Max. For every other score mode the
 * notification is not forwarded.
 */
@Override
public void setTopLevelScoringClause() throws IOException {
  if (scoreMode != ScoreMode.Max) {
    // Not scoring with Max: the child supplier is left untouched.
    return;
  }
  childScorerSupplier.setTopLevelScoringClause();
}
};
}

Expand Down Expand Up @@ -331,7 +343,7 @@ public float getMaxScore(int upTo) throws IOException {

@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
if (scoreMode == ScoreMode.None) {
if (scoreMode == ScoreMode.None || scoreMode == ScoreMode.Max) {
childScorer.setMinCompetitiveScore(minScore);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,26 @@
*/
package org.apache.lucene.search.join;

import static org.apache.lucene.search.ScoreMode.TOP_SCORES;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TermQuery;
Expand Down Expand Up @@ -76,10 +83,9 @@ public void testScoreNone() throws IOException {

Query childQuery = new MatchAllDocsQuery();
ToParentBlockJoinQuery query =
new ToParentBlockJoinQuery(
childQuery, parentsFilter, org.apache.lucene.search.join.ScoreMode.None);
new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.None);

Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1);
Weight weight = searcher.createWeight(searcher.rewrite(query), TOP_SCORES, 1);
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);

Scorer scorer = weight.scorer(context);
Expand Down Expand Up @@ -118,4 +124,119 @@ public void testScoreNone() throws IOException {
reader.close();
dir.close();
}

/**
 * Checks ToParentBlockJoinQuery with ScoreMode.Max when the weight is created with TOP_SCORES:
 * first that every matching parent is returned with the expected score, then that setting a
 * minimum competitive score on the parent scorer (before or during iteration) leaves only the
 * parents whose score reaches that threshold.
 */
public void testScoreMax() throws IOException {
try (Directory dir = newDirectory()) {
try (RandomIndexWriter w =
new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig()
.setMergePolicy(
// retain doc id order
newLogMergePolicy(random().nextBoolean())))) {

// Each String[][] is one block: every inner array lists the "value" terms of one child
// document, and a parent document is appended after the children. With doc id order
// retained, the doc IDs asserted below correspond to this layout:
//   block 1: children 0-1,              parent 2
//   block 2: children 3-4,              parent 5
//   block 3: child 6 (no "value" term), parent 7
//   block 4: children 8-9,              parent 10
//   block 5: child 11,                  parent 12
//   block 6: children 13-15,            parent 16
for (String[][] values :
Arrays.asList(
new String[][] {{"A", "B"}, {"A", "B", "C"}},
new String[][] {{"A"}, {"B"}},
new String[][] {{}},
new String[][] {{"A", "B", "C"}, {"A", "B", "C", "D"}},
new String[][] {{"B"}},
new String[][] {{"B", "C"}, {"A", "B"}, {"A", "C"}})) {

List<Document> docs = new ArrayList<>();
for (String[] value : values) {
Document childDoc = new Document();
childDoc.add(newStringField("type", "child", Field.Store.NO));
for (String v : value) {
childDoc.add(newStringField("value", v, Field.Store.NO));
}
docs.add(childDoc);
}

// The parent goes last in the list so it closes the block of children added above.
Document parentDoc = new Document();
parentDoc.add(newStringField("type", "parent", Field.Store.NO));
docs.add(parentDoc);

w.addDocuments(docs);
}

// Merge down to one segment; the assertions below only look at leaves().get(0).
w.forceMerge(1);
}

try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);

// Four optional constant-score clauses. Per the expected values asserted below, a child
// scores the sum of the clauses it matches: A = 2, B = 1 (unboosted), C = 3, D = 4.
BooleanQuery childQuery =
new BooleanQuery.Builder()
.add(
new BoostQuery(
new ConstantScoreQuery(new TermQuery(new Term("value", "A"))), 2),
BooleanClause.Occur.SHOULD)
.add(
new ConstantScoreQuery(new TermQuery(new Term("value", "B"))),
BooleanClause.Occur.SHOULD)
.add(
new BoostQuery(
new ConstantScoreQuery(new TermQuery(new Term("value", "C"))), 3),
BooleanClause.Occur.SHOULD)
.add(
new BoostQuery(
new ConstantScoreQuery(new TermQuery(new Term("value", "D"))), 4),
BooleanClause.Occur.SHOULD)
.build();
BitSetProducer parentsFilter =
new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
ToParentBlockJoinQuery parentQuery =
new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Max);

Weight weight = searcher.createWeight(searcher.rewrite(parentQuery), TOP_SCORES, 1);
ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
ss.setTopLevelScoringClause();
Scorer scorer = ss.get(Long.MAX_VALUE);

// Pass 1: no minimum competitive score. Every parent with at least one matching child is
// expected, scored with the best of its children; parent 7 (whose only child has no
// "value" term) is not expected.
assertEquals(2, scorer.iterator().nextDoc());
assertEquals(2 + 1 + 3, scorer.score(), 0);

assertEquals(5, scorer.iterator().nextDoc());
assertEquals(2, scorer.score(), 0);

assertEquals(10, scorer.iterator().nextDoc());
assertEquals(2 + 1 + 3 + 4, scorer.score(), 0);

assertEquals(12, scorer.iterator().nextDoc());
assertEquals(1, scorer.score(), 0);

assertEquals(16, scorer.iterator().nextDoc());
assertEquals(2 + 3, scorer.score(), 0);

assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

// Pass 2: fresh scorer with the minimum competitive score set to 6 before iterating.
// Only the parents scoring 6 and 10 (docs 2 and 10) are expected.
ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);
scorer.setMinCompetitiveScore(6);

assertEquals(2, scorer.iterator().nextDoc());
assertEquals(2 + 1 + 3, scorer.score(), 0);

assertEquals(10, scorer.iterator().nextDoc());
assertEquals(2 + 1 + 3 + 4, scorer.score(), 0);

assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

// Pass 3: fresh scorer; the threshold is raised mid-iteration to 11, above the highest
// parent score asserted in pass 1 (10), so no further parent is expected.
ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);

assertEquals(2, scorer.iterator().nextDoc());
assertEquals(2 + 1 + 3, scorer.score(), 0);

scorer.setMinCompetitiveScore(11);

assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
}
}
}
}

0 comments on commit 4e3945e

Please sign in to comment.