Skip to content

Commit

Permalink
Another round
Browse files Browse the repository at this point in the history
  • Loading branch information
jimczi committed Jun 10, 2024
1 parent 87142ad commit 9c6b06d
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException {
}

@Override
public QueryBuilder originalQuery(QueryBuilder leadQuery) {
public QueryBuilder topDocsQuery(QueryBuilder leadQuery) {
// TODO nested + inner_hits
BoolQueryBuilder ret = new BoolQueryBuilder().must(leadQuery)
.should(new ExactKnnQueryBuilder(knnSearchBuilder.getQueryVector(), knnSearchBuilder.getField()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException {
}

@Override
public QueryBuilder originalQuery(QueryBuilder leadQuery) {
public QueryBuilder topDocsQuery(QueryBuilder leadQuery) {
DisMaxQueryBuilder disMax = new DisMaxQueryBuilder().tieBreaker(0f);
for (var source : sources) {
var query = source.originalQuery(leadQuery);
var query = source.topDocsQuery(leadQuery);
if (query != null) {
if (source.retrieverName != null) {
query.queryName(source.retrieverName);
Expand Down Expand Up @@ -105,7 +105,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
for (var preFilterQueryBuilder : preFilterQueryBuilders) {
bq.filter(preFilterQueryBuilder);
}
QueryBuilder originalQuery = originalQuery(rankQuery);
QueryBuilder originalQuery = topDocsQuery(rankQuery);
if (originalQuery != null) {
bq.should(originalQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@
/**
* A retriever represents an API element that returns an ordered list of top
* documents. These can be obtained from a query, from another retriever, etc.
* Internally, a {@link RetrieverBuilder} is just a wrapper for other search
* elements that are extracted into a {@link SearchSourceBuilder}. The advantage
* retrievers have is in the API they appear as a tree-like structure enabling
* Internally, a {@link RetrieverBuilder} is first rewritten into its simplest
* form and then its elements are extracted into a {@link SearchSourceBuilder}.
*
* The advantage of retrievers is that, in the API, they appear as a tree-like structure, enabling
* easier reasoning about what a search does.
*
* This is the base class for all other retrievers. This class does not support
Expand Down Expand Up @@ -181,7 +182,7 @@ public List<QueryBuilder> getPreFilterQueryBuilders() {
}

/**
* Determines if this retriever contains sub-retrievers that need to be rewritten into simpler forms.
* Determines if this retriever contains sub-retrievers that need to be executed prior to search.
*/
public boolean isCompound() {
return false;
Expand All @@ -193,10 +194,12 @@ public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException {
}

/**
* Returns the original {@link QueryBuilder} used to compute the top documents.
* @param leadQuery
* This method is called by a compound {@link RetrieverBuilder} to return the original query that
* was used by this retriever to compute its top documents.
*
* @param leadQuery The query identifying the top documents of the parent retriever.
*/
public abstract QueryBuilder originalQuery(QueryBuilder leadQuery);
public abstract QueryBuilder topDocsQuery(QueryBuilder leadQuery);

/**
* This method is called at the end of rewrite on the final retriever.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException {
}

@Override
public QueryBuilder originalQuery(QueryBuilder leadQuery) {
public QueryBuilder topDocsQuery(QueryBuilder leadQuery) {
/**
* What actions should we take with {@link KnnVectorQueryBuilder} or {@link MultiTermQueryBuilder} when a
* compound retriever executes the original queries? Our goal is to retain these queries in scenarios where
* aggregations, highlighting, or inner_hits are used. However, this approach can be costly for compound
* retrievers since they will be executed twice: once as a must clause at this level and a second time as a
* should clause at the upper level (compound retriever).
* Therefore, it would be beneficial to rewrite these queries at the upper level to focus solely on
* scoring/matching similar to what {@link RetrieverBuilder#originalQuery(QueryBuilder)} is doing.
* scoring/matching similar to what {@link RetrieverBuilder#topDocsQuery(QueryBuilder)} is doing.
*/
if (preFilterQueryBuilders.isEmpty()) {
return queryBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public TestRetrieverBuilder(String value) {
}

@Override
public QueryBuilder originalQuery(QueryBuilder leadQuery) {
public QueryBuilder topDocsQuery(QueryBuilder leadQuery) {
throw new UnsupportedOperationException("only used for parsing tests");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ public void onFailure(Exception e) {
}

@Override
public QueryBuilder originalQuery(QueryBuilder leadQuery) {
public QueryBuilder topDocsQuery(QueryBuilder leadQuery) {
throw new IllegalStateException(NAME + " cannot be nested");
}

Expand Down

0 comments on commit 9c6b06d

Please sign in to comment.