From 9634945848e76093cbca87b14c8c25736009ff22 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 13 Dec 2024 11:39:41 +0000 Subject: [PATCH] address review comments --- docs/reference/search/retriever.asciidoc | 12 +++++++----- .../elasticsearch/search/rescore/RescorePhase.java | 7 ++++--- .../search/retriever/CompoundRetrieverBuilder.java | 9 ++++++--- .../search/retriever/RescorerRetrieverBuilder.java | 10 ++++++++++ 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 55a2af37cfd8a..897fb778c3c84 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -382,12 +382,14 @@ For the `standard` and `knn` retrievers, the `window_size` parameter specifies t For compound retrievers like `rrf`, the `window_size` parameter defines the total number of documents examined globally. -When using the `rescorer`, ensure its minimum rescore's `window_size` is: -- Greater than or equal to the `size` of the parent retriever for nested `rescorer` setups. -- Greater than or equal to the `size` of the search request when used as the primary retriever in the tree. +When using the `rescorer`, an error is returned when: -And that its maximum rescore's `window_size` is: -- Smaller than or equal to the `size` or `rank_window_size` of the child retriever. +* The minimum configured rescore's `window_size` is: +** Greater than or equal to the `size` of the parent retriever for nested `rescorer` setups. +** Greater than or equal to the `size` of the search request when used as the primary retriever in the tree. + +* And the maximum rescore's `window_size` is: +** Smaller than or equal to the `size` or `rank_window_size` of the child retriever. ===== Parameters diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java index 34aa7988d9d51..c23df9cdfa441 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java @@ -43,13 +43,14 @@ public static void execute(SearchContext context) { if (context.size() == 0 || context.rescore() == null || context.rescore().isEmpty()) { return; } - assert validateSort(context.sort()) : "invalid sort"; + if (validateSort(context.sort()) == false) { + throw new IllegalStateException("Cannot use [sort] option in conjunction with [rescore], missing a validate?"); + } TopDocs topDocs = context.queryResult().topDocs().topDocs; if (topDocs.scoreDocs.length == 0) { return; } - // Populate FieldDoc#score using the primary sort field (_score) to ensure compatibility - // with top docs rescoring. + // Populate FieldDoc#score using the primary sort field (_score) to ensure compatibility with top docs rescoring Arrays.stream(topDocs.scoreDocs).forEach(t -> { if (t instanceof FieldDoc fieldDoc) { fieldDoc.score = (float) fieldDoc.fields[0]; diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 60887f895d9dc..0527179d42b73 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -234,9 +234,12 @@ public ActionRequestValidationException validate( validationException = innerRetriever.retriever().validate(source, validationException, isScroll, allowPartialSearchResults); if (innerRetriever.retriever() instanceof CompoundRetrieverBuilder compoundChild) { String errorMessage = String.format( - Locale.ROOT, - "[%s] requires [rank_window_size: %d] to be smaller than or equal to its sub retriever's %s [rank_window_size: %d]", - this.getName(), rankWindowSize, compoundChild.getName(), compoundChild.rankWindowSize + Locale.ROOT, + "[%s] requires [rank_window_size: %d] to be smaller than or equal to its sub retriever's %s [rank_window_size: %d]", + this.getName(), + rankWindowSize, + compoundChild.getName(), + compoundChild.rankWindowSize ); validationException = addValidationError(errorMessage, validationException); } diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java index efb9ad0089a39..c387974d06521 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RescorerRetrieverBuilder.java @@ -107,6 +107,16 @@ public String getName() { @Override protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder source) { + /** + * The re-scorer is passed downstream because this query operates only on + * the top documents retrieved by the child retriever. + * + * - If the sub-retriever is a {@link CompoundRetrieverBuilder}, only the top + * documents are re-scored since they are already determined at this stage. + * - For other retrievers that do not require a rewrite, the re-scorer's window + * size is applied per shard. As a result, more documents are re-scored + * compared to the final top documents produced by these retrievers in isolation. + */ for (var rescorer : rescorers) { source.addRescorer(rescorer); }