From 0d4c0f208087ca84d9727b777a366bb1448f2a4c Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 5 Dec 2024 09:06:53 +0200 Subject: [PATCH] Fix for propagating filters from compound to inner retrievers (#117914) --- docs/changelog/117914.yaml | 5 ++ .../retriever/CompoundRetrieverBuilder.java | 32 +++++--- .../search/retriever/KnnRetrieverBuilder.java | 3 +- .../retriever/RankDocsRetrieverBuilder.java | 16 +--- .../RankDocsRetrieverBuilderTests.java | 7 +- .../vectors/TestQueryVectorBuilderPlugin.java | 8 +- .../TestCompoundRetrieverBuilder.java | 10 ++- .../retriever/QueryRuleRetrieverBuilder.java | 15 +++- .../TextSimilarityRankRetrieverBuilder.java | 7 +- .../xpack/rank/rrf/RRFRetrieverBuilderIT.java | 38 +++++++++- .../xpack/rank/rrf/RRFFeatures.java | 6 ++ .../xpack/rank/rrf/RRFRetrieverBuilder.java | 7 +- ...rrf_retriever_search_api_compatibility.yml | 74 +++++++++++++++++++ 13 files changed, 180 insertions(+), 48 deletions(-) create mode 100644 docs/changelog/117914.yaml diff --git a/docs/changelog/117914.yaml b/docs/changelog/117914.yaml new file mode 100644 index 0000000000000..da58ed7bb04b7 --- /dev/null +++ b/docs/changelog/117914.yaml @@ -0,0 +1,5 @@ +pr: 117914 +summary: Fix for propagating filters from compound to inner retrievers +area: Ranking +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index db839de9f573a..2ab6395db73b5 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -20,6 +20,7 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.TransportMultiSearchAction; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.rest.RestStatus; @@ -46,6 +47,8 @@ */ public abstract class CompoundRetrieverBuilder> extends RetrieverBuilder { + public static final NodeFeature INNER_RETRIEVERS_FILTER_SUPPORT = new NodeFeature("inner_retrievers_filter_support"); + public record RetrieverSource(RetrieverBuilder retriever, SearchSourceBuilder source) {} protected final int rankWindowSize; @@ -64,9 +67,9 @@ public T addChild(RetrieverBuilder retrieverBuilder) { /** * Returns a clone of the original retriever, replacing the sub-retrievers with - * the provided {@code newChildRetrievers}. + * the provided {@code newChildRetrievers} and the filters with the {@code newPreFilterQueryBuilders}. */ - protected abstract T clone(List newChildRetrievers); + protected abstract T clone(List newChildRetrievers, List newPreFilterQueryBuilders); /** * Combines the provided {@code rankResults} to return the final top documents. @@ -85,13 +88,25 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio } // Rewrite prefilters - boolean hasChanged = false; + // We eagerly rewrite prefilters, because some of the innerRetrievers + // could be compound too, so we want to propagate all the necessary filter information to them + // and have it available as part of their own rewrite step var newPreFilters = rewritePreFilters(ctx); - hasChanged |= newPreFilters != preFilterQueryBuilders; + if (newPreFilters != preFilterQueryBuilders) { + return clone(innerRetrievers, newPreFilters); + } + boolean hasChanged = false; // Rewrite retriever sources List newRetrievers = new ArrayList<>(); for (var entry : innerRetrievers) { + // we propagate the filters only for compound retrievers as they won't be attached through + // the createSearchSourceBuilder. + // We could remove this check, but we would end up adding the same filters + // multiple times in case an inner retriever rewrites itself, when we re-enter to rewrite + if (entry.retriever.isCompound() && false == preFilterQueryBuilders.isEmpty()) { + entry.retriever.getPreFilterQueryBuilders().addAll(preFilterQueryBuilders); + } RetrieverBuilder newRetriever = entry.retriever.rewrite(ctx); if (newRetriever != entry.retriever) { newRetrievers.add(new RetrieverSource(newRetriever, null)); @@ -106,7 +121,7 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio } } if (hasChanged) { - return clone(newRetrievers); + return clone(newRetrievers, newPreFilters); } // execute searches @@ -166,12 +181,7 @@ public void onFailure(Exception e) { }); }); - return new RankDocsRetrieverBuilder( - rankWindowSize, - newRetrievers.stream().map(s -> s.retriever).toList(), - results::get, - newPreFilters - ); + return new RankDocsRetrieverBuilder(rankWindowSize, newRetrievers.stream().map(s -> s.retriever).toList(), results::get); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/retriever/KnnRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/KnnRetrieverBuilder.java index 8be9a78dae154..f1464c41ca3be 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/KnnRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/KnnRetrieverBuilder.java @@ -184,8 +184,7 @@ public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException { ll.onResponse(null); })); }); - var rewritten = new KnnRetrieverBuilder(this, () -> toSet.get(), null); - return rewritten; + return new KnnRetrieverBuilder(this, () -> toSet.get(), null); } return super.rewrite(ctx); } diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java index 02f890f51d011..4d3f3fefd4462 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java @@ -33,19 +33,13 @@ public class RankDocsRetrieverBuilder extends RetrieverBuilder { final List sources; final Supplier rankDocs; - public RankDocsRetrieverBuilder( - int rankWindowSize, - List sources, - Supplier rankDocs, - List preFilterQueryBuilders - ) { + public RankDocsRetrieverBuilder(int rankWindowSize, List sources, Supplier rankDocs) { this.rankWindowSize = rankWindowSize; this.rankDocs = rankDocs; if (sources == null || sources.isEmpty()) { throw new IllegalArgumentException("sources must not be null or empty"); } this.sources = sources; - this.preFilterQueryBuilders = preFilterQueryBuilders; } @Override @@ -73,10 +67,6 @@ private boolean sourceShouldRewrite(QueryRewriteContext ctx) throws IOException @Override public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException { assert false == sourceShouldRewrite(ctx) : "retriever sources should be rewritten first"; - var rewrittenFilters = rewritePreFilters(ctx); - if (rewrittenFilters != preFilterQueryBuilders) { - return new RankDocsRetrieverBuilder(rankWindowSize, sources, rankDocs, rewrittenFilters); - } return this; } @@ -94,7 +84,7 @@ public QueryBuilder topDocsQuery() { boolQuery.should(query); } } - // ignore prefilters of this level, they are already propagated to children + // ignore prefilters of this level, they were already propagated to children return boolQuery; } @@ -133,7 +123,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder } else { rankQuery = new RankDocsQueryBuilder(rankDocResults, null, false); } - // ignore prefilters of this level, they are already propagated to children + // ignore prefilters of this level, they were already propagated to children searchSourceBuilder.query(rankQuery); if (sourceHasMinScore()) { searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore()); diff --git a/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java b/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java index af6782c45dce8..ccf33c0b71b6b 100644 --- a/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java @@ -95,12 +95,7 @@ private List preFilters(QueryRewriteContext queryRewriteContext) t } private RankDocsRetrieverBuilder createRandomRankDocsRetrieverBuilder(QueryRewriteContext queryRewriteContext) throws IOException { - return new RankDocsRetrieverBuilder( - randomIntBetween(1, 100), - innerRetrievers(queryRewriteContext), - rankDocsSupplier(), - preFilters(queryRewriteContext) - ); + return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier()); } public void testExtractToSearchSourceBuilder() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/search/vectors/TestQueryVectorBuilderPlugin.java b/server/src/test/java/org/elasticsearch/search/vectors/TestQueryVectorBuilderPlugin.java index c47c8c16f6a2f..5733a51bb7e9c 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/TestQueryVectorBuilderPlugin.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/TestQueryVectorBuilderPlugin.java @@ -27,9 +27,9 @@ /** * A SearchPlugin to exercise query vector builder */ -class TestQueryVectorBuilderPlugin implements SearchPlugin { +public class TestQueryVectorBuilderPlugin implements SearchPlugin { - static class TestQueryVectorBuilder implements QueryVectorBuilder { + public static class TestQueryVectorBuilder implements QueryVectorBuilder { private static final String NAME = "test_query_vector_builder"; private static final ParseField QUERY_VECTOR = new ParseField("query_vector"); @@ -47,11 +47,11 @@ static class TestQueryVectorBuilder implements QueryVectorBuilder { private List vectorToBuild; - TestQueryVectorBuilder(List vectorToBuild) { + public TestQueryVectorBuilder(List vectorToBuild) { this.vectorToBuild = vectorToBuild; } - TestQueryVectorBuilder(float[] expected) { + public TestQueryVectorBuilder(float[] expected) { this.vectorToBuild = new ArrayList<>(expected.length); for (float f : expected) { vectorToBuild.add(f); diff --git a/test/framework/src/main/java/org/elasticsearch/search/retriever/TestCompoundRetrieverBuilder.java b/test/framework/src/main/java/org/elasticsearch/search/retriever/TestCompoundRetrieverBuilder.java index 9f199aa7f3ef8..4a5f280c10a99 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/retriever/TestCompoundRetrieverBuilder.java +++ b/test/framework/src/main/java/org/elasticsearch/search/retriever/TestCompoundRetrieverBuilder.java @@ -10,6 +10,7 @@ package org.elasticsearch.search.retriever; import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.rank.RankDoc; import org.elasticsearch.xcontent.XContentBuilder; @@ -23,16 +24,17 @@ public class TestCompoundRetrieverBuilder extends CompoundRetrieverBuilder(), rankWindowSize); + this(new ArrayList<>(), rankWindowSize, new ArrayList<>()); } - TestCompoundRetrieverBuilder(List childRetrievers, int rankWindowSize) { + TestCompoundRetrieverBuilder(List childRetrievers, int rankWindowSize, List preFilterQueryBuilders) { super(childRetrievers, rankWindowSize); + this.preFilterQueryBuilders = preFilterQueryBuilders; } @Override - protected TestCompoundRetrieverBuilder clone(List newChildRetrievers) { - return new TestCompoundRetrieverBuilder(newChildRetrievers, rankWindowSize); + protected TestCompoundRetrieverBuilder clone(List newChildRetrievers, List newPreFilterQueryBuilders) { + return new TestCompoundRetrieverBuilder(newChildRetrievers, rankWindowSize, newPreFilterQueryBuilders); } @Override diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/retriever/QueryRuleRetrieverBuilder.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/retriever/QueryRuleRetrieverBuilder.java index 54a89d061de35..5b27cc7a3e05a 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/retriever/QueryRuleRetrieverBuilder.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/retriever/QueryRuleRetrieverBuilder.java @@ -110,12 +110,14 @@ public QueryRuleRetrieverBuilder( Map matchCriteria, List retrieverSource, int rankWindowSize, - String retrieverName + String retrieverName, + List preFilterQueryBuilders ) { super(retrieverSource, rankWindowSize); this.rulesetIds = rulesetIds; this.matchCriteria = matchCriteria; this.retrieverName = retrieverName; + this.preFilterQueryBuilders = preFilterQueryBuilders; } @Override @@ -156,8 +158,15 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept } @Override - protected QueryRuleRetrieverBuilder clone(List newChildRetrievers) { - return new QueryRuleRetrieverBuilder(rulesetIds, matchCriteria, newChildRetrievers, rankWindowSize, retrieverName); + protected QueryRuleRetrieverBuilder clone(List newChildRetrievers, List newPreFilterQueryBuilders) { + return new QueryRuleRetrieverBuilder( + rulesetIds, + matchCriteria, + newChildRetrievers, + rankWindowSize, + retrieverName, + newPreFilterQueryBuilders + ); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index c239319b6283a..fd2427dc8ac6a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -129,7 +129,10 @@ public TextSimilarityRankRetrieverBuilder( } @Override - protected TextSimilarityRankRetrieverBuilder clone(List newChildRetrievers) { + protected TextSimilarityRankRetrieverBuilder clone( + List newChildRetrievers, + List newPreFilterQueryBuilders + ) { return new TextSimilarityRankRetrieverBuilder( newChildRetrievers, inferenceId, @@ -138,7 +141,7 @@ protected TextSimilarityRankRetrieverBuilder clone(List newChil rankWindowSize, minScore, retrieverName, - preFilterQueryBuilders + newPreFilterQueryBuilders ); } diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java index 37e1807d138aa..ae35153b6f39f 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderIT.java @@ -33,6 +33,7 @@ import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; import org.elasticsearch.search.vectors.QueryVectorBuilder; +import org.elasticsearch.search.vectors.TestQueryVectorBuilderPlugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; import org.elasticsearch.xcontent.XContentBuilder; @@ -57,7 +58,6 @@ public class RRFRetrieverBuilderIT extends ESIntegTestCase { protected static String INDEX = "test_index"; - protected static final String ID_FIELD = "_id"; protected static final String DOC_FIELD = "doc"; protected static final String TEXT_FIELD = "text"; protected static final String VECTOR_FIELD = "vector"; @@ -743,6 +743,42 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder expectThrows(UnsupportedOperationException.class, () -> client().prepareSearch(INDEX).setSource(source).get()); } + public void testRRFFiltersPropagatedToKnnQueryVectorBuilder() { + final int rankWindowSize = 100; + final int rankConstant = 10; + SearchSourceBuilder source = new SearchSourceBuilder(); + // this will retriever all but 7 only due to top-level filter + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(QueryBuilders.matchAllQuery()); + // this will too retrieve just doc 7 + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "vector", + null, + new TestQueryVectorBuilderPlugin.TestQueryVectorBuilder(new float[] { 3 }), + 10, + 10, + null + ); + source.retriever( + new RRFRetrieverBuilder( + Arrays.asList( + new CompoundRetrieverBuilder.RetrieverSource(standardRetriever, null), + new CompoundRetrieverBuilder.RetrieverSource(knnRetriever, null) + ), + rankWindowSize, + rankConstant + ) + ); + source.retriever().getPreFilterQueryBuilders().add(QueryBuilders.boolQuery().must(QueryBuilders.termQuery(DOC_FIELD, "doc_7"))); + source.size(10); + SearchRequestBuilder req = client().prepareSearch(INDEX).setSource(source); + ElasticsearchAssertions.assertResponse(req, resp -> { + assertNull(resp.pointInTimeId()); + assertNotNull(resp.getHits().getTotalHits()); + assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(resp.getHits().getHits()[0].getId(), equalTo("doc_7")); + }); + } + public void testRewriteOnce() { final float[] vector = new float[] { 1 }; AtomicInteger numAsyncCalls = new AtomicInteger(); diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java index bbc0f622724a3..bb61fa951948d 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFFeatures.java @@ -12,6 +12,7 @@ import java.util.Set; +import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT; import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED; /** @@ -23,4 +24,9 @@ public class RRFFeatures implements FeatureSpecification { public Set getFeatures() { return Set.of(RRFRetrieverBuilder.RRF_RETRIEVER_SUPPORTED, RRF_RETRIEVER_COMPOSITION_SUPPORTED); } + + @Override + public Set getTestFeatures() { + return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT); + } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 792ff4eac3893..f1171b74f7468 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.util.Maps; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.search.rank.RankBuilder; import org.elasticsearch.search.rank.RankDoc; @@ -108,8 +109,10 @@ public String getName() { } @Override - protected RRFRetrieverBuilder clone(List newRetrievers) { - return new RRFRetrieverBuilder(newRetrievers, this.rankWindowSize, this.rankConstant); + protected RRFRetrieverBuilder clone(List newRetrievers, List newPreFilterQueryBuilders) { + RRFRetrieverBuilder clone = new RRFRetrieverBuilder(newRetrievers, this.rankWindowSize, this.rankConstant); + clone.preFilterQueryBuilders = newPreFilterQueryBuilders; + return clone; } @Override diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml index 42c01f0b9636c..cb30542d80003 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml @@ -1071,3 +1071,77 @@ setup: - match: { hits.hits.2.inner_hits.nested_data_field.hits.total.value: 0 } - match: { hits.hits.2.inner_hits.nested_vector_field.hits.total.value: 0 } + + +--- +"rrf retriever with filters to be passed to nested rrf retrievers": + - requires: + cluster_features: 'inner_retrievers_filter_support' + reason: 'requires fix for properly propagating filters to nested sub-retrievers' + + - do: + search: + _source: false + index: test + body: + retriever: + { + rrf: + { + filter: { + term: { + keyword: "technology" + } + }, + retrievers: [ + { + rrf: { + retrievers: [ + { + # this should only return docs 3 and 5 due to top level filter + standard: { + query: { + knn: { + field: vector, + query_vector: [ 4.0 ], + k: 3 + } + } + } }, + { + # this should return no docs as no docs match both biology and technology + standard: { + query: { + term: { + keyword: "biology" + } + } + } + } + ], + rank_window_size: 10, + rank_constant: 10 + } + }, + # this should only return doc 5 + { + standard: { + query: { + term: { + text: "term5" + } + } + } + } + ], + rank_window_size: 10, + rank_constant: 10 + } + } + size: 10 + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "5" } + - match: { hits.hits.1._id: "3" } + +