From c34abfa1c7fc32a6d7f4c60d4923beb97328b015 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 13 Aug 2024 02:12:16 +0900 Subject: [PATCH] Update the intervals query docs Since https://github.com/apache/lucene-solr/pull/620, intervals disjunctions are automatically rewritten to handle cases where minimizations can miss valid matches. This change updates the documentation to take this behaviour into account (users don't need to manually pull intervals disjunctions to the top anymore). --- .../query-dsl/intervals-query.asciidoc | 65 ------------------- .../test/search/230_interval_query.yml | 31 +++++++++ 2 files changed, 31 insertions(+), 65 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 63ba4046a395d..1e3380389d861 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -397,68 +397,3 @@ This query does *not* match a document containing the phrase `hot porridge is salty porridge`, because the intervals returned by the match query for `hot porridge` only cover the initial two terms in this document, and these do not overlap the intervals covering `salty`. - -Another restriction to be aware of is the case of `any_of` rules that contain -sub-rules which overlap. In particular, if one of the rules is a strict -prefix of the other, then the longer rule can never match, which can -cause surprises when used in combination with `max_gaps`. Consider the -following query, searching for `the` immediately followed by `big` or `big bad`, -immediately followed by `wolf`: - -[source,console] --------------------------------------------------- -POST _search -{ - "query": { - "intervals" : { - "my_text" : { - "all_of" : { - "intervals" : [ - { "match" : { "query" : "the" } }, - { "any_of" : { - "intervals" : [ - { "match" : { "query" : "big" } }, - { "match" : { "query" : "big bad" } } - ] } }, - { "match" : { "query" : "wolf" } } - ], - "max_gaps" : 0, - "ordered" : true - } - } - } - } -} --------------------------------------------------- - -Counter-intuitively, this query does *not* match the document `the big bad -wolf`, because the `any_of` rule in the middle only produces intervals -for `big` - intervals for `big bad` being longer than those for `big`, while -starting at the same position, and so being minimized away. In these cases, -it's better to rewrite the query so that all of the options are explicitly -laid out at the top level: - -[source,console] --------------------------------------------------- -POST _search -{ - "query": { - "intervals" : { - "my_text" : { - "any_of" : { - "intervals" : [ - { "match" : { - "query" : "the big bad wolf", - "ordered" : true, - "max_gaps" : 0 } }, - { "match" : { - "query" : "the big wolf", - "ordered" : true, - "max_gaps" : 0 } } - ] - } - } - } - } -} --------------------------------------------------- diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml index 82fb18a879346..99bd001bd95e2 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml @@ -21,6 +21,10 @@ setup: - '{"text" : "Baby its cold there outside"}' - '{"index": {"_index": "test", "_id": "4"}}' - '{"text" : "Outside it is cold and wet"}' + - '{"index": {"_index": "test", "_id": "5"}}' + - '{"text" : "the big bad wolf"}' + - '{"index": {"_index": "test", "_id": "6"}}' + - '{"text" : "the big wolf"}' --- "Test ordered matching": @@ -444,4 +448,31 @@ setup: prefix: out - match: { hits.total.value: 3 } +--- +"Test rewrite disjunctions": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - "match": + "query": "the" + - "any_of": + "intervals": + - "match": + "query": "big" + - "match": + "query": "big bad" + - "match": + "query": "wolf" + max_gaps: 0 + ordered: true + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "5" }