From de6921b60b3573dd5dd330c021e6b60fa3911f23 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 9 Oct 2024 22:13:11 +0100 Subject: [PATCH] Prevent flattening of ordered and unordered interval sources (#114234) This PR applies a temporary patch to fix an issue with ordered and unordered intervals source. The flattening that is applied in Lucene modifies the final gap preventing valid queries to match. The fix already exists in Lucene but will be released in Lucene 10.x later this year. Since the bug prevents the combination of ordered and unordered intervals with gaps, this change applies a workaround to ensure that the bug is fixed in Elasticsearch 8x. Relates #113554 --- docs/changelog/114234.yaml | 5 + .../search/query/IntervalQueriesIT.java | 53 +++++++++ .../index/query/IntervalBuilder.java | 2 +- .../elasticsearch/index/query/XIntervals.java | 106 ++++++++++++++++++ .../index/query/IntervalBuilderTests.java | 14 +-- .../query/IntervalQueryBuilderTests.java | 16 +-- 6 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/114234.yaml create mode 100644 server/src/main/java/org/elasticsearch/index/query/XIntervals.java diff --git a/docs/changelog/114234.yaml b/docs/changelog/114234.yaml new file mode 100644 index 0000000000000..0f77ada794bee --- /dev/null +++ b/docs/changelog/114234.yaml @@ -0,0 +1,5 @@ +pr: 114234 +summary: Prevent flattening of ordered and unordered interval sources +area: Search +type: bug +issues: [] diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java index 7fc93debc65c9..def137f9a9ec8 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/IntervalQueriesIT.java @@ -31,6 +31,7 @@ import static java.util.Collections.singletonMap; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; public class IntervalQueriesIT extends ESIntegTestCase { @@ -64,6 +65,58 @@ public void testEmptyIntervalsWithNestedMappings() throws InterruptedException { ); } + public void testPreserveInnerGap() { + assertAcked(prepareCreate("index").setMapping(""" + { + "_doc" : { + "properties" : { + "text" : { "type" : "text" } + } + } + } + """)); + + indexRandom(true, prepareIndex("index").setId("1").setSource("text", "w1 w2 w3 w4 w5")); + + // ordered + { + var res = prepareSearch("index").setQuery( + new IntervalQueryBuilder( + "text", + new IntervalsSourceProvider.Combine( + Arrays.asList( + new IntervalsSourceProvider.Match("w1 w4", -1, true, null, null, null), + new IntervalsSourceProvider.Match("w5", -1, true, null, null, null) + ), + true, + 1, + null + ) + ) + ); + assertSearchHits(res, "1"); + } + + // unordered + { + var res = prepareSearch("index").setQuery( + new IntervalQueryBuilder( + "text", + new IntervalsSourceProvider.Combine( + Arrays.asList( + new IntervalsSourceProvider.Match("w3", 0, false, null, null, null), + new IntervalsSourceProvider.Match("w4 w1", -1, false, null, null, null) + ), + false, + 0, + null + ) + ) + ); + assertSearchHits(res, "1"); + } + } + private static class EmptyAnalyzer extends Analyzer { @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 46d7fec641943..f21edaeb94f22 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -126,7 +126,7 @@ protected static IntervalsSource combineSources(List sources, i if (maxGaps == 0 && ordered) { return Intervals.phrase(sourcesArray); } - IntervalsSource inner = ordered ? Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray); + IntervalsSource inner = ordered ? XIntervals.ordered(sourcesArray) : XIntervals.unordered(sourcesArray); if (maxGaps == -1) { return inner; } diff --git a/server/src/main/java/org/elasticsearch/index/query/XIntervals.java b/server/src/main/java/org/elasticsearch/index/query/XIntervals.java new file mode 100644 index 0000000000000..7d8552e18f790 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/XIntervals.java @@ -0,0 +1,106 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.intervals.IntervalIterator; +import org.apache.lucene.queries.intervals.IntervalMatchesIterator; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.QueryVisitor; + +import java.io.IOException; +import java.util.Collection; +import java.util.Objects; + +/** + * Copy of {@link Intervals} that exposes versions of {@link Intervals#ordered} and {@link Intervals#unordered} + * that preserve their inner gaps. + * NOTE: Remove this hack when a version of Lucene with https://github.com/apache/lucene/pull/13819 is used (10.1.0). + */ +public final class XIntervals { + + /** + * Create an ordered {@link IntervalsSource} + * + *

Returns intervals in which the subsources all appear in the given order + * + * @param subSources an ordered set of {@link IntervalsSource} objects + */ + public static IntervalsSource ordered(IntervalsSource... subSources) { + return new DelegateIntervalsSource(Intervals.ordered(subSources)); + } + + /** + * Create an ordered {@link IntervalsSource} + * + *

Returns intervals in which the subsources all appear in the given order + * + * @param subSources an ordered set of {@link IntervalsSource} objects + */ + public static IntervalsSource unordered(IntervalsSource... subSources) { + return new DelegateIntervalsSource(Intervals.unordered(subSources)); + } + + /** + * Wraps a source to avoid aggressive flattening of the ordered and unordered sources. + * The flattening modifies the final gap and is removed in the latest unreleased version of Lucene (10.1). + */ + private static class DelegateIntervalsSource extends IntervalsSource { + private final IntervalsSource delegate; + + private DelegateIntervalsSource(IntervalsSource delegate) { + this.delegate = delegate; + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return delegate.intervals(field, ctx); + } + + @Override + public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException { + return delegate.matches(field, ctx, doc); + } + + @Override + public void visit(String field, QueryVisitor visitor) { + delegate.visit(field, visitor); + } + + @Override + public int minExtent() { + return delegate.minExtent(); + } + + @Override + public Collection pullUpDisjunctions() { + return delegate.pullUpDisjunctions(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DelegateIntervalsSource that = (DelegateIntervalsSource) o; + return Objects.equals(delegate, that.delegate); + } + + @Override + public int hashCode() { + return Objects.hash(delegate); + } + + @Override + public String toString() { + return delegate.toString(); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java index 3476655c705ae..7005f17663d0d 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -46,7 +46,7 @@ public void testOrdered() throws IOException { CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6)); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); + IntervalsSource expected = XIntervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); assertEquals(expected, source); @@ -57,7 +57,7 @@ public void testUnordered() throws IOException { CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6)); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, false); - IntervalsSource expected = Intervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); + IntervalsSource expected = XIntervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3")); assertEquals(expected, source); @@ -101,7 +101,7 @@ public void testSimpleSynonyms() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.term("term4")), Intervals.term("term3") @@ -122,7 +122,7 @@ public void testSimpleSynonymsWithGap() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.extend(Intervals.or(Intervals.term("term2"), Intervals.term("term3"), Intervals.term("term4")), 1, 0), Intervals.term("term5") @@ -143,7 +143,7 @@ public void testGraphSynonyms() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), Intervals.term("term5") @@ -166,7 +166,7 @@ public void testGraphSynonymsWithGaps() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or( Intervals.extend(Intervals.term("term2"), 1, 0), @@ -190,7 +190,7 @@ public void testGraphTerminatesOnGap() throws IOException { ); IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); - IntervalsSource expected = Intervals.ordered( + IntervalsSource expected = XIntervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), Intervals.extend(Intervals.term("term5"), 1, 0) diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index aad8275f4749d..f0084f4f24e98 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -203,7 +203,7 @@ public void testMatchInterval() throws IOException { }""", TEXT_FIELD_NAME); IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); - Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))); + Query expected = new IntervalQuery(TEXT_FIELD_NAME, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world"))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -222,7 +222,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery( TEXT_FIELD_NAME, - Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))) + Intervals.maxgaps(40, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world"))) ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -241,7 +241,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new BoostQuery( - new IntervalQuery(TEXT_FIELD_NAME, Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))), + new IntervalQuery(TEXT_FIELD_NAME, XIntervals.ordered(Intervals.term("hello"), Intervals.term("world"))), 2 ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -263,7 +263,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery( TEXT_FIELD_NAME, - Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))) + Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))) ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -285,7 +285,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery( TEXT_FIELD_NAME, - Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))) + Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world")))) ); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); @@ -314,7 +314,7 @@ public void testMatchInterval() throws IOException { expected = new IntervalQuery( TEXT_FIELD_NAME, Intervals.containing( - Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), + Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), Intervals.term("blah") ) ); @@ -426,7 +426,7 @@ public void testCombineInterval() throws IOException { Intervals.containedBy( Intervals.maxgaps( 30, - Intervals.ordered(Intervals.term("one"), Intervals.unordered(Intervals.term("two"), Intervals.term("three"))) + XIntervals.ordered(Intervals.term("one"), XIntervals.unordered(Intervals.term("two"), Intervals.term("three"))) ), Intervals.term("SENTENCE") ) @@ -486,7 +486,7 @@ public void testCombineDisjunctionInterval() throws IOException { Intervals.notContainedBy( Intervals.maxgaps( 30, - Intervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside"))) + XIntervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside"))) ), Intervals.term("freeze") )