Skip to content

Commit

Permalink
Prevent flattening of ordered and unordered interval sources (elastic…
Browse files Browse the repository at this point in the history
…#114234)

This PR applies a temporary patch to fix an issue with ordered and unordered intervals source.
The flattening that is applied in Lucene modifies the final gap preventing valid queries to match.
The fix already exists in Lucene but will be released in Lucene 10.x later this year.
Since the bug prevents the combination of ordered and unordered intervals with gaps, this change applies
a workaround to ensure that the bug is fixed in Elasticsearch 8x.

Relates elastic#113554
  • Loading branch information
jimczi committed Oct 9, 2024
1 parent 09a50e5 commit de6921b
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 16 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/114234.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 114234
summary: Prevent flattening of ordered and unordered interval sources
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import static java.util.Collections.singletonMap;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;

public class IntervalQueriesIT extends ESIntegTestCase {

Expand Down Expand Up @@ -64,6 +65,58 @@ public void testEmptyIntervalsWithNestedMappings() throws InterruptedException {
);
}

public void testPreserveInnerGap() {
assertAcked(prepareCreate("index").setMapping("""
{
"_doc" : {
"properties" : {
"text" : { "type" : "text" }
}
}
}
"""));

indexRandom(true, prepareIndex("index").setId("1").setSource("text", "w1 w2 w3 w4 w5"));

// ordered
{
var res = prepareSearch("index").setQuery(
new IntervalQueryBuilder(
"text",
new IntervalsSourceProvider.Combine(
Arrays.asList(
new IntervalsSourceProvider.Match("w1 w4", -1, true, null, null, null),
new IntervalsSourceProvider.Match("w5", -1, true, null, null, null)
),
true,
1,
null
)
)
);
assertSearchHits(res, "1");
}

// unordered
{
var res = prepareSearch("index").setQuery(
new IntervalQueryBuilder(
"text",
new IntervalsSourceProvider.Combine(
Arrays.asList(
new IntervalsSourceProvider.Match("w3", 0, false, null, null, null),
new IntervalsSourceProvider.Match("w4 w1", -1, false, null, null, null)
),
false,
0,
null
)
)
);
assertSearchHits(res, "1");
}
}

private static class EmptyAnalyzer extends Analyzer {

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ protected static IntervalsSource combineSources(List<IntervalsSource> sources, i
if (maxGaps == 0 && ordered) {
return Intervals.phrase(sourcesArray);
}
IntervalsSource inner = ordered ? Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray);
IntervalsSource inner = ordered ? XIntervals.ordered(sourcesArray) : XIntervals.unordered(sourcesArray);
if (maxGaps == -1) {
return inner;
}
Expand Down
106 changes: 106 additions & 0 deletions server/src/main/java/org/elasticsearch/index/query/XIntervals.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.query;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.intervals.IntervalIterator;
import org.apache.lucene.queries.intervals.IntervalMatchesIterator;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.QueryVisitor;

import java.io.IOException;
import java.util.Collection;
import java.util.Objects;

/**
* Copy of {@link Intervals} that exposes versions of {@link Intervals#ordered} and {@link Intervals#unordered}
* that preserve their inner gaps.
* NOTE: Remove this hack when a version of Lucene with https://github.com/apache/lucene/pull/13819 is used (10.1.0).
*/
public final class XIntervals {

/**
* Create an ordered {@link IntervalsSource}
*
* <p>Returns intervals in which the subsources all appear in the given order
*
* @param subSources an ordered set of {@link IntervalsSource} objects
*/
public static IntervalsSource ordered(IntervalsSource... subSources) {
return new DelegateIntervalsSource(Intervals.ordered(subSources));
}

/**
* Create an ordered {@link IntervalsSource}
*
* <p>Returns intervals in which the subsources all appear in the given order
*
* @param subSources an ordered set of {@link IntervalsSource} objects
*/
public static IntervalsSource unordered(IntervalsSource... subSources) {
return new DelegateIntervalsSource(Intervals.unordered(subSources));
}

/**
* Wraps a source to avoid aggressive flattening of the ordered and unordered sources.
* The flattening modifies the final gap and is removed in the latest unreleased version of Lucene (10.1).
*/
private static class DelegateIntervalsSource extends IntervalsSource {
private final IntervalsSource delegate;

private DelegateIntervalsSource(IntervalsSource delegate) {
this.delegate = delegate;
}

@Override
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
return delegate.intervals(field, ctx);
}

@Override
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
return delegate.matches(field, ctx, doc);
}

@Override
public void visit(String field, QueryVisitor visitor) {
delegate.visit(field, visitor);
}

@Override
public int minExtent() {
return delegate.minExtent();
}

@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return delegate.pullUpDisjunctions();
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DelegateIntervalsSource that = (DelegateIntervalsSource) o;
return Objects.equals(delegate, that.delegate);
}

@Override
public int hashCode() {
return Objects.hash(delegate);
}

@Override
public String toString() {
return delegate.toString();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void testOrdered() throws IOException {
CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6));

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));
IntervalsSource expected = XIntervals.ordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));

assertEquals(expected, source);

Expand All @@ -57,7 +57,7 @@ public void testUnordered() throws IOException {
CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6));

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, false);
IntervalsSource expected = Intervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));
IntervalsSource expected = XIntervals.unordered(Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3"));

assertEquals(expected, source);

Expand Down Expand Up @@ -101,7 +101,7 @@ public void testSimpleSynonyms() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(Intervals.term("term2"), Intervals.term("term4")),
Intervals.term("term3")
Expand All @@ -122,7 +122,7 @@ public void testSimpleSynonymsWithGap() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.extend(Intervals.or(Intervals.term("term2"), Intervals.term("term3"), Intervals.term("term4")), 1, 0),
Intervals.term("term5")
Expand All @@ -143,7 +143,7 @@ public void testGraphSynonyms() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")),
Intervals.term("term5")
Expand All @@ -166,7 +166,7 @@ public void testGraphSynonymsWithGaps() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(
Intervals.extend(Intervals.term("term2"), 1, 0),
Expand All @@ -190,7 +190,7 @@ public void testGraphTerminatesOnGap() throws IOException {
);

IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true);
IntervalsSource expected = Intervals.ordered(
IntervalsSource expected = XIntervals.ordered(
Intervals.term("term1"),
Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")),
Intervals.extend(Intervals.term("term5"), 1, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ public void testMatchInterval() throws IOException {
}""", TEXT_FIELD_NAME);

IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")));
Query expected = new IntervalQuery(TEXT_FIELD_NAME, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world")));

assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand All @@ -222,7 +222,7 @@ public void testMatchInterval() throws IOException {
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))
Intervals.maxgaps(40, XIntervals.unordered(Intervals.term("hello"), Intervals.term("world")))
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand All @@ -241,7 +241,7 @@ public void testMatchInterval() throws IOException {

builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(TEXT_FIELD_NAME, Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))),
new IntervalQuery(TEXT_FIELD_NAME, XIntervals.ordered(Intervals.term("hello"), Intervals.term("world"))),
2
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
Expand All @@ -263,7 +263,7 @@ public void testMatchInterval() throws IOException {
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))
Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world")))
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand All @@ -285,7 +285,7 @@ public void testMatchInterval() throws IOException {
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))))
Intervals.fixField(MASKED_FIELD, Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))))
);
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));

Expand Down Expand Up @@ -314,7 +314,7 @@ public void testMatchInterval() throws IOException {
expected = new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.containing(
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))),
Intervals.maxgaps(10, XIntervals.ordered(Intervals.term("Hello"), Intervals.term("world"))),
Intervals.term("blah")
)
);
Expand Down Expand Up @@ -426,7 +426,7 @@ public void testCombineInterval() throws IOException {
Intervals.containedBy(
Intervals.maxgaps(
30,
Intervals.ordered(Intervals.term("one"), Intervals.unordered(Intervals.term("two"), Intervals.term("three")))
XIntervals.ordered(Intervals.term("one"), XIntervals.unordered(Intervals.term("two"), Intervals.term("three")))
),
Intervals.term("SENTENCE")
)
Expand Down Expand Up @@ -486,7 +486,7 @@ public void testCombineDisjunctionInterval() throws IOException {
Intervals.notContainedBy(
Intervals.maxgaps(
30,
Intervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside")))
XIntervals.ordered(Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside")))
),
Intervals.term("freeze")
)
Expand Down

0 comments on commit de6921b

Please sign in to comment.