diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomPassageFormatter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomPassageFormatter.java index eb87dc982543f..6ae2f53a94ad8 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomPassageFormatter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomPassageFormatter.java @@ -48,7 +48,7 @@ public Snippet[] format(Passage[] passages, String content) { assert end > start; // Look ahead to expand 'end' past all overlapping: while (i + 1 < passage.getNumMatches() && passage.getMatchStarts()[i + 1] < end) { - end = passage.getMatchEnds()[++i]; + end = Math.max(passage.getMatchEnds()[++i], end); } end = Math.min(end, passage.getEndOffset()); // in case match straddles past passage diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 5df6840640264..bf249ba4409ab 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -13,6 +13,10 @@ import org.apache.lucene.analysis.custom.CustomAnalyzer; import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.standard.StandardTokenizerFactory; +import org.apache.lucene.analysis.synonym.SolrSynonymParser; +import org.apache.lucene.analysis.synonym.SynonymFilterFactory; +import org.apache.lucene.analysis.synonym.SynonymMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -35,11 +39,15 @@ import org.apache.lucene.search.uhighlight.UnifiedHighlighter; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.ResourceLoader; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.test.ESTestCase; +import java.io.IOException; +import java.io.StringReader; import java.text.BreakIterator; +import java.text.ParseException; import java.util.Locale; import java.util.Map; import java.util.TreeMap; @@ -153,9 +161,9 @@ private void assertHighlightOneDoc( true ); final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue); - assertEquals(snippets.length, expectedPassages.length); + assertEquals(expectedPassages.length, snippets.length); for (int i = 0; i < snippets.length; i++) { - assertEquals(snippets[i].getText(), expectedPassages[i]); + assertEquals(expectedPassages[i], snippets[i].getText()); } } } @@ -356,6 +364,42 @@ public void testOverlappingTerms() throws Exception { assertHighlightOneDoc("text", inputs, analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); } + public static class NYCFilterFactory extends SynonymFilterFactory { + public NYCFilterFactory(Map args) { + super(args); + } + + @Override + protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, + ParseException { + SynonymMap.Parser parser = new SolrSynonymParser(false, false, analyzer); + parser.parse(new StringReader("new york city => nyc, new york city")); + return parser.build(); + } + } + + public void testOverlappingPositions() throws Exception { + final String[] inputs = { "new york city" }; + final String[] outputs = { "new york city" }; + BooleanQuery query = new BooleanQuery.Builder().add( + new BooleanQuery.Builder().add(new TermQuery(new Term("text", "nyc")), BooleanClause.Occur.SHOULD) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("text", "new")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("text", "york")), BooleanClause.Occur.MUST) + .add(new TermQuery(new Term("text", "city")), BooleanClause.Occur.MUST) + .build(), + BooleanClause.Occur.SHOULD + ) + .build(), + BooleanClause.Occur.MUST + ).build(); + Analyzer analyzer = CustomAnalyzer.builder() + .withTokenizer(StandardTokenizerFactory.class) + .addTokenFilter(NYCFilterFactory.class, "synonyms", "N/A") + .build(); + assertHighlightOneDoc("text", inputs, analyzer, query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); + } + public void testExceedMaxAnalyzedOffset() throws Exception { TermQuery query = new TermQuery(new Term("text", "max")); Analyzer analyzer = CustomAnalyzer.builder()