From cdc01a682d487d4ceb8a4c8597571573744f0a46 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Tue, 17 Dec 2024 16:44:20 -0500 Subject: [PATCH] Add ability to set "max_analyzed_offet" implicitly to "index.highlight .max_analyzed_offset", by setting it excplicitly to "-1". --- .../AnnotatedTextHighlighter.java | 3 +- .../AnnotatedTextHighlighterTests.java | 21 ++++++++++++-- .../uhighlight/CustomFieldHighlighter.java | 8 ++--- .../uhighlight/CustomUnifiedHighlighter.java | 8 ++--- .../uhighlight/QueryMaxAnalyzedOffset.java | 29 +++++++++++++++++++ .../highlight/DefaultHighlighter.java | 14 +++++---- .../subphase/highlight/PlainHighlighter.java | 15 ++++++---- .../CustomUnifiedHighlighterTests.java | 14 ++++----- 8 files changed, 83 insertions(+), 29 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/lucene/search/uhighlight/QueryMaxAnalyzedOffset.java diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java index 8b4a9d6544b75..f636335701da5 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; +import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; @@ -52,7 +53,7 @@ protected List loadFieldValues( } @Override - protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) { + protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) { return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset)); } diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java index d4c4ccfaa442d..9c365b44f66c2 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java @@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper; import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; +import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset; import org.elasticsearch.lucene.search.uhighlight.Snippet; import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer; import org.elasticsearch.test.ESTestCase; @@ -85,7 +86,7 @@ private void assertHighlightOneDoc( int noMatchSize, String[] expectedPassages, int maxAnalyzedOffset, - Integer queryMaxAnalyzedOffset + Integer queryMaxAnalyzedOffsetIn ) throws Exception { try (Directory dir = newDirectory()) { @@ -116,8 +117,9 @@ private void assertHighlightOneDoc( for (int i = 0; i < markedUpInputs.length; i++) { annotations[i] = AnnotatedText.parse(markedUpInputs[i]); } - if (queryMaxAnalyzedOffset != null) { - wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset); + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset); + if (queryMaxAnalyzedOffset.isNull() == false) { + wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull()); } AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer); hiliteAnalyzer.setAnnotations(annotations); @@ -311,6 +313,19 @@ public void testExceedMaxAnalyzedOffset() throws Exception { e.getMessage() ); + // Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1. + assertHighlightOneDoc( + "text", + new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" }, + query, + Locale.ROOT, + breakIterator, + 0, + new String[] {}, + 20, + -1 + ); + assertHighlightOneDoc( "text", new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" }, diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java index 3e59814de0585..f966daaa57b5f 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java @@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter { private final Locale breakIteratorLocale; private final int noMatchSize; private String fieldValue; - private final Integer queryMaxAnalyzedOffset; + private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset; CustomFieldHighlighter( String field, @@ -47,7 +47,7 @@ class CustomFieldHighlighter extends FieldHighlighter { PassageFormatter passageFormatter, Comparator passageSortComparator, int noMatchSize, - Integer queryMaxAnalyzedOffset + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset ) { super( field, @@ -112,8 +112,8 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) { @Override protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException { - if (queryMaxAnalyzedOffset != null) { - off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset); + if (queryMaxAnalyzedOffset.isNull() == false) { + off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull()); } return super.highlightOffsetsEnums(off); } diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index d1c7d0415ad15..f69f1a1f01df8 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter { private final int noMatchSize; private final CustomFieldHighlighter fieldHighlighter; private final int maxAnalyzedOffset; - private final Integer queryMaxAnalyzedOffset; + private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset; /** * Creates a new instance of {@link CustomUnifiedHighlighter} @@ -94,7 +94,7 @@ public CustomUnifiedHighlighter( int noMatchSize, int maxPassages, int maxAnalyzedOffset, - Integer queryMaxAnalyzedOffset, + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset, boolean requireFieldMatch, boolean weightMatchesEnabled ) { @@ -125,9 +125,9 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier maxAnalyzedOffset) + if ((queryMaxAnalyzedOffset.isNull() || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset) && (getOffsetSource(field) == OffsetSource.ANALYSIS) - && (fieldValueLength > maxAnalyzedOffset))) { + && (fieldValueLength > maxAnalyzedOffset)) { throw new IllegalArgumentException( "The length [" + fieldValueLength diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/QueryMaxAnalyzedOffset.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/QueryMaxAnalyzedOffset.java new file mode 100644 index 0000000000000..a9f4a3031c7ad --- /dev/null +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/QueryMaxAnalyzedOffset.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.lucene.search.uhighlight; + +public class QueryMaxAnalyzedOffset { + private final Integer queryMaxAnalyzedOffset; + + public QueryMaxAnalyzedOffset(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) { + // If we have a negative value, grab value for the actual maximum from the index. + this.queryMaxAnalyzedOffset = (queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset >= 0) + ? queryMaxAnalyzedOffset + : Integer.valueOf(indexMaxAnalyzedOffset); + } + + public boolean isNull() { + return queryMaxAnalyzedOffset == null; + } + + public int getNotNull() { + return queryMaxAnalyzedOffset; + } +} diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 75f8e5588761a..e2eb348d052d0 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -31,6 +31,7 @@ import org.elasticsearch.lucene.search.uhighlight.BoundedBreakIteratorScanner; import org.elasticsearch.lucene.search.uhighlight.CustomPassageFormatter; import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; +import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset; import org.elasticsearch.lucene.search.uhighlight.Snippet; import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -121,7 +122,10 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset(); boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled(); int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments(); - Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset(); + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = new QueryMaxAnalyzedOffset( + fieldContext.field.fieldOptions().maxAnalyzedOffset(), + maxAnalyzedOffset + ); Analyzer analyzer = wrapAnalyzer( fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER), queryMaxAnalyzedOffset @@ -171,7 +175,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { fieldContext.field.fieldOptions().noMatchSize(), highlighterNumberOfFragments, maxAnalyzedOffset, - fieldContext.field.fieldOptions().maxAnalyzedOffset(), + queryMaxAnalyzedOffset, fieldContext.field.fieldOptions().requireFieldMatch(), weightMatchesEnabled ); @@ -186,9 +190,9 @@ protected PassageFormatter getPassageFormatter(SearchHighlightContext.Field fiel ); } - protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) { - if (maxAnalyzedOffset != null) { - analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset); + protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) { + if (maxAnalyzedOffset.isNull() == false) { + analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull()); } return analyzer; } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index 3dd3dd1b42c8c..850caf8012f39 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.text.Text; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset; import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -107,7 +108,10 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc ArrayList fragsList = new ArrayList<>(); List textsToHighlight; final int maxAnalyzedOffset = context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset(); - Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset(); + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = new QueryMaxAnalyzedOffset( + fieldContext.field.fieldOptions().maxAnalyzedOffset(), + maxAnalyzedOffset + ); Analyzer analyzer = wrapAnalyzer( context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER), queryMaxAnalyzedOffset @@ -119,7 +123,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc for (Object textToHighlight : textsToHighlight) { String text = convertFieldValue(fieldType, textToHighlight); int textLength = text.length(); - if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset) && (textLength > maxAnalyzedOffset)) { + if ((queryMaxAnalyzedOffset.isNull() || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset) + && (textLength > maxAnalyzedOffset)) { throw new IllegalArgumentException( "The length [" + textLength @@ -241,9 +246,9 @@ private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer an } } - private static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) { - if (maxAnalyzedOffset != null) { - return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset); + private static Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) { + if (maxAnalyzedOffset.isNull() == false) { + return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull()); } return analyzer; } diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 126641037fde7..d676075e0259a 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -93,7 +93,7 @@ private void assertHighlightOneDoc( int noMatchSize, String[] expectedPassages, int maxAnalyzedOffset, - Integer queryMaxAnalyzedOffset + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset ) throws Exception { assertHighlightOneDoc( fieldName, @@ -120,7 +120,7 @@ private void assertHighlightOneDoc( int noMatchSize, String[] expectedPassages, int maxAnalyzedOffset, - Integer queryMaxAnalyzedOffset, + QueryMaxAnalyzedOffset queryMaxAnalyzedOffset, UnifiedHighlighter.OffsetSource offsetSource ) throws Exception { try (Directory dir = newDirectory()) { @@ -453,7 +453,7 @@ public void testExceedMaxAnalyzedOffset() throws Exception { 0, new String[] {}, 10, - queryMaxAnalyzedOffset + new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset, 10) ); }); assertEquals( @@ -473,7 +473,7 @@ public void testExceedMaxAnalyzedOffset() throws Exception { 1, new String[] { "exceeds" }, 10, - 10 + new QueryMaxAnalyzedOffset(10, 10) ); } @@ -491,7 +491,7 @@ public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception { 0, new String[] { "Testing Fun Testing Fun" }, 29, - 10, + new QueryMaxAnalyzedOffset(10, 29), UnifiedHighlighter.OffsetSource.ANALYSIS ); assertHighlightOneDoc( @@ -504,7 +504,7 @@ public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception { 0, new String[] { "Testing Fun Testing Fun" }, 29, - 10, + new QueryMaxAnalyzedOffset(10, 29), UnifiedHighlighter.OffsetSource.POSTINGS ); } @@ -540,7 +540,7 @@ public void testExceedMaxAnalyzedOffsetRandomOffset() throws Exception { 0, new String[] { output }, 47, - randomOffset, + new QueryMaxAnalyzedOffset(randomOffset, 47), offsetSource ); }