Skip to content

Commit

Permalink
Add ability to set "max_analyzed_offset" implicitly to "index.highlight
Browse files Browse the repository at this point in the history
.max_analyzed_offset", by setting it explicitly to "-1".
  • Loading branch information
svilen-mihaylov-elastic committed Dec 18, 2024
1 parent 8d1f456 commit cdc01a6
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
Expand Down Expand Up @@ -52,7 +53,7 @@ protected List<Object> loadFieldValues(
}

@Override
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
import org.elasticsearch.test.ESTestCase;
Expand Down Expand Up @@ -85,7 +86,7 @@ private void assertHighlightOneDoc(
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
Integer queryMaxAnalyzedOffsetIn
) throws Exception {

try (Directory dir = newDirectory()) {
Expand Down Expand Up @@ -116,8 +117,9 @@ private void assertHighlightOneDoc(
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
if (queryMaxAnalyzedOffset.isNull() == false) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
Expand Down Expand Up @@ -311,6 +313,19 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
e.getMessage()
);

// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
assertHighlightOneDoc(
"text",
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
query,
Locale.ROOT,
breakIterator,
0,
new String[] {},
20,
-1
);

assertHighlightOneDoc(
"text",
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

CustomFieldHighlighter(
String field,
Expand All @@ -47,7 +47,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator,
int noMatchSize,
Integer queryMaxAnalyzedOffset
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
) {
super(
field,
Expand Down Expand Up @@ -112,8 +112,8 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {

@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
if (queryMaxAnalyzedOffset != null) {
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset);
if (queryMaxAnalyzedOffset.isNull() == false) {
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull());
}
return super.highlightOffsetsEnums(off);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final int noMatchSize;
private final CustomFieldHighlighter fieldHighlighter;
private final int maxAnalyzedOffset;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

/**
* Creates a new instance of {@link CustomUnifiedHighlighter}
Expand Down Expand Up @@ -94,7 +94,7 @@ public CustomUnifiedHighlighter(
int noMatchSize,
int maxPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
boolean requireFieldMatch,
boolean weightMatchesEnabled
) {
Expand Down Expand Up @@ -125,9 +125,9 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset)
if ((queryMaxAnalyzedOffset.isNull() || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (getOffsetSource(field) == OffsetSource.ANALYSIS)
&& (fieldValueLength > maxAnalyzedOffset))) {
&& (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ fieldValueLength
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.lucene.search.uhighlight;

/**
 * Holds the query-level maximum analyzed offset for highlighting after it has been
 * resolved against the index-level maximum analyzed offset.
 * <p>
 * The wrapped value is one of:
 * <ul>
 *   <li>{@code null} when no query-level value was supplied;</li>
 *   <li>the supplied value itself when it is zero or positive;</li>
 *   <li>the index-level value when the supplied value is negative (for example {@code -1}).</li>
 * </ul>
 * Instances are immutable.
 */
public class QueryMaxAnalyzedOffset {
    /** Resolved query-level offset, or {@code null} when the query did not specify one. */
    private final Integer queryMaxAnalyzedOffset;

    /**
     * Resolves the query-level offset against the index-level offset.
     *
     * @param queryMaxAnalyzedOffset the offset requested by the query; may be {@code null} (unset) or
     *                               negative (meaning "use the index-level value")
     * @param indexMaxAnalyzedOffset the index-level offset substituted when the query value is negative
     */
    public QueryMaxAnalyzedOffset(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) {
        if (queryMaxAnalyzedOffset == null) {
            // Nothing was requested at the query level: stay unset.
            this.queryMaxAnalyzedOffset = null;
        } else if (queryMaxAnalyzedOffset < 0) {
            // If we have a negative value, grab value for the actual maximum from the index.
            this.queryMaxAnalyzedOffset = Integer.valueOf(indexMaxAnalyzedOffset);
        } else {
            this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
        }
    }

    /**
     * @return {@code true} when no query-level offset was supplied
     */
    public boolean isNull() {
        return queryMaxAnalyzedOffset == null;
    }

    /**
     * Returns the resolved offset as a primitive. Callers must check {@link #isNull()} first.
     *
     * @return the resolved query-level offset
     * @throws NullPointerException if no query-level offset was supplied
     */
    public int getNotNull() {
        // Fail with an explicit message instead of an anonymous auto-unboxing NullPointerException.
        if (queryMaxAnalyzedOffset == null) {
            throw new NullPointerException("query max analyzed offset is not set; check isNull() before calling getNotNull()");
        }
        return queryMaxAnalyzedOffset.intValue();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.elasticsearch.lucene.search.uhighlight.CustomPassageFormatter;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -121,7 +122,10 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset();
boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled();
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = new QueryMaxAnalyzedOffset(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
Expand Down Expand Up @@ -171,7 +175,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
fieldContext.field.fieldOptions().noMatchSize(),
highlighterNumberOfFragments,
maxAnalyzedOffset,
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
queryMaxAnalyzedOffset,
fieldContext.field.fieldOptions().requireFieldMatch(),
weightMatchesEnabled
);
Expand All @@ -186,9 +190,9 @@ protected PassageFormatter getPassageFormatter(SearchHighlightContext.Field fiel
);
}

protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset.isNull() == false) {
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;

Expand Down Expand Up @@ -107,7 +108,10 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
ArrayList<OrderedTextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
final int maxAnalyzedOffset = context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = new QueryMaxAnalyzedOffset(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
Expand All @@ -119,7 +123,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
int textLength = text.length();
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset) && (textLength > maxAnalyzedOffset)) {
if ((queryMaxAnalyzedOffset.isNull() || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (textLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ textLength
Expand Down Expand Up @@ -241,9 +246,9 @@ private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer an
}
}

private static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
private static Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset.isNull() == false) {
return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ private void assertHighlightOneDoc(
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
) throws Exception {
assertHighlightOneDoc(
fieldName,
Expand All @@ -120,7 +120,7 @@ private void assertHighlightOneDoc(
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
UnifiedHighlighter.OffsetSource offsetSource
) throws Exception {
try (Directory dir = newDirectory()) {
Expand Down Expand Up @@ -453,7 +453,7 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
0,
new String[] {},
10,
queryMaxAnalyzedOffset
new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset, 10)
);
});
assertEquals(
Expand All @@ -473,7 +473,7 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
1,
new String[] { "exceeds" },
10,
10
new QueryMaxAnalyzedOffset(10, 10)
);
}

Expand All @@ -491,7 +491,7 @@ public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
0,
new String[] { "Testing <b>Fun</b> Testing Fun" },
29,
10,
new QueryMaxAnalyzedOffset(10, 29),
UnifiedHighlighter.OffsetSource.ANALYSIS
);
assertHighlightOneDoc(
Expand All @@ -504,7 +504,7 @@ public void testExceedMaxAnalyzedOffsetWithRepeatedWords() throws Exception {
0,
new String[] { "Testing <b>Fun</b> Testing Fun" },
29,
10,
new QueryMaxAnalyzedOffset(10, 29),
UnifiedHighlighter.OffsetSource.POSTINGS
);
}
Expand Down Expand Up @@ -540,7 +540,7 @@ public void testExceedMaxAnalyzedOffsetRandomOffset() throws Exception {
0,
new String[] { output },
47,
randomOffset,
new QueryMaxAnalyzedOffset(randomOffset, 47),
offsetSource
);
}
Expand Down

0 comments on commit cdc01a6

Please sign in to comment.