From bb9d093c41ef7436633eff7d6619f8984918a227 Mon Sep 17 00:00:00 2001 From: Thomas Seidl Date: Sat, 17 Feb 2024 05:26:46 +0100 Subject: [PATCH] Fix the "highlight.max_analyzer_offset" request parameter with "plain" highlighter (#10919) Signed-off-by: Shivansh Arora --- CHANGELOG.md | 1 + .../search.highlight/30_max_analyzed_offset.yml | 12 ++++++++++++ .../subphase/highlight/PlainHighlighter.java | 16 +++++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60e68edbfffbf..6055b55f83738 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -120,6 +120,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support of special WrappingSearchAsyncActionPhase so the onPhaseStart() will always be followed by onPhaseEnd() within AbstractSearchAsyncAction ([#12293](https://github.com/opensearch-project/OpenSearch/pull/12293)) - Add a system property to configure YamlParser codepoint limits ([#12298](https://github.com/opensearch-project/OpenSearch/pull/12298)) - Prevent read beyond slice boundary in ByteArrayIndexInput ([#10481](https://github.com/opensearch-project/OpenSearch/issues/10481)) +- Fix the "highlight.max_analyzer_offset" request parameter with "plain" highlighter ([#10919](https://github.com/opensearch-project/OpenSearch/pull/10919)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml index a18ac45e62175..4ee905972d106 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml @@ -78,3 +78,15 @@ setup: index: test1 body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}}} - match: { error.root_cause.0.type: "illegal_argument_exception" } + +--- +"Plain highlighter on a field WITHOUT OFFSETS using max_analyzer_offset should SUCCEED": + - skip: + version: " - 2.1.99" + reason: only starting supporting the parameter max_analyzer_offset on version 2.2 + - do: + search: + rest_total_hits_as_int: true + index: test1 + body: {"query" : {"match" : {"field1" : "quick"}}, "highlight" : {"type" : "plain", "fields" : {"field1" : {"max_analyzer_offset": 10}}}} + - match: {hits.hits.0.highlight.field1.0: "The quick "} diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/PlainHighlighter.java index eb5f4f3c14eb2..c06a733203434 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -123,13 +123,27 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc List textsToHighlight; Analyzer analyzer = context.mapperService().documentMapper().mappers().indexAnalyzer(); final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset(); + final Integer fieldMaxAnalyzedOffset = field.fieldOptions().maxAnalyzerOffset(); + if (fieldMaxAnalyzedOffset != null && fieldMaxAnalyzedOffset > maxAnalyzedOffset) { + throw new IllegalArgumentException( + "max_analyzer_offset has exceeded [" + + maxAnalyzedOffset + + "] - maximum allowed to be analyzed for highlighting. " + + "This maximum can be set by changing the [" + + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() + + "] index level setting. " + + "For large texts, indexing with offsets or term vectors is recommended!" + ); + } textsToHighlight = HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource); for (Object textToHighlight : textsToHighlight) { String text = convertFieldValue(fieldType, textToHighlight); int textLength = text.length(); - if (textLength > maxAnalyzedOffset) { + if (fieldMaxAnalyzedOffset != null && textLength > fieldMaxAnalyzedOffset) { + text = text.substring(0, fieldMaxAnalyzedOffset); + } else if (textLength > maxAnalyzedOffset) { throw new IllegalArgumentException( "The length of [" + fieldContext.fieldName