diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerUtil.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerUtil.java
index d4406f33e..1cf2680e6 100644
--- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerUtil.java
+++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerUtil.java
@@ -19,23 +19,12 @@ private ChunkerUtil() {} // no instance of this util class
 
     /**
      * Checks whether the chunking results would exceed the max chunk limit.
-     * If exceeds, then Throw IllegalStateException
+     * If exceeds, then return true
      *
      * @param chunkResultSize the size of chunking result
      * @param runtimeMaxChunkLimit runtime max_chunk_limit, used to check with chunkResultSize
-     * @param nonRuntimeMaxChunkLimit non-runtime max_chunk_limit, used to keep exception message consistent
      */
-    public static void checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit, int nonRuntimeMaxChunkLimit) {
-        if (runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT && chunkResultSize >= runtimeMaxChunkLimit) {
-            throw new IllegalArgumentException(
-                String.format(
-                    Locale.ROOT,
-                    "The number of chunks produced by %s processor has exceeded the allowed maximum of [%s]. This limit can be set by changing the [%s] parameter.",
-                    TYPE,
-                    nonRuntimeMaxChunkLimit,
-                    MAX_CHUNK_LIMIT_FIELD
-                )
-            );
-        }
+    public static boolean checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit) {
+        return runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT && chunkResultSize >= runtimeMaxChunkLimit;
     }
 }
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java
index c688af436..b9bcce8ba 100644
--- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java
+++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/DelimiterChunker.java
@@ -59,15 +59,16 @@ public List<String> chunk(final String content, final Map<String, Object> runtimeParameters) {
 
         int nextDelimiterPosition = content.indexOf(delimiter);
 
         while (nextDelimiterPosition != -1) {
-            ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit, maxChunkLimit);
+            if (ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit)) {
+                break;
+            }
             end = nextDelimiterPosition + delimiter.length();
             chunkResult.add(content.substring(start, end));
             start = end;
             nextDelimiterPosition = content.indexOf(delimiter, start);
         }
 
-        if (start < content.length()) {
-            ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit, maxChunkLimit);
+        if (start < content.length() && !ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit)) {
             chunkResult.add(content.substring(start));
         }
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java
index cd630adf1..f3e272ab7 100644
--- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java
+++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java
@@ -131,7 +131,9 @@ public List<String> chunk(final String content, final Map<String, Object> runtimeParameters) {
         int overlapTokenNumber = (int) Math.floor(tokenLimit * overlapRate);
 
         while (startTokenIndex < tokens.size()) {
-            ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit, maxChunkLimit);
+            if (ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit)) {
+                break;
+            }
             if (startTokenIndex == 0) {
                 // include all characters till the start if no previous passage
                 startContentPosition = 0;