Skip to content

Commit

Permalink
change max chunk limit behavior
Browse files Browse the repository at this point in the history
Signed-off-by: yuye-aws <[email protected]>
  • Loading branch information
yuye-aws committed Apr 30, 2024
1 parent a3bdde5 commit 2088b63
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,12 @@ private ChunkerUtil() {} // no instance of this util class

/**
* Checks whether the chunking results would exceed the max chunk limit.
* If exceeds, then throw IllegalArgumentException
* If exceeds, then return true
*
* @param chunkResultSize the size of chunking result
* @param runtimeMaxChunkLimit runtime max_chunk_limit, used to check with chunkResultSize
* @param nonRuntimeMaxChunkLimit non-runtime max_chunk_limit, used to keep exception message consistent
*/
public static void checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit, int nonRuntimeMaxChunkLimit) {
if (runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT && chunkResultSize >= runtimeMaxChunkLimit) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"The number of chunks produced by %s processor has exceeded the allowed maximum of [%s]. This limit can be set by changing the [%s] parameter.",
TYPE,
nonRuntimeMaxChunkLimit,
MAX_CHUNK_LIMIT_FIELD
)
);
}
// Returns true when a runtime limit is in effect (runtimeMaxChunkLimit is not
// DISABLED_MAX_CHUNK_LIMIT) and chunkResultSize has reached or passed that limit;
// returns false otherwise. This method only reports the condition; it does not throw.
public static boolean checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit) {
// ">=" (not ">"): a result count equal to the limit already makes this return true.
return runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT && chunkResultSize >= runtimeMaxChunkLimit;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,16 @@ public List<String> chunk(final String content, final Map<String, Object> runtim
int nextDelimiterPosition = content.indexOf(delimiter);

while (nextDelimiterPosition != -1) {
ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit, maxChunkLimit);
if (ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit)) {
break;
}
end = nextDelimiterPosition + delimiter.length();
chunkResult.add(content.substring(start, end));
start = end;
nextDelimiterPosition = content.indexOf(delimiter, start);
}

if (start < content.length()) {
ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit, maxChunkLimit);
if (start < content.length() && !ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit)) {
chunkResult.add(content.substring(start));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ public List<String> chunk(final String content, final Map<String, Object> runtim
int overlapTokenNumber = (int) Math.floor(tokenLimit * overlapRate);

while (startTokenIndex < tokens.size()) {
ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit, maxChunkLimit);
if (ChunkerUtil.checkRunTimeMaxChunkLimit(chunkResult.size(), runtimeMaxChunkLimit)) {
break;
}
if (startTokenIndex == 0) {
// include all characters till the start if no previous passage
startContentPosition = 0;
Expand Down

0 comments on commit 2088b63

Please sign in to comment.