Skip to content

Commit

Permalink
resolve code review comments
Browse files (browse the repository at this point in the history)
Signed-off-by: yuye-aws <[email protected]>
  • Loading branch information
yuye-aws committed Mar 18, 2024
1 parent 3d8c030 commit 9931fae
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,8 @@ private void parseAlgorithmMap(final Map<String, Object> algorithmMap) {
)
);
}
- if (algorithmKey.equals(FixedTokenLengthChunker.ALGORITHM_NAME)) {
- // fixed token length algorithm needs analysis registry for tokenization
- chunkerParameters.put(FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD, analysisRegistry);
- }
+ // fixed token length algorithm needs analysis registry for tokenization
+ chunkerParameters.put(FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD, analysisRegistry);
this.chunker = ChunkerFactory.create(algorithmKey, chunkerParameters);
}

Expand Down Expand Up @@ -171,10 +169,8 @@ public IngestDocument execute(final IngestDocument ingestDocument) {
validateFieldsValue(sourceAndMetadataMap);
// fixed token length algorithm needs runtime parameter max_token_count for tokenization
Map<String, Object> runtimeParameters = new HashMap<>();
- if (chunker instanceof FixedTokenLengthChunker) {
- int maxTokenCount = getMaxTokenCount(sourceAndMetadataMap);
- runtimeParameters.put(FixedTokenLengthChunker.MAX_TOKEN_COUNT_FIELD, maxTokenCount);
- }
+ int maxTokenCount = getMaxTokenCount(sourceAndMetadataMap);
+ runtimeParameters.put(FixedTokenLengthChunker.MAX_TOKEN_COUNT_FIELD, maxTokenCount);
runtimeParameters.put(MAX_CHUNK_LIMIT_FIELD, maxChunkLimit);
chunkMapType(sourceAndMetadataMap, fieldMap, runtimeParameters, 0);
return ingestDocument;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ private ChunkerUtil() {} // no instance of this util class
*/
public static void checkRunTimeMaxChunkLimit(int chunkResultSize, int runtimeMaxChunkLimit, int nonRuntimeMaxChunkLimit) {
if (chunkResultSize == runtimeMaxChunkLimit) {
- throw new IllegalStateException(
+ throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"The number of chunks produced by %s processor has exceeded the allowed maximum of [%s]. This limit can be set by changing the [%s] parameter.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ private List<AnalyzeToken> tokenize(final String content, final String tokenizer
return analyzeResponse.getTokens();
} catch (Exception e) {
throw new IllegalStateException(
- String.format(Locale.ROOT, "%s algorithm encounters exception in tokenization: %s", ALGORITHM_NAME, e.getMessage()),
+ String.format(Locale.ROOT, "analyzer %s encounters exception: %s", tokenizer, e.getMessage()),
e
);
}
Expand Down

0 comments on commit 9931fae

Please sign in to comment.