Commit

fix unit tests for fixed token length algorithm
Signed-off-by: yuye-aws <[email protected]>
yuye-aws committed Mar 7, 2024
1 parent d3d0f79 commit fa8d9ea
Showing 1 changed file with 5 additions and 5 deletions.
@@ -97,13 +97,13 @@ public void testValidateParameters_whenIllegalOverlapRateType_thenFail() {
 
     public void testValidateParameters_whenIllegalOverlapRateValue_thenFail() {
         Map<String, Object> parameters = new HashMap<>();
-        parameters.put(OVERLAP_RATE_FIELD, 1.0);
+        parameters.put(OVERLAP_RATE_FIELD, 0.6);
         IllegalArgumentException illegalArgumentException = assertThrows(
             IllegalArgumentException.class,
             () -> FixedTokenLengthChunker.validateParameters(parameters)
         );
         assertEquals(
-            "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and 1, 1 is not included.",
+            "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and 0.5",
             illegalArgumentException.getMessage()
         );
     }
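
For context, a minimal sketch of the range check this test now exercises (pieced together from the expected message, not the actual FixedTokenLengthChunker source; the method name, the "overlap_rate" key, the inclusive [0, 0.5] bounds, and the illegal-type message are all assumptions):

    import java.util.Map;

    public final class OverlapRateValidationSketch {
        // assumption: the constant's value; only the identifier appears in the diff
        static final String OVERLAP_RATE_FIELD = "overlap_rate";

        static void validateOverlapRate(Map<String, Object> parameters) {
            if (!parameters.containsKey(OVERLAP_RATE_FIELD)) {
                return; // assumption: the field is optional and a default applies
            }
            Object value = parameters.get(OVERLAP_RATE_FIELD);
            if (!(value instanceof Number)) {
                // a neighboring test (whenIllegalOverlapRateType_thenFail) covers
                // this branch; its exact message is not shown in the diff
                throw new IllegalArgumentException("illegal type for [" + OVERLAP_RATE_FIELD + "]");
            }
            double overlapRate = ((Number) value).doubleValue();
            if (overlapRate < 0.0 || overlapRate > 0.5) {
                throw new IllegalArgumentException(
                    "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and 0.5"
                );
            }
        }
    }
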
@@ -129,8 +129,8 @@ public void testChunk_withTokenLimit_10() {
         List<String> passages = FixedTokenLengthChunker.chunk(content, parameters);
         List<String> expectedPassages = new ArrayList<>();
         expectedPassages.add("This is an example document to be chunked The document");
-        expectedPassages.add("The document contains a single paragraph two sentences and 24");
-        expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
+        expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }

@@ -144,7 +144,7 @@ public void testChunk_withTokenLimit_20() {
         expectedPassages.add(
             "This is an example document to be chunked The document contains a single paragraph two sentences and 24 tokens by"
         );
-        expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
+        expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
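
The new expected passages tile the 24-token sample with no overlap, while the old ones repeated the last two (or, at token limit 20, four) tokens of each window. A minimal sketch of that windowing arithmetic follows (an illustration, not the actual FixedTokenLengthChunker source; whitespace tokenization stands in for the OpenSearch standard tokenizer, and the tests' parameter setup lies outside the shown hunks):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public final class FixedLengthChunkSketch {
        static List<String> chunk(String content, int tokenLimit, double overlapRate) {
            String[] tokens = content.split("\\s+");
            int overlapTokens = (int) Math.floor(tokenLimit * overlapRate);
            int step = tokenLimit - overlapTokens; // tokens the window advances each pass
            List<String> passages = new ArrayList<>();
            for (int start = 0; start < tokens.length; start += step) {
                int end = Math.min(start + tokenLimit, tokens.length);
                passages.add(String.join(" ", Arrays.copyOfRange(tokens, start, end)));
                if (end == tokens.length) {
                    break; // final window reached the end of the document
                }
            }
            return passages;
        }
    }

With tokenLimit = 10, an overlapRate of 0.2 advances the window 8 tokens at a time and reproduces the three old expected passages; an overlapRate of 0 advances the full 10 tokens and reproduces the new ones.
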

