From fa8d9ea0a61895f98cec2e1e1756db1058934615 Mon Sep 17 00:00:00 2001
From: yuye-aws
Date: Thu, 7 Mar 2024 16:38:34 +0800
Subject: [PATCH] fix unit tests for fixed token length algorithm

Signed-off-by: yuye-aws
---
 .../chunker/FixedTokenLengthChunkerTests.java | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/test/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunkerTests.java b/src/test/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunkerTests.java
index 2f86d448e..0633213fb 100644
--- a/src/test/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunkerTests.java
+++ b/src/test/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunkerTests.java
@@ -97,13 +97,13 @@ public void testValidateParameters_whenIllegalOverlapRateType_thenFail() {
 
     public void testValidateParameters_whenIllegalOverlapRateValue_thenFail() {
         Map<String, Object> parameters = new HashMap<>();
-        parameters.put(OVERLAP_RATE_FIELD, 1.0);
+        parameters.put(OVERLAP_RATE_FIELD, 0.6);
         IllegalArgumentException illegalArgumentException = assertThrows(
             IllegalArgumentException.class,
             () -> FixedTokenLengthChunker.validateParameters(parameters)
         );
         assertEquals(
-            "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and 1, 1 is not included.",
+            "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and 0.5",
             illegalArgumentException.getMessage()
         );
     }
@@ -129,8 +129,8 @@ public void testChunk_withTokenLimit_10() {
         List<String> passages = FixedTokenLengthChunker.chunk(content, parameters);
         List<String> expectedPassages = new ArrayList<>();
         expectedPassages.add("This is an example document to be chunked The document");
-        expectedPassages.add("The document contains a single paragraph two sentences and 24");
-        expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
+        expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
 
@@ -144,7 +144,7 @@ public void testChunk_withTokenLimit_20() {
         expectedPassages.add(
             "This is an example document to be chunked The document contains a single paragraph two sentences and 24 tokens by"
         );
-        expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
+        expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
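
Note on the first hunk: the valid range for `overlap_rate` is now capped at 0.5 rather than stopping just short of 1, so the test probes with 0.6 and asserts the new error message. Below is a minimal sketch of a check consistent with that message; the class name and the map handling are assumptions for illustration, not the actual `FixedTokenLengthChunker` source.

```java
import java.util.Map;

// Hypothetical stand-in for the check the updated test implies; the real
// FixedTokenLengthChunker.validateParameters handles more parameters than this.
public class OverlapRateValidationSketch {

    static final String OVERLAP_RATE_FIELD = "overlap_rate";

    static void validateParameters(Map<String, Object> parameters) {
        Object value = parameters.get(OVERLAP_RATE_FIELD);
        if (value instanceof Number) {
            double overlapRate = ((Number) value).doubleValue();
            // Reject anything outside [0, 0.5], matching the new error message.
            if (overlapRate < 0.0 || overlapRate > 0.5) {
                throw new IllegalArgumentException(
                    "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and 0.5"
                );
            }
        }
    }

    public static void main(String[] args) {
        validateParameters(Map.of(OVERLAP_RATE_FIELD, 0.4)); // accepted
        validateParameters(Map.of(OVERLAP_RATE_FIELD, 0.6)); // throws, as the test expects
    }
}
```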
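Note on the two `testChunk` hunks: the document under test is 24 tokens long, and the updated expectations tile it into windows with no tokens repeated across passages (10 + 10 + 4 tokens for a token limit of 10, and 20 + 4 for a limit of 20), whereas the old expectations carried the last two tokens of each passage into the next. A minimal sketch of that windowing follows, assuming whitespace splitting as a stand-in for the OpenSearch standard tokenizer and plain arguments in place of the real parameter map:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FixedTokenLengthSketch {

    // Tiles whitespace tokens into windows of at most tokenLimit tokens,
    // stepping by tokenLimit - floor(tokenLimit * overlapRate) so the tail
    // of one window can reappear at the head of the next.
    static List<String> chunk(String content, int tokenLimit, double overlapRate) {
        String[] tokens = content.split("\\s+");
        int overlapTokens = (int) Math.floor(tokenLimit * overlapRate);
        int step = tokenLimit - overlapTokens; // stays positive given the 0.5 cap
        List<String> passages = new ArrayList<>();
        for (int start = 0; start < tokens.length; start += step) {
            int end = Math.min(start + tokenLimit, tokens.length);
            passages.add(String.join(" ", Arrays.copyOfRange(tokens, start, end)));
            if (end == tokens.length) {
                break; // last window reached the end of the document
            }
        }
        return passages;
    }

    public static void main(String[] args) {
        String content = "This is an example document to be chunked The document contains"
            + " a single paragraph two sentences and 24 tokens by standard tokenizer in OpenSearch";
        // Non-overlapping windows of 10 + 10 + 4 tokens: the updated expectations.
        System.out.println(chunk(content, 10, 0.0));
        // A 0.2 overlap rate (two-token carry-over) reproduces the old expectations.
        System.out.println(chunk(content, 10, 0.2));
    }
}
```

Under this sketch, an overlap rate of 0.2 yields the passages the tests previously expected and 0.0 yields the new ones, for both token limits; whatever the production change was, the expectations moved from overlapping to non-overlapping windows.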