From 33e4a2b3f18cd9ad0e0ccbac9a01ca7283204604 Mon Sep 17 00:00:00 2001
From: yuye-aws
Date: Thu, 7 Mar 2024 18:14:29 +0800
Subject: [PATCH] fix integration tests

Signed-off-by: yuye-aws
---
 .../DocumentChunkingProcessorIT.java          |  6 +++---
 .../chunker/PipelineForCascadedChunker.json   | 20 +++++++++----------
 .../chunker/PipelineForDelimiterChunker.json  | 10 +++++-----
 .../PipelineForFixedTokenLengthChunker.json   | 10 +++++-----
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java
index d8caa64da..d136af9c4 100644
--- a/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java
+++ b/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorIT.java
@@ -63,8 +63,8 @@ public void testDocumentChunkingProcessor_withFixedTokenLength_successful() thro
 
             List<String> expectedPassages = new ArrayList<>();
             expectedPassages.add("This is an example document to be chunked The document");
-            expectedPassages.add("The document contains a single paragraph two sentences and 24");
-            expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
+            expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
+            expectedPassages.add("standard tokenizer in OpenSearch");
             validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
         } finally {
             wipeOfTestResources(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_NAME, null, null);
@@ -112,7 +112,7 @@ public void testDocumentChunkingProcessor_withCascade_successful() throws Except
             // " ", "." and "," will not be included in fixed token length output
             expectedPassages.add("This is an example document to be chunked");
             expectedPassages.add("The document contains a single paragraph two sentences and 24");
-            expectedPassages.add("and 24 tokens by standard tokenizer in OpenSearch");
+            expectedPassages.add("tokens by standard tokenizer in OpenSearch");
             validateIndexIngestResults(INDEX_NAME, OUTPUT_FIELD, expectedPassages);
 
             expectedPassages.clear();
diff --git a/src/test/resources/processor/chunker/PipelineForCascadedChunker.json b/src/test/resources/processor/chunker/PipelineForCascadedChunker.json
index 6302cfdfe..3125d3d53 100644
--- a/src/test/resources/processor/chunker/PipelineForCascadedChunker.json
+++ b/src/test/resources/processor/chunker/PipelineForCascadedChunker.json
@@ -4,11 +4,11 @@
     {
       "chunking": {
         "field_map": {
-          "body": {
-            "delimiter": {
-              "delimiter": "."
-            },
-            "output_field": "body_chunk_intermediate"
+          "body": "body_chunk_intermediate"
+        },
+        "algorithm": {
+          "delimiter": {
+            "delimiter": "."
           }
         }
       }
@@ -16,11 +16,11 @@
     {
       "chunking": {
         "field_map": {
-          "body_chunk_intermediate": {
-            "fix_length": {
-              "token_limit": 10
-            },
-            "output_field": "body_chunk"
+          "body_chunk_intermediate": "body_chunk"
+        },
+        "algorithm": {
+          "fix_length": {
+            "token_limit": 10
           }
         }
       }
diff --git a/src/test/resources/processor/chunker/PipelineForDelimiterChunker.json b/src/test/resources/processor/chunker/PipelineForDelimiterChunker.json
index 9ababd6ed..dfa504065 100644
--- a/src/test/resources/processor/chunker/PipelineForDelimiterChunker.json
+++ b/src/test/resources/processor/chunker/PipelineForDelimiterChunker.json
@@ -4,11 +4,11 @@
     {
       "chunking": {
         "field_map": {
-          "body": {
-            "delimiter": {
-              "delimiter": "."
-            },
-            "output_field": "body_chunk"
+          "body": "body_chunk"
+        },
+        "algorithm": {
+          "delimiter": {
+            "delimiter": "."
           }
         }
       }
diff --git a/src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunker.json b/src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunker.json
index 27daf19c8..c2a55e4f2 100644
--- a/src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunker.json
+++ b/src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunker.json
@@ -4,11 +4,11 @@
     {
      "chunking": {
        "field_map": {
-          "body": {
-            "fix_length": {
-              "token_limit": 10
-            },
-            "output_field": "body_chunk"
+          "body": "body_chunk"
+        },
+        "algorithm": {
+          "fix_length": {
+            "token_limit": 10
           }
         }
       }
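
Note: for reference, the pipeline definition shape these test resources now exercise is sketched below as an ingest pipeline request. The pipeline id "example-delimiter-chunking" and the description text are illustrative placeholders, not part of the patch; only the "chunking" block mirrors PipelineForDelimiterChunker.json as changed above. In the new shape, field_map maps an input field directly to its output field, and the chunker settings sit under a separate "algorithm" key.

PUT _ingest/pipeline/example-delimiter-chunking
{
  "description": "Illustrative example only: chunk the body field on '.' into body_chunk",
  "processors": [
    {
      "chunking": {
        "field_map": {
          "body": "body_chunk"
        },
        "algorithm": {
          "delimiter": {
            "delimiter": "."
          }
        }
      }
    }
  ]
}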