From 90a1f3a7c1bd9b9f2489ea7fbcd95679eb806b16 Mon Sep 17 00:00:00 2001 From: yuye-aws Date: Tue, 27 Feb 2024 16:49:38 +0800 Subject: [PATCH] add unit tests for parameter validation in document chunking processor Signed-off-by: yuye-aws --- .idea/runConfigurations/Run_Neural_Search.xml | 23 ---- .../processor/DocumentChunkingProcessor.java | 4 +- .../DocumentChunkingProcessorTests.java | 116 ++++++++++++++++-- 3 files changed, 112 insertions(+), 31 deletions(-) delete mode 100644 .idea/runConfigurations/Run_Neural_Search.xml diff --git a/.idea/runConfigurations/Run_Neural_Search.xml b/.idea/runConfigurations/Run_Neural_Search.xml deleted file mode 100644 index d881bd512..000000000 --- a/.idea/runConfigurations/Run_Neural_Search.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - true - true - false - - - \ No newline at end of file diff --git a/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java index 550c8013f..d1a1af536 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java @@ -93,7 +93,9 @@ private void validateDocumentChunkingFieldMap(Map fieldMap) { // output field must be string if (!(parameterMap.containsKey(OUTPUT_FIELD))) { - throw new IllegalArgumentException("parameters for output field [" + OUTPUT_FIELD + "] is null, cannot process it."); + throw new IllegalArgumentException( + "parameters for input field [" + inputField + "] misses [" + OUTPUT_FIELD + "], cannot process it." 
+ ); } Object outputField = parameterMap.get(OUTPUT_FIELD); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorTests.java index 3b0395320..daadcbb6a 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessorTests.java @@ -81,13 +81,6 @@ public void setup() { factory = new DocumentChunkingProcessor.Factory(settings, clusterService, indicesService, getAnalysisRegistry()); } - @SneakyThrows - public void testGetType() { - DocumentChunkingProcessor processor = createFixedTokenLengthInstance(); - String type = processor.getType(); - assertEquals(DocumentChunkingProcessor.TYPE, type); - } - private Map createFixedTokenLengthParameters() { Map parameters = new HashMap<>(); parameters.put(FixedTokenLengthChunker.TOKEN_LIMIT_FIELD, 10); @@ -126,6 +119,115 @@ private DocumentChunkingProcessor createDelimiterInstance() { return factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config); } + public void testCreate_whenFieldMapEmpty_failure() { + Map config = new HashMap<>(); + Map emptyFieldMap = new HashMap<>(); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, emptyFieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + assertEquals("Unable to create the processor as field_map is null or empty", illegalArgumentException.getMessage()); + } + + public void testCreate_whenFieldMapWithEmptyParameter_failure() { + Map config = new HashMap<>(); + Map fieldMap = new HashMap<>(); + fieldMap.put("key", null); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = 
assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + assertEquals("parameters for input field [key] is null, cannot process it.", illegalArgumentException.getMessage()); + } + + public void testCreate_whenFieldMapWithIllegalParameterType_failure() { + Map config = new HashMap<>(); + Map fieldMap = new HashMap<>(); + fieldMap.put("key", "value"); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + assertEquals("parameters for input field [key] cannot be cast to [java.util.Map]", illegalArgumentException.getMessage()); + } + + public void testCreate_whenFieldMapWithEmptyOutputField_failure() { + Map config = new HashMap<>(); + Map fieldMap = new HashMap<>(); + fieldMap.put(INPUT_FIELD, ImmutableMap.of()); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + assertEquals( + "parameters for input field [" + INPUT_FIELD + "] misses [" + DocumentChunkingProcessor.OUTPUT_FIELD + "], cannot process it.", + illegalArgumentException.getMessage() + ); + } + + public void testCreate_whenFieldMapWithIllegalOutputField_failure() { + Map config = new HashMap<>(); + Map fieldMap = new HashMap<>(); + fieldMap.put(INPUT_FIELD, ImmutableMap.of(DocumentChunkingProcessor.OUTPUT_FIELD, 1)); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + 
assertEquals( + "parameters for output field [output_field] cannot be cast to [java.lang.String]", + illegalArgumentException.getMessage() + ); + } + + public void testCreate_whenFieldMapWithIllegalKey_failure() { + Map config = new HashMap<>(); + Map fieldMap = new HashMap<>(); + fieldMap.put(INPUT_FIELD, ImmutableMap.of(DocumentChunkingProcessor.OUTPUT_FIELD, OUTPUT_FIELD, 1, 1)); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + assertEquals( + "found parameter entry with non-string key", + illegalArgumentException.getMessage() + ); + } + + public void testCreate_whenFieldMapWithNoAlgorithm_failure() { + Map config = new HashMap<>(); + Map fieldMap = new HashMap<>(); + fieldMap.put(INPUT_FIELD, ImmutableMap.of(DocumentChunkingProcessor.OUTPUT_FIELD, INPUT_FIELD)); + config.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldMap); + Map registry = new HashMap<>(); + IllegalArgumentException illegalArgumentException = assertThrows( + IllegalArgumentException.class, + () -> factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config) + ); + assertEquals( + "input field [" + INPUT_FIELD + "] should has and only has 1 chunking algorithm", + illegalArgumentException.getMessage() + ); + } + + @SneakyThrows + public void testGetType() { + DocumentChunkingProcessor processor = createFixedTokenLengthInstance(); + String type = processor.getType(); + assertEquals(DocumentChunkingProcessor.TYPE, type); + } + private String createSourceDataString() { return "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."; }