Skip to content

Commit

Permalink
add unit tests for parameter validation in document chunking processor
Browse files Browse the repository at this point in the history
Signed-off-by: yuye-aws <[email protected]>
  • Loading branch information
yuye-aws committed Feb 27, 2024
1 parent e7a0aaa commit 90a1f3a
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 31 deletions.
23 changes: 0 additions & 23 deletions .idea/runConfigurations/Run_Neural_Search.xml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ private void validateDocumentChunkingFieldMap(Map<String, Object> fieldMap) {

// output field must be string
if (!(parameterMap.containsKey(OUTPUT_FIELD))) {
throw new IllegalArgumentException("parameters for output field [" + OUTPUT_FIELD + "] is null, cannot process it.");
throw new IllegalArgumentException(
"parameters for input field [" + inputField + "] misses [" + OUTPUT_FIELD + "], cannot process it."
);
}

Object outputField = parameterMap.get(OUTPUT_FIELD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,6 @@ public void setup() {
factory = new DocumentChunkingProcessor.Factory(settings, clusterService, indicesService, getAnalysisRegistry());
}

/**
 * Checks that the processor returned by {@code createFixedTokenLengthInstance()}
 * reports {@code DocumentChunkingProcessor.TYPE} from {@code getType()}.
 */
@SneakyThrows
public void testGetType() {
    DocumentChunkingProcessor chunkingProcessor = createFixedTokenLengthInstance();
    assertEquals(DocumentChunkingProcessor.TYPE, chunkingProcessor.getType());
}

private Map<String, Object> createFixedTokenLengthParameters() {
Map<String, Object> parameters = new HashMap<>();
parameters.put(FixedTokenLengthChunker.TOKEN_LIMIT_FIELD, 10);
Expand Down Expand Up @@ -126,6 +119,115 @@ private DocumentChunkingProcessor createDelimiterInstance() {
return factory.create(registry, PROCESSOR_TAG, DESCRIPTION, config);
}

/**
 * Creating the processor with an empty map stored under
 * {@code DocumentChunkingProcessor.FIELD_MAP_FIELD} must fail with an
 * {@link IllegalArgumentException} carrying the "null or empty" message.
 */
public void testCreate_whenFieldMapEmpty_failure() {
    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, new HashMap<String, Object>());
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String actualMessage = thrown.getMessage();
    assertEquals("Unable to create the processor as field_map is null or empty", actualMessage);
}

/**
 * Creating the processor with a field map whose entry {@code "key"} maps to
 * {@code null} must fail with an {@link IllegalArgumentException} that names
 * the offending input field.
 */
public void testCreate_whenFieldMapWithEmptyParameter_failure() {
    // HashMap is required here: the entry value is deliberately null.
    Map<String, Object> fieldToParameters = new HashMap<>();
    fieldToParameters.put("key", null);

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldToParameters);
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String actualMessage = thrown.getMessage();
    assertEquals("parameters for input field [key] is null, cannot process it.", actualMessage);
}

/**
 * Creating the processor with a field map whose entry {@code "key"} maps to a
 * plain string instead of a map must fail with an
 * {@link IllegalArgumentException} reporting the failed cast to
 * {@code java.util.Map}.
 */
public void testCreate_whenFieldMapWithIllegalParameterType_failure() {
    Map<String, Object> fieldToParameters = new HashMap<>();
    fieldToParameters.put("key", "value");

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldToParameters);
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String actualMessage = thrown.getMessage();
    assertEquals("parameters for input field [key] cannot be cast to [java.util.Map]", actualMessage);
}

/**
 * Creating the processor with a field map whose {@code INPUT_FIELD} entry is an
 * empty parameter map (so no output field is given) must fail with an
 * {@link IllegalArgumentException} naming both the input field and the missing
 * {@code DocumentChunkingProcessor.OUTPUT_FIELD} key.
 */
public void testCreate_whenFieldMapWithEmptyOutputField_failure() {
    Map<String, Object> fieldToParameters = new HashMap<>();
    fieldToParameters.put(INPUT_FIELD, ImmutableMap.of());

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldToParameters);
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String expectedMessage = "parameters for input field ["
        + INPUT_FIELD
        + "] misses ["
        + DocumentChunkingProcessor.OUTPUT_FIELD
        + "], cannot process it.";
    assertEquals(expectedMessage, thrown.getMessage());
}

/**
 * Creating the processor with a parameter map whose
 * {@code DocumentChunkingProcessor.OUTPUT_FIELD} value is an integer rather
 * than a string must fail with an {@link IllegalArgumentException} reporting
 * the failed cast to {@code java.lang.String}.
 */
public void testCreate_whenFieldMapWithIllegalOutputField_failure() {
    Map<String, Object> fieldToParameters = new HashMap<>();
    fieldToParameters.put(INPUT_FIELD, ImmutableMap.of(DocumentChunkingProcessor.OUTPUT_FIELD, 1));

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldToParameters);
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String expectedMessage = "parameters for output field [output_field] cannot be cast to [java.lang.String]";
    assertEquals(expectedMessage, thrown.getMessage());
}

/**
 * Creating the processor with a parameter map that contains a non-string key
 * (the integer {@code 1}) alongside a valid output field entry must fail with
 * an {@link IllegalArgumentException} reporting the non-string key.
 */
public void testCreate_whenFieldMapWithIllegalKey_failure() {
    Map<Object, Object> fieldToParameters = new HashMap<>();
    // The second key/value pair (1 -> 1) is the illegal, non-string-keyed entry.
    fieldToParameters.put(INPUT_FIELD, ImmutableMap.of(DocumentChunkingProcessor.OUTPUT_FIELD, OUTPUT_FIELD, 1, 1));

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldToParameters);
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String expectedMessage = "found parameter entry with non-string key";
    assertEquals(expectedMessage, thrown.getMessage());
}

/**
 * Creating the processor with a parameter map that holds only the
 * {@code DocumentChunkingProcessor.OUTPUT_FIELD} entry, and no other keys, must
 * fail with an {@link IllegalArgumentException} stating that the input field
 * needs exactly one chunking algorithm.
 */
public void testCreate_whenFieldMapWithNoAlgorithm_failure() {
    Map<String, Object> fieldToParameters = new HashMap<>();
    fieldToParameters.put(INPUT_FIELD, ImmutableMap.of(DocumentChunkingProcessor.OUTPUT_FIELD, INPUT_FIELD));

    Map<String, Object> processorConfig = new HashMap<>();
    processorConfig.put(DocumentChunkingProcessor.FIELD_MAP_FIELD, fieldToParameters);
    Map<String, Processor.Factory> processorFactories = new HashMap<>();

    IllegalArgumentException thrown = assertThrows(
        IllegalArgumentException.class,
        () -> factory.create(processorFactories, PROCESSOR_TAG, DESCRIPTION, processorConfig)
    );

    String expectedMessage = "input field [" + INPUT_FIELD + "] should has and only has 1 chunking algorithm";
    assertEquals(expectedMessage, thrown.getMessage());
}

/**
 * Checks that the processor returned by {@code createFixedTokenLengthInstance()}
 * reports {@code DocumentChunkingProcessor.TYPE} from {@code getType()}.
 */
@SneakyThrows
public void testGetType() {
    DocumentChunkingProcessor chunkingProcessor = createFixedTokenLengthInstance();
    assertEquals(DocumentChunkingProcessor.TYPE, chunkingProcessor.getType());
}

/**
 * Supplies the fixed sample document text used as chunking input by the tests.
 *
 * @return the sample document string
 */
private String createSourceDataString() {
    final String sampleDocument =
        "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.";
    return sampleDocument;
}
Expand Down

0 comments on commit 90a1f3a

Please sign in to comment.