From 6548996c90730f5bb58a140c11c1f25f448dfa4e Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Sun, 3 Dec 2023 20:35:28 +0800 Subject: [PATCH] Remove the code of field pattern Signed-off-by: Gao Binlong --- .../ingest/common/RemoveProcessor.java | 137 +---------------- .../common/RemoveProcessorFactoryTests.java | 63 +------- .../ingest/common/RemoveProcessorTests.java | 73 +-------- .../test/ingest/290_remove_processor.yml | 145 ------------------ 4 files changed, 13 insertions(+), 405 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java index 9a4160b350310..4988ec89fec1d 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java @@ -32,8 +32,6 @@ package org.opensearch.ingest.common; -import org.opensearch.common.ValidationException; -import org.opensearch.common.regex.Regex; import org.opensearch.core.common.Strings; import org.opensearch.index.VersionType; import org.opensearch.ingest.AbstractProcessor; @@ -61,25 +59,19 @@ public final class RemoveProcessor extends AbstractProcessor { public static final String TYPE = "remove"; private final List fields; - private final List fieldPatterns; private final List excludeFields; - private final List excludeFieldPatterns; private final boolean ignoreMissing; RemoveProcessor( String tag, String description, List fields, - List fieldPatterns, List excludeFields, - List excludeFieldPatterns, boolean ignoreMissing ) { super(tag, description); this.fields = new ArrayList<>(fields); - this.fieldPatterns = new ArrayList<>(fieldPatterns); this.excludeFields = new ArrayList<>(excludeFields); - this.excludeFieldPatterns = new ArrayList<>(excludeFieldPatterns); this.ignoreMissing = ignoreMissing; } @@ -87,18 +79,10 @@ public List getFields() { return fields; } - public List getFieldPatterns() { - return fieldPatterns; - } - public List getExcludeFields() { return excludeFields; } - public List getExcludeFieldPatterns() { - return excludeFieldPatterns; - } - @Override public IngestDocument execute(IngestDocument document) { if (!fields.isEmpty()) { @@ -137,24 +121,6 @@ public IngestDocument execute(IngestDocument document) { }); } - if (!fieldPatterns.isEmpty()) { - Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); - Set metadataFields = document.getMetadata() - .keySet() - .stream() - .map(IngestDocument.Metadata::getFieldName) - .collect(Collectors.toSet()); - existingFields.forEach(field -> { - // ignore metadata fields such as _index, _id, etc. - if (!metadataFields.contains(field)) { - final boolean matched = fieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field)); - if (matched) { - document.removeField(field); - } - } - }); - } - Set excludeFieldSet = new HashSet<>(); if (!excludeFields.isEmpty()) { excludeFields.forEach(field -> { @@ -166,7 +132,7 @@ public IngestDocument execute(IngestDocument document) { }); } - if (!excludeFieldSet.isEmpty() || !excludeFieldPatterns.isEmpty()) { + if (!excludeFieldSet.isEmpty()) { Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); Set metadataFields = document.getMetadata() .keySet() @@ -175,18 +141,8 @@ public IngestDocument execute(IngestDocument document) { .collect(Collectors.toSet()); existingFields.forEach(field -> { // ignore metadata fields such as _index, _id, etc. - if (!metadataFields.contains(field)) { - // when both exclude_field and exclude_field_pattern are not empty, remove the field if it doesn't exist in both of them - // if not, remove the field if it doesn't exist in the non-empty one - if (!excludeFieldPatterns.isEmpty()) { - final boolean matched = excludeFieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field)); - if (!excludeFieldSet.isEmpty() && !excludeFieldSet.contains(field) && !matched - || excludeFieldSet.isEmpty() && !matched) { - document.removeField(field); - } - } else if (!excludeFieldSet.isEmpty() && !excludeFieldSet.contains(field)) { - document.removeField(field); - } + if (!metadataFields.contains(field) && !excludeFieldSet.contains(field)) { + document.removeField(field); } }); } @@ -215,31 +171,12 @@ public RemoveProcessor create( Map config ) throws Exception { final List fields = new ArrayList<>(); - final List fieldPatterns = new ArrayList<>(); final List excludeFields = new ArrayList<>(); - final List excludeFieldPatterns = new ArrayList<>(); - final Object field = ConfigurationUtils.readOptionalObject(config, "field"); - final Object fieldPattern = ConfigurationUtils.readOptionalObject(config, "field_pattern"); final Object excludeField = ConfigurationUtils.readOptionalObject(config, "exclude_field"); - final Object excludeFieldPattern = ConfigurationUtils.readOptionalObject(config, "exclude_field_pattern"); - if (field == null && fieldPattern == null && excludeField == null && excludeFieldPattern == null) { - throw newConfigurationException( - TYPE, - processorTag, - "field", - "at least one of the parameters field, field_pattern, exclude_field and exclude_field_pattern need to be set" - ); - } - - if ((field != null || fieldPattern != null) && (excludeField != null || excludeFieldPattern != null)) { - throw newConfigurationException( - TYPE, - processorTag, - "field", - "ether (field,field_pattern) or (exclude_field,exclude_field_pattern) can be set" - ); + if (field == null && excludeField == null || field != null && excludeField != null) { + throw newConfigurationException(TYPE, processorTag, "field", "ether field or exclude_field must be set"); } List fieldCompiledTemplates = new ArrayList<>(); @@ -256,17 +193,6 @@ public RemoveProcessor create( .collect(Collectors.toList()); } - if (fieldPattern != null) { - if (fieldPattern instanceof List) { - @SuppressWarnings("unchecked") - List fieldPatternList = (List) fieldPattern; - fieldPatterns.addAll(fieldPatternList); - } else { - fieldPatterns.add((String) fieldPattern); - } - validateFieldPatterns(processorTag, fieldPatterns, "field_pattern"); - } - List excludeFieldCompiledTemplates = new ArrayList<>(); if (excludeField != null) { if (excludeField instanceof List) { @@ -281,59 +207,8 @@ public RemoveProcessor create( .collect(Collectors.toList()); } - if (excludeFieldPattern != null) { - if (excludeFieldPattern instanceof List) { - @SuppressWarnings("unchecked") - List excludeFieldPatternList = (List) excludeFieldPattern; - excludeFieldPatterns.addAll(excludeFieldPatternList); - } else { - excludeFieldPatterns.add((String) excludeFieldPattern); - } - validateFieldPatterns(processorTag, excludeFieldPatterns, "exclude_field_pattern"); - } - boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); - return new RemoveProcessor( - processorTag, - description, - fieldCompiledTemplates, - fieldPatterns, - excludeFieldCompiledTemplates, - excludeFieldPatterns, - ignoreMissing - ); - } - - private void validateFieldPatterns(String processorTag, List patterns, String patternKey) { - List validationErrors = new ArrayList<>(); - for (String fieldPattern : patterns) { - if (fieldPattern.contains(" ")) { - validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a space"); - } - if (fieldPattern.contains(",")) { - validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ','"); - } - if (fieldPattern.contains("#")) { - validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a '#'"); - } - if (fieldPattern.contains(":")) { - validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ':'"); - } - if (fieldPattern.startsWith("_")) { - validationErrors.add(patternKey + " [" + fieldPattern + "] must not start with '_'"); - } - if (Strings.validFileNameExcludingAstrix(fieldPattern) == false) { - validationErrors.add( - patternKey + " [" + fieldPattern + "] must not contain the following characters " + Strings.INVALID_FILENAME_CHARS - ); - } - } - - if (validationErrors.size() > 0) { - ValidationException validationException = new ValidationException(); - validationException.addValidationErrors(validationErrors); - throw newConfigurationException(TYPE, processorTag, patternKey, validationException.getMessage()); - } + return new RemoveProcessor(processorTag, description, fieldCompiledTemplates, excludeFieldCompiledTemplates, ignoreMissing); } } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java index 202b35f315550..179aef2feac0c 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java @@ -90,70 +90,24 @@ public void testInvalidMustacheTemplate() throws Exception { assertThat(exception.getMetadata("opensearch.processor_tag").get(0), equalTo(processorTag)); } - public void testCreateWithFieldPatterns() throws Exception { + public void testCreateWithExcludeField() throws Exception { Map config = new HashMap<>(); - List patterns = List.of("foo*"); - config.put("field_pattern", patterns); - config.put("exclude_field", "field"); String processorTag = randomAlphaOfLength(10); OpenSearchException exception = expectThrows( OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config) ); - assertThat( - exception.getMessage(), - equalTo("[field] ether (field,field_pattern) or (exclude_field,exclude_field_pattern) can be set") - ); + assertThat(exception.getMessage(), equalTo("[field] ether field or exclude_field must be set")); Map config2 = new HashMap<>(); - patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_"); - config2.put("field_pattern", patterns); + config2.put("field", "field1"); + config2.put("exclude_field", "field2"); exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config2)); - assertThat( - exception.getMessage(), - equalTo( - "[field_pattern] Validation Failed: 1: field_pattern [ ] must not contain a space;" - + "2: field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" - + "3: field_pattern [,] must not contain a ',';" - + "4: field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" - + "5: field_pattern [#] must not contain a '#';" - + "6: field_pattern [:] must not contain a ':';" - + "7: field_pattern [_] must not start with '_';" - ) - ); - - Map config3 = new HashMap<>(); - patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_"); - config3.put("exclude_field_pattern", patterns); - exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config3)); - assertThat( - exception.getMessage(), - equalTo( - "[exclude_field_pattern] Validation Failed: 1: exclude_field_pattern [ ] must not contain a space;" - + "2: exclude_field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" - + "3: exclude_field_pattern [,] must not contain a ',';" - + "4: exclude_field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" - + "5: exclude_field_pattern [#] must not contain a '#';" - + "6: exclude_field_pattern [:] must not contain a ':';" - + "7: exclude_field_pattern [_] must not start with '_';" - ) - ); - - Map config4 = new HashMap<>(); - exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config4)); - assertThat( - exception.getMessage(), - equalTo("[field] at least one of the parameters field, field_pattern, exclude_field and exclude_field_pattern need to be set") - ); - - Map config5 = new HashMap<>(); - config5.put("field_pattern", "field*"); - RemoveProcessor removeProcessor = factory.create(null, processorTag, null, config5); - assertThat(removeProcessor.getFieldPatterns(), equalTo(List.of("field*"))); + assertThat(exception.getMessage(), equalTo("[field] ether field or exclude_field must be set")); Map config6 = new HashMap<>(); config6.put("exclude_field", "exclude_field"); - removeProcessor = factory.create(null, processorTag, null, config6); + RemoveProcessor removeProcessor = factory.create(null, processorTag, null, config6); assertThat( removeProcessor.getExcludeFields() .stream() @@ -161,10 +115,5 @@ public void testCreateWithFieldPatterns() throws Exception { .collect(Collectors.toList()), equalTo(List.of("exclude_field")) ); - - Map config7 = new HashMap<>(); - config7.put("exclude_field_pattern", "exclude_field*"); - removeProcessor = factory.create(null, processorTag, null, config7); - assertThat(removeProcessor.getExcludeFieldPatterns(), equalTo(List.of("exclude_field*"))); } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java index 126537ca7b856..0e5dbf2cf1032 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java @@ -59,56 +59,12 @@ public void testRemoveFields() throws Exception { null, Collections.singletonList(new TestTemplateService.MockTemplateScript.Factory(field)), Collections.emptyList(), - Collections.emptyList(), - Collections.emptyList(), false ); processor.execute(ingestDocument); assertThat(ingestDocument.hasField(field), equalTo(false)); } - public void testRemoveWithFieldPatterns() throws Exception { - IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); - ingestDocument.setFieldValue("foo_1", "value"); - ingestDocument.setFieldValue("foo_2", "value"); - List fieldPatterns = new ArrayList<>(); - fieldPatterns.add("foo*"); - Processor processor = new RemoveProcessor( - randomAlphaOfLength(10), - null, - Collections.emptyList(), - fieldPatterns, - Collections.emptyList(), - Collections.emptyList(), - false - ); - processor.execute(ingestDocument); - assertThat(ingestDocument.hasField("foo_1"), equalTo(false)); - assertThat(ingestDocument.hasField("foo_2"), equalTo(false)); - - ingestDocument.setFieldValue("foo_1", "value"); - ingestDocument.setFieldValue("foo_2", "value"); - ingestDocument.setFieldValue("bar_1", "value"); - ingestDocument.setFieldValue("bar_2", "value"); - List fields = new ArrayList<>(); - fields.add(new TestTemplateService.MockTemplateScript.Factory("bar_1")); - fields.add(new TestTemplateService.MockTemplateScript.Factory("bar_2")); - Processor processorWithFieldsAndPatterns = new RemoveProcessor( - randomAlphaOfLength(10), - null, - fields, - fieldPatterns, - Collections.emptyList(), - Collections.emptyList(), - false - ); - processorWithFieldsAndPatterns.execute(ingestDocument); - assertThat(ingestDocument.hasField("foo_1"), equalTo(false)); - assertThat(ingestDocument.hasField("foo_2"), equalTo(false)); - assertThat(ingestDocument.hasField("bar_1"), equalTo(false)); - assertThat(ingestDocument.hasField("bar_2"), equalTo(false)); - } - public void testRemoveWithExcludeFields() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); ingestDocument.setFieldValue("foo_1", "value"); @@ -117,38 +73,11 @@ public void testRemoveWithExcludeFields() throws Exception { List excludeFields = new ArrayList<>(); excludeFields.add(new TestTemplateService.MockTemplateScript.Factory("foo_1")); excludeFields.add(new TestTemplateService.MockTemplateScript.Factory("foo_2")); - Processor processor = new RemoveProcessor( - randomAlphaOfLength(10), - null, - Collections.emptyList(), - Collections.emptyList(), - excludeFields, - Collections.emptyList(), - false - ); + Processor processor = new RemoveProcessor(randomAlphaOfLength(10), null, Collections.emptyList(), excludeFields, false); processor.execute(ingestDocument); assertThat(ingestDocument.hasField("foo_1"), equalTo(true)); assertThat(ingestDocument.hasField("foo_2"), equalTo(true)); assertThat(ingestDocument.hasField("foo_3"), equalTo(false)); - - ingestDocument.setFieldValue("foo_1", "value"); - ingestDocument.setFieldValue("foo_2", "value"); - ingestDocument.setFieldValue("foo_3", "value"); - List excludeFieldPatterns = new ArrayList<>(); - excludeFieldPatterns.add("foo_3*"); - Processor processorWithExcludeFieldsAndPatterns = new RemoveProcessor( - randomAlphaOfLength(10), - null, - Collections.emptyList(), - Collections.emptyList(), - excludeFields, - excludeFieldPatterns, - false - ); - processorWithExcludeFieldsAndPatterns.execute(ingestDocument); - assertThat(ingestDocument.hasField("foo_1"), equalTo(true)); - assertThat(ingestDocument.hasField("foo_2"), equalTo(true)); - assertThat(ingestDocument.hasField("foo_3"), equalTo(true)); } public void testRemoveNonExistingField() throws Exception { diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml index 6a787e33fd7f2..5603442d712b6 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml @@ -229,78 +229,6 @@ teardown: body: { message: "foo bar baz" } # Related issue: https://github.com/opensearch-project/OpenSearch/issues/1578 ---- -"Test remove processor with field_pattern": - - do: - ingest.put_pipeline: - id: "my_pipeline" - body: > - { - "description": "_description", - "processors": [ - { - "remove" : { - "field_pattern" : "foo*" - } - } - ] - } - - match: { acknowledged: true } - - - do: - index: - index: test - id: 1 - pipeline: "my_pipeline" - body: { - foo1: "bar", - foo2: "bar", - zoo: "bar" - } - - - do: - get: - index: test - id: 1 - - match: { _source: {zoo: "bar" }} - ---- -"Test remove processor with both field and field_pattern": - - do: - ingest.put_pipeline: - id: "my_pipeline" - body: > - { - "description": "_description", - "processors": [ - { - "remove" : { - "field": "bar", - "field_pattern" : ["foo*"] - } - } - ] - } - - match: { acknowledged: true } - - - do: - index: - index: test - id: 1 - pipeline: "my_pipeline" - body: { - foo1: "bar", - foo2: "bar", - bar: "zoo", - zoo: "bar" - } - - - do: - get: - index: test - id: 1 - - match: { _source: {zoo: "bar" }} - --- "Test remove processor with exclude_field": - do: @@ -336,76 +264,3 @@ teardown: index: test id: 1 - match: { _source: { bar: "zoo"}} - ---- -"Test remove processor with exclude_field_pattern": - - do: - ingest.put_pipeline: - id: "my_pipeline" - body: > - { - "description": "_description", - "processors": [ - { - "remove" : { - "exclude_field_pattern": "foo*" - } - } - ] - } - - match: { acknowledged: true } - - - do: - index: - index: test - id: 1 - pipeline: "my_pipeline" - body: { - foo1: "bar", - foo2: "bar", - bar: "zoo", - zoo: "bar" - } - - - do: - get: - index: test - id: 1 - - match: { _source: { foo1: "bar", foo2: "bar"}} - ---- -"Test remove processor with both exclude_field and exclude_field_pattern": - - do: - ingest.put_pipeline: - id: "my_pipeline" - body: > - { - "description": "_description", - "processors": [ - { - "remove" : { - "exclude_field": "bar", - "exclude_field_pattern" : ["foo*"] - } - } - ] - } - - match: { acknowledged: true } - - - do: - index: - index: test - id: 1 - pipeline: "my_pipeline" - body: { - foo1: "bar", - foo2: "bar", - bar: "zoo", - zoo: "bar" - } - - - do: - get: - index: test - id: 1 - - match: { _source: {foo1: "bar", foo2: "bar", bar: "zoo"}}