Skip to content

Commit

Permalink
Remove the code of field pattern
Browse files Browse the repository at this point in the history
Signed-off-by: Gao Binlong <[email protected]>
  • Loading branch information
gaobinlong committed Dec 3, 2023
1 parent aedd938 commit 6548996
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 405 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@

package org.opensearch.ingest.common;

import org.opensearch.common.ValidationException;
import org.opensearch.common.regex.Regex;
import org.opensearch.core.common.Strings;
import org.opensearch.index.VersionType;
import org.opensearch.ingest.AbstractProcessor;
Expand Down Expand Up @@ -61,44 +59,30 @@ public final class RemoveProcessor extends AbstractProcessor {
public static final String TYPE = "remove";

private final List<TemplateScript.Factory> fields;
private final List<String> fieldPatterns;
private final List<TemplateScript.Factory> excludeFields;
private final List<String> excludeFieldPatterns;
private final boolean ignoreMissing;

RemoveProcessor(
String tag,
String description,
List<TemplateScript.Factory> fields,
List<String> fieldPatterns,
List<TemplateScript.Factory> excludeFields,
List<String> excludeFieldPatterns,
boolean ignoreMissing
) {
super(tag, description);
this.fields = new ArrayList<>(fields);
this.fieldPatterns = new ArrayList<>(fieldPatterns);
this.excludeFields = new ArrayList<>(excludeFields);
this.excludeFieldPatterns = new ArrayList<>(excludeFieldPatterns);
this.ignoreMissing = ignoreMissing;
}

/**
 * Returns the {@code fields} template list held by this processor.
 *
 * <p>The returned list is the processor's own internal {@link ArrayList} (the copy made in the
 * constructor), handed out directly rather than wrapped or copied again.
 *
 * @return the internal list of field templates; never {@code null}, possibly empty
 */
public List<TemplateScript.Factory> getFields() {
return fields;
}

public List<String> getFieldPatterns() {
return fieldPatterns;
}

/**
 * Returns the {@code excludeFields} template list held by this processor.
 *
 * <p>The returned list is the processor's own internal {@link ArrayList} (the copy made in the
 * constructor), handed out directly rather than wrapped or copied again.
 *
 * @return the internal list of exclude-field templates; never {@code null}, possibly empty
 */
public List<TemplateScript.Factory> getExcludeFields() {
return excludeFields;
}

public List<String> getExcludeFieldPatterns() {
return excludeFieldPatterns;
}

@Override
public IngestDocument execute(IngestDocument document) {
if (!fields.isEmpty()) {
Expand Down Expand Up @@ -137,24 +121,6 @@ public IngestDocument execute(IngestDocument document) {
});
}

if (!fieldPatterns.isEmpty()) {
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet());
Set<String> metadataFields = document.getMetadata()
.keySet()
.stream()
.map(IngestDocument.Metadata::getFieldName)
.collect(Collectors.toSet());
existingFields.forEach(field -> {
// ignore metadata fields such as _index, _id, etc.
if (!metadataFields.contains(field)) {
final boolean matched = fieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field));
if (matched) {
document.removeField(field);
}
}
});
}

Set<String> excludeFieldSet = new HashSet<>();
if (!excludeFields.isEmpty()) {
excludeFields.forEach(field -> {
Expand All @@ -166,7 +132,7 @@ public IngestDocument execute(IngestDocument document) {
});
}

if (!excludeFieldSet.isEmpty() || !excludeFieldPatterns.isEmpty()) {
if (!excludeFieldSet.isEmpty()) {
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet());
Set<String> metadataFields = document.getMetadata()
.keySet()
Expand All @@ -175,18 +141,8 @@ public IngestDocument execute(IngestDocument document) {
.collect(Collectors.toSet());
existingFields.forEach(field -> {
// ignore metadata fields such as _index, _id, etc.
if (!metadataFields.contains(field)) {
// when both exclude_field and exclude_field_pattern are not empty, remove the field if it doesn't exist in both of them
// if not, remove the field if it doesn't exist in the non-empty one
if (!excludeFieldPatterns.isEmpty()) {
final boolean matched = excludeFieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field));
if (!excludeFieldSet.isEmpty() && !excludeFieldSet.contains(field) && !matched
|| excludeFieldSet.isEmpty() && !matched) {
document.removeField(field);
}
} else if (!excludeFieldSet.isEmpty() && !excludeFieldSet.contains(field)) {
document.removeField(field);
}
if (!metadataFields.contains(field) && !excludeFieldSet.contains(field)) {
document.removeField(field);
}
});
}
Expand Down Expand Up @@ -215,31 +171,12 @@ public RemoveProcessor create(
Map<String, Object> config
) throws Exception {
final List<String> fields = new ArrayList<>();
final List<String> fieldPatterns = new ArrayList<>();
final List<String> excludeFields = new ArrayList<>();
final List<String> excludeFieldPatterns = new ArrayList<>();

final Object field = ConfigurationUtils.readOptionalObject(config, "field");
final Object fieldPattern = ConfigurationUtils.readOptionalObject(config, "field_pattern");
final Object excludeField = ConfigurationUtils.readOptionalObject(config, "exclude_field");
final Object excludeFieldPattern = ConfigurationUtils.readOptionalObject(config, "exclude_field_pattern");

if (field == null && fieldPattern == null && excludeField == null && excludeFieldPattern == null) {
throw newConfigurationException(
TYPE,
processorTag,
"field",
"at least one of the parameters field, field_pattern, exclude_field and exclude_field_pattern need to be set"
);
}

if ((field != null || fieldPattern != null) && (excludeField != null || excludeFieldPattern != null)) {
throw newConfigurationException(
TYPE,
processorTag,
"field",
"ether (field,field_pattern) or (exclude_field,exclude_field_pattern) can be set"
);
if (field == null && excludeField == null || field != null && excludeField != null) {
throw newConfigurationException(TYPE, processorTag, "field", "ether field or exclude_field must be set");
}

List<TemplateScript.Factory> fieldCompiledTemplates = new ArrayList<>();
Expand All @@ -256,17 +193,6 @@ public RemoveProcessor create(
.collect(Collectors.toList());
}

if (fieldPattern != null) {
if (fieldPattern instanceof List) {
@SuppressWarnings("unchecked")
List<String> fieldPatternList = (List<String>) fieldPattern;
fieldPatterns.addAll(fieldPatternList);
} else {
fieldPatterns.add((String) fieldPattern);
}
validateFieldPatterns(processorTag, fieldPatterns, "field_pattern");
}

List<TemplateScript.Factory> excludeFieldCompiledTemplates = new ArrayList<>();
if (excludeField != null) {
if (excludeField instanceof List) {
Expand All @@ -281,59 +207,8 @@ public RemoveProcessor create(
.collect(Collectors.toList());
}

if (excludeFieldPattern != null) {
if (excludeFieldPattern instanceof List) {
@SuppressWarnings("unchecked")
List<String> excludeFieldPatternList = (List<String>) excludeFieldPattern;
excludeFieldPatterns.addAll(excludeFieldPatternList);
} else {
excludeFieldPatterns.add((String) excludeFieldPattern);
}
validateFieldPatterns(processorTag, excludeFieldPatterns, "exclude_field_pattern");
}

boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
return new RemoveProcessor(
processorTag,
description,
fieldCompiledTemplates,
fieldPatterns,
excludeFieldCompiledTemplates,
excludeFieldPatterns,
ignoreMissing
);
}

private void validateFieldPatterns(String processorTag, List<String> patterns, String patternKey) {
List<String> validationErrors = new ArrayList<>();
for (String fieldPattern : patterns) {
if (fieldPattern.contains(" ")) {
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a space");
}
if (fieldPattern.contains(",")) {
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ','");
}
if (fieldPattern.contains("#")) {
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a '#'");
}
if (fieldPattern.contains(":")) {
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ':'");
}
if (fieldPattern.startsWith("_")) {
validationErrors.add(patternKey + " [" + fieldPattern + "] must not start with '_'");
}
if (Strings.validFileNameExcludingAstrix(fieldPattern) == false) {
validationErrors.add(
patternKey + " [" + fieldPattern + "] must not contain the following characters " + Strings.INVALID_FILENAME_CHARS
);
}
}

if (validationErrors.size() > 0) {
ValidationException validationException = new ValidationException();
validationException.addValidationErrors(validationErrors);
throw newConfigurationException(TYPE, processorTag, patternKey, validationException.getMessage());
}
return new RemoveProcessor(processorTag, description, fieldCompiledTemplates, excludeFieldCompiledTemplates, ignoreMissing);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,81 +90,30 @@ public void testInvalidMustacheTemplate() throws Exception {
assertThat(exception.getMetadata("opensearch.processor_tag").get(0), equalTo(processorTag));
}

public void testCreateWithFieldPatterns() throws Exception {
public void testCreateWithExcludeField() throws Exception {
Map<String, Object> config = new HashMap<>();
List<String> patterns = List.of("foo*");
config.put("field_pattern", patterns);
config.put("exclude_field", "field");
String processorTag = randomAlphaOfLength(10);
OpenSearchException exception = expectThrows(
OpenSearchParseException.class,
() -> factory.create(null, processorTag, null, config)
);
assertThat(
exception.getMessage(),
equalTo("[field] ether (field,field_pattern) or (exclude_field,exclude_field_pattern) can be set")
);
assertThat(exception.getMessage(), equalTo("[field] ether field or exclude_field must be set"));

Map<String, Object> config2 = new HashMap<>();
patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_");
config2.put("field_pattern", patterns);
config2.put("field", "field1");
config2.put("exclude_field", "field2");
exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config2));
assertThat(
exception.getMessage(),
equalTo(
"[field_pattern] Validation Failed: 1: field_pattern [ ] must not contain a space;"
+ "2: field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];"
+ "3: field_pattern [,] must not contain a ',';"
+ "4: field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];"
+ "5: field_pattern [#] must not contain a '#';"
+ "6: field_pattern [:] must not contain a ':';"
+ "7: field_pattern [_] must not start with '_';"
)
);

Map<String, Object> config3 = new HashMap<>();
patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_");
config3.put("exclude_field_pattern", patterns);
exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config3));
assertThat(
exception.getMessage(),
equalTo(
"[exclude_field_pattern] Validation Failed: 1: exclude_field_pattern [ ] must not contain a space;"
+ "2: exclude_field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];"
+ "3: exclude_field_pattern [,] must not contain a ',';"
+ "4: exclude_field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];"
+ "5: exclude_field_pattern [#] must not contain a '#';"
+ "6: exclude_field_pattern [:] must not contain a ':';"
+ "7: exclude_field_pattern [_] must not start with '_';"
)
);

Map<String, Object> config4 = new HashMap<>();
exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config4));
assertThat(
exception.getMessage(),
equalTo("[field] at least one of the parameters field, field_pattern, exclude_field and exclude_field_pattern need to be set")
);

Map<String, Object> config5 = new HashMap<>();
config5.put("field_pattern", "field*");
RemoveProcessor removeProcessor = factory.create(null, processorTag, null, config5);
assertThat(removeProcessor.getFieldPatterns(), equalTo(List.of("field*")));
assertThat(exception.getMessage(), equalTo("[field] ether field or exclude_field must be set"));

Map<String, Object> config6 = new HashMap<>();
config6.put("exclude_field", "exclude_field");
removeProcessor = factory.create(null, processorTag, null, config6);
RemoveProcessor removeProcessor = factory.create(null, processorTag, null, config6);
assertThat(
removeProcessor.getExcludeFields()
.stream()
.map(template -> template.newInstance(Collections.emptyMap()).execute())
.collect(Collectors.toList()),
equalTo(List.of("exclude_field"))
);

Map<String, Object> config7 = new HashMap<>();
config7.put("exclude_field_pattern", "exclude_field*");
removeProcessor = factory.create(null, processorTag, null, config7);
assertThat(removeProcessor.getExcludeFieldPatterns(), equalTo(List.of("exclude_field*")));
}
}
Loading

0 comments on commit 6548996

Please sign in to comment.