Skip to content

Commit

Permalink
Make fields and exclude_fields mutually exclusive when constructing R…
Browse files Browse the repository at this point in the history
…emoveProcessor

Signed-off-by: Gao Binlong <[email protected]>
  • Loading branch information
gaobinlong committed Dec 7, 2023
1 parent b602996 commit aef6614
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

package org.opensearch.ingest.common;

import org.opensearch.common.Nullable;
import org.opensearch.core.common.Strings;
import org.opensearch.index.VersionType;
import org.opensearch.ingest.AbstractProcessor;
Expand Down Expand Up @@ -65,13 +66,22 @@ public final class RemoveProcessor extends AbstractProcessor {
RemoveProcessor(
String tag,
String description,
List<TemplateScript.Factory> fields,
List<TemplateScript.Factory> excludeFields,
@Nullable List<TemplateScript.Factory> fields,
@Nullable List<TemplateScript.Factory> excludeFields,
boolean ignoreMissing
) {
super(tag, description);
this.fields = new ArrayList<>(fields);
this.excludeFields = new ArrayList<>(excludeFields);
if (fields == null && excludeFields == null || fields != null && excludeFields != null) {
throw new IllegalArgumentException("ether fields and excludeFields must be set");
}
if (fields != null) {
this.fields = new ArrayList<>(fields);
this.excludeFields = null;
} else {
this.fields = null;
this.excludeFields = new ArrayList<>(excludeFields);
}

this.ignoreMissing = ignoreMissing;
}

Expand All @@ -85,7 +95,7 @@ public List<TemplateScript.Factory> getExcludeFields() {

@Override
public IngestDocument execute(IngestDocument document) {
if (!fields.isEmpty()) {
if (fields != null && !fields.isEmpty()) {
fields.forEach(field -> {
String path = document.renderTemplate(field);
final boolean fieldPathIsNullOrEmpty = Strings.isNullOrEmpty(path);
Expand Down Expand Up @@ -121,30 +131,30 @@ public IngestDocument execute(IngestDocument document) {
});
}

Set<String> excludeFieldSet = new HashSet<>();
if (!excludeFields.isEmpty()) {
if (excludeFields != null && !excludeFields.isEmpty()) {
Set<String> excludeFieldSet = new HashSet<>();
excludeFields.forEach(field -> {
String path = document.renderTemplate(field);
// ignore the empty or null field path
if (!Strings.isNullOrEmpty(path)) {
excludeFieldSet.add(path);
}
});
}

if (!excludeFieldSet.isEmpty()) {
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet());
Set<String> metadataFields = document.getMetadata()
.keySet()
.stream()
.map(IngestDocument.Metadata::getFieldName)
.collect(Collectors.toSet());
existingFields.forEach(field -> {
// ignore metadata fields such as _index, _id, etc.
if (!metadataFields.contains(field) && !excludeFieldSet.contains(field)) {
document.removeField(field);
}
});
if (!excludeFieldSet.isEmpty()) {
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet());
Set<String> metadataFields = document.getMetadata()
.keySet()
.stream()
.map(IngestDocument.Metadata::getFieldName)
.collect(Collectors.toSet());
existingFields.forEach(field -> {
// ignore metadata fields such as _index, _id, etc.
if (!metadataFields.contains(field) && !excludeFieldSet.contains(field)) {
document.removeField(field);
}
});
}
}

return document;
Expand Down Expand Up @@ -179,7 +189,8 @@ public RemoveProcessor create(
throw newConfigurationException(TYPE, processorTag, "field", "ether field or exclude_field must be set");
}

List<TemplateScript.Factory> fieldCompiledTemplates = new ArrayList<>();
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);

if (field != null) {
if (field instanceof List) {
@SuppressWarnings("unchecked")
Expand All @@ -188,27 +199,23 @@ public RemoveProcessor create(
} else {
fields.add((String) field);
}
fieldCompiledTemplates = fields.stream()
List<TemplateScript.Factory> fieldCompiledTemplates = fields.stream()
.map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "field", f, scriptService))
.collect(Collectors.toList());
}

List<TemplateScript.Factory> excludeFieldCompiledTemplates = new ArrayList<>();
if (excludeField != null) {
return new RemoveProcessor(processorTag, description, fieldCompiledTemplates, null, ignoreMissing);
} else {
if (excludeField instanceof List) {
@SuppressWarnings("unchecked")
List<String> stringList = (List<String>) excludeField;
excludeFields.addAll(stringList);
} else {
excludeFields.add((String) excludeField);
}
excludeFieldCompiledTemplates = excludeFields.stream()
List<TemplateScript.Factory> excludeFieldCompiledTemplates = excludeFields.stream()
.map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "exclude_field", f, scriptService))
.collect(Collectors.toList());
return new RemoveProcessor(processorTag, description, null, excludeFieldCompiledTemplates, ignoreMissing);
}

boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
return new RemoveProcessor(processorTag, description, fieldCompiledTemplates, excludeFieldCompiledTemplates, ignoreMissing);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,22 @@ public void testRemoveFields() throws Exception {
randomAlphaOfLength(10),
null,
Collections.singletonList(new TestTemplateService.MockTemplateScript.Factory(field)),
Collections.emptyList(),
null,
false
);
processor.execute(ingestDocument);
assertThat(ingestDocument.hasField(field), equalTo(false));
}

public void testRemoveWithExcludeFields() throws Exception {
public void testRemoveByExcludeFields() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
ingestDocument.setFieldValue("foo_1", "value");
ingestDocument.setFieldValue("foo_2", "value");
ingestDocument.setFieldValue("foo_3", "value");
List<TemplateScript.Factory> excludeFields = new ArrayList<>();
excludeFields.add(new TestTemplateService.MockTemplateScript.Factory("foo_1"));
excludeFields.add(new TestTemplateService.MockTemplateScript.Factory("foo_2"));
Processor processor = new RemoveProcessor(randomAlphaOfLength(10), null, Collections.emptyList(), excludeFields, false);
Processor processor = new RemoveProcessor(randomAlphaOfLength(10), null, null, excludeFields, false);
processor.execute(ingestDocument);
assertThat(ingestDocument.hasField("foo_1"), equalTo(true));
assertThat(ingestDocument.hasField("foo_2"), equalTo(true));
Expand Down Expand Up @@ -199,4 +199,32 @@ public void testRemoveMetadataField() throws Exception {
}
}
}

public void testCreateRemoveProcessorWithBothFieldsAndExcludeFields() throws Exception {
assertThrows(
"ether fields and excludeFields must be set",
IllegalArgumentException.class,
() -> new RemoveProcessor(randomAlphaOfLength(10), null, null, null, false)
);

final List<TemplateScript.Factory> fields;
if (randomBoolean()) {
fields = new ArrayList<>();
} else {
fields = List.of(new TestTemplateService.MockTemplateScript.Factory("foo_1"));
}

final List<TemplateScript.Factory> excludeFields;
if (randomBoolean()) {
excludeFields = new ArrayList<>();
} else {
excludeFields = List.of(new TestTemplateService.MockTemplateScript.Factory("foo_2"));
}

assertThrows(
"ether fields and excludeFields must be set",
IllegalArgumentException.class,
() -> new RemoveProcessor(randomAlphaOfLength(10), null, fields, excludeFields, false)
);
}
}

0 comments on commit aef6614

Please sign in to comment.