Skip to content

Commit

Permalink
Disallow removing some metadata fields by remove ingest processor (#1…
Browse files Browse the repository at this point in the history
…0895)

* Ignore metadata fields when removing fields by remove ingest processor

Signed-off-by: Gao Binlong <[email protected]>

* Modify change log

Signed-off-by: Gao Binlong <[email protected]>

* Throw exception when removing some metadata fields

Signed-off-by: Gao Binlong <[email protected]>

* Format the code

Signed-off-by: Gao Binlong <[email protected]>

* Remove calling toLowerCase()

Signed-off-by: Gao Binlong <[email protected]>

* Remove calling toLowerCase()

Signed-off-by: Gao Binlong <[email protected]>

* Fix test failure

Signed-off-by: Gao Binlong <[email protected]>

* Add skip config in yml test file

Signed-off-by: Gao Binlong <[email protected]>

* Improve test coverage

Signed-off-by: Gao Binlong <[email protected]>

* Optimize some code

Signed-off-by: Gao Binlong <[email protected]>

---------

Signed-off-by: Gao Binlong <[email protected]>
  • Loading branch information
gaobinlong authored Nov 22, 2023
1 parent b974dfb commit a0b016b
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642))
- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395))
- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273))
- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895))
- Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023))
- Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057))
- Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.ingest.common;

import org.opensearch.core.common.Strings;
import org.opensearch.index.VersionType;
import org.opensearch.ingest.AbstractProcessor;
import org.opensearch.ingest.ConfigurationUtils;
import org.opensearch.ingest.IngestDocument;
Expand All @@ -43,6 +44,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -79,6 +81,24 @@ public IngestDocument execute(IngestDocument document) {
throw new IllegalArgumentException("field [" + path + "] doesn't exist");
}
}
// cannot remove _index, _version and _version_type.
if (path.equals(IngestDocument.Metadata.INDEX.getFieldName())
|| path.equals(IngestDocument.Metadata.VERSION.getFieldName())
|| path.equals(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) {
throw new IllegalArgumentException("cannot remove metadata field [" + path + "]");
}
// removing _id is disallowed when there's an external version specified in the request
String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class);
if (path.equals(IngestDocument.Metadata.ID.getFieldName())
&& !Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) {
Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class);
throw new IllegalArgumentException(
"cannot remove metadata field [_id] when specifying external version for the document, version: "
+ version
+ ", version_type: "
+ versionType
);
}
document.removeField(path);
});
return document;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

package org.opensearch.ingest.common;

import org.opensearch.index.VersionType;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.ingest.Processor;
import org.opensearch.ingest.RandomDocumentPicks;
Expand All @@ -40,15 +41,17 @@

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.equalTo;

public class RemoveProcessorTests extends OpenSearchTestCase {

public void testRemoveFields() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
String field = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument);
String field = RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10));
Processor processor = new RemoveProcessor(
randomAlphaOfLength(10),
null,
Expand Down Expand Up @@ -124,4 +127,58 @@ public void testIgnoreMissing() throws Exception {
processor = new RemoveProcessor.Factory(TestTemplateService.instance()).create(null, processorTag, null, configWithEmptyField);
processor.execute(ingestDocument);
}

public void testRemoveMetadataField() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
List<String> metadataFields = ingestDocument.getMetadata()
.keySet()
.stream()
.map(IngestDocument.Metadata::getFieldName)
.collect(Collectors.toList());

for (String metadataFieldName : metadataFields) {
Map<String, Object> config = new HashMap<>();
config.put("field", metadataFieldName);
String processorTag = randomAlphaOfLength(10);
Processor processor = new RemoveProcessor.Factory(TestTemplateService.instance()).create(null, processorTag, null, config);
// _if_seq_no and _if_primary_term do not exist in the enriched document, removing them will throw IllegalArgumentException
if (metadataFieldName.equals(IngestDocument.Metadata.IF_SEQ_NO.getFieldName())
|| metadataFieldName.equals(IngestDocument.Metadata.IF_PRIMARY_TERM.getFieldName())) {
assertThrows(
"field: [" + metadataFieldName + "] doesn't exist",
IllegalArgumentException.class,
() -> processor.execute(ingestDocument)
);
} else if (metadataFieldName.equals(IngestDocument.Metadata.INDEX.getFieldName())
|| metadataFieldName.equals(IngestDocument.Metadata.VERSION.getFieldName())
|| metadataFieldName.equals(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) {
// _index, _version and _version_type cannot be removed
assertThrows(
"cannot remove metadata field [" + metadataFieldName + "]",
IllegalArgumentException.class,
() -> processor.execute(ingestDocument)
);
} else if (metadataFieldName.equals(IngestDocument.Metadata.ID.getFieldName())) {
Long version = ingestDocument.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class);
String versionType = ingestDocument.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class);
if (!versionType.equals(VersionType.toString(VersionType.INTERNAL))) {
assertThrows(
"cannot remove metadata field [_id] when specifying external version for the document, version: "
+ version
+ ", version_type: "
+ versionType,
IllegalArgumentException.class,
() -> processor.execute(ingestDocument)
);
} else {
processor.execute(ingestDocument);
assertThat(ingestDocument.hasField(metadataFieldName), equalTo(false));
}
} else if (metadataFieldName.equals(IngestDocument.Metadata.ROUTING.getFieldName())
&& ingestDocument.hasField(IngestDocument.Metadata.ROUTING.getFieldName())) {
processor.execute(ingestDocument);
assertThat(ingestDocument.hasField(metadataFieldName), equalTo(false));
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,139 @@ teardown:
index: test
id: 1
- match: { _source.message: "foo bar baz" }

#Related issue: https://github.com/opensearch-project/OpenSearch/issues/10732
---
"Test remove metadata field":
- skip:
version: " - 2.11.99"
reason: "The bug was fixed in 2.12"

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "{{foo}}"
}
}
]
}
- match: { acknowledged: true }

- do:
catch: /cannot remove metadata field \[\_index\]/
index:
index: test
id: 1
pipeline: "my_pipeline"
body: {
foo: "_index"
}

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "_version"
}
}
]
}
- match: { acknowledged: true }

- do:
catch: /cannot remove metadata field \[\_version\]/
index:
index: test
id: 1
pipeline: "my_pipeline"
body: {
foo: "bar"
}

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "_version_type"
}
}
]
}
- match: { acknowledged: true }

- do:
catch: /cannot remove metadata field \[\_version\_type\]/
index:
index: test
id: 1
pipeline: "my_pipeline"
body: {
foo: "bar"
}

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : ["_id", "_routing"]
}
}
]
}
- match: { acknowledged: true }

- do:
index:
index: test
id: 1
routing: abc
pipeline: "my_pipeline"
body: { message: "foo bar baz" }
- match: { result: created }

- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "_id"
}
}
]
}
- match: { acknowledged: true }

- do:
catch: /cannot remove metadata field \[\_id\] when specifying external version for the document/
index:
index: test
id: "test_id_10000"
pipeline: "my_pipeline"
version: 1
version_type: "external"
body: { message: "foo bar baz" }

0 comments on commit a0b016b

Please sign in to comment.