From a0b016bf154cf765483f38c4c7f135ae972004c2 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Wed, 22 Nov 2023 21:51:27 +0800 Subject: [PATCH] Disallow removing some metadata fields by remove ingest processor (#10895) * Ignore metadata fields when removing fields by remove ingest processor Signed-off-by: Gao Binlong * Modify change log Signed-off-by: Gao Binlong * Throw exception when removing some metadata fields Signed-off-by: Gao Binlong * Format the code Signed-off-by: Gao Binlong * Remove calling toLowerCase() Signed-off-by: Gao Binlong * Remove calling toLowerCase() Signed-off-by: Gao Binlong * Fix test failure Signed-off-by: Gao Binlong * Add skip config in yml test file Signed-off-by: Gao Binlong * Improve test coverage Signed-off-by: Gao Binlong * Optimize some code Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../ingest/common/RemoveProcessor.java | 20 +++ .../ingest/common/RemoveProcessorTests.java | 59 +++++++- .../test/ingest/290_remove_processor.yml | 136 ++++++++++++++++++ 4 files changed, 215 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95baa2e88c04a..0e490620f97dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,6 +137,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) - Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) +- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895)) - Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023)) - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java index 93a35eef4d396..bb3d4bca47859 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java @@ -33,6 +33,7 @@ package org.opensearch.ingest.common; import org.opensearch.core.common.Strings; +import org.opensearch.index.VersionType; import org.opensearch.ingest.AbstractProcessor; import org.opensearch.ingest.ConfigurationUtils; import org.opensearch.ingest.IngestDocument; @@ -43,6 +44,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; /** @@ -79,6 +81,24 @@ public IngestDocument execute(IngestDocument document) { throw new IllegalArgumentException("field [" + path + "] doesn't exist"); } } + // cannot remove _index, _version and _version_type. + if (path.equals(IngestDocument.Metadata.INDEX.getFieldName()) + || path.equals(IngestDocument.Metadata.VERSION.getFieldName()) + || path.equals(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { + throw new IllegalArgumentException("cannot remove metadata field [" + path + "]"); + } + // removing _id is disallowed when there's an external version specified in the request + String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); + if (path.equals(IngestDocument.Metadata.ID.getFieldName()) + && !Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { + Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class); + throw new IllegalArgumentException( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + version + + ", version_type: " + + versionType + ); + } document.removeField(path); }); return document; diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java index 8f729c6a39bbd..1a5630a4730f2 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java @@ -32,6 +32,7 @@ package org.opensearch.ingest.common; +import org.opensearch.index.VersionType; import org.opensearch.ingest.IngestDocument; import org.opensearch.ingest.Processor; import org.opensearch.ingest.RandomDocumentPicks; @@ -40,7 +41,9 @@ import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -48,7 +51,7 @@ public class RemoveProcessorTests extends OpenSearchTestCase { public void testRemoveFields() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); - String field = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); + String field = RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10)); Processor processor = new RemoveProcessor( randomAlphaOfLength(10), null, @@ -124,4 +127,58 @@ public void testIgnoreMissing() throws Exception { processor = new RemoveProcessor.Factory(TestTemplateService.instance()).create(null, processorTag, null, configWithEmptyField); processor.execute(ingestDocument); } + + public void testRemoveMetadataField() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); + List metadataFields = ingestDocument.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toList()); + + for (String metadataFieldName : metadataFields) { + Map config = new HashMap<>(); + config.put("field", metadataFieldName); + String processorTag = randomAlphaOfLength(10); + Processor processor = new RemoveProcessor.Factory(TestTemplateService.instance()).create(null, processorTag, null, config); + // _if_seq_no and _if_primary_term do not exist in the enriched document, removing them will throw IllegalArgumentException + if (metadataFieldName.equals(IngestDocument.Metadata.IF_SEQ_NO.getFieldName()) + || metadataFieldName.equals(IngestDocument.Metadata.IF_PRIMARY_TERM.getFieldName())) { + assertThrows( + "field: [" + metadataFieldName + "] doesn't exist", + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + } else if (metadataFieldName.equals(IngestDocument.Metadata.INDEX.getFieldName()) + || metadataFieldName.equals(IngestDocument.Metadata.VERSION.getFieldName()) + || metadataFieldName.equals(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { + // _index, _version and _version_type cannot be removed + assertThrows( + "cannot remove metadata field [" + metadataFieldName + "]", + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + } else if (metadataFieldName.equals(IngestDocument.Metadata.ID.getFieldName())) { + Long version = ingestDocument.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class); + String versionType = ingestDocument.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); + if (!versionType.equals(VersionType.toString(VersionType.INTERNAL))) { + assertThrows( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + version + + ", version_type: " + + versionType, + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + } else { + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(metadataFieldName), equalTo(false)); + } + } else if (metadataFieldName.equals(IngestDocument.Metadata.ROUTING.getFieldName()) + && ingestDocument.hasField(IngestDocument.Metadata.ROUTING.getFieldName())) { + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(metadataFieldName), equalTo(false)); + } + } + } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml index ff5a17136afa2..4811769d04f0e 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml @@ -91,3 +91,139 @@ teardown: index: test id: 1 - match: { _source.message: "foo bar baz" } + +#Related issue: https://github.com/opensearch-project/OpenSearch/issues/10732 +--- +"Test remove metadata field": + - skip: + version: " - 2.11.99" + reason: "The bug was fixed in 2.12" + + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "{{foo}}" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /cannot remove metadata field \[\_index\]/ + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: { + foo: "_index" + } + + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "_version" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /cannot remove metadata field \[\_version\]/ + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: { + foo: "bar" + } + + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "_version_type" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /cannot remove metadata field \[\_version\_type\]/ + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: { + foo: "bar" + } + + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : ["_id", "_routing"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + routing: abc + pipeline: "my_pipeline" + body: { message: "foo bar baz" } + - match: { result: created } + + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "_id" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /cannot remove metadata field \[\_id\] when specifying external version for the document/ + index: + index: test + id: "test_id_10000" + pipeline: "my_pipeline" + version: 1 + version_type: "external" + body: { message: "foo bar baz" }