From a6dcdfbfe55ae58cfa8071cd1f9c66362ec5fb8b Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Tue, 7 May 2024 10:45:23 -0700 Subject: [PATCH] Add capability to disable source recovery_source for an index Signed-off-by: Navneet Verma --- CHANGELOG.md | 1 + .../index/mapper/SourceFieldMapper.java | 87 +++++++++-- .../index/mapper/SourceFieldMapperTests.java | 137 +++++++++++++++++- 3 files changed, 212 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8215c2983724a..6bf4ad34d2f79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Search Pipeline] Handle default pipeline for multiple indices ([#13276](https://github.com/opensearch-project/OpenSearch/pull/13276)) - Add support for deep copying SearchRequest ([#12295](https://github.com/opensearch-project/OpenSearch/pull/12295)) - Support multi ranges traversal when doing date histogram rewrite optimization. ([#13317](https://github.com/opensearch-project/OpenSearch/pull/13317)) +- Add capability to disable source recovery_source for an index ([#13590](https://github.com/opensearch-project/OpenSearch/pull/13590)) ### Dependencies - Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896)) diff --git a/server/src/main/java/org/opensearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/SourceFieldMapper.java index a2d769d486a0f..2063ff7f934a3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/SourceFieldMapper.java @@ -74,6 +74,7 @@ public class SourceFieldMapper extends MetadataFieldMapper { public static final String CONTENT_TYPE = "_source"; private final Function, Map> filter; + private final Function, Map> recoverySourceFilter; /** * Default parameters for source fields @@ -119,13 +120,34 @@ public static class Builder extends MetadataFieldMapper.Builder { Collections.emptyList() ); + private final Parameter recoverySourceEnabled = Parameter.boolParam( + "recovery_source_enabled", + false, + m -> toType(m).recoverySourceEnabled, + Defaults.ENABLED + ); + + private final Parameter> recoverySourceIncludes = Parameter.stringArrayParam( + "recovery_source_includes", + false, + m -> Arrays.asList(toType(m).recoverySourceIncludes), + Collections.emptyList() + ); + + private final Parameter> recoverySourceExcludes = Parameter.stringArrayParam( + "recovery_source_excludes", + false, + m -> Arrays.asList(toType(m).recoverySourceExcludes), + Collections.emptyList() + ); + public Builder() { super(Defaults.NAME); } @Override protected List> getParameters() { - return Arrays.asList(enabled, includes, excludes); + return Arrays.asList(enabled, includes, excludes, recoverySourceEnabled, recoverySourceIncludes, recoverySourceExcludes); } @Override @@ -133,7 +155,10 @@ public SourceFieldMapper build(BuilderContext context) { return new SourceFieldMapper( enabled.getValue(), includes.getValue().toArray(new String[0]), - excludes.getValue().toArray(new String[0]) + excludes.getValue().toArray(new String[0]), + recoverySourceEnabled.getValue(), + recoverySourceIncludes.getValue().toArray(new String[0]), + recoverySourceExcludes.getValue().toArray(new String[0]) ); } } @@ -173,17 +198,27 @@ public Query termQuery(Object value, QueryShardContext context) { } private final boolean enabled; + private final boolean recoverySourceEnabled; /** indicates whether the source will always exist and be complete, for use by features like the update API */ private final boolean complete; private final String[] includes; private final String[] excludes; + private final String[] recoverySourceIncludes; + private final String[] recoverySourceExcludes; private SourceFieldMapper() { - this(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); + this(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY, Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); } - private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes) { + private SourceFieldMapper( + boolean enabled, + String[] includes, + String[] excludes, + boolean recoverySourceEnabled, + String[] recoverySourceIncludes, + String[] recoverySourceExcludes + ) { super(new SourceFieldType(enabled)); this.enabled = enabled; this.includes = includes; @@ -191,6 +226,16 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes) final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false; this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null; this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes); + + // Set parameters for recovery source + this.recoverySourceEnabled = recoverySourceEnabled; + this.recoverySourceIncludes = recoverySourceIncludes; + this.recoverySourceExcludes = recoverySourceExcludes; + final boolean recoverySourcefiltered = CollectionUtils.isEmpty(recoverySourceIncludes) == false + || CollectionUtils.isEmpty(recoverySourceExcludes) == false; + this.recoverySourceFilter = this.recoverySourceEnabled && recoverySourcefiltered + ? XContentMapValues.filter(recoverySourceIncludes, recoverySourceExcludes) + : null; } public boolean enabled() { @@ -212,22 +257,40 @@ public void preParse(ParseContext context) throws IOException { context.doc().add(new StoredField(fieldType().name(), ref.bytes, ref.offset, ref.length)); } - if (originalSource != null && adaptedSource != originalSource) { - // if we omitted source or modified it we add the _recovery_source to ensure we have it for ops based recovery - BytesRef ref = originalSource.toBytesRef(); - context.doc().add(new StoredField(RECOVERY_SOURCE_NAME, ref.bytes, ref.offset, ref.length)); - context.doc().add(new NumericDocValuesField(RECOVERY_SOURCE_NAME, 1)); + if (recoverySourceEnabled) { + if (originalSource != null && adaptedSource != originalSource) { + final BytesReference adaptedRecoverySource = applyFilters( + originalSource, + contentType, + recoverySourceEnabled, + recoverySourceFilter + ); + // if we omitted source or modified it we add the _recovery_source to ensure we have it for ops based recovery + BytesRef ref = adaptedRecoverySource.toBytesRef(); + context.doc().add(new StoredField(RECOVERY_SOURCE_NAME, ref.bytes, ref.offset, ref.length)); + context.doc().add(new NumericDocValuesField(RECOVERY_SOURCE_NAME, 1)); + } } } @Nullable public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable MediaType contentType) throws IOException { - if (enabled && originalSource != null) { + return applyFilters(originalSource, contentType, enabled, filter); + } + + @Nullable + private BytesReference applyFilters( + @Nullable BytesReference originalSource, + @Nullable MediaType contentType, + boolean isProvidedSourceEnabled, + @Nullable final Function, Map> filters + ) throws IOException { + if (isProvidedSourceEnabled && originalSource != null) { // Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data - if (filter != null) { + if (filters != null) { // we don't update the context source if we filter, we want to keep it as is... Tuple> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType); - Map filteredSource = filter.apply(mapTuple.v2()); + Map filteredSource = filters.apply(mapTuple.v2()); BytesStreamOutput bStream = new BytesStreamOutput(); MediaType actualContentType = mapTuple.v1(); XContentBuilder builder = MediaTypeRegistry.contentBuilder(actualContentType, bStream).map(filteredSource); diff --git a/server/src/test/java/org/opensearch/index/mapper/SourceFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/SourceFieldMapperTests.java index 83d42fd423f08..4f3a4530b5475 100644 --- a/server/src/test/java/org/opensearch/index/mapper/SourceFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/SourceFieldMapperTests.java @@ -90,7 +90,8 @@ public void testNoFormat() throws Exception { XContentType.SMILE ) ); - + final IndexableField recoverySourceIndexableField = doc.rootDoc().getField("_recovery_source"); + assertNull(recoverySourceIndexableField); assertThat(MediaTypeRegistry.xContentType(doc.source()), equalTo(XContentType.SMILE)); } @@ -128,6 +129,52 @@ public void testIncludes() throws Exception { ) ); + IndexableField sourceField = doc.rootDoc().getField("_source"); + Map sourceAsMap; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, new BytesArray(sourceField.binaryValue()))) { + sourceAsMap = parser.map(); + } + final IndexableField recoverySourceIndexableField = doc.rootDoc().getField("_recovery_source"); + assertNotNull(recoverySourceIndexableField); + assertThat(sourceAsMap.containsKey("path1"), equalTo(true)); + assertThat(sourceAsMap.containsKey("path2"), equalTo(false)); + } + + public void testIncludesForRecoverySource() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("_source") + .array("includes", new String[] { "path1*" }) + .array("recovery_source_includes", new String[] { "path2*" }) + .endObject() + .endObject() + .endObject() + .toString(); + + DocumentMapper documentMapper = createIndex("test").mapperService() + .documentMapperParser() + .parse("type", new CompressedXContent(mapping)); + + ParsedDocument doc = documentMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .startObject("path1") + .field("field1", "value1") + .endObject() + .startObject("path2") + .field("field2", "value2") + .endObject() + .endObject() + ), + MediaTypeRegistry.JSON + ) + ); + IndexableField sourceField = doc.rootDoc().getField("_source"); Map sourceAsMap; try (XContentParser parser = createParser(JsonXContent.jsonXContent, new BytesArray(sourceField.binaryValue()))) { @@ -135,6 +182,39 @@ public void testIncludes() throws Exception { } assertThat(sourceAsMap.containsKey("path1"), equalTo(true)); assertThat(sourceAsMap.containsKey("path2"), equalTo(false)); + + final IndexableField recoverySourceIndexableField = doc.rootDoc().getField("_recovery_source"); + assertNotNull(recoverySourceIndexableField); + Map recoverySourceAsMap; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, new BytesArray(recoverySourceIndexableField.binaryValue()))) { + recoverySourceAsMap = parser.map(); + } + + assertThat(recoverySourceAsMap.containsKey("path1"), equalTo(false)); + assertThat(recoverySourceAsMap.containsKey("path2"), equalTo(true)); + } + + public void testNoRecoverySourceAndNoSource_whenBothAreDisabled() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("_source") + .field("enabled", "false") + .field("recovery_source_enabled", "false") + .endObject() + .endObject() + .endObject() + .toString(); + + DocumentMapperParser parser = createIndex("test").mapperService().documentMapperParser(); + DocumentMapper documentMapper = parser.parse("type", new CompressedXContent(mapping)); + BytesReference source = BytesReference.bytes(XContentFactory.jsonBuilder().startObject().field("field", "value").endObject()); + ParsedDocument doc = documentMapper.parse(new SourceToParse("test", "1", source, MediaTypeRegistry.JSON)); + + final IndexableField sourceIndexableField = doc.rootDoc().getField("_source"); + final IndexableField recoverySourceIndexableField = doc.rootDoc().getField("_recovery_source"); + assertNull(recoverySourceIndexableField); + assertNull(sourceIndexableField); } public void testExcludes() throws Exception { @@ -171,6 +251,52 @@ public void testExcludes() throws Exception { ) ); + IndexableField sourceField = doc.rootDoc().getField("_source"); + Map sourceAsMap; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, new BytesArray(sourceField.binaryValue()))) { + sourceAsMap = parser.map(); + } + final IndexableField recoverySourceIndexableField = doc.rootDoc().getField("_recovery_source"); + assertNotNull(recoverySourceIndexableField); + assertThat(sourceAsMap.containsKey("path1"), equalTo(false)); + assertThat(sourceAsMap.containsKey("path2"), equalTo(true)); + } + + public void testExcludesForRecoverySource() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("_source") + .array("excludes", "path1*") + .array("recovery_source_excludes", "path2*") + .endObject() + .endObject() + .endObject() + .toString(); + + DocumentMapper documentMapper = createIndex("test").mapperService() + .documentMapperParser() + .parse("type", new CompressedXContent(mapping)); + + ParsedDocument doc = documentMapper.parse( + new SourceToParse( + "test", + "1", + BytesReference.bytes( + XContentFactory.jsonBuilder() + .startObject() + .startObject("path1") + .field("field1", "value1") + .endObject() + .startObject("path2") + .field("field2", "value2") + .endObject() + .endObject() + ), + MediaTypeRegistry.JSON + ) + ); + IndexableField sourceField = doc.rootDoc().getField("_source"); Map sourceAsMap; try (XContentParser parser = createParser(JsonXContent.jsonXContent, new BytesArray(sourceField.binaryValue()))) { @@ -178,6 +304,15 @@ public void testExcludes() throws Exception { } assertThat(sourceAsMap.containsKey("path1"), equalTo(false)); assertThat(sourceAsMap.containsKey("path2"), equalTo(true)); + + final IndexableField recoverySourceIndexableField = doc.rootDoc().getField("_recovery_source"); + assertNotNull(recoverySourceIndexableField); + Map recoverySourceAsMap; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, new BytesArray(recoverySourceIndexableField.binaryValue()))) { + recoverySourceAsMap = parser.map(); + } + assertThat(recoverySourceAsMap.containsKey("path1"), equalTo(true)); + assertThat(recoverySourceAsMap.containsKey("path2"), equalTo(false)); } public void testEnabledNotUpdateable() throws Exception {