From 06eb0727c22db5559cc9cc9da46ba423bd3663c5 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Tue, 29 Oct 2024 21:12:43 +0200 Subject: [PATCH] Use flattened names in ignored source (#115822) * Use flattened names in ignored source * spotless * fix rest compat * fix unittests * expand dots --- rest-api-spec/build.gradle | 4 + .../indices.create/20_synthetic_source.yml | 20 +-- .../21_synthetic_source_stored.yml | 49 +++++++ .../index/mapper/DocumentParserContext.java | 6 +- .../mapper/DotExpandingXContentParser.java | 4 + .../index/mapper/XContentDataHelper.java | 15 +- .../mapper/IgnoredSourceFieldMapperTests.java | 130 ++++++++++++++++++ 7 files changed, 211 insertions(+), 17 deletions(-) diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index 6cc2028bffa39..b9064ab1d79ad 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -59,6 +59,10 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count") task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility") task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility") + task.skipTest("indices.create/20_synthetic_source/object with dynamic override", "temporary until backported") + task.skipTest("indices.create/20_synthetic_source/object with unmapped fields", "temporary until backported") + task.skipTest("indices.create/20_synthetic_source/empty object with unmapped fields", "temporary until backported") + task.skipTest("indices.create/20_synthetic_source/nested object with unmapped fields", "temporary until backported") task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "temporary until backported") task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index a871d2ac0ae15..258dfeb57e00c 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1,6 +1,6 @@ object with unmapped fields: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -41,13 +41,13 @@ object with unmapped fields: - match: { hits.hits.0._source.some_string: AaAa } - match: { hits.hits.0._source.some_int: 1000 } - match: { hits.hits.0._source.some_double: 123.456789 } - - match: { hits.hits.0._source.a.very.deeply.nested.field: AAAA } + - match: { hits.hits.0._source.a: { very.deeply.nested.field: AAAA } } - match: { hits.hits.0._source.some_bool: true } - match: { hits.hits.1._source.name: bbbb } - match: { hits.hits.1._source.some_string: BbBb } - match: { hits.hits.1._source.some_int: 2000 } - match: { hits.hits.1._source.some_double: 321.987654 } - - match: { hits.hits.1._source.a.very.deeply.nested.field: BBBB } + - match: { hits.hits.1._source.a: { very.deeply.nested.field: BBBB } } --- @@ -100,7 +100,7 @@ unmapped arrays: --- nested object with unmapped fields: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -143,16 +143,16 @@ nested object with unmapped fields: - match: { hits.total.value: 2 } - match: { hits.hits.0._source.path.to.name: aaaa } - match: { hits.hits.0._source.path.to.surname: AaAa } - - match: { hits.hits.0._source.path.some.other.name: AaAaAa } + - match: { hits.hits.0._source.path.some.other\.name: AaAaAa } - match: { hits.hits.1._source.path.to.name: bbbb } - match: { hits.hits.1._source.path.to.surname: BbBb } - - match: { hits.hits.1._source.path.some.other.name: BbBbBb } + - match: { hits.hits.1._source.path.some.other\.name: BbBbBb } --- empty object with unmapped fields: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -191,7 +191,7 @@ empty object with unmapped fields: - match: { hits.total.value: 1 } - match: { hits.hits.0._source.path.to.surname: AaAa } - - match: { hits.hits.0._source.path.some.other.name: AaAaAa } + - match: { hits.hits.0._source.path.some.other\.name: AaAaAa } --- @@ -434,7 +434,7 @@ mixed disabled and enabled objects: --- object with dynamic override: - requires: - cluster_features: ["mapper.ignored_source.dont_expand_dots"] + cluster_features: ["mapper.ignored_source.dont_expand_dots", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -475,7 +475,7 @@ object with dynamic override: - match: { hits.hits.0._source.path_no.to: { a.very.deeply.nested.field: A } } - match: { hits.hits.0._source.path_runtime.name: bar } - match: { hits.hits.0._source.path_runtime.some_int: 20 } - - match: { hits.hits.0._source.path_runtime.to.a.very.deeply.nested.field: B } + - match: { hits.hits.0._source.path_runtime.to: { a.very.deeply.nested.field: B } } --- diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml index 6a4e92f694220..f3545bb0a3f0e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml @@ -1249,3 +1249,52 @@ index param - nested object with stored array: - match: { hits.hits.1._source.nested.0.b.1.c: 300 } - match: { hits.hits.1._source.nested.1.b.0.c: 40 } - match: { hits.hits.1._source.nested.1.b.1.c: 400 } + + +--- +index param - flattened fields: + - requires: + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] + reason: requires keeping array source + + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + synthetic_source_keep: arrays + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + outer: + properties: + inner: + type: object + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "name": "A", "outer": { "inner": [ { "a.b": "AA", "a.c": "AAA" } ] } }' + - '{ "create": { } }' + - '{ "name": "B", "outer": { "inner": [ { "a.x.y.z": "BB", "a.z.y.x": "BBB" } ] } }' + + + - match: { errors: false } + + - do: + search: + index: test + sort: name + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: A } + - match: { hits.hits.0._source.outer.inner: [{ a.b: AA, a.c: AAA }] } + - match: { hits.hits.1._source.name: B } + - match: { hits.hits.1._source.outer.inner: [{ a.x.y.z: BB, a.z.y.x: BBB }] } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 3b1f1a6d2809a..c884d68c8f0ee 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -528,11 +528,7 @@ public final boolean addDynamicMapper(Mapper mapper) { if (canAddIgnoredField()) { try { addIgnoredField( - IgnoredSourceFieldMapper.NameValue.fromContext( - this, - mapper.fullPath(), - XContentDataHelper.encodeToken(parser()) - ) + IgnoredSourceFieldMapper.NameValue.fromContext(this, mapper.fullPath(), encodeFlattenedToken()) ); } catch (IOException e) { throw new IllegalArgumentException("failed to parse field [" + mapper.fullPath() + " ]", e); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java index fc003e709cbca..42784e0974417 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java @@ -34,6 +34,10 @@ */ class DotExpandingXContentParser extends FilterXContentParserWrapper { + static boolean isInstance(XContentParser parser) { + return parser instanceof WrappingParser; + } + private static final class WrappingParser extends FilterXContentParser { private final ContentPath contentPath; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java index 8bacaf8505f91..dee5ff92040a9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/XContentDataHelper.java @@ -221,8 +221,11 @@ static Tuple cloneSubContextWithParser(Do private static Tuple cloneSubContextParserConfiguration(DocumentParserContext context) throws IOException { XContentParser parser = context.parser(); + var oldValue = context.path().isWithinLeafObject(); + context.path().setWithinLeafObject(true); XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent()); builder.copyCurrentStructure(parser); + context.path().setWithinLeafObject(oldValue); XContentParserConfiguration configuration = XContentParserConfiguration.EMPTY.withRegistry(parser.getXContentRegistry()) .withDeprecationHandler(parser.getDeprecationHandler()) @@ -235,9 +238,17 @@ private static DocumentParserContext cloneDocumentParserContext( XContentParserConfiguration configuration, XContentBuilder builder ) throws IOException { - DocumentParserContext subcontext = context.switchParser( - XContentHelper.createParserNotCompressed(configuration, BytesReference.bytes(builder), context.parser().contentType()) + XContentParser newParser = XContentHelper.createParserNotCompressed( + configuration, + BytesReference.bytes(builder), + context.parser().contentType() ); + if (DotExpandingXContentParser.isInstance(context.parser())) { + // If we performed dot expanding originally we need to continue to do so when we replace the parser. + newParser = DotExpandingXContentParser.expandDots(newParser, context.path()); + } + + DocumentParserContext subcontext = context.switchParser(newParser); subcontext.setRecordedSource(); // Avoids double-storing parts of the source for the same parser subtree. subcontext.parser().nextToken(); return subcontext; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 7a4ce8bcb03fa..884372d249287 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -2075,6 +2075,136 @@ public void testDisabledObjectWithFlatFields() throws IOException { {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); } + public void testRegularObjectWithFlatFields() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("top").field("type", "object").field("synthetic_source_keep", "all").endObject(); + })).documentMapper(); + + CheckedConsumer document = b -> { + b.startObject("top"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + }; + + var syntheticSource = syntheticSource(documentMapper, document); + assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource); + + CheckedConsumer documentWithArray = b -> { + b.startArray("top"); + b.startObject(); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + b.startObject(); + b.field("file.name", "B"); + b.field("file.line", 20); + b.endObject(); + b.endArray(); + }; + + var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray); + assertEquals(""" + {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); + } + + public void testRegularObjectWithFlatFieldsInsideAnArray() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("top"); + b.startObject("properties"); + { + b.startObject("inner").field("type", "object").field("synthetic_source_keep", "all").endObject(); + } + b.endObject(); + b.endObject(); + })).documentMapper(); + + CheckedConsumer document = b -> { + b.startArray("top"); + b.startObject(); + { + b.startObject("inner"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + } + b.endObject(); + b.endArray(); + }; + + var syntheticSource = syntheticSource(documentMapper, document); + assertEquals("{\"top\":{\"inner\":{\"file.name\":\"A\",\"file.line\":10}}}", syntheticSource); + + CheckedConsumer documentWithArray = b -> { + b.startArray("top"); + b.startObject(); + { + b.startObject("inner"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + } + b.endObject(); + b.startObject(); + { + b.startObject("inner"); + b.field("file.name", "B"); + b.field("file.line", 20); + b.endObject(); + } + b.endObject(); + b.endArray(); + }; + + var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray); + assertEquals(""" + {"top":{"inner":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}}""", syntheticSourceWithArray); + } + + public void testIgnoredDynamicObjectWithFlatFields() throws IOException { + var syntheticSource = getSyntheticSourceWithFieldLimit(b -> { + b.startObject("top"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + }); + assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource); + + var syntheticSourceWithArray = getSyntheticSourceWithFieldLimit(b -> { + b.startArray("top"); + b.startObject(); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + b.startObject(); + b.field("file.name", "B"); + b.field("file.line", 20); + b.endObject(); + b.endArray(); + }); + assertEquals(""" + {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); + } + + public void testStoredArrayWithFlatFields() throws IOException { + DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> { + b.startObject("outer").startObject("properties"); + { + b.startObject("inner").field("type", "object").endObject(); + } + b.endObject().endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startObject("outer").startArray("inner"); + { + b.startObject().field("a.b", "a.b").field("a.c", "a.c").endObject(); + } + b.endArray().endObject(); + }); + assertEquals(""" + {"outer":{"inner":[{"a.b":"a.b","a.c":"a.c"}]}}""", syntheticSource); + } + protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) throws IOException { // We exclude ignored source field since in some cases it contains an exact copy of a part of document source.