From 65850a0c7decd388cd3488cc828e86eb40f63eb6 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 28 May 2024 15:33:46 +0200 Subject: [PATCH] Handle nested fields explicitly in synthetic source. Ignored objects that appear on nested fields are now stored in the nested documents directly. --- .../indices.create/20_synthetic_source.yml | 222 ++++++++++++++++-- .../index/mapper/DocumentParser.java | 23 +- .../index/mapper/DocumentParserContext.java | 8 +- .../mapper/IgnoredSourceFieldMapper.java | 19 +- .../index/mapper/NestedObjectMapper.java | 158 ++++++++++++- .../mapper/IgnoredSourceFieldMapperTests.java | 12 +- .../index/mapper/MapperServiceTestCase.java | 36 ++- 7 files changed, 408 insertions(+), 70 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index a763d6e457490..3d95712d30b30 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -863,18 +863,22 @@ nested object: - '{ "create": { } }' - '{ "name": "aaaa", "nested_field": {"a": 1, "b": 2}, "nested_array": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 }] }' + - match: { errors: false } + - do: search: index: test - - match: { hits.total.value: 1 } - - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.nested_field.a: 1 } - - match: { hits.hits.0._source.nested_field.b: 2 } - - match: { hits.hits.0._source.nested_array.0.a: 10 } - - match: { hits.hits.0._source.nested_array.0.b: 20 } - - match: { hits.hits.0._source.nested_array.1.a: 100 } - - match: { hits.hits.0._source.nested_array.1.b: 200 } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.name: aaaa } + - length: { hits.hits.0._source.nested_field: 2 } + - match: { hits.hits.0._source.nested_field.a: 1 } + - match: { hits.hits.0._source.nested_field.b: 2 } + - length: { hits.hits.0._source.nested_array: 2 } + - match: { hits.hits.0._source.nested_array.0.a: 10 } + - match: { hits.hits.0._source.nested_array.0.b: 20 } + - match: { hits.hits.0._source.nested_array.1.a: 100 } + - match: { hits.hits.0._source.nested_array.1.b: 200 } --- @@ -906,15 +910,201 @@ nested object next to regular: - '{ "create": { } }' - '{ "name": "aaaa", "path": { "to": { "nested": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 } ], "regular": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 } ] } } }' + - match: { errors: false } + - do: search: index: test - - match: { hits.total.value: 1 } - - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.path.to.nested.0.a: 10 } - - match: { hits.hits.0._source.path.to.nested.0.b: 20 } - - match: { hits.hits.0._source.path.to.nested.1.a: 100 } - - match: { hits.hits.0._source.path.to.nested.1.b: 200 } - - match: { hits.hits.0._source.path.to.regular.a: [ 10, 100 ] } - - match: { hits.hits.0._source.path.to.regular.b: [ 20, 200 ] } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.name: aaaa } + - length: { hits.hits.0._source.path.to.nested: 2 } + - match: { hits.hits.0._source.path.to.nested.0.a: 10 } + - match: { hits.hits.0._source.path.to.nested.0.b: 20 } + - match: { hits.hits.0._source.path.to.nested.1.a: 100 } + - match: { hits.hits.0._source.path.to.nested.1.b: 200 } + - match: { hits.hits.0._source.path.to.regular.a: [ 10, 100 ] } + - match: { hits.hits.0._source.path.to.regular.b: [ 20, 200 ] } + + +--- +nested object with disabled: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + obj_field: + properties: + obj1: + enabled: false + sub_nested: + type: nested + nested_field: + type: nested + properties: + obj1: + enabled: false + nested_array: + type: nested + properties: + obj1: + enabled: false + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 0, "nested_field": {"a": 1, "b": 2, "obj1": { "foo": "bar", "k": [1, 2, 3]}}, "nested_array": [{ "a": 10, "b": 20, "obj1": [{"field1": 1, "field2": 2}, {"field3": 3, "field4": 4}]}, { "a": 100, "b": 200, "obj1": {"field5": 5, "field6": 6}}]}' + - '{ "create": { } }' + - '{ "id": 1, "obj_field": {"a": 1, "b": 2, "obj1": { "foo": "bar", "k": [1, 2, 3]}, "sub_nested": [{ "a": 10, "b": 20}, { "a": 100, "b": 200}]}}' + + - match: { errors: false } + + - do: + search: + index: test + sort: "id" + + - match: { hits.total.value: 2 } + - length: { hits.hits.0._source: 3 } + - match: { hits.hits.0._source.id: 0 } + - length: { hits.hits.0._source.nested_field: 3 } + - match: { hits.hits.0._source.nested_field.a: 1 } + - match: { hits.hits.0._source.nested_field.b: 2 } + - length: { hits.hits.0._source.nested_field.obj1: 2 } + - match: { hits.hits.0._source.nested_field.obj1.foo: "bar" } + - match: { hits.hits.0._source.nested_field.obj1.k: [1, 2, 3] } + - length: { hits.hits.0._source.nested_array: 2 } + - match: { hits.hits.0._source.nested_array.0.a: 10 } + - match: { hits.hits.0._source.nested_array.0.b: 20 } + - length: { hits.hits.0._source.nested_array.0.obj1: 2 } + - match: { hits.hits.0._source.nested_array.0.obj1.0.field1: 1 } + - match: { hits.hits.0._source.nested_array.0.obj1.0.field2: 2 } + - match: { hits.hits.0._source.nested_array.0.obj1.1.field3: 3 } + - match: { hits.hits.0._source.nested_array.0.obj1.1.field4: 4 } + - length: { hits.hits.0._source.nested_array.1: 3 } + - match: { hits.hits.0._source.nested_array.1.a: 100 } + - match: { hits.hits.0._source.nested_array.1.b: 200 } + - length: { hits.hits.0._source.nested_array.1.obj1: 2 } + - match: { hits.hits.0._source.nested_array.1.obj1.field5: 5 } + - match: { hits.hits.0._source.nested_array.1.obj1.field6: 6 } + - length: { hits.hits.1._source: 2 } + - match: { hits.hits.1._source.id: 1 } + - length: { hits.hits.1._source.obj_field: 4 } + - match: { hits.hits.1._source.obj_field.a: 1 } + - match: { hits.hits.1._source.obj_field.b: 2 } + - length: { hits.hits.1._source.obj_field.obj1: 2 } + - match: { hits.hits.1._source.obj_field.obj1.foo: "bar" } + - match: { hits.hits.1._source.obj_field.obj1.k: [ 1, 2, 3 ] } + - length: { hits.hits.1._source.obj_field.sub_nested: 2 } + - length: { hits.hits.1._source.obj_field.sub_nested.0: 2 } + - match: { hits.hits.1._source.obj_field.sub_nested.0.a: 10 } + - match: { hits.hits.1._source.obj_field.sub_nested.0.b: 20 } + - length: { hits.hits.1._source.obj_field.sub_nested.1: 2 } + - match: { hits.hits.1._source.obj_field.sub_nested.1.a: 100 } + - match: { hits.hits.1._source.obj_field.sub_nested.1.b: 200 } + + +--- +doubly nested object: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + obj_field: + properties: + obj1: + enabled: false + sub_nested: + type: nested + nested_field: + type: nested + properties: + sub_nested_field: + type: nested + properties: + obj1: + enabled: false + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 0, "nested_field": {"a": 1, "b": 2, "sub_nested_field": {"foo": "bar", "k": [1, 2, 3]}}}' + - '{ "create": { } }' + - '{ "id": 1, "nested_field": {"a": 2, "b": 3, "sub_nested_field": [{"foo": "baz", "k": [4, 50, 6]}, {"foo": "bar"}]}}' + - '{ "create": { } }' + - '{ "id": 2, "nested_field": [{"a": 20, "b": 30, "sub_nested_field": [{"foo": "foobar", "k": [7, 8, 9]}, {"k": [400, 500, 6]}]}, {"a": 0, "b": 33, "sub_nested_field": [{"other": "value", "k": [1, 2, -3]}, {"number": 42}]}]}' + - '{ "create": { } }' + - '{ "id": 3}' + + - match: { errors: false } + + - do: + search: + index: test + sort: "id" + + - match: { hits.total.value: 4 } + - length: { hits.hits.0._source: 2 } + - match: { hits.hits.0._source.id: 0 } + - length: { hits.hits.0._source.nested_field: 3 } + - match: { hits.hits.0._source.nested_field.a: 1 } + - match: { hits.hits.0._source.nested_field.b: 2 } + - length: { hits.hits.0._source.nested_field.sub_nested_field: 2 } + - match: { hits.hits.0._source.nested_field.sub_nested_field.foo: "bar" } + - match: { hits.hits.0._source.nested_field.sub_nested_field.k: [ 1, 2, 3 ] } + - length: { hits.hits.1._source: 2 } + - match: { hits.hits.1._source.id: 1 } + - length: { hits.hits.1._source.nested_field: 3 } + - match: { hits.hits.1._source.nested_field.a: 2 } + - match: { hits.hits.1._source.nested_field.b: 3 } + - length: { hits.hits.1._source.nested_field.sub_nested_field: 2 } + - length: { hits.hits.1._source.nested_field.sub_nested_field.0: 2 } + - match: { hits.hits.1._source.nested_field.sub_nested_field.0.foo: "baz" } + - match: { hits.hits.1._source.nested_field.sub_nested_field.0.k: [ 4, 6, 50 ] } + - length: { hits.hits.1._source.nested_field.sub_nested_field.1: 1 } + - match: { hits.hits.1._source.nested_field.sub_nested_field.1.foo: "bar" } + - length: { hits.hits.2._source: 2 } + - match: { hits.hits.2._source.id: 2 } + - length: { hits.hits.2._source.nested_field: 2 } + - length: { hits.hits.2._source.nested_field.0: 3 } + - match: { hits.hits.2._source.nested_field.0.a: 20 } + - match: { hits.hits.2._source.nested_field.0.b: 30 } + - length: { hits.hits.2._source.nested_field.0.sub_nested_field: 2 } + - length: { hits.hits.2._source.nested_field.0.sub_nested_field.0: 2 } + - match: { hits.hits.2._source.nested_field.0.sub_nested_field.0.foo: "foobar" } + - match: { hits.hits.2._source.nested_field.0.sub_nested_field.0.k: [ 7, 8, 9 ] } + - length: { hits.hits.2._source.nested_field.0.sub_nested_field.1: 1 } + - match: { hits.hits.2._source.nested_field.0.sub_nested_field.1.k: [6, 400, 500] } + - length: { hits.hits.2._source.nested_field.1: 3 } + - match: { hits.hits.2._source.nested_field.1.a: 0 } + - match: { hits.hits.2._source.nested_field.1.b: 33 } + - length: { hits.hits.2._source.nested_field.1.sub_nested_field: 2 } + - length: { hits.hits.2._source.nested_field.1.sub_nested_field.0: 2 } + - match: { hits.hits.2._source.nested_field.1.sub_nested_field.0.other: "value" } + - match: { hits.hits.2._source.nested_field.1.sub_nested_field.0.k: [ -3, 1, 2 ] } + - length: { hits.hits.2._source.nested_field.1.sub_nested_field.1: 1 } + - match: { hits.hits.2._source.nested_field.1.sub_nested_field.1.number: 42 } + - length: { hits.hits.3._source: 1 } + - match: { hits.hits.3._source.id: 3 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index a89a89472a678..fbdb285069556 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -132,7 +132,8 @@ private static void internalParseDocument(MetadataFieldMapper[] metadataFieldsMa new IgnoredSourceFieldMapper.NameValue( MapperService.SINGLE_MAPPING_NAME, 0, - XContentDataHelper.encodeToken(context.parser()) + XContentDataHelper.encodeToken(context.parser()), + context.doc() ) ); } else { @@ -268,7 +269,8 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio new IgnoredSourceFieldMapper.NameValue( context.parent().fullPath(), context.parent().fullPath().indexOf(currentFieldName), - XContentDataHelper.encodeToken(parser) + XContentDataHelper.encodeToken(parser), + context.doc() ) ); } else { @@ -287,20 +289,6 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio } if (context.parent().isNested()) { - // Handle a nested object that doesn't contain an array. Arrays are handled in #parseNonDynamicArray. - if (context.mappingLookup().isSourceSynthetic() && context.getClonedSource() == false) { - Tuple tuple = XContentDataHelper.cloneSubContext(context); - context.addIgnoredField( - new IgnoredSourceFieldMapper.NameValue( - context.parent().name(), - context.parent().fullPath().indexOf(context.parent().simpleName()), - XContentDataHelper.encodeXContentBuilder(tuple.v2()) - ) - ); - context = tuple.v1(); - token = context.parser().currentToken(); - parser = context.parser(); - } context = context.createNestedContext((NestedObjectMapper) context.parent()); } @@ -661,9 +649,8 @@ private static void parseNonDynamicArray( && (objectMapper.storeArraySource() || objectMapper.dynamic == ObjectMapper.Dynamic.RUNTIME); boolean fieldWithFallbackSyntheticSource = mapper instanceof FieldMapper fieldMapper && fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK; - boolean nestedObject = mapper instanceof NestedObjectMapper; boolean dynamicRuntimeContext = context.dynamic() == ObjectMapper.Dynamic.RUNTIME; - if (objectRequiresStoringSource || fieldWithFallbackSyntheticSource || nestedObject || dynamicRuntimeContext) { + if (objectRequiresStoringSource || fieldWithFallbackSyntheticSource || dynamicRuntimeContext) { Tuple tuple = XContentDataHelper.cloneSubContext(context); context.addIgnoredField( IgnoredSourceFieldMapper.NameValue.fromContext( diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index fe1ad85d6a7c1..f47d86b746a38 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -24,13 +24,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeSet; /** * Context used when parsing incoming documents. Holds everything that is needed to parse a document as well as @@ -106,7 +104,7 @@ public int get() { private final MappingParserContext mappingParserContext; private final SourceToParse sourceToParse; private final Set ignoredFields; - private final Set ignoredFieldValues; + private final List ignoredFieldValues; private final Map> dynamicMappers; private final DynamicMapperSize dynamicMappersSize; private final Map dynamicObjectMappers; @@ -128,7 +126,7 @@ private DocumentParserContext( MappingParserContext mappingParserContext, SourceToParse sourceToParse, Set ignoreFields, - Set ignoredFieldValues, + List ignoredFieldValues, Map> dynamicMappers, Map dynamicObjectMappers, Map> dynamicRuntimeFields, @@ -198,7 +196,7 @@ protected DocumentParserContext( mappingParserContext, source, new HashSet<>(), - new TreeSet<>(Comparator.comparing(IgnoredSourceFieldMapper.NameValue::name)), + new ArrayList<>(), new HashMap<>(), new HashMap<>(), new HashMap<>(), diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index 6e243e3575d37..ec13a8916ccd6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -13,11 +13,15 @@ import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.index.query.support.NestedScope; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; +import java.util.List; /** @@ -53,7 +57,7 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper { * the full name of the parent field * - the value, encoded as a byte array */ - public record NameValue(String name, int parentOffset, BytesRef value) { + public record NameValue(String name, int parentOffset, BytesRef value, LuceneDocument doc) { /** * Factory method, for use with fields under the parent object. It doesn't apply to objects at root level. * @param context the parser context, containing a non-null parent @@ -62,7 +66,7 @@ public record NameValue(String name, int parentOffset, BytesRef value) { */ public static NameValue fromContext(DocumentParserContext context, String name, BytesRef value) { int parentOffset = context.parent() instanceof RootObjectMapper ? 0 : context.parent().fullPath().length() + 1; - return new NameValue(name, parentOffset, value); + return new NameValue(name, parentOffset, value, context.doc()); } String getParentFieldName() { @@ -112,8 +116,11 @@ protected String contentType() { public void postParse(DocumentParserContext context) { // Ignored values are only expected in synthetic mode. assert context.getIgnoredFieldValues().isEmpty() || context.mappingLookup().isSourceSynthetic(); - for (NameValue nameValue : context.getIgnoredFieldValues()) { - context.doc().add(new StoredField(NAME, encode(nameValue))); + List ignoredFieldValues = new ArrayList<>(context.getIgnoredFieldValues()); + // ensure consistent ordering when retrieving synthetic source + Collections.sort(ignoredFieldValues, Comparator.comparing(NameValue::name)); + for (NameValue nameValue : ignoredFieldValues) { + nameValue.doc().add(new StoredField(NAME, encode(nameValue))); } } @@ -136,13 +143,13 @@ static NameValue decode(Object field) { int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET; String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8); BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4); - return new NameValue(name, parentOffset, value); + return new NameValue(name, parentOffset, value, null); } // This mapper doesn't contribute to source directly as it has no access to the object structure. Instead, its contents // are loaded by SourceLoader and passed to object mappers that, in turn, write their ignore fields at the appropriate level. @Override - public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(NestedScope nestedScope) { return SourceLoader.SyntheticFieldLoader.NOTHING; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java index e98b8ba7deba1..cb8d9e7ba8d5f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java @@ -8,16 +8,33 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.util.BitSet; import org.elasticsearch.common.Explicit; +import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; +import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.query.support.NestedScope; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.elasticsearch.index.mapper.SourceFieldMetrics.NOOP; /** * A Mapper for nested objects @@ -31,10 +48,12 @@ public static class Builder extends ObjectMapper.Builder { private Explicit includeInRoot = Explicit.IMPLICIT_FALSE; private Explicit includeInParent = Explicit.IMPLICIT_FALSE; private final IndexVersion indexCreatedVersion; + private final Function bitSetProducer; - public Builder(String name, IndexVersion indexCreatedVersion) { + public Builder(String name, IndexVersion indexCreatedVersion, Function bitSetProducer) { super(name, Explicit.IMPLICIT_TRUE); this.indexCreatedVersion = indexCreatedVersion; + this.bitSetProducer = bitSetProducer; } Builder includeInRoot(boolean includeInRoot) { @@ -84,7 +103,9 @@ public NestedObjectMapper build(MapperBuilderContext context) { includeInParent, includeInRoot, nestedTypePath, - NestedPathFieldMapper.filter(indexCreatedVersion, nestedTypePath) + NestedPathFieldMapper.filter(indexCreatedVersion, nestedTypePath), + bitSetProducer, + indexCreatedVersion ); } } @@ -96,7 +117,11 @@ public Mapper.Builder parse(String name, Map node, MappingParser if (parseSubobjects(node).explicit()) { throw new MapperParsingException("Nested type [" + name + "] does not support [subobjects] parameter"); } - NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(name, parserContext.indexVersionCreated()); + NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder( + name, + parserContext.indexVersionCreated(), + query -> parserContext.bitSetProducer(query) + ); parseNested(name, node, builder); parseObjectFields(node, parserContext, builder); return builder; @@ -137,6 +162,8 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) { private final Explicit includeInParent; private final String nestedTypePath; private final Query nestedTypeFilter; + private final Function bitSetProducer; + private final IndexVersion indexVersionCreated; NestedObjectMapper( String name, @@ -147,13 +174,17 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) { Explicit includeInParent, Explicit includeInRoot, String nestedTypePath, - Query nestedTypeFilter + Query nestedTypeFilter, + Function bitSetProducer, + IndexVersion indexVersionCreated ) { super(name, fullPath, enabled, Explicit.IMPLICIT_TRUE, Explicit.IMPLICIT_FALSE, dynamic, mappers); this.nestedTypePath = nestedTypePath; this.nestedTypeFilter = nestedTypeFilter; this.includeInParent = includeInParent; this.includeInRoot = includeInRoot; + this.bitSetProducer = bitSetProducer; + this.indexVersionCreated = indexVersionCreated; } public Query nestedTypeFilter() { @@ -183,7 +214,7 @@ public Map getChildren() { @Override public ObjectMapper.Builder newBuilder(IndexVersion indexVersionCreated) { - NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(simpleName(), indexVersionCreated); + NestedObjectMapper.Builder builder = new NestedObjectMapper.Builder(simpleName(), indexVersionCreated, bitSetProducer); builder.enabled = enabled; builder.dynamic = dynamic; builder.includeInRoot = includeInRoot; @@ -202,7 +233,9 @@ NestedObjectMapper withoutMappers() { includeInParent, includeInRoot, nestedTypePath, - nestedTypeFilter + nestedTypeFilter, + bitSetProducer, + indexVersionCreated ); } @@ -271,7 +304,9 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex incInParent, incInRoot, nestedTypePath, - nestedTypeFilter + nestedTypeFilter, + bitSetProducer, + indexVersionCreated ); } @@ -293,8 +328,111 @@ protected MapperMergeContext createChildContext(MapperMergeContext mapperMergeCo } @Override - public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { - // IgnoredSourceFieldMapper integration takes care of writing the source for nested objects. - return SourceLoader.SyntheticFieldLoader.NOTHING; + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(NestedScope nestedScope) { + final NestedObjectMapper mapper = this; + SourceLoader sourceLoader = new SourceLoader.Synthetic(() -> { + nestedScope.nextLevel(mapper); + try { + return super.syntheticFieldLoader(nestedScope, mappers.values().stream(), true); + } finally { + nestedScope.previousLevel(); + } + }, NOOP); + var storedFieldsLoader = StoredFieldLoader.create(false, sourceLoader.requiredStoredFields()); + var parentFilter = nestedScope.getObjectMapper() == null + ? Queries.newNonNestedFilter(indexVersionCreated) + : nestedScope.getObjectMapper().nestedTypeFilter(); + return new NestedFieldLoader(storedFieldsLoader, sourceLoader, () -> bitSetProducer.apply(parentFilter), nestedTypeFilter); + } + + private class NestedFieldLoader implements SourceLoader.SyntheticFieldLoader { + private final org.elasticsearch.index.fieldvisitor.StoredFieldLoader storedFieldsLoader; + private final SourceLoader sourceLoader; + private final Supplier parentBitSetProducer; + private final Query childFilter; + + private List children; + private LeafStoredFieldLoader leafStoredFields; + private SourceLoader.Leaf leafSource; + + private NestedFieldLoader( + org.elasticsearch.index.fieldvisitor.StoredFieldLoader storedFieldsLoader, + SourceLoader sourceLoader, + Supplier parentBitSetProducer, + Query childFilter + ) { + this.storedFieldsLoader = storedFieldsLoader; + this.sourceLoader = sourceLoader; + this.parentBitSetProducer = parentBitSetProducer; + this.childFilter = childFilter; + } + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + this.children = null; + this.leafStoredFields = storedFieldsLoader.getLoader(leafReader.getContext(), null); + this.leafSource = sourceLoader.leaf(leafReader, null); + IndexSearcher searcher = new IndexSearcher(leafReader); + searcher.setQueryCache(null); + var childScorer = searcher.createWeight(childFilter, ScoreMode.COMPLETE_NO_SCORES, 1f).scorer(leafReader.getContext()); + var parentDocs = parentBitSetProducer.get().getBitSet(leafReader.getContext()); + return parentDoc -> { + this.children = childScorer != null ? getChildren(parentDoc, parentDocs, childScorer.iterator()) : List.of(); + return children.size() > 0; + }; + } + + @Override + public boolean hasValue() { + return children.size() > 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + assert (children != null && children.size() > 0); + switch (children.size()) { + case 1: + b.startObject(simpleName()); + leafStoredFields.advanceTo(children.get(0)); + leafSource.write(leafStoredFields, children.get(0), b); + b.endObject(); + break; + + default: + b.startArray(simpleName()); + for (int childId : children) { + b.startObject(); + leafStoredFields.advanceTo(childId); + leafSource.write(leafStoredFields, childId, b); + b.endObject(); + } + b.endArray(); + break; + } + } + + @Override + public String fieldName() { + return name(); + } + } + + private static List getChildren(int parentDoc, BitSet parentDocs, DocIdSetIterator childIt) throws IOException { + final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); + int childDocId = childIt.docID(); + if (childDocId <= prevParentDoc) { + childDocId = childIt.advance(prevParentDoc + 1); + } + + List res = new ArrayList<>(); + for (; childDocId < parentDoc; childDocId = childIt.nextDoc()) { + res.add(childDocId); + } + return res; } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 71a0e001dc72a..d3c475ebbcc79 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -87,8 +87,8 @@ public void testMultipleIgnoredFieldsRootObject() throws IOException { String stringValue = randomAlphaOfLength(20); String syntheticSource = getSyntheticSourceWithFieldLimit(b -> { b.field("boolean_value", booleanValue); - b.field("string_value", stringValue); b.field("int_value", intValue); + b.field("string_value", stringValue); }); assertEquals(String.format(Locale.ROOT, """ {"boolean_value":%s,"int_value":%s,"string_value":"%s"}""", booleanValue, intValue, stringValue), syntheticSource); @@ -640,7 +640,7 @@ public void testNestedObjectWithField() throws IOException { b -> { b.startObject("path").field("foo", "A").field("bar", "B").endObject(); } ); assertEquals(""" - {"path":{"foo":"A","bar":"B"}}""", syntheticSource); + {"path":{"bar":"B","foo":"A"}}""", syntheticSource); } public void testNestedObjectWithArray() throws IOException { @@ -665,7 +665,7 @@ public void testNestedObjectWithArray() throws IOException { b.endArray(); }); assertEquals(""" - {"path":[{"foo":"A","bar":"B"},{"foo":"C","bar":"D"}]}""", syntheticSource); + {"path":[{"bar":"B","foo":"A"},{"bar":"D","foo":"C"}]}""", syntheticSource); } public void testNestedSubobjectWithField() throws IOException { @@ -705,7 +705,7 @@ public void testNestedSubobjectWithField() throws IOException { b.endObject(); }); assertEquals(String.format(Locale.ROOT, """ - {"boolean_value":%s,"path":{"int_value":%s,"to":{"foo":"A","bar":"B"}}}""", booleanValue, intValue), syntheticSource); + {"boolean_value":%s,"path":{"int_value":%s,"to":{"bar":"B","foo":"A"}}}""", booleanValue, intValue), syntheticSource); } public void testNestedSubobjectWithArray() throws IOException { @@ -751,7 +751,7 @@ public void testNestedSubobjectWithArray() throws IOException { }); assertEquals( String.format(Locale.ROOT, """ - {"boolean_value":%s,"path":{"int_value":%s,"to":[{"foo":"A","bar":"B"},{"foo":"C","bar":"D"}]}}""", booleanValue, intValue), + {"boolean_value":%s,"path":{"int_value":%s,"to":[{"bar":"B","foo":"A"},{"bar":"D","foo":"C"}]}}""", booleanValue, intValue), syntheticSource ); } @@ -774,7 +774,7 @@ public void testNestedObjectIncludeInRoot() throws IOException { b -> { b.startObject("path").field("foo", "A").field("bar", "B").endObject(); } ); assertEquals(""" - {"path":{"foo":"A","bar":"B"}}""", syntheticSource); + {"path":{"bar":"B","foo":"A"}}""", syntheticSource); } public void testNoDynamicObjectSingleField() throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 791f6230933ad..14497f1249d3c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; @@ -34,6 +35,7 @@ import org.elasticsearch.common.util.MockPageCacheRecycler; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.AnalyzerScope; @@ -211,6 +213,13 @@ protected final MapperService createMapperService(IndexVersion version, Settings ).getMapperRegistry(); SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of()); + BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, new BitsetFilterCache.Listener() { + @Override + public void onCache(ShardId shardId, Accountable accountable) {} + + @Override + public void onRemoval(ShardId shardId, Accountable accountable) {} + }); return new MapperService( () -> TransportVersion.current(), indexSettings, @@ -223,8 +232,8 @@ protected final MapperService createMapperService(IndexVersion version, Settings }, indexSettings.getMode().buildIdFieldMapper(idFieldDataEnabled), this::compileScript, + query -> bitsetFilterCache.getBitSetProducer(query), MapperMetrics.NOOP - ); } @@ -716,12 +725,14 @@ protected RandomIndexWriter indexWriterForSyntheticSource(Directory directory) t protected final String syntheticSource(DocumentMapper mapper, CheckedConsumer build) throws IOException { try (Directory directory = newDirectory()) { RandomIndexWriter iw = indexWriterForSyntheticSource(directory); - LuceneDocument doc = mapper.parse(source(build)).rootDoc(); - iw.addDocument(doc); + ParsedDocument doc = mapper.parse(source(build)); + doc.updateSeqID(0, 0); + doc.version().setLongValue(0); + iw.addDocuments(doc.docs()); iw.close(); - try (DirectoryReader reader = DirectoryReader.open(directory)) { - String syntheticSource = syntheticSource(mapper, reader, 0); - roundTripSyntheticSource(mapper, syntheticSource, reader); + try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { + String syntheticSource = syntheticSource(mapper, indexReader, doc.docs().size() - 1); + roundTripSyntheticSource(mapper, syntheticSource, indexReader); return syntheticSource; } } @@ -740,10 +751,13 @@ protected final String syntheticSource(DocumentMapper mapper, CheckedConsumer