Skip to content

Commit

Permalink
Track source for objects and fields with [synthetic_source_keep:arrays] in arrays as ignored (elastic#116065)
Browse files Browse the repository at this point in the history

* Track source for objects and fields with [synthetic_source_keep:arrays] in arrays as ignored

* Update TransportResumeFollowActionTests.java

* rest compat fixes

* rest compat fixes

* update test
  • Loading branch information
kkrik-es authored Nov 4, 2024
1 parent 3fc349b commit 6cf4536
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 263 deletions.
6 changes: 2 additions & 4 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,8 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count")
task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility")
task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility")
task.skipTest("indices.create/20_synthetic_source/object with dynamic override", "temporary until backported")
task.skipTest("indices.create/20_synthetic_source/object with unmapped fields", "temporary until backported")
task.skipTest("indices.create/20_synthetic_source/empty object with unmapped fields", "temporary until backported")
task.skipTest("indices.create/20_synthetic_source/nested object with unmapped fields", "temporary until backported")
task.skipTest("indices.create/21_synthetic_source_stored/index param - nested array within array - disabled second pass", "temporary until backported")
task.skipTest("indices.create/21_synthetic_source_stored/index param - root arrays", "temporary until backported")
task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "temporary until backported")
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
task.skipTest("cat.shards/10_basic/Help", "sync_id is removed in 9.0")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,11 +368,8 @@ object param - nested object with stored array:
sort: name
- match: { hits.total.value: 2 }
- match: { hits.hits.0._source.name: A }
# due to a workaround for #115261
- match: { hits.hits.0._source.nested_array_regular.0.b.0.c: 10 }
- match: { hits.hits.0._source.nested_array_regular.0.b.1.c: 100 }
- match: { hits.hits.0._source.nested_array_regular.1.b.0.c: 20 }
- match: { hits.hits.0._source.nested_array_regular.1.b.1.c: 200 }
- match: { hits.hits.0._source.nested_array_regular.0.b.c: [ 10, 100 ] }
- match: { hits.hits.0._source.nested_array_regular.1.b.c: [ 20, 200 ] }
- match: { hits.hits.1._source.name: B }
- match: { hits.hits.1._source.nested_array_stored.0.b.0.c: 10 }
- match: { hits.hits.1._source.nested_array_stored.0.b.1.c: 100 }
Expand Down Expand Up @@ -427,55 +424,6 @@ index param - nested array within array:
- match: { hits.hits.0._source.path.to.some.3.id: [ 1000, 2000 ] }


---
# Verifies that arrays nested inside arrays keep their per-element source when
# [synthetic_source_keep: arrays] is set, with the second doc-parsing pass disabled.
index param - nested array within array - disabled second pass:
  - requires:
      cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"]
      reason: requires tracking ignored source

  - do:
      indices.create:
        index: test
        body:
          settings:
            index:
              synthetic_source:
                enable_second_doc_parsing_pass: false
              mapping.source.mode: synthetic
          mappings:
            properties:
              name:
                type: keyword
              path:
                properties:
                  to:
                    properties:
                      some:
                        synthetic_source_keep: arrays
                        properties:
                          id:
                            type: integer

  - do:
      bulk:
        index: test
        refresh: true
        body:
          - '{ "create": { } }'
          - '{ "name": "A", "path": [ { "to": [ { "some" : [ { "id": 10 }, { "id": [1, 3, 2] } ] }, { "some": { "id": 100 } } ] }, { "to": { "some": { "id": [1000, 2000] } } } ] }'
  - match: { errors: false }

  - do:
      search:
        index: test
        sort: name
  - match: { hits.hits.0._source.name: A }
  - length: { hits.hits.0._source.path.to.some: 2}
  - match: { hits.hits.0._source.path.to.some.0.id: 10 }
  - match: { hits.hits.0._source.path.to.some.1.id: [ 1, 3, 2] }


---
# 112156
stored field under object with store_array_source:
Expand Down Expand Up @@ -944,8 +892,10 @@ index param - root arrays:
- match: { hits.hits.1._source.obj.1.span.id: "2" }

- match: { hits.hits.2._source.id: 3 }
- match: { hits.hits.2._source.obj_default.trace.id: [aa, bb] }
- match: { hits.hits.2._source.obj_default.span.id: "2" }
- match: { hits.hits.2._source.obj_default.trace.0.id: bb }
- match: { hits.hits.2._source.obj_default.trace.1.id: aa }
- match: { hits.hits.2._source.obj_default.span.0.id: "2" }
- match: { hits.hits.2._source.obj_default.span.1.id: "2" }


---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
FieldMapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING,
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING,
IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING,
IndexSettings.SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING,
SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING,

// validate that built-in similarities don't get redefined
Expand Down
21 changes: 0 additions & 21 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -653,13 +653,6 @@ public Iterator<Setting<?>> settings() {
Property.Final
);

/**
 * Controls whether synthetic source may run a second document-parsing pass to
 * recover values for ignored fields. Index-scoped and dynamically updatable;
 * defaults to {@code true} (second pass enabled).
 */
public static final Setting<Boolean> SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING = Setting.boolSetting(
    "index.synthetic_source.enable_second_doc_parsing_pass",
    true,
    Property.IndexScope,
    Property.Dynamic
);

/**
* Returns <code>true</code> if TSDB encoding is enabled. The default is <code>true</code>
*/
Expand Down Expand Up @@ -829,7 +822,6 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private volatile long mappingDimensionFieldsLimit;
private volatile boolean skipIgnoredSourceWrite;
private volatile boolean skipIgnoredSourceRead;
private volatile boolean syntheticSourceSecondDocParsingPassEnabled;
private final SourceFieldMapper.Mode indexMappingSourceMode;
private final boolean recoverySourceEnabled;

Expand Down Expand Up @@ -992,7 +984,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
es87TSDBCodecEnabled = scopedSettings.get(TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING);
skipIgnoredSourceWrite = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_WRITE_SETTING);
skipIgnoredSourceRead = scopedSettings.get(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING);
syntheticSourceSecondDocParsingPassEnabled = scopedSettings.get(SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING);
indexMappingSourceMode = scopedSettings.get(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING);
recoverySourceEnabled = RecoverySettings.INDICES_RECOVERY_SOURCE_ENABLED_SETTING.get(nodeSettings);

Expand Down Expand Up @@ -1082,10 +1073,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
this::setSkipIgnoredSourceWrite
);
scopedSettings.addSettingsUpdateConsumer(IgnoredSourceFieldMapper.SKIP_IGNORED_SOURCE_READ_SETTING, this::setSkipIgnoredSourceRead);
scopedSettings.addSettingsUpdateConsumer(
SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING,
this::setSyntheticSourceSecondDocParsingPassEnabled
);
}

private void setSearchIdleAfter(TimeValue searchIdleAfter) {
Expand Down Expand Up @@ -1678,14 +1665,6 @@ private void setSkipIgnoredSourceRead(boolean value) {
this.skipIgnoredSourceRead = value;
}

// Dynamic-settings update consumer for SYNTHETIC_SOURCE_SECOND_DOC_PARSING_PASS_SETTING.
private void setSyntheticSourceSecondDocParsingPassEnabled(boolean syntheticSourceSecondDocParsingPassEnabled) {
    this.syntheticSourceSecondDocParsingPassEnabled = syntheticSourceSecondDocParsingPassEnabled;
}

/** Whether synthetic source is allowed to run a second parsing pass over the document. */
public boolean isSyntheticSourceSecondDocParsingPassEnabled() {
    return syntheticSourceSecondDocParsingPassEnabled;
}

/** Returns the source mode read from {@code SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING} at construction. */
public SourceFieldMapper.Mode getIndexMappingSourceMode() {
    return indexMappingSourceMode;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;

import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
Expand Down Expand Up @@ -148,9 +145,6 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,

executeIndexTimeScripts(context);

// Record additional entries for {@link IgnoredSourceFieldMapper} before calling #postParse, so that they get stored.
addIgnoredSourceMissingValues(context);

for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
metadataMapper.postParse(context);
}
Expand All @@ -159,128 +153,6 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,
}
}

/**
 * Records additional entries for {@link IgnoredSourceFieldMapper} for ignored fields whose
 * values are still missing after the first parsing pass, by re-parsing the original source
 * and extracting just those values.
 *
 * <p>No-op when the first pass left no missing values. Only called for synthetic source
 * (asserted below).
 *
 * @param context the parsing context of the first pass; receives the recovered values
 * @throws IOException if re-parsing the source fails
 */
private void addIgnoredSourceMissingValues(DocumentParserContext context) throws IOException {
    Collection<IgnoredSourceFieldMapper.NameValue> ignoredFieldsMissingValues = context.getIgnoredFieldsMissingValues();
    if (ignoredFieldsMissingValues.isEmpty()) {
        return;
    }

    // Clean up any conflicting ignored values, to avoid double-printing them as array elements in synthetic source.
    // Note: entries later in the collection overwrite earlier ones with the same name.
    Map<String, IgnoredSourceFieldMapper.NameValue> fields = new HashMap<>(ignoredFieldsMissingValues.size());
    for (var field : ignoredFieldsMissingValues) {
        fields.put(field.name(), field);
    }
    context.deduplicateIgnoredFieldValues(fields.keySet());

    assert context.mappingLookup().isSourceSynthetic();
    try (
        XContentParser parser = XContentHelper.createParser(
            parserConfiguration,
            context.sourceToParse().source(),
            context.sourceToParse().getXContentType()
        )
    ) {
        // Second pass: a fresh root context over the same source, used only to locate
        // the missing values; results are added back onto the original context.
        DocumentParserContext newContext = new RootDocumentParserContext(
            context.mappingLookup(),
            mappingParserContext,
            context.sourceToParse(),
            parser
        );
        var nameValues = parseDocForMissingValues(newContext, fields);
        for (var nameValue : nameValues) {
            context.addIgnoredField(nameValue);
        }
    }
}

/**
 * Simplified parsing version for retrieving the source of a given set of fields.
 *
 * <p>Streams through the document once, skipping any subtree whose path cannot be a
 * parent of one of the requested fields, and records the value of every occurrence
 * of each requested field (array elements can match the same name repeatedly).
 *
 * @param context parsing context wrapping the re-created source parser
 * @param fields  map from full dotted field name to the placeholder entry to clone with the parsed value
 * @return one entry per matching value encountered, in document order
 */
private static List<IgnoredSourceFieldMapper.NameValue> parseDocForMissingValues(
    DocumentParserContext context,
    Map<String, IgnoredSourceFieldMapper.NameValue> fields
) throws IOException {
    // Generate all possible parent names for the given fields.
    // This is used to skip processing objects that can't generate missing values.
    Set<String> parentNames = getPossibleParentNames(fields.keySet());
    List<IgnoredSourceFieldMapper.NameValue> result = new ArrayList<>();

    XContentParser parser = context.parser();
    XContentParser.Token currentToken = parser.nextToken();
    List<String> path = new ArrayList<>();
    List<Boolean> isObjectInPath = new ArrayList<>(); // Tracks if path components correspond to an object or an array.
    String fieldName = null;
    while (currentToken != null) {
        // Inner loop: advance to the next FIELD_NAME token, maintaining the current
        // object/array path and pruning subtrees that can't contain requested fields.
        while (currentToken != XContentParser.Token.FIELD_NAME) {
            if (fieldName != null
                && (currentToken == XContentParser.Token.START_OBJECT || currentToken == XContentParser.Token.START_ARRAY)) {
                if (parentNames.contains(getCurrentPath(path, fieldName)) == false) {
                    // No missing value under this parsing subtree, skip it.
                    parser.skipChildren();
                } else {
                    path.add(fieldName);
                    isObjectInPath.add(currentToken == XContentParser.Token.START_OBJECT);
                }
                fieldName = null;
            } else if (currentToken == XContentParser.Token.END_OBJECT || currentToken == XContentParser.Token.END_ARRAY) {
                // Remove the path, if the scope type matches the one when the path was added.
                if (isObjectInPath.isEmpty() == false
                    && (isObjectInPath.getLast() && currentToken == XContentParser.Token.END_OBJECT
                        || isObjectInPath.getLast() == false && currentToken == XContentParser.Token.END_ARRAY)) {
                    path.removeLast();
                    isObjectInPath.removeLast();
                }
                fieldName = null;
            }
            currentToken = parser.nextToken();
            if (currentToken == null) {
                return result;
            }
        }
        fieldName = parser.currentName();
        String fullName = getCurrentPath(path, fieldName);
        var leaf = fields.get(fullName); // There may be multiple matches for array elements, don't use #remove.
        if (leaf != null) {
            parser.nextToken(); // Advance the parser to the value to be read.
            result.add(leaf.cloneWithValue(context.encodeFlattenedToken()));
            fieldName = null;
        }
        currentToken = parser.nextToken();
    }
    return result;
}

/** Joins the current object path with {@code fieldName} into a full dotted field name. */
private static String getCurrentPath(List<String> path, String fieldName) {
    assert fieldName != null;
    if (path.isEmpty()) {
        return fieldName;
    }
    StringBuilder fullName = new StringBuilder();
    for (String segment : path) {
        fullName.append(segment).append('.');
    }
    return fullName.append(fieldName).toString();
}

/**
 * Generates all possible parent object names for the given full names.
 * For instance, for input ['path.to.foo', 'another.path.to.bar'], it returns:
 * [ 'path', 'path.to', 'another', 'another.path', 'another.path.to' ]
 */
private static Set<String> getPossibleParentNames(Set<String> fullPaths) {
    if (fullPaths.isEmpty()) {
        return Collections.emptySet();
    }
    Set<String> parents = new HashSet<>();
    for (String fullPath : fullPaths) {
        String[] segments = fullPath.split("\\.");
        // A single-segment name has no parent object.
        if (segments.length < 2) {
            continue;
        }
        // Accumulate every proper prefix of the path, excluding the leaf segment.
        String prefix = segments[0];
        parents.add(prefix);
        for (int i = 1; i < segments.length - 1; i++) {
            prefix = prefix + "." + segments[i];
            parents.add(prefix);
        }
    }
    return parents;
}

private static void executeIndexTimeScripts(DocumentParserContext context) {
List<FieldMapper> indexTimeScriptMappers = context.mappingLookup().indexTimeScriptMappers();
if (indexTimeScriptMappers.isEmpty()) {
Expand Down Expand Up @@ -426,7 +298,10 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio
throwOnConcreteValue(context.parent(), currentFieldName, context);
}

if (context.canAddIgnoredField() && getSourceKeepMode(context, context.parent().sourceKeepMode()) == Mapper.SourceKeepMode.ALL) {
var sourceKeepMode = getSourceKeepMode(context, context.parent().sourceKeepMode());
if (context.canAddIgnoredField()
&& (sourceKeepMode == Mapper.SourceKeepMode.ALL
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope()))) {
context = context.addIgnoredFieldFromContext(
new IgnoredSourceFieldMapper.NameValue(
context.parent().fullPath(),
Expand Down Expand Up @@ -571,9 +446,11 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) thr
parseObjectOrNested(context.createFlattenContext(currentFieldName));
context.path().add(currentFieldName);
} else {
var sourceKeepMode = getSourceKeepMode(context, fieldMapper.sourceKeepMode());
if (context.canAddIgnoredField()
&& (fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK
|| getSourceKeepMode(context, fieldMapper.sourceKeepMode()) == Mapper.SourceKeepMode.ALL
|| sourceKeepMode == Mapper.SourceKeepMode.ALL
|| (sourceKeepMode == Mapper.SourceKeepMode.ARRAYS && context.inArrayScope())
|| (context.isWithinCopyTo() == false && context.isCopyToDestinationField(mapper.fullPath())))) {
context = context.addIgnoredFieldFromContext(
IgnoredSourceFieldMapper.NameValue.fromContext(context, fieldMapper.fullPath(), null)
Expand Down Expand Up @@ -811,9 +688,7 @@ private static void parseNonDynamicArray(
if (mapper instanceof ObjectMapper objectMapper) {
mode = getSourceKeepMode(context, objectMapper.sourceKeepMode());
objectWithFallbackSyntheticSource = mode == Mapper.SourceKeepMode.ALL
// Inside nested objects we always store object arrays as a workaround for #115261.
|| ((context.inNestedScope() || mode == Mapper.SourceKeepMode.ARRAYS)
&& objectMapper instanceof NestedObjectMapper == false);
|| (mode == Mapper.SourceKeepMode.ARRAYS && objectMapper instanceof NestedObjectMapper == false);
}
boolean fieldWithFallbackSyntheticSource = false;
boolean fieldWithStoredArraySource = false;
Expand Down
Loading

0 comments on commit 6cf4536

Please sign in to comment.