Skip to content

Commit

Permalink
Add Optional Source Filtering to Source Loaders
Browse files Browse the repository at this point in the history
Spinoff of elastic#113036.

This change introduces optional source filtering directly within source loaders (both synthetic and stored).
The main benefit is seen in synthetic source loaders, as synthetic fields are stored independently.
By filtering while loading the synthetic source, generating the source becomes linear in the number of fields that match the filter.

This update also modifies the get document API to apply source filters earlier—directly through the source loader.
The search API, however, is not affected by this change, since the loaded source is still used by other features (e.g., highlighting, fields, nested hits),
and source filtering is always applied as the final step.
A follow-up will be required to ensure careful handling of all search-related scenarios.
  • Loading branch information
jimczi committed Sep 30, 2024
1 parent 8cb1266 commit f62f4f6
Show file tree
Hide file tree
Showing 31 changed files with 629 additions and 164 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.elasticsearch.xcontent.provider.filtering.FilterPathBasedFilter;
import org.elasticsearch.xcontent.support.filtering.FilterPath;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class XContentParserConfigurationImpl implements XContentParserConfiguration {
Expand Down Expand Up @@ -102,20 +104,49 @@ public RestApiVersion restApiVersion() {
}

/**
 * Returns a copy of this configuration that filters parsed content with the given
 * include/exclude path sets, interpreted relative to {@code rootPath}.
 *
 * <p>When {@code rootPath} is non-null, each compiled include/exclude filter is narrowed
 * to the sub-filters that apply under that root (via {@code FilterPath#matches}); filters
 * that do not reach the root are dropped entirely.
 *
 * @param rootPath path prefix the filters are relative to, or {@code null} for the document root
 * @param includeStrings paths to include, or {@code null} to include everything
 * @param excludeStrings paths to exclude, or {@code null} to exclude nothing
 * @param filtersMatchFieldNamesWithDots whether filters also match literal dots in field names
 */
public XContentParserConfiguration withFiltering(
    String rootPath,
    Set<String> includeStrings,
    Set<String> excludeStrings,
    boolean filtersMatchFieldNamesWithDots
) {
    FilterPath[] includePaths = FilterPath.compile(includeStrings);
    FilterPath[] excludePaths = FilterPath.compile(excludeStrings);

    if (rootPath != null) {
        if (includePaths != null) {
            List<FilterPath> includeFilters = new ArrayList<>();
            for (var incl : includePaths) {
                // Collect the sub-filters of `incl` that apply under rootPath.
                incl.matches(rootPath, includeFilters, true);
            }
            // No include reaches the root: treat as "no include filtering".
            includePaths = includeFilters.isEmpty() ? null : includeFilters.toArray(FilterPath[]::new);
        }

        if (excludePaths != null) {
            List<FilterPath> excludeFilters = new ArrayList<>();
            for (var excl : excludePaths) {
                excl.matches(rootPath, excludeFilters, true);
            }
            excludePaths = excludeFilters.isEmpty() ? null : excludeFilters.toArray(FilterPath[]::new);
        }
    }
    // Note: pass the already-compiled (and possibly root-narrowed) paths; do not recompile
    // from the raw strings here.
    return new XContentParserConfigurationImpl(
        registry,
        deprecationHandler,
        restApiVersion,
        includePaths,
        excludePaths,
        filtersMatchFieldNamesWithDots
    );
}

/**
 * Variant of {@link #withFiltering(String, Set, Set, boolean)} that interprets the
 * include/exclude filters relative to the document root (no root path prefix).
 */
public XContentParserConfiguration withFiltering(Set<String> includeStrings, Set<String> excludeStrings, boolean filtersMatchFieldNamesWithDots) {
    return withFiltering(null, includeStrings, excludeStrings, filtersMatchFieldNamesWithDots);
}

public JsonParser filter(JsonParser parser) {
JsonParser filtered = parser;
if (excludes != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,11 @@ XContentParserConfiguration withFiltering(
Set<String> excludeStrings,
boolean filtersMatchFieldNamesWithDots
);

/**
 * Returns a configuration that filters parsed content with the given include/exclude
 * path sets, interpreted relative to {@code rootPath}.
 *
 * @param rootPath path prefix the filters are relative to, or {@code null} for the document root
 * @param includeStrings paths to include, or {@code null} to include everything
 * @param excludeStrings paths to exclude, or {@code null} to exclude nothing
 * @param filtersMatchFieldNamesWithDots whether filters also match literal dots in field names
 */
XContentParserConfiguration withFiltering(
String rootPath,
Set<String> includeStrings,
Set<String> excludeStrings,
boolean filtersMatchFieldNamesWithDots
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.IntStream;

Expand Down Expand Up @@ -332,6 +333,24 @@ protected final void testFilter(Builder expected, Builder sample, Collection<Str
private void testFilter(Builder expected, Builder sample, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
    throws IOException {
    // First, check filtering with the paths exactly as given.
    assertFilterResult(expected.apply(createBuilder()), filter(sample, includes, excludes, matchFieldNamesWithDots));

    // Then check that the same filters, prefixed with a synthetic root path, yield the
    // same result when the parser filters relative to that root.
    String rootPrefix = "root.path.random";
    assertFilterResult(
        expected.apply(createBuilder()),
        filterSub(sample, rootPrefix, prefixPaths(rootPrefix, includes), prefixPaths(rootPrefix, excludes), matchFieldNamesWithDots)
    );
}

/**
 * Prefixes every path with {@code root} followed by either "." or "*." (chosen at random
 * per path). Returns {@code null} when {@code paths} is {@code null}.
 */
private Set<String> prefixPaths(String root, Set<String> paths) {
    if (paths == null) {
        return null;
    }
    Set<String> prefixed = new HashSet<>();
    for (String path : paths) {
        prefixed.add(root + (randomBoolean() ? "." : "*.") + path);
    }
    return prefixed;
}

public void testArrayWithEmptyObjectInInclude() throws IOException {
Expand Down Expand Up @@ -413,21 +432,36 @@ private XContentBuilder filter(Builder sample, Set<String> includes, Set<String>
&& matchFieldNamesWithDots == false) {
return filterOnBuilder(sample, includes, excludes);
}
return filterOnParser(sample, includes, excludes, matchFieldNamesWithDots);
return filterOnParser(sample, null, includes, excludes, matchFieldNamesWithDots);
}

/**
 * Filters {@code sample} through the parser-based filter, treating {@code root} as the
 * path prefix under which the include/exclude filters apply.
 */
private XContentBuilder filterSub(Builder sample, String root, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
    throws IOException {
    return filterOnParser(sample, root, includes, excludes, matchFieldNamesWithDots);
}

/**
 * Filters by constructing the {@link XContentBuilder} itself with the include/exclude
 * sets, so filtering happens while writing rather than while parsing.
 */
private XContentBuilder filterOnBuilder(Builder sample, Set<String> includes, Set<String> excludes) throws IOException {
    XContentBuilder filteringBuilder = XContentBuilder.builder(getXContentType(), includes, excludes);
    return sample.apply(filteringBuilder);
}

private XContentBuilder filterOnParser(Builder sample, Set<String> includes, Set<String> excludes, boolean matchFieldNamesWithDots)
throws IOException {
private XContentBuilder filterOnParser(
Builder sample,
String rootPath,
Set<String> includes,
Set<String> excludes,
boolean matchFieldNamesWithDots
) throws IOException {
try (XContentBuilder builtSample = sample.apply(createBuilder())) {
BytesReference sampleBytes = BytesReference.bytes(builtSample);
try (
XContentParser parser = getXContentType().xContent()
.createParser(
XContentParserConfiguration.EMPTY.withFiltering(includes, excludes, matchFieldNamesWithDots),
XContentParserConfiguration.EMPTY.withFiltering(rootPath, includes, excludes, matchFieldNamesWithDots),
sampleBytes.streamInput()
)
) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,31 +274,24 @@ public static Map<String, Object> filter(Map<String, Object> map, String[] inclu
*/
/**
 * Builds a function that filters a source map with the given include/exclude simple-match
 * patterns. Missing/empty includes mean "include everything"; missing/empty excludes mean
 * "exclude nothing".
 */
public static Function<Map<String, Object>, Map<String, Object>> filter(String[] includes, String[] excludes) {
    CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

    // Compile once up front; the returned function reuses the automata on every call.
    CharacterRunAutomaton include = compileAutomaton(includes, matchAllAutomaton);
    CharacterRunAutomaton exclude = compileAutomaton(excludes, new CharacterRunAutomaton(Automata.makeEmpty()));

    // NOTE: We cannot use Operations.minus because of the special case that
    // we want all sub properties to match as soon as an object matches

    return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
}

/**
 * Compiles simple-match {@code patterns} into a {@link CharacterRunAutomaton}, extended so
 * that a match on an object also matches dots in field names (its sub-fields). Returns
 * {@code defaultValue} when no patterns are given.
 */
public static CharacterRunAutomaton compileAutomaton(String[] patterns, CharacterRunAutomaton defaultValue) {
    if (patterns == null || patterns.length == 0) {
        return defaultValue;
    }
    var automaton = makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(patterns));
    return new CharacterRunAutomaton(automaton, MAX_DETERMINIZED_STATES);
}

/** Make matches on objects also match dots in field names.
* For instance, if the original simple regex is `foo`, this will translate
* it into `foo` OR `foo.*`. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,14 @@ private GetResult innerGetFetch(
Map<String, DocumentField> metadataFields = null;
DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
SourceLoader loader = forceSyntheticSource
? new SourceLoader.Synthetic(mappingLookup.getMapping()::syntheticFieldLoader, mapperMetrics.sourceFieldMetrics())
: mappingLookup.newSourceLoader(mapperMetrics.sourceFieldMetrics());
? new SourceLoader.Synthetic(
() -> mappingLookup.getMapping().syntheticFieldLoader(fetchSourceContext.hasFilter() ? fetchSourceContext.filter() : null),
mapperMetrics.sourceFieldMetrics()
)
: mappingLookup.newSourceLoader(
fetchSourceContext.hasFilter() ? fetchSourceContext.filter() : null,
mapperMetrics.sourceFieldMetrics()
);
StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFields, fetchSourceContext, loader);
LeafStoredFieldLoader leafStoredFieldLoader = storedFieldLoader.getLoader(docIdAndVersion.reader.getContext(), null);
try {
Expand Down Expand Up @@ -367,10 +373,6 @@ private GetResult innerGetFetch(
if (mapperService.mappingLookup().isSourceEnabled() && fetchSourceContext.fetchSource()) {
Source source = loader.leaf(docIdAndVersion.reader, new int[] { docIdAndVersion.docId })
.source(leafStoredFieldLoader, docIdAndVersion.docId);

if (fetchSourceContext.hasFilter()) {
source = source.filter(fetchSourceContext.filter());
}
sourceBytes = source.internalSourceRef();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParserConfiguration;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -169,7 +170,7 @@ public MalformedValuesLayer(String fieldName) {
@Override
protected void writeValue(Object value, XContentBuilder b) throws IOException {
if (value instanceof BytesRef r) {
XContentDataHelper.decodeAndWrite(b, r);
XContentDataHelper.decodeAndWrite(XContentParserConfiguration.EMPTY, b, r);
} else {
b.value(value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ public void validate(IndexSettings settings, boolean checkLimits) {
* with the source loading strategy declared on the source field mapper.
*/
try {
sourceMapper().newSourceLoader(mapping(), mapperMetrics.sourceFieldMetrics());
mappingLookup.newSourceLoader(null, mapperMetrics.sourceFieldMetrics());
} catch (IllegalArgumentException e) {
mapperMetrics.sourceFieldMetrics().recordSyntheticSourceIncompatibleMapping();
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,4 @@ public FieldAliasMapper build(MapperBuilderContext context) {
return new FieldAliasMapper(leafName(), fullName, path);
}
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
return SourceLoader.SyntheticFieldLoader.NOTHING;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
Expand Down Expand Up @@ -484,15 +485,14 @@ final SyntheticSourceMode syntheticSourceMode() {
/**
* Returns synthetic field loader for the mapper.
* If mapper does not support synthetic source, it is handled using generic implementation
* in {@link DocumentParser#parseObjectOrField} and {@link ObjectMapper#syntheticFieldLoader()}.
* in {@link DocumentParser#parseObjectOrField} and {@link ObjectMapper#syntheticFieldLoader(SourceFilter)}.
* <br>
*
* This method is final in order to support common use cases like fallback synthetic source.
* Mappers that need custom support of synthetic source should override {@link #syntheticSourceSupport()}.
*
* @return implementation of {@link SourceLoader.SyntheticFieldLoader}
*/
@Override
public final SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
if (hasScript()) {
return SourceLoader.SyntheticFieldLoader.NOTHING;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;

import java.io.IOException;
import java.util.List;
Expand Down Expand Up @@ -128,7 +129,7 @@ public int count() {
public void write(XContentBuilder b) throws IOException {
for (Object v : values) {
if (v instanceof BytesRef r) {
XContentDataHelper.decodeAndWrite(b, r);
XContentDataHelper.decodeAndWrite(XContentParserConfiguration.EMPTY, b, r);
} else {
b.value(v);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
Expand Down Expand Up @@ -281,7 +282,7 @@ public static MappedNameValue decodeAsMap(byte[] value) throws IOException {
IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(bytes);
XContentBuilder xContentBuilder = XContentBuilder.builder(XContentDataHelper.getXContentType(nameValue.value()).xContent());
xContentBuilder.startObject().field(nameValue.name());
XContentDataHelper.decodeAndWrite(xContentBuilder, nameValue.value());
XContentDataHelper.decodeAndWrite(XContentParserConfiguration.EMPTY, xContentBuilder, nameValue.value());
xContentBuilder.endObject();
Tuple<XContentType, Map<String, Object>> result = XContentHelper.convertToMap(BytesReference.bytes(xContentBuilder), true);
return new MappedNameValue(nameValue, result.v1(), result.v2());
Expand Down
12 changes: 0 additions & 12 deletions server/src/main/java/org/elasticsearch/index/mapper/Mapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,18 +163,6 @@ public final String leafName() {
*/
public abstract void validate(MappingLookup mappers);

/**
* Create a {@link SourceLoader.SyntheticFieldLoader} to populate synthetic source.
*
* @throws IllegalArgumentException if the field is configured in a way that doesn't
* support synthetic source. This translates nicely into a 400 error when
* users configure synthetic source in the mapping without configuring all
* fields properly.
*/
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
throw new IllegalArgumentException("field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source");
}

@Override
public String toString() {
return Strings.toString(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;

Expand All @@ -22,6 +24,7 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
Expand Down Expand Up @@ -126,9 +129,9 @@ private boolean isSourceSynthetic() {
return sfm != null && sfm.isSynthetic();
}

public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
var stream = Stream.concat(Stream.of(metadataMappers), root.mappers.values().stream());
return root.syntheticFieldLoader(stream);
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(@Nullable SourceFilter filter) {
var mappers = Stream.concat(Stream.of(metadataMappers), root.mappers.values().stream()).collect(Collectors.toList());
return root.syntheticFieldLoader(filter, mappers, false);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.inference.InferenceService;
import org.elasticsearch.search.lookup.SourceFilter;

import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -497,9 +499,11 @@ public boolean isSourceSynthetic() {
/**
* Build something to load source {@code _source}.
*/
public SourceLoader newSourceLoader(SourceFieldMetrics metrics) {
SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
return sfm == null ? SourceLoader.FROM_STORED_SOURCE : sfm.newSourceLoader(mapping, metrics);
public SourceLoader newSourceLoader(@Nullable SourceFilter filter, SourceFieldMetrics metrics) {
if (isSourceSynthetic()) {
return new SourceLoader.Synthetic(() -> mapping.syntheticFieldLoader(filter), metrics);
}
return filter == null ? SourceLoader.FROM_STORED_SOURCE : new SourceLoader.Stored(filter);
}

/**
Expand Down
Loading

0 comments on commit f62f4f6

Please sign in to comment.