Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DerivedField] Support DerivedField object type and integration with Mapper #13720

Merged
merged 11 commits into from
Jun 6, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,8 @@ public void apply(Settings value, Settings current, Settings previous) {
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING,
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING,
RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS,
RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA
RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA,
SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING
)
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {

// Settings for concurrent segment search
IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING,

IndexSettings.ALLOW_DERIVED_FIELDS,
// validate that built-in similarities don't get redefined
Setting.groupSetting("index.similarity.", (s) -> {
Map<String, Settings> groups = s.getAsGroups();
Expand Down
22 changes: 22 additions & 0 deletions server/src/main/java/org/opensearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ public static IndexMergePolicy fromString(String text) {
true,
Property.IndexScope
);

/**
 * Boolean setting registered under the key {@code index.query.derived_field.enabled}.
 * It defaults to {@code true} and is declared with {@link Property#Dynamic} and {@link Property#IndexScope}.
 */
public static final Setting<Boolean> ALLOW_DERIVED_FIELDS = Setting.boolSetting(
"index.query.derived_field.enabled",
true,
Property.Dynamic,
Property.IndexScope
);

public static final Setting<TimeValue> INDEX_TRANSLOG_SYNC_INTERVAL_SETTING = Setting.timeSetting(
"index.translog.sync_interval",
TimeValue.timeValueSeconds(5),
Expand Down Expand Up @@ -763,6 +771,7 @@ public static IndexMergePolicy fromString(String text) {
private final boolean assignedOnRemoteNode;
private final RemoteStorePathStrategy remoteStorePathStrategy;
private final boolean isTranslogMetadataEnabled;
private volatile boolean allowDerivedField;

/**
* The maximum age of a retention lease before it is considered expired.
Expand Down Expand Up @@ -856,6 +865,10 @@ private void setDefaultFields(List<String> defaultFields) {
this.defaultFields = defaultFields;
}

/**
 * Stores the new value of the derived-field flag.
 * This method is registered as the update consumer for {@link #ALLOW_DERIVED_FIELDS}.
 */
private void setAllowDerivedField(boolean allowDerivedField) {
this.allowDerivedField = allowDerivedField;
}

/**
* Returns <code>true</code> if query string parsing should be lenient. The default is <code>false</code>
*/
Expand Down Expand Up @@ -884,6 +897,13 @@ public boolean isDefaultAllowUnmappedFields() {
return defaultAllowUnmappedFields;
}

/**
 * Returns <code>true</code> if queries are allowed to define and use derived fields. The default is <code>true</code>.
 *
 * @return the current value of the flag backed by {@link #ALLOW_DERIVED_FIELDS}
 */
public boolean isDerivedFieldAllowed() {
return allowDerivedField;
}

/**
* Creates a new {@link IndexSettings} instance. The given node settings will be merged with the settings in the metadata
* while index level settings will overwrite node settings.
Expand Down Expand Up @@ -931,6 +951,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
this.queryStringAnalyzeWildcard = QUERY_STRING_ANALYZE_WILDCARD.get(nodeSettings);
this.queryStringAllowLeadingWildcard = QUERY_STRING_ALLOW_LEADING_WILDCARD.get(nodeSettings);
this.defaultAllowUnmappedFields = scopedSettings.get(ALLOW_UNMAPPED);
this.allowDerivedField = scopedSettings.get(ALLOW_DERIVED_FIELDS);
this.durability = scopedSettings.get(INDEX_TRANSLOG_DURABILITY_SETTING);
defaultFields = scopedSettings.get(DEFAULT_FIELD_SETTING);
syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings);
Expand Down Expand Up @@ -1105,6 +1126,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING,
this::setDocIdFuzzySetFalsePositiveProbability
);
scopedSettings.addSettingsUpdateConsumer(ALLOW_DERIVED_FIELDS, this::setAllowDerivedField);
}

private void setSearchIdleAfter(TimeValue searchIdleAfter) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.common.regex.Regex;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.script.Script;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import static org.opensearch.index.mapper.FieldMapper.IGNORE_MALFORMED_SETTING;

/**
* Accepts derived field definitions from a search request in both forms: a map parsed from the SearchRequest and {@link DerivedField} objects defined using the client.
* The object is initialized per search request and is responsible for resolving a {@link DerivedFieldType} given a field name.
* It uses {@link FieldTypeInference} to infer the field type of a nested field within a DerivedField of {@link DerivedFieldSupportedTypes#OBJECT} type.
*/
public class DefaultDerivedFieldResolver implements DerivedFieldResolver {
// Context used to reach the mapper service, index settings and lookup while resolving fields.
private final QueryShardContext queryShardContext;
// Field name -> resolved type. Seeded from the request definitions in initDerivedFieldTypes;
// nested fields are added lazily via computeIfAbsent in resolveUsingSearchDefinitions.
private final Map<String, DerivedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>();
msfroh marked this conversation as resolved.
Show resolved Hide resolved
// Used by resolveNestedField to infer a nested field's type when the parent declares none for it.
private final FieldTypeInference typeInference;
private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class);

/**
 * Creates a resolver that builds its own {@link FieldTypeInference} from the index name,
 * mapper service and index reader of the given context, then delegates to the four-argument constructor.
 *
 * @param queryShardContext context of the current search; dereferenced here, so it must not be null
 * @param derivedFieldsObject derived field definitions as a raw map, may be null or empty
 * @param derivedFields derived field definitions as {@link DerivedField} objects, may be null
 */
DefaultDerivedFieldResolver(
QueryShardContext queryShardContext,
Map<String, Object> derivedFieldsObject,
List<DerivedField> derivedFields
) {
this(
queryShardContext,
derivedFieldsObject,
derivedFields,
new FieldTypeInference(
queryShardContext.index().getName(),
queryShardContext.getMapperService(),
queryShardContext.getIndexReader()
)
);
}

/**
 * Creates a resolver with an explicitly supplied {@link FieldTypeInference} and registers
 * all derived fields defined in the search request.
 *
 * @param queryShardContext context of the current search
 * @param derivedFieldsObject derived field definitions as a raw map, may be null or empty
 * @param derivedFields derived field definitions as {@link DerivedField} objects, may be null
 * @param typeInference inference helper used later for nested fields without an explicit type
 */
DefaultDerivedFieldResolver(
    QueryShardContext queryShardContext,
    Map<String, Object> derivedFieldsObject,
    List<DerivedField> derivedFields,
    FieldTypeInference typeInference
) {
    this.typeInference = typeInference;
    this.queryShardContext = queryShardContext;
    // Registration only needs the shard context; it does not touch the inference helper.
    initDerivedFieldTypes(derivedFieldsObject, derivedFields);
}

/**
 * Collects the names of all derived fields matching the given simple pattern, looking at both
 * the derived fields in the index mapping and the ones known to this resolver.
 *
 * @param pattern simple match pattern as understood by {@link Regex#simpleMatch(String, String)}
 * @return a new mutable set with the matching field names; empty when nothing matches
 */
@Override
public Set<String> resolvePattern(String pattern) {
    final Set<String> matches = new HashSet<>();
    final boolean mappingsAvailable = queryShardContext != null && queryShardContext.getMapperService() != null;
    if (mappingsAvailable) {
        for (MappedFieldType mapped : queryShardContext.getMapperService().fieldTypes()) {
            if (Regex.simpleMatch(pattern, mapped.name()) == false) {
                continue;
            }
            // Only derived fields from the mapping are of interest here.
            if (mapped instanceof DerivedFieldType) {
                matches.add(mapped.name());
            }
        }
    }
    derivedFieldTypeMap.keySet().stream().filter(candidate -> Regex.simpleMatch(pattern, candidate)).forEach(matches::add);
    return matches;
}

/**
 * Resolves a derived field by name. Definitions coming from the search request win over
 * derived fields defined in the index mapping; the mapping is only consulted when the
 * request-level lookup yields nothing.
 * @param fieldName name of the field; nested derived field names are accepted as well
 * @return the resolved DerivedFieldType, or null when the name cannot be resolved
 */
@Override
public DerivedFieldType resolve(String fieldName) {
    final DerivedFieldType fromRequest = resolveUsingSearchDefinitions(fieldName);
    if (fromRequest != null) {
        return fromRequest;
    }
    return resolveUsingMappings(fieldName);
}

/**
 * Looks the field up among the types already known to this resolver. On a miss, tries to
 * resolve it as a nested field of a parent derived field and caches a successful result.
 *
 * @param fieldName name of the field to resolve
 * @return the known or newly computed type, or {@code null} when there is no parent derived
 *         field or the nested field cannot be resolved
 */
private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) {
    final DerivedFieldType known = derivedFieldTypeMap.get(fieldName);
    if (known != null) {
        return known;
    }
    return Optional.ofNullable((DerivedFieldType) getParentDerivedField(fieldName))
        // compute and cache the nested derived field; a null result is not cached
        .map(parent -> derivedFieldTypeMap.computeIfAbsent(fieldName, nestedName -> this.resolveNestedField(nestedName, parent)))
        .orElse(null);
}

/**
 * Builds the {@link DerivedFieldType} for a nested field of a parent derived field.
 * <p>
 * The nested type is taken from the parent's definition when it declares one for the sub-field;
 * otherwise it is inferred through {@link FieldTypeInference}. The nested field reuses the parent's
 * script and inherits its properties, prefilter field, format and ignore-malformed flag.
 *
 * @param fieldName full name of the nested field; everything after the first dot is the sub-field path
 * @param parentDerivedField resolved parent derived field, must not be null
 * @return the nested field type, or {@code null} when no type can be determined or inference
 *         fails with an {@link IOException}
 */
private DerivedFieldType resolveNestedField(String fieldName, DerivedFieldType parentDerivedField) {
    Objects.requireNonNull(parentDerivedField);
    final DerivedField parent = parentDerivedField.derivedField;
    try {
        Script script = parent.getScript();
        String nestedType = explicitTypeFromParent(parent, fieldName.substring(fieldName.indexOf(".") + 1));
        if (nestedType == null) {
            // No explicit type on the parent: fall back to inference.
            Mapper inferredFieldMapper = typeInference.infer(getValueFetcher(fieldName, script, parent.getIgnoreMalformed()));
            if (inferredFieldMapper != null) {
                nestedType = inferredFieldMapper.typeName();
            }
        }
        if (nestedType == null) {
            logger.warn(
                "Field type cannot be inferred. Ensure the field {} is not rare across entire index or provide explicit mapping using [properties] under parent object [{}] ",
                fieldName,
                parent.getName()
            );
            return null;
        }
        DerivedField derivedField = new DerivedField(fieldName, nestedType, script);
        if (parent.getProperties() != null) {
            derivedField.setProperties(parent.getProperties());
        }
        if (parent.getPrefilterField() != null) {
            derivedField.setPrefilterField(parent.getPrefilterField());
        }
        if (parent.getFormat() != null) {
            derivedField.setFormat(parent.getFormat());
        }
        if (parent.getIgnoreMalformed()) {
            derivedField.setIgnoreMalformed(parent.getIgnoreMalformed());
        }
        return getDerivedFieldType(derivedField);
    } catch (IOException e) {
        // Pass the exception itself so the cause and stack trace are logged; getMessage() alone may be null.
        logger.warn("Failed to resolve nested derived field [" + fieldName + "]", e);
        return null;
    }
}

/**
 * Resolves the parent of a dotted field name, i.e. the part before the first dot.
 * <p>
 * The prefix is cut with {@code indexOf}/{@code substring} rather than {@code split("\\.")[0]}:
 * {@link String#split(String)} drops trailing empty strings, so a name made only of dots
 * (for example {@code "."}) yields an empty array and indexing it would throw.
 *
 * @param fieldName field name that may contain a dot
 * @return the resolved parent field, or {@code null} when the name has no dot or the parent cannot be resolved
 */
private MappedFieldType getParentDerivedField(String fieldName) {
    final int firstDot = fieldName.indexOf('.');
    if (firstDot < 0) {
        return null;
    }
    return resolve(fieldName.substring(0, firstDot));
}

private static String explicitTypeFromParent(DerivedField parentDerivedField, String subField) {
if (parentDerivedField == null) {
return null;

Check warning on line 162 in server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java#L162

Added line #L162 was not covered by tests
}
return parentDerivedField.getNestedFieldType(subField);
}

/**
 * Creates the value fetcher used to read the values of a nested field out of the object
 * produced by the parent's script.
 *
 * @param fieldName full nested field name; the part after the first dot is passed on as the sub-field name
 * @param script script of the parent derived field
 * @param ignoreMalformed passed through unchanged to the fetcher
 * @return a new {@link ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher}
 */
ValueFetcher getValueFetcher(String fieldName, Script script, boolean ignoreMalformed) {
    final int firstDot = fieldName.indexOf(".");
    final String nestedPath = fieldName.substring(firstDot + 1);
    return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher(
        nestedPath,
        DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
        rawValue -> rawValue, // identity: the untouched raw object is what type inference works on
        ignoreMalformed
    );
}

/**
 * Fills the field type map from the request-level definitions. Map-based definitions are
 * registered first; {@link DerivedField} objects are registered afterwards, so they replace
 * map-based entries with the same name.
 *
 * @param derivedFieldsObject definitions as a raw map, ignored when null or empty
 * @param derivedFields definitions as objects, ignored when null
 */
private void initDerivedFieldTypes(Map<String, Object> derivedFieldsObject, List<DerivedField> derivedFields) {
    final boolean hasRawDefinitions = derivedFieldsObject != null && derivedFieldsObject.isEmpty() == false;
    if (hasRawDefinitions) {
        // Wrap the definitions under the derived content type key before handing them to the parser.
        final Map<String, Object> wrapped = new HashMap<>();
        wrapped.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject);
        derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(wrapped));
    }
    if (derivedFields == null) {
        return;
    }
    derivedFields.forEach(definition -> derivedFieldTypeMap.put(definition.getName(), getDerivedFieldType(definition)));
}

/**
 * Parses the wrapped definitions with the index's document mapper parser and extracts the
 * field type of every {@link DerivedFieldMapper} found in the result.
 *
 * @param derivedFieldObject definitions wrapped under {@link DerivedFieldMapper#CONTENT_TYPE}
 * @return a new map from field name to derived field type; empty when parsing yields no mappers
 */
private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) {
    final DocumentMapper parsed = queryShardContext.getMapperService()
        .documentMapperParser()
        .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject);
    final Map<String, DerivedFieldType> typesByName = new HashMap<>();
    if (parsed == null || parsed.mappers() == null) {
        return typesByName;
    }
    for (Mapper candidate : parsed.mappers()) {
        if (candidate instanceof DerivedFieldMapper == false) {
            continue;
        }
        final DerivedFieldType type = ((DerivedFieldMapper) candidate).fieldType();
        typesByName.put(type.name(), type);
    }
    return typesByName;
}

/**
 * Builds the field type for a single {@link DerivedField} definition by running it through
 * a {@link DerivedFieldMapper.Builder} configured from the index settings and analyzers.
 *
 * @param derivedField definition to convert
 * @return the field type of the mapper built for the definition
 */
private DerivedFieldType getDerivedFieldType(DerivedField derivedField) {
    final DerivedFieldMapper.Builder fieldBuilder = new DerivedFieldMapper.Builder(
        derivedField,
        queryShardContext.getMapperService().getIndexAnalyzers(),
        null,
        IGNORE_MALFORMED_SETTING.getDefault(queryShardContext.getIndexSettings().getSettings())
    );
    final Mapper.BuilderContext context = new Mapper.BuilderContext(
        queryShardContext.getMapperService().getIndexSettings().getSettings(),
        new ContentPath(1)
    );
    return fieldBuilder.build(context).fieldType();
}

/**
 * Looks the field up in the index mapping and returns it only when it is a derived field.
 *
 * @param name field name to look up
 * @return the mapped derived field type, or {@code null} when the context or mapper service is
 *         missing, the field is unknown, or it is not a derived field
 */
private DerivedFieldType resolveUsingMappings(String name) {
    if (queryShardContext == null || queryShardContext.getMapperService() == null) {
        return null;
    }
    final MappedFieldType candidate = queryShardContext.getMapperService().fieldType(name);
    return candidate instanceof DerivedFieldType ? (DerivedFieldType) candidate : null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class DerivedField implements Writeable, ToXContentFragment {
private final String name;
private final String type;
private final Script script;
private String sourceIndexedField;
private String prefilterField;
private Map<String, Object> properties;
private Boolean ignoreMalformed;
private String format;
Expand All @@ -49,7 +49,7 @@ public DerivedField(StreamInput in) throws IOException {
if (in.readBoolean()) {
properties = in.readMap();
}
sourceIndexedField = in.readOptionalString();
prefilterField = in.readOptionalString();
format = in.readOptionalString();
ignoreMalformed = in.readOptionalBoolean();
}
Expand All @@ -67,7 +67,7 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(true);
out.writeMap(properties);
}
out.writeOptionalString(sourceIndexedField);
out.writeOptionalString(prefilterField);
out.writeOptionalString(format);
out.writeOptionalBoolean(ignoreMalformed);
}
Expand All @@ -81,8 +81,8 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
if (properties != null) {
builder.field("properties", properties);
}
if (sourceIndexedField != null) {
builder.field("source_indexed_field", sourceIndexedField);
if (prefilterField != null) {
builder.field("prefilter_field", prefilterField);
}
if (format != null) {
builder.field("format", format);
Expand Down Expand Up @@ -110,8 +110,15 @@ public Map<String, Object> getProperties() {
return properties;
}

public String getSourceIndexedField() {
return sourceIndexedField;
/**
 * Returns the entry stored in {@code properties} under the given sub-field name, cast to a string.
 *
 * @param fieldName sub-field name used as the key into {@code properties}
 * @return the stored value, or {@code null} when there are no properties, the name is null or
 *         empty, or no entry exists for it
 */
public String getNestedFieldType(String fieldName) {
    final boolean noProperties = properties == null || properties.isEmpty();
    final boolean noFieldName = fieldName == null || fieldName.isEmpty();
    if (noProperties || noFieldName) {
        return null;
    }
    final Object declaredType = properties.get(fieldName);
    return (String) declaredType;
}

/**
 * Returns the prefilter field (the value written as {@code prefilter_field} in the XContent output).
 *
 * @return the prefilter field, or {@code null} when none has been set
 */
public String getPrefilterField() {
return prefilterField;
}

public String getFormat() {
Expand All @@ -126,8 +133,8 @@ public void setProperties(Map<String, Object> properties) {
this.properties = properties;
}

public void setSourceIndexedField(String sourceIndexedField) {
this.sourceIndexedField = sourceIndexedField;
/**
 * Sets the prefilter field of this derived field.
 *
 * @param prefilterField new value; stored as given
 */
public void setPrefilterField(String prefilterField) {
this.prefilterField = prefilterField;
}

public void setFormat(String format) {
Expand All @@ -140,7 +147,7 @@ public void setIgnoreMalformed(boolean ignoreMalformed) {

@Override
public int hashCode() {
return Objects.hash(name, type, script, sourceIndexedField, properties, ignoreMalformed, format);
return Objects.hash(name, type, script, prefilterField, properties, ignoreMalformed, format);
}

@Override
Expand All @@ -155,7 +162,7 @@ public boolean equals(Object obj) {
return Objects.equals(name, other.name)
&& Objects.equals(type, other.type)
&& Objects.equals(script, other.script)
&& Objects.equals(sourceIndexedField, other.sourceIndexedField)
&& Objects.equals(prefilterField, other.prefilterField)
&& Objects.equals(properties, other.properties)
&& Objects.equals(ignoreMalformed, other.ignoreMalformed)
&& Objects.equals(format, other.format);
Expand Down
Loading
Loading