Skip to content

Commit

Permalink
[DerivedField] Support DerivedField object type and integration with …
Browse files Browse the repository at this point in the history
…Mapper (#13720)

* Support DerivedField object type

* ValueFetcher for DerivedField of object type to parse and fetch nested fields.
* DerivedFieldResolver to resolve DerivedField type on the fly.
* Introduction of new supported types - text, float.
* Use index analyzer for derived fields set in index settings.
* Support of derived fields from query string
* Unit tests

Signed-off-by: Rishabh Maurya <[email protected]>

* Derived field object type properties to also support Object as a value

Signed-off-by: Rishabh Maurya <[email protected]>

* Index and cluster setting to control derived fields

Signed-off-by: Rishabh Maurya <[email protected]>

* Fix test

Signed-off-by: Rishabh Maurya <[email protected]>

* Fixed missing javadoc

Signed-off-by: Rishabh Maurya <[email protected]>

* Fix the rewrite issue when profile is set as true

Signed-off-by: Rishabh Maurya <[email protected]>

* Address PR comments

Signed-off-by: Rishabh Maurya <[email protected]>

* Address PR comments

Signed-off-by: Rishabh Maurya <[email protected]>

* Use conjunction query with filter on source_indexed_field query and DerivedFieldQuery

Signed-off-by: Rishabh Maurya <[email protected]>

* support source_indexed_field for non-object type derived fields

Signed-off-by: Rishabh Maurya <[email protected]>

* rename source_indexed_field to prefilter_field

Signed-off-by: Rishabh Maurya <[email protected]>

---------

Signed-off-by: Rishabh Maurya <[email protected]>
(cherry picked from commit 9da6170)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] committed Jun 9, 2024
1 parent 78b30e2 commit 1fb514a
Show file tree
Hide file tree
Showing 30 changed files with 2,736 additions and 320 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,8 @@ public void apply(Settings value, Settings current, Settings previous) {
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING,
RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING,
RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS,
RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA
RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA,
SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING
)
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {

// Settings for concurrent segment search
IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING,

IndexSettings.ALLOW_DERIVED_FIELDS,
// validate that built-in similarities don't get redefined
Setting.groupSetting("index.similarity.", (s) -> {
Map<String, Settings> groups = s.getAsGroups();
Expand Down
22 changes: 22 additions & 0 deletions server/src/main/java/org/opensearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ public static IndexMergePolicy fromString(String text) {
true,
Property.IndexScope
);

/**
 * Index-scoped, dynamically updatable boolean setting registered under the key
 * {@code index.query.derived_field.enabled}. Its default value is {@code true}.
 */
public static final Setting<Boolean> ALLOW_DERIVED_FIELDS = Setting.boolSetting(
"index.query.derived_field.enabled",
true,
Property.Dynamic,
Property.IndexScope
);

public static final Setting<TimeValue> INDEX_TRANSLOG_SYNC_INTERVAL_SETTING = Setting.timeSetting(
"index.translog.sync_interval",
TimeValue.timeValueSeconds(5),
Expand Down Expand Up @@ -763,6 +771,7 @@ public static IndexMergePolicy fromString(String text) {
private final boolean assignedOnRemoteNode;
private final RemoteStorePathStrategy remoteStorePathStrategy;
private final boolean isTranslogMetadataEnabled;
private volatile boolean allowDerivedField;

/**
* The maximum age of a retention lease before it is considered expired.
Expand Down Expand Up @@ -856,6 +865,10 @@ private void setDefaultFields(List<String> defaultFields) {
this.defaultFields = defaultFields;
}

/**
 * Stores the current value of the derived-field flag on this instance.
 *
 * @param allowDerivedField the new value to record
 */
private void setAllowDerivedField(boolean allowDerivedField) {
this.allowDerivedField = allowDerivedField;
}

/**
* Returns <code>true</code> if query string parsing should be lenient. The default is <code>false</code>
*/
Expand Down Expand Up @@ -884,6 +897,13 @@ public boolean isDefaultAllowUnmappedFields() {
return defaultAllowUnmappedFields;
}

/**
 * Returns <code>true</code> if queries are allowed to define and use derived fields. The default is <code>true</code>
 *
 * @return the currently stored value of the derived-field flag
 */
public boolean isDerivedFieldAllowed() {
return allowDerivedField;
}

/**
* Creates a new {@link IndexSettings} instance. The given node settings will be merged with the settings in the metadata
* while index level settings will overwrite node settings.
Expand Down Expand Up @@ -930,6 +950,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
this.queryStringAnalyzeWildcard = QUERY_STRING_ANALYZE_WILDCARD.get(nodeSettings);
this.queryStringAllowLeadingWildcard = QUERY_STRING_ALLOW_LEADING_WILDCARD.get(nodeSettings);
this.defaultAllowUnmappedFields = scopedSettings.get(ALLOW_UNMAPPED);
this.allowDerivedField = scopedSettings.get(ALLOW_DERIVED_FIELDS);
this.durability = scopedSettings.get(INDEX_TRANSLOG_DURABILITY_SETTING);
defaultFields = scopedSettings.get(DEFAULT_FIELD_SETTING);
syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings);
Expand Down Expand Up @@ -1104,6 +1125,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING,
this::setDocIdFuzzySetFalsePositiveProbability
);
scopedSettings.addSettingsUpdateConsumer(ALLOW_DERIVED_FIELDS, this::setAllowDerivedField);
}

private void setSearchIdleAfter(TimeValue searchIdleAfter) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.common.regex.Regex;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.script.Script;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import static org.opensearch.index.mapper.FieldMapper.IGNORE_MALFORMED_SETTING;

/**
* Accepts definition of DerivedField from search request in both forms: map parsed from SearchRequest and {@link DerivedField} defined using client.
* The object is initialized per search request and is responsible to resolve {@link DerivedFieldType} given a field name.
* It uses {@link FieldTypeInference} to infer field type for a nested field within DerivedField of {@link DerivedFieldSupportedTypes#OBJECT} type.
*/
public class DefaultDerivedFieldResolver implements DerivedFieldResolver {
private final QueryShardContext queryShardContext;
private final Map<String, DerivedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>();
private final FieldTypeInference typeInference;
private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class);

DefaultDerivedFieldResolver(
QueryShardContext queryShardContext,
Map<String, Object> derivedFieldsObject,
List<DerivedField> derivedFields
) {
this(
queryShardContext,
derivedFieldsObject,
derivedFields,
new FieldTypeInference(
queryShardContext.index().getName(),
queryShardContext.getMapperService(),
queryShardContext.getIndexReader()
)
);
}

DefaultDerivedFieldResolver(
QueryShardContext queryShardContext,
Map<String, Object> derivedFieldsObject,
List<DerivedField> derivedFields,
FieldTypeInference typeInference
) {
this.queryShardContext = queryShardContext;
initDerivedFieldTypes(derivedFieldsObject, derivedFields);
this.typeInference = typeInference;
}

@Override
public Set<String> resolvePattern(String pattern) {
Set<String> derivedFields = new HashSet<>();
if (queryShardContext != null && queryShardContext.getMapperService() != null) {
for (MappedFieldType fieldType : queryShardContext.getMapperService().fieldTypes()) {
if (Regex.simpleMatch(pattern, fieldType.name()) && fieldType instanceof DerivedFieldType) {
derivedFields.add(fieldType.name());
}
}
}
for (String fieldName : derivedFieldTypeMap.keySet()) {
if (Regex.simpleMatch(pattern, fieldName)) {
derivedFields.add(fieldName);
}
}
return derivedFields;
}

/**
* Resolves the fieldName. The search request definitions are given precedence over derived fields definitions in the index mapping.
* It caches the response for previously resolved field names
* @param fieldName name of the field. It also accepts nested derived field
* @return DerivedFieldType if resolved successfully, a null otherwise.
*/
@Override
public DerivedFieldType resolve(String fieldName) {
return Optional.ofNullable(resolveUsingSearchDefinitions(fieldName)).orElseGet(() -> resolveUsingMappings(fieldName));
}

private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) {
return Optional.ofNullable(derivedFieldTypeMap.get(fieldName))
.orElseGet(
() -> Optional.ofNullable((DerivedFieldType) getParentDerivedField(fieldName))
.map(
// compute and cache nested derived field
parentDerivedField -> derivedFieldTypeMap.computeIfAbsent(
fieldName,
f -> this.resolveNestedField(f, parentDerivedField)
)
)
.orElse(null)
);
}

private DerivedFieldType resolveNestedField(String fieldName, DerivedFieldType parentDerivedField) {
Objects.requireNonNull(parentDerivedField);
try {
Script script = parentDerivedField.derivedField.getScript();
String nestedType = explicitTypeFromParent(parentDerivedField.derivedField, fieldName.substring(fieldName.indexOf(".") + 1));
if (nestedType == null) {
Mapper inferredFieldMapper = typeInference.infer(
getValueFetcher(fieldName, script, parentDerivedField.derivedField.getIgnoreMalformed())
);
if (inferredFieldMapper != null) {
nestedType = inferredFieldMapper.typeName();
}
}
if (nestedType != null) {
DerivedField derivedField = new DerivedField(fieldName, nestedType, script);
if (parentDerivedField.derivedField.getProperties() != null) {
derivedField.setProperties(parentDerivedField.derivedField.getProperties());
}
if (parentDerivedField.derivedField.getPrefilterField() != null) {
derivedField.setPrefilterField(parentDerivedField.derivedField.getPrefilterField());
}
if (parentDerivedField.derivedField.getFormat() != null) {
derivedField.setFormat(parentDerivedField.derivedField.getFormat());
}
if (parentDerivedField.derivedField.getIgnoreMalformed()) {
derivedField.setIgnoreMalformed(parentDerivedField.derivedField.getIgnoreMalformed());
}
return getDerivedFieldType(derivedField);
} else {
logger.warn(
"Field type cannot be inferred. Ensure the field {} is not rare across entire index or provide explicit mapping using [properties] under parent object [{}] ",
fieldName,
parentDerivedField.derivedField.getName()
);
}
} catch (IOException e) {
logger.warn(e.getMessage());
}
return null;
}

private MappedFieldType getParentDerivedField(String fieldName) {
if (fieldName.contains(".")) {
return resolve(fieldName.split("\\.")[0]);
}
return null;
}

private static String explicitTypeFromParent(DerivedField parentDerivedField, String subField) {
if (parentDerivedField == null) {
return null;
}
return parentDerivedField.getNestedFieldType(subField);
}

ValueFetcher getValueFetcher(String fieldName, Script script, boolean ignoreMalformed) {
String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1);
return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher(
subFieldName,
DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
o -> o, // raw object returned will be used to infer the type without modifying it
ignoreMalformed
);
}

private void initDerivedFieldTypes(Map<String, Object> derivedFieldsObject, List<DerivedField> derivedFields) {
if (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) {
Map<String, Object> derivedFieldObject = new HashMap<>();
derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject);
derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject));
}
if (derivedFields != null) {
for (DerivedField derivedField : derivedFields) {
derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField));
}
}
}

private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) {
Map<String, DerivedFieldType> derivedFieldTypes = new HashMap<>();
DocumentMapper documentMapper = queryShardContext.getMapperService()
.documentMapperParser()
.parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject);
if (documentMapper != null && documentMapper.mappers() != null) {
for (Mapper mapper : documentMapper.mappers()) {
if (mapper instanceof DerivedFieldMapper) {
DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType();
derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType);
}
}
}
return derivedFieldTypes;
}

private DerivedFieldType getDerivedFieldType(DerivedField derivedField) {
Mapper.BuilderContext builderContext = new Mapper.BuilderContext(
queryShardContext.getMapperService().getIndexSettings().getSettings(),
new ContentPath(1)
);
DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder(
derivedField,
queryShardContext.getMapperService().getIndexAnalyzers(),
null,
IGNORE_MALFORMED_SETTING.getDefault(queryShardContext.getIndexSettings().getSettings())
);
return builder.build(builderContext).fieldType();
}

private DerivedFieldType resolveUsingMappings(String name) {
if (queryShardContext != null && queryShardContext.getMapperService() != null) {
MappedFieldType mappedFieldType = queryShardContext.getMapperService().fieldType(name);
if (mappedFieldType instanceof DerivedFieldType) {
return (DerivedFieldType) mappedFieldType;
}
}
return null;
}
}
29 changes: 18 additions & 11 deletions server/src/main/java/org/opensearch/index/mapper/DerivedField.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class DerivedField implements Writeable, ToXContentFragment {
private final String name;
private final String type;
private final Script script;
private String sourceIndexedField;
private String prefilterField;
private Map<String, Object> properties;
private Boolean ignoreMalformed;
private String format;
Expand All @@ -49,7 +49,7 @@ public DerivedField(StreamInput in) throws IOException {
if (in.readBoolean()) {
properties = in.readMap();
}
sourceIndexedField = in.readOptionalString();
prefilterField = in.readOptionalString();
format = in.readOptionalString();
ignoreMalformed = in.readOptionalBoolean();
}
Expand All @@ -67,7 +67,7 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(true);
out.writeMap(properties);
}
out.writeOptionalString(sourceIndexedField);
out.writeOptionalString(prefilterField);
out.writeOptionalString(format);
out.writeOptionalBoolean(ignoreMalformed);
}
Expand All @@ -81,8 +81,8 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
if (properties != null) {
builder.field("properties", properties);
}
if (sourceIndexedField != null) {
builder.field("source_indexed_field", sourceIndexedField);
if (prefilterField != null) {
builder.field("prefilter_field", prefilterField);
}
if (format != null) {
builder.field("format", format);
Expand Down Expand Up @@ -110,8 +110,15 @@ public Map<String, Object> getProperties() {
return properties;
}

public String getSourceIndexedField() {
return sourceIndexedField;
/**
 * Looks up the type explicitly declared for a nested field in {@code properties}.
 *
 * @param fieldName key of the nested field inside {@code properties}
 * @return the declared type name, or null if there are no properties, the name is null or empty,
 *         the field has no entry, or its entry is not a String
 */
public String getNestedFieldType(String fieldName) {
    if (properties == null || properties.isEmpty() || fieldName == null || fieldName.isEmpty()) {
        return null;
    }
    // properties is a Map<String, Object>, so a blind (String) cast would throw ClassCastException
    // for any non-String value; treat such entries as "no explicit type declared" instead.
    Object declaredType = properties.get(fieldName);
    return declaredType instanceof String ? (String) declaredType : null;
}

/**
 * Returns the prefilter field configured for this derived field.
 *
 * @return the stored prefilter field value; null when it has not been set
 */
public String getPrefilterField() {
return prefilterField;
}

public String getFormat() {
Expand All @@ -126,8 +133,8 @@ public void setProperties(Map<String, Object> properties) {
this.properties = properties;
}

public void setSourceIndexedField(String sourceIndexedField) {
this.sourceIndexedField = sourceIndexedField;
/**
 * Sets the prefilter field for this derived field.
 *
 * @param prefilterField the value to store
 */
public void setPrefilterField(String prefilterField) {
this.prefilterField = prefilterField;
}

public void setFormat(String format) {
Expand All @@ -140,7 +147,7 @@ public void setIgnoreMalformed(boolean ignoreMalformed) {

@Override
public int hashCode() {
return Objects.hash(name, type, script, sourceIndexedField, properties, ignoreMalformed, format);
return Objects.hash(name, type, script, prefilterField, properties, ignoreMalformed, format);
}

@Override
Expand All @@ -155,7 +162,7 @@ public boolean equals(Object obj) {
return Objects.equals(name, other.name)
&& Objects.equals(type, other.type)
&& Objects.equals(script, other.script)
&& Objects.equals(sourceIndexedField, other.sourceIndexedField)
&& Objects.equals(prefilterField, other.prefilterField)
&& Objects.equals(properties, other.properties)
&& Objects.equals(ignoreMalformed, other.ignoreMalformed)
&& Objects.equals(format, other.format);
Expand Down
Loading

0 comments on commit 1fb514a

Please sign in to comment.