From 6e34dca1b7fde637f4a6d6fff60445e2cb594132 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Thu, 16 May 2024 12:19:40 -0700 Subject: [PATCH 01/11] Support DerivedField object type * ValueFetcher for DerivedField of object type to parse and fetch nested fields. * DerivedFieldResolver to resolve DerivedField type on the fly. * Introduction of new support types - text, float. * Use index analyzer for derived fields set in index settings. * Support of derived fields from query string * Unit tests Signed-off-by: Rishabh Maurya --- .../mapper/DefaultDerivedFieldResolver.java | 233 +++++++ .../index/mapper/DerivedFieldMapper.java | 184 ++++-- .../index/mapper/DerivedFieldResolver.java | 36 + .../mapper/DerivedFieldSupportedTypes.java | 95 ++- .../index/mapper/DerivedFieldType.java | 264 +++++++- .../mapper/DerivedFieldValueFetcher.java | 18 +- .../index/mapper/ObjectDerivedFieldType.java | 191 ++++++ .../index/query/DerivedFieldQuery.java | 73 ++- .../index/query/QueryShardContext.java | 45 +- .../VectorGeoPointShapeQueryProcessor.java | 2 +- .../index/search/QueryParserHelper.java | 3 + .../opensearch/script/DerivedFieldScript.java | 4 +- .../org/opensearch/search/SearchService.java | 30 +- .../subphase/highlight/HighlightPhase.java | 4 +- .../highlight/UnifiedHighlighter.java | 6 +- .../mapper/DerivedFieldMapperQueryTests.java | 614 +++++++++++++++++- .../mapper/DerivedFieldResolverTests.java | 469 +++++++++++++ .../index/mapper/DerivedFieldTypeTests.java | 62 +- .../index/query/DerivedFieldQueryTests.java | 154 ++++- .../index/query/QueryShardContextTests.java | 37 +- .../opensearch/search/SearchServiceTests.java | 5 +- .../DerivedFieldFetchAndHighlightTests.java | 239 +++++-- 22 files changed, 2472 insertions(+), 296 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java diff --git a/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java new file mode 100644 index 0000000000000..199e98ac81e7b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java @@ -0,0 +1,233 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.regex.Regex; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.Script; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import static org.opensearch.index.mapper.FieldMapper.IGNORE_MALFORMED_SETTING; + +/** + * Accepts definition of DerivedField from search request in both forms: map parsed from SearchRequest and {@link DerivedField} defined using client. + * The object is initialized per search request and is responsible to resolve {@link DerivedFieldType} given a field name. + * It uses {@link FieldTypeInference} to infer field type for a nested field within DerivedField of {@link DerivedFieldSupportedTypes#OBJECT} type. + */ +public class DefaultDerivedFieldResolver implements DerivedFieldResolver { + private final QueryShardContext queryShardContext; + private final Map derivedFieldTypeMap = new ConcurrentHashMap<>(); + private final FieldTypeInference typeInference; + private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class); + + public DefaultDerivedFieldResolver( + QueryShardContext queryShardContext, + Map derivedFieldsObject, + List derivedFields + ) { + this( + queryShardContext, + derivedFieldsObject, + derivedFields, + new FieldTypeInference( + queryShardContext.index().getName(), + queryShardContext.getMapperService(), + queryShardContext.getIndexReader() + ) + ); + } + + public DefaultDerivedFieldResolver( + QueryShardContext queryShardContext, + Map derivedFieldsObject, + List derivedFields, + FieldTypeInference typeInference + ) { + this.queryShardContext = queryShardContext; + initDerivedFieldTypes(derivedFieldsObject, derivedFields); + this.typeInference = typeInference; + } + + @Override + public Set resolvePattern(String pattern) { + Set derivedFields = new HashSet<>(); + if (queryShardContext != null && queryShardContext.getMapperService() != null) { + for (MappedFieldType fieldType : queryShardContext.getMapperService().fieldTypes()) { + if (Regex.simpleMatch(pattern, fieldType.name()) && fieldType instanceof DerivedFieldType) { + derivedFields.add(fieldType.name()); + } + } + } + for (String fieldName : derivedFieldTypeMap.keySet()) { + if (Regex.simpleMatch(pattern, fieldName)) { + derivedFields.add(fieldName); + } + } + return derivedFields; + } + + /** + * Resolves the fieldName. The search request definitions are given precedence over derived fields definitions in the index mapping. + * It caches the response for previously resolved field names + * @param fieldName name of the field. It also accepts nested derived field + * @return DerivedFieldType if resolved successfully, a null otherwise. + */ + @Override + public DerivedFieldType resolve(String fieldName) { + return Optional.ofNullable(resolveUsingSearchDefinitions(fieldName)).orElseGet(() -> resolveUsingMappings(fieldName)); + } + + private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) { + if (derivedFieldTypeMap.containsKey(fieldName)) { + return derivedFieldTypeMap.get(fieldName); + } + DerivedFieldType resolvedNestedType = resolveNestedField(fieldName); + if (resolvedNestedType != null) { + derivedFieldTypeMap.put(fieldName, resolvedNestedType); + } + return resolvedNestedType; + } + + private DerivedFieldType resolveNestedField(String fieldName) { + DerivedFieldType parentDerivedField = (DerivedFieldType) getParentDerivedField(fieldName); + if (parentDerivedField != null) { + try { + Script script = parentDerivedField.derivedField.getScript(); + String nestedType = explicitTypeFromParent( + parentDerivedField.derivedField, + fieldName.substring(fieldName.indexOf(".") + 1) + ); + if (nestedType == null) { + Mapper inferredFieldMapper = typeInference.infer( + getValueFetcher(fieldName, script, parentDerivedField.derivedField.getIgnoreMalformed()) + ); + if (inferredFieldMapper != null) { + nestedType = inferredFieldMapper.typeName(); + } + } + if (nestedType != null) { + DerivedField derivedField = new DerivedField(fieldName, nestedType, script); + if (parentDerivedField.derivedField.getProperties() != null) { + derivedField.setProperties(parentDerivedField.derivedField.getProperties()); + } + if (parentDerivedField.derivedField.getSourceIndexedField() != null) { + derivedField.setSourceIndexedField(parentDerivedField.derivedField.getSourceIndexedField()); + } + if (parentDerivedField.derivedField.getFormat() != null) { + derivedField.setFormat(parentDerivedField.derivedField.getFormat()); + } + if (parentDerivedField.derivedField.getIgnoreMalformed()) { + derivedField.setIgnoreMalformed(parentDerivedField.derivedField.getIgnoreMalformed()); + } + return getDerivedFieldType(derivedField); + } else { + logger.warn( + "Field type cannot be inferred. Ensure the field {} is not rare across entire index or provide explicit mapping using [properties] under parent object [{}] ", + fieldName, + parentDerivedField.derivedField.getName() + ); + } + } catch (IOException e) { + logger.warn(e.getMessage()); + } + } + return null; + } + + private MappedFieldType getParentDerivedField(String fieldName) { + if (fieldName.contains(".")) { + return resolve(fieldName.split("\\.")[0]); + } + return null; + } + + private static String explicitTypeFromParent(DerivedField parentDerivedField, String subField) { + if (parentDerivedField == null + || parentDerivedField.getProperties() == null + || parentDerivedField.getProperties().isEmpty() + || subField == null + || subField.isEmpty()) { + return null; + } + return parentDerivedField.getProperties().get(subField); + } + + ValueFetcher getValueFetcher(String fieldName, Script script, boolean ignoreMalformed) { + String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1); + return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher( + subFieldName, + DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()), + o -> o, // raw object returned will be used to infer the type without modifying it + ignoreMalformed + ); + } + + private void initDerivedFieldTypes(Map derivedFieldsObject, List derivedFields) { + if (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) { + Map derivedFieldObject = new HashMap<>(); + derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject); + derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject)); + } + if (derivedFields != null) { + for (DerivedField derivedField : derivedFields) { + derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField)); + } + } + } + + private Map getAllDerivedFieldTypeFromObject(Map derivedFieldObject) { + Map derivedFieldTypes = new HashMap<>(); + DocumentMapper documentMapper = queryShardContext.getMapperService() + .documentMapperParser() + .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject); + if (documentMapper != null && documentMapper.mappers() != null) { + for (Mapper mapper : documentMapper.mappers()) { + if (mapper instanceof DerivedFieldMapper) { + DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType(); + derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType); + } + } + } + return derivedFieldTypes; + } + + private DerivedFieldType getDerivedFieldType(DerivedField derivedField) { + Mapper.BuilderContext builderContext = new Mapper.BuilderContext( + queryShardContext.getMapperService().getIndexSettings().getSettings(), + new ContentPath(1) + ); + DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder( + derivedField, + queryShardContext.getMapperService().getIndexAnalyzers(), + null, + IGNORE_MALFORMED_SETTING.getDefault(queryShardContext.getIndexSettings().getSettings()) + ); + return builder.build(builderContext).fieldType(); + } + + private DerivedFieldType resolveUsingMappings(String name) { + if (queryShardContext != null && queryShardContext.getMapperService() != null) { + MappedFieldType mappedFieldType = queryShardContext.getMapperService().fieldType(name); + if (mappedFieldType instanceof DerivedFieldType) { + return (DerivedFieldType) mappedFieldType; + } + } + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java index c6ae71320c35c..85a9a09cf295a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java @@ -9,16 +9,20 @@ package org.opensearch.index.mapper; import org.apache.lucene.index.IndexableField; +import org.opensearch.common.time.DateFormatter; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.script.Script; import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.Function; +import static org.opensearch.index.mapper.DateFieldMapper.getDefaultDateTimeFormatter; + /** * A field mapper for derived fields * @@ -28,6 +32,8 @@ public class DerivedFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "derived"; + protected final IndexAnalyzers indexAnalyzers; + private static DerivedFieldMapper toType(FieldMapper in) { return (DerivedFieldMapper) in; } @@ -38,62 +44,184 @@ private static DerivedFieldMapper toType(FieldMapper in) { * @opensearch.internal */ public static class Builder extends ParametrizedFieldMapper.Builder { - // TODO: The type of parameter may change here if the actual underlying FieldType object is needed - private final Parameter type = Parameter.stringParam("type", false, m -> toType(m).type, ""); + private final Parameter type = Parameter.stringParam("type", true, m -> toType(m).type, ""); + private final IndexAnalyzers indexAnalyzers; + private final boolean defaultIgnoreMalformed; + private final DateFormatter defaultDateFormatter; private final Parameter