diff --git a/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java new file mode 100644 index 0000000000000..9cabadc8d93ad --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DefaultDerivedFieldResolver.java @@ -0,0 +1,185 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.regex.Regex; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.Script; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Accepts definition of DerivedField from search request in both forms: map parsed from SearchRequest and {@link DerivedField} defined using client. + * The object is initialized per search request and is responsible to resolve {@link DerivedFieldType} given a field name. + * It uses {@link FieldTypeInference} to infer field type for a nested field within DerivedField of {@link DerivedFieldSupportedTypes#OBJECT} type. 
+ */
+public class DefaultDerivedFieldResolver implements DerivedFieldResolver {
+    private final QueryShardContext queryShardContext;
+    // Holds the definitions supplied with the search request and doubles as a cache for nested fields whose
+    // type was inferred during this request.
+    private final Map<String, DerivedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>();
+    private final FieldTypeInference typeInference;
+    private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class);
+
+    /**
+     * Creates a resolver whose {@link FieldTypeInference} is built from the index name, mapper service and
+     * index reader of the given shard context.
+     * @param queryShardContext shard context used to look up mappings and to build field types
+     * @param derivedFieldsObject derived field definitions in map form, may be null or empty
+     * @param derivedFields derived field definitions in object form, may be null
+     */
+    public DefaultDerivedFieldResolver(
+        QueryShardContext queryShardContext,
+        Map<String, Object> derivedFieldsObject,
+        List<DerivedField> derivedFields
+    ) {
+        this(
+            queryShardContext,
+            derivedFieldsObject,
+            derivedFields,
+            new FieldTypeInference(
+                queryShardContext.index().getName(),
+                queryShardContext.getMapperService(),
+                queryShardContext.getIndexReader()
+            )
+        );
+    }
+
+    /**
+     * Creates a resolver that uses the supplied {@link FieldTypeInference} to infer the type of nested fields.
+     * @param queryShardContext shard context used to look up mappings and to build field types
+     * @param derivedFieldsObject derived field definitions in map form, may be null or empty
+     * @param derivedFields derived field definitions in object form, may be null
+     * @param typeInference inference used for nested fields of a derived field
+     */
+    public DefaultDerivedFieldResolver(
+        QueryShardContext queryShardContext,
+        Map<String, Object> derivedFieldsObject,
+        List<DerivedField> derivedFields,
+        FieldTypeInference typeInference
+    ) {
+        // assign every field before calling an instance method so that the method never observes a half-built object
+        this.queryShardContext = queryShardContext;
+        this.typeInference = typeInference;
+        initDerivedFieldTypes(derivedFieldsObject, derivedFields);
+    }
+
+    /**
+     * Collects the names of all derived fields matching the pattern, looking at the derived fields of the index
+     * mapping as well as at the definitions (and previously resolved nested fields) held by this resolver.
+     * @param pattern pattern in the form accepted by {@link Regex#simpleMatch(String, String)}
+     * @return names of the matching derived fields, empty if there is none
+     */
+    @Override
+    public Set<String> resolvePattern(String pattern) {
+        Set<String> derivedFields = new HashSet<>();
+        for (MappedFieldType fieldType : queryShardContext.getMapperService().fieldTypes()) {
+            if (fieldType instanceof DerivedFieldType && Regex.simpleMatch(pattern, fieldType.name())) {
+                derivedFields.add(fieldType.name());
+            }
+        }
+        for (String fieldName : derivedFieldTypeMap.keySet()) {
+            if (Regex.simpleMatch(pattern, fieldName)) {
+                derivedFields.add(fieldName);
+            }
+        }
+        return derivedFields;
+    }
+
+    /**
+     * Resolves the field name to a derived field type. Definitions from the search request take precedence over
+     * derived field definitions in the index mapping. Successfully resolved nested fields are cached, so that the
+     * type is inferred only once per resolver.
+     * @param fieldName name of the field; a nested field of a derived field ({@code parent.sub_field}) is accepted too
+     * @return the DerivedFieldType if resolved successfully, null otherwise; in particular null is returned when the
+     * name is mapped in the index to a field which is not a derived field
+     */
+    @Override
+    public DerivedFieldType resolve(String fieldName) {
+        return Optional.ofNullable(resolveUsingSearchDefinitions(fieldName)).orElseGet(() -> resolveUsingMappings(fieldName));
+    }
+
+    /**
+     * Looks the field up in the index mapping.
+     * @return the mapped field type if it is a derived field, null if the field is unmapped or of any other type
+     */
+    private DerivedFieldType resolveUsingMappings(String fieldName) {
+        MappedFieldType mappedFieldType = queryShardContext.getMapperService().fieldType(fieldName);
+        // The mapping may hold a regular (non derived) field under this name, e.g. when the parent of a dotted field
+        // name is a text field. An unconditional cast would fail with a ClassCastException in that case.
+        return mappedFieldType instanceof DerivedFieldType ? (DerivedFieldType) mappedFieldType : null;
+    }
+
+    /**
+     * Resolves the field from the search request definitions or, for a nested field, by inferring its type.
+     * An inferred type is stored in the map so that later lookups return it directly.
+     */
+    private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) {
+        // the map cannot hold null values, so a single get is equivalent to containsKey followed by get
+        DerivedFieldType knownType = derivedFieldTypeMap.get(fieldName);
+        if (knownType != null) {
+            return knownType;
+        }
+        DerivedFieldType resolvedNestedType = resolveNestedField(fieldName);
+        if (resolvedNestedType != null) {
+            derivedFieldTypeMap.put(fieldName, resolvedNestedType);
+        }
+        return resolvedNestedType;
+    }
+
+    /**
+     * Builds the type of a nested field: the parent derived field is resolved first, then the type of the nested
+     * field is inferred from the values emitted by the script of the parent.
+     * @return the derived field type carrying the full field name, the inferred type and the script and source
+     * indexed field of the parent; null if there is no parent derived field, if the type cannot be inferred or if
+     * the inference fails with an IOException (a warning is logged in the last two cases)
+     */
+    private DerivedFieldType resolveNestedField(String fieldName) {
+        DerivedFieldType parentDerivedField = getParentDerivedField(fieldName);
+        if (parentDerivedField == null) {
+            return null;
+        }
+        try {
+            Script script = parentDerivedField.derivedField.getScript();
+            Mapper inferredFieldMapper = typeInference.infer(getValueFetcher(fieldName, script));
+            if (inferredFieldMapper != null) {
+                return getDerivedFieldType(
+                    new DerivedField(
+                        fieldName,
+                        inferredFieldMapper.typeName(),
+                        script,
+                        parentDerivedField.derivedField.getSourceIndexedField()
+                    )
+                );
+            }
+            logger.warn("Field type cannot be inferred. Ensure the field {} is not rare across entire index", fieldName);
+        } catch (IOException e) {
+            // pass the exception as throwable so that the stack trace is logged, not only its toString()
+            logger.warn("Failed to infer the type of a nested derived field", e);
+        }
+        return null;
+    }
+
+    /**
+     * Resolves the parent of a dotted field name, i.e. the part in front of the first dot.
+     * @return the parent derived field, null if the name has no dot or the parent is not a derived field
+     */
+    private DerivedFieldType getParentDerivedField(String fieldName) {
+        int firstDot = fieldName.indexOf('.');
+        if (firstDot < 0) {
+            return null;
+        }
+        // substring instead of split("\\.")[0]: split returns an empty array for names made of dots only (e.g. "."),
+        // which made the array access fail with an ArrayIndexOutOfBoundsException
+        return resolve(fieldName.substring(0, firstDot));
+    }
+
+    /**
+     * Creates the value fetcher used for type inference. It runs the given script and extracts the sub field, which
+     * is the part of the field name after the first dot.
+     */
+    ValueFetcher getValueFetcher(String fieldName, Script script) {
+        String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1);
+        return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher(
+            subFieldName,
+            DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
+            o -> o // raw object returned will be used to infer the type without modifying it
+        );
+    }
+
+    /**
+     * Fills the map with the definitions of the search request. The definitions given as objects are added last,
+     * hence they replace a definition of the same name given in map form.
+     */
+    private void initDerivedFieldTypes(Map<String, Object> derivedFieldsObject, List<DerivedField> derivedFields) {
+        if (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) {
+            // wrap the definitions under the derived content type, which is the form handed to the mapping parser
+            Map<String, Object> derivedFieldObject = new HashMap<>();
+            derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject);
+            derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject));
+        }
+        if (derivedFields != null) {
+            for (DerivedField derivedField : derivedFields) {
+                derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField));
+            }
+        }
+    }
+
+    /**
+     * Parses the definitions with the document mapper parser of the index and returns the field type of every
+     * {@link DerivedFieldMapper} found in the parsed mapping, keyed by field name.
+     */
+    private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) {
+        Map<String, DerivedFieldType> derivedFieldTypes = new HashMap<>();
+        DocumentMapper documentMapper = queryShardContext.getMapperService()
+            .documentMapperParser()
+            .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject);
+        if (documentMapper != null && documentMapper.mappers() != null) {
+            for (Mapper mapper : documentMapper.mappers()) {
+                if (mapper instanceof DerivedFieldMapper) {
+                    DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType();
+                    derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType);
+                }
+            }
+        }
+        return derivedFieldTypes;
+    }
+
+    private DerivedFieldType getDerivedFieldType(DerivedField derivedField) {
+
Mapper.BuilderContext builderContext = new Mapper.BuilderContext( + queryShardContext.getMapperService().getIndexSettings().getSettings(), + new ContentPath(1) + ); + DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder( + derivedField, + queryShardContext.getMapperService().getIndexAnalyzers() + ); + return builder.build(builderContext).fieldType(); + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java index 9e1ac046aa56e..6f59ba36e3ef8 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldMapper.java @@ -85,7 +85,7 @@ public DerivedFieldMapper build(BuilderContext context) { ); DerivedFieldType ft; if (name.contains(".")) { - ft = new DerivedObjectFieldType( + ft = new ObjectDerivedFieldType( new DerivedField(buildFullName(context), type.getValue(), script.getValue(), sourceIndexedField.getValue()), fieldMapper, fieldFunction, diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java index 352ee1a0a7079..04fff0c8eb50f 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldResolver.java @@ -8,169 +8,29 @@ package org.opensearch.index.mapper; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.opensearch.common.annotation.PublicApi; -import org.opensearch.common.regex.Regex; -import org.opensearch.index.query.QueryShardContext; -import org.opensearch.script.Script; -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; +/** + * DerivedFieldResolver is 
used as a lookup to resolve derived fields from their name. + * It is created per search request and needs to be set at {@link org.opensearch.index.query.QueryShardContext#setDerivedFieldResolver(DerivedFieldResolver)} + * for derived fields resolution. + */ @PublicApi(since = "2.15.0") -public class DerivedFieldResolver { - private final QueryShardContext queryShardContext; - private final Map derivedFieldTypeMap = new ConcurrentHashMap<>(); - private final FieldTypeInference typeInference; - private static final Logger logger = LogManager.getLogger(DerivedFieldResolver.class); - - public DerivedFieldResolver( - QueryShardContext queryShardContext, - Map derivedFieldsObject, - List derivedFields - ) { - this( - queryShardContext, - derivedFieldsObject, - derivedFields, - new FieldTypeInference( - queryShardContext.index().getName(), - queryShardContext.getMapperService(), - queryShardContext.getIndexReader() - ) - ); - } - - public DerivedFieldResolver( - QueryShardContext queryShardContext, - Map derivedFieldsObject, - List derivedFields, - FieldTypeInference typeInference - ) { - this.queryShardContext = queryShardContext; - initializeDerivedFieldTypes(derivedFieldsObject); - initializeDerivedFieldTypesFromList(derivedFields); - this.typeInference = typeInference; - } - - private void initializeDerivedFieldTypes(Map derivedFieldsObject) { - if (derivedFieldsObject != null) { - Map derivedFieldObject = new HashMap<>(); - derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject); - derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject)); - } - } - - private void initializeDerivedFieldTypesFromList(List derivedFields) { - if (derivedFields != null) { - for (DerivedField derivedField : derivedFields) { - derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField)); - } - } - } - - public Set resolvePattern(String pattern) { - Set matchingDerivedFields = new HashSet<>(); - for (String fieldName : 
derivedFieldTypeMap.keySet()) { - if (!matchingDerivedFields.contains(fieldName) && Regex.simpleMatch(pattern, fieldName)) { - matchingDerivedFields.add(fieldName); - } - } - return matchingDerivedFields; - } - - public MappedFieldType resolve(String fieldName) { - MappedFieldType fieldType = derivedFieldTypeMap.get(fieldName); - if (fieldType != null) { - return fieldType; - } - - fieldType = queryShardContext.getMapperService().fieldType(fieldName); - if (fieldType != null) { - return fieldType; - } - - if (fieldName.contains(".")) { - return resolveNestedField(fieldName); - } - return null; - } - - private MappedFieldType resolveNestedField(String fieldName) { - DerivedFieldType parentDerivedField = getParentDerivedField(fieldName); - if (parentDerivedField == null) { - return null; - } - ValueFetcher valueFetcher = getValueFetcher(fieldName, parentDerivedField.derivedField.getScript()); - Mapper inferredFieldMapper; - try { - inferredFieldMapper = typeInference.infer(valueFetcher); - } catch (IOException e) { - logger.warn(e); - return null; - } - if (inferredFieldMapper == null) { - return null; - } - return getDerivedFieldType( - new DerivedField( - fieldName, - inferredFieldMapper.typeName(), - parentDerivedField.derivedField.getScript(), - parentDerivedField.derivedField.getSourceIndexedField() - ) - ); - } - - private DerivedFieldType getParentDerivedField(String fieldName) { - String parentFieldName = fieldName.split("\\.")[0]; - DerivedFieldType parentDerivedFieldType = (DerivedFieldType) derivedFieldTypeMap.get(parentFieldName); - if (parentDerivedFieldType == null) { - parentDerivedFieldType = (DerivedFieldType) queryShardContext.getMapperService().fieldType(parentFieldName); - } - return parentDerivedFieldType; - } - - private ValueFetcher getValueFetcher(String fieldName, Script script) { - String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1); - return new DerivedObjectFieldType.DerivedObjectFieldValueFetcher( - subFieldName, - 
DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()), - o -> o // raw object returned will be used to infer the type without modifying it - ); - } - - private Map getAllDerivedFieldTypeFromObject(Map derivedFieldObject) { - Map derivedFieldTypes = new HashMap<>(); - DocumentMapper documentMapper = queryShardContext.getMapperService() - .documentMapperParser() - .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject); - if (documentMapper != null && documentMapper.mappers() != null) { - for (Mapper mapper : documentMapper.mappers()) { - if (mapper instanceof DerivedFieldMapper) { - DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType(); - derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType); - } - } - } - return derivedFieldTypes; - } - - private DerivedFieldType getDerivedFieldType(DerivedField derivedField) { - Mapper.BuilderContext builderContext = new Mapper.BuilderContext( - queryShardContext.getMapperService().getIndexSettings().getSettings(), - new ContentPath(1) - ); - DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder( - derivedField, - queryShardContext.getMapperService().getIndexAnalyzers() - ); - return builder.build(builderContext).fieldType(); - } +public interface DerivedFieldResolver { + /** + * Resolves all derived fields matching a given pattern. It includes derived fields defined both in search requests + * and index mapping. 
+ * @param pattern simple wildcard pattern (for example {@code foo*}), not a regular expression + * @return all derived fields matching the pattern + */ + Set resolvePattern(String pattern); + + /** + * Resolves the MappedFieldType associated with a derived field + * @param fieldName field name to lookup + * @return mapped field type + */ + MappedFieldType resolve(String fieldName); } diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java index 9cedf4f9ab2cc..fd416fbd4743b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java @@ -20,6 +20,7 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.common.Booleans; import org.opensearch.common.TriFunction; @@ -123,7 +124,19 @@ public enum DerivedFieldSupportedTypes { NumberFieldMapper.Builder floatBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.FLOAT, false, false); return floatBuilder.build(context); }, name -> o -> new FloatField(name, Float.parseFloat(o.toString()), Field.Store.NO), o -> o), - OBJECT("object", KEYWORD::getFieldMapper, name -> o -> new KeywordField(name, (String) o, Field.Store.NO), o -> o); + OBJECT("object", (name, context, indexAnalyzers) -> { + // we create a keyword field type with index options set as NONE as we don't support queries directly on object type + KeywordFieldMapper.Builder keywordBuilder = new KeywordFieldMapper.Builder(name); + KeywordFieldMapper.KeywordFieldType keywordFieldType = keywordBuilder.buildFieldType(context, new FieldType()); + return new KeywordFieldMapper( + name, + new FieldType(), + keywordFieldType, + keywordBuilder.multiFieldsBuilder.build(keywordBuilder, context), + 
keywordBuilder.copyTo.build(), + keywordBuilder + ); + }, name -> o -> { throw new OpenSearchException("Cannot create IndexableField to execute queries on object derived field"); }, o -> o); final String name; private final TriFunction builder; diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java index e85d2acdef4b2..e53c821d0ae98 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java @@ -39,10 +39,11 @@ * Contains logic to execute different type of queries on a derived field of given type. * @opensearch.internal */ + public class DerivedFieldType extends MappedFieldType implements GeoShapeQueryable { final DerivedField derivedField; - private final FieldMapper typeFieldMapper; - private final Function indexableFieldGenerator; + final FieldMapper typeFieldMapper; + final Function indexableFieldGenerator; public DerivedFieldType( DerivedField derivedField, diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java b/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java index 096bc92beb29e..72a488a144e88 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldTypeInference.java @@ -49,6 +49,7 @@ public class FieldTypeInference { // TODO expose using a index setting? 
private int sampleSize; private static final int DEFAULT_SAMPLE_SIZE = 60; + private static final int MAX_SAMPLE_SIZE_ALLOWED = 1000; public FieldTypeInference(String indexName, MapperService mapperService, IndexReader indexReader) { this.indexName = indexName; @@ -58,7 +59,7 @@ public FieldTypeInference(String indexName, MapperService mapperService, IndexRe } public void setSampleSize(int sampleSize) { - this.sampleSize = sampleSize; + this.sampleSize = Math.min(sampleSize, MAX_SAMPLE_SIZE_ALLOWED); } public int getSampleSize() { @@ -70,7 +71,7 @@ public Mapper infer(ValueFetcher valueFetcher) throws IOException { Mapper inferredMapper = null; while (inferredMapper == null && valuesGenerator.hasNext()) { List values = valuesGenerator.next(); - if (values == null) { + if (values == null || values.isEmpty()) { continue; } // always use first value in case of multi value field to infer type diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedObjectFieldType.java b/server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java similarity index 65% rename from server/src/main/java/org/opensearch/index/mapper/DerivedObjectFieldType.java rename to server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java index 20181ab9a7776..05cbd76de0f1b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedObjectFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/ObjectDerivedFieldType.java @@ -9,6 +9,7 @@ package org.opensearch.index.mapper; import org.apache.lucene.index.IndexableField; +import org.opensearch.OpenSearchParseException; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.index.analysis.IndexAnalyzers; @@ -22,15 +23,28 @@ import java.util.Map; import java.util.function.Function; -public class DerivedObjectFieldType extends DerivedFieldType { +/** + * ObjectDerivedFieldType is similar to object field type in 
context of derived fields. + * It is not a primitive field type and doesn't support any queries directly. However, any nested derived field with parent as + * ObjectDerivedFieldType will make use of it to run query once the field type is inferred. + */ +public class ObjectDerivedFieldType extends DerivedFieldType { - DerivedObjectFieldType( + ObjectDerivedFieldType( DerivedField derivedField, FieldMapper typeFieldMapper, Function fieldFunction, IndexAnalyzers indexAnalyzers ) { - super(derivedField, typeFieldMapper, fieldFunction, indexAnalyzers); + super(derivedField, typeFieldMapper, derivedField.getType().equals(DerivedFieldSupportedTypes.DATE.getName()) ? (o -> { + // this is needed to support date type for nested fields, they need to be converted to long to create + // IndexableField + if (o instanceof String) { + return fieldFunction.apply(((DateFieldMapper) typeFieldMapper).fieldType().parse((String) o)); + } else { + return fieldFunction.apply(o); + } + }) : fieldFunction, indexAnalyzers); } @Override @@ -54,34 +68,43 @@ public DerivedFieldValueFetcher valueFetcher(QueryShardContext context, SearchLo } Function valueForDisplay = DerivedFieldSupportedTypes.getValueForDisplayGenerator(getType()); String subFieldName = name().substring(name().indexOf(".") + 1); - return new DerivedObjectFieldValueFetcher( + return new ObjectDerivedFieldValueFetcher( subFieldName, getDerivedFieldLeafFactory(derivedField.getScript(), context, searchLookup == null ? context.lookup() : searchLookup), valueForDisplay ); } - public static class DerivedObjectFieldValueFetcher extends DerivedFieldValueFetcher { + public static class ObjectDerivedFieldValueFetcher extends DerivedFieldValueFetcher { private final String subField; - public DerivedObjectFieldValueFetcher( + // TODO add it as part of index setting? 
+ private final boolean failOnInvalidJsonObjects; + + public ObjectDerivedFieldValueFetcher( String subField, DerivedFieldScript.LeafFactory derivedFieldScriptFactory, Function valueForDisplay ) { super(derivedFieldScriptFactory, valueForDisplay); this.subField = subField; + this.failOnInvalidJsonObjects = true; } @Override public List fetchValuesInternal(SourceLookup lookup) { List jsonObjects = super.fetchValuesInternal(lookup); - // TODO add check for valid json and error handling around the same if mismatch - // parse the value of field from the json object and return that instead List result = new ArrayList<>(); for (Object o : jsonObjects) { - Map s = XContentHelper.convertToMap(JsonXContent.jsonXContent, (String) o, false); - result.add(getNestedField(s, subField)); + try { + Map s = XContentHelper.convertToMap(JsonXContent.jsonXContent, (String) o, false); + result.add(getNestedField(s, subField)); + } catch (OpenSearchParseException e) { + if (failOnInvalidJsonObjects) { + throw e; + } + // TODO cannot log warnings as it can bloat up the logs. Add to some stats? 
+ } } return result; } diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index d3be3a9e55f35..200109bc702bd 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -58,6 +58,7 @@ import org.opensearch.index.cache.bitset.BitsetFilterCache; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DefaultDerivedFieldResolver; import org.opensearch.index.mapper.DerivedFieldResolver; import org.opensearch.index.mapper.DerivedFieldType; import org.opensearch.index.mapper.DocumentMapper; @@ -123,7 +124,6 @@ public class QueryShardContext extends QueryRewriteContext { private NestedScope nestedScope; private final ValuesSourceRegistry valuesSourceRegistry; private BitSetProducer parentFilter; - private DerivedFieldResolver derivedFieldResolver; public QueryShardContext( @@ -271,7 +271,7 @@ private QueryShardContext( this.fullyQualifiedIndex = fullyQualifiedIndex; this.allowExpensiveQueries = allowExpensiveQueries; this.valuesSourceRegistry = valuesSourceRegistry; - this.derivedFieldResolver = new DerivedFieldResolver(this, emptyMap(), emptyList()); + this.derivedFieldResolver = new DefaultDerivedFieldResolver(this, emptyMap(), emptyList()); } private void reset() { diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 8c6a385c8fc80..d694fd25f4bde 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -77,6 +77,7 @@ import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.Engine; +import org.opensearch.index.mapper.DefaultDerivedFieldResolver; 
import org.opensearch.index.mapper.DerivedFieldResolver; import org.opensearch.index.query.InnerHitContextBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; @@ -1079,7 +1080,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear if (request.source() != null && request.source().size() != 0 && (request.source().getDerivedFieldsObject() != null || request.source().getDerivedFields() != null)) { - DerivedFieldResolver derivedFieldResolver = new DerivedFieldResolver( + DerivedFieldResolver derivedFieldResolver = new DefaultDerivedFieldResolver( searchContext.getQueryShardContext(), request.source().getDerivedFieldsObject(), request.source().getDerivedFields() diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java index 1307028dd27b0..9061c532a0f8e 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldMapperQueryTests.java @@ -25,6 +25,7 @@ import org.opensearch.geometry.Rectangle; import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.script.DerivedFieldScript; import java.io.IOException; @@ -35,6 +36,7 @@ import org.mockito.Mockito; import static org.opensearch.index.query.QueryBuilders.geoShapeQuery; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { @@ -50,7 +52,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 1.5, 1500L, - new Tuple<>(10.0, 20.0) }, + new Tuple<>(10.0, 20.0), + "{ " + + "\"float_field\": 1.5," + + "\"boolean_field\": true," + + "\"long_field\": 1500," + + "\"date_field\": \"2024-03-20T08:30:45\"," + + 
"\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 1\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "232.0.0.0 GET /images/hm_bg.jpg?size=2.3KB HTTP/1.0 400 2024-03-20T09:15:20 2300", false, @@ -59,7 +70,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 2.3, 2300L, - new Tuple<>(20.0, 30.0) }, + new Tuple<>(20.0, 30.0), + "{ " + + "\"float_field\": 2.3," + + "\"boolean_field\": false," + + "\"long_field\": 2300," + + "\"date_field\": \"2024-03-20T09:15:20\"," + + "\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 2\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "26.1.0.0 DELETE /images/hm_bg.jpg?size=3.7KB HTTP/1.0 200 2024-03-20T10:05:55 3700", true, @@ -68,7 +88,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "DELETE", 3.7, 3700L, - new Tuple<>(30.0, 40.0) }, + new Tuple<>(30.0, 40.0), + "{ " + + "\"float_field\": 3.7," + + "\"boolean_field\": true," + + "\"long_field\": 3700," + + "\"date_field\": \"2024-03-20T10:05:55\"," + + "\"keyword_field\": \"DELETE\"," + + "\"text_field\": \"document number 3\"," + + "\"nested_field\": { \"sub_field_1\": \"DELETE\"}" + + "}" }, { "247.37.0.0 GET /french/splash_inet.html?size=4.1KB HTTP/1.0 400 2024-03-20T11:20:10 4100", false, @@ -77,7 +106,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 4.1, 4100L, - new Tuple<>(40.0, 50.0) }, + new Tuple<>(40.0, 50.0), + "{ " + + "\"float_field\": 4.1," + + "\"boolean_field\": false," + + "\"long_field\": 4100," + + "\"date_field\": \"2024-03-20T11:20:10\"," + + "\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 4\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "247.37.0.0 DELETE /french/splash_inet.html?size=5.8KB HTTP/1.0 400 2024-03-20T12:45:30 5800", false, @@ -86,7 +124,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "DELETE", 
5.8, 5800L, - new Tuple<>(50.0, 60.0) }, + new Tuple<>(50.0, 60.0), + "{ " + + "\"float_field\": 5.8," + + "\"boolean_field\": false," + + "\"long_field\": 5800," + + "\"date_field\": \"2024-03-20T12:45:30\"," + + "\"keyword_field\": \"DELETE\"," + + "\"text_field\": \"document number 5\"," + + "\"nested_field\": { \"sub_field_1\": \"DELETE\"}" + + "}" }, { "10.20.30.40 GET /path/to/resource?size=6.3KB HTTP/1.0 200 2024-03-20T13:10:15 6300", true, @@ -95,7 +142,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 6.3, 6300L, - new Tuple<>(60.0, 70.0) }, + new Tuple<>(60.0, 70.0), + "{ " + + "\"float_field\": 6.3," + + "\"boolean_field\": true," + + "\"long_field\": 6300," + + "\"date_field\": \"2024-03-20T13:10:15\"," + + "\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 6\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "50.60.70.80 GET /path/to/resource?size=7.2KB HTTP/1.0 404 2024-03-20T14:20:50 7200", false, @@ -104,7 +160,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 7.2, 7200L, - new Tuple<>(70.0, 80.0) }, + new Tuple<>(70.0, 80.0), + "{ " + + "\"float_field\": 7.2," + + "\"boolean_field\": false," + + "\"long_field\": 7200," + + "\"date_field\": \"2024-03-20T14:20:50\"," + + "\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 7\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "127.0.0.1 PUT /path/to/resource?size=8.9KB HTTP/1.0 500 2024-03-20T15:30:25 8900", false, @@ -113,7 +178,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "PUT", 8.9, 8900L, - new Tuple<>(80.0, 90.0) }, + new Tuple<>(80.0, 90.0), + "{ " + + "\"float_field\": 8.9," + + "\"boolean_field\": false," + + "\"long_field\": 8900," + + "\"date_field\": \"2024-03-20T15:30:25\"," + + "\"keyword_field\": \"PUT\"," + + "\"text_field\": \"document number 8\"," + + "\"nested_field\": { \"sub_field_1\": \"PUT\"}" + + "}" }, 
{ "127.0.0.1 GET /path/to/resource?size=9.4KB HTTP/1.0 200 2024-03-20T16:40:15 9400", true, @@ -122,7 +196,16 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 9.4, 9400L, - new Tuple<>(85.0, 90.0) }, + new Tuple<>(85.0, 90.0), + "{ " + + "\"float_field\": 9.4," + + "\"boolean_field\": true," + + "\"long_field\": 9400," + + "\"date_field\": \"2024-03-20T16:40:15\"," + + "\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 9\"," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" }, { "192.168.1.1 GET /path/to/resource?size=10.7KB HTTP/1.0 400 2024-03-20T17:50:40 10700", false, @@ -131,7 +214,17 @@ public class DerivedFieldMapperQueryTests extends MapperServiceTestCase { "GET", 10.7, 10700L, - new Tuple<>(90.0, 90.0) } }; + new Tuple<>(90.0, 90.0), + "{ " + + "\"float_field\": 10.7," + + "\"boolean_field\": false," + + "\"long_field\": 10700," + + "\"date_field\": \"2024-03-20T17:50:40\"," + + "\"keyword_field\": \"GET\"," + + "\"text_field\": \"document number 10\"," + + "\"invalid_field\": {}," + + "\"nested_field\": { \"sub_field_1\": \"GET\"}" + + "}" } }; public void testAllPossibleQueriesOnDerivedFields() throws IOException { MapperService mapperService = createMapperService(topMapping(b -> { @@ -188,6 +281,12 @@ public void testAllPossibleQueriesOnDerivedFields() throws IOException { b.field("script", ""); } b.endObject(); + b.startObject("object_field"); + { + b.field("type", "object"); + b.field("script", ""); + } + b.endObject(); } b.endObject(); })); @@ -302,4 +401,135 @@ public void execute() { } } } + + public void testObjectDerivedFields() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("raw_message"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("object_field"); + { + b.field("type", "object"); + b.field("script", ""); + } + 
b.endObject(); + } + b.endObject(); + })); + + List docs = new ArrayList<>(); + for (Object[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_message", (String) request[0], Field.Store.YES)); + docs.add(document); + } + + int[] scriptIndex = { 8 }; + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> new DerivedFieldScript( + params, + lookup, + ctx + ) { + int docId = 0; + + @Override + public void setDocument(int docId) { + super.setDocument(docId); + this.docId = docId; + } + + @Override + public void execute() { + addEmittedValue(raw_requests[docId][scriptIndex[0]]); + } + }; + + // Index and Search + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.compile(Mockito.any(), Mockito.any())).thenReturn(factory); + when(queryShardContext.sourcePath("raw_message")).thenReturn(Set.of("raw_message")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.getIndexReader()).thenReturn(reader); + when(queryShardContext.fieldMapper(anyString())).thenAnswer(inv -> { + MappedFieldType res = mapperService.fieldType(inv.getArguments()[0].toString()); + if (res == null) { + DerivedFieldResolver derivedFieldResolver = new DefaultDerivedFieldResolver(queryShardContext, null, null); + res = derivedFieldResolver.resolve(inv.getArguments()[0].toString()); + } + return res; + }); + IndexSearcher searcher = new IndexSearcher(reader); + + // NOTE: object_field.keyword_field will be resolved to a text field for nested field as that's the default behaviour in + // dynamic mappings too + TermQueryBuilder 
termQueryBuilder = new TermQueryBuilder("object_field.keyword_field", "GET"); + termQueryBuilder.caseInsensitive(true); + Query query = termQueryBuilder.toQuery(queryShardContext); + TopDocs topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + query = QueryBuilders.matchPhraseQuery("object_field.text_field", "document number 1").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(1, topDocs.totalHits.value); + + query = QueryBuilders.matchPhraseQuery("object_field.text_field", "document number 11").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(0, topDocs.totalHits.value); + + // Range queries of types - date, long and double + query = QueryBuilders.rangeQuery("object_field.date_field").from("2024-03-20T14:20:50").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(4, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.float_field").from("4.1").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + query = QueryBuilders.rangeQuery("object_field.long_field").from("5800").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(6, topDocs.totalHits.value); + + // Prefix Query + query = QueryBuilders.prefixQuery("object_field.keyword_field", "de").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + query = QueryBuilders.wildcardQuery("object_field.keyword_field", "g*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(7, topDocs.totalHits.value); + + // Regexp Query + query = QueryBuilders.regexpQuery("object_field.keyword_field", ".*let.*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + // tested deep nested field + query = 
QueryBuilders.regexpQuery("object_field.nested_field.sub_field_1", ".*let.*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(2, topDocs.totalHits.value); + + // tested missing nested field + query = QueryBuilders.regexpQuery("object_field.invalid_field.sub_field", ".*let.*").toQuery(queryShardContext); + topDocs = searcher.search(query, 10); + assertEquals(0, topDocs.totalHits.value); + } + } + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java new file mode 100644 index 0000000000000..acdc18fccef97 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldResolverTests.java @@ -0,0 +1,407 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.core.index.Index; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.script.Script; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class DerivedFieldResolverTests extends MapperServiceTestCase { + public void testResolutionFromIndexMapping() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + 
b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = new DefaultDerivedFieldResolver(queryShardContext, null, null); + assertEquals("keyword", resolver.resolve("derived_text").getType()); + assertEqualDerivedField(new DerivedField("derived_text", "keyword", null), resolver.resolve("derived_text").derivedField); + } + + public void testResolutionFromSearchRequest() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = new DefaultDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields() + ); + assertEquals("text", resolver.resolve("derived_text").getType()); + assertEqualDerivedField(new DerivedField("derived_text", "text", null), resolver.resolve("derived_text").derivedField); + assertEquals("object", resolver.resolve("derived_object").getType()); + assertEqualDerivedField(new DerivedField("derived_object", "object", null), resolver.resolve("derived_object").derivedField); + assertEquals("keyword", resolver.resolve("derived_keyword").getType()); + assertEqualDerivedField(new DerivedField("derived_keyword", "keyword", null), resolver.resolve("derived_keyword").derivedField); + } + + public void testEmpty() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = 
createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = new DefaultDerivedFieldResolver(queryShardContext, null, null); + assertNull(resolver.resolve("derived_keyword")); + } + + public void testResolutionPrecedence() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + b.startObject("derived_2"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = new DefaultDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields() + ); + + // precedence given to search definition; derived_text is present in both - + // search definition uses type text, whereas index definition uses the type keyword + + assertEquals("text", resolver.resolve("derived_text").getType()); + assertEqualDerivedField(new DerivedField("derived_text", "text", null), resolver.resolve("derived_text").derivedField); + + assertEquals("keyword", resolver.resolve("derived_2").getType()); + assertEqualDerivedField(new DerivedField("derived_2", "keyword", null), resolver.resolve("derived_2").derivedField); + } + + public void testNestedWithParentDefinedInIndexMapping() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + 
b.startObject("derived"); + { + b.startObject("derived_obj"); + { + b.field("type", "object"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver(queryShardContext, null, null, typeInference); + assertEquals("text", resolver.resolve("derived_obj.sub_field1").getType()); + assertEqualDerivedField( + new DerivedField("derived_obj.sub_field1", "text", null), + resolver.resolve("derived_obj.sub_field1").derivedField + ); + assertEquals("text", resolver.resolve("derived_obj.sub_field1.sub_field2").getType()); + assertEqualDerivedField( + new DerivedField("derived_obj.sub_field1.sub_field2", "text", null), + resolver.resolve("derived_obj.sub_field1.sub_field2").derivedField + ); + } + } + } + + public void testNestedWithParentDefinedInSearchRequest() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = 
new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInference + ); + assertEquals("text", resolver.resolve("derived_object.sub_field1").getType()); + assertEqualDerivedField( + new DerivedField("derived_object.sub_field1", "text", null), + resolver.resolve("derived_object.sub_field1").derivedField + ); + assertEquals("text", resolver.resolve("derived_object.sub_field1.sub_field2").getType()); + assertEqualDerivedField( + new DerivedField("derived_object.sub_field1.sub_field2", "text", null), + resolver.resolve("derived_object.sub_field1.sub_field2").derivedField + ); + assertEquals(2, resolver.cnt); + + } + } + } + + public void testNestedWithParentUndefined() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver(queryShardContext, null, null, typeInference); + assertNull(resolver.resolve("derived_object.sub_field1")); + } + } + } + + public void testInferredTypeNull() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + 
try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInference, + true + ); + assertNull(resolver.resolve("derived_object.field")); + } + } + } + + public void testInferThrowsIOException() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + FieldTypeInference typeInferenceMock = mock(FieldTypeInference.class); + when(typeInferenceMock.infer(any())).thenThrow(new IOException("Simulated IOException")); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInferenceMock, + true + ); + assertNull(resolver.resolve("derived_object.field")); + } + + public void testResolutionCaching() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> {})); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + Document d = new Document(); + for (int i = 0; i < 10; i++) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + FieldTypeInference typeInference = new 
FieldTypeInference("test_index", queryShardContext.getMapperService(), reader); + TestDerivedFieldResolver resolver = new TestDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields(), + typeInference + ); + assertEquals("text", resolver.resolve("derived_object.sub_field1").getType()); + assertEquals("text", resolver.resolve("derived_object.sub_field1").getType()); + assertEquals(1, resolver.cnt); + } + } + } + + public void testResolvePattern() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> { + b.startObject("properties"); + { + b.startObject("indexed_field"); + { + b.field("type", "text"); + } + b.endObject(); + } + b.endObject(); + b.startObject("derived"); + { + b.startObject("derived_text"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + b.startObject("derived_2"); + { + b.field("type", "keyword"); + b.field("script", ""); + } + b.endObject(); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + DefaultDerivedFieldResolver resolver = new DefaultDerivedFieldResolver( + queryShardContext, + createDerivedFieldsObject(), + createDerivedFields() + ); + assertEquals(4, resolver.resolvePattern("derived_*").size()); + assertEquals(4, resolver.resolvePattern("*").size()); // should not include regular field indexed_field + } + + private void assertEqualDerivedField(DerivedField expected, DerivedField actual) { + assertEquals(expected.getName(), actual.getName()); + assertEquals(expected.getType(), actual.getType()); + } + + private Map createDerivedFieldsObject() { + return new HashMap<>() { + { + put("derived_text", new HashMap() { + { + put("type", "text"); + put("script", ""); + } + }); + put("derived_object", new HashMap() { + { + put("type", "object"); + put("script", ""); + } + }); + } + }; + } + + private static 
class TestDerivedFieldResolver extends DefaultDerivedFieldResolver { + private final boolean error; + private int cnt; + + public TestDerivedFieldResolver( + QueryShardContext queryShardContext, + Map derivedFieldsObject, + List derivedFields, + FieldTypeInference typeInference + ) { + this(queryShardContext, derivedFieldsObject, derivedFields, typeInference, false); + } + + public TestDerivedFieldResolver( + QueryShardContext queryShardContext, + Map derivedFieldsObject, + List derivedFields, + FieldTypeInference typeInference, + boolean error + ) { + super(queryShardContext, derivedFieldsObject, derivedFields, typeInference); + this.error = error; + this.cnt = 0; + } + + @Override + ValueFetcher getValueFetcher(String fieldName, Script script) { + cnt++; + if (!error) { + return lookup -> List.of("text field content"); + } else { + return lookup -> null; + } + } + } + + private List createDerivedFields() { + DerivedField derivedField = new DerivedField("derived_keyword", "keyword", new Script(""), null); + return Collections.singletonList(derivedField); + } + +} diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java index 6b050ce4e70ff..f65acd0db0627 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java @@ -15,11 +15,13 @@ import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; +import org.opensearch.OpenSearchException; import org.opensearch.common.collect.Tuple; import org.opensearch.script.Script; import java.util.List; +import static org.apache.lucene.index.IndexOptions.NONE; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -88,6 +90,13 @@ public void testDoubleType() { expectThrows(Exception.class, () -> 
dft.getIndexableFieldGenerator().apply("")); } + public void testObjectType() { + DerivedFieldType dft = createDerivedFieldType("object"); + assertTrue(dft.getFieldMapper() instanceof KeywordFieldMapper); + assertEquals(dft.getFieldMapper().fieldType.indexOptions(), NONE); + assertThrows(OpenSearchException.class, () -> dft.getIndexableFieldGenerator().apply("")); + } + public void testUnsupportedType() { expectThrows(IllegalArgumentException.class, () -> createDerivedFieldType("match_only_text")); } diff --git a/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java b/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java index 3909fa2b7f6dd..12677edc8efa7 100644 --- a/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java +++ b/server/src/test/java/org/opensearch/index/query/QueryShardContextTests.java @@ -31,7 +31,6 @@ package org.opensearch.index.query; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; @@ -64,18 +63,12 @@ import org.opensearch.index.fielddata.LeafFieldData; import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData; -import org.opensearch.index.mapper.ContentPath; -import org.opensearch.index.mapper.DerivedField; -import org.opensearch.index.mapper.DerivedFieldMapper; import org.opensearch.index.mapper.DerivedFieldResolver; -import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.DerivedFieldType; import org.opensearch.index.mapper.IndexFieldMapper; import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.mapper.MappingLookup; import org.opensearch.index.mapper.TextFieldMapper; -import org.opensearch.script.Script; import 
org.opensearch.search.lookup.LeafDocLookup; import org.opensearch.search.lookup.LeafSearchLookup; import org.opensearch.search.lookup.SearchLookup; @@ -86,7 +79,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.function.BiFunction; import java.util.function.Supplier; @@ -130,27 +122,14 @@ public void testFailIfFieldMappingNotFound() { public void testDerivedFieldMapping() { QueryShardContext context = createQueryShardContext(IndexMetadata.INDEX_UUID_NA_VALUE, null); - assertNull(context.failIfFieldMappingNotFound("test_derived", null)); - assertNull(context.failIfFieldMappingNotFound("test_derived", null)); - DocumentMapper documentMapper = mock(DocumentMapper.class); - Mapper.BuilderContext builderContext = new Mapper.BuilderContext(Settings.EMPTY, new ContentPath(0)); - DerivedFieldMapper derivedFieldMapper = new DerivedFieldMapper.Builder( - new DerivedField("test_derived", "keyword", new Script("")), - context.getIndexAnalyzers() - ).build(builderContext); - MappingLookup mappingLookup = new MappingLookup( - Collections.singletonList(derivedFieldMapper), - Collections.emptyList(), - Collections.emptyList(), - 0, - new StandardAnalyzer() - ); - when(documentMapper.mappers()).thenReturn(mappingLookup); - context.setDerivedFieldResolver( - new DerivedFieldResolver(null, Map.of("test_derived", derivedFieldMapper.fieldType()), Collections.emptyList()) - ); - context.setAllowUnmappedFields(false); - assertEquals(derivedFieldMapper.fieldType(), context.failIfFieldMappingNotFound("test_derived", null)); + assertNull(context.failIfFieldMappingNotFound("derived_field_search_req", null)); + DerivedFieldResolver derivedFieldResolver = mock(DerivedFieldResolver.class); + context.setDerivedFieldResolver(derivedFieldResolver); + DerivedFieldType mockDerivedFieldType = mock(DerivedFieldType.class); + when(derivedFieldResolver.resolve("derived_field_search_req")).thenReturn(mockDerivedFieldType); + 
assertEquals(mockDerivedFieldType, context.failIfFieldMappingNotFound("derived_field_search_req", null)); + when(derivedFieldResolver.resolve("field_missing")).thenReturn(null); + assertNull(context.failIfFieldMappingNotFound("field_missing", null)); } public void testToQueryFails() { diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java index 78b3a2ced292a..55a2e00bf7d7c 100644 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java +++ b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java @@ -27,8 +27,8 @@ import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DefaultDerivedFieldResolver; import org.opensearch.index.mapper.DerivedField; -import org.opensearch.index.mapper.DerivedFieldResolver; import org.opensearch.index.mapper.DerivedFieldSupportedTypes; import org.opensearch.index.mapper.DerivedFieldType; import org.opensearch.index.mapper.Mapper; @@ -221,7 +221,7 @@ public void testDerivedFieldFromSearchMapping() throws IOException { // This mock behavior is similar to adding derived fields in search request mockShardContext.setDerivedFieldResolver( - new DerivedFieldResolver( + new DefaultDerivedFieldResolver( mockShardContext, null, List.of(