diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java
new file mode 100644
index 0000000000000..c04a760ff7cf1
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldSupportedTypes.java
@@ -0,0 +1,174 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.InetAddressPoint;
+import org.apache.lucene.document.KeywordField;
+import org.apache.lucene.document.LatLonPoint;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.opensearch.Version;
+import org.opensearch.common.Booleans;
+import org.opensearch.common.lucene.Lucene;
+import org.opensearch.common.network.InetAddresses;
+
+import java.net.InetAddress;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * Field types that a derived field may declare. Each constant pairs a factory for the
+ * {@link FieldMapper} of the underlying type with a factory that converts a value emitted
+ * by the derived field script into a Lucene {@link IndexableField}.
+ *
+ * @opensearch.internal
+ */
+public enum DerivedFieldSupportedTypes {
+
+    BOOLEAN("boolean", (name, context) -> {
+        BooleanFieldMapper.Builder builder = new BooleanFieldMapper.Builder(name);
+        return builder.build(context);
+    }, name -> o -> {
+        // Trying to mimic the logic for parsing source value as used in BooleanFieldMapper valueFetcher
+        Boolean value;
+        if (o instanceof Boolean) {
+            value = (Boolean) o;
+        } else {
+            String textValue = o.toString();
+            value = Booleans.parseBooleanStrict(textValue, false);
+        }
+        return new Field(name, value ? "T" : "F", BooleanFieldMapper.Defaults.FIELD_TYPE);
+    }),
+    DATE("date", (name, context) -> {
+        // TODO: should we support mapping settings exposed by a given field type from derived fields too?
+        // for example, support `format` for date type?
+        DateFieldMapper.Builder builder = new DateFieldMapper.Builder(
+            name,
+            DateFieldMapper.Resolution.MILLISECONDS,
+            DateFieldMapper.getDefaultDateTimeFormatter(),
+            false,
+            Version.CURRENT
+        );
+        return builder.build(context);
+    }, name -> o -> new LongPoint(name, (long) o)),
+    GEO_POINT("geo_point", (name, context) -> {
+        GeoPointFieldMapper.Builder builder = new GeoPointFieldMapper.Builder(name);
+        return builder.build(context);
+    }, name -> o -> {
+        // The script must emit exactly two doubles (lat, lon); validate both before casting
+        if (!(o instanceof List)
+            || ((List<?>) o).size() != 2
+            || !(((List<?>) o).get(0) instanceof Double)
+            || !(((List<?>) o).get(1) instanceof Double)) {
+            throw new ClassCastException("geo_point should be in format emit(double lat, double lon) for derived fields");
+        }
+        return new LatLonPoint(name, (Double) ((List<?>) o).get(0), (Double) ((List<?>) o).get(1));
+    }),
+    IP("ip", (name, context) -> {
+        IpFieldMapper.Builder builder = new IpFieldMapper.Builder(name, false, Version.CURRENT);
+        return builder.build(context);
+    }, name -> o -> {
+        InetAddress address;
+        if (o instanceof InetAddress) {
+            address = (InetAddress) o;
+        } else {
+            address = InetAddresses.forString(o.toString());
+        }
+        return new InetAddressPoint(name, address);
+    }),
+    KEYWORD("keyword", (name, context) -> {
+        FieldType dummyFieldType = new FieldType();
+        dummyFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+        KeywordFieldMapper.Builder keywordBuilder = new KeywordFieldMapper.Builder(name);
+        KeywordFieldMapper.KeywordFieldType keywordFieldType = keywordBuilder.buildFieldType(context, dummyFieldType);
+        keywordFieldType.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
+        return new KeywordFieldMapper(
+            name,
+            dummyFieldType,
+            keywordFieldType,
+            keywordBuilder.multiFieldsBuilder.build(keywordBuilder, context),
+            keywordBuilder.copyTo.build(),
+            keywordBuilder
+        );
+    }, name -> o -> new KeywordField(name, (String) o, Field.Store.NO)),
+    LONG("long", (name, context) -> {
+        NumberFieldMapper.Builder longBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.LONG, false, false);
+        return longBuilder.build(context);
+    }, name -> o -> new LongField(name, Long.parseLong(o.toString()), Field.Store.NO)),
+    DOUBLE("double", (name, context) -> {
+        NumberFieldMapper.Builder doubleBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.DOUBLE, false, false);
+        return doubleBuilder.build(context);
+    }, name -> o -> new DoubleField(name, Double.parseDouble(o.toString()), Field.Store.NO));
+
+    final String name;
+    private final BiFunction<String, Mapper.BuilderContext, FieldMapper> builder;
+
+    private final Function<String, Function<Object, IndexableField>> indexableFieldBuilder;
+
+    DerivedFieldSupportedTypes(
+        String name,
+        BiFunction<String, Mapper.BuilderContext, FieldMapper> builder,
+        Function<String, Function<Object, IndexableField>> indexableFieldBuilder
+    ) {
+        this.name = name;
+        this.builder = builder;
+        this.indexableFieldBuilder = indexableFieldBuilder;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    private FieldMapper getFieldMapper(String name, Mapper.BuilderContext context) {
+        return builder.apply(name, context);
+    }
+
+    private Function<Object, IndexableField> getIndexableFieldGenerator(String name) {
+        return indexableFieldBuilder.apply(name);
+    }
+
+    private static final Map<String, DerivedFieldSupportedTypes> enumMap = Arrays.stream(DerivedFieldSupportedTypes.values())
+        .collect(Collectors.toMap(DerivedFieldSupportedTypes::getName, enumValue -> enumValue));
+
+    /**
+     * Builds the {@link FieldMapper} backing a derived field of the given type.
+     *
+     * @throws IllegalArgumentException if {@code type} is not a supported derived field type
+     */
+    public static FieldMapper getFieldMapperFromType(String type, String name, Mapper.BuilderContext context) {
+        return fromTypeName(type).getFieldMapper(name, context);
+    }
+
+    /**
+     * Returns the function that turns a value emitted for field {@code name} into an {@link IndexableField}.
+     *
+     * @throws IllegalArgumentException if {@code type} is not a supported derived field type
+     */
+    public static Function<Object, IndexableField> getIndexableFieldGeneratorType(String type, String name) {
+        return fromTypeName(type).getIndexableFieldGenerator(name);
+    }
+
+    // Single lookup shared by the static accessors so the unsupported-type error stays consistent
+    private static DerivedFieldSupportedTypes fromTypeName(String type) {
+        DerivedFieldSupportedTypes supportedType = enumMap.get(type);
+        if (supportedType == null) {
+            throw new IllegalArgumentException("Type [" + type + "] isn't supported in Derived field context.");
+        }
+        return supportedType;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java
new file mode 100644
index 0000000000000..5fd18271aaf40
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldType.java
@@ -0,0 +1,281 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.queries.spans.SpanQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.opensearch.common.Nullable;
+import org.opensearch.common.geo.ShapeRelation;
+import org.opensearch.common.time.DateMathParser;
+import org.opensearch.common.unit.Fuzziness;
+import org.opensearch.index.query.DerivedFieldQuery;
+import org.opensearch.index.query.QueryShardContext;
+import org.opensearch.script.DerivedFieldScript;
+import org.opensearch.script.Script;
+import org.opensearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.time.ZoneId;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+/**
+ * MappedFieldType for Derived Fields.
+ * Contains the logic to build the different types of queries on derived fields: each query is first
+ * built by the mapped field type of the underlying {@code type} and then wrapped in a {@link DerivedFieldQuery}.
+ * @opensearch.internal
+ */
+public final class DerivedFieldType extends MappedFieldType {
+    // Declared type of the derived field, e.g. "keyword"; typeName() itself always returns "derived"
+    private final String type;
+
+    private final Script script;
+
+    // Mapper of the underlying type; its mapped field type builds the query that gets wrapped
+    final FieldMapper typeFieldMapper;
+
+    // Converts an object into a Lucene IndexableField; handed to every DerivedFieldQuery built here
+    final Function<Object, IndexableField> indexableFieldGenerator;
+
+    public DerivedFieldType(
+        String name,
+        String type,
+        Script script,
+        boolean isIndexed,
+        boolean isStored,
+        boolean hasDocValues,
+        Map<String, String> meta,
+        FieldMapper typeFieldMapper,
+        Function<Object, IndexableField> fieldFunction
+    ) {
+        super(name, isIndexed, isStored, hasDocValues, typeFieldMapper.fieldType().getTextSearchInfo(), meta);
+        this.type = type;
+        this.script = script;
+        this.typeFieldMapper = typeFieldMapper;
+        this.indexableFieldGenerator = fieldFunction;
+    }
+
+    /** Creates a derived field type that is not indexed, not stored, has no doc values and has empty meta. */
+    public DerivedFieldType(
+        String name,
+        String type,
+        Script script,
+        FieldMapper typeFieldMapper,
+        Function<Object, IndexableField> fieldFunction
+    ) {
+        this(name, type, script, false, false, false, Collections.emptyMap(), typeFieldMapper, fieldFunction);
+    }
+
+    @Override
+    public String typeName() {
+        return "derived";
+    }
+
+    public String getType() {
+        return type;
+    }
+
+    @Override
+    public DerivedFieldValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
+        if (format != null) {
+            throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
+        }
+        return new DerivedFieldValueFetcher(getDerivedFieldLeafFactory(context));
+    }
+
+    @Override
+    public Query termQuery(Object value, QueryShardContext context) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.termQuery(value, context), context);
+    }
+
+    @Override
+    public Query termQueryCaseInsensitive(Object value, @Nullable QueryShardContext context) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.termQueryCaseInsensitive(value, context), context);
+    }
+
+    @Override
+    public Query termsQuery(List<?> values, @Nullable QueryShardContext context) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.termsQuery(values, context), context);
+    }
+
+    @Override
+    public Query rangeQuery(
+        Object lowerTerm,
+        Object upperTerm,
+        boolean includeLower,
+        boolean includeUpper,
+        ShapeRelation relation,
+        ZoneId timeZone,
+        DateMathParser parser,
+        QueryShardContext context
+    ) {
+        Query query = typeFieldMapper.mappedFieldType.rangeQuery(
+            lowerTerm,
+            upperTerm,
+            includeLower,
+            includeUpper,
+            relation,
+            timeZone,
+            parser,
+            context
+        );
+        return wrapQuery(query, context);
+    }
+
+    @Override
+    public Query fuzzyQuery(
+        Object value,
+        Fuzziness fuzziness,
+        int prefixLength,
+        int maxExpansions,
+        boolean transpositions,
+        QueryShardContext context
+    ) {
+        Query query = typeFieldMapper.mappedFieldType.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context);
+        return wrapQuery(query, context);
+    }
+
+    @Override
+    public Query fuzzyQuery(
+        Object value,
+        Fuzziness fuzziness,
+        int prefixLength,
+        int maxExpansions,
+        boolean transpositions,
+        @Nullable MultiTermQuery.RewriteMethod method,
+        QueryShardContext context
+    ) {
+        Query query = typeFieldMapper.mappedFieldType.fuzzyQuery(
+            value,
+            fuzziness,
+            prefixLength,
+            maxExpansions,
+            transpositions,
+            method,
+            context
+        );
+        return wrapQuery(query, context);
+    }
+
+    @Override
+    public Query prefixQuery(
+        String value,
+        @Nullable MultiTermQuery.RewriteMethod method,
+        boolean caseInsensitive,
+        QueryShardContext context
+    ) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.prefixQuery(value, method, caseInsensitive, context), context);
+    }
+
+    @Override
+    public Query wildcardQuery(
+        String value,
+        @Nullable MultiTermQuery.RewriteMethod method,
+        boolean caseInsensitive,
+        QueryShardContext context
+    ) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.wildcardQuery(value, method, caseInsensitive, context), context);
+    }
+
+    @Override
+    public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.normalizedWildcardQuery(value, method, context), context);
+    }
+
+    @Override
+    public Query regexpQuery(
+        String value,
+        int syntaxFlags,
+        int matchFlags,
+        int maxDeterminizedStates,
+        @Nullable MultiTermQuery.RewriteMethod method,
+        QueryShardContext context
+    ) {
+        Query query = typeFieldMapper.mappedFieldType.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
+        return wrapQuery(query, context);
+    }
+
+    @Override
+    public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
+        return wrapQuery(typeFieldMapper.mappedFieldType.phraseQuery(stream, slop, enablePositionIncrements, context), context);
+    }
+
+    @Override
+    public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context)
+        throws IOException {
+        return wrapQuery(typeFieldMapper.mappedFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context), context);
+    }
+
+    @Override
+    public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
+        return wrapQuery(typeFieldMapper.mappedFieldType.phrasePrefixQuery(stream, slop, maxExpansions, context), context);
+    }
+
+    @Override
+    public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) {
+        throw new IllegalArgumentException(
+            "Can only use span prefix queries on text fields - not on [" + name() + "] which is of type [" + typeName() + "]"
+        );
+    }
+
+    @Override
+    public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) {
+        return wrapQuery(typeFieldMapper.mappedFieldType.distanceFeatureQuery(origin, pivot, boost, context), context);
+    }
+
+    @Override
+    public Query existsQuery(QueryShardContext context) {
+        throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support exist queries");
+    }
+
+    @Override
+    public boolean isAggregatable() {
+        return false;
+    }
+
+    /**
+     * Wraps a query built by the underlying field type in a {@link DerivedFieldQuery}, passing along a
+     * fresh {@link DerivedFieldValueFetcher}, the search lookup, the indexable field generator and the
+     * underlying type's index analyzer.
+     */
+    private Query wrapQuery(Query query, QueryShardContext context) {
+        DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(getDerivedFieldLeafFactory(context));
+        return new DerivedFieldQuery(
+            query,
+            valueFetcher,
+            context.lookup(),
+            indexableFieldGenerator,
+            typeFieldMapper.mappedFieldType.indexAnalyzer()
+        );
+    }
+
+    /**
+     * Compiles {@link #script} in the {@link DerivedFieldScript#CONTEXT} and creates its leaf factory.
+     *
+     * @throws IllegalArgumentException if {@code _source} is disabled in the mappings
+     */
+    private DerivedFieldScript.LeafFactory getDerivedFieldLeafFactory(QueryShardContext context) {
+        if (!context.documentMapper("").sourceMapper().enabled()) {
+            throw new IllegalArgumentException(
+                "DerivedFieldQuery error: unable to fetch fields from _source field: _source is disabled in the mappings "
+                    + "for index ["
+                    + context.index().getName()
+                    + "]"
+            );
+        }
+        DerivedFieldScript.Factory factory = context.compile(script, DerivedFieldScript.CONTEXT);
+        return factory.newFactory(script.getParams(), context.lookup());
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java
index c14b2c92c89c3..42b974734e5e7 100644
--- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java
@@ -218,7 +218,7 @@ protected List<Parameter<?>> getParameters() {
             );
         }
 
-        private KeywordFieldType buildFieldType(BuilderContext context, FieldType fieldType) {
+        protected KeywordFieldType buildFieldType(BuilderContext context, FieldType fieldType) {
             NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER;
             NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER;
             String normalizerName = this.normalizer.getValue();
diff --git a/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java
new file mode 100644
index 0000000000000..72fb7c88cc478
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/mapper/DerivedFieldTypeTests.java
@@ -0,0 +1,101 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.InetAddressPoint;
+import org.apache.lucene.document.KeywordField;
+import org.apache.lucene.document.LatLonPoint;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.LongPoint;
+import org.opensearch.script.Script;
+
+import java.util.List;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests that {@link DerivedFieldType} is wired to the expected field mapper and indexable field
+ * generator for every type in {@link DerivedFieldSupportedTypes}.
+ */
+public class DerivedFieldTypeTests extends FieldTypeTestCase {
+
+    private DerivedFieldType createDerivedFieldType(String type) {
+        Mapper.BuilderContext context = mock(Mapper.BuilderContext.class);
+        when(context.path()).thenReturn(new ContentPath());
+        // Use one name for the derived field, its mapper and its generator
+        String fieldName = type + "_derived_field";
+        return new DerivedFieldType(
+            fieldName,
+            type,
+            new Script(""),
+            DerivedFieldSupportedTypes.getFieldMapperFromType(type, fieldName, context),
+            DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(type, fieldName)
+        );
+    }
+
+    public void testBooleanType() {
+        DerivedFieldType dft = createDerivedFieldType("boolean");
+        assertTrue(dft.typeFieldMapper instanceof BooleanFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply(true) instanceof Field);
+        assertTrue(dft.indexableFieldGenerator.apply(false) instanceof Field);
+    }
+
+    public void testDateType() {
+        DerivedFieldType dft = createDerivedFieldType("date");
+        assertTrue(dft.typeFieldMapper instanceof DateFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply(System.currentTimeMillis()) instanceof LongPoint);
+        expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply("blah"));
+    }
+
+    public void testGeoPointType() {
+        DerivedFieldType dft = createDerivedFieldType("geo_point");
+        assertTrue(dft.typeFieldMapper instanceof GeoPointFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply(List.of(10.0, 20.0)) instanceof LatLonPoint);
+        expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of(10.0)));
+        expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of()));
+        expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of("10")));
+        expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of(10.0, 20.0, 30.0)));
+        expectThrows(ClassCastException.class, () -> dft.indexableFieldGenerator.apply(List.of(10.0, "20")));
+    }
+
+    public void testIPType() {
+        DerivedFieldType dft = createDerivedFieldType("ip");
+        assertTrue(dft.typeFieldMapper instanceof IpFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply("127.0.0.1") instanceof InetAddressPoint);
+        expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply("blah"));
+    }
+
+    public void testKeywordType() {
+        DerivedFieldType dft = createDerivedFieldType("keyword");
+        assertTrue(dft.typeFieldMapper instanceof KeywordFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply("test_keyword") instanceof KeywordField);
+        expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply(10));
+    }
+
+    public void testLongType() {
+        DerivedFieldType dft = createDerivedFieldType("long");
+        assertTrue(dft.typeFieldMapper instanceof NumberFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply(10) instanceof LongField);
+        expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply(10.0));
+    }
+
+    public void testDoubleType() {
+        DerivedFieldType dft = createDerivedFieldType("double");
+        assertTrue(dft.typeFieldMapper instanceof NumberFieldMapper);
+        assertTrue(dft.indexableFieldGenerator.apply(10.0) instanceof DoubleField);
+        expectThrows(Exception.class, () -> dft.indexableFieldGenerator.apply(""));
+    }
+
+    public void testUnsupportedType() {
+        expectThrows(IllegalArgumentException.class, () -> createDerivedFieldType("match_only_text"));
+    }
+}