Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] [DerivedFields] PR2 - Implementation for all supported types, DerivedFieldType and DerivedFieldMapper #13041

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591))
- Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583))
- Add match_only_text field that is optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039))
- Add derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569))

### Dependencies
- Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.IndexableField;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.script.Script;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;

/**
 * Field mapper for fields declared with the {@code derived} content type.
 * <p>
 * A mapper instance remembers the {@code type} and {@code script} values of the {@link Builder} that created it
 * and exposes them through {@link #getType()} and {@link #getScript()}. It never parses document values itself:
 * {@link #parseCreateField(ParseContext)} always throws.
 *
 * @opensearch.internal
 */
public class DerivedFieldMapper extends ParametrizedFieldMapper {

    public static final String CONTENT_TYPE = "derived";

    /** Type parser that creates a fresh {@link Builder} for the given field name. */
    public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n));

    /** Value of the {@code type} parameter captured from the builder. */
    private final String type;

    /** Value of the {@code script} parameter captured from the builder; may be {@code null}. */
    private final Script script;

    /** Narrows a generic mapper to this class so parameter initializers can read its fields. */
    private static DerivedFieldMapper toType(FieldMapper mapper) {
        return (DerivedFieldMapper) mapper;
    }

    /**
     * Builder for this field mapper
     *
     * @opensearch.internal
     */
    public static class Builder extends ParametrizedFieldMapper.Builder {
        // TODO: The type of parameter may change here if the actual underlying FieldType object is needed
        // NOTE(review): the default "text" is not among the names registered in DerivedFieldSupportedTypes,
        // so a mapping that omits `type` would presumably be rejected in build() - confirm this is intended.
        private final Parameter<String> type = Parameter.stringParam("type", false, mapper -> toType(mapper).type, "text");

        // The script defaults to null and is only serialized when a value is present.
        private final Parameter<Script> script = new Parameter<>(
            "script",
            false,
            () -> null,
            (fieldName, parserContext, node) -> {
                if (node == null) {
                    return null;
                }
                return Script.parse(node);
            },
            mapper -> toType(mapper).script
        ).setSerializerCheck((includeDefaults, isConfigured, value) -> value != null);

        public Builder(String name) {
            super(name);
        }

        @Override
        protected List<Parameter<?>> getParameters() {
            return Arrays.asList(type, script);
        }

        /**
         * Resolves the delegate mapper and the indexable-field generator for the configured type, wraps them
         * together with the script in a {@link DerivedFieldType} and returns the resulting mapper.
         */
        @Override
        public DerivedFieldMapper build(BuilderContext context) {
            final String derivedType = type.getValue();
            FieldMapper typeMapper = DerivedFieldSupportedTypes.getFieldMapperFromType(derivedType, name, context);
            Function<Object, IndexableField> fieldGenerator = DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(
                derivedType,
                name
            );
            DerivedFieldType derivedFieldType = new DerivedFieldType(
                buildFullName(context),
                derivedType,
                script.getValue(),
                typeMapper,
                fieldGenerator
            );
            return new DerivedFieldMapper(name, derivedFieldType, multiFieldsBuilder.build(this, context), copyTo.build(), this);
        }
    }

    protected DerivedFieldMapper(
        String simpleName,
        MappedFieldType mappedFieldType,
        MultiFields multiFields,
        CopyTo copyTo,
        Builder builder
    ) {
        super(simpleName, mappedFieldType, multiFields, copyTo);
        this.type = builder.type.getValue();
        this.script = builder.script.getValue();
    }

    @Override
    public DerivedFieldType fieldType() {
        return (DerivedFieldType) super.fieldType();
    }

    @Override
    protected void parseCreateField(ParseContext context) throws IOException {
        // Derived fields are configured through the Builder when the root object is parsed and are neither
        // indexed nor stored, so there is nothing to parse here; reaching this method is treated as an error.
        throw new UnsupportedOperationException("should not be invoked");
    }

    @Override
    public ParametrizedFieldMapper.Builder getMergeBuilder() {
        Builder mergeBuilder = new Builder(simpleName());
        return mergeBuilder.init(this);
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    /**
     * Writes the builder parameters followed by the multi-fields and copy-to sections.
     */
    @Override
    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
        getMergeBuilder().toXContent(builder, includeDefaults);
        multiFields.toXContent(builder, params);
        copyTo.toXContent(builder, params);
    }

    /** @return the {@code type} parameter value this mapper was built with */
    public String getType() {
        return type;
    }

    /** @return the {@code script} parameter value this mapper was built with, possibly {@code null} */
    public Script getScript() {
        return script;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.KeywordField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.opensearch.Version;
import org.opensearch.common.Booleans;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.network.InetAddresses;

import java.net.InetAddress;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
* Contains logic to get the FieldMapper for a given type of derived field. Also, for a given type of derived field,
* it is used to create an IndexableField for the provided type and object. It is useful when indexing into
* lucene MemoryIndex in {@link org.opensearch.index.query.DerivedFieldQuery}.
*/
enum DerivedFieldSupportedTypes {

BOOLEAN("boolean", (name, context) -> {
BooleanFieldMapper.Builder builder = new BooleanFieldMapper.Builder(name);
return builder.build(context);
}, name -> o -> {
// Trying to mimic the logic for parsing source value as used in BooleanFieldMapper valueFetcher
Boolean value;
if (o instanceof Boolean) {
value = (Boolean) o;
} else {
String textValue = o.toString();
value = Booleans.parseBooleanStrict(textValue, false);
}
return new Field(name, value ? "T" : "F", BooleanFieldMapper.Defaults.FIELD_TYPE);
}),
DATE("date", (name, context) -> {
// TODO: should we support mapping settings exposed by a given field type from derived fields too?
// for example, support `format` for date type?
DateFieldMapper.Builder builder = new DateFieldMapper.Builder(
name,
DateFieldMapper.Resolution.MILLISECONDS,
DateFieldMapper.getDefaultDateTimeFormatter(),
false,
Version.CURRENT
);
return builder.build(context);
}, name -> o -> new LongPoint(name, (long) o)),
GEO_POINT("geo_point", (name, context) -> {
GeoPointFieldMapper.Builder builder = new GeoPointFieldMapper.Builder(name);
return builder.build(context);
}, name -> o -> {
// convert o to array of double
if (!(o instanceof List) || ((List<?>) o).size() != 2 || !(((List<?>) o).get(0) instanceof Double)) {
throw new ClassCastException("geo_point should be in format emit(double lat, double lon) for derived fields");
}
return new LatLonPoint(name, (Double) ((List<?>) o).get(0), (Double) ((List<?>) o).get(1));
}),
IP("ip", (name, context) -> {
IpFieldMapper.Builder builder = new IpFieldMapper.Builder(name, false, Version.CURRENT);
return builder.build(context);
}, name -> o -> {
InetAddress address;
if (o instanceof InetAddress) {
address = (InetAddress) o;
} else {
address = InetAddresses.forString(o.toString());
}
return new InetAddressPoint(name, address);
}),
KEYWORD("keyword", (name, context) -> {
FieldType dummyFieldType = new FieldType();
dummyFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
KeywordFieldMapper.Builder keywordBuilder = new KeywordFieldMapper.Builder(name);
KeywordFieldMapper.KeywordFieldType keywordFieldType = keywordBuilder.buildFieldType(context, dummyFieldType);
keywordFieldType.setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
return new KeywordFieldMapper(
name,
dummyFieldType,
keywordFieldType,
keywordBuilder.multiFieldsBuilder.build(keywordBuilder, context),
keywordBuilder.copyTo.build(),
keywordBuilder
);
}, name -> o -> new KeywordField(name, (String) o, Field.Store.NO)),
LONG("long", (name, context) -> {
NumberFieldMapper.Builder longBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.LONG, false, false);
return longBuilder.build(context);
}, name -> o -> new LongField(name, Long.parseLong(o.toString()), Field.Store.NO)),
DOUBLE("double", (name, context) -> {
NumberFieldMapper.Builder doubleBuilder = new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.DOUBLE, false, false);
return doubleBuilder.build(context);
}, name -> o -> new DoubleField(name, Double.parseDouble(o.toString()), Field.Store.NO));

final String name;
private final BiFunction<String, Mapper.BuilderContext, FieldMapper> builder;

private final Function<String, Function<Object, IndexableField>> indexableFieldBuilder;

DerivedFieldSupportedTypes(
String name,
BiFunction<String, Mapper.BuilderContext, FieldMapper> builder,
Function<String, Function<Object, IndexableField>> indexableFieldBuilder
) {
this.name = name;
this.builder = builder;
this.indexableFieldBuilder = indexableFieldBuilder;
}

public String getName() {
return name;
}

private FieldMapper getFieldMapper(String name, Mapper.BuilderContext context) {
return builder.apply(name, context);
}

private Function<Object, IndexableField> getIndexableFieldGenerator(String name) {
return indexableFieldBuilder.apply(name);
}

private static final Map<String, DerivedFieldSupportedTypes> enumMap = Arrays.stream(DerivedFieldSupportedTypes.values())
.collect(Collectors.toMap(DerivedFieldSupportedTypes::getName, enumValue -> enumValue));

public static FieldMapper getFieldMapperFromType(String type, String name, Mapper.BuilderContext context) {
if (!enumMap.containsKey(type)) {
throw new IllegalArgumentException("Type [" + type + "] isn't supported in Derived field context.");
}
return enumMap.get(type).getFieldMapper(name, context);
}

public static Function<Object, IndexableField> getIndexableFieldGeneratorType(String type, String name) {
if (!enumMap.containsKey(type)) {
throw new IllegalArgumentException("Type [" + type + "] isn't supported in Derived field context.");
}
return enumMap.get(type).getIndexableFieldGenerator(name);
}
}
Loading
Loading