Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Remove] Type from nested fields using new metadata field mapper #3004

Merged
merged 3 commits into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -455,21 +455,23 @@ private static void innerParseObject(
private static void nested(ParseContext context, ObjectMapper.Nested nested) {
ParseContext.Document nestedDoc = context.doc();
ParseContext.Document parentDoc = nestedDoc.getParent();
Version indexVersion = context.indexSettings().getIndexVersionCreated();
if (nested.isIncludeInParent()) {
addFields(nestedDoc, parentDoc);
addFields(indexVersion, nestedDoc, parentDoc);
}
if (nested.isIncludeInRoot()) {
ParseContext.Document rootDoc = context.rootDoc();
// don't add it twice, if its included in parent, and we are handling the master doc...
if (!nested.isIncludeInParent() || parentDoc != rootDoc) {
addFields(nestedDoc, rootDoc);
addFields(indexVersion, nestedDoc, rootDoc);
}
}
}

private static void addFields(ParseContext.Document nestedDoc, ParseContext.Document rootDoc) {
private static void addFields(Version indexVersion, ParseContext.Document nestedDoc, ParseContext.Document rootDoc) {
String nestedPathFieldName = NestedPathFieldMapper.name(indexVersion);
for (IndexableField field : nestedDoc.getFields()) {
if (!field.name().equals(TypeFieldMapper.NAME)) {
if (field.name().equals(nestedPathFieldName) == false) {
rootDoc.add(field);
}
}
Expand Down Expand Up @@ -498,7 +500,7 @@ private static ParseContext nestedContext(ParseContext context, ObjectMapper map
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new Field(TypeFieldMapper.NAME, mapper.nestedTypePathAsString(), TypeFieldMapper.Defaults.NESTED_FIELD_TYPE));
nestedDoc.add(NestedPathFieldMapper.field(context.indexSettings().getIndexVersionCreated(), mapper.nestedTypePath()));
return context;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.Version;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.lookup.SearchLookup;

import java.util.Collections;

public class NestedPathFieldMapper extends MetadataFieldMapper {
// OpenSearch version 2.0 removed types; this name is used for bwc
public static final String LEGACY_NAME = "_type";
public static final String NAME = "_nested_path";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an opinion, _nested_path looks too verbose, may be just _path?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up sticking with _nested_path because it's more descriptive when analyzing lucene segments (which has no object->field relationship). The fieldType is easily identifiable as the path for a nested field when just reading the field name.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


public static class Defaults {
public static final FieldType FIELD_TYPE = new FieldType();
static {
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
}

/** private ctor; using SINGLETON to control BWC */
private NestedPathFieldMapper(String name) {
super(new NestedPathFieldType(name));
}

/** returns the field name */
public static String name(Version version) {
if (version.before(Version.V_2_0_0)) {
return LEGACY_NAME;
}
return NAME;
}

@Override
protected String contentType() {
return NAME;
}

private static final NestedPathFieldMapper LEGACY_INSTANCE = new NestedPathFieldMapper(LEGACY_NAME);
private static final NestedPathFieldMapper INSTANCE = new NestedPathFieldMapper(NAME);

public static final TypeParser PARSER = new FixedTypeParser(
c -> c.indexVersionCreated().before(Version.V_2_0_0) ? LEGACY_INSTANCE : INSTANCE
);

/** helper method to create a lucene field based on the opensearch version */
public static Field field(Version version, String path) {
return new Field(name(version), path, Defaults.FIELD_TYPE);
}

/** helper method to create a query based on the opensearch version */
public static Query filter(Version version, String path) {
return new TermQuery(new Term(name(version), new BytesRef(path)));
}

/** field type for the NestPath field */
public static final class NestedPathFieldType extends StringFieldType {
private NestedPathFieldType(String name) {
super(name, true, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, Collections.emptyMap());
}

@Override
public String typeName() {
return NAME;
}

@Override
public Query existsQuery(QueryShardContext context) {
throw new UnsupportedOperationException("Cannot run exists() query against the nested field path");
}

@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,9 @@

package org.opensearch.index.mapper;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.OpenSearchParseException;
import org.opensearch.Version;
import org.opensearch.common.Explicit;
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.CopyOnWriteHashMap;
Expand Down Expand Up @@ -388,8 +386,7 @@ protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<Strin

private final Nested nested;

private final String nestedTypePathAsString;
private final BytesRef nestedTypePathAsBytes;
private final String nestedTypePath;

private final Query nestedTypeFilter;

Expand Down Expand Up @@ -420,9 +417,13 @@ protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<Strin
} else {
this.mappers = CopyOnWriteHashMap.copyOf(mappers);
}
this.nestedTypePathAsString = "__" + fullPath;
this.nestedTypePathAsBytes = new BytesRef(nestedTypePathAsString);
this.nestedTypeFilter = new TermQuery(new Term(TypeFieldMapper.NAME, nestedTypePathAsBytes));
Version version = Version.indexCreated(settings);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit risky call: when fixing one of the Joda conversions, realized that settings may not hold the index created version all the time [1], may be worth checking here as well.

[1] https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/index/mapper/Mapper.java#L258

Copy link
Collaborator Author

@nknize nknize Apr 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Woah! Did you find this to be true in production or could it have just been a test that didn't set the Version? I don't think we can have a case where an index is created without a version, otherwise rolling upgrades would fail?

Copy link
Collaborator

@reta reta Apr 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is this one https://github.com/opensearch-project/OpenSearch/pull/2094/files, to clarify, it is not caused by absence of the index created but prefabricated index settings which may not include it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just getting back to this. The testDefaultConfig I had in there would've failed since it uses prefabricated empty settings but it was updating w/ different mappings so I added an explicit test based on #1921 to update with the same mappings and Empty settings. There are no issues; we may revisit if this rears it's head again.

if (version.before(Version.V_2_0_0)) {
this.nestedTypePath = "__" + fullPath;
} else {
this.nestedTypePath = fullPath;
}
this.nestedTypeFilter = NestedPathFieldMapper.filter(version, nestedTypePath);
}

@Override
Expand Down Expand Up @@ -486,8 +487,8 @@ public String fullPath() {
return this.fullPath;
}

public String nestedTypePathAsString() {
return nestedTypePathAsString;
public String nestedTypePath() {
return nestedTypePath;
}

public final Dynamic dynamic() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.MetadataFieldMapper;
import org.opensearch.index.mapper.NestedPathFieldMapper;
import org.opensearch.index.mapper.NumberFieldMapper;
import org.opensearch.index.mapper.ObjectMapper;
import org.opensearch.index.mapper.RangeType;
Expand Down Expand Up @@ -184,6 +185,7 @@ private static Map<String, MetadataFieldMapper.TypeParser> initBuiltInMetadataMa
builtInMetadataMappers.put(IndexFieldMapper.NAME, IndexFieldMapper.PARSER);
builtInMetadataMappers.put(DataStreamFieldMapper.NAME, DataStreamFieldMapper.PARSER);
builtInMetadataMappers.put(SourceFieldMapper.NAME, SourceFieldMapper.PARSER);
builtInMetadataMappers.put(NestedPathFieldMapper.NAME, NestedPathFieldMapper.PARSER);
builtInMetadataMappers.put(VersionFieldMapper.NAME, VersionFieldMapper.PARSER);
builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER);
// _field_names must be added last so that it has a chance to see all the other mappers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.opensearch.Version;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.MetadataFieldMapper;
import org.opensearch.index.mapper.NestedPathFieldMapper;
import org.opensearch.plugins.MapperPlugin;

import java.util.Collections;
Expand All @@ -50,6 +51,7 @@ public final class MapperRegistry {

private final Map<String, Mapper.TypeParser> mapperParsers;
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers;
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsersPre20;
private final Function<String, Predicate<String>> fieldFilter;

public MapperRegistry(
Expand All @@ -59,6 +61,9 @@ public MapperRegistry(
) {
this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers));
this.metadataMapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(metadataMapperParsers));
Map<String, MetadataFieldMapper.TypeParser> tempPre20 = new LinkedHashMap<>(metadataMapperParsers);
tempPre20.remove(NestedPathFieldMapper.NAME);
this.metadataMapperParsersPre20 = Collections.unmodifiableMap(tempPre20);
this.fieldFilter = fieldFilter;
}

Expand All @@ -75,7 +80,7 @@ public Map<String, Mapper.TypeParser> getMapperParsers() {
* returned map uses the name of the field as a key.
*/
public Map<String, MetadataFieldMapper.TypeParser> getMetadataMapperParsers(Version indexCreatedVersion) {
return metadataMapperParsers;
return indexCreatedVersion.onOrAfter(Version.V_2_0_0) ? metadataMapperParsers : metadataMapperParsersPre20;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,14 @@ public void testNestedHaveIdAndTypeFields() throws Exception {
assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
assertEquals(Uid.encodeId("1"), result.docs().get(0).getField(IdFieldMapper.NAME).binaryValue());
assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
assertNotNull(result.docs().get(0).getField(NestedPathFieldMapper.NAME));
assertEquals("foo", result.docs().get(0).getField(NestedPathFieldMapper.NAME).stringValue());
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
// Root document:
assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
assertEquals(Uid.encodeId("1"), result.docs().get(1).getField(IdFieldMapper.NAME).binaryValue());
assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
assertNull(result.docs().get(1).getField(NestedPathFieldMapper.NAME));
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ private static ObjectMapper createObjectMapper(String name) {
ObjectMapper.Nested.NO,
ObjectMapper.Dynamic.FALSE,
emptyMap(),
Settings.EMPTY
SETTINGS
);
}

Expand All @@ -232,7 +232,7 @@ private static ObjectMapper createNestedObjectMapper(String name) {
ObjectMapper.Nested.newNested(),
ObjectMapper.Dynamic.FALSE,
emptyMap(),
Settings.EMPTY
SETTINGS
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public void testSingleNested() throws Exception {
);

assertThat(doc.docs().size(), equalTo(2));
assertThat(doc.docs().get(0).get(TypeFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePathAsString()));
assertThat(doc.docs().get(0).get(NestedPathFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePath()));
assertThat(doc.docs().get(0).get("nested1.field1"), equalTo("1"));
assertThat(doc.docs().get(0).get("nested1.field2"), equalTo("2"));

Expand Down Expand Up @@ -180,10 +180,10 @@ public void testSingleNested() throws Exception {
);

assertThat(doc.docs().size(), equalTo(3));
assertThat(doc.docs().get(0).get(TypeFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePathAsString()));
assertThat(doc.docs().get(0).get(NestedPathFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePath()));
assertThat(doc.docs().get(0).get("nested1.field1"), equalTo("1"));
assertThat(doc.docs().get(0).get("nested1.field2"), equalTo("2"));
assertThat(doc.docs().get(1).get(TypeFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePathAsString()));
assertThat(doc.docs().get(1).get(NestedPathFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePath()));
assertThat(doc.docs().get(1).get("nested1.field1"), equalTo("3"));
assertThat(doc.docs().get(1).get("nested1.field2"), equalTo("4"));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.IndexableField;
import org.opensearch.common.bytes.BytesArray;
import org.opensearch.common.compress.CompressedXContent;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.test.OpenSearchSingleNodeTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

/** tests for {@link org.opensearch.index.mapper.NestedPathFieldMapper} */
public class NestedPathFieldMapperTests extends OpenSearchSingleNodeTestCase {

public void testDefaultConfig() throws IOException {
Settings indexSettings = Settings.EMPTY;
MapperService mapperService = createIndex("test", indexSettings).mapperService();
DocumentMapper mapper = mapperService.merge(
MapperService.SINGLE_MAPPING_NAME,
new CompressedXContent("{\"" + MapperService.SINGLE_MAPPING_NAME + "\":{}}"),
MapperService.MergeReason.MAPPING_UPDATE
);
ParsedDocument document = mapper.parse(new SourceToParse("index", "id", new BytesArray("{}"), XContentType.JSON));
assertEquals(Collections.<IndexableField>emptyList(), Arrays.asList(document.rootDoc().getFields(NestedPathFieldMapper.NAME)));
}

public void testUpdatesWithSameMappings() throws IOException {
Settings indexSettings = Settings.EMPTY;
MapperService mapperService = createIndex("test", indexSettings).mapperService();
DocumentMapper mapper = mapperService.merge(
MapperService.SINGLE_MAPPING_NAME,
new CompressedXContent("{\"" + MapperService.SINGLE_MAPPING_NAME + "\":{}}"),
MapperService.MergeReason.MAPPING_UPDATE
);
mapper.merge(mapper.mapping(), MapperService.MergeReason.MAPPING_UPDATE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.index.IndexService;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.NestedPathFieldMapper;
import org.opensearch.index.query.MatchAllQueryBuilder;
import org.opensearch.index.query.NestedQueryBuilder;
import org.opensearch.index.query.QueryShardContext;
Expand Down Expand Up @@ -324,7 +325,7 @@ public void testNested() throws IOException {

Query expectedChildQuery = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), Occur.MUST)
// we automatically add a filter since the inner query might match non-nested docs
.add(new TermQuery(new Term("_type", "__nested1")), Occur.FILTER)
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested1")), Occur.FILTER)
.build();
assertEquals(expectedChildQuery, query.getChildQuery());

Expand Down Expand Up @@ -352,7 +353,7 @@ public void testNested() throws IOException {

// we need to add the filter again because of include_in_parent
expectedChildQuery = new BooleanQuery.Builder().add(new TermQuery(new Term("nested2.foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("_type", "__nested2")), Occur.FILTER)
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested2")), Occur.FILTER)
.build();
assertEquals(expectedChildQuery, query.getChildQuery());

Expand All @@ -367,7 +368,7 @@ public void testNested() throws IOException {

// we need to add the filter again because of include_in_root
expectedChildQuery = new BooleanQuery.Builder().add(new TermQuery(new Term("nested3.foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("_type", "__nested3")), Occur.FILTER)
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested3")), Occur.FILTER)
.build();
assertEquals(expectedChildQuery, query.getChildQuery());

Expand Down
Loading