Skip to content

Commit

Permalink
javadoc and code refactor
Browse files Browse the repository at this point in the history
Signed-off-by: Rishabh Maurya <[email protected]>
  • Loading branch information
rishabhmaurya committed May 7, 2024
1 parent 5f5f54d commit 2f45c5a
Show file tree
Hide file tree
Showing 14 changed files with 930 additions and 221 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.common.regex.Regex;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.script.Script;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Default {@link DerivedFieldResolver} implementation.
 * Accepts definitions of derived fields from a search request in both forms: a map parsed from the SearchRequest and
 * {@link DerivedField} objects defined using the client.
 * The object is initialized per search request and is responsible for resolving the {@link DerivedFieldType} for a given field name.
 * It uses {@link FieldTypeInference} to infer the field type of a nested field within a DerivedField of
 * {@link DerivedFieldSupportedTypes#OBJECT} type.
 */
public class DefaultDerivedFieldResolver implements DerivedFieldResolver {
    private static final Logger logger = LogManager.getLogger(DefaultDerivedFieldResolver.class);

    private final QueryShardContext queryShardContext;
    // Search-request level definitions, plus nested field types that were inferred by an earlier resolve() call.
    private final Map<String, DerivedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>();
    private final FieldTypeInference typeInference;

    /**
     * Creates a resolver whose {@link FieldTypeInference} is built from the index name, mapper service and index reader
     * of the given shard context.
     *
     * @param queryShardContext   shard context used for mapping lookups and script compilation
     * @param derivedFieldsObject derived field definitions in map form; may be null or empty
     * @param derivedFields       derived field definitions in object form; may be null
     */
    public DefaultDerivedFieldResolver(
        QueryShardContext queryShardContext,
        Map<String, Object> derivedFieldsObject,
        List<DerivedField> derivedFields
    ) {
        this(
            queryShardContext,
            derivedFieldsObject,
            derivedFields,
            new FieldTypeInference(
                queryShardContext.index().getName(),
                queryShardContext.getMapperService(),
                queryShardContext.getIndexReader()
            )
        );
    }

    /**
     * Creates a resolver with an explicitly supplied {@link FieldTypeInference}.
     *
     * @param queryShardContext   shard context used for mapping lookups and script compilation
     * @param derivedFieldsObject derived field definitions in map form; may be null or empty
     * @param derivedFields       derived field definitions in object form; may be null
     * @param typeInference       inference used to determine the type of nested derived fields
     */
    public DefaultDerivedFieldResolver(
        QueryShardContext queryShardContext,
        Map<String, Object> derivedFieldsObject,
        List<DerivedField> derivedFields,
        FieldTypeInference typeInference
    ) {
        // Assign every field before running any initialization logic that reads instance state.
        this.queryShardContext = queryShardContext;
        this.typeInference = typeInference;
        initDerivedFieldTypes(derivedFieldsObject, derivedFields);
    }

    /**
     * Collects the names of all derived fields matching the pattern, from both the index mapping and the
     * definitions held by this resolver. Matching is delegated to {@link Regex#simpleMatch(String, String)}.
     *
     * @param pattern pattern to match field names against
     * @return names of the matching derived fields; empty if none match
     */
    @Override
    public Set<String> resolvePattern(String pattern) {
        Set<String> derivedFields = new HashSet<>();
        for (MappedFieldType fieldType : queryShardContext.getMapperService().fieldTypes()) {
            if (fieldType instanceof DerivedFieldType && Regex.simpleMatch(pattern, fieldType.name())) {
                derivedFields.add(fieldType.name());
            }
        }
        for (String fieldName : derivedFieldTypeMap.keySet()) {
            if (Regex.simpleMatch(pattern, fieldName)) {
                derivedFields.add(fieldName);
            }
        }
        return derivedFields;
    }

    /**
     * Resolves the fieldName. The search request definitions are given precedence over derived fields definitions in the index mapping.
     * It caches the response for previously resolved nested field names.
     * @param fieldName name of the field. It also accepts nested derived field
     * @return DerivedFieldType if resolved successfully, null otherwise (including when the name maps to a non-derived field).
     */
    @Override
    public DerivedFieldType resolve(String fieldName) {
        return Optional.ofNullable(resolveUsingSearchDefinitions(fieldName)).orElseGet(() -> resolveUsingMappings(fieldName));
    }

    /**
     * Looks the name up in the index mapping and returns it only when it is a derived field.
     * A regular mapped field yields null rather than a ClassCastException.
     */
    private DerivedFieldType resolveUsingMappings(String fieldName) {
        MappedFieldType mappedFieldType = queryShardContext.getMapperService().fieldType(fieldName);
        return mappedFieldType instanceof DerivedFieldType ? (DerivedFieldType) mappedFieldType : null;
    }

    /**
     * Resolves the name from the definitions held by this resolver, falling back to nested field inference.
     * A successfully inferred nested type is stored so that later lookups skip inference.
     */
    private DerivedFieldType resolveUsingSearchDefinitions(String fieldName) {
        // The map never holds null values, so a single get() is equivalent to containsKey() + get().
        DerivedFieldType knownType = derivedFieldTypeMap.get(fieldName);
        if (knownType != null) {
            return knownType;
        }
        DerivedFieldType resolvedNestedType = resolveNestedField(fieldName);
        if (resolvedNestedType != null) {
            derivedFieldTypeMap.put(fieldName, resolvedNestedType);
        }
        return resolvedNestedType;
    }

    /**
     * Builds a derived field type for a nested field by inferring its type with the parent derived field's script.
     *
     * @return the inferred type, or null when there is no parent derived field, the type cannot be inferred,
     *         or inference fails with an IOException
     */
    private DerivedFieldType resolveNestedField(String fieldName) {
        DerivedFieldType parentDerivedField = getParentDerivedField(fieldName);
        if (parentDerivedField == null) {
            return null;
        }
        try {
            Script script = parentDerivedField.derivedField.getScript();
            Mapper inferredFieldMapper = typeInference.infer(getValueFetcher(fieldName, script));
            if (inferredFieldMapper == null) {
                logger.warn("Field type cannot be inferred. Ensure the field {} is not rare across entire index", fieldName);
                return null;
            }
            return getDerivedFieldType(
                new DerivedField(
                    fieldName,
                    inferredFieldMapper.typeName(),
                    script,
                    parentDerivedField.derivedField.getSourceIndexedField()
                )
            );
        } catch (IOException e) {
            // Inference is best effort: the failure is logged and the field is reported as unresolved.
            logger.warn(e);
            return null;
        }
    }

    /**
     * Resolves the derived field named by the segment before the first dot.
     *
     * @return the parent derived field, or null when the name has no non-empty parent segment or the parent
     *         is not a derived field
     */
    private DerivedFieldType getParentDerivedField(String fieldName) {
        int firstDot = fieldName.indexOf('.');
        // indexOf/substring instead of split("\\.")[0]: split yields an empty array for names made only of dots.
        if (firstDot <= 0) {
            return null;
        }
        return resolve(fieldName.substring(0, firstDot));
    }

    /**
     * Creates a value fetcher for the sub field (everything after the first dot) that evaluates the given script.
     */
    ValueFetcher getValueFetcher(String fieldName, Script script) {
        String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1);
        return new ObjectDerivedFieldType.ObjectDerivedFieldValueFetcher(
            subFieldName,
            DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
            o -> o // raw object returned will be used to infer the type without modifying it
        );
    }

    /**
     * Populates the lookup map from both definition forms. Object-form definitions are added last, so they
     * replace a map-form definition with the same name.
     */
    private void initDerivedFieldTypes(Map<String, Object> derivedFieldsObject, List<DerivedField> derivedFields) {
        if (derivedFieldsObject != null && !derivedFieldsObject.isEmpty()) {
            // Wrap the definitions under the derived content type key before handing them to the mapping parser.
            Map<String, Object> derivedFieldObject = new HashMap<>();
            derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject);
            derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject));
        }
        if (derivedFields != null) {
            for (DerivedField derivedField : derivedFields) {
                derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField));
            }
        }
    }

    /**
     * Parses the wrapped definitions with the document mapper parser and collects the field type of every
     * {@link DerivedFieldMapper} it produced, keyed by field name.
     */
    private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) {
        Map<String, DerivedFieldType> derivedFieldTypes = new HashMap<>();
        DocumentMapper documentMapper = queryShardContext.getMapperService()
            .documentMapperParser()
            .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject);
        if (documentMapper != null && documentMapper.mappers() != null) {
            for (Mapper mapper : documentMapper.mappers()) {
                if (mapper instanceof DerivedFieldMapper) {
                    DerivedFieldType derivedFieldType = ((DerivedFieldMapper) mapper).fieldType();
                    derivedFieldTypes.put(derivedFieldType.name(), derivedFieldType);
                }
            }
        }
        return derivedFieldTypes;
    }

    /**
     * Builds the {@link DerivedFieldType} for a single definition using the index settings and index analyzers
     * of the mapper service.
     */
    private DerivedFieldType getDerivedFieldType(DerivedField derivedField) {
        Mapper.BuilderContext builderContext = new Mapper.BuilderContext(
            queryShardContext.getMapperService().getIndexSettings().getSettings(),
            new ContentPath(1)
        );
        DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder(
            derivedField,
            queryShardContext.getMapperService().getIndexAnalyzers()
        );
        return builder.build(builderContext).fieldType();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public DerivedFieldMapper build(BuilderContext context) {
);
DerivedFieldType ft;
if (name.contains(".")) {
ft = new DerivedObjectFieldType(
ft = new ObjectDerivedFieldType(
new DerivedField(buildFullName(context), type.getValue(), script.getValue(), sourceIndexedField.getValue()),
fieldMapper,
fieldFunction,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,169 +8,29 @@

package org.opensearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.common.regex.Regex;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.script.Script;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
* DerivedFieldResolver is used as a lookup to resolve derived fields from their name.
* It is created per search request and needs to be set at {@link org.opensearch.index.query.QueryShardContext#setDerivedFieldResolver(DerivedFieldResolver)}
* for derived fields resolution.
*/
@PublicApi(since = "2.15.0")
public class DerivedFieldResolver {
private final QueryShardContext queryShardContext;
private final Map<String, MappedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>();
private final FieldTypeInference typeInference;
private static final Logger logger = LogManager.getLogger(DerivedFieldResolver.class);

/**
* Creates a resolver that builds its own {@link FieldTypeInference} from the index name, mapper service and
* index reader of the given shard context, then delegates to the four-argument constructor.
*
* @param queryShardContext shard context used for mapping lookups
* @param derivedFieldsObject derived field definitions in map form; may be null
* @param derivedFields derived field definitions in object form; may be null
*/
public DerivedFieldResolver(
QueryShardContext queryShardContext,
Map<String, Object> derivedFieldsObject,
List<DerivedField> derivedFields
) {
this(
queryShardContext,
derivedFieldsObject,
derivedFields,
new FieldTypeInference(
queryShardContext.index().getName(),
queryShardContext.getMapperService(),
queryShardContext.getIndexReader()
)
);
}

/**
 * Creates a resolver with an explicitly supplied {@link FieldTypeInference}.
 *
 * @param queryShardContext   shard context used for mapping lookups
 * @param derivedFieldsObject derived field definitions in map form; may be null
 * @param derivedFields       derived field definitions in object form; may be null
 * @param typeInference       inference used to determine the type of nested derived fields
 */
public DerivedFieldResolver(
    QueryShardContext queryShardContext,
    Map<String, Object> derivedFieldsObject,
    List<DerivedField> derivedFields,
    FieldTypeInference typeInference
) {
    this.queryShardContext = queryShardContext;
    this.typeInference = typeInference;
    // Map-form definitions are registered first; object-form ones follow and win on a name clash.
    initializeDerivedFieldTypes(derivedFieldsObject);
    initializeDerivedFieldTypesFromList(derivedFields);
}

/**
 * Registers the derived fields given in map form. Does nothing when the map is null.
 */
private void initializeDerivedFieldTypes(Map<String, Object> derivedFieldsObject) {
    if (derivedFieldsObject == null) {
        return;
    }
    // The parser expects the definitions nested under the derived content type key.
    Map<String, Object> wrappedDefinitions = new HashMap<>();
    wrappedDefinitions.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject);
    Map<String, DerivedFieldType> parsedTypes = getAllDerivedFieldTypeFromObject(wrappedDefinitions);
    derivedFieldTypeMap.putAll(parsedTypes);
}

/**
 * Registers the derived fields given in object form, keyed by their name. Does nothing when the list is null.
 */
private void initializeDerivedFieldTypesFromList(List<DerivedField> derivedFields) {
    if (derivedFields == null) {
        return;
    }
    derivedFields.forEach(definition -> derivedFieldTypeMap.put(definition.getName(), getDerivedFieldType(definition)));
}

/**
 * Returns the names of the derived fields held by this resolver that match the pattern, as decided by
 * {@link Regex#simpleMatch(String, String)}.
 *
 * @param pattern pattern to match field names against
 * @return a new mutable set with the matching names; empty if none match
 */
public Set<String> resolvePattern(String pattern) {
    // Start from every known name and discard the ones that do not match.
    Set<String> matches = new HashSet<>(derivedFieldTypeMap.keySet());
    matches.removeIf(candidate -> !Regex.simpleMatch(pattern, candidate));
    return matches;
}

/**
 * Resolves a field name. Lookup order: definitions held by this resolver, then the index mapping, then
 * nested field inference for names containing a dot.
 *
 * @param fieldName name of the field to resolve
 * @return the resolved field type, or null when none of the lookups succeeds
 */
public MappedFieldType resolve(String fieldName) {
    MappedFieldType requestDefined = derivedFieldTypeMap.get(fieldName);
    if (requestDefined != null) {
        return requestDefined;
    }

    MappedFieldType indexMapped = queryShardContext.getMapperService().fieldType(fieldName);
    if (indexMapped != null) {
        return indexMapped;
    }

    // Only dotted names can refer to a field nested inside a derived field.
    return fieldName.contains(".") ? resolveNestedField(fieldName) : null;
}

/**
 * Builds a derived field type for a nested field by inferring its type with the parent derived field's script.
 *
 * @return the inferred type, or null when there is no parent derived field, inference fails with an
 *         IOException, or no type could be inferred
 */
private MappedFieldType resolveNestedField(String fieldName) {
    DerivedFieldType parent = getParentDerivedField(fieldName);
    if (parent == null) {
        return null;
    }
    ValueFetcher fetcher = getValueFetcher(fieldName, parent.derivedField.getScript());
    Mapper inferred;
    try {
        inferred = typeInference.infer(fetcher);
    } catch (IOException e) {
        // Best effort: the failure is logged and the field is reported as unresolved.
        logger.warn(e);
        return null;
    }
    if (inferred != null) {
        DerivedField nestedDefinition = new DerivedField(
            fieldName,
            inferred.typeName(),
            parent.derivedField.getScript(),
            parent.derivedField.getSourceIndexedField()
        );
        return getDerivedFieldType(nestedDefinition);
    }
    return null;
}

/**
 * Finds the derived field named by the segment before the first dot of the given name. The definitions held
 * by this resolver are consulted first, then the index mapping.
 *
 * @param fieldName dotted name of a nested field
 * @return the parent derived field, or null when the name has no non-empty parent segment, the parent is
 *         unknown, or the parent is not a derived field
 */
private DerivedFieldType getParentDerivedField(String fieldName) {
    int firstDot = fieldName.indexOf('.');
    // indexOf/substring instead of split("\\.")[0]: split yields an empty array for names made only of dots.
    if (firstDot <= 0) {
        return null;
    }
    String parentFieldName = fieldName.substring(0, firstDot);
    MappedFieldType parentFieldType = derivedFieldTypeMap.get(parentFieldName);
    if (parentFieldType == null) {
        parentFieldType = queryShardContext.getMapperService().fieldType(parentFieldName);
    }
    // The index mapping may hold a regular (non-derived) field under this name; treat it as "no derived parent".
    return parentFieldType instanceof DerivedFieldType ? (DerivedFieldType) parentFieldType : null;
}

/**
 * Creates a value fetcher for the sub field (everything after the first dot) that evaluates the given script.
 */
private ValueFetcher getValueFetcher(String fieldName, Script script) {
    int firstDot = fieldName.indexOf(".");
    String nestedPath = fieldName.substring(firstDot + 1);
    return new DerivedObjectFieldType.DerivedObjectFieldValueFetcher(
        nestedPath,
        DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
        // the raw object is passed through unchanged so that its type can be inferred
        o -> o
    );
}

/**
 * Parses the wrapped definitions with the document mapper parser and collects the field type of every
 * {@link DerivedFieldMapper} it produced, keyed by field name.
 *
 * @return the collected field types; empty when parsing yields no document mapper or no mappers
 */
private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) {
    Map<String, DerivedFieldType> typesByName = new HashMap<>();
    DocumentMapper parsed = queryShardContext.getMapperService()
        .documentMapperParser()
        .parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject);
    if (parsed == null || parsed.mappers() == null) {
        return typesByName;
    }
    for (Mapper candidate : parsed.mappers()) {
        if (!(candidate instanceof DerivedFieldMapper)) {
            continue;
        }
        DerivedFieldType type = ((DerivedFieldMapper) candidate).fieldType();
        typesByName.put(type.name(), type);
    }
    return typesByName;
}

/**
 * Builds the {@link DerivedFieldType} for a single definition using the index settings and index analyzers
 * of the mapper service.
 */
private DerivedFieldType getDerivedFieldType(DerivedField derivedField) {
    MapperService mapperService = queryShardContext.getMapperService();
    DerivedFieldMapper.Builder mapperBuilder = new DerivedFieldMapper.Builder(derivedField, mapperService.getIndexAnalyzers());
    Mapper.BuilderContext context = new Mapper.BuilderContext(mapperService.getIndexSettings().getSettings(), new ContentPath(1));
    DerivedFieldMapper mapper = mapperBuilder.build(context);
    return mapper.fieldType();
}
public interface DerivedFieldResolver {
/**
* Resolves all derived fields matching a given pattern. It includes derived fields defined both in search requests
* and index mapping.
* @param pattern pattern to match derived field names against.
* NOTE(review): the implementations visible alongside this interface match with Regex.simpleMatch,
* which is presumably simple wildcard matching rather than a full regular expression - confirm.
* @return names of all derived fields matching the pattern
*/
Set<String> resolvePattern(String pattern);

/**
* Resolves the MappedFieldType associated with a derived field
* @param fieldName field name to lookup
* @return mapped field type; implementations may return null when the name cannot be resolved
* (DefaultDerivedFieldResolver documents a null return in that case)
*/
MappedFieldType resolve(String fieldName);
}
Loading

0 comments on commit 2f45c5a

Please sign in to comment.