Skip to content

Commit

Permalink
Unit test and code refactor
Browse files Browse the repository at this point in the history
Signed-off-by: Rishabh Maurya <[email protected]>
  • Loading branch information
rishabhmaurya committed May 7, 2024
1 parent d0aa5c0 commit 25fecf3
Show file tree
Hide file tree
Showing 5 changed files with 370 additions and 158 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,71 @@

package org.opensearch.index.mapper;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.common.regex.Regex;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.script.Script;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;

@PublicApi(since = "2.14.0")
@PublicApi(since = "2.15.0")
public class DerivedFieldResolver {
private final QueryShardContext queryShardContext;
private final Map<String, MappedFieldType> derivedFieldTypeMap = new ConcurrentHashMap<>();
private final FieldTypeInference typeInference;
private static final Logger logger = LogManager.getLogger(DerivedFieldResolver.class);

public DerivedFieldResolver(
QueryShardContext queryShardContext,
Map<String, Object> derivedFieldsObject,
List<DerivedField> derivedFields
) {
this(
queryShardContext,
derivedFieldsObject,
derivedFields,
new FieldTypeInference(
queryShardContext.index().getName(),
queryShardContext.getMapperService(),
queryShardContext.getIndexReader()
)
);
}

public DerivedFieldResolver(
QueryShardContext queryShardContext,
Map<String, Object> derivedFieldsObject,
List<DerivedField> derivedFields,
FieldTypeInference typeInference
) {
this.queryShardContext = queryShardContext;
initializeDerivedFieldTypes(derivedFieldsObject);
initializeDerivedFieldTypesFromList(derivedFields);
this.typeInference = typeInference;
}

private void initializeDerivedFieldTypes(Map<String, Object> derivedFieldsObject) {
if (derivedFieldsObject != null) {
Map<String, Object> derivedFieldObject = new HashMap<>();
derivedFieldObject.put(DerivedFieldMapper.CONTENT_TYPE, derivedFieldsObject);
derivedFieldTypeMap.putAll(getAllDerivedFieldTypeFromObject(derivedFieldObject));
}
}

private void initializeDerivedFieldTypesFromList(List<DerivedField> derivedFields) {
if (derivedFields != null) {
for (DerivedField derivedField : derivedFields) {
derivedFieldTypeMap.put(derivedField.getName(), getDerivedFieldType(derivedField));
}
}
this.typeInference = new FieldTypeInference(queryShardContext);
}

public Set<String> resolvePattern(String pattern) {
Expand All @@ -65,60 +86,69 @@ public Set<String> resolvePattern(String pattern) {
}

public MappedFieldType resolve(String fieldName) {
if (derivedFieldTypeMap.containsKey(fieldName)) {
return derivedFieldTypeMap.get(fieldName);
MappedFieldType fieldType = derivedFieldTypeMap.get(fieldName);
if (fieldType != null) {
return fieldType;
}
MappedFieldType derivedFieldType = queryShardContext.getMapperService().fieldType(fieldName);
if (derivedFieldType != null) {
return derivedFieldType;

fieldType = queryShardContext.getMapperService().fieldType(fieldName);
if (fieldType != null) {
return fieldType;
}

if (fieldName.contains(".")) {
DerivedFieldType parentDerivedField = getParentDerivedField(fieldName);
if (parentDerivedField == null) {
return null;
}
String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1);
Mapper inferredFieldMapper = typeInference.infer(subFieldName, parentDerivedField.derivedField.getScript());
Mapper.BuilderContext builderContext = new Mapper.BuilderContext(
this.queryShardContext.getMapperService().getIndexSettings().getSettings(),
new ContentPath(1)
);
derivedFieldType = new DerivedObjectFieldType(
new DerivedField(
fieldName,
inferredFieldMapper.typeName(),
parentDerivedField.derivedField.getScript(),
parentDerivedField.derivedField.getSourceIndexedField()
),
DerivedFieldSupportedTypes.getFieldMapperFromType(
inferredFieldMapper.typeName(),
fieldName,
builderContext,
queryShardContext.getIndexAnalyzers()
),
DerivedFieldSupportedTypes.getIndexableFieldGeneratorType(inferredFieldMapper.typeName(), fieldName),
queryShardContext.getIndexAnalyzers()
);
if (derivedFieldType != null) {
derivedFieldTypeMap.put(fieldName, derivedFieldType);
}
return derivedFieldType;
return resolveNestedField(fieldName);
}
return null;
}

private MappedFieldType resolveNestedField(String fieldName) {
DerivedFieldType parentDerivedField = getParentDerivedField(fieldName);
if (parentDerivedField == null) {
return null;
}
ValueFetcher valueFetcher = getValueFetcher(fieldName, parentDerivedField.derivedField.getScript());
Mapper inferredFieldMapper;
try {
inferredFieldMapper = typeInference.infer(valueFetcher);
} catch (IOException e) {
logger.warn(e);
return null;
}
if (inferredFieldMapper == null) {
return null;
}
return getDerivedFieldType(
new DerivedField(
fieldName,
inferredFieldMapper.typeName(),
parentDerivedField.derivedField.getScript(),
parentDerivedField.derivedField.getSourceIndexedField()
)
);
}

private DerivedFieldType getParentDerivedField(String fieldName) {
String parentFieldName = fieldName.split("\\.")[0];
DerivedFieldType parentDerivedFieldType = (DerivedFieldType) derivedFieldTypeMap.get(parentFieldName);
if (parentDerivedFieldType == null) {
parentDerivedFieldType = (DerivedFieldType) this.queryShardContext.getMapperService().fieldType(parentFieldName);
parentDerivedFieldType = (DerivedFieldType) queryShardContext.getMapperService().fieldType(parentFieldName);
}
return parentDerivedFieldType;
}

private ValueFetcher getValueFetcher(String fieldName, Script script) {
String subFieldName = fieldName.substring(fieldName.indexOf(".") + 1);
return new DerivedObjectFieldType.DerivedObjectFieldValueFetcher(
subFieldName,
DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
o -> o // raw object returned will be used to infer the type without modifying it
);
}

private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<String, Object> derivedFieldObject) {
Map<String, DerivedFieldType> derivedFieldTypes = new HashMap<>();
DocumentMapper documentMapper = this.queryShardContext.getMapperService()
DocumentMapper documentMapper = queryShardContext.getMapperService()
.documentMapperParser()
.parse(DerivedFieldMapper.CONTENT_TYPE, derivedFieldObject);
if (documentMapper != null && documentMapper.mappers() != null) {
Expand All @@ -134,110 +164,13 @@ private Map<String, DerivedFieldType> getAllDerivedFieldTypeFromObject(Map<Strin

private DerivedFieldType getDerivedFieldType(DerivedField derivedField) {
Mapper.BuilderContext builderContext = new Mapper.BuilderContext(
this.queryShardContext.getMapperService().getIndexSettings().getSettings(),
queryShardContext.getMapperService().getIndexSettings().getSettings(),
new ContentPath(1)
);
DerivedFieldMapper.Builder builder = new DerivedFieldMapper.Builder(
derivedField,
this.queryShardContext.getMapperService().getIndexAnalyzers()
queryShardContext.getMapperService().getIndexAnalyzers()
);
return builder.build(builderContext).fieldType();
}

static class FieldTypeInference {
private final QueryShardContext queryShardContext;

public FieldTypeInference(QueryShardContext queryShardContext) {
this.queryShardContext = queryShardContext;
}

public Mapper infer(String name, Script script) {
try {
DerivedObjectFieldType.DerivedObjectFieldValueFetcher valueFetcher =
new DerivedObjectFieldType.DerivedObjectFieldValueFetcher(
name,
DerivedFieldType.getDerivedFieldLeafFactory(script, queryShardContext, queryShardContext.lookup()),
o -> o // raw object returned will be used to infer the type without modifying it
);
int iter = 0;
int totalDocs = queryShardContext.getIndexReader().numDocs();
// this will lead to the probability of more than 0.95 to select on the document containing this field,
// when at least 5% of the overall documents contain the field
int limit = Math.min(totalDocs, 50);
int[] docs = getSortedRandomNum(limit, totalDocs, 10000);
int offset = 0;
int leaf = 0;
LeafReaderContext leafReaderContext = queryShardContext.getIndexReader().leaves().get(leaf);
valueFetcher.setNextReader(leafReaderContext);
SourceLookup sourceLookup = new SourceLookup();
while (iter < limit) {
int docID = docs[iter] - offset;
if (docID >= leafReaderContext.reader().numDocs()) {
leaf++;
offset += leafReaderContext.reader().numDocs();
docID = docs[iter] - offset;
leafReaderContext = queryShardContext.getIndexReader().leaves().get(leaf);
valueFetcher.setNextReader(leafReaderContext);
}
sourceLookup.setSegmentAndDocument(leafReaderContext, docID);
List<Object> objects = valueFetcher.fetchValues(sourceLookup);
Mapper inferredMapper = inferTypeFromObject(name, objects.get(0));
if (inferredMapper == null) {
iter++;
continue;
}
return inferredMapper;
}
} catch (IllegalArgumentException e) {
// TODO remove illegal argument exception from DerivedFieldSupportedTypes and let consumers handle themselves.
// If inference didn't work, defaulting to keyword field type
// TODO: warning?
} catch (IOException e) {
throw new RuntimeException(e);
}
// the field isn't found in documents where it was checked
// TODO: should fallback to keyword field type>
throw new MapperException(
"Unable to infer the derived field ["
+ name
+ "] within object type. "
+ "Ensure the field is present in majority of the documents"
);
}

public static int[] getSortedRandomNum(int k, int n, int attempts) {

Set<Integer> generatedNumbers = new HashSet<>();
Random random = new Random();
int itr = 0;
while (generatedNumbers.size() < k && itr++ < attempts) {
int randomNumber = random.nextInt(n);
generatedNumbers.add(randomNumber);
}
int[] result = new int[k];
int i = 0;
for (int number : generatedNumbers) {
result[i++] = number;
}
Arrays.sort(result);
return result;
}

private Mapper inferTypeFromObject(String name, Object o) throws IOException {
// TODO error handling - 1. missing value? 2. Multi-valued field?
if (o == null) {
return null;
}
DocumentMapper mapper = queryShardContext.getMapperService().documentMapper();
SourceToParse sourceToParse = new SourceToParse(
queryShardContext.index().getName(),
"_id",
BytesReference.bytes(jsonBuilder().startObject().field(name, o).endObject()),
JsonXContent.jsonXContent.mediaType()
);
ParsedDocument parsedDocument = mapper.parse(sourceToParse);
Mapping mapping = parsedDocument.dynamicMappingsUpdate();
return mapping.root.getMapper(name);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ public DerivedFieldValueFetcher valueFetcher(QueryShardContext context, SearchLo
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
}
Function<Object, Object> valueForDisplay = DerivedFieldSupportedTypes.getValueForDisplayGenerator(getType());
String subFieldName = name().substring(name().indexOf(".") + 1);
return new DerivedObjectFieldValueFetcher(
getSubField(),
subFieldName,
getDerivedFieldLeafFactory(derivedField.getScript(), context, searchLookup == null ? context.lookup() : searchLookup),
valueForDisplay
);
Expand All @@ -80,13 +81,23 @@ public List<Object> fetchValuesInternal(SourceLookup lookup) {
List<Object> result = new ArrayList<>();
for (Object o : jsonObjects) {
Map<String, Object> s = XContentHelper.convertToMap(JsonXContent.jsonXContent, (String) o, false);
result.add(s.get(subField));
result.add(getNestedField(s, subField));
}
return result;
}
}

private String getSubField() {
return name().split("\\.")[1];
private static Object getNestedField(Map<String, Object> obj, String key) {
String[] keyParts = key.split("\\.");
Map<String, Object> currentObj = obj;
for (int i = 0; i < keyParts.length - 1; i++) {
Object value = currentObj.get(keyParts[i]);
if (value instanceof Map) {
currentObj = (Map<String, Object>) value;
} else {
return null;
}
}
return currentObj.get(keyParts[keyParts.length - 1]);
}
}
}
Loading

0 comments on commit 25fecf3

Please sign in to comment.