Skip to content

Commit

Permalink
Refactor the semantic_text to register its sub fields in the mapping …
Browse files Browse the repository at this point in the history
…instead of re-creating them each time.
  • Loading branch information
jimczi committed Mar 20, 2024
1 parent 64e8e43 commit 1c18fbc
Show file tree
Hide file tree
Showing 10 changed files with 399 additions and 171 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ public static Map<String, Object> nodeMapValue(Object node, String desc) {
if (node instanceof Map) {
return (Map<String, Object>) node;
} else {
throw new ElasticsearchParseException(desc + " should be a hash but was of type: " + node.getClass());
throw new ElasticsearchParseException(desc + " should be a map but was of type: " + node.getClass());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public CompressedXContent toCompressedXContent() {
/**
 * Returns the root object for the current mapping
 *
 * @return the {@link RootObjectMapper} held in this mapping's {@code root} field
 */
RootObjectMapper getRoot() {
public RootObjectMapper getRoot() {
return root;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.DocumentParsingException;
import org.elasticsearch.index.mapper.FieldMapper;
Expand Down Expand Up @@ -172,71 +173,60 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
}
}

/**
 * Validates the model settings found in the document against the mapped semantic text field and
 * returns the nested object mapper used to index the inference results.
 *
 * Fails with a {@link DocumentParsingException} when the inference id in the document differs from
 * the one configured on the field, when a text embedding model does not report its dimensions, or
 * when the settings conflict with the ones already recorded on the field. When the field has no
 * model settings yet, a new mapper carrying them is built and registered as a dynamic mapper.
 */
private SemanticTextFieldMapper updateSemanticTextFieldMapper(
private NestedObjectMapper updateSemanticTextFieldMapper(
DocumentParserContext docContext,
MapperBuilderContext mapperBuilderContext,
ObjectMapper parent,
SemanticTextFieldMapper original,
SemanticTextModelSettings modelSettings,
XContentLocation xContentLocation
) {
if (modelSettings.inferenceId().equals(original.fieldType().getInferenceId()) == false) {
throw new DocumentParsingException(
xContentLocation,
"Model settings for field ["
+ original.fieldType().name()
+ "] is already set to ["
+ original.fieldType().getInferenceId()
+ "], got ["
+ modelSettings.inferenceId()
+ "]"
Strings.format(
"The configured %s [%s] for field [%s] doesn't match the %s [%s] reported in the document.",
SemanticTextModelSettings.INFERENCE_ID_FIELD.getPreferredName(),
// the "configured" id is the one held by the field type; the "reported" id is the document's
original.fieldType().getInferenceId(),
original.name(),
SemanticTextModelSettings.INFERENCE_ID_FIELD.getPreferredName(),
modelSettings.inferenceId()
)
);
}
if (modelSettings.taskType() == TaskType.TEXT_EMBEDDING && modelSettings.dimensions() == null) {
throw new DocumentParsingException(
xContentLocation,
"Model settings for field [" + original.fieldType().name() + "] must contain dimensions"
"Model settings for field [" + original.name() + "] must contain dimensions"
);
}

if (original.getModelSettings() == null) {
if (parent != docContext.root()) {
mapperBuilderContext = mapperBuilderContext.createChildContext(parent.name(), ObjectMapper.Dynamic.FALSE);
}
SemanticTextFieldMapper newMapper = new SemanticTextFieldMapper.Builder(
original.name(),
original.simpleName(),
docContext.indexSettings().getIndexVersionCreated(),
docContext.indexAnalyzers()
).setModelId(modelSettings.inferenceId()).setModelSettings(modelSettings).build(mapperBuilderContext);
).setInferenceId(modelSettings.inferenceId()).setModelSettings(modelSettings).build(mapperBuilderContext);
docContext.addDynamicMapper(newMapper);
return newMapper;
return newMapper.getSubMappers();
} else {
var conflicts = new Conflicts(original.name());
SemanticTextModelSettings.checkCompatibility(original.getModelSettings(), modelSettings, conflicts);
SemanticTextFieldMapper.Conflicts conflicts = new Conflicts(original.name());
SemanticTextFieldMapper.canMergeModelSettings(original.getModelSettings(), modelSettings, conflicts);
try {
conflicts.check();
} catch (Exception exc) {
throw new DocumentParsingException(xContentLocation, "Failed to update field [" + original.name() + "]", exc);
}
}
return original;
}

private record FieldMapperAndParent(ObjectMapper parent, Mapper mapper) {}

private FieldMapperAndParent findFieldMapper(ObjectMapper mapper, String fullName) {
String[] pathElements = fullName.split("\\.");
for (int i = 0; i < pathElements.length - 1; i++) {
Mapper next = mapper.getMapper(pathElements[i]);
if (next == null || next instanceof ObjectMapper == false) {
return null;
throw new DocumentParsingException(xContentLocation, "Incompatible model_settings", exc);
}
mapper = (ObjectMapper) next;
}
return new FieldMapperAndParent(mapper, mapper.getMapper(pathElements[pathElements.length - 1]));
return original.getSubMappers();
}

@SuppressWarnings("unchecked")
private void parseSingleField(DocumentParserContext context, MapperBuilderContext mapperBuilderContext) throws IOException {
XContentParser parser = context.parser();
String fieldName = parser.currentName();
var res = findFieldMapper(context.root(), fieldName);
var res = findMapper(context.mappingLookup().getMapping().getRoot(), fieldName);
if (res == null || res.mapper == null || res.mapper instanceof SemanticTextFieldMapper == false) {
throw new DocumentParsingException(
parser.getTokenLocation(),
Expand All @@ -245,28 +235,59 @@ private void parseSingleField(DocumentParserContext context, MapperBuilderContex
}
parser.nextToken();
failIfTokenIsNot(parser.getTokenLocation(), parser, XContentParser.Token.START_OBJECT);
XContentLocation xContentLocation = parser.getTokenLocation();

// record the location of the inference field in the original source
XContentLocation xContentLocation = parser.getTokenLocation();
// parse eagerly to extract the model settings first
Map<String, Object> map = parser.mapOrdered();
Map<String, String> modelSettingsMap = (Map<String, String>) map.remove(SemanticTextModelSettings.NAME);
var modelSettings = SemanticTextModelSettings.parse(
XContentHelper.mapToXContentParser(XContentParserConfiguration.EMPTY, modelSettingsMap)
);
var fieldMapper = updateSemanticTextFieldMapper(
Object modelSettingsObj = map.remove(SemanticTextModelSettings.NAME);
if (modelSettingsObj == null) {
throw new DocumentParsingException(
parser.getTokenLocation(),
Strings.format(
"Missing required [%s] for field [%s] of type [%s]",
SemanticTextModelSettings.NAME,
fieldName,
SemanticTextFieldMapper.CONTENT_TYPE
)
);
}
Map<String, Object> modelSettingsMap = XContentMapValues.nodeMapValue(modelSettingsObj, "model_settings");
final SemanticTextModelSettings modelSettings;
try {
modelSettings = SemanticTextModelSettings.parse(
XContentHelper.mapToXContentParser(XContentParserConfiguration.EMPTY, modelSettingsMap)
);
} catch (Exception exc) {
throw new DocumentParsingException(
xContentLocation,
Strings.format(
"Error parsing [%s] for field [%s] of type [%s]",
SemanticTextModelSettings.NAME,
fieldName,
SemanticTextFieldMapper.CONTENT_TYPE
),
exc
);
}
var nestedObjectMapper = updateSemanticTextFieldMapper(
context,
mapperBuilderContext,
res.parent,
(SemanticTextFieldMapper) res.mapper,
modelSettings,
xContentLocation
);

// we know the model settings, so we can (re) parse the results array now
XContentParser subParser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
map,
XContentType.JSON
);
DocumentParserContext mapContext = context.switchParser(subParser);
parseFieldInferenceObject(xContentLocation, subParser, mapContext, fieldMapper.getNestedField());
parseFieldInferenceObject(xContentLocation, subParser, mapContext, nestedObjectMapper);
}

private void parseFieldInferenceObject(
Expand Down Expand Up @@ -312,9 +333,16 @@ private void parseResultsObject(
visited.add(parser.currentName());
FieldMapper fieldMapper = (FieldMapper) nestedMapper.getMapper(parser.currentName());
if (fieldMapper == null) {
logger.debug("Skipping indexing of unrecognized field name [" + parser.currentName() + "]");
advancePastCurrentFieldName(xContentLocation, parser);
continue;
if (REQUIRED_SUBFIELDS.contains(parser.currentName())) {
throw new DocumentParsingException(
xContentLocation,
"Missing sub-fields definition for [" + parser.currentName() + "]"
);
} else {
logger.debug("Skipping indexing of unrecognized field name [" + parser.currentName() + "]");
advancePastCurrentFieldName(xContentLocation, parser);
continue;
}
}
parser.nextToken();
fieldMapper.parse(context);
Expand Down Expand Up @@ -382,4 +410,18 @@ public static void applyFieldInference(
fieldMap.put(InferenceMetadataFieldMapper.RESULTS, chunks);
inferenceMap.put(field, fieldMap);
}

/**
 * Result of {@link #findMapper}: the mapper registered under the last element of a dotted path,
 * paired with the object mapper it was looked up in.
 *
 * @param parent the object mapper that was queried for the last path element
 * @param mapper the mapper returned for the last path element; callers check it for {@code null}
 */
record MapperAndParent(ObjectMapper parent, Mapper mapper) {}

/**
 * Resolves a dotted field path against an object mapper hierarchy.
 *
 * Every path element except the last must resolve to an {@link ObjectMapper}; the walk descends
 * through them one at a time. The last element is then looked up in the deepest object mapper
 * reached.
 *
 * @param mapper   the object mapper to start the lookup from
 * @param fullPath the dot-separated path of the field
 * @return the deepest object mapper reached together with whatever it returns for the last path
 *         element, or {@code null} if an intermediate element is missing or is not an object mapper
 */
static MapperAndParent findMapper(ObjectMapper mapper, String fullPath) {
    final String[] segments = fullPath.split("\\.");
    final int leafIndex = segments.length - 1;
    ObjectMapper current = mapper;
    int depth = 0;
    while (depth < leafIndex) {
        // a null result fails the instanceof test too, so a missing element also ends the walk
        if (current.getMapper(segments[depth]) instanceof ObjectMapper child) {
            current = child;
            depth++;
        } else {
            return null;
        }
    }
    return new MapperAndParent(current, current.getMapper(segments[leafIndex]));
}
}
Loading

0 comments on commit 1c18fbc

Please sign in to comment.