Skip to content

Commit

Permalink
Allow missing semantic text field in bulk updates
Browse files Browse the repository at this point in the history
This update enables bulk update operations to succeed even if the semantic text field is absent in the partial update.
For the simple case where the field isn’t referenced by a copy_to operation from another source, the inference can be safely bypassed, allowing the update to proceed without errors.
  • Loading branch information
jimczi committed Nov 8, 2024
1 parent 8cc2801 commit 78a5f14
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ public Set<NodeFeature> getFeatures() {

/**
 * Returns the test features exposed by this class: the two semantic text fix markers declared
 * on {@code SemanticTextFieldMapper}.
 *
 * @return an unmodifiable set holding the object-field fix and the single-field update fix features
 */
@Override
public Set<NodeFeature> getTestFeatures() {
    // A single return statement: a second, earlier return would make this one unreachable.
    return Set.of(
        SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
        SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.inference.ChunkedInferenceServiceResults;
import org.elasticsearch.inference.ChunkingOptions;
import org.elasticsearch.inference.InferenceService;
Expand Down Expand Up @@ -446,10 +447,12 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
String field = entry.getName();
String inferenceId = entry.getInferenceId();
var originalFieldValue = XContentMapValues.extractValue(field, docMap);
if (originalFieldValue instanceof Map) {
if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) {
// Inference has already been computed (the value is a map), or the value is null and the field
// has a single source field, so there is nothing to run inference on.
continue;
}
int order = 0;
List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>());
for (var sourceField : entry.getSourceFields()) {
boolean isOriginalFieldInput = sourceField.equals(field);
var valueObj = XContentMapValues.extractValue(sourceField, docMap);
Expand All @@ -476,7 +479,6 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
addInferenceResponseFailure(item.id(), exc);
break;
}
List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>());
for (var v : values) {
fieldRequests.add(new FieldInferenceRequest(itemIndex, field, v, order++, isOriginalFieldInput));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2");
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");

public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");

public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

Expand Down Expand Up @@ -679,9 +681,9 @@ public static void insertValue(String path, Map<?, ?> map, Object newValue) {
}
}

/**
 * Pairs a path suffix with a map; this is the element type of the list returned by
 * {@code extractSuffixMaps}.
 *
 * @param suffix the suffix component of the pair
 * @param map    the map component of the pair
 */
public record SuffixMap(String suffix, Map<String, Object> map) {}

private static List<SuffixMap> extractSuffixMaps(String[] pathElements, int index, Object currentValue) {
public static List<SuffixMap> extractSuffixMaps(String[] pathElements, int index, Object currentValue) {
if (currentValue instanceof List<?> valueList) {
List<SuffixMap> suffixMaps = new ArrayList<>(valueList.size());
for (Object o : valueList) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,3 +610,59 @@ setup:
- exists: _source.dense_field.inference.chunks.0.embeddings
- match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" }
- match: { _source.non_inference_field: "updated non inference test" }

---
# Checks that a bulk partial update which omits (or nulls) the semantic text fields succeeds.
"Bypass inference on bulk update operation":
  - requires:
      cluster_features: semantic_text.single_field_update_fix
      reason: Standalone semantic text fields are now optional in a bulk update operation

  # Update as upsert
  - do:
      bulk:
        body:
          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
          - '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}'

  - match: { errors: false }
  - match: { items.0.update.result: "created" }

  # Partial update that leaves both semantic text fields out of the document
  - do:
      bulk:
        body:
          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
          - '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}'

  - match: { errors: false }
  - match: { items.0.update.result: "updated" }

  - do:
      get:
        index: test-index
        id: doc_1

  # The semantic text fields keep the values written by the first request
  - match: { _source.sparse_field.text: "inference test" }
  - exists: _source.sparse_field.inference.chunks.0.embeddings
  - match: { _source.sparse_field.inference.chunks.0.text: "inference test" }
  - match: { _source.dense_field.text: "another inference test" }
  - exists: _source.dense_field.inference.chunks.0.embeddings
  - match: { _source.dense_field.inference.chunks.0.text: "another inference test" }
  # The partial update above set non_inference_field to "another value"
  - match: { _source.non_inference_field: "another value" }

  # Partial update that explicitly sets both semantic text fields to null
  - do:
      bulk:
        body:
          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
          - '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}'

  - match: { errors: false }
  - match: { items.0.update.result: "updated" }

  - do:
      get:
        index: test-index
        id: doc_1

  - match: { _source.sparse_field: null }
  - match: { _source.dense_field: null }
  - match: { _source.non_inference_field: "updated value" }

0 comments on commit 78a5f14

Please sign in to comment.