Skip to content

Commit

Permalink
Allow missing semantic text field in bulk updates (elastic#116478)
Browse files Browse the repository at this point in the history
This update enables bulk update operations to succeed even if the semantic text field is absent in the partial update.
In the simple case where the field is its own only source field (that is, it isn’t the target of a copy_to operation from another field), inference can be safely bypassed, allowing the update to proceed without errors.
  • Loading branch information
jimczi committed Nov 8, 2024
1 parent fc120f7 commit 40e096a
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/116478.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116478
summary: Semantic text simple partial update
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ public Set<NodeFeature> getFeatures() {

@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX);
return Set.of(
SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,8 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
String field = entry.getName();
String inferenceId = entry.getInferenceId();
var originalFieldValue = XContentMapValues.extractValue(field, docMap);
if (originalFieldValue instanceof Map) {
if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) {
// Either inference has already been computed (the value is a map), or the value is missing/null and the field has a single source field, so no inference is required.
continue;
}
int order = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2");
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");

public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");

public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,3 +610,59 @@ setup:
- exists: _source.dense_field.inference.chunks.0.embeddings
- match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" }
- match: { _source.non_inference_field: "updated non inference test" }

---
# Verifies that a bulk partial update which omits the semantic text fields succeeds and
# leaves their previously stored inference results in place, and that explicitly setting
# those fields to null in a later bulk update is accepted as well.
"Bypass inference on bulk update operation":
- requires:
cluster_features: semantic_text.single_field_update_fix
reason: Standalone semantic text fields are now optional in a bulk update operation

# Update as upsert
# Step 1: create doc_1 with both semantic text fields and one plain field populated.
- do:
bulk:
body:
- '{"update": {"_index": "test-index", "_id": "doc_1"}}'
- '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}'

- match: { errors: false }
- match: { items.0.update.result: "created" }

# Step 2: partial update that touches only the plain field; sparse_field and dense_field
# are absent from the update body.
- do:
bulk:
body:
- '{"update": {"_index": "test-index", "_id": "doc_1"}}'
- '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}'

- match: { errors: false }
- match: { items.0.update.result: "updated" }

# Step 3: fetch the document and confirm the semantic text fields still hold the text and
# embeddings from step 1, while the plain field carries the new value.
- do:
get:
index: test-index
id: doc_1

- match: { _source.sparse_field.text: "inference test" }
- exists: _source.sparse_field.inference.chunks.0.embeddings
- match: { _source.sparse_field.inference.chunks.0.text: "inference test" }
- match: { _source.dense_field.text: "another inference test" }
- exists: _source.dense_field.inference.chunks.0.embeddings
- match: { _source.dense_field.inference.chunks.0.text: "another inference test" }
- match: { _source.non_inference_field: "another value" }

# Step 4: partial update that explicitly sets both semantic text fields to null.
- do:
bulk:
body:
- '{"update": {"_index": "test-index", "_id": "doc_1"}}'
- '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}'

- match: { errors: false }
- match: { items.0.update.result: "updated" }

# Step 5: fetch the document and confirm both semantic text fields are now null and the
# plain field carries the latest value.
- do:
get:
index: test-index
id: doc_1

- match: { _source.sparse_field: null }
- match: { _source.dense_field: null }
- match: { _source.non_inference_field: "updated value" }

0 comments on commit 40e096a

Please sign in to comment.