Allow missing semantic text field in bulk updates

This change lets a bulk update operation succeed when a semantic text field is absent from (or explicitly set to null in) the partial update document. In the simple case where the field is its own only source, i.e. it is not the target of a copy_to from another field, there is nothing to infer, so inference is safely bypassed and the update proceeds without errors.
jimczi · Nov 8, 2024 · 78a5f14
1 parent 8cc2801
commit 78a5f14
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 5 deletions.
diff --git a/...k/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/...k/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@@ -38,6 +38,9 @@ public Set<NodeFeature> getFeatures() {
 
     @Override
     public Set<NodeFeature> getTestFeatures() {
-        return Set.of(SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX);
+        return Set.of(
+            SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
+            SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX
+        );
     }
 }
diff --git a/.../java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/.../java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java
@@ -29,6 +29,7 @@
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.Releasable;
 import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.core.Tuple;
 import org.elasticsearch.inference.ChunkedInferenceServiceResults;
 import org.elasticsearch.inference.ChunkingOptions;
 import org.elasticsearch.inference.InferenceService;
@@ -446,10 +447,12 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
                     String field = entry.getName();
                     String inferenceId = entry.getInferenceId();
                     var originalFieldValue = XContentMapValues.extractValue(field, docMap);
-                    if (originalFieldValue instanceof Map) {
+                    if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) {
+                        // Inference has already been computed, or there is no inference required.
                         continue;
                     }
                     int order = 0;
+                    List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>());
                     for (var sourceField : entry.getSourceFields()) {
                         boolean isOriginalFieldInput = sourceField.equals(field);
                         var valueObj = XContentMapValues.extractValue(sourceField, docMap);
@@ -476,7 +479,6 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
                             addInferenceResponseFailure(item.id(), exc);
                             break;
                         }
-                        List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>());
                         for (var v : values) {
                             fieldRequests.add(new FieldInferenceRequest(itemIndex, field, v, order++, isOriginalFieldInput));
                         }

diff --git a/...rence/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/...rence/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
@@ -91,6 +91,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
     public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2");
     public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
 
+    public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
+
     public static final String CONTENT_TYPE = "semantic_text";
     public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
 
@@ -679,9 +681,9 @@ public static void insertValue(String path, Map<?, ?> map, Object newValue) {
         }
     }
 
-    private record SuffixMap(String suffix, Map<String, Object> map) {}
+    public record SuffixMap(String suffix, Map<String, Object> map) {}
 
-    private static List<SuffixMap> extractSuffixMaps(String[] pathElements, int index, Object currentValue) {
+    public static List<SuffixMap> extractSuffixMaps(String[] pathElements, int index, Object currentValue) {
         if (currentValue instanceof List<?> valueList) {
             List<SuffixMap> suffixMaps = new ArrayList<>(valueList.size());
             for (Object o : valueList) {

diff --git a/...yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml b/...yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml
@@ -610,3 +610,59 @@ setup:
   - exists: _source.dense_field.inference.chunks.0.embeddings
   - match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" }
   - match: { _source.non_inference_field: "updated non inference test" }
+
+---
+"Bypass inference on bulk update operation":
+  - requires:
+      cluster_features: semantic_text.single_field_update_fix
+      reason: Standalone semantic text fields are now optional in a bulk update operation
+
+  # Update as upsert: creates doc_1 with values for sparse_field, dense_field and non_inference_field
+  - do:
+      bulk:
+        body:
+          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
+          - '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}'
+
+  - match: { errors: false }
+  - match: { items.0.update.result: "created" }
+  # Partial update that omits sparse_field and dense_field entirely; it must succeed without errors
+  - do:
+      bulk:
+        body:
+          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
+          - '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}'
+
+  - match: { errors: false }
+  - match: { items.0.update.result: "updated" }
+  # The omitted fields keep their text and inference chunks; only non_inference_field carries the new value
+  - do:
+      get:
+        index: test-index
+        id: doc_1
+
+  - match: { _source.sparse_field.text: "inference test" }
+  - exists: _source.sparse_field.inference.chunks.0.embeddings
+  - match: { _source.sparse_field.inference.chunks.0.text: "inference test" }
+  - match: { _source.dense_field.text: "another inference test" }
+  - exists: _source.dense_field.inference.chunks.0.embeddings
+  - match: { _source.dense_field.inference.chunks.0.text: "another inference test" }
+  - match: { _source.non_inference_field: "another value" }
+  # Explicit null for sparse_field and dense_field must also be accepted
+  - do:
+      bulk:
+        body:
+          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
+          - '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}'
+
+  - match: { errors: false }
+  - match: { items.0.update.result: "updated" }
+
+  - do:
+      get:
+        index: test-index
+        id: doc_1
+
+  - match: { _source.sparse_field: null }
+  - match: { _source.dense_field: null }
+  - match: { _source.non_inference_field: "updated value" }