From 78a5f144a4402277baca5c80f4e9f01de8ddb72c Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 8 Nov 2024 12:17:03 +0000 Subject: [PATCH] Allow missing semantic text field in bulk updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This update enables bulk update operations to succeed even if the semantic text field is absent in the partial update. For the simple case where the field isn’t referenced by a copy_to operation from another source, the inference can be safely bypassed, allowing the update to proceed without errors. --- .../xpack/inference/InferenceFeatures.java | 5 +- .../ShardBulkInferenceActionFilter.java | 6 +- .../mapper/SemanticTextFieldMapper.java | 6 +- .../60_semantic_text_inference_update.yml | 56 +++++++++++++++++++ 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 216b5c984eca5..10ffedef14e26 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -38,6 +38,9 @@ public Set getFeatures() { @Override public Set getTestFeatures() { - return Set.of(SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX); + return Set.of( + SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX, + SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX + ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index 1a6e4760fe125..a9e02f5b77af5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -29,6 +29,7 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceService; @@ -446,10 +447,12 @@ private Map> createFieldInferenceRequests(Bu String field = entry.getName(); String inferenceId = entry.getInferenceId(); var originalFieldValue = XContentMapValues.extractValue(field, docMap); - if (originalFieldValue instanceof Map) { + if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) { + // Inference has already been computed, or there is no inference required. continue; } int order = 0; + List fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>()); for (var sourceField : entry.getSourceFields()) { boolean isOriginalFieldInput = sourceField.equals(field); var valueObj = XContentMapValues.extractValue(sourceField, docMap); @@ -476,7 +479,6 @@ private Map> createFieldInferenceRequests(Bu addInferenceResponseFailure(item.id(), exc); break; } - List fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>()); for (var v : values) { fieldRequests.add(new FieldInferenceRequest(itemIndex, field, v, order++, isOriginalFieldInput)); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 4c07516051287..23707b79fec9d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -91,6 +91,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2"); public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); + public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); + public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; @@ -679,9 +681,9 @@ public static void insertValue(String path, Map map, Object newValue) { } } - private record SuffixMap(String suffix, Map map) {} + public record SuffixMap(String suffix, Map map) {} - private static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { + public static List extractSuffixMaps(String[] pathElements, int index, Object currentValue) { if (currentValue instanceof List valueList) { List suffixMaps = new ArrayList<>(valueList.size()); for (Object o : valueList) { diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml index 59ce439d954a2..510a9bc459d8f 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml @@ -610,3 +610,59 @@ setup: - exists: _source.dense_field.inference.chunks.0.embeddings - match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" } - match: { _source.non_inference_field: "updated non inference test" } + +--- +"Bypass inference on bulk update operation": + - requires: + cluster_features: semantic_text.single_field_update_fix + reason: Standalone semantic text fields are now optional in a bulk update operation + + # Update as upsert + - do: + bulk: + body: + - '{"update": {"_index": "test-index", "_id": "doc_1"}}' + - '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}' + + - match: { errors: false } + - match: { items.0.update.result: "created" } + + - do: + bulk: + body: + - '{"update": {"_index": "test-index", "_id": "doc_1"}}' + - '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}' + + - match: { errors: false } + - match: { items.0.update.result: "updated" } + + - do: + get: + index: test-index + id: doc_1 + + - match: { _source.sparse_field.text: "inference test" } + - exists: _source.sparse_field.inference.chunks.0.embeddings + - match: { _source.sparse_field.inference.chunks.0.text: "inference test" } + - match: { _source.dense_field.text: "another inference test" } + - exists: _source.dense_field.inference.chunks.0.embeddings + - match: { _source.dense_field.inference.chunks.0.text: "another inference test" } + - match: { _source.non_inference_field: "non inference test" } + + - do: + bulk: + body: + - '{"update": {"_index": "test-index", "_id": "doc_1"}}' + - '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}' + + - match: { errors: false } + - match: { items.0.update.result: "updated" } + + - do: + get: + index: test-index + id: doc_1 + + - match: { _source.sparse_field: null } + - match: { _source.dense_field: null } + - match: { _source.non_inference_field: "updated value" }