From 5d98e8b0e225d02eb9778e063dd05ec309aecff0 Mon Sep 17 00:00:00 2001
From: Jim Ferenczi
Date: Fri, 8 Nov 2024 15:53:03 +0000
Subject: [PATCH] Allow missing semantic text field in bulk updates (#116478)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This update enables bulk update operations to succeed even if the
semantic text field is absent in the partial update. For the simple
case where the field isn’t referenced by a copy_to operation from
another source, the inference can be safely bypassed, allowing the
update to proceed without errors.
---
 docs/changelog/116478.yaml                    |  5 ++
 .../xpack/inference/InferenceFeatures.java    |  5 +-
 .../ShardBulkInferenceActionFilter.java       |  3 +-
 .../mapper/SemanticTextFieldMapper.java       |  2 +
 .../60_semantic_text_inference_update.yml     | 56 +++++++++++++++++++
 5 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 docs/changelog/116478.yaml

diff --git a/docs/changelog/116478.yaml b/docs/changelog/116478.yaml
new file mode 100644
index 0000000000000..ec50799eb2019
--- /dev/null
+++ b/docs/changelog/116478.yaml
@@ -0,0 +1,5 @@
+pr: 116478
+summary: Semantic text simple partial update
+area: Search
+type: bug
+issues: []
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
index 632ba74f3b7b5..9d3a263b506c9 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@@ -36,6 +36,9 @@ public Set<NodeFeature> getFeatures() {
 
     @Override
     public Set<NodeFeature> getTestFeatures() {
-        return Set.of(SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX);
+        return Set.of(
+            SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX,
+            SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX
+        );
     }
 }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java
index 1a6e4760fe125..b3bbe3a7df9bc 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java
@@ -446,7 +446,8 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
                     String field = entry.getName();
                     String inferenceId = entry.getInferenceId();
                     var originalFieldValue = XContentMapValues.extractValue(field, docMap);
-                    if (originalFieldValue instanceof Map) {
+                    if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) {
+                        // Inference has already been computed, or there is no inference required.
                         continue;
                     }
                     int order = 0;
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
index f65e200697157..1de316127ed33 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
@@ -89,6 +89,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
     public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2");
     public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
 
+    public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
+
     public static final String CONTENT_TYPE = "semantic_text";
     public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
 
diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml
index 59ce439d954a2..294761608ee81 100644
--- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml
+++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml
@@ -610,3 +610,59 @@ setup:
   - exists: _source.dense_field.inference.chunks.0.embeddings
   - match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" }
   - match: { _source.non_inference_field: "updated non inference test" }
+
+---
+"Bypass inference on bulk update operation":
+  - requires:
+      cluster_features: semantic_text.single_field_update_fix
+      reason: Standalone semantic text fields are now optional in a bulk update operation
+
+  # Update as upsert
+  - do:
+      bulk:
+        body:
+          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
+          - '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}'
+
+  - match: { errors: false }
+  - match: { items.0.update.result: "created" }
+
+  - do:
+      bulk:
+        body:
+          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
+          - '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}'
+
+  - match: { errors: false }
+  - match: { items.0.update.result: "updated" }
+
+  - do:
+      get:
+        index: test-index
+        id: doc_1
+
+  - match: { _source.sparse_field.text: "inference test" }
+  - exists: _source.sparse_field.inference.chunks.0.embeddings
+  - match: { _source.sparse_field.inference.chunks.0.text: "inference test" }
+  - match: { _source.dense_field.text: "another inference test" }
+  - exists: _source.dense_field.inference.chunks.0.embeddings
+  - match: { _source.dense_field.inference.chunks.0.text: "another inference test" }
+  - match: { _source.non_inference_field: "another value" }
+
+  - do:
+      bulk:
+        body:
+          - '{"update": {"_index": "test-index", "_id": "doc_1"}}'
+          - '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}'
+
+  - match: { errors: false }
+  - match: { items.0.update.result: "updated" }
+
+  - do:
+      get:
+        index: test-index
+        id: doc_1
+
+  - match: { _source.sparse_field: null }
+  - match: { _source.dense_field: null }
+  - match: { _source.non_inference_field: "updated value" }