From 64b16dc72595b4134409554976f7c137da9a23be Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 1 Nov 2023 15:44:50 +0100 Subject: [PATCH] Include source for update_by_query as a hacking example --- .../reindex/RestUpdateByQueryAction.java | 2 ++ .../FieldInferenceBulkRequestPreprocessor.java | 14 ++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/RestUpdateByQueryAction.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/RestUpdateByQueryAction.java index 50a2b7de6db39..ecddfef7e971f 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/RestUpdateByQueryAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/RestUpdateByQueryAction.java @@ -71,6 +71,8 @@ protected UpdateByQueryRequest buildRequest(RestRequest request, NamedWriteableR consumers.put("script", o -> internal.setScript(Script.parse(o))); consumers.put("max_docs", s -> setMaxDocsValidateIdentical(internal, ((Number) s).intValue())); + // TODO There surely must be a better way of doing this + request.params().put("_source_includes", "*"); parseInternalRequest(internal, request, namedWriteableRegistry, consumers); internal.setPipeline(request.param("pipeline")); diff --git a/server/src/main/java/org/elasticsearch/ingest/FieldInferenceBulkRequestPreprocessor.java b/server/src/main/java/org/elasticsearch/ingest/FieldInferenceBulkRequestPreprocessor.java index 758fcff4dade6..2430ba33373d9 100644 --- a/server/src/main/java/org/elasticsearch/ingest/FieldInferenceBulkRequestPreprocessor.java +++ b/server/src/main/java/org/elasticsearch/ingest/FieldInferenceBulkRequestPreprocessor.java @@ -8,6 +8,8 @@ package org.elasticsearch.ingest; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.index.IndexRequest; @@ -33,6 +35,7 @@ public class FieldInferenceBulkRequestPreprocessor extends AbstractBulkRequestPreprocessor { + private static final Logger logger = LogManager.getLogger(FieldInferenceBulkRequestPreprocessor.class); public static final String SEMANTIC_TEXT_ORIGIN = "semantic_text"; private final IndicesService indicesService; @@ -99,11 +102,6 @@ public boolean shouldExecuteOnIngestNode() { } private boolean fieldNeedsInference(IndexRequest indexRequest, String fieldName, Object fieldValue) { - - if (fieldValue instanceof String == false) { - return false; - } - return getModelForField(indexRequest, fieldName) != null; } @@ -147,13 +145,17 @@ private void runInferenceForFields( String fieldName = fieldNames.get(0); List nextFieldNames = fieldNames.subList(1, fieldNames.size()); final String fieldValue = ingestDocument.getFieldValue(fieldName, String.class); - if (fieldValue == null) { + Object existingInference = ingestDocument.getFieldValue(SemanticTextInferenceFieldMapper.FIELD_NAME + "." + fieldName, Object.class, true); + if (fieldValue == null || existingInference != null) { // Run inference for next field + logger.info("Skipping inference for field [" + fieldName + "]"); runInferenceForFields(indexRequest, nextFieldNames, ref, position, ingestDocument, onFailure); + return; } String modelForField = getModelForField(indexRequest, fieldName); assert modelForField != null : "Field " + fieldName + " has no model associated in mappings"; + logger.info("Calculating inference for field [" + fieldName + "]"); // TODO Hardcoding task type, how to get that from model ID? InferenceAction.Request inferenceRequest = new InferenceAction.Request(