From 1a939e922e08cd02c5c76030082ea5f5f4f68073 Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Mon, 29 Jul 2024 09:46:02 -0400 Subject: [PATCH 01/22] [ML] Create and inject APM Inference Metrics (#111293) We are migrating from in-memory cumulative counter to an Time Series Data Stream delta counter. The goal is to avoid metrics suddenly dropping to zero when a node restarts, hopefully increasing accuracy of the metric. Co-authored-by: Jonathan Buttner <56361221+jonathan-buttner@users.noreply.github.com> --- .../inference/ServiceSettings.java | 2 + .../xpack/inference/InferencePlugin.java | 11 +- .../action/TransportInferenceAction.java | 7 +- .../embeddings/CohereEmbeddingsModel.java | 4 +- .../OpenAiEmbeddingsServiceSettings.java | 2 +- .../telemetry/ApmInferenceStats.java | 49 ++++++++ .../telemetry/InferenceAPMStats.java | 47 ------- .../inference/telemetry/InferenceStats.java | 52 ++------ .../xpack/inference/telemetry/Stats.java | 30 ----- .../xpack/inference/telemetry/StatsMap.java | 57 --------- .../telemetry/ApmInferenceStatsTests.java | 69 ++++++++++ .../inference/telemetry/StatsMapTests.java | 119 ------------------ 12 files changed, 142 insertions(+), 307 deletions(-) create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStats.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStatsTests.java delete mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/StatsMapTests.java diff --git a/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java b/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java index 34a58f83963ce..58e87105f70a3 100644 --- a/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java +++ b/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java @@ -9,6 +9,7 @@ package org.elasticsearch.inference; import org.elasticsearch.common.io.stream.VersionedNamedWriteable; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.xcontent.ToXContentObject; @@ -48,5 +49,6 @@ default DenseVectorFieldMapper.ElementType elementType() { * be chosen when initializing a deployment within their service. In this situation, return null. 
* @return the model used to perform inference or null if the model is not defined */ + @Nullable String modelId(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index fce2c54c535c9..ec9398358d180 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -26,6 +26,7 @@ import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.InferenceServiceRegistry; +import org.elasticsearch.node.PluginComponentBinding; import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.ExtensiblePlugin; import org.elasticsearch.plugins.MapperPlugin; @@ -84,8 +85,8 @@ import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserService; import org.elasticsearch.xpack.inference.services.mistral.MistralService; import org.elasticsearch.xpack.inference.services.openai.OpenAiService; -import org.elasticsearch.xpack.inference.telemetry.InferenceAPMStats; -import org.elasticsearch.xpack.inference.telemetry.StatsMap; +import org.elasticsearch.xpack.inference.telemetry.ApmInferenceStats; +import org.elasticsearch.xpack.inference.telemetry.InferenceStats; import java.util.ArrayList; import java.util.Collection; @@ -196,10 +197,10 @@ public Collection createComponents(PluginServices services) { var actionFilter = new ShardBulkInferenceActionFilter(registry, modelRegistry); shardBulkInferenceActionFilter.set(actionFilter); - var statsFactory = new InferenceAPMStats.Factory(services.telemetryProvider().getMeterRegistry()); - var statsMap = new StatsMap<>(InferenceAPMStats::key, statsFactory::newInferenceRequestAPMCounter); + var meterRegistry = services.telemetryProvider().getMeterRegistry(); + var stats = new PluginComponentBinding<>(InferenceStats.class, ApmInferenceStats.create(meterRegistry)); - return List.of(modelRegistry, registry, httpClientManager, statsMap); + return List.of(modelRegistry, registry, httpClientManager, stats); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java index 575697b5d0d39..b7fff3b704695 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java @@ -21,22 +21,26 @@ import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.inference.registry.ModelRegistry; +import org.elasticsearch.xpack.inference.telemetry.InferenceStats; public class TransportInferenceAction extends HandledTransportAction { private final ModelRegistry modelRegistry; private final InferenceServiceRegistry serviceRegistry; + private final InferenceStats inferenceStats; @Inject public TransportInferenceAction( TransportService transportService, ActionFilters actionFilters, ModelRegistry modelRegistry, - InferenceServiceRegistry serviceRegistry + InferenceServiceRegistry serviceRegistry, + InferenceStats inferenceStats ) { 
super(InferenceAction.NAME, transportService, actionFilters, InferenceAction.Request::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.modelRegistry = modelRegistry; this.serviceRegistry = serviceRegistry; + this.inferenceStats = inferenceStats; } @Override @@ -76,6 +80,7 @@ protected void doExecute(Task task, InferenceAction.Request request, ActionListe unparsedModel.settings(), unparsedModel.secrets() ); + inferenceStats.incrementRequestCount(model); inferOnService(model, request, service.get(), delegate); }); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsModel.java index 538d88a59ca76..fea5226bf9c6f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsModel.java @@ -28,7 +28,7 @@ public static CohereEmbeddingsModel of(CohereEmbeddingsModel model, Map serviceSettings, @@ -37,7 +37,7 @@ public CohereEmbeddingsModel( ConfigurationParseContext context ) { this( - modelId, + inferenceId, taskType, service, CohereEmbeddingsServiceSettings.fromMap(serviceSettings, context), diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java index d474e935fbda7..6ef1f6f0feefe 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java @@ -150,7 +150,7 @@ public OpenAiEmbeddingsServiceSettings( @Nullable RateLimitSettings rateLimitSettings ) { this.uri = uri; - this.modelId = modelId; + this.modelId = Objects.requireNonNull(modelId); this.organizationId = organizationId; this.similarity = similarity; this.dimensions = dimensions; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStats.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStats.java new file mode 100644 index 0000000000000..ae14a0792dead --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStats.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.telemetry; + +import org.elasticsearch.inference.Model; +import org.elasticsearch.telemetry.metric.LongCounter; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +import java.util.HashMap; +import java.util.Objects; + +public class ApmInferenceStats implements InferenceStats { + private final LongCounter inferenceAPMRequestCounter; + + public ApmInferenceStats(LongCounter inferenceAPMRequestCounter) { + this.inferenceAPMRequestCounter = Objects.requireNonNull(inferenceAPMRequestCounter); + } + + @Override + public void incrementRequestCount(Model model) { + var service = model.getConfigurations().getService(); + var taskType = model.getTaskType(); + var modelId = model.getServiceSettings().modelId(); + + var attributes = new HashMap(5); + attributes.put("service", service); + attributes.put("task_type", taskType.toString()); + if (modelId != null) { + attributes.put("model_id", modelId); + } + + inferenceAPMRequestCounter.incrementBy(1, attributes); + } + + public static ApmInferenceStats create(MeterRegistry meterRegistry) { + return new ApmInferenceStats( + meterRegistry.registerLongCounter( + "es.inference.requests.count.total", + "Inference API request counts for a particular service, task type, model ID", + "operations" + ) + ); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java deleted file mode 100644 index 76977fef76045..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.telemetry; - -import org.elasticsearch.inference.Model; -import org.elasticsearch.telemetry.metric.LongCounter; -import org.elasticsearch.telemetry.metric.MeterRegistry; - -import java.util.Map; -import java.util.Objects; - -public class InferenceAPMStats extends InferenceStats { - - private final LongCounter inferenceAPMRequestCounter; - - public InferenceAPMStats(Model model, MeterRegistry meterRegistry) { - super(model); - this.inferenceAPMRequestCounter = meterRegistry.registerLongCounter( - "es.inference.requests.count", - "Inference API request counts for a particular service, task type, model ID", - "operations" - ); - } - - @Override - public void increment() { - super.increment(); - inferenceAPMRequestCounter.incrementBy(1, Map.of("service", service, "task_type", taskType.toString(), "model_id", modelId)); - } - - public static final class Factory { - private final MeterRegistry meterRegistry; - - public Factory(MeterRegistry meterRegistry) { - this.meterRegistry = Objects.requireNonNull(meterRegistry); - } - - public InferenceAPMStats newInferenceRequestAPMCounter(Model model) { - return new InferenceAPMStats(model, meterRegistry); - } - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java index d639f9da71f56..d080e818e45fc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java @@ -8,52 +8,14 @@ package org.elasticsearch.xpack.inference.telemetry; import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.core.inference.InferenceRequestStats; -import java.util.Objects; -import java.util.concurrent.atomic.LongAdder; +public interface InferenceStats { -public class InferenceStats implements Stats { - protected final String service; - protected final TaskType taskType; - protected final String modelId; - protected final LongAdder counter = new LongAdder(); + /** + * Increment the counter for a particular value in a thread safe manner. 
+ * @param model the model to increment request count for + */ + void incrementRequestCount(Model model); - public static String key(Model model) { - StringBuilder builder = new StringBuilder(); - builder.append(model.getConfigurations().getService()); - builder.append(":"); - builder.append(model.getTaskType()); - - if (model.getServiceSettings().modelId() != null) { - builder.append(":"); - builder.append(model.getServiceSettings().modelId()); - } - - return builder.toString(); - } - - public InferenceStats(Model model) { - Objects.requireNonNull(model); - - service = model.getConfigurations().getService(); - taskType = model.getTaskType(); - modelId = model.getServiceSettings().modelId(); - } - - @Override - public void increment() { - counter.increment(); - } - - @Override - public long getCount() { - return counter.sum(); - } - - @Override - public InferenceRequestStats toSerializableForm() { - return new InferenceRequestStats(service, taskType, modelId, getCount()); - } + InferenceStats NOOP = model -> {}; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java deleted file mode 100644 index bb1e9c98fc2cb..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.telemetry; - -import org.elasticsearch.xpack.core.inference.SerializableStats; - -public interface Stats { - - /** - * Increase the counter by one. - */ - void increment(); - - /** - * Return the current value of the counter. - * @return the current value of the counter - */ - long getCount(); - - /** - * Convert the object into a serializable form that can be written across nodes and returned in xcontent format. - * @return the serializable format of the object - */ - SerializableStats toSerializableForm(); -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java deleted file mode 100644 index 1cfecfb4507d6..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.telemetry; - -import org.elasticsearch.xpack.core.inference.SerializableStats; - -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.function.Function; -import java.util.stream.Collectors; - -/** - * A map to provide tracking incrementing statistics. 
- * - * @param The input to derive the keys and values for the map - * @param The type of the values stored in the map - */ -public class StatsMap { - - private final ConcurrentMap stats = new ConcurrentHashMap<>(); - private final Function keyCreator; - private final Function valueCreator; - - /** - * @param keyCreator a function for creating a key in the map based on the input provided - * @param valueCreator a function for creating a value in the map based on the input provided - */ - public StatsMap(Function keyCreator, Function valueCreator) { - this.keyCreator = Objects.requireNonNull(keyCreator); - this.valueCreator = Objects.requireNonNull(valueCreator); - } - - /** - * Increment the counter for a particular value in a thread safe manner. - * @param input the input to derive the appropriate key in the map - */ - public void increment(Input input) { - var value = stats.computeIfAbsent(keyCreator.apply(input), key -> valueCreator.apply(input)); - value.increment(); - } - - /** - * Build a map that can be serialized. This takes a snapshot of the current state. Any concurrent calls to increment may or may not - * be represented in the resulting serializable map. - * @return a map that is more easily serializable - */ - public Map toSerializableMap() { - return stats.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toSerializableForm())); - } -} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStatsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStatsTests.java new file mode 100644 index 0000000000000..1a5aba5f89ad2 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/ApmInferenceStatsTests.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.telemetry; + +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.telemetry.metric.LongCounter; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.test.ESTestCase; + +import java.util.Map; + +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class ApmInferenceStatsTests extends ESTestCase { + + public void testRecordWithModel() { + var longCounter = mock(LongCounter.class); + + var stats = new ApmInferenceStats(longCounter); + + stats.incrementRequestCount(model("service", TaskType.ANY, "modelId")); + + verify(longCounter).incrementBy( + eq(1L), + eq(Map.of("service", "service", "task_type", TaskType.ANY.toString(), "model_id", "modelId")) + ); + } + + public void testRecordWithoutModel() { + var longCounter = mock(LongCounter.class); + + var stats = new ApmInferenceStats(longCounter); + + stats.incrementRequestCount(model("service", TaskType.ANY, null)); + + verify(longCounter).incrementBy(eq(1L), eq(Map.of("service", "service", "task_type", TaskType.ANY.toString()))); + } + + public void testCreation() { + assertNotNull(ApmInferenceStats.create(MeterRegistry.NOOP)); + } + + private Model model(String service, TaskType taskType, String modelId) { + var configuration = mock(ModelConfigurations.class); + when(configuration.getService()).thenReturn(service); + var settings = mock(ServiceSettings.class); + if (modelId != null) { + when(settings.modelId()).thenReturn(modelId); + } + + var model = mock(Model.class); + when(model.getTaskType()).thenReturn(taskType); + when(model.getConfigurations()).thenReturn(configuration); + when(model.getServiceSettings()).thenReturn(settings); + + return model; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/StatsMapTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/StatsMapTests.java deleted file mode 100644 index fcd8d3d7cefbc..0000000000000 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/telemetry/StatsMapTests.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.telemetry; - -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; -import org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsModel; -import org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsServiceSettingsTests; -import org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsTaskSettingsTests; -import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsModel; -import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsServiceSettingsTests; -import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsTaskSettingsTests; - -import java.util.Map; - -import static org.hamcrest.Matchers.is; - -public class StatsMapTests extends ESTestCase { - public void testAddingEntry_InitializesTheCountToOne() { - var stats = new StatsMap<>(InferenceStats::key, InferenceStats::new); - - stats.increment( - new OpenAiEmbeddingsModel( - "inference_id", - TaskType.TEXT_EMBEDDING, - "openai", - OpenAiEmbeddingsServiceSettingsTests.getServiceSettingsMap("modelId", null, null), - OpenAiEmbeddingsTaskSettingsTests.getTaskSettingsMap(null), - null, - ConfigurationParseContext.REQUEST - ) - ); - - var converted = stats.toSerializableMap(); - - assertThat( - converted, - is( - Map.of( - "openai:text_embedding:modelId", - new org.elasticsearch.xpack.core.inference.InferenceRequestStats("openai", TaskType.TEXT_EMBEDDING, "modelId", 1) - ) - ) - ); - } - - public void testIncrementingWithSeparateModels_IncrementsTheCounterToTwo() { - var stats = new StatsMap<>(InferenceStats::key, InferenceStats::new); - - var model1 = new OpenAiEmbeddingsModel( - "inference_id", - TaskType.TEXT_EMBEDDING, - "openai", - OpenAiEmbeddingsServiceSettingsTests.getServiceSettingsMap("modelId", null, null), - OpenAiEmbeddingsTaskSettingsTests.getTaskSettingsMap(null), - null, - ConfigurationParseContext.REQUEST - ); - - var model2 = new OpenAiEmbeddingsModel( - "inference_id", - TaskType.TEXT_EMBEDDING, - "openai", - OpenAiEmbeddingsServiceSettingsTests.getServiceSettingsMap("modelId", null, null), - OpenAiEmbeddingsTaskSettingsTests.getTaskSettingsMap(null), - null, - ConfigurationParseContext.REQUEST - ); - - stats.increment(model1); - stats.increment(model2); - - var converted = stats.toSerializableMap(); - - assertThat( - converted, - is( - Map.of( - "openai:text_embedding:modelId", - new org.elasticsearch.xpack.core.inference.InferenceRequestStats("openai", TaskType.TEXT_EMBEDDING, "modelId", 2) - ) - ) - ); - } - - public void testNullModelId_ResultsInKeyWithout() { - var stats = new StatsMap<>(InferenceStats::key, InferenceStats::new); - - stats.increment( - new CohereEmbeddingsModel( - "inference_id", - TaskType.TEXT_EMBEDDING, - "cohere", - CohereEmbeddingsServiceSettingsTests.getServiceSettingsMap(null, null, null), - CohereEmbeddingsTaskSettingsTests.getTaskSettingsMap(null, null), - null, - ConfigurationParseContext.REQUEST - ) - ); - - var converted = stats.toSerializableMap(); - - assertThat( - converted, - is( - Map.of( - "cohere:text_embedding", - new org.elasticsearch.xpack.core.inference.InferenceRequestStats("cohere", TaskType.TEXT_EMBEDDING, null, 1) - ) - ) - ); - } -} From 80d539d986c06968aa7441d835626e73907093d1 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 
29 Jul 2024 17:11:36 +0200 Subject: [PATCH 02/22] [ML] Fix failing test DetectionRulesTests.testEqualsAndHashcode (#111351) Fixes #111308 --- muted-tests.yml | 3 --- .../xpack/core/ml/job/config/DetectionRuleTests.java | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 6c8b5bc39553d..d106ca3c9d701 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -108,9 +108,6 @@ tests: - class: org.elasticsearch.upgrades.LogsIndexModeFullClusterRestartIT method: testLogsIndexing {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/111306 -- class: org.elasticsearch.xpack.core.ml.job.config.DetectionRuleTests - method: testEqualsAndHashcode - issue: https://github.com/elastic/elasticsearch/issues/111308 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 - class: org.elasticsearch.xpack.esql.analysis.VerifierTests diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/config/DetectionRuleTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/config/DetectionRuleTests.java index d716f34f86e6c..127088d82bade 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/config/DetectionRuleTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/config/DetectionRuleTests.java @@ -139,6 +139,8 @@ protected DetectionRule mutateInstance(DetectionRule instance) { if (actions.contains(RuleAction.FORCE_TIME_SHIFT) && params.getForceTimeShift() == null) { params = new RuleParams(new RuleParamsForForceTimeShift(randomLong())); + } else if (actions.contains(RuleAction.FORCE_TIME_SHIFT) == false && params.getForceTimeShift() != null) { + params = new RuleParams(); } return new DetectionRule.Builder(conditions).setActions(actions).setScope(scope).setParams(params).build(); From e24a7c12707cab4f306de8ec7af8d9196d26e0bf Mon Sep 17 00:00:00 2001 From: Chris Earle Date: Mon, 29 Jul 2024 10:28:13 -0600 Subject: [PATCH 03/22] [Service Account] Add AutoOps account (#111316) This adds a `ServiceAccount` for AutoOps usage to collect monitoring stats from the cluster. 
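As an illustration of how AutoOps is expected to consume this account: a caller would mint a service token (for example with `POST /_security/service/elastic/auto-ops/credential/token/<token-name>`) and present it as a bearer credential when polling monitoring APIs. The sketch below uses the low-level Java REST client; the host, port, and token source are placeholders, and nothing in the sketch is part of the change itself.

    import org.apache.http.Header;
    import org.apache.http.HttpHost;
    import org.apache.http.message.BasicHeader;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.Response;
    import org.elasticsearch.client.RestClient;

    public class AutoOpsStatsPoller {
        public static void main(String[] args) throws Exception {
            // Placeholder: a token previously created for elastic/auto-ops.
            String serviceToken = System.getenv("AUTO_OPS_TOKEN");
            try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "https"))
                .setDefaultHeaders(new Header[] { new BasicHeader("Authorization", "Bearer " + serviceToken) })
                .build()) {
                // The elastic/auto-ops role grants cluster "monitor" plus read_ilm/read_slm,
                // so stats can be read but nothing can be written or reconfigured.
                Response response = client.performRequest(new Request("GET", "/_nodes/stats"));
                System.out.println(response.getStatusLine());
            }
        }
    }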
--- docs/changelog/111316.yaml | 5 ++ .../authc/service/ServiceAccountIT.java | 31 +++++++ .../authc/service/ElasticServiceAccounts.java | 20 +++++ ...TransportGetServiceAccountActionTests.java | 15 ++-- .../service/ElasticServiceAccountsTests.java | 90 +++++++++++++++++++ .../service/ServiceAccountServiceTests.java | 12 ++- .../test/service_accounts/10_basic.yml | 6 +- 7 files changed, 167 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/111316.yaml diff --git a/docs/changelog/111316.yaml b/docs/changelog/111316.yaml new file mode 100644 index 0000000000000..0d915cd1ec3ea --- /dev/null +++ b/docs/changelog/111316.yaml @@ -0,0 +1,5 @@ +pr: 111316 +summary: "[Service Account] Add `AutoOps` account" +area: Security +type: enhancement +issues: [] diff --git a/x-pack/plugin/security/qa/service-account/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountIT.java b/x-pack/plugin/security/qa/service-account/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountIT.java index e790866cf3d77..c1686a500fb2c 100644 --- a/x-pack/plugin/security/qa/service-account/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountIT.java +++ b/x-pack/plugin/security/qa/service-account/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountIT.java @@ -80,6 +80,33 @@ public class ServiceAccountIT extends ESRestTestCase { } """; + private static final String ELASTIC_AUTO_OPS_ROLE_DESCRIPTOR = """ + { + "cluster": [ + "monitor", + "read_ilm", + "read_slm" + ], + "indices": [ + { + "names": [ + "*" + ], + "privileges": [ + "monitor", + "view_index_metadata" + ], + "allow_restricted_indices": true + } + ], + "applications": [], + "run_as": [], + "metadata": {}, + "transient_metadata": { + "enabled": true + } + }"""; + private static final String ELASTIC_FLEET_SERVER_ROLE_DESCRIPTOR = """ { "cluster": [ @@ -400,6 +427,10 @@ public void testGetServiceAccount() throws IOException { assertOK(getServiceAccountResponse3); assertServiceAccountRoleDescriptor(getServiceAccountResponse3, "elastic/fleet-server", ELASTIC_FLEET_SERVER_ROLE_DESCRIPTOR); + final Request getServiceAccountRequestAutoOps = new Request("GET", "_security/service/elastic/auto-ops"); + final Response getServiceAccountResponseAutoOps = client().performRequest(getServiceAccountRequestAutoOps); + assertServiceAccountRoleDescriptor(getServiceAccountResponseAutoOps, "elastic/auto-ops", ELASTIC_AUTO_OPS_ROLE_DESCRIPTOR); + final Request getServiceAccountRequestKibana = new Request("GET", "_security/service/elastic/kibana"); final Response getServiceAccountResponseKibana = client().performRequest(getServiceAccountRequestKibana); assertOK(getServiceAccountResponseKibana); diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccounts.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccounts.java index abd586920f2d8..b62ce28422a9c 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccounts.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccounts.java @@ -22,6 +22,25 @@ final class ElasticServiceAccounts { static final String NAMESPACE = "elastic"; + private static final ServiceAccount AUTO_OPS_ACCOUNT = new ElasticServiceAccount( + "auto-ops", + new RoleDescriptor( + NAMESPACE + "/auto-ops", + new String[] { 
"monitor", "read_ilm", "read_slm" }, + new RoleDescriptor.IndicesPrivileges[] { + RoleDescriptor.IndicesPrivileges.builder() + .allowRestrictedIndices(true) + .indices("*") + .privileges("monitor", "view_index_metadata") + .build(), }, + null, + null, + null, + null, + null + ) + ); + private static final ServiceAccount ENTERPRISE_SEARCH_ACCOUNT = new ElasticServiceAccount( "enterprise-search-server", new RoleDescriptor( @@ -173,6 +192,7 @@ final class ElasticServiceAccounts { ); static final Map ACCOUNTS = Stream.of( + AUTO_OPS_ACCOUNT, ENTERPRISE_SEARCH_ACCOUNT, FLEET_ACCOUNT, FLEET_REMOTE_ACCOUNT, diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountActionTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountActionTests.java index b313d94a46ce5..7e35297fcb655 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountActionTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountActionTests.java @@ -20,7 +20,6 @@ import java.util.Arrays; import java.util.Collections; -import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; @@ -47,12 +46,16 @@ public void testDoExecute() { final PlainActionFuture future1 = new PlainActionFuture<>(); transportGetServiceAccountAction.doExecute(mock(Task.class), request1, future1); final GetServiceAccountResponse getServiceAccountResponse1 = future1.actionGet(); - assertThat(getServiceAccountResponse1.getServiceAccountInfos().length, equalTo(4)); + assertThat(getServiceAccountResponse1.getServiceAccountInfos().length, equalTo(5)); assertThat( - Arrays.stream(getServiceAccountResponse1.getServiceAccountInfos()) - .map(ServiceAccountInfo::getPrincipal) - .collect(Collectors.toList()), - containsInAnyOrder("elastic/enterprise-search-server", "elastic/fleet-server", "elastic/fleet-server-remote", "elastic/kibana") + Arrays.stream(getServiceAccountResponse1.getServiceAccountInfos()).map(ServiceAccountInfo::getPrincipal).toList(), + containsInAnyOrder( + "elastic/auto-ops", + "elastic/enterprise-search-server", + "elastic/fleet-server", + "elastic/fleet-server-remote", + "elastic/kibana" + ) ); final GetServiceAccountRequest request2 = new GetServiceAccountRequest("elastic", "fleet-server"); diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccountsTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccountsTests.java index 756d53285a8f6..21e29469bb02b 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccountsTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ElasticServiceAccountsTests.java @@ -8,18 +8,30 @@ package org.elasticsearch.xpack.security.authc.service; import org.elasticsearch.action.admin.cluster.health.TransportClusterHealthAction; +import org.elasticsearch.action.admin.cluster.node.stats.TransportNodesStatsAction; import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsAction; +import org.elasticsearch.action.admin.cluster.snapshots.create.TransportCreateSnapshotAction; +import 
org.elasticsearch.action.admin.cluster.snapshots.delete.TransportDeleteSnapshotAction; +import org.elasticsearch.action.admin.cluster.snapshots.get.TransportGetSnapshotsAction; +import org.elasticsearch.action.admin.cluster.snapshots.restore.TransportRestoreSnapshotAction; +import org.elasticsearch.action.admin.indices.alias.TransportIndicesAliasesAction; +import org.elasticsearch.action.admin.indices.alias.get.GetAliasesAction; import org.elasticsearch.action.admin.indices.create.AutoCreateAction; import org.elasticsearch.action.admin.indices.create.TransportCreateIndexAction; import org.elasticsearch.action.admin.indices.delete.TransportDeleteIndexAction; import org.elasticsearch.action.admin.indices.mapping.put.TransportAutoPutMappingAction; import org.elasticsearch.action.admin.indices.refresh.RefreshAction; +import org.elasticsearch.action.admin.indices.settings.get.GetSettingsAction; import org.elasticsearch.action.admin.indices.settings.put.TransportUpdateSettingsAction; import org.elasticsearch.action.admin.indices.stats.IndicesStatsAction; import org.elasticsearch.action.admin.indices.template.delete.TransportDeleteIndexTemplateAction; +import org.elasticsearch.action.admin.indices.template.get.GetComponentTemplateAction; +import org.elasticsearch.action.admin.indices.template.get.GetComposableIndexTemplateAction; import org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesAction; import org.elasticsearch.action.admin.indices.template.put.TransportPutIndexTemplateAction; import org.elasticsearch.action.bulk.TransportBulkAction; +import org.elasticsearch.action.datastreams.DataStreamsStatsAction; +import org.elasticsearch.action.datastreams.lifecycle.GetDataStreamLifecycleAction; import org.elasticsearch.action.delete.TransportDeleteAction; import org.elasticsearch.action.get.TransportGetAction; import org.elasticsearch.action.get.TransportMultiGetAction; @@ -52,6 +64,11 @@ import org.elasticsearch.xpack.core.security.authz.store.ReservedRolesStore; import org.elasticsearch.xpack.core.security.user.KibanaSystemUser; import org.elasticsearch.xpack.core.security.user.User; +import org.elasticsearch.xpack.core.slm.action.DeleteSnapshotLifecycleAction; +import org.elasticsearch.xpack.core.slm.action.ExecuteSnapshotLifecycleAction; +import org.elasticsearch.xpack.core.slm.action.GetSLMStatusAction; +import org.elasticsearch.xpack.core.slm.action.GetSnapshotLifecycleAction; +import org.elasticsearch.xpack.core.slm.action.PutSnapshotLifecycleAction; import org.elasticsearch.xpack.security.authc.service.ElasticServiceAccounts.ElasticServiceAccount; import java.util.List; @@ -67,6 +84,79 @@ public class ElasticServiceAccountsTests extends ESTestCase { + public void testAutoOpsPrivileges() { + final Role role = Role.buildFromRoleDescriptor( + ElasticServiceAccounts.ACCOUNTS.get("elastic/auto-ops").roleDescriptor(), + new FieldPermissionsCache(Settings.EMPTY), + RESTRICTED_INDICES + ); + + final Authentication authentication = AuthenticationTestHelper.builder().serviceAccount().build(); + final TransportRequest request = mock(TransportRequest.class); + + // monitor + assertThat(role.cluster().check(GetComponentTemplateAction.NAME, request, authentication), is(true)); + assertThat(role.cluster().check(GetComposableIndexTemplateAction.NAME, request, authentication), is(true)); + assertThat(role.cluster().check(GetIndexTemplatesAction.NAME, request, authentication), is(true)); + assertThat(role.cluster().check(TransportClusterHealthAction.NAME, request, authentication), 
is(true)); + assertThat(role.cluster().check(TransportNodesStatsAction.TYPE.name(), request, authentication), is(true)); + + assertThat(role.cluster().check(ClusterUpdateSettingsAction.NAME, request, authentication), is(false)); + assertThat(role.cluster().check(TransportPutIndexTemplateAction.TYPE.name(), request, authentication), is(false)); + assertThat(role.cluster().check(TransportDeleteIndexTemplateAction.TYPE.name(), request, authentication), is(false)); + + // read_ilm + assertThat(role.cluster().check(GetLifecycleAction.NAME, request, authentication), is(true)); + + assertThat(role.cluster().check(ILMActions.STOP.name(), request, authentication), is(false)); + assertThat(role.cluster().check(ILMActions.PUT.name(), request, authentication), is(false)); + + // read_slm + assertThat(role.cluster().check(GetSLMStatusAction.NAME, request, authentication), is(true)); + assertThat(role.cluster().check(GetSnapshotLifecycleAction.NAME, request, authentication), is(true)); + + assertThat(role.cluster().check(DeleteSnapshotLifecycleAction.NAME, request, authentication), is(false)); + assertThat(role.cluster().check(ExecuteSnapshotLifecycleAction.NAME, request, authentication), is(false)); + assertThat(role.cluster().check(PutSnapshotLifecycleAction.NAME, request, authentication), is(false)); + assertThat(role.cluster().check(TransportGetSnapshotsAction.TYPE.name(), request, authentication), is(false)); + assertThat(role.cluster().check(TransportCreateSnapshotAction.TYPE.name(), request, authentication), is(false)); + assertThat(role.cluster().check(TransportDeleteSnapshotAction.TYPE.name(), request, authentication), is(false)); + assertThat(role.cluster().check(TransportRestoreSnapshotAction.TYPE.name(), request, authentication), is(false)); + + // index monitor + List.of( + "search-" + randomAlphaOfLengthBetween(1, 20), + ".kibana-" + randomAlphaOfLengthBetween(1, 20), + ".elastic-analytics-collections", + "logs-" + randomAlphaOfLengthBetween(1, 20), + "my-index-" + randomAlphaOfLengthBetween(1, 20), + ".internal.alerts-default.alerts-default-" + randomAlphaOfLengthBetween(1, 20) + ).forEach(index -> { + final IndexAbstraction anyIndex = mockIndexAbstraction(index); + + assertThat(role.indices().allowedIndicesMatcher(IndicesStatsAction.NAME).test(anyIndex), is(true)); + assertThat(role.indices().allowedIndicesMatcher(DataStreamsStatsAction.NAME).test(anyIndex), is(true)); + assertThat(role.indices().allowedIndicesMatcher(GetAliasesAction.NAME).test(anyIndex), is(true)); + assertThat(role.indices().allowedIndicesMatcher(GetSettingsAction.NAME).test(anyIndex), is(true)); + assertThat(role.indices().allowedIndicesMatcher(GetDataStreamLifecycleAction.INSTANCE.name()).test(anyIndex), is(true)); + + assertThat(role.indices().allowedIndicesMatcher(AutoCreateAction.NAME).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportCreateIndexAction.TYPE.name()).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportDeleteAction.NAME).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportIndicesAliasesAction.NAME).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportBulkAction.NAME).test(anyIndex), is(false)); + 
assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportMultiGetAction.NAME).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportMultiSearchAction.TYPE.name()).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(TransportUpdateSettingsAction.TYPE.name()).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher(RefreshAction.NAME).test(anyIndex), is(false)); + assertThat(role.indices().allowedIndicesMatcher("indices:foo").test(anyIndex), is(false)); + }); + } + public void testKibanaSystemPrivileges() { final RoleDescriptor serviceAccountRoleDescriptor = ElasticServiceAccounts.ACCOUNTS.get("elastic/kibana").roleDescriptor(); final RoleDescriptor reservedRolesStoreRoleDescriptor = ReservedRolesStore.kibanaSystemRoleDescriptor(KibanaSystemUser.ROLE_NAME); diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountServiceTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountServiceTests.java index c66f3168c7b7d..43fe57dd8b313 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountServiceTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/service/ServiceAccountServiceTests.java @@ -17,7 +17,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.SecureString; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLog; @@ -82,7 +81,6 @@ public void init() throws UnknownHostException { indexServiceAccountTokenStore = mock(IndexServiceAccountTokenStore.class); when(fileServiceAccountTokenStore.getTokenSource()).thenReturn(TokenInfo.TokenSource.FILE); when(indexServiceAccountTokenStore.getTokenSource()).thenReturn(TokenInfo.TokenSource.INDEX); - final Settings.Builder builder = Settings.builder().put("xpack.security.enabled", true); client = mock(Client.class); when(client.threadPool()).thenReturn(threadPool); serviceAccountService = new ServiceAccountService(client, fileServiceAccountTokenStore, indexServiceAccountTokenStore); @@ -96,11 +94,17 @@ public void stopThreadPool() { public void testGetServiceAccountPrincipals() { assertThat( ServiceAccountService.getServiceAccountPrincipals(), - containsInAnyOrder("elastic/enterprise-search-server", "elastic/fleet-server", "elastic/fleet-server-remote", "elastic/kibana") + containsInAnyOrder( + "elastic/auto-ops", + "elastic/enterprise-search-server", + "elastic/fleet-server", + "elastic/fleet-server-remote", + "elastic/kibana" + ) ); } - public void testTryParseToken() throws IOException, IllegalAccessException { + public void testTryParseToken() throws IOException { // Null for null assertNull(ServiceAccountService.tryParseToken(null)); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/service_accounts/10_basic.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/service_accounts/10_basic.yml index 47d6cdec2858b..a72e2d15c8e85 100644 --- 
a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/service_accounts/10_basic.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/service_accounts/10_basic.yml @@ -31,7 +31,8 @@ teardown: "Test get service accounts": - do: security.get_service_accounts: {} - - length: { '': 4 } + - length: { '': 5 } + - is_true: "elastic/auto-ops" - is_true: "elastic/enterprise-search-server" - is_true: "elastic/fleet-server" - is_true: "elastic/fleet-server-remote" @@ -40,7 +41,8 @@ teardown: - do: security.get_service_accounts: namespace: elastic - - length: { '': 4 } + - length: { '': 5 } + - is_true: "elastic/auto-ops" - is_true: "elastic/enterprise-search-server" - is_true: "elastic/fleet-server" - is_true: "elastic/fleet-server-remote" From 735d80dffd17bedd75fdd964e85760ef03f63c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Mon, 29 Jul 2024 19:07:15 +0200 Subject: [PATCH 04/22] ESQL: Add COUNT and COUNT_DISTINCT aggregation tests (#111409) --- docs/changelog/111367.yaml | 5 + .../functions/aggregation-functions.asciidoc | 12 +- .../appendix/count_distinct.asciidoc | 25 + .../esql/functions/appendix/values.asciidoc | 10 + .../esql/functions/count-distinct.asciidoc | 85 --- .../esql/functions/description/count.asciidoc | 5 + .../description/count_distinct.asciidoc | 5 + .../functions/description/values.asciidoc | 5 + .../functions/{ => examples}/count.asciidoc | 46 +- .../examples/count_distinct.asciidoc | 31 + .../esql/functions/examples/values.asciidoc | 13 + .../functions/kibana/definition/count.json | 159 +++++ .../kibana/definition/count_distinct.json | 607 ++++++++++++++++++ .../functions/kibana/definition/values.json | 119 ++++ .../esql/functions/kibana/docs/count.md | 11 + .../functions/kibana/docs/count_distinct.md | 11 + .../esql/functions/kibana/docs/values.md | 13 + .../esql/functions/layout/count.asciidoc | 15 + .../functions/layout/count_distinct.asciidoc | 16 + .../esql/functions/layout/values.asciidoc | 18 + .../esql/functions/parameters/count.asciidoc | 6 + .../parameters/count_distinct.asciidoc | 9 + .../esql/functions/parameters/values.asciidoc | 6 + .../esql/functions/signature/count.svg | 1 + .../functions/signature/count_distinct.svg | 1 + .../esql/functions/signature/values.svg | 1 + .../esql/functions/types/count.asciidoc | 20 + .../functions/types/count_distinct.asciidoc | 44 ++ .../esql/functions/types/values.asciidoc | 17 + .../aggregation/ValuesBytesRefAggregator.java | 2 +- .../aggregation/X-ValuesAggregator.java.st | 2 +- .../src/main/resources/meta.csv-spec | 8 +- .../expression/function/FunctionInfo.java | 5 + .../expression/function/aggregate/Count.java | 30 +- .../function/aggregate/CountDistinct.java | 77 ++- .../expression/function/aggregate/Values.java | 21 +- .../function/AbstractFunctionTestCase.java | 23 +- .../function/MultiRowTestCaseSupplier.java | 122 +++- .../expression/function/TestCaseSupplier.java | 15 + .../aggregate/CountDistinctTests.java | 176 +++++ .../function/aggregate/CountTests.java | 106 +++ .../function/aggregate/ValuesTests.java | 110 ++++ 42 files changed, 1849 insertions(+), 164 deletions(-) create mode 100644 docs/changelog/111367.yaml create mode 100644 docs/reference/esql/functions/appendix/count_distinct.asciidoc create mode 100644 docs/reference/esql/functions/appendix/values.asciidoc delete mode 100644 docs/reference/esql/functions/count-distinct.asciidoc create mode 100644 docs/reference/esql/functions/description/count.asciidoc create mode 100644 
docs/reference/esql/functions/description/count_distinct.asciidoc create mode 100644 docs/reference/esql/functions/description/values.asciidoc rename docs/reference/esql/functions/{ => examples}/count.asciidoc (63%) create mode 100644 docs/reference/esql/functions/examples/count_distinct.asciidoc create mode 100644 docs/reference/esql/functions/examples/values.asciidoc create mode 100644 docs/reference/esql/functions/kibana/definition/count.json create mode 100644 docs/reference/esql/functions/kibana/definition/count_distinct.json create mode 100644 docs/reference/esql/functions/kibana/definition/values.json create mode 100644 docs/reference/esql/functions/kibana/docs/count.md create mode 100644 docs/reference/esql/functions/kibana/docs/count_distinct.md create mode 100644 docs/reference/esql/functions/kibana/docs/values.md create mode 100644 docs/reference/esql/functions/layout/count.asciidoc create mode 100644 docs/reference/esql/functions/layout/count_distinct.asciidoc create mode 100644 docs/reference/esql/functions/layout/values.asciidoc create mode 100644 docs/reference/esql/functions/parameters/count.asciidoc create mode 100644 docs/reference/esql/functions/parameters/count_distinct.asciidoc create mode 100644 docs/reference/esql/functions/parameters/values.asciidoc create mode 100644 docs/reference/esql/functions/signature/count.svg create mode 100644 docs/reference/esql/functions/signature/count_distinct.svg create mode 100644 docs/reference/esql/functions/signature/values.svg create mode 100644 docs/reference/esql/functions/types/count.asciidoc create mode 100644 docs/reference/esql/functions/types/count_distinct.asciidoc create mode 100644 docs/reference/esql/functions/types/values.asciidoc create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinctTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java diff --git a/docs/changelog/111367.yaml b/docs/changelog/111367.yaml new file mode 100644 index 0000000000000..89e6c1d3b4da4 --- /dev/null +++ b/docs/changelog/111367.yaml @@ -0,0 +1,5 @@ +pr: 111367 +summary: "ESQL: Add Values aggregation tests, fix `ConstantBytesRefBlock` memory handling" +area: ES|QL +type: bug +issues: [] diff --git a/docs/reference/esql/functions/aggregation-functions.asciidoc b/docs/reference/esql/functions/aggregation-functions.asciidoc index 821b109741a0a..518aee563e952 100644 --- a/docs/reference/esql/functions/aggregation-functions.asciidoc +++ b/docs/reference/esql/functions/aggregation-functions.asciidoc @@ -9,8 +9,8 @@ The <> command supports these aggregate functions: // tag::agg_list[] * <> -* <> -* <> +* <> +* <> * <> * <> * <> @@ -19,13 +19,13 @@ The <> command supports these aggregate functions: * experimental:[] <> * <> * <> -* <> +* <> * experimental:[] <> // end::agg_list[] -include::count.asciidoc[] -include::count-distinct.asciidoc[] include::layout/avg.asciidoc[] +include::layout/count.asciidoc[] +include::layout/count_distinct.asciidoc[] include::layout/max.asciidoc[] include::layout/median.asciidoc[] include::layout/median_absolute_deviation.asciidoc[] @@ -34,5 +34,5 @@ include::layout/percentile.asciidoc[] include::layout/st_centroid_agg.asciidoc[] include::layout/sum.asciidoc[] include::layout/top.asciidoc[] -include::values.asciidoc[] 
+include::layout/values.asciidoc[] include::weighted-avg.asciidoc[] diff --git a/docs/reference/esql/functions/appendix/count_distinct.asciidoc b/docs/reference/esql/functions/appendix/count_distinct.asciidoc new file mode 100644 index 0000000000000..065065cf34e06 --- /dev/null +++ b/docs/reference/esql/functions/appendix/count_distinct.asciidoc @@ -0,0 +1,25 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-agg-count-distinct-approximate]] +==== Counts are approximate + +Computing exact counts requires loading values into a set and returning its +size. This doesn't scale when working on high-cardinality sets and/or large +values as the required memory usage and the need to communicate those +per-shard sets between nodes would utilize too many resources of the cluster. + +This `COUNT_DISTINCT` function is based on the +https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf[HyperLogLog++] +algorithm, which counts based on the hashes of the values with some interesting +properties: + +include::../../../aggregations/metrics/cardinality-aggregation.asciidoc[tag=explanation] + +The `COUNT_DISTINCT` function takes an optional second parameter to configure +the precision threshold. The precision_threshold options allows to trade memory +for accuracy, and defines a unique count below which counts are expected to be +close to accurate. Above this value, counts might become a bit more fuzzy. The +maximum supported value is 40000, thresholds above this number will have the +same effect as a threshold of 40000. The default value is `3000`. + diff --git a/docs/reference/esql/functions/appendix/values.asciidoc b/docs/reference/esql/functions/appendix/values.asciidoc new file mode 100644 index 0000000000000..ec3cfff2db6a6 --- /dev/null +++ b/docs/reference/esql/functions/appendix/values.asciidoc @@ -0,0 +1,10 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[WARNING] +==== +This can use a significant amount of memory and ES|QL doesn't yet +grow aggregations beyond memory. So this aggregation will work until +it is used to collect more values than can fit into memory. Once it +collects too many values it will fail the query with +a <>. +==== diff --git a/docs/reference/esql/functions/count-distinct.asciidoc b/docs/reference/esql/functions/count-distinct.asciidoc deleted file mode 100644 index a9f30d24e0e83..0000000000000 --- a/docs/reference/esql/functions/count-distinct.asciidoc +++ /dev/null @@ -1,85 +0,0 @@ -[discrete] -[[esql-agg-count-distinct]] -=== `COUNT_DISTINCT` - -*Syntax* - -[source,esql] ----- -COUNT_DISTINCT(expression[, precision_threshold]) ----- - -*Parameters* - -`expression`:: -Expression that outputs the values on which to perform a distinct count. - -`precision_threshold`:: -Precision threshold. Refer to <>. The -maximum supported value is 40000. Thresholds above this number will have the -same effect as a threshold of 40000. The default value is 3000. - -*Description* - -Returns the approximate number of distinct values. - -*Supported types* - -Can take any field type as input. 
- -*Examples* - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-result] -|=== - -With the optional second parameter to configure the precision threshold: - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision-result] -|=== - -The expression can use inline functions. This example splits a string into -multiple values using the `SPLIT` function and counts the unique values: - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression-result] -|=== - -[discrete] -[[esql-agg-count-distinct-approximate]] -==== Counts are approximate - -Computing exact counts requires loading values into a set and returning its -size. This doesn't scale when working on high-cardinality sets and/or large -values as the required memory usage and the need to communicate those -per-shard sets between nodes would utilize too many resources of the cluster. - -This `COUNT_DISTINCT` function is based on the -https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf[HyperLogLog++] -algorithm, which counts based on the hashes of the values with some interesting -properties: - -include::../../aggregations/metrics/cardinality-aggregation.asciidoc[tag=explanation] - -The `COUNT_DISTINCT` function takes an optional second parameter to configure -the precision threshold. The precision_threshold options allows to trade memory -for accuracy, and defines a unique count below which counts are expected to be -close to accurate. Above this value, counts might become a bit more fuzzy. The -maximum supported value is 40000, thresholds above this number will have the -same effect as a threshold of 40000. The default value is `3000`. \ No newline at end of file diff --git a/docs/reference/esql/functions/description/count.asciidoc b/docs/reference/esql/functions/description/count.asciidoc new file mode 100644 index 0000000000000..ee806d65a8ea3 --- /dev/null +++ b/docs/reference/esql/functions/description/count.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns the total number (count) of input values. diff --git a/docs/reference/esql/functions/description/count_distinct.asciidoc b/docs/reference/esql/functions/description/count_distinct.asciidoc new file mode 100644 index 0000000000000..d10825bb991f5 --- /dev/null +++ b/docs/reference/esql/functions/description/count_distinct.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns the approximate number of distinct values. 
diff --git a/docs/reference/esql/functions/description/values.asciidoc b/docs/reference/esql/functions/description/values.asciidoc new file mode 100644 index 0000000000000..b3cebcce955f0 --- /dev/null +++ b/docs/reference/esql/functions/description/values.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>. diff --git a/docs/reference/esql/functions/count.asciidoc b/docs/reference/esql/functions/examples/count.asciidoc similarity index 63% rename from docs/reference/esql/functions/count.asciidoc rename to docs/reference/esql/functions/examples/count.asciidoc index 66cfe76350cdd..fb696b51e054c 100644 --- a/docs/reference/esql/functions/count.asciidoc +++ b/docs/reference/esql/functions/examples/count.asciidoc @@ -1,27 +1,4 @@ -[discrete] -[[esql-agg-count]] -=== `COUNT` - -*Syntax* - -[source,esql] ----- -COUNT([expression]) ----- - -*Parameters* - -`expression`:: -Expression that outputs values to be counted. -If omitted, equivalent to `COUNT(*)` (the number of rows). - -*Description* - -Returns the total number (count) of input values. - -*Supported types* - -Can take any field type as input. +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. *Examples* @@ -33,9 +10,7 @@ include::{esql-specs}/stats.csv-spec[tag=count] |=== include::{esql-specs}/stats.csv-spec[tag=count-result] |=== - -To count the number of rows, use `COUNT()` or `COUNT(*)`: - +To count the number of rows, use `COUNT()` or `COUNT(*)` [source.merge.styled,esql] ---- include::{esql-specs}/docs.csv-spec[tag=countAll] @@ -44,10 +19,7 @@ include::{esql-specs}/docs.csv-spec[tag=countAll] |=== include::{esql-specs}/docs.csv-spec[tag=countAll-result] |=== - -The expression can use inline functions. This example splits a string into -multiple values using the `SPLIT` function and counts the values: - +The expression can use inline functions. 
This example splits a string into multiple values using the `SPLIT` function and counts the values [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=docsCountWithExpression] @@ -56,11 +28,7 @@ include::{esql-specs}/stats.csv-spec[tag=docsCountWithExpression] |=== include::{esql-specs}/stats.csv-spec[tag=docsCountWithExpression-result] |=== - -[[esql-agg-count-or-null]] -To count the number of times an expression returns `TRUE` use -a <> command to remove rows that shouldn't be included: - +To count the number of times an expression returns `TRUE` use a <> command to remove rows that shouldn't be included [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=count-where] @@ -69,10 +37,7 @@ include::{esql-specs}/stats.csv-spec[tag=count-where] |=== include::{esql-specs}/stats.csv-spec[tag=count-where-result] |=== - -To count the same stream of data based on two different expressions -use the pattern `COUNT( OR NULL)`: - +To count the same stream of data based on two different expressions use the pattern `COUNT( OR NULL)` [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=count-or-null] @@ -81,3 +46,4 @@ include::{esql-specs}/stats.csv-spec[tag=count-or-null] |=== include::{esql-specs}/stats.csv-spec[tag=count-or-null-result] |=== + diff --git a/docs/reference/esql/functions/examples/count_distinct.asciidoc b/docs/reference/esql/functions/examples/count_distinct.asciidoc new file mode 100644 index 0000000000000..44968c0652ec0 --- /dev/null +++ b/docs/reference/esql/functions/examples/count_distinct.asciidoc @@ -0,0 +1,31 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Examples* + +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-result] +|=== +With the optional second parameter to configure the precision threshold +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision-result] +|=== +The expression can use inline functions. This example splits a string into multiple values using the `SPLIT` function and counts the unique values +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression-result] +|=== + diff --git a/docs/reference/esql/functions/examples/values.asciidoc b/docs/reference/esql/functions/examples/values.asciidoc new file mode 100644 index 0000000000000..c013fc39d92ca --- /dev/null +++ b/docs/reference/esql/functions/examples/values.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=values-grouped] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/string.csv-spec[tag=values-grouped-result] +|=== + diff --git a/docs/reference/esql/functions/kibana/definition/count.json b/docs/reference/esql/functions/kibana/definition/count.json new file mode 100644 index 0000000000000..e05ebc6789816 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/count.json @@ -0,0 +1,159 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "count", + "description" : "Returns the total number (count) of input values.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "cartesian_point", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "geo_point", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." 
+ } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "unsigned_long", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + } + ], + "examples" : [ + "FROM employees\n| STATS COUNT(height)", + "FROM employees \n| STATS count = COUNT(*) BY languages \n| SORT languages DESC", + "ROW words=\"foo;bar;baz;qux;quux;foo\"\n| STATS word_count = COUNT(SPLIT(words, \";\"))", + "ROW n=1\n| WHERE n < 0\n| STATS COUNT(n)", + "ROW n=1\n| STATS COUNT(n > 0 OR NULL), COUNT(n < 0 OR NULL)" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/count_distinct.json b/docs/reference/esql/functions/kibana/definition/count_distinct.json new file mode 100644 index 0000000000000..801bd26f7d022 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/count_distinct.json @@ -0,0 +1,607 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "count_distinct", + "description" : "Returns the approximate number of distinct values.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." 
+ } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." 
+ } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." 
+ } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." 
+ } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." 
+ } + ], + "variadic" : false, + "returnType" : "long" + } + ], + "examples" : [ + "FROM hosts\n| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1)", + "FROM hosts\n| STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5)", + "ROW words=\"foo;bar;baz;qux;quux;foo\"\n| STATS distinct_word_count = COUNT_DISTINCT(SPLIT(words, \";\"))" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/values.json b/docs/reference/esql/functions/kibana/definition/values.json new file mode 100644 index 0000000000000..3e0036c4d25b6 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/values.json @@ -0,0 +1,119 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "values", + "description" : "Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "datetime" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "integer" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "ip" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "text" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "version" + } + ], + "examples" : [ + " FROM employees\n| EVAL first_letter = SUBSTRING(first_name, 0, 1)\n| STATS first_name=MV_SORT(VALUES(first_name)) BY first_letter\n| SORT first_letter" + ] +} diff --git a/docs/reference/esql/functions/kibana/docs/count.md b/docs/reference/esql/functions/kibana/docs/count.md new file mode 100644 index 0000000000000..dc9c356a847ed --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/count.md @@ -0,0 +1,11 @@ + + +### COUNT +Returns the total number (count) of input values. + +``` +FROM employees +| STATS COUNT(height) +``` diff --git a/docs/reference/esql/functions/kibana/docs/count_distinct.md b/docs/reference/esql/functions/kibana/docs/count_distinct.md new file mode 100644 index 0000000000000..a6b451bf9d38d --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/count_distinct.md @@ -0,0 +1,11 @@ + + +### COUNT_DISTINCT +Returns the approximate number of distinct values. 
+ +``` +FROM hosts +| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1) +``` diff --git a/docs/reference/esql/functions/kibana/docs/values.md b/docs/reference/esql/functions/kibana/docs/values.md new file mode 100644 index 0000000000000..cba62fc27255e --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/values.md @@ -0,0 +1,13 @@ + + +### VALUES +Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>. + +``` + FROM employees +| EVAL first_letter = SUBSTRING(first_name, 0, 1) +| STATS first_name=MV_SORT(VALUES(first_name)) BY first_letter +| SORT first_letter +``` diff --git a/docs/reference/esql/functions/layout/count.asciidoc b/docs/reference/esql/functions/layout/count.asciidoc new file mode 100644 index 0000000000000..8c16d74cde9a7 --- /dev/null +++ b/docs/reference/esql/functions/layout/count.asciidoc @@ -0,0 +1,15 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-count]] +=== `COUNT` + +*Syntax* + +[.text-center] +image::esql/functions/signature/count.svg[Embedded,opts=inline] + +include::../parameters/count.asciidoc[] +include::../description/count.asciidoc[] +include::../types/count.asciidoc[] +include::../examples/count.asciidoc[] diff --git a/docs/reference/esql/functions/layout/count_distinct.asciidoc b/docs/reference/esql/functions/layout/count_distinct.asciidoc new file mode 100644 index 0000000000000..2c9848186e806 --- /dev/null +++ b/docs/reference/esql/functions/layout/count_distinct.asciidoc @@ -0,0 +1,16 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-count_distinct]] +=== `COUNT_DISTINCT` + +*Syntax* + +[.text-center] +image::esql/functions/signature/count_distinct.svg[Embedded,opts=inline] + +include::../parameters/count_distinct.asciidoc[] +include::../description/count_distinct.asciidoc[] +include::../types/count_distinct.asciidoc[] +include::../examples/count_distinct.asciidoc[] +include::../appendix/count_distinct.asciidoc[] diff --git a/docs/reference/esql/functions/layout/values.asciidoc b/docs/reference/esql/functions/layout/values.asciidoc new file mode 100644 index 0000000000000..7d90d4314699a --- /dev/null +++ b/docs/reference/esql/functions/layout/values.asciidoc @@ -0,0 +1,18 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-values]] +=== `VALUES` + +preview::["Do not use `VALUES` on production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + +*Syntax* + +[.text-center] +image::esql/functions/signature/values.svg[Embedded,opts=inline] + +include::../parameters/values.asciidoc[] +include::../description/values.asciidoc[] +include::../types/values.asciidoc[] +include::../examples/values.asciidoc[] +include::../appendix/values.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/count.asciidoc b/docs/reference/esql/functions/parameters/count.asciidoc new file mode 100644 index 0000000000000..d470061a83e2e --- /dev/null +++ b/docs/reference/esql/functions/parameters/count.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. 
See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: +Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows). diff --git a/docs/reference/esql/functions/parameters/count_distinct.asciidoc b/docs/reference/esql/functions/parameters/count_distinct.asciidoc new file mode 100644 index 0000000000000..f84cf27c3e075 --- /dev/null +++ b/docs/reference/esql/functions/parameters/count_distinct.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: +Column or literal for which to count the number of distinct values. + +`precision`:: +Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000. diff --git a/docs/reference/esql/functions/parameters/values.asciidoc b/docs/reference/esql/functions/parameters/values.asciidoc new file mode 100644 index 0000000000000..8903aa1a472a3 --- /dev/null +++ b/docs/reference/esql/functions/parameters/values.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: + diff --git a/docs/reference/esql/functions/signature/count.svg b/docs/reference/esql/functions/signature/count.svg new file mode 100644 index 0000000000000..9b19652b98788 --- /dev/null +++ b/docs/reference/esql/functions/signature/count.svg @@ -0,0 +1 @@ +COUNT(field) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/count_distinct.svg b/docs/reference/esql/functions/signature/count_distinct.svg new file mode 100644 index 0000000000000..a5b77da7c555a --- /dev/null +++ b/docs/reference/esql/functions/signature/count_distinct.svg @@ -0,0 +1 @@ +COUNT_DISTINCT(field,precision) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/values.svg b/docs/reference/esql/functions/signature/values.svg new file mode 100644 index 0000000000000..0fa116ce1eb14 --- /dev/null +++ b/docs/reference/esql/functions/signature/values.svg @@ -0,0 +1 @@ +VALUES(field) \ No newline at end of file diff --git a/docs/reference/esql/functions/types/count.asciidoc b/docs/reference/esql/functions/types/count.asciidoc new file mode 100644 index 0000000000000..70e79d4899605 --- /dev/null +++ b/docs/reference/esql/functions/types/count.asciidoc @@ -0,0 +1,20 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | result +boolean | long +cartesian_point | long +datetime | long +double | long +geo_point | long +integer | long +ip | long +keyword | long +long | long +text | long +unsigned_long | long +version | long +|=== diff --git a/docs/reference/esql/functions/types/count_distinct.asciidoc b/docs/reference/esql/functions/types/count_distinct.asciidoc new file mode 100644 index 0000000000000..4b201d45732f1 --- /dev/null +++ b/docs/reference/esql/functions/types/count_distinct.asciidoc @@ -0,0 +1,44 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | precision | result +boolean | integer | long +boolean | long | long +boolean | unsigned_long | long +boolean | | long +datetime | integer | long +datetime | long | long +datetime | unsigned_long | long +datetime | | long +double | integer | long +double | long | long +double | unsigned_long | long +double | | long +integer | integer | long +integer | long | long +integer | unsigned_long | long +integer | | long +ip | integer | long +ip | long | long +ip | unsigned_long | long +ip | | long +keyword | integer | long +keyword | long | long +keyword | unsigned_long | long +keyword | | long +long | integer | long +long | long | long +long | unsigned_long | long +long | | long +text | integer | long +text | long | long +text | unsigned_long | long +text | | long +version | integer | long +version | long | long +version | unsigned_long | long +version | | long +|=== diff --git a/docs/reference/esql/functions/types/values.asciidoc b/docs/reference/esql/functions/types/values.asciidoc new file mode 100644 index 0000000000000..705745d76dbab --- /dev/null +++ b/docs/reference/esql/functions/types/values.asciidoc @@ -0,0 +1,17 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | result +boolean | boolean +datetime | datetime +double | double +integer | integer +ip | ip +keyword | keyword +long | long +text | text +version | version +|=== diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesBytesRefAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesBytesRefAggregator.java index 736b320a9dde8..602fd29433193 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesBytesRefAggregator.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesBytesRefAggregator.java @@ -100,7 +100,7 @@ Block toBlock(BlockFactory blockFactory) { } BytesRef scratch = new BytesRef(); if (values.size() == 1) { - return blockFactory.newConstantBytesRefBlockWith(values.get(0, scratch), 1); + return blockFactory.newConstantBytesRefBlockWith(BytesRef.deepCopyOf(values.get(0, scratch)), 1); } try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder((int) values.size())) { builder.beginPositionEntry(); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-ValuesAggregator.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-ValuesAggregator.java.st index ea62dcf295825..a8884c58116f3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-ValuesAggregator.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-ValuesAggregator.java.st @@ -192,7 +192,7 @@ $elseif(double)$ $elseif(int)$ return blockFactory.newConstantIntBlockWith((int) values.get(0), 1); $elseif(BytesRef)$ - return blockFactory.newConstantBytesRefBlockWith(values.get(0, scratch), 1); + return blockFactory.newConstantBytesRefBlockWith(BytesRef.deepCopyOf(values.get(0, scratch)), 1); $endif$ } try ($Type$Block.Builder builder = blockFactory.new$Type$BlockBuilder((int) values.size())) { diff --git 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index c036e04bc8ba3..7b5941b88988d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -19,7 +19,7 @@ synopsis:keyword "double cos(angle:double|integer|long|unsigned_long)" "double cosh(angle:double|integer|long|unsigned_long)" "long count(?field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version)" -"long count_distinct(field:boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|version, ?precision:integer)" +"long count_distinct(field:boolean|date|double|integer|ip|keyword|long|text|version, ?precision:integer|long|unsigned_long)" "integer date_diff(unit:keyword|text, startTimestamp:date, endTimestamp:date)" "long date_extract(datePart:keyword|text, date:date)" "keyword date_format(?dateFormat:keyword|text, date:date)" @@ -139,8 +139,8 @@ coalesce |first |"boolean|cartesian_point|car concat |[string1, string2] |["keyword|text", "keyword|text"] |[Strings to concatenate., Strings to concatenate.] cos |angle |"double|integer|long|unsigned_long" |An angle, in radians. If `null`, the function returns `null`. cosh |angle |"double|integer|long|unsigned_long" |An angle, in radians. If `null`, the function returns `null`. -count |field |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version" |Column or literal for which to count the number of values. -count_distinct|[field, precision] |["boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|version", integer] |[Column or literal for which to count the number of distinct values., ] +count |field |"boolean|cartesian_point|date|double|geo_point|integer|ip|keyword|long|text|unsigned_long|version" |Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows). +count_distinct|[field, precision] |["boolean|date|double|integer|ip|keyword|long|text|version", "integer|long|unsigned_long"] |[Column or literal for which to count the number of distinct values., Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000.] date_diff |[unit, startTimestamp, endTimestamp]|["keyword|text", date, date] |[Time difference unit, A string representing a start timestamp, A string representing an end timestamp] date_extract |[datePart, date] |["keyword|text", date] |[Part of the date to extract. Can be: `aligned_day_of_week_in_month`\, `aligned_day_of_week_in_year`\, `aligned_week_of_month`\, `aligned_week_of_year`\, `ampm_of_day`\, `clock_hour_of_ampm`\, `clock_hour_of_day`\, `day_of_month`\, `day_of_week`\, `day_of_year`\, `epoch_day`\, `era`\, `hour_of_ampm`\, `hour_of_day`\, `instant_seconds`\, `micro_of_day`\, `micro_of_second`\, `milli_of_day`\, `milli_of_second`\, `minute_of_day`\, `minute_of_hour`\, `month_of_year`\, `nano_of_day`\, `nano_of_second`\, `offset_seconds`\, `proleptic_month`\, `second_of_day`\, `second_of_minute`\, `year`\, or `year_of_era`. Refer to https://docs.oracle.com/javase/8/docs/api/java/time/temporal/ChronoField.html[java.time.temporal.ChronoField] for a description of these values. If `null`\, the function returns `null`., Date expression. If `null`\, the function returns `null`.] 
date_format |[dateFormat, date] |["keyword|text", date] |[Date format (optional). If no format is specified\, the `yyyy-MM-dd'T'HH:mm:ss.SSSZ` format is used. If `null`\, the function returns `null`., Date expression. If `null`\, the function returns `null`.] @@ -356,7 +356,7 @@ to_ver |Converts an input string to a version value. to_version |Converts an input string to a version value. top |Collects the top values for a field. Includes repeated values. trim |Removes leading and trailing whitespaces from a string. -values |Collect values for a field. +values |Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>. weighted_avg |The weighted average of a numeric field. ; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/FunctionInfo.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/FunctionInfo.java index 94e3aa4e1dd68..f275496c6787a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/FunctionInfo.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/FunctionInfo.java @@ -23,6 +23,11 @@ */ String[] returnType(); + /** + * Whether this function is a preview (Not ready for production environments) or not. + */ + boolean preview() default false; + /** * The description of the function rendered in {@code META FUNCTIONS} * and the docs. These should be complete sentences. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java index 52e053f843e14..9b6190408dbd4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.StringUtils; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvCount; @@ -35,7 +36,32 @@ public class Count extends AggregateFunction implements EnclosedAgg, ToAggregator, SurrogateExpression { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Count", Count::new); - @FunctionInfo(returnType = "long", description = "Returns the total number (count) of input values.", isAggregation = true) + @FunctionInfo( + returnType = "long", + description = "Returns the total number (count) of input values.", + isAggregation = true, + examples = { + @Example(file = "stats", tag = "count"), + @Example(description = "To count the number of rows, use `COUNT()` or `COUNT(*)`", file = "docs", tag = "countAll"), + @Example( + description = "The expression can use inline functions. 
This example splits a string into " + "multiple values using the `SPLIT` function and counts the values", + file = "stats", + tag = "docsCountWithExpression" + ), + @Example( + description = "To count the number of times an expression returns `TRUE` use " + + "a <> command to remove rows that shouldn't be included", + file = "stats", + tag = "count-where" + ), + @Example( + description = "To count the same stream of data based on two different expressions " + + "use the pattern `COUNT( OR NULL)`", + file = "stats", + tag = "count-or-null" + ) } + ) public Count( Source source, @Param( @@ -54,7 +80,7 @@ public Count( "text", "unsigned_long", "version" }, - description = "Column or literal for which to count the number of values." + description = "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." ) Expression field ) { super(source, field); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 7686d10a03d9e..858c6e659449c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -24,6 +24,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.EsqlTypeResolutions; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -53,15 +54,63 @@ public class CountDistinct extends AggregateFunction implements OptionalArgument private static final int DEFAULT_PRECISION = 3000; private final Expression precision; - @FunctionInfo(returnType = "long", description = "Returns the approximate number of distinct values.", isAggregation = true) + @FunctionInfo( + returnType = "long", + description = "Returns the approximate number of distinct values.", + appendix = """ + [discrete] + [[esql-agg-count-distinct-approximate]] + ==== Counts are approximate + + Computing exact counts requires loading values into a set and returning its + size. This doesn't scale when working on high-cardinality sets and/or large + values as the required memory usage and the need to communicate those + per-shard sets between nodes would utilize too many resources of the cluster. + + This `COUNT_DISTINCT` function is based on the + https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf[HyperLogLog++] + algorithm, which counts based on the hashes of the values with some interesting + properties: + + include::../../../aggregations/metrics/cardinality-aggregation.asciidoc[tag=explanation] + + The `COUNT_DISTINCT` function takes an optional second parameter to configure + the precision threshold. The `precision_threshold` option allows you to trade memory + for accuracy, and defines a unique count below which counts are expected to be + close to accurate. Above this value, counts might become a bit more fuzzy. The + maximum supported value is 40000; thresholds above this number will have the + same effect as a threshold of 40000.
The default value is `3000`. + """, + isAggregation = true, + examples = { + @Example(file = "stats_count_distinct", tag = "count-distinct"), + @Example( + description = "With the optional second parameter to configure the precision threshold", + file = "stats_count_distinct", + tag = "count-distinct-precision" + ), + @Example( + description = "The expression can use inline functions. This example splits a string into " + + "multiple values using the `SPLIT` function and counts the unique values", + file = "stats_count_distinct", + tag = "docsCountDistinctWithExpression" + ) } + ) public CountDistinct( Source source, @Param( name = "field", - type = { "boolean", "cartesian_point", "date", "double", "geo_point", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "Column or literal for which to count the number of distinct values." ) Expression field, - @Param(optional = true, name = "precision", type = { "integer" }) Expression precision + @Param( + optional = true, + name = "precision", + type = { "integer", "long", "unsigned_long" }, + description = "Precision threshold. Refer to <>. " + + "The maximum supported value is 40000. Thresholds above this number will have the " + + "same effect as a threshold of 40000. The default value is 3000." + ) Expression precision ) { super(source, field, precision != null ? List.of(precision) : List.of()); this.precision = precision; @@ -108,19 +157,17 @@ protected TypeResolution resolveType() { return new TypeResolution("Unresolved children"); } - TypeResolution resolution = EsqlTypeResolutions.isExact(field(), sourceText(), DEFAULT); - if (resolution.unresolved()) { - return resolution; - } + TypeResolution resolution = EsqlTypeResolutions.isExact(field(), sourceText(), DEFAULT) + .and( + isType( + field(), + dt -> dt != DataType.UNSIGNED_LONG && dt != DataType.SOURCE, + sourceText(), + DEFAULT, + "any exact type except unsigned_long, _source, or counter types" + ) + ); - boolean resolved = resolution.resolved(); - resolution = isType( - field(), - dt -> resolved && dt != DataType.UNSIGNED_LONG && dt != DataType.SOURCE, - sourceText(), - DEFAULT, - "any exact type except unsigned_long, _source, or counter types" - ); if (resolution.unresolved() || precision == null) { return resolution; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java index 7d2fbcddb113b..79276b26be6d5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java @@ -21,6 +21,7 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.EsqlTypeResolutions; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.planner.ToAggregator; @@ -34,13 +35,25 @@ public class Values extends AggregateFunction implements ToAggregator { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Values", Values::new); 
@FunctionInfo( - returnType = { "boolean|date|double|integer|ip|keyword|long|text|version" }, - description = "Collect values for a field.", - isAggregation = true + returnType = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + preview = true, + description = "Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. " + + "If you need the values returned in order use <>.", + appendix = """ + [WARNING] + ==== + This can use a significant amount of memory and ES|QL doesn't yet + grow aggregations beyond memory. So this aggregation will work until + it is used to collect more values than can fit into memory. Once it + collects too many values it will fail the query with + a <>. + ====""", + isAggregation = true, + examples = @Example(file = "string", tag = "values-grouped") ) public Values( Source source, - @Param(name = "field", type = { "boolean|date|double|integer|ip|keyword|long|text|version" }) Expression v + @Param(name = "field", type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }) Expression v ) { super(source, v); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index 0ec0a29dc530b..64c72b46c303b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -77,6 +77,7 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; @@ -298,7 +299,12 @@ protected final List rows(List multirowFields) ) { var multiRowData = field.multiRowData(); for (int row = initialRow; row < initialRow + pageSize; row++) { - wrapper.accept(multiRowData.get(row)); + var data = multiRowData.get(row); + if (data instanceof BigInteger bigIntegerData) { + wrapper.accept(NumericUtils.asLongUnsigned(bigIntegerData)); + } else { + wrapper.accept(data); + } } blocks[i] = wrapper.builder().build(); @@ -545,7 +551,7 @@ public static void renderDocs() throws IOException { renderDescription(description.description(), info.detailedDescription(), info.note()); boolean hasExamples = renderExamples(info); boolean hasAppendix = renderAppendix(info.appendix()); - renderFullLayout(name, hasExamples, hasAppendix); + renderFullLayout(name, info.preview(), hasExamples, hasAppendix); renderKibanaInlineDocs(name, info); List args = description.args(); if (name.equals("case")) { @@ -571,6 +577,11 @@ public static void renderDocs() throws IOException { private static final String DOCS_WARNING = "// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.\n\n"; + private static final String PREVIEW_CALLOUT = + "\npreview::[\"Do not use `VALUES` on production environments. This functionality is in technical preview and " + + "may be changed or removed in a future release. 
Elastic will work to fix any issues, but features in technical preview " + + "are not subject to the support SLA of official GA features.\"]\n"; + private static void renderTypes(List argNames) throws IOException { StringBuilder header = new StringBuilder(); for (String arg : argNames) { @@ -686,12 +697,12 @@ private static boolean renderAppendix(String appendix) throws IOException { return true; } - private static void renderFullLayout(String name, boolean hasExamples, boolean hasAppendix) throws IOException { + private static void renderFullLayout(String name, boolean preview, boolean hasExamples, boolean hasAppendix) throws IOException { String rendered = DOCS_WARNING + """ [discrete] [[esql-$NAME$]] === `$UPPER_NAME$` - + $PREVIEW_CALLOUT$ *Syntax* [.text-center] @@ -700,7 +711,9 @@ private static void renderFullLayout(String name, boolean hasExamples, boolean h include::../parameters/$NAME$.asciidoc[] include::../description/$NAME$.asciidoc[] include::../types/$NAME$.asciidoc[] - """.replace("$NAME$", name).replace("$UPPER_NAME$", name.toUpperCase(Locale.ROOT)); + """.replace("$NAME$", name) + .replace("$UPPER_NAME$", name.toUpperCase(Locale.ROOT)) + .replace("$PREVIEW_CALLOUT$", preview ? PREVIEW_CALLOUT : ""); if (hasExamples) { rendered += "include::../examples/" + name + ".asciidoc[]\n"; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/MultiRowTestCaseSupplier.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/MultiRowTestCaseSupplier.java index 2896dec814a71..973249e4a743c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/MultiRowTestCaseSupplier.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/MultiRowTestCaseSupplier.java @@ -14,7 +14,9 @@ import org.elasticsearch.geo.ShapeTestUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.versionfield.Version; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; @@ -149,6 +151,55 @@ public static List longCases(int minRows, int maxRows, long m return cases; } + public static List ulongCases(int minRows, int maxRows, BigInteger min, BigInteger max, boolean includeZero) { + List cases = new ArrayList<>(); + + // Zero + if (BigInteger.ZERO.compareTo(max) <= 0 && BigInteger.ZERO.compareTo(min) >= 0 && includeZero) { + cases.add( + new TypedDataSupplier( + "<0 unsigned longs>", + () -> randomList(minRows, maxRows, () -> BigInteger.ZERO), + DataType.UNSIGNED_LONG, + false, + true + ) + ); + } + + // Small values, less than Long.MAX_VALUE + BigInteger lower1 = min.max(BigInteger.ONE); + BigInteger upper1 = max.min(BigInteger.valueOf(Long.MAX_VALUE)); + if (lower1.compareTo(upper1) < 0) { + cases.add( + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> ESTestCase.randomUnsignedLongBetween(lower1, upper1)), + DataType.UNSIGNED_LONG, + false, + true + ) + ); + } + + // Big values, greater than Long.MAX_VALUE + BigInteger lower2 = min.max(BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)); + BigInteger upper2 = max.min(ESTestCase.UNSIGNED_LONG_MAX); + if (lower2.compareTo(upper2) < 0) { + cases.add( + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> ESTestCase.randomUnsignedLongBetween(lower2, upper2)), + DataType.UNSIGNED_LONG, + false, + true + ) + ); + } + + return cases; + } + public static List doubleCases(int 
minRows, int maxRows, double min, double max, boolean includeZero) { List cases = new ArrayList<>(); @@ -326,6 +377,41 @@ public static List ipCases(int minRows, int maxRows) { ); } + public static List versionCases(int minRows, int maxRows) { + return List.of( + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> new Version(Integer.toString(ESTestCase.between(0, 100))).toBytesRef()), + DataType.VERSION, + false, + true + ), + new TypedDataSupplier( + "", + () -> randomList( + minRows, + maxRows, + () -> new Version(ESTestCase.between(0, 100) + "." + ESTestCase.between(0, 100)).toBytesRef() + ), + DataType.VERSION, + false, + true + ), + new TypedDataSupplier( + "", + () -> randomList( + minRows, + maxRows, + () -> new Version(ESTestCase.between(0, 100) + "." + ESTestCase.between(0, 100) + "." + ESTestCase.between(0, 100)) + .toBytesRef() + ), + DataType.VERSION, + false, + true + ) + ); + } + public static List geoPointCases(int minRows, int maxRows, boolean withAltitude) { List cases = new ArrayList<>(); @@ -343,7 +429,7 @@ public static List geoPointCases(int minRows, int maxRows, bo cases.add( new TypedDataSupplier( "", - () -> randomList(minRows, maxRows, () -> GEO.asWkb(GeometryTestUtils.randomPoint(true))), + () -> randomList(minRows, maxRows, () -> GEO.asWkb(GeometryTestUtils.randomPoint(false))), DataType.GEO_POINT, false, true @@ -381,4 +467,38 @@ public static List cartesianPointCases(int minRows, int maxRo return cases; } + + public static List stringCases(int minRows, int maxRows, DataType type) { + return List.of( + new TypedDataSupplier("", () -> randomList(minRows, maxRows, () -> new BytesRef("")), type, false, true), + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomAlphaOfLengthBetween(1, 30))), + type, + false, + true + ), + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomAlphaOfLengthBetween(300, 3000))), + type, + false, + true + ), + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomRealisticUnicodeOfLengthBetween(1, 30))), + type, + false, + true + ), + new TypedDataSupplier( + "", + () -> randomList(minRows, maxRows, () -> new BytesRef(ESTestCase.randomRealisticUnicodeOfLengthBetween(300, 3000))), + type, + false, + true + ) + ); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java index 3c9c1795ff210..cd375b8c53595 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java @@ -880,6 +880,12 @@ public static List longCases(long min, long max, boolean incl return cases; } + /** + * Generate cases for {@link DataType#UNSIGNED_LONG}. + *
<p> + * For multi-row parameters, see {@link MultiRowTestCaseSupplier#ulongCases}. + * </p>
+ */ public static List ulongCases(BigInteger min, BigInteger max, boolean includeZero) { List cases = new ArrayList<>(); @@ -1142,6 +1148,12 @@ public static List ipCases() { ); } + /** + * Generate cases for String DataTypes. + *
<p> + * For multi-row parameters, see {@link MultiRowTestCaseSupplier#stringCases}. + * </p>
+ */ public static List stringCases(DataType type) { List result = new ArrayList<>(); result.add(new TypedDataSupplier("", () -> new BytesRef(""), type)); @@ -1170,6 +1182,9 @@ public static List stringCases(DataType type) { /** * Supplier test case data for {@link Version} fields. + *
<p> + * For multi-row parameters, see {@link MultiRowTestCaseSupplier#versionCases}. + * </p>
*/ public static List versionCases(String prefix) { return List.of( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinctTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinctTests.java new file mode 100644 index 0000000000000..c2638e8da9196 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinctTests.java @@ -0,0 +1,176 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.aggregate; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.internal.hppc.BitMixer; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.hash.MurmurHash3; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.search.aggregations.metrics.HyperLogLogPlusPlus; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; +import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.equalTo; + +public class CountDistinctTests extends AbstractAggregationTestCase { + public CountDistinctTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + var suppliers = new ArrayList(); + + var precisionSuppliers = Stream.of( + TestCaseSupplier.intCases(0, 100_000, true), + TestCaseSupplier.longCases(0L, 100_000L, true), + TestCaseSupplier.ulongCases(BigInteger.ZERO, BigInteger.valueOf(100_000L), true) + ).flatMap(List::stream).toList(); + + Stream.of( + MultiRowTestCaseSupplier.intCases(1, 1000, Integer.MIN_VALUE, Integer.MAX_VALUE, true), + MultiRowTestCaseSupplier.longCases(1, 1000, Long.MIN_VALUE, Long.MAX_VALUE, true), + MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true), + MultiRowTestCaseSupplier.dateCases(1, 1000), + MultiRowTestCaseSupplier.booleanCases(1, 1000), + MultiRowTestCaseSupplier.ipCases(1, 1000), + MultiRowTestCaseSupplier.versionCases(1, 1000), + // Lower values for strings, as they take more space and may trigger the circuit breaker + MultiRowTestCaseSupplier.stringCases(1, 100, DataType.KEYWORD), + MultiRowTestCaseSupplier.stringCases(1, 100, DataType.TEXT) + ).flatMap(List::stream).forEach(fieldCaseSupplier -> { + // With precision + for (var precisionCaseSupplier : precisionSuppliers) { + suppliers.add(makeSupplier(fieldCaseSupplier, precisionCaseSupplier)); + } + + // Without precision + suppliers.add(makeSupplier(fieldCaseSupplier)); + }); + + // No rows + for 
(var dataType : List.of( + DataType.INTEGER, + DataType.LONG, + DataType.DOUBLE, + DataType.DATETIME, + DataType.BOOLEAN, + DataType.IP, + DataType.VERSION, + DataType.KEYWORD, + DataType.TEXT + )) { + var emptyFieldSupplier = new TestCaseSupplier.TypedDataSupplier("No rows (" + dataType + ")", List::of, dataType, false, true); + + // With precision + for (var precisionCaseSupplier : precisionSuppliers) { + suppliers.add(makeSupplier(emptyFieldSupplier, precisionCaseSupplier)); + } + + // Without precision + suppliers.add(makeSupplier(emptyFieldSupplier)); + } + + // "No rows" expects 0 here instead of null + // return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers); + return parameterSuppliersFromTypedData(randomizeBytesRefsOffset(suppliers)); + } + + @Override + protected Expression build(Source source, List args) { + return new CountDistinct(source, args.get(0), args.size() > 1 ? args.get(1) : null); + } + + private static TestCaseSupplier makeSupplier( + TestCaseSupplier.TypedDataSupplier fieldSupplier, + TestCaseSupplier.TypedDataSupplier precisionSupplier + ) { + return new TestCaseSupplier(fieldSupplier.name(), List.of(fieldSupplier.type(), precisionSupplier.type()), () -> { + var fieldTypedData = fieldSupplier.get(); + var precisionTypedData = precisionSupplier.get().forceLiteral(); + var values = fieldTypedData.multiRowData(); + var precision = ((Number) precisionTypedData.data()).intValue(); + + long result; + + if (fieldTypedData.type() == DataType.BOOLEAN) { + result = values.stream().distinct().count(); + } else { + result = calculateExpectedResult(values, precision); + } + + return new TestCaseSupplier.TestCase( + List.of(fieldTypedData, precisionTypedData), + "CountDistinct[field=Attribute[channel=0],precision=Attribute[channel=1]]", + DataType.LONG, + equalTo(result) + ); + }); + } + + private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) { + return new TestCaseSupplier(fieldSupplier.name() + ", no precision", List.of(fieldSupplier.type()), () -> { + var fieldTypedData = fieldSupplier.get(); + var values = fieldTypedData.multiRowData(); + + long result; + + if (fieldTypedData.type() == DataType.BOOLEAN) { + result = values.stream().distinct().count(); + } else { + result = calculateExpectedResult(values, 3000); + } + + return new TestCaseSupplier.TestCase( + List.of(fieldTypedData), + "CountDistinct[field=Attribute[channel=0]]", + DataType.LONG, + equalTo(result) + ); + }); + } + + private static long calculateExpectedResult(List values, int precision) { + // Can't use driverContext().bigArrays() from a static context + var bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking(); + try (var hll = new HyperLogLogPlusPlus(HyperLogLogPlusPlus.precisionFromThreshold(precision), bigArrays, 1)) { + var hash = new MurmurHash3.Hash128(); + for (var value : values) { + if (value instanceof Integer casted) { + hll.collect(0, BitMixer.mix64(casted)); + } else if (value instanceof Long casted) { + hll.collect(0, BitMixer.mix64(casted)); + } else if (value instanceof Double casted) { + hll.collect(0, BitMixer.mix64(Double.doubleToLongBits(casted))); + } else if (value instanceof BytesRef casted) { + MurmurHash3.hash128(casted.bytes, casted.offset, casted.length, 0, hash); + hll.collect(0, BitMixer.mix64(hash.h1)); + } else { + throw new IllegalArgumentException("Unsupported data type: " + value.getClass()); + } + } + + return hll.cardinality(0); + } + } +} diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountTests.java new file mode 100644 index 0000000000000..09076f2d70fd9 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountTests.java @@ -0,0 +1,106 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.aggregate; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; +import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.equalTo; + +public class CountTests extends AbstractAggregationTestCase { + public CountTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + var suppliers = new ArrayList(); + + Stream.of( + MultiRowTestCaseSupplier.intCases(1, 1000, Integer.MIN_VALUE, Integer.MAX_VALUE, true), + MultiRowTestCaseSupplier.longCases(1, 1000, Long.MIN_VALUE, Long.MAX_VALUE, true), + MultiRowTestCaseSupplier.ulongCases(1, 1000, BigInteger.ZERO, UNSIGNED_LONG_MAX, true), + MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true), + MultiRowTestCaseSupplier.dateCases(1, 1000), + MultiRowTestCaseSupplier.booleanCases(1, 1000), + MultiRowTestCaseSupplier.ipCases(1, 1000), + MultiRowTestCaseSupplier.versionCases(1, 1000), + MultiRowTestCaseSupplier.geoPointCases(1, 1000, true), + MultiRowTestCaseSupplier.cartesianPointCases(1, 1000, true), + // Lower values for strings, as they take more space and may trigger the circuit breaker + MultiRowTestCaseSupplier.stringCases(1, 100, DataType.KEYWORD), + MultiRowTestCaseSupplier.stringCases(1, 100, DataType.TEXT) + ).flatMap(List::stream).map(CountTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers)); + + // No rows + for (var dataType : List.of( + DataType.INTEGER, + DataType.LONG, + DataType.DOUBLE, + DataType.DATETIME, + DataType.BOOLEAN, + DataType.IP, + DataType.VERSION, + DataType.KEYWORD, + DataType.TEXT, + DataType.GEO_POINT, + DataType.CARTESIAN_POINT, + DataType.UNSIGNED_LONG + )) { + suppliers.add( + new TestCaseSupplier( + "No rows (" + dataType + ")", + List.of(dataType), + () -> new TestCaseSupplier.TestCase( + List.of(TestCaseSupplier.TypedData.multiRow(List.of(), dataType, "field")), + "Count[field=Attribute[channel=0]]", + DataType.LONG, + equalTo(0L) + ) + ) + ); + } + + // "No rows" expects 0 here instead of null + // return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers); 
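+ // The bypassed helper would also add cases asserting a null result for empty
+ // input; COUNT of zero rows is defined as 0L, which the explicit "No rows"
+ // suppliers above assert instead.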
+ return parameterSuppliersFromTypedData(randomizeBytesRefsOffset(suppliers)); + } + + @Override + protected Expression build(Source source, List args) { + return new Count(source, args.get(0)); + } + + private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) { + return new TestCaseSupplier(fieldSupplier.name(), List.of(fieldSupplier.type()), () -> { + var fieldTypedData = fieldSupplier.get(); + var rowCount = fieldTypedData.multiRowData().size(); + + return new TestCaseSupplier.TestCase( + List.of(fieldTypedData), + "Count[field=Attribute[channel=0]]", + DataType.LONG, + equalTo((long) rowCount) + ); + }); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java new file mode 100644 index 0000000000000..704bd3ab204a3 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java @@ -0,0 +1,110 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.aggregate; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractAggregationTestCase; +import org.elasticsearch.xpack.esql.expression.function.MultiRowTestCaseSupplier; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.hamcrest.BaseMatcher; +import org.hamcrest.Description; +import org.hamcrest.Matcher; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +public class ValuesTests extends AbstractAggregationTestCase { + public ValuesTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + var suppliers = new ArrayList(); + + Stream.of( + MultiRowTestCaseSupplier.intCases(1, 1000, Integer.MIN_VALUE, Integer.MAX_VALUE, true), + MultiRowTestCaseSupplier.longCases(1, 1000, Long.MIN_VALUE, Long.MAX_VALUE, true), + MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true), + MultiRowTestCaseSupplier.dateCases(1, 1000), + MultiRowTestCaseSupplier.booleanCases(1, 1000), + MultiRowTestCaseSupplier.ipCases(1, 1000), + MultiRowTestCaseSupplier.versionCases(1, 1000), + // Lower values for strings, as they take more space and may trigger the circuit breaker + MultiRowTestCaseSupplier.stringCases(1, 100, DataType.KEYWORD), + MultiRowTestCaseSupplier.stringCases(1, 100, DataType.TEXT) + ).flatMap(List::stream).map(ValuesTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers)); + + return 
parameterSuppliersFromTypedDataWithDefaultChecks(suppliers); + } + + @Override + protected Expression build(Source source, List args) { + return new Values(source, args.get(0)); + } + + @SuppressWarnings("unchecked") + private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) { + return new TestCaseSupplier(fieldSupplier.name(), List.of(fieldSupplier.type()), () -> { + var fieldTypedData = fieldSupplier.get(); + + var expected = fieldTypedData.multiRowData() + .stream() + .map(v -> (Comparable>) v) + .collect(Collectors.toSet()); + + return new TestCaseSupplier.TestCase( + List.of(fieldTypedData), + "Values[field=Attribute[channel=0]]", + fieldSupplier.type(), + expected.isEmpty() ? nullValue() : valuesInAnyOrder(expected) + ); + }); + } + + private static Matcher valuesInAnyOrder(Collection data) { + if (data == null) { + return nullValue(); + } + if (data.size() == 1) { + return equalTo(data.iterator().next()); + } + var matcher = containsInAnyOrder(data.toArray()); + // New Matcher, as `containsInAnyOrder` returns Matcher> instead of Matcher + return new BaseMatcher<>() { + @Override + public void describeTo(Description description) { + matcher.describeTo(description); + } + + @Override + public boolean matches(Object item) { + if (item instanceof Iterable == false) { + return false; + } + + var castedItem = (Iterable) item; + + return matcher.matches(castedItem); + } + }; + } +} From a4e6cf9cd2f74322df87ee0e737ceed806542ce3 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 29 Jul 2024 11:34:59 -0700 Subject: [PATCH 05/22] Integrate data generator in LogsDB mode challenge test (#111303) --- .../logsdb/qa/AbstractChallengeRestTest.java | 2 - ...ardVersusLogsIndexModeChallengeRestIT.java | 140 +++++++++--------- .../logsdb/qa/matchers/ArrayEqualMatcher.java | 2 +- .../logsdb/qa/matchers/ListEqualMatcher.java | 2 +- .../logsdb/datageneration/DataGenerator.java | 47 +++++- .../DataGeneratorSpecification.java | 26 +++- .../GenericSubObjectFieldDataGenerator.java | 108 +++++++------- .../fields/NestedFieldDataGenerator.java | 28 +++- .../fields/ObjectFieldDataGenerator.java | 29 ++-- .../fields/PredefinedField.java | 13 ++ .../TopLevelObjectFieldDataGenerator.java | 61 ++++++++ .../{ => leaf}/KeywordFieldDataGenerator.java | 2 +- .../{ => leaf}/LongFieldDataGenerator.java | 2 +- .../DataGeneratorSnapshotTests.java | 2 +- 14 files changed, 312 insertions(+), 152 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java rename test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/{ => leaf}/KeywordFieldDataGenerator.java (96%) rename test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/{ => leaf}/LongFieldDataGenerator.java (95%) diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java index 8ee0e4d715c4c..6724a40fddd22 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java @@ -202,9 +202,7 @@ private Settings.Builder 
createContenderSettings() throws IOException { private XContentBuilder createMappings(final CheckedConsumer builderConsumer) throws IOException { final XContentBuilder builder = XContentFactory.jsonBuilder(); - builder.startObject(); builderConsumer.accept(builder); - builder.endObject(); return builder; } diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java index 63db21e45ae9f..5f08cb9ee3e13 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java @@ -20,6 +20,11 @@ import org.elasticsearch.datastreams.logsdb.qa.matchers.MatchResult; import org.elasticsearch.datastreams.logsdb.qa.matchers.Matcher; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.logsdb.datageneration.DataGenerator; +import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; +import org.elasticsearch.logsdb.datageneration.FieldType; +import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary; +import org.elasticsearch.logsdb.datageneration.fields.PredefinedField; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; @@ -39,85 +44,71 @@ import java.util.List; import java.util.Map; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + public class StandardVersusLogsIndexModeChallengeRestIT extends AbstractChallengeRestTest { + private final DataGenerator dataGenerator; public StandardVersusLogsIndexModeChallengeRestIT() { super("standard-apache-baseline", "logs-apache-contender", "baseline-template", "contender-template", 101, 101); + this.dataGenerator = new DataGenerator( + DataGeneratorSpecification.builder() + // Nested fields don't work with subobjects: false. + .withNestedFieldsLimit(0) + // TODO increase depth of objects + // Currently matching fails because in synthetic source all fields are flat (given that we have subobjects: false) + // but stored source is identical to original document which has nested structure. + .withMaxObjectDepth(0) + .withArbitrary(new RandomBasedArbitrary() { + // TODO enable null values + // Matcher does not handle nulls currently + @Override + public boolean generateNullValue() { + return false; + } + + // TODO enable arrays + // List matcher currently does not apply matching logic recursively + // and equality check fails because arrays are sorted in synthetic source. 
+ @Override + public boolean generateArrayOfValues() { + return false; + } + }) + .withPredefinedFields(List.of(new PredefinedField("host.name", FieldType.KEYWORD))) + .build() + ); } @Override public void baselineMappings(XContentBuilder builder) throws IOException { if (randomBoolean()) { - builder.startObject("properties") - - .startObject("@timestamp") - .field("type", "date") - .endObject() - - .startObject("host.name") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .startObject("message") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .startObject("method") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .startObject("memory_usage_bytes") - .field("type", "long") - .field("ignore_malformed", randomBoolean()) - .endObject() - - .endObject(); + dataGenerator.writeMapping(builder); } else { - builder.startObject("properties") + // We want dynamic mapping, but we need host.name to be a keyword instead of text to support aggregations. + builder.startObject() + .startObject("properties") .startObject("host.name") .field("type", "keyword") .field("ignore_above", randomIntBetween(1000, 1200)) .endObject() + .endObject() .endObject(); } } @Override public void contenderMappings(XContentBuilder builder) throws IOException { - builder.field("subobjects", false); if (randomBoolean()) { - builder.startObject("properties") - - .startObject("@timestamp") - .field("type", "date") - .endObject() - - .startObject("host.name") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .startObject("message") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .startObject("method") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .startObject("memory_usage_bytes") - .field("type", "long") - .field("ignore_malformed", randomBoolean()) - .endObject() - - .endObject(); + dataGenerator.writeMapping(builder, b -> builder.field("subobjects", false)); + } else { + // Sometimes we go with full dynamic mapping. 
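+ // Only subobjects: false is set explicitly here; every field mapping is then
+ // inferred dynamically at index time from the generated documents.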
+ builder.startObject(); + builder.field("subobjects", false); + builder.endObject(); } } @@ -133,11 +124,13 @@ private static void settings(final Settings.Builder settings) { @Override public void contenderSettings(Settings.Builder builder) { builder.put("index.mode", "logsdb"); + builder.put("index.mapping.total_fields.limit", 5000); settings(builder); } @Override public void baselineSettings(Settings.Builder builder) { + builder.put("index.mapping.total_fields.limit", 5000); settings(builder); } @@ -261,22 +254,27 @@ public void testDateHistogramAggregation() throws IOException { assertTrue(matchResult.getMessage(), matchResult.isMatch()); } - private static XContentBuilder generateDocument(final Instant timestamp) throws IOException { - return XContentFactory.jsonBuilder() - .startObject() - .field("@timestamp", DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(timestamp)) - .field("host.name", randomFrom("foo", "bar", "baz")) - .field("message", randomFrom("a message", "another message", "still another message", "one more message")) - .field("method", randomFrom("put", "post", "get")) - .field("memory_usage_bytes", randomLongBetween(1000, 2000)) - .endObject(); + private XContentBuilder generateDocument(final Instant timestamp) throws IOException { + var document = XContentFactory.jsonBuilder(); + dataGenerator.generateDocument(document, doc -> { + doc.field("@timestamp", DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(timestamp)); + // Needed for terms query + doc.field("method", randomFrom("put", "post", "get")); + // We can generate this but we would get "too many buckets" + doc.field("memory_usage_bytes", randomLongBetween(1000, 2000)); + }); + + return document; } @SuppressWarnings("unchecked") private static List> getQueryHits(final Response response) throws IOException { final Map map = XContentHelper.convertToMap(XContentType.JSON.xContent(), response.getEntity().getContent(), true); final Map hitsMap = (Map) map.get("hits"); + final List> hitsList = (List>) hitsMap.get("hits"); + assertThat(hitsList.size(), greaterThan(0)); + return hitsList.stream().map(hit -> (Map) hit.get("_source")).toList(); } @@ -285,13 +283,23 @@ private static List> getAggregationBuckets(final Response re final Map map = XContentHelper.convertToMap(XContentType.JSON.xContent(), response.getEntity().getContent(), true); final Map aggs = (Map) map.get("aggregations"); final Map agg = (Map) aggs.get(aggName); - return (List>) agg.get("buckets"); + + var buckets = (List>) agg.get("buckets"); + assertThat(buckets.size(), greaterThan(0)); + + return buckets; } private void assertDocumentIndexing(List documents) throws IOException { final Tuple tuple = indexDocuments(() -> documents, () -> documents); + assertThat(tuple.v1().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + var baselineResponseBody = entityAsMap(tuple.v1()); + assertThat("errors in baseline bulk response:\n " + baselineResponseBody, baselineResponseBody.get("errors"), equalTo(false)); + assertThat(tuple.v2().getStatusLine().getStatusCode(), Matchers.equalTo(RestStatus.OK.getStatus())); + var contenderResponseBody = entityAsMap(tuple.v2()); + assertThat("errors in contender bulk response:\n " + contenderResponseBody, contenderResponseBody.get("errors"), equalTo(false)); } } diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ArrayEqualMatcher.java 
b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ArrayEqualMatcher.java index 25e6dc8ef31c9..ecfe5840689fb 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ArrayEqualMatcher.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ArrayEqualMatcher.java @@ -47,7 +47,7 @@ private MatchResult matchArraysEqual(final Object[] actualArray, final Object[] actualSettings, expectedMappings, expectedSettings, - "Arrays do not match when ignoreing sort order" + "Arrays do not match when ignoring sort order" ) ); } else { diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java index 56c24712f635c..e5429ddad99ff 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java @@ -47,7 +47,7 @@ private MatchResult matchListEquals(final List actualList, final List {}).accept(mapping); mapping.endObject(); } + /** + * Writes a fully built mapping document (enclosed in a top-level object) to a provided builder. + * Allows customizing parameters of top level object mapper. + * @param mapping destination + * @param customMappingParameters writer of custom mapping parameters of top level object mapping + * @throws IOException + */ + public void writeMapping(XContentBuilder mapping, CheckedConsumer customMappingParameters) + throws IOException { + mapping.startObject().field("_doc"); + topLevelGenerator.mappingWriter(customMappingParameters).accept(mapping); + mapping.endObject(); + } + + /** + * Generates a document and writes it to a provided builder. New document is generated every time. + * @param document + * @throws IOException + */ public void generateDocument(XContentBuilder document) throws IOException { - topLevelGenerator.fieldValueGenerator().accept(document); + topLevelGenerator.fieldValueGenerator(b -> {}).accept(document); + } + + /** + * Generates a document and writes it to a provided builder. New document is generated every time. + * Supports appending custom content to generated document (e.g. a custom generated field). 
+ * @param document + * @param customDocumentModifications + * @throws IOException + */ + public void generateDocument(XContentBuilder document, CheckedConsumer customDocumentModifications) + throws IOException { + topLevelGenerator.fieldValueGenerator(customDocumentModifications).accept(document); } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java index 4a0ed074b1411..ea47ad3be1fa6 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java @@ -10,6 +10,10 @@ import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary; import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary; +import org.elasticsearch.logsdb.datageneration.fields.PredefinedField; + +import java.util.ArrayList; +import java.util.List; /** * Allows configuring behavior of {@link DataGenerator}. @@ -18,8 +22,15 @@ * Applies to subobjects. * @param maxObjectDepth maximum depth of nested objects * @param nestedFieldsLimit how many total nested fields can be present in a produced mapping + * @param predefinedFields predefined fields that must be present in mapping and documents. Only top level fields are supported. */ -public record DataGeneratorSpecification(Arbitrary arbitrary, int maxFieldCountPerLevel, int maxObjectDepth, int nestedFieldsLimit) { +public record DataGeneratorSpecification( + Arbitrary arbitrary, + int maxFieldCountPerLevel, + int maxObjectDepth, + int nestedFieldsLimit, + List predefinedFields +) { public static Builder builder() { return new Builder(); @@ -34,14 +45,16 @@ public static class Builder { private int maxFieldCountPerLevel; private int maxObjectDepth; private int nestedFieldsLimit; + private List predefinedFields; public Builder() { + arbitrary = new RandomBasedArbitrary(); // Simply sufficiently big numbers to get some permutations maxFieldCountPerLevel = 50; - maxObjectDepth = 3; + maxObjectDepth = 2; // Default value of index.mapping.nested_fields.limit nestedFieldsLimit = 50; - arbitrary = new RandomBasedArbitrary(); + predefinedFields = new ArrayList<>(); } public Builder withArbitrary(Arbitrary arbitrary) { @@ -64,8 +77,13 @@ public Builder withNestedFieldsLimit(int nestedFieldsLimit) { return this; } + public Builder withPredefinedFields(List predefinedFields) { + this.predefinedFields = predefinedFields; + return this; + } + public DataGeneratorSpecification build() { - return new DataGeneratorSpecification(arbitrary, maxFieldCountPerLevel, maxObjectDepth, nestedFieldsLimit); + return new DataGeneratorSpecification(arbitrary, maxFieldCountPerLevel, maxObjectDepth, nestedFieldsLimit, predefinedFields); } } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java index 5d05fc1f35a77..24f59867f85b8 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java @@ -11,6 +11,8 @@ import org.elasticsearch.core.CheckedConsumer; import 
org.elasticsearch.logsdb.datageneration.FieldDataGenerator; import org.elasticsearch.logsdb.datageneration.FieldType; +import org.elasticsearch.logsdb.datageneration.fields.leaf.KeywordFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.LongFieldDataGenerator; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -25,84 +27,78 @@ public class GenericSubObjectFieldDataGenerator { private final Context context; - private final List childFields; - - public GenericSubObjectFieldDataGenerator(Context context) { + GenericSubObjectFieldDataGenerator(Context context) { this.context = context; - - childFields = new ArrayList<>(); - generateChildFields(); } - public CheckedConsumer mappingWriter( - CheckedConsumer customMappingParameters - ) { - return b -> { - b.startObject(); - customMappingParameters.accept(b); - - b.startObject("properties"); - for (var childField : childFields) { - b.field(childField.fieldName); - childField.generator.mappingWriter().accept(b); - } - b.endObject(); - - b.endObject(); - }; - } + List generateChildFields() { + var existingFieldNames = new HashSet(); + // no child fields is legal + var childFieldsCount = context.specification().arbitrary().childFieldCount(0, context.specification().maxFieldCountPerLevel()); + var result = new ArrayList(childFieldsCount); - public CheckedConsumer fieldValueGenerator() { - return b -> { - if (context.shouldGenerateObjectArray()) { - int size = context.specification().arbitrary().objectArraySize(); + for (int i = 0; i < childFieldsCount; i++) { + var fieldName = generateFieldName(existingFieldNames); - b.startArray(); - for (int i = 0; i < size; i++) { - writeObject(b, childFields); - } - b.endArray(); + if (context.shouldAddObjectField()) { + result.add(new ChildField(fieldName, new ObjectFieldDataGenerator(context.subObject()))); + } else if (context.shouldAddNestedField()) { + result.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject()))); } else { - writeObject(b, childFields); + var fieldType = context.specification().arbitrary().fieldType(); + result.add(leafField(fieldType, fieldName)); } - }; + } + + return result; } - private static void writeObject(XContentBuilder document, Iterable childFields) throws IOException { - document.startObject(); + List generateChildFields(List predefinedFields) { + return predefinedFields.stream().map(pf -> leafField(pf.fieldType(), pf.fieldName())).toList(); + } + + static void writeChildFieldsMapping(XContentBuilder mapping, List childFields) throws IOException { for (var childField : childFields) { - document.field(childField.fieldName); - childField.generator.fieldValueGenerator().accept(document); + mapping.field(childField.fieldName); + childField.generator.mappingWriter().accept(mapping); } - document.endObject(); } - private void generateChildFields() { - var existingFields = new HashSet(); - // no child fields is legal - var childFieldsCount = context.specification().arbitrary().childFieldCount(0, context.specification().maxFieldCountPerLevel()); - - for (int i = 0; i < childFieldsCount; i++) { - var fieldName = generateFieldName(existingFields); + static void writeObjectsData(XContentBuilder document, Context context, CheckedConsumer objectWriter) + throws IOException { + if (context.shouldGenerateObjectArray()) { + int size = context.specification().arbitrary().objectArraySize(); - if (context.shouldAddObjectField()) { - childFields.add(new ChildField(fieldName, new 
ObjectFieldDataGenerator(context.subObject()))); - } else if (context.shouldAddNestedField()) { - childFields.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject()))); - } else { - var fieldType = context.specification().arbitrary().fieldType(); - addLeafField(fieldType, fieldName); + document.startArray(); + for (int i = 0; i < size; i++) { + objectWriter.accept(document); } + document.endArray(); + } else { + objectWriter.accept(document); + } + } + + static void writeSingleObject(XContentBuilder document, Iterable childFields) throws IOException { + document.startObject(); + writeChildFieldsData(document, childFields); + document.endObject(); + } + + static void writeChildFieldsData(XContentBuilder document, Iterable childFields) throws IOException { + for (var childField : childFields) { + document.field(childField.fieldName); + childField.generator.fieldValueGenerator().accept(document); } } - private void addLeafField(FieldType type, String fieldName) { + private ChildField leafField(FieldType type, String fieldName) { var generator = switch (type) { case LONG -> new LongFieldDataGenerator(context.specification().arbitrary()); case KEYWORD -> new KeywordFieldDataGenerator(context.specification().arbitrary()); }; - childFields.add(new ChildField(fieldName, generator)); + return new ChildField(fieldName, generator); } private String generateFieldName(Set existingFields) { @@ -115,5 +111,5 @@ private String generateFieldName(Set existingFields) { return fieldName; } - private record ChildField(String fieldName, FieldDataGenerator generator) {} + record ChildField(String fieldName, FieldDataGenerator generator) {} } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java index acceb3aebe421..f52b739418034 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java @@ -13,21 +13,39 @@ import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.List; public class NestedFieldDataGenerator implements FieldDataGenerator { - private final GenericSubObjectFieldDataGenerator delegate; + private final Context context; + private final List childFields; - public NestedFieldDataGenerator(Context context) { - this.delegate = new GenericSubObjectFieldDataGenerator(context); + NestedFieldDataGenerator(Context context) { + this.context = context; + var genericGenerator = new GenericSubObjectFieldDataGenerator(context); + this.childFields = genericGenerator.generateChildFields(); } @Override public CheckedConsumer mappingWriter() { - return delegate.mappingWriter(b -> b.field("type", "nested")); + return b -> { + b.startObject(); + + b.field("type", "nested"); + + b.startObject("properties"); + GenericSubObjectFieldDataGenerator.writeChildFieldsMapping(b, childFields); + b.endObject(); + + b.endObject(); + }; } @Override public CheckedConsumer fieldValueGenerator() { - return delegate.fieldValueGenerator(); + CheckedConsumer objectWriter = object -> GenericSubObjectFieldDataGenerator.writeSingleObject( + object, + childFields + ); + return b -> GenericSubObjectFieldDataGenerator.writeObjectsData(b, context, objectWriter); } } diff --git 
a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java index 8cbedefe14ae5..522bb2b1772b0 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java @@ -9,30 +9,41 @@ package org.elasticsearch.logsdb.datageneration.fields; import org.elasticsearch.core.CheckedConsumer; -import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.List; public class ObjectFieldDataGenerator implements FieldDataGenerator { - private final GenericSubObjectFieldDataGenerator delegate; - - public ObjectFieldDataGenerator(DataGeneratorSpecification specification) { - this(new Context(specification)); - } + private final Context context; + private final List childFields; ObjectFieldDataGenerator(Context context) { - this.delegate = new GenericSubObjectFieldDataGenerator(context); + this.context = context; + var genericGenerator = new GenericSubObjectFieldDataGenerator(context); + this.childFields = genericGenerator.generateChildFields(); } @Override public CheckedConsumer mappingWriter() { - return delegate.mappingWriter(b -> {}); + return b -> { + b.startObject(); + + b.startObject("properties"); + GenericSubObjectFieldDataGenerator.writeChildFieldsMapping(b, childFields); + b.endObject(); + + b.endObject(); + }; } @Override public CheckedConsumer fieldValueGenerator() { - return delegate.fieldValueGenerator(); + CheckedConsumer objectWriter = object -> GenericSubObjectFieldDataGenerator.writeSingleObject( + object, + childFields + ); + return b -> GenericSubObjectFieldDataGenerator.writeObjectsData(b, context, objectWriter); } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java new file mode 100644 index 0000000000000..6adae35dc909c --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java @@ -0,0 +1,13 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.logsdb.datageneration.fields; + +import org.elasticsearch.logsdb.datageneration.FieldType; + +public record PredefinedField(String fieldName, FieldType fieldType) {} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java new file mode 100644 index 0000000000000..1debc6b1fa7a1 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.logsdb.datageneration.fields; + +import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +public class TopLevelObjectFieldDataGenerator { + private final Context context; + private final List predefinedFields; + private final List generatedChildFields; + + public TopLevelObjectFieldDataGenerator(DataGeneratorSpecification specification) { + this.context = new Context(specification); + var genericGenerator = new GenericSubObjectFieldDataGenerator(context); + this.predefinedFields = genericGenerator.generateChildFields(specification.predefinedFields()); + this.generatedChildFields = genericGenerator.generateChildFields(); + } + + public CheckedConsumer mappingWriter( + CheckedConsumer customMappingParameters + ) { + return b -> { + b.startObject(); + + customMappingParameters.accept(b); + + b.startObject("properties"); + GenericSubObjectFieldDataGenerator.writeChildFieldsMapping(b, predefinedFields); + GenericSubObjectFieldDataGenerator.writeChildFieldsMapping(b, generatedChildFields); + b.endObject(); + + b.endObject(); + }; + } + + public CheckedConsumer fieldValueGenerator( + CheckedConsumer customDocumentModification + ) { + CheckedConsumer objectWriter = b -> { + b.startObject(); + + customDocumentModification.accept(b); + GenericSubObjectFieldDataGenerator.writeChildFieldsData(b, predefinedFields); + GenericSubObjectFieldDataGenerator.writeChildFieldsData(b, generatedChildFields); + + b.endObject(); + }; + return b -> GenericSubObjectFieldDataGenerator.writeObjectsData(b, context, objectWriter); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/KeywordFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java similarity index 96% rename from test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/KeywordFieldDataGenerator.java rename to test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java index 11413d33a97c7..89ae1d6034c15 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/KeywordFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java @@ -6,7 +6,7 @@ * Side Public License, v 1. 
*/ -package org.elasticsearch.logsdb.datageneration.fields; +package org.elasticsearch.logsdb.datageneration.fields.leaf; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/LongFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java similarity index 95% rename from test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/LongFieldDataGenerator.java rename to test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java index f1bb35f1f0401..097c5fe024d2b 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/LongFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java @@ -6,7 +6,7 @@ * Side Public License, v 1. */ -package org.elasticsearch.logsdb.datageneration.fields; +package org.elasticsearch.logsdb.datageneration.fields.leaf; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java index 41066e9ba3cac..e476e02d03778 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java @@ -207,5 +207,5 @@ public boolean generateArrayOfObjects() { public int objectArraySize() { return 2; } - }; + } } From c722cebb13c45c9400457767fdf5a2ba16261d1a Mon Sep 17 00:00:00 2001 From: Adam Demjen Date: Mon, 29 Jul 2024 15:03:39 -0400 Subject: [PATCH 06/22] Fix score count validation in reranker response (#111212) * Fix rerank score validation * Update docs/changelog/111212.yaml * Add test case for invalid document indices in reranker result * Preemptive top_n config check * Reorg code + refine tests * Add support for Google Vertex AI task settings * Spotless * Make top N eval async * Update test * Fix broken unit test * Clean up tests * Spotless * Add size check + compare against rankWindowSize * Fix import --- docs/changelog/111212.yaml | 6 ++ ...ankFeaturePhaseRankCoordinatorContext.java | 96 +++++++++++++------ ...aturePhaseRankCoordinatorContextTests.java | 7 +- .../TextSimilarityRankTests.java | 70 +++++++++++--- .../TextSimilarityTestPlugin.java | 63 ++++++++++-- 5 files changed, 188 insertions(+), 54 deletions(-) create mode 100644 docs/changelog/111212.yaml diff --git a/docs/changelog/111212.yaml b/docs/changelog/111212.yaml new file mode 100644 index 0000000000000..67d1513b3ff6f --- /dev/null +++ b/docs/changelog/111212.yaml @@ -0,0 +1,6 @@ +pr: 111212 +summary: Fix score count validation in reranker response +area: Ranking +type: bug +issues: + - 111202 diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java index a22126439e9e2..42413c35fcbff 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java @@ -14,8 +14,11 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.search.rank.context.RankFeaturePhaseRankCoordinatorContext; import org.elasticsearch.search.rank.feature.RankFeatureDoc; +import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankTaskSettings; +import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankTaskSettings; import java.util.Arrays; import java.util.Comparator; @@ -53,24 +56,77 @@ public TextSimilarityRankFeaturePhaseRankCoordinatorContext( protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener scoreListener) { // Wrap the provided rankListener to an ActionListener that would handle the response from the inference service // and then pass the results - final ActionListener actionListener = scoreListener.delegateFailureAndWrap((l, r) -> { - float[] scores = extractScoresFromResponse(r); - if (scores.length != featureDocs.length) { + final ActionListener inferenceListener = scoreListener.delegateFailureAndWrap((l, r) -> { + InferenceServiceResults results = r.getResults(); + assert results instanceof RankedDocsResults; + + // Ensure we get exactly as many scores as the number of docs we passed, otherwise we may return incorrect results + List rankedDocs = ((RankedDocsResults) results).getRankedDocs(); + if (rankedDocs.size() != featureDocs.length) { l.onFailure( - new IllegalStateException("Document and score count mismatch: [" + featureDocs.length + "] vs [" + scores.length + "]") + new IllegalStateException( + "Reranker input document count and returned score count mismatch: [" + + featureDocs.length + + "] vs [" + + rankedDocs.size() + + "]" + ) ); } else { + float[] scores = extractScoresFromRankedDocs(rankedDocs); l.onResponse(scores); } }); - List featureData = Arrays.stream(featureDocs).map(x -> x.featureData).toList(); - InferenceAction.Request request = generateRequest(featureData); - try { - client.execute(InferenceAction.INSTANCE, request, actionListener); - } finally { - request.decRef(); - } + // top N listener + ActionListener topNListener = scoreListener.delegateFailureAndWrap((l, r) -> { + // The rerank inference endpoint may have an override to return top N documents only, in that case let's fail fast to avoid + // assigning scores to the wrong input + Integer configuredTopN = null; + if (r.getEndpoints().isEmpty() == false + && r.getEndpoints().get(0).getTaskSettings() instanceof CohereRerankTaskSettings cohereTaskSettings) { + configuredTopN = cohereTaskSettings.getTopNDocumentsOnly(); + } else if (r.getEndpoints().isEmpty() == false + && r.getEndpoints().get(0).getTaskSettings() instanceof GoogleVertexAiRerankTaskSettings googleVertexAiTaskSettings) { + configuredTopN = googleVertexAiTaskSettings.topN(); + } + if (configuredTopN != null && configuredTopN < rankWindowSize) { + l.onFailure( + new IllegalArgumentException( + "Inference endpoint [" + + inferenceId + + "] is configured to return the top [" + + configuredTopN + + "] results, 
but rank_window_size is [" + + rankWindowSize + + "]. Reduce rank_window_size to be less than or equal to the configured top N value." + ) + ); + return; + } + List featureData = Arrays.stream(featureDocs).map(x -> x.featureData).toList(); + InferenceAction.Request inferenceRequest = generateRequest(featureData); + try { + client.execute(InferenceAction.INSTANCE, inferenceRequest, inferenceListener); + } finally { + inferenceRequest.decRef(); + } + }); + + GetInferenceModelAction.Request getModelRequest = new GetInferenceModelAction.Request(inferenceId, TaskType.RERANK); + client.execute(GetInferenceModelAction.INSTANCE, getModelRequest, topNListener); + } + + /** + * Sorts documents by score descending and discards those with a score less than minScore. + * @param originalDocs documents to process + */ + @Override + protected RankFeatureDoc[] preprocess(RankFeatureDoc[] originalDocs) { + return Arrays.stream(originalDocs) + .filter(doc -> minScore == null || doc.score >= minScore) + .sorted(Comparator.comparing((RankFeatureDoc doc) -> doc.score).reversed()) + .toArray(RankFeatureDoc[]::new); } protected InferenceAction.Request generateRequest(List docFeatures) { @@ -85,11 +141,7 @@ protected InferenceAction.Request generateRequest(List docFeatures) { ); } - private float[] extractScoresFromResponse(InferenceAction.Response response) { - InferenceServiceResults results = response.getResults(); - assert results instanceof RankedDocsResults; - - List rankedDocs = ((RankedDocsResults) results).getRankedDocs(); + private float[] extractScoresFromRankedDocs(List rankedDocs) { float[] scores = new float[rankedDocs.size()]; for (RankedDocsResults.RankedDoc rankedDoc : rankedDocs) { scores[rankedDoc.index()] = rankedDoc.relevanceScore(); @@ -97,16 +149,4 @@ private float[] extractScoresFromResponse(InferenceAction.Response response) { return scores; } - - /** - * Sorts documents by score descending and discards those with a score less than minScore. 
- * @param originalDocs documents to process - */ - @Override - protected RankFeatureDoc[] preprocess(RankFeatureDoc[] originalDocs) { - return Arrays.stream(originalDocs) - .filter(doc -> minScore == null || doc.score >= minScore) - .sorted(Comparator.comparing((RankFeatureDoc doc) -> doc.score).reversed()) - .toArray(RankFeatureDoc[]::new); - } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java index 50d91a2271de6..2e9be42b5c5d4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java @@ -12,7 +12,7 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.search.rank.feature.RankFeatureDoc; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; @@ -54,10 +54,9 @@ public void onFailure(Exception e) { fail(); } }); - verify(mockClient).execute( - eq(InferenceAction.INSTANCE), - argThat(actionRequest -> ((InferenceAction.Request) actionRequest).getTaskType().equals(TaskType.RERANK)), + eq(GetInferenceModelAction.INSTANCE), + argThat(actionRequest -> ((GetInferenceModelAction.Request) actionRequest).getTaskType().equals(TaskType.RERANK)), any() ); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java index 7fbfe70dbcfe7..a26dc50097cf5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.rank.textsimilarity; +import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.client.internal.Client; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.inference.InputType; @@ -29,22 +30,46 @@ import java.util.Objects; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; public class TextSimilarityRankTests extends ESSingleNodeTestCase { /** - * {@code TextSimilarityRankBuilder} that simulates an inference call that returns a different number of results as the input. + * {@code TextSimilarityRankBuilder} that sets top_n in the inference endpoint's task settings. + * See {@code TextSimilarityTestPlugin -> TestFilter -> handleGetInferenceModelActionRequest} for the logic that extracts the top_n + * value. 
*/ - public static class InvalidInferenceResultCountProvidingTextSimilarityRankBuilder extends TextSimilarityRankBuilder { + public static class TopNConfigurationAcceptingTextSimilarityRankBuilder extends TextSimilarityRankBuilder { - public InvalidInferenceResultCountProvidingTextSimilarityRankBuilder( + public TopNConfigurationAcceptingTextSimilarityRankBuilder( String field, String inferenceId, String inferenceText, int rankWindowSize, - Float minScore + Float minScore, + int topN + ) { + super(field, inferenceId + "-task-settings-top-" + topN, inferenceText, rankWindowSize, minScore); + } + } + + /** + * {@code TextSimilarityRankBuilder} that simulates an inference call returning N results. + */ + public static class InferenceResultCountAcceptingTextSimilarityRankBuilder extends TextSimilarityRankBuilder { + + private final int inferenceResultCount; + + public InferenceResultCountAcceptingTextSimilarityRankBuilder( + String field, + String inferenceId, + String inferenceText, + int rankWindowSize, + Float minScore, + int inferenceResultCount ) { super(field, inferenceId, inferenceText, rankWindowSize, minScore); + this.inferenceResultCount = inferenceResultCount; } @Override @@ -62,10 +87,10 @@ public RankFeaturePhaseRankCoordinatorContext buildRankFeaturePhaseCoordinatorCo protected InferenceAction.Request generateRequest(List docFeatures) { return new InferenceAction.Request( TaskType.RERANK, - inferenceId, + this.inferenceId, inferenceText, docFeatures, - Map.of("invalidInferenceResultCount", true), + Map.of("inferenceResultCount", inferenceResultCount), InputType.SEARCH, InferenceAction.Request.DEFAULT_TIMEOUT ); @@ -151,17 +176,38 @@ public void testRerankInferenceFailure() { ); } - public void testRerankInferenceResultMismatch() { - ElasticsearchAssertions.assertFailures( + public void testRerankTopNConfigurationAndRankWindowSizeMismatch() { + SearchPhaseExecutionException ex = expectThrows( + SearchPhaseExecutionException.class, // Execute search with text similarity reranking client.prepareSearch() .setRankBuilder( - new InvalidInferenceResultCountProvidingTextSimilarityRankBuilder("text", "my-rerank-model", "my query", 100, 1.5f) + // Simulate reranker configuration with top_n=3 in task_settings, which is different from rank_window_size=10 + // (Note: top_n comes from inferenceId, there's no other easy way of passing this to the mocked get model request) + new TopNConfigurationAcceptingTextSimilarityRankBuilder("text", "my-rerank-model", "my query", 100, 1.5f, 3) ) - .setQuery(QueryBuilders.matchAllQuery()), - RestStatus.INTERNAL_SERVER_ERROR, - containsString("Failed to execute phase [rank-feature], Computing updated ranks for results failed") + .setQuery(QueryBuilders.matchAllQuery()) + ); + assertThat(ex.status(), equalTo(RestStatus.BAD_REQUEST)); + assertThat( + ex.getDetailedMessage(), + containsString("Reduce rank_window_size to be less than or equal to the configured top N value") + ); + } + + public void testRerankInputSizeAndInferenceResultsMismatch() { + SearchPhaseExecutionException ex = expectThrows( + SearchPhaseExecutionException.class, + // Execute search with text similarity reranking + client.prepareSearch() + .setRankBuilder( + // Simulate reranker returning different number of results from input + new InferenceResultCountAcceptingTextSimilarityRankBuilder("text", "my-rerank-model", "my query", 100, 1.5f, 4) + ) + .setQuery(QueryBuilders.matchAllQuery()) ); + assertThat(ex.status(), equalTo(RestStatus.INTERNAL_SERVER_ERROR)); + 
assertThat(ex.getDetailedMessage(), containsString("Reranker input document count and returned score count mismatch")); } private static void assertHitHasRankScoreAndText(SearchHit hit, int expectedRank, float expectedScore, String expectedText) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java index 1e457a1a27c92..6d0c15d5c0bfe 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java @@ -21,7 +21,9 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskType; import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.Plugin; @@ -39,8 +41,12 @@ import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.services.cohere.CohereService; +import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankServiceSettings; +import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankTaskSettings; import java.io.IOException; import java.util.ArrayList; @@ -48,6 +54,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static java.util.Collections.singletonList; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; @@ -100,7 +108,6 @@ public int order() { } @Override - @SuppressWarnings("unchecked") public void apply( Task task, String action, @@ -108,23 +115,59 @@ public void app ActionListener listener, ActionFilterChain chain ) { - // For any other action than inference, execute normally - if (action.equals(InferenceAction.INSTANCE.name()) == false) { + if (action.equals(GetInferenceModelAction.INSTANCE.name())) { + assert request instanceof GetInferenceModelAction.Request; + handleGetInferenceModelActionRequest((GetInferenceModelAction.Request) request, listener); + } else if (action.equals(InferenceAction.INSTANCE.name())) { + assert request instanceof InferenceAction.Request; + handleInferenceActionRequest((InferenceAction.Request) request, listener); + } else { + // For any other action than get model and inference, execute normally chain.proceed(task, action, request, listener); - return; } + } - assert request instanceof InferenceAction.Request; - boolean shouldThrow = (boolean) ((InferenceAction.Request) request).getTaskSettings().getOrDefault("throwing", false); - boolean hasInvalidInferenceResultCount = (boolean) ((InferenceAction.Request) request).getTaskSettings() - .getOrDefault("invalidInferenceResultCount", false); + @SuppressWarnings("unchecked") 
+ private void handleGetInferenceModelActionRequest( + GetInferenceModelAction.Request request, + ActionListener listener + ) { + String inferenceEntityId = request.getInferenceEntityId(); + Integer topN = null; + Matcher extractTopN = Pattern.compile(".*(task-settings-top-\\d+).*").matcher(inferenceEntityId); + if (extractTopN.find()) { + topN = Integer.parseInt(extractTopN.group(1).replaceAll("\\D", "")); + } + + ActionResponse response = new GetInferenceModelAction.Response( + List.of( + new ModelConfigurations( + request.getInferenceEntityId(), + request.getTaskType(), + CohereService.NAME, + new CohereRerankServiceSettings("uri", "model", null), + topN == null ? new EmptyTaskSettings() : new CohereRerankTaskSettings(topN, null, null) + ) + ) + ); + listener.onResponse((Response) response); + } + + @SuppressWarnings("unchecked") + private void handleInferenceActionRequest( + InferenceAction.Request request, + ActionListener listener + ) { + Map taskSettings = request.getTaskSettings(); + boolean shouldThrow = (boolean) taskSettings.getOrDefault("throwing", false); + Integer inferenceResultCount = (Integer) taskSettings.get("inferenceResultCount"); if (shouldThrow) { listener.onFailure(new UnsupportedOperationException("simulated failure")); } else { List rankedDocsResults = new ArrayList<>(); - List inputs = ((InferenceAction.Request) request).getInput(); - int resultCount = hasInvalidInferenceResultCount ? inputs.size() - 1 : inputs.size(); + List inputs = request.getInput(); + int resultCount = inferenceResultCount == null ? inputs.size() : inferenceResultCount; for (int i = 0; i < resultCount; i++) { rankedDocsResults.add(new RankedDocsResults.RankedDoc(i, Float.parseFloat(inputs.get(i)), inputs.get(i))); } From 7de305c4ec1df3b5fef5704930b1b1d7711af6da Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 29 Jul 2024 15:20:39 -0400 Subject: [PATCH 07/22] Remove 4096 bool query max limit from docs (#111421) indices.query.bool.max_clause_count is set automatically and does not default to 4096 as before. This removes mentions of 4096 from the query documentation. Relates to PR #91811 --- docs/reference/query-dsl/query-string-query.asciidoc | 4 ++-- docs/reference/query-dsl/span-multi-term-query.asciidoc | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index 319ede7c4ac05..b45247ace3735 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -30,7 +30,7 @@ If you don't need to support a query syntax, consider using the syntax, use the <> query, which is less strict. ==== - + [[query-string-query-ex-request]] ==== Example request @@ -83,7 +83,7 @@ could be expensive. There is a limit on the number of fields times terms that can be queried at once. It is defined by the `indices.query.bool.max_clause_count` -<>, which defaults to 4096. +<>.
==== -- diff --git a/docs/reference/query-dsl/span-multi-term-query.asciidoc b/docs/reference/query-dsl/span-multi-term-query.asciidoc index aefb3e4b75eb5..5a5f0e1f5ff99 100644 --- a/docs/reference/query-dsl/span-multi-term-query.asciidoc +++ b/docs/reference/query-dsl/span-multi-term-query.asciidoc @@ -39,7 +39,8 @@ GET /_search -------------------------------------------------- WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the -boolean query limit (defaults to 4096).To avoid an unbounded expansion you can set the <>. +To avoid an unbounded expansion you can set the <> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only, you can activate the <> field option of the `text` field instead. This will rewrite any prefix query on the field to a single term query that matches the indexed prefix. From 5e6c2e533c09a923099a8920e98ac28d13a8d42b Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 29 Jul 2024 12:34:32 -0700 Subject: [PATCH 08/22] Fix LogsIndexModeFullClusterRestartIT (#111362) --- muted-tests.yml | 3 --- .../upgrades/LogsIndexModeFullClusterRestartIT.java | 5 ++++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index d106ca3c9d701..51f1b56786e86 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -105,9 +105,6 @@ tests: - class: org.elasticsearch.action.admin.indices.create.SplitIndexIT method: testSplitIndexPrimaryTerm issue: https://github.com/elastic/elasticsearch/issues/111282 -- class: org.elasticsearch.upgrades.LogsIndexModeFullClusterRestartIT - method: testLogsIndexing {cluster=UPGRADED} - issue: https://github.com/elastic/elasticsearch/issues/111306 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 - class: org.elasticsearch.xpack.esql.analysis.VerifierTests diff --git a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/LogsIndexModeFullClusterRestartIT.java b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/LogsIndexModeFullClusterRestartIT.java index da168f2999086..739b4e302bb54 100644 --- a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/LogsIndexModeFullClusterRestartIT.java +++ b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/LogsIndexModeFullClusterRestartIT.java @@ -19,6 +19,7 @@ import org.elasticsearch.test.MapMatcher; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.cluster.local.distribution.DistributionType; +import org.elasticsearch.test.rest.RestTestLegacyFeatures; import org.hamcrest.Matcher; import org.hamcrest.Matchers; import org.junit.ClassRule; @@ -37,6 +38,7 @@ public class LogsIndexModeFullClusterRestartIT extends ParameterizedFullClusterR @ClassRule public static final ElasticsearchCluster cluster = ElasticsearchCluster.local() .distribution(DistributionType.DEFAULT) + .version(getOldClusterTestVersion()) .module("constant-keyword") .module("data-streams") .module("mapper-extras") @@ -44,7 +46,6 @@ public class LogsIndexModeFullClusterRestartIT extends ParameterizedFullClusterR .module("x-pack-stack") .setting("xpack.security.enabled", "false") .setting("xpack.license.self_generated.type", "trial") - .setting("cluster.logsdb.enabled", "true") .build(); public LogsIndexModeFullClusterRestartIT(@Name("cluster") FullClusterRestartUpgradeStatus upgradeStatus) { @@ 
-123,6 +124,8 @@ protected ElasticsearchCluster getUpgradeCluster() { }"""; public void testLogsIndexing() throws IOException { + assumeTrue("Test uses data streams", oldClusterHasFeature(RestTestLegacyFeatures.DATA_STREAMS_SUPPORTED)); + if (isRunningAgainstOldCluster()) { assertOK(client().performRequest(putTemplate(client(), "logs-template", STANDARD_TEMPLATE))); assertOK(client().performRequest(createDataStream("logs-apache-production"))); From 69c96974de548ee3bfbfed482f0c205e18d42c8d Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 29 Jul 2024 16:01:56 -0400 Subject: [PATCH 09/22] Ensure vector similarity correctly limits inner_hits returned for nested kNN (#111363) For nested kNN we support not only similarity thresholds, but also multi-passage search while retrieving more than one nearest passage. However, the inner_hits retrieved for the kNN search would ignore the restricted similarity, meaning the inner hits would return all passages, not just the ones within the similarity limit, which is confusing. closes: https://github.com/elastic/elasticsearch/issues/111093 --- docs/changelog/111363.yaml | 6 +++ .../search.vectors/100_knn_nested_search.yml | 50 +++++++++++++++++++ .../org/elasticsearch/TransportVersions.java | 2 + .../action/search/DfsQueryPhase.java | 3 +- .../vectors/DenseVectorFieldMapper.java | 8 ++- .../search/vectors/ExactKnnQueryBuilder.java | 37 ++++++++------ .../vectors/KnnScoreDocQueryBuilder.java | 29 +++++++++-- .../search/vectors/KnnSearchBuilder.java | 4 ++ .../search/vectors/KnnVectorQueryBuilder.java | 2 +- .../action/search/DfsQueryPhaseTests.java | 6 ++- .../vectors/DenseVectorFieldTypeTests.java | 4 +- ...AbstractKnnVectorQueryBuilderTestCase.java | 17 +++++++ .../vectors/ExactKnnQueryBuilderTests.java | 23 ++++++++- .../vectors/KnnScoreDocQueryBuilderTests.java | 19 ++++--- 14 files changed, 176 insertions(+), 34 deletions(-) create mode 100644 docs/changelog/111363.yaml diff --git a/docs/changelog/111363.yaml b/docs/changelog/111363.yaml new file mode 100644 index 0000000000000..2cb3c5342ea5c --- /dev/null +++ b/docs/changelog/111363.yaml @@ -0,0 +1,6 @@ +pr: 111363 +summary: Ensure vector similarity correctly limits `inner_hits` returned for nested + kNN +area: Vector Search +type: bug +issues: [111093] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml index 72c6abab22600..d627be2fb15c3 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml @@ -411,3 +411,53 @@ setup: - match: {hits.total.value: 1} - match: {hits.hits.0._id: "2"} +--- +"nested Knn search with required similarity appropriately filters inner_hits": + - requires: + cluster_features: "gte_v8.16.0" + reason: 'bugfix for 8.16' + + - do: + search: + index: test + body: + query: + nested: + path: nested + inner_hits: + size: 3 + _source: false + fields: + - nested.paragraph_id + query: + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + num_candidates: 3 + similarity: 10.5 + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "2"} + - length: {hits.hits.0.inner_hits.nested.hits.hits: 1} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} + + - do: + search: + index: test + body:
+ knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + num_candidates: 3 + k: 3 + similarity: 10.5 + inner_hits: + size: 3 + _source: false + fields: + - nested.paragraph_id + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "2"} + - length: {hits.hits.0.inner_hits.nested.hits.hits: 1} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 34324ec2a1c16..7d1204d1a51c0 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -169,6 +169,7 @@ static TransportVersion def(int id) { public static final TransportVersion VERSIONED_MASTER_NODE_REQUESTS = def(8_701_00_0); public static final TransportVersion ML_INFERENCE_AMAZON_BEDROCK_ADDED = def(8_702_00_0); public static final TransportVersion ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15 = def(8_702_00_1); + public static final TransportVersion FIX_VECTOR_SIMILARITY_INNER_HITS_BACKPORT_8_15 = def(8_702_00_2); public static final TransportVersion ML_INFERENCE_DONT_DELETE_WHEN_SEMANTIC_TEXT_EXISTS = def(8_703_00_0); public static final TransportVersion INFERENCE_ADAPTIVE_ALLOCATIONS = def(8_704_00_0); public static final TransportVersion INDEX_REQUEST_UPDATE_BY_SCRIPT_ORIGIN = def(8_705_00_0); @@ -179,6 +180,7 @@ static TransportVersion def(int id) { public static final TransportVersion MASTER_NODE_METRICS = def(8_710_00_0); public static final TransportVersion SEGMENT_LEVEL_FIELDS_STATS = def(8_711_00_0); public static final TransportVersion ML_ADD_DETECTION_RULE_PARAMS = def(8_712_00_0); + public static final TransportVersion FIX_VECTOR_SIMILARITY_INNER_HITS = def(8_713_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 9ddac7f13eb51..7a33eaa59eb03 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -155,7 +155,8 @@ ShardSearchRequest rewriteShardSearchRequest(ShardSearchRequest request) { QueryBuilder query = new KnnScoreDocQueryBuilder( scoreDocs.toArray(Lucene.EMPTY_SCORE_DOCS), source.knnSearch().get(i).getField(), - source.knnSearch().get(i).getQueryVector() + source.knnSearch().get(i).getQueryVector(), + source.knnSearch().get(i).getSimilarity() ).boost(source.knnSearch().get(i).boost()).queryName(source.knnSearch().get(i).queryName()); if (nestedPath != null) { query = new NestedQueryBuilder(nestedPath, query, ScoreMode.Max).innerHit(source.knnSearch().get(i).innerHit()); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 8ffe4b4cc4a66..81fb7990f09eb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -1789,17 +1789,21 @@ public Query termQuery(Object value, SearchExecutionContext context) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support term queries"); } - public Query createExactKnnQuery(VectorData queryVector) { + public Query createExactKnnQuery(VectorData queryVector, Float vectorSimilarity) { if (isIndexed() == false) { throw new IllegalArgumentException( "to perform knn search on field [" + name() + "], its mapping must have [index] set to [true]" ); } - return switch (elementType) { + Query knnQuery = switch (elementType) { case BYTE -> createExactKnnByteQuery(queryVector.asByteVector()); case FLOAT -> createExactKnnFloatQuery(queryVector.asFloatVector()); case BIT -> createExactKnnBitQuery(queryVector.asByteVector()); }; + if (vectorSimilarity != null) { + knnQuery = new VectorSimilarityQuery(knnQuery, vectorSimilarity, similarity.score(vectorSimilarity, elementType, dims)); + } + return knnQuery; } private Query createExactKnnBitQuery(byte[] queryVector) { diff --git a/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java index 4ac8d14c0b79d..4f36ddbedf23b 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java @@ -32,6 +32,7 @@ public class ExactKnnQueryBuilder extends AbstractQueryBuilder rewrittenQueries = new ArrayList<>(filterQueries.size()); diff --git a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java index 47dbe8f126556..3c698f1b790e5 100644 --- a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java @@ -353,12 +353,14 @@ public void testRewriteShardSearchRequestWithRank() { KnnScoreDocQueryBuilder ksdqb0 = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(1, 3.0f, 1), new ScoreDoc(4, 1.5f, 1) }, "vector", - 
VectorData.fromFloats(new float[] { 0.0f }) + VectorData.fromFloats(new float[] { 0.0f }), + null ); KnnScoreDocQueryBuilder ksdqb1 = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(1, 2.0f, 1) }, "vector2", - VectorData.fromFloats(new float[] { 0.0f }) + VectorData.fromFloats(new float[] { 0.0f }), + null ); assertEquals( List.of(bm25, ksdqb0, ksdqb1), diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 9ee895f6de003..9ef2d0df90cce 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -215,7 +215,7 @@ public void testExactKnnQuery() { for (int i = 0; i < dims; i++) { queryVector[i] = randomFloat(); } - Query query = field.createExactKnnQuery(VectorData.fromFloats(queryVector)); + Query query = field.createExactKnnQuery(VectorData.fromFloats(queryVector), null); assertTrue(query instanceof DenseVectorQuery.Floats); } { @@ -233,7 +233,7 @@ public void testExactKnnQuery() { for (int i = 0; i < dims; i++) { queryVector[i] = randomByte(); } - Query query = field.createExactKnnQuery(VectorData.fromBytes(queryVector)); + Query query = field.createExactKnnQuery(VectorData.fromBytes(queryVector), null); assertTrue(query instanceof DenseVectorQuery.Bytes); } } diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java index f0899384dbc5e..565a09dbff0d3 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.query.InnerHitsRewriteContext; import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -306,6 +307,22 @@ private void assertBWCSerialization(QueryBuilder newQuery, QueryBuilder bwcQuery } } + public void testRewriteForInnerHits() throws IOException { + SearchExecutionContext context = createSearchExecutionContext(); + InnerHitsRewriteContext innerHitsRewriteContext = new InnerHitsRewriteContext(context.getParserConfig(), System::currentTimeMillis); + KnnVectorQueryBuilder queryBuilder = createTestQueryBuilder(); + queryBuilder.boost(randomFloat()); + queryBuilder.queryName(randomAlphaOfLength(10)); + QueryBuilder rewritten = queryBuilder.rewrite(innerHitsRewriteContext); + assertTrue(rewritten instanceof ExactKnnQueryBuilder); + ExactKnnQueryBuilder exactKnnQueryBuilder = (ExactKnnQueryBuilder) rewritten; + assertEquals(queryBuilder.queryVector(), exactKnnQueryBuilder.getQuery()); + assertEquals(queryBuilder.getFieldName(), exactKnnQueryBuilder.getField()); + assertEquals(queryBuilder.boost(), exactKnnQueryBuilder.boost(), 0.0001f); + assertEquals(queryBuilder.queryName(), exactKnnQueryBuilder.queryName()); + assertEquals(queryBuilder.getVectorSimilarity(), exactKnnQueryBuilder.vectorSimilarity()); + } + public void testRewriteWithQueryVectorBuilder() 
throws Exception { int dims = randomInt(1024); float[] expectedArray = new float[dims]; diff --git a/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java index 5f4fb61718a7e..c302dc7bd63c9 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java @@ -53,12 +53,12 @@ protected ExactKnnQueryBuilder doCreateTestQueryBuilder() { for (int i = 0; i < VECTOR_DIMENSION; i++) { query[i] = randomFloat(); } - return new ExactKnnQueryBuilder(query, VECTOR_FIELD); + return new ExactKnnQueryBuilder(VectorData.fromFloats(query), VECTOR_FIELD, randomBoolean() ? randomFloat() : null); } @Override public void testValidOutput() { - ExactKnnQueryBuilder query = new ExactKnnQueryBuilder(new float[] { 1.0f, 2.0f, 3.0f }, "field"); + ExactKnnQueryBuilder query = new ExactKnnQueryBuilder(VectorData.fromFloats(new float[] { 1.0f, 2.0f, 3.0f }), "field", null); String expected = """ { "exact_knn" : { @@ -71,10 +71,29 @@ public void testValidOutput() { } }"""; assertEquals(expected, query.toString()); + query = new ExactKnnQueryBuilder(VectorData.fromFloats(new float[] { 1.0f, 2.0f, 3.0f }), "field", 1f); + expected = """ + { + "exact_knn" : { + "query" : [ + 1.0, + 2.0, + 3.0 + ], + "field" : "field", + "similarity" : 1.0 + } + }"""; + assertEquals(expected, query.toString()); } @Override protected void doAssertLuceneQuery(ExactKnnQueryBuilder queryBuilder, Query query, SearchExecutionContext context) throws IOException { + if (queryBuilder.vectorSimilarity() != null) { + assertTrue(query instanceof VectorSimilarityQuery); + VectorSimilarityQuery vectorSimilarityQuery = (VectorSimilarityQuery) query; + query = vectorSimilarityQuery.getInnerKnnQuery(); + } assertTrue(query instanceof DenseVectorQuery.Floats); DenseVectorQuery.Floats denseVectorQuery = (DenseVectorQuery.Floats) query; assertEquals(VECTOR_FIELD, denseVectorQuery.field); diff --git a/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java index a558081c2d16f..c09ed24668963 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java @@ -56,7 +56,8 @@ protected KnnScoreDocQueryBuilder doCreateTestQueryBuilder() { return new KnnScoreDocQueryBuilder( scoreDocs.toArray(new ScoreDoc[0]), randomBoolean() ? "field" : null, - randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null + randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null, + randomBoolean() ? randomFloat() : null ); } @@ -65,7 +66,8 @@ public void testValidOutput() { KnnScoreDocQueryBuilder query = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(0, 4.25f), new ScoreDoc(5, 1.6f) }, "field", - VectorData.fromFloats(new float[] { 1.0f, 2.0f }) + VectorData.fromFloats(new float[] { 1.0f, 2.0f }), + null ); String expected = """ { @@ -155,7 +157,8 @@ public void testRewriteToMatchNone() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( new ScoreDoc[0], randomBoolean() ? "field" : null, - randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null + randomBoolean() ? 
VectorData.fromFloats(randomVector(10)) : null, + randomBoolean() ? randomFloat() : null ); QueryRewriteContext context = randomBoolean() ? new InnerHitsRewriteContext(createSearchExecutionContext().getParserConfig(), System::currentTimeMillis) @@ -169,7 +172,8 @@ public void testRewriteForInnerHits() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(0, 4.25f), new ScoreDoc(5, 1.6f) }, randomAlphaOfLength(10), - VectorData.fromFloats(randomVector(10)) + VectorData.fromFloats(randomVector(10)), + randomBoolean() ? randomFloat() : null ); queryBuilder.boost(randomFloat()); queryBuilder.queryName(randomAlphaOfLength(10)); @@ -180,6 +184,7 @@ public void testRewriteForInnerHits() throws IOException { assertEquals(queryBuilder.fieldName(), exactKnnQueryBuilder.getField()); assertEquals(queryBuilder.boost(), exactKnnQueryBuilder.boost(), 0.0001f); assertEquals(queryBuilder.queryName(), exactKnnQueryBuilder.queryName()); + assertEquals(queryBuilder.vectorSimilarity(), exactKnnQueryBuilder.vectorSimilarity()); } @Override @@ -221,7 +226,8 @@ public void testScoreDocQueryWeightCount() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( scoreDocs, "field", - VectorData.fromFloats(randomVector(10)) + VectorData.fromFloats(randomVector(10)), + null ); Query query = queryBuilder.doToQuery(context); final Weight w = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0f); @@ -268,7 +274,8 @@ public void testScoreDocQuery() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( scoreDocs, "field", - VectorData.fromFloats(randomVector(10)) + VectorData.fromFloats(randomVector(10)), + null ); final Query query = queryBuilder.doToQuery(context); final Weight w = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0f); From d568949d660c2395bb9a7108daba038d2e6d0ad9 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 Jul 2024 06:13:33 +1000 Subject: [PATCH 10/22] Mute org.elasticsearch.repositories.blobstore.testkit.AzureSnapshotRepoTestKitIT testRepositoryAnalysis #111280 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 51f1b56786e86..9a29329cae470 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -132,6 +132,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/111279 - class: org.elasticsearch.repositories.azure.RepositoryAzureClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/111345 +- class: org.elasticsearch.repositories.blobstore.testkit.AzureSnapshotRepoTestKitIT + method: testRepositoryAnalysis + issue: https://github.com/elastic/elasticsearch/issues/111280 # Examples: # From b601e3bcd2936a9b93a733fc3d7be9a27b276d32 Mon Sep 17 00:00:00 2001 From: Samiul Monir <150824886+Samiul-TheSoccerFan@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:20:49 -0400 Subject: [PATCH 11/22] Update semantic_text field to support indexing numeric and boolean data types (#111284) * adding support for additional data types * Adding unit tests for additional data types * updating integration tests to feed random data types * Fix code styles by running spotlessApply * Adding yml tests for additional data type support * fix failed yml tests and added tests for dense and boolean type * Removed util class and moved the random function into own specific test files * rewrite the terms to match most up to date 
terminology * Update docs/changelog/111284.yaml * update changelog yml text to fit into one line * limit changelog to only 1 area * Updating text_expansion with sparse_embedding to keep the terminology up to date * refactoring randomSemanticTextInput function * Update docs/changelog/111284.yaml * Adding comments and addressing nitpicks --- docs/changelog/111284.yaml | 6 + .../ShardBulkInferenceActionFilterIT.java | 5 +- .../ShardBulkInferenceActionFilter.java | 8 +- .../ShardBulkInferenceActionFilterTests.java | 37 ++++++- .../mapper/SemanticTextFieldTests.java | 30 ++++- .../inference/30_semantic_text_inference.yml | 89 ++++++++++++++- .../test/inference/40_semantic_text_query.yml | 104 ++++++++++++++++++ 7 files changed, 267 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/111284.yaml diff --git a/docs/changelog/111284.yaml b/docs/changelog/111284.yaml new file mode 100644 index 0000000000000..f87649a134af6 --- /dev/null +++ b/docs/changelog/111284.yaml @@ -0,0 +1,6 @@ +pr: 111284 +summary: Update `semantic_text` field to support indexing numeric and boolean data + types +area: Mapping +type: enhancement +issues: [] diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java index 8da1aaabd517a..73c0f6d4c7685 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java @@ -33,6 +33,7 @@ import java.util.Locale; import java.util.Map; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticTextInput; import static org.hamcrest.Matchers.equalTo; public class ShardBulkInferenceActionFilterIT extends ESIntegTestCase { @@ -93,8 +94,8 @@ public void testBulkOperations() throws Exception { String id = Long.toString(totalDocs); boolean isIndexRequest = randomBoolean(); Map source = new HashMap<>(); - source.put("sparse_field", isIndexRequest && rarely() ? null : randomAlphaOfLengthBetween(0, 1000)); - source.put("dense_field", isIndexRequest && rarely() ? null : randomAlphaOfLengthBetween(0, 1000)); + source.put("sparse_field", isIndexRequest && rarely() ? null : randomSemanticTextInput()); + source.put("dense_field", isIndexRequest && rarely() ? null : randomSemanticTextInput()); if (isIndexRequest) { bulkReqBuilder.add(new IndexRequestBuilder(client()).setIndex(INDEX_NAME).setId(id).setSource(source)); totalDocs++; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index f1a590e647dbc..ade0748ef10bf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -492,12 +492,16 @@ private Map> createFieldInferenceRequests(Bu * If {@code valueObj} is not a string or a collection of strings, it throws an ElasticsearchStatusException.
*/ private static List nodeStringValues(String field, Object valueObj) { - if (valueObj instanceof String value) { + if (valueObj instanceof Number || valueObj instanceof Boolean) { + return List.of(valueObj.toString()); + } else if (valueObj instanceof String value) { return List.of(value); } else if (valueObj instanceof Collection values) { List valuesString = new ArrayList<>(); for (var v : values) { - if (v instanceof String value) { + if (v instanceof Number || v instanceof Boolean) { + valuesString.add(v.toString()); + } else if (v instanceof String value) { valuesString.add(value); } else { throw new ElasticsearchStatusException( diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java index f63a6369b21a6..d78ea7933e836 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java @@ -35,6 +35,7 @@ import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; +import org.elasticsearch.xpack.inference.mapper.SemanticTextField; import org.elasticsearch.xpack.inference.model.TestModel; import org.elasticsearch.xpack.inference.registry.ModelRegistry; import org.junit.After; @@ -56,7 +57,9 @@ import static org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilter.DEFAULT_BATCH_SIZE; import static org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilter.getIndexRequestOrNull; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticTextInput; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSparseEmbeddings; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.semanticTextFieldFromChunkedInferenceResults; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.toChunkedResult; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -331,16 +334,34 @@ private static BulkItemRequest[] randomBulkItemRequest( for (var entry : fieldInferenceMap.values()) { String field = entry.getName(); var model = modelMap.get(entry.getInferenceId()); - String text = randomAlphaOfLengthBetween(10, 20); - docMap.put(field, text); - expectedDocMap.put(field, text); + Object inputObject = randomSemanticTextInput(); + String inputText = inputObject.toString(); + docMap.put(field, inputObject); + expectedDocMap.put(field, inputText); if (model == null) { // ignore results, the doc should fail with a resource not found exception continue; } - var result = randomSemanticText(field, model, List.of(text), requestContentType); - model.putResult(text, toChunkedResult(result)); - expectedDocMap.put(field, result); + + SemanticTextField semanticTextField; + // The model is not field aware and that is why we are skipping the embedding generation process for existing values. 
+ // This prevents a situation where embeddings in the expected docMap do not match those in the model, which could happen if + // embeddings were overwritten. + if (model.hasResult(inputText)) { + ChunkedInferenceServiceResults results = model.getResults(inputText); + semanticTextField = semanticTextFieldFromChunkedInferenceResults( + field, + model, + List.of(inputText), + results, + requestContentType + ); + } else { + semanticTextField = randomSemanticText(field, model, List.of(inputText), requestContentType); + model.putResult(inputText, toChunkedResult(semanticTextField)); + } + + expectedDocMap.put(field, semanticTextField); } int requestId = randomIntBetween(0, Integer.MAX_VALUE); @@ -383,5 +404,9 @@ ChunkedInferenceServiceResults getResults(String text) { void putResult(String text, ChunkedInferenceServiceResults result) { resultMap.put(text, result); } + + boolean hasResult(String text) { + return resultMap.containsKey(text); + } } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java index 2a64f77e28756..563093930c358 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java @@ -86,7 +86,7 @@ protected void assertEqualInstances(SemanticTextField expectedInstance, Semantic @Override protected SemanticTextField createTestInstance() { - List rawValues = randomList(1, 5, () -> randomAlphaOfLengthBetween(10, 20)); + List rawValues = randomList(1, 5, () -> randomSemanticTextInput().toString()); try { // try catch required for override return randomSemanticText(NAME, TestModel.createRandomInstance(), rawValues, randomFrom(XContentType.values())); } catch (IOException e) { @@ -192,6 +192,16 @@ public static SemanticTextField randomSemanticText(String fieldName, Model model case SPARSE_EMBEDDING -> randomSparseEmbeddings(inputs); default -> throw new AssertionError("invalid task type: " + model.getTaskType().name()); }; + return semanticTextFieldFromChunkedInferenceResults(fieldName, model, inputs, results, contentType); + } + + public static SemanticTextField semanticTextFieldFromChunkedInferenceResults( + String fieldName, + Model model, + List inputs, + ChunkedInferenceServiceResults results, + XContentType contentType + ) { return new SemanticTextField( fieldName, inputs, @@ -204,6 +214,24 @@ public static SemanticTextField randomSemanticText(String fieldName, Model model ); } + /** + * Returns a randomly generated object for semantic text test purposes.
+ */ + public static Object randomSemanticTextInput() { + if (rarely()) { + return switch (randomIntBetween(0, 4)) { + case 0 -> randomInt(); + case 1 -> randomLong(); + case 2 -> randomFloat(); + case 3 -> randomBoolean(); + case 4 -> randomDouble(); + default -> throw new IllegalStateException("Illegal state while generating random semantic text input"); + }; + } else { + return randomAlphaOfLengthBetween(10, 20); + } + } + public static ChunkedInferenceServiceResults toChunkedResult(SemanticTextField field) throws IOException { switch (field.inference().modelSettings().taskType()) { case SPARSE_EMBEDDING -> { diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml index f467691600766..f58a5c33fd85d 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml @@ -51,7 +51,7 @@ setup: type: text --- -"Calculates text expansion and embedding results for new documents": +"Calculates sparse embedding and text embedding results for new documents": - do: index: index: test-index @@ -74,6 +74,93 @@ setup: - match: { _source.dense_field.inference.chunks.0.text: "another inference test" } - match: { _source.non_inference_field: "non inference test" } +--- +"Calculates sparse embedding and text embedding results for new documents with integer value": + - do: + index: + index: test-index + id: doc_1 + body: + sparse_field: 75 + dense_field: 100 + + - do: + get: + index: test-index + id: doc_1 + + - match: { _source.sparse_field.text: "75" } + - exists: _source.sparse_field.inference.chunks.0.embeddings + - match: { _source.sparse_field.inference.chunks.0.text: "75" } + - match: { _source.dense_field.text: "100" } + - exists: _source.dense_field.inference.chunks.0.embeddings + - match: { _source.dense_field.inference.chunks.0.text: "100" } + +--- +"Calculates sparse embedding and text embedding results for new documents with boolean value": + - do: + index: + index: test-index + id: doc_1 + body: + sparse_field: true + dense_field: false + + - do: + get: + index: test-index + id: doc_1 + + - match: { _source.sparse_field.text: "true" } + - exists: _source.sparse_field.inference.chunks.0.embeddings + - match: { _source.sparse_field.inference.chunks.0.text: "true" } + - match: { _source.dense_field.text: "false" } + - exists: _source.dense_field.inference.chunks.0.embeddings + - match: { _source.dense_field.inference.chunks.0.text: "false" } + +--- +"Calculates sparse embedding and text embedding results for new documents with a collection of mixed data types": + - do: + index: + index: test-index + id: doc_1 + body: + sparse_field: [false, 75, "inference test", 13.49] + dense_field: [true, 49.99, "another inference test", 5654] + + - do: + get: + index: test-index + id: doc_1 + + - length: { _source.sparse_field.text: 4 } + - match: { _source.sparse_field.text.0: "false" } + - match: { _source.sparse_field.text.1: "75" } + - match: { _source.sparse_field.text.2: "inference test" } + - match: { _source.sparse_field.text.3: "13.49" } + - exists: _source.sparse_field.inference.chunks.0.embeddings + - exists: _source.sparse_field.inference.chunks.1.embeddings + - exists: _source.sparse_field.inference.chunks.2.embeddings + - exists: 
_source.sparse_field.inference.chunks.3.embeddings + - match: { _source.sparse_field.inference.chunks.0.text: "false" } + - match: { _source.sparse_field.inference.chunks.1.text: "75" } + - match: { _source.sparse_field.inference.chunks.2.text: "inference test" } + - match: { _source.sparse_field.inference.chunks.3.text: "13.49" } + + - length: { _source.dense_field.text: 4 } + - match: { _source.dense_field.text.0: "true" } + - match: { _source.dense_field.text.1: "49.99" } + - match: { _source.dense_field.text.2: "another inference test" } + - match: { _source.dense_field.text.3: "5654" } + - exists: _source.dense_field.inference.chunks.0.embeddings + - exists: _source.dense_field.inference.chunks.1.embeddings + - exists: _source.dense_field.inference.chunks.2.embeddings + - exists: _source.dense_field.inference.chunks.3.embeddings + - match: { _source.dense_field.inference.chunks.0.text: "true" } + - match: { _source.dense_field.inference.chunks.1.text: "49.99" } + - match: { _source.dense_field.inference.chunks.2.text: "another inference test" } + - match: { _source.dense_field.inference.chunks.3.text: "5654" } + --- "Inference fields do not create new mappings": - do: diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml index 5ee7a943c4d35..932ee4854f445 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml @@ -90,6 +90,58 @@ setup: - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } +--- +"Numeric query using a sparse embedding model": + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + inference_field: [40, 49.678] + refresh: true + + - do: + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "40" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } + +--- +"Boolean query using a sparse embedding model": + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + inference_field: true + refresh: true + + - do: + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "true" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0._source.inference_field.inference.chunks: 1 } + --- "Query using a dense embedding model": - skip: @@ -121,6 +173,58 @@ setup: - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } +--- +"Numeric query using a dense embedding model": + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-dense-index + id: doc_1 + body: + inference_field: [45.1, 100] + refresh: true + + - do: + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "45.1" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } + +--- +"Boolean query using a dense 
embedding model": + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-dense-index + id: doc_1 + body: + inference_field: false + refresh: true + + - do: + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "false" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0._source.inference_field.inference.chunks: 1 } + --- "Query using a dense embedding model that uses byte embeddings": - skip: From 73a1dd6932c5b4d052dae2f9b605f853eb236194 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 Jul 2024 06:28:37 +1000 Subject: [PATCH 12/22] Mute org.elasticsearch.xpack.repositories.metering.azure.AzureRepositoriesMeteringIT org.elasticsearch.xpack.repositories.metering.azure.AzureRepositoriesMeteringIT #111307 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 9a29329cae470..c2a8c32694d61 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -135,6 +135,8 @@ tests: - class: org.elasticsearch.repositories.blobstore.testkit.AzureSnapshotRepoTestKitIT method: testRepositoryAnalysis issue: https://github.com/elastic/elasticsearch/issues/111280 +- class: org.elasticsearch.xpack.repositories.metering.azure.AzureRepositoriesMeteringIT + issue: https://github.com/elastic/elasticsearch/issues/111307 # Examples: # From 90f6e8c96ba3948a1a081e9eb5ac711d88594b96 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:03:15 +1000 Subject: [PATCH 13/22] Mute org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests testGroupingAggregate {TestCase=} #111429 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index c2a8c32694d61..a4418c34e2589 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -137,6 +137,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/111280 - class: org.elasticsearch.xpack.repositories.metering.azure.AzureRepositoriesMeteringIT issue: https://github.com/elastic/elasticsearch/issues/111307 +- class: org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests + method: testGroupingAggregate {TestCase=} + issue: https://github.com/elastic/elasticsearch/issues/111429 # Examples: # From 65c2fac99efc655b56b0788adf5c672f5388860b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:03:22 +1000 Subject: [PATCH 14/22] Mute org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests testGroupingAggregate {TestCase=} #111428 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index a4418c34e2589..eb504dfb10219 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -140,6 +140,9 @@ tests: - class: org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests method: testGroupingAggregate {TestCase=} issue: https://github.com/elastic/elasticsearch/issues/111429 +- class: org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests + method: testGroupingAggregate {TestCase=} + issue: https://github.com/elastic/elasticsearch/issues/111428 # Examples: # From f9007c59ee0aa1b47b09a394d67a252a253e5359 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 
Jul 2024 07:03:47 +1000 Subject: [PATCH 15/22] Mute org.elasticsearch.xpack.restart.FullClusterRestartIT testSingleDoc {cluster=OLD} #111430 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index eb504dfb10219..1df885cdc72c0 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -143,6 +143,9 @@ tests: - class: org.elasticsearch.xpack.esql.expression.function.aggregate.ValuesTests method: testGroupingAggregate {TestCase=} issue: https://github.com/elastic/elasticsearch/issues/111428 +- class: org.elasticsearch.xpack.restart.FullClusterRestartIT + method: testSingleDoc {cluster=OLD} + issue: https://github.com/elastic/elasticsearch/issues/111430 # Examples: # From fb19b4b098e1e02c44cb9ae55f924d65ab6ffa05 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 29 Jul 2024 22:14:08 +0100 Subject: [PATCH 16/22] Make `EnrichPolicyRunner` more properly async (#111321) Today `EnrichPolicyRunner` carries its listener in a field, with various morally-async methods masquerading as synchronous ones because they don't accept the listener from the caller as one might expect. This commit removes the `listener` field in favour of passing a listener explicitly between the methods that require it, making it easier to spot listener leaks. --- .../xpack/enrich/EnrichPolicyExecutor.java | 12 +- .../xpack/enrich/EnrichPolicyRunner.java | 68 ++- .../xpack/enrich/EnrichPolicyRunnerTests.java | 541 +++++------------- 3 files changed, 201 insertions(+), 420 deletions(-) diff --git a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyExecutor.java b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyExecutor.java index 2ebe268cc788d..746ae2f4eee2b 100644 --- a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyExecutor.java +++ b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyExecutor.java @@ -13,6 +13,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.admin.cluster.node.tasks.get.GetTaskRequest; import org.elasticsearch.action.admin.cluster.node.tasks.get.GetTaskResponse; import org.elasticsearch.client.internal.Client; @@ -126,8 +127,9 @@ public void runPolicyLocally( } task.setStatus(new ExecuteEnrichPolicyStatus(ExecuteEnrichPolicyStatus.PolicyPhases.SCHEDULED)); - Runnable runnable = createPolicyRunner(policyName, policy, enrichIndexName, task, listener); - threadPool.executor(ThreadPool.Names.GENERIC).execute(runnable); + var policyRunner = createPolicyRunner(policyName, policy, enrichIndexName, task); + threadPool.executor(ThreadPool.Names.GENERIC) + .execute(ActionRunnable.wrap(ActionListener.assertOnce(listener), policyRunner::run)); } catch (Exception e) { task.setStatus(new ExecuteEnrichPolicyStatus(ExecuteEnrichPolicyStatus.PolicyPhases.FAILED)); throw e; @@ -206,18 +208,16 @@ public void onFailure(Exception exception) { }); } - private Runnable createPolicyRunner( + private EnrichPolicyRunner createPolicyRunner( String policyName, EnrichPolicy policy, String enrichIndexName, - ExecuteEnrichPolicyTask task, - ActionListener listener + ExecuteEnrichPolicyTask task ) { return new EnrichPolicyRunner( policyName, policy, task, - listener, clusterService, indicesService, client, diff --git 
a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java index 810fd03f061ea..0891f24feda68 100644 --- a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java +++ b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java @@ -76,7 +76,7 @@ import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.xpack.core.ClientHelper.ENRICH_ORIGIN; -public class EnrichPolicyRunner implements Runnable { +public class EnrichPolicyRunner { private static final Logger logger = LogManager.getLogger(EnrichPolicyRunner.class); @@ -92,7 +92,6 @@ public class EnrichPolicyRunner implements Runnable { private final String policyName; private final EnrichPolicy policy; private final ExecuteEnrichPolicyTask task; - private final ActionListener listener; private final ClusterService clusterService; private final IndicesService indicesService; private final Client client; @@ -105,7 +104,6 @@ public class EnrichPolicyRunner implements Runnable { String policyName, EnrichPolicy policy, ExecuteEnrichPolicyTask task, - ActionListener listener, ClusterService clusterService, IndicesService indicesService, Client client, @@ -117,7 +115,6 @@ public class EnrichPolicyRunner implements Runnable { this.policyName = Objects.requireNonNull(policyName); this.policy = Objects.requireNonNull(policy); this.task = Objects.requireNonNull(task); - this.listener = Objects.requireNonNull(listener); this.clusterService = Objects.requireNonNull(clusterService); this.indicesService = indicesService; this.client = wrapClient(client, policyName, task, clusterService); @@ -127,8 +124,7 @@ public class EnrichPolicyRunner implements Runnable { this.maxForceMergeAttempts = maxForceMergeAttempts; } - @Override - public void run() { + public void run(ActionListener listener) { try { logger.info("Policy [{}]: Running enrich policy", policyName); task.setStatus(new ExecuteEnrichPolicyStatus(ExecuteEnrichPolicyStatus.PolicyPhases.RUNNING)); @@ -139,7 +135,7 @@ public void run() { // This call does not set the origin to ensure that the user executing the policy has permission to access the source index client.admin().indices().getIndex(getIndexRequest, listener.delegateFailureAndWrap((l, getIndexResponse) -> { validateMappings(getIndexResponse); - prepareAndCreateEnrichIndex(toMappings(getIndexResponse), clusterService.getSettings()); + prepareAndCreateEnrichIndex(toMappings(getIndexResponse), clusterService.getSettings(), l); })); } catch (Exception e) { listener.onFailure(e); @@ -204,9 +200,7 @@ static void validateMappings( } } - private record MappingTypeAndFormat(String type, String format) { - - } + private record MappingTypeAndFormat(String type, String format) {} private static MappingTypeAndFormat validateAndGetMappingTypeAndFormat( String fieldName, @@ -436,7 +430,11 @@ static boolean isIndexableField(MapperService mapperService, String field, Strin } } - private void prepareAndCreateEnrichIndex(List> mappings, Settings settings) { + private void prepareAndCreateEnrichIndex( + List> mappings, + Settings settings, + ActionListener listener + ) { int numberOfReplicas = settings.getAsInt(ENRICH_MIN_NUMBER_OF_REPLICAS_NAME, 0); Settings enrichIndexSettings = Settings.builder() .put("index.number_of_shards", 1) @@ -453,23 +451,23 @@ private void prepareAndCreateEnrichIndex(List> mappings, Set .indices() .create( 
createEnrichIndexRequest, - listener.delegateFailure((l, createIndexResponse) -> prepareReindexOperation(enrichIndexName)) + listener.delegateFailure((l, createIndexResponse) -> prepareReindexOperation(enrichIndexName, l)) ); } - private void prepareReindexOperation(final String destinationIndexName) { + private void prepareReindexOperation(final String destinationIndexName, ActionListener listener) { // Check to make sure that the enrich pipeline exists, and create it if it is missing. if (EnrichPolicyReindexPipeline.exists(clusterService.state()) == false) { EnrichPolicyReindexPipeline.create( enrichOriginClient(), - listener.delegateFailure((l, r) -> transferDataToEnrichIndex(destinationIndexName)) + listener.delegateFailure((l, r) -> transferDataToEnrichIndex(destinationIndexName, l)) ); } else { - transferDataToEnrichIndex(destinationIndexName); + transferDataToEnrichIndex(destinationIndexName, listener); } } - private void transferDataToEnrichIndex(final String destinationIndexName) { + private void transferDataToEnrichIndex(final String destinationIndexName, ActionListener listener) { logger.debug("Policy [{}]: Transferring source data to new enrich index [{}]", policyName, destinationIndexName); // Filter down the source fields to just the ones required by the policy final Set retainFields = new HashSet<>(); @@ -540,13 +538,17 @@ public void onResponse(BulkByScrollResponse bulkByScrollResponse) { bulkByScrollResponse.getCreated(), destinationIndexName ); - forceMergeEnrichIndex(destinationIndexName, 1); + forceMergeEnrichIndex(destinationIndexName, 1, delegate); } } }); } - private void forceMergeEnrichIndex(final String destinationIndexName, final int attempt) { + private void forceMergeEnrichIndex( + final String destinationIndexName, + final int attempt, + ActionListener listener + ) { logger.debug( "Policy [{}]: Force merging newly created enrich index [{}] (Attempt {}/{})", policyName, @@ -558,21 +560,29 @@ private void forceMergeEnrichIndex(final String destinationIndexName, final int .indices() .forceMerge( new ForceMergeRequest(destinationIndexName).maxNumSegments(1), - listener.delegateFailure((l, r) -> refreshEnrichIndex(destinationIndexName, attempt)) + listener.delegateFailure((l, r) -> refreshEnrichIndex(destinationIndexName, attempt, l)) ); } - private void refreshEnrichIndex(final String destinationIndexName, final int attempt) { + private void refreshEnrichIndex( + final String destinationIndexName, + final int attempt, + ActionListener listener + ) { logger.debug("Policy [{}]: Refreshing enrich index [{}]", policyName, destinationIndexName); enrichOriginClient().admin() .indices() .refresh( new RefreshRequest(destinationIndexName), - listener.delegateFailure((l, r) -> ensureSingleSegment(destinationIndexName, attempt)) + listener.delegateFailure((l, r) -> ensureSingleSegment(destinationIndexName, attempt, l)) ); } - protected void ensureSingleSegment(final String destinationIndexName, final int attempt) { + protected void ensureSingleSegment( + final String destinationIndexName, + final int attempt, + ActionListener listener + ) { enrichOriginClient().admin() .indices() .segments(new IndicesSegmentsRequest(destinationIndexName), listener.delegateFailureAndWrap((l, indicesSegmentResponse) -> { @@ -644,29 +654,29 @@ protected void ensureSingleSegment(final String destinationIndexName, final int nextAttempt, maxForceMergeAttempts ); - forceMergeEnrichIndex(destinationIndexName, nextAttempt); + forceMergeEnrichIndex(destinationIndexName, nextAttempt, listener); } } 
else { // Force merge down to one segment successful - setIndexReadOnly(destinationIndexName); + setIndexReadOnly(destinationIndexName, listener); } })); } - private void setIndexReadOnly(final String destinationIndexName) { + private void setIndexReadOnly(final String destinationIndexName, ActionListener listener) { logger.debug("Policy [{}]: Setting new enrich index [{}] to be read only", policyName, destinationIndexName); UpdateSettingsRequest request = new UpdateSettingsRequest(destinationIndexName).setPreserveExisting(true) .settings(Settings.builder().put("index.auto_expand_replicas", "0-all").put("index.blocks.write", "true")); enrichOriginClient().admin() .indices() - .updateSettings(request, listener.delegateFailure((l, r) -> waitForIndexGreen(destinationIndexName))); + .updateSettings(request, listener.delegateFailure((l, r) -> waitForIndexGreen(destinationIndexName, l))); } - private void waitForIndexGreen(final String destinationIndexName) { + private void waitForIndexGreen(final String destinationIndexName, ActionListener listener) { ClusterHealthRequest request = new ClusterHealthRequest(destinationIndexName).waitForGreenStatus(); enrichOriginClient().admin() .cluster() - .health(request, listener.delegateFailureAndWrap((l, r) -> updateEnrichPolicyAlias(destinationIndexName))); + .health(request, listener.delegateFailureAndWrap((l, r) -> updateEnrichPolicyAlias(destinationIndexName, l))); } /** @@ -720,7 +730,7 @@ private void validateIndexBeforePromotion(String destinationIndexName, ClusterSt } } - private void updateEnrichPolicyAlias(final String destinationIndexName) { + private void updateEnrichPolicyAlias(final String destinationIndexName, ActionListener listener) { String enrichIndexBase = EnrichPolicy.getBaseName(policyName); logger.debug("Policy [{}]: Promoting new enrich index [{}] to alias [{}]", policyName, destinationIndexName, enrichIndexBase); GetAliasesRequest aliasRequest = new GetAliasesRequest(enrichIndexBase); diff --git a/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java b/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java index 7ba3b356d6015..75e10e7069563 100644 --- a/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java +++ b/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.DocWriteResponse; -import org.elasticsearch.action.LatchedActionListener; import org.elasticsearch.action.admin.cluster.health.TransportClusterHealthAction; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; @@ -34,7 +33,6 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.support.DefaultShardOperationFailedException; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.internal.Client; import org.elasticsearch.client.internal.FilterClient; @@ -79,18 +77,15 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import 
java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Consumer; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.CoreMatchers.allOf; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.notNullValue; @@ -147,17 +142,10 @@ public void testRunner() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -227,17 +215,10 @@ public void testRunnerGeoMatchType() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -318,17 +299,10 @@ private void testNumberRangeMatchType(String rangeType) throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -415,17 +389,10 @@ public void testRunnerRangeTypeWithIpRange() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - 
ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -514,17 +481,10 @@ public void testRunnerMultiSource() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -633,17 +593,10 @@ public void testRunnerMultiSourceDocIdCollisions() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -756,17 +709,10 @@ public void testRunnerMultiSourceEnrichKeyCollisions() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -820,7 +766,7 @@ public void testRunnerMultiSourceEnrichKeyCollisions() throws Exception { ensureEnrichIndexIsReadOnly(createdEnrichIndex); } - public void testRunnerNoSourceIndex() throws Exception { + public void testRunnerNoSourceIndex() { final String sourceIndex = "source-index"; List enrichFields = List.of("field2", 
"field5"); @@ -829,24 +775,16 @@ public void testRunnerNoSourceIndex() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - Exception thrown = exception.get(); - assertThat(thrown, instanceOf(IndexNotFoundException.class)); - assertThat(thrown.getMessage(), containsString("no such index [" + sourceIndex + "]")); - } else { - fail("Expected exception but nothing was thrown"); - } + assertThat( + asInstanceOf(IndexNotFoundException.class, safeExecuteExpectFailure(enrichPolicyRunner)).getMessage(), + containsString("no such index [" + sourceIndex + "]") + ); } - public void testRunnerNoSourceMapping() throws Exception { + public void testRunnerNoSourceMapping() { final String sourceIndex = "source-index"; CreateIndexResponse createResponse = indicesAdmin().create(new CreateIndexRequest(sourceIndex)).actionGet(); assertTrue(createResponse.isAcknowledged()); @@ -857,32 +795,21 @@ public void testRunnerNoSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - Exception thrown = exception.get(); - assertThat(thrown, instanceOf(ElasticsearchException.class)); - assertThat( - thrown.getMessage(), - containsString( - "Enrich policy execution for [" - + policyName - + "] failed. No mapping available on source [" - + sourceIndex - + "] included in [[" - + sourceIndex - + "]]" - ) - ); - } else { - fail("Expected exception but nothing was thrown"); - } + assertThat( + asInstanceOf(ElasticsearchException.class, safeExecuteExpectFailure(enrichPolicyRunner)).getMessage(), + containsString( + "Enrich policy execution for [" + + policyName + + "] failed. 
No mapping available on source [" + + sourceIndex + + "] included in [[" + + sourceIndex + + "]]" + ) + ); } public void testRunnerKeyNestedSourceMapping() throws Exception { @@ -914,36 +841,22 @@ public void testRunnerKeyNestedSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - Exception thrown = exception.get(); - assertThat(thrown, instanceOf(ElasticsearchException.class)); - assertThat( - thrown.getMessage(), - containsString( - "Enrich policy execution for [" - + policyName - + "] failed while validating field mappings for index [" - + sourceIndex - + "]" - ) - ); - assertThat( - thrown.getCause().getMessage(), - containsString( - "Could not traverse mapping to field [nesting.key]. The [nesting" + "] field must be regular object but was [nested]." - ) - ); - } else { - fail("Expected exception but nothing was thrown"); - } + final var thrown = asInstanceOf(ElasticsearchException.class, safeExecuteExpectFailure(enrichPolicyRunner)); + assertThat( + thrown.getMessage(), + containsString( + "Enrich policy execution for [" + policyName + "] failed while validating field mappings for index [" + sourceIndex + "]" + ) + ); + assertThat( + thrown.getCause().getMessage(), + containsString( + "Could not traverse mapping to field [nesting.key]. The [nesting" + "] field must be regular object but was [nested]." + ) + ); } public void testRunnerValueNestedSourceMapping() throws Exception { @@ -975,37 +888,22 @@ public void testRunnerValueNestedSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - Exception thrown = exception.get(); - assertThat(thrown, instanceOf(ElasticsearchException.class)); - assertThat( - thrown.getMessage(), - containsString( - "Enrich policy execution for [" - + policyName - + "] failed while validating field mappings for index [" - + sourceIndex - + "]" - ) - ); - assertThat( - thrown.getCause().getMessage(), - containsString( - "Could not traverse mapping to field [nesting.field2]. " - + "The [nesting] field must be regular object but was [nested]." 
- ) - ); - } else { - fail("Expected exception but nothing was thrown"); - } + final var thrown = asInstanceOf(ElasticsearchException.class, safeExecuteExpectFailure(enrichPolicyRunner)); + assertThat( + thrown.getMessage(), + containsString( + "Enrich policy execution for [" + policyName + "] failed while validating field mappings for index [" + sourceIndex + "]" + ) + ); + assertThat( + thrown.getCause().getMessage(), + containsString( + "Could not traverse mapping to field [nesting.field2]. " + "The [nesting] field must be regular object but was [nested]." + ) + ); } public void testRunnerObjectSourceMapping() throws Exception { @@ -1062,17 +960,10 @@ public void testRunnerObjectSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1176,17 +1067,10 @@ public void testRunnerExplicitObjectSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1290,17 +1174,10 @@ public void testRunnerExplicitObjectSourceMappingRangePolicy() throws Exception final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1414,17 +1291,10 @@ public void testRunnerTwoObjectLevelsSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new 
AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1544,17 +1414,10 @@ public void testRunnerTwoObjectLevelsSourceMappingRangePolicy() throws Exception final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1678,17 +1541,10 @@ public void testRunnerTwoObjectLevelsSourceMappingDateRangeWithFormat() throws E final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1815,17 +1671,10 @@ public void testRunnerDottedKeyNameSourceMapping() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -1903,9 +1752,6 @@ public void testRunnerWithForceMergeRetry() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference 
exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); ClusterService clusterService = getInstanceFromNode(ClusterService.class); IndexNameExpressionResolver resolver = getInstanceFromNode(IndexNameExpressionResolver.class); Task asyncTask = testTaskManager.register("enrich", "policy_execution", new TaskAwareRequest() { @@ -1931,22 +1777,6 @@ public String getDescription() { } }); ExecuteEnrichPolicyTask task = ((ExecuteEnrichPolicyTask) asyncTask); - // The executor would wrap the listener in order to clean up the task in the - // task manager, but we're just testing the runner, so we make sure to clean - // up after ourselves. - ActionListener wrappedListener = new ActionListener<>() { - @Override - public void onResponse(ExecuteEnrichPolicyStatus policyExecutionResult) { - testTaskManager.unregister(task); - listener.onResponse(policyExecutionResult); - } - - @Override - public void onFailure(Exception e) { - testTaskManager.unregister(task); - listener.onFailure(e); - } - }; AtomicInteger forceMergeAttempts = new AtomicInteger(0); final XContentBuilder unmergedDocument = SmileXContent.contentBuilder() .startObject() @@ -1958,7 +1788,6 @@ public void onFailure(Exception e) { policyName, policy, task, - wrappedListener, clusterService, getInstanceFromNode(IndicesService.class), client(), @@ -1968,7 +1797,19 @@ public void onFailure(Exception e) { randomIntBetween(3, 10) ) { @Override - protected void ensureSingleSegment(String destinationIndexName, int attempt) { + public void run(ActionListener listener) { + // The executor would wrap the listener in order to clean up the task in the + // task manager, but we're just testing the runner, so we make sure to clean + // up after ourselves. 
+ super.run(ActionListener.runBefore(listener, () -> testTaskManager.unregister(task))); + } + + @Override + protected void ensureSingleSegment( + String destinationIndexName, + int attempt, + ActionListener listener + ) { forceMergeAttempts.incrementAndGet(); if (attempt == 1) { // Put and flush a document to increase the number of segments, simulating not @@ -1980,16 +1821,12 @@ protected void ensureSingleSegment(String destinationIndexName, int attempt) { ).actionGet(); assertEquals(RestStatus.CREATED, indexRequest.status()); } - super.ensureSingleSegment(destinationIndexName, attempt); + super.ensureSingleSegment(destinationIndexName, attempt, listener); } }; logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - if (exception.get() != null) { - throw exception.get(); - } + safeExecute(enrichPolicyRunner); // Validate number of force merges assertThat(forceMergeAttempts.get(), equalTo(2)); @@ -2080,9 +1917,6 @@ public void testRunnerWithEmptySegmentsResponse() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); ClusterService clusterService = getInstanceFromNode(ClusterService.class); IndexNameExpressionResolver resolver = getInstanceFromNode(IndexNameExpressionResolver.class); Task asyncTask = testTaskManager.register("enrich", "policy_execution", new TaskAwareRequest() { @@ -2108,13 +1942,6 @@ public String getDescription() { } }); ExecuteEnrichPolicyTask task = ((ExecuteEnrichPolicyTask) asyncTask); - // The executor would wrap the listener in order to clean up the task in the - // task manager, but we're just testing the runner, so we make sure to clean - // up after ourselves. - ActionListener wrappedListener = ActionListener.runBefore( - listener, - () -> testTaskManager.unregister(task) - ); // Wrap the client so that when we receive the indices segments action, we intercept the request and complete it on another thread // with an empty segments response. @@ -2141,7 +1968,6 @@ protected void policyName, policy, task, - wrappedListener, clusterService, getInstanceFromNode(IndicesService.class), client, @@ -2149,21 +1975,21 @@ protected void createdEnrichIndex, randomIntBetween(1, 10000), randomIntBetween(3, 10) - ); + ) { + @Override + public void run(ActionListener listener) { + // The executor would wrap the listener in order to clean up the task in the + // task manager, but we're just testing the runner, so we make sure to clean + // up after ourselves. 
+ super.run(ActionListener.runBefore(listener, () -> testTaskManager.unregister(task))); + } + }; logger.info("Starting policy run"); - enrichPolicyRunner.run(); - if (latch.await(1, TimeUnit.MINUTES) == false) { - fail("Timeout while waiting for runner to complete"); - } - Exception exceptionThrown = exception.get(); - if (exceptionThrown == null) { - fail("Expected exception to be thrown from segment api"); - } - - // Validate exception information - assertThat(exceptionThrown, instanceOf(ElasticsearchException.class)); - assertThat(exceptionThrown.getMessage(), containsString("Could not locate segment information for newly created index")); + assertThat( + asInstanceOf(ElasticsearchException.class, safeExecuteExpectFailure(enrichPolicyRunner)).getMessage(), + containsString("Could not locate segment information for newly created index") + ); } public void testRunnerWithShardFailuresInSegmentResponse() throws Exception { @@ -2197,9 +2023,6 @@ public void testRunnerWithShardFailuresInSegmentResponse() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); ClusterService clusterService = getInstanceFromNode(ClusterService.class); IndexNameExpressionResolver resolver = getInstanceFromNode(IndexNameExpressionResolver.class); Task asyncTask = testTaskManager.register("enrich", "policy_execution", new TaskAwareRequest() { @@ -2228,10 +2051,6 @@ public String getDescription() { // The executor would wrap the listener in order to clean up the task in the // task manager, but we're just testing the runner, so we make sure to clean // up after ourselves. - ActionListener wrappedListener = ActionListener.runBefore( - listener, - () -> testTaskManager.unregister(task) - ); // Wrap the client so that when we receive the indices segments action, we intercept the request and complete it on another thread // with an failed segments response. @@ -2270,7 +2089,6 @@ protected void policyName, policy, task, - wrappedListener, clusterService, getInstanceFromNode(IndicesService.class), client, @@ -2278,26 +2096,23 @@ protected void createdEnrichIndex, randomIntBetween(1, 10000), randomIntBetween(3, 10) - ); + ) { + @Override + public void run(ActionListener listener) { + // The executor would wrap the listener in order to clean up the task in the + // task manager, but we're just testing the runner, so we make sure to clean + // up after ourselves. 
+ super.run(ActionListener.runBefore(listener, () -> testTaskManager.unregister(task))); + } + }; logger.info("Starting policy run"); - enrichPolicyRunner.run(); - if (latch.await(1, TimeUnit.MINUTES) == false) { - fail("Timeout while waiting for runner to complete"); - } - Exception exceptionThrown = exception.get(); - if (exceptionThrown == null) { - fail("Expected exception to be thrown from segment api"); - } - - // Validate exception information - assertThat(exceptionThrown, instanceOf(ElasticsearchException.class)); + final var exceptionThrown = asInstanceOf(ElasticsearchException.class, safeExecuteExpectFailure(enrichPolicyRunner)); assertThat(exceptionThrown.getMessage(), containsString("Could not obtain segment information for newly created index")); - assertThat(exceptionThrown.getCause(), instanceOf(ElasticsearchException.class)); - assertThat(exceptionThrown.getCause().getMessage(), containsString("failure1")); + assertThat(asInstanceOf(ElasticsearchException.class, exceptionThrown.getCause()).getMessage(), containsString("failure1")); } - public void testRunnerCancel() throws Exception { + public void testRunnerCancel() { final String sourceIndex = "source-index"; DocWriteResponse indexRequest = client().index(new IndexRequest().index(sourceIndex).id("id").source(""" { @@ -2315,9 +2130,6 @@ public void testRunnerCancel() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); ActionType randomActionType = randomFrom( EnrichReindexAction.INSTANCE, @@ -2349,12 +2161,12 @@ protected void } }; - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(client, policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(client, policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - assertThat(exception.get(), notNullValue()); - assertThat(exception.get().getMessage(), containsString("cancelled policy execution [test1], status [")); + assertThat( + safeExecuteExpectFailure(enrichPolicyRunner).getMessage(), + containsString("cancelled policy execution [test1], status [") + ); } public void testRunRangePolicyWithObjectFieldAsMatchField() throws Exception { @@ -2386,17 +2198,13 @@ public void testRunRangePolicyWithObjectFieldAsMatchField() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - Exception e = exception.get(); - assertThat(e, notNullValue()); - assertThat(e.getMessage(), equalTo("Field 'field1' has type [object] which doesn't appear to be a range type")); + assertThat( + safeExecuteExpectFailure(enrichPolicyRunner).getMessage(), + equalTo("Field 'field1' has type [object] which doesn't appear to be a range type") + ); } public void 
testEnrichFieldsConflictMappingTypes() throws Exception { @@ -2427,10 +2235,7 @@ public void testEnrichFieldsConflictMappingTypes() throws Exception { String policyName = "test1"; final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - PlainActionFuture future = new PlainActionFuture<>(); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, future, createdEnrichIndex); - enrichPolicyRunner.run(); - future.actionGet(); + safeExecute(createPolicyRunner(policyName, policy, createdEnrichIndex)); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -2473,10 +2278,7 @@ public void testEnrichMappingConflictFormats() throws ExecutionException, Interr String policyName = "test1"; final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - PlainActionFuture future = new PlainActionFuture<>(); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, future, createdEnrichIndex); - enrichPolicyRunner.run(); - future.actionGet(); + safeExecute(createPolicyRunner(policyName, policy, createdEnrichIndex)); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -2508,10 +2310,7 @@ public void testEnrichObjectField() throws ExecutionException, InterruptedExcept String policyName = "test1"; final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - PlainActionFuture future = new PlainActionFuture<>(); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, future, createdEnrichIndex); - enrichPolicyRunner.run(); - future.actionGet(); + safeExecute(createPolicyRunner(policyName, policy, createdEnrichIndex)); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -2566,12 +2365,10 @@ public void testEnrichNestedField() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - PlainActionFuture future = new PlainActionFuture<>(); - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, future, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - future.actionGet(); + safeExecute(enrichPolicyRunner); // Validate Index definition GetIndexResponse enrichIndex = getGetIndexResponseAndCheck(createdEnrichIndex); @@ -2625,9 +2422,6 @@ public void testRunnerValidatesIndexIntegrity() throws Exception { final long createTime = randomNonNegativeLong(); String createdEnrichIndex = ".enrich-test1-" + createTime; - final AtomicReference exception = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); - ActionListener listener = createTestListener(latch, exception::set); // Wrap the client so that when we receive the reindex action, we delete the index then resume operation. This mimics an invalid // state for the resulting index. 
@@ -2654,36 +2448,20 @@ protected void } } }; - EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(client, policyName, policy, listener, createdEnrichIndex); + EnrichPolicyRunner enrichPolicyRunner = createPolicyRunner(client, policyName, policy, createdEnrichIndex); logger.info("Starting policy run"); - enrichPolicyRunner.run(); - latch.await(); - Exception runnerException = exception.get(); - if (runnerException == null) { - fail("Expected the runner to fail when the underlying index was deleted during policy execution!"); - } - assertThat(runnerException, is(instanceOf(ElasticsearchException.class))); - assertThat(runnerException.getMessage(), containsString("Could not verify enrich index")); - assertThat(runnerException.getMessage(), containsString("mapping meta field missing")); + assertThat( + asInstanceOf(ElasticsearchException.class, safeExecuteExpectFailure(enrichPolicyRunner)).getMessage(), + allOf(containsString("Could not verify enrich index"), containsString("mapping meta field missing")) + ); } - private EnrichPolicyRunner createPolicyRunner( - String policyName, - EnrichPolicy policy, - ActionListener listener, - String targetIndex - ) { - return createPolicyRunner(client(), policyName, policy, listener, targetIndex); + private EnrichPolicyRunner createPolicyRunner(String policyName, EnrichPolicy policy, String targetIndex) { + return createPolicyRunner(client(), policyName, policy, targetIndex); } - private EnrichPolicyRunner createPolicyRunner( - Client client, - String policyName, - EnrichPolicy policy, - ActionListener listener, - String targetIndex - ) { + private EnrichPolicyRunner createPolicyRunner(Client client, String policyName, EnrichPolicy policy, String targetIndex) { ClusterService clusterService = getInstanceFromNode(ClusterService.class); IndexNameExpressionResolver resolver = getInstanceFromNode(IndexNameExpressionResolver.class); Task asyncTask = testTaskManager.register("enrich", "policy_execution", new TaskAwareRequest() { @@ -2709,27 +2487,10 @@ public String getDescription() { } }); ExecuteEnrichPolicyTask task = ((ExecuteEnrichPolicyTask) asyncTask); - // The executor would wrap the listener in order to clean up the task in the - // task manager, but we're just testing the runner, so we make sure to clean - // up after ourselves. - ActionListener wrappedListener = new ActionListener<>() { - @Override - public void onResponse(ExecuteEnrichPolicyStatus policyExecutionResult) { - testTaskManager.unregister(task); - listener.onResponse(policyExecutionResult); - } - - @Override - public void onFailure(Exception e) { - testTaskManager.unregister(task); - listener.onFailure(e); - } - }; return new EnrichPolicyRunner( policyName, policy, task, - wrappedListener, clusterService, getInstanceFromNode(IndicesService.class), client, @@ -2737,14 +2498,24 @@ public void onFailure(Exception e) { targetIndex, randomIntBetween(1, 10000), randomIntBetween(1, 10) - ); + ) { + @Override + public void run(ActionListener listener) { + // The executor would wrap the listener in order to clean up the task in the + // task manager, but we're just testing the runner, so we make sure to clean + // up after ourselves. 
+ super.run(ActionListener.runBefore(listener, () -> testTaskManager.unregister(task))); + } + }; + } + + private void safeExecute(EnrichPolicyRunner enrichPolicyRunner) { + safeAwait(enrichPolicyRunner::run); + logger.debug("Run complete"); } - private ActionListener createTestListener( - final CountDownLatch latch, - final Consumer exceptionConsumer - ) { - return new LatchedActionListener<>(ActionListener.wrap((r) -> logger.debug("Run complete"), exceptionConsumer), latch); + private Exception safeExecuteExpectFailure(EnrichPolicyRunner enrichPolicyRunner) { + return safeAwaitFailure(enrichPolicyRunner::run); } private void validateMappingMetadata(Map mapping, String policyName, EnrichPolicy policy) { From d778b9d85170b404903e32d9f9c461d885c5995c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:51:22 +1000 Subject: [PATCH 17/22] Mute org.elasticsearch.xpack.restart.FullClusterRestartIT testSingleDoc {cluster=UPGRADED} #111434 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 1df885cdc72c0..dfec80328588d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -146,6 +146,9 @@ tests: - class: org.elasticsearch.xpack.restart.FullClusterRestartIT method: testSingleDoc {cluster=OLD} issue: https://github.com/elastic/elasticsearch/issues/111430 +- class: org.elasticsearch.xpack.restart.FullClusterRestartIT + method: testSingleDoc {cluster=UPGRADED} + issue: https://github.com/elastic/elasticsearch/issues/111434 # Examples: # From 1f4788fced685ac20eb5afca1d1ab3692c671d03 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Mon, 29 Jul 2024 18:09:48 -0500 Subject: [PATCH 18/22] Removing the use of watcher stats from WatchAckTests (#111435) --- .../xpack/watcher/test/integration/WatchAckTests.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/WatchAckTests.java b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/WatchAckTests.java index 1308597b7bcf9..6402b71d3b810 100- a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/WatchAckTests.java +++ b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/WatchAckTests.java @@ -24,7 +24,6 @@ import org.elasticsearch.xpack.core.watcher.transport.actions.get.GetWatchRequestBuilder; import org.elasticsearch.xpack.core.watcher.transport.actions.get.GetWatchResponse; import org.elasticsearch.xpack.core.watcher.transport.actions.put.PutWatchRequestBuilder; -import org.elasticsearch.xpack.core.watcher.transport.actions.stats.WatcherStatsRequestBuilder; import org.elasticsearch.xpack.core.watcher.watch.Watch; import org.elasticsearch.xpack.watcher.condition.CompareCondition; import org.elasticsearch.xpack.watcher.test.AbstractWatcherIntegrationTestCase; @@ -75,7 +74,6 @@ public void testAckSingleAction() throws Exception { .get(); assertThat(putWatchResponse.isCreated(), is(true)); - assertThat(new WatcherStatsRequestBuilder(client()).get().getWatchesCount(), is(1L)); timeWarp().trigger("_id", 4, TimeValue.timeValueSeconds(5)); AckWatchResponse ackResponse = new AckWatchRequestBuilder(client(), "_id").setActionIds("_a1").get(); @@ -148,7 +146,6 @@ public void testAckAllActions() throws Exception { .get(); assertThat(putWatchResponse.isCreated(), is(true)); -
assertThat(new WatcherStatsRequestBuilder(client()).get().getWatchesCount(), is(1L)); timeWarp().trigger("_id", 4, TimeValue.timeValueSeconds(5)); @@ -226,7 +223,6 @@ public void testAckWithRestart() throws Exception { ) .get(); assertThat(putWatchResponse.isCreated(), is(true)); - assertThat(new WatcherStatsRequestBuilder(client()).get().getWatchesCount(), is(1L)); timeWarp().trigger("_name", 4, TimeValue.timeValueSeconds(5)); restartWatcherRandomly(); From e9888de64dfa67a65844a15ffaa2cde059410d9d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 30 Jul 2024 14:33:08 +1000 Subject: [PATCH 19/22] Mute org.elasticsearch.xpack.snapshotbasedrecoveries.recovery.AzureSnapshotBasedRecoveryIT testRecoveryUsingSnapshots #111377 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index dfec80328588d..dd04662a56ed7 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -149,6 +149,9 @@ tests: - class: org.elasticsearch.xpack.restart.FullClusterRestartIT method: testSingleDoc {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/111434 +- class: org.elasticsearch.xpack.snapshotbasedrecoveries.recovery.AzureSnapshotBasedRecoveryIT + method: testRecoveryUsingSnapshots + issue: https://github.com/elastic/elasticsearch/issues/111377 # Examples: # From 68a18305ef987e2f4ab1fa96f5a7cb4f17624305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Tue, 30 Jul 2024 08:21:46 +0200 Subject: [PATCH 20/22] Add more logging to MetricsApmIT to identify why the test fails (#111360) --- .../apm-integration/build.gradle | 1 + .../test/apmintegration/MetricsApmIT.java | 21 ++++++++----- .../TestApmIntegrationRestHandler.java | 2 +- .../test/apmintegration/TestMeterUsages.java | 31 ++++++++++++++++--- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/test/external-modules/apm-integration/build.gradle b/test/external-modules/apm-integration/build.gradle index 3ae62c72968b3..98090f33ee2c7 100644 --- a/test/external-modules/apm-integration/build.gradle +++ b/test/external-modules/apm-integration/build.gradle @@ -22,4 +22,5 @@ tasks.named('javaRestTest').configure { dependencies { clusterModules project(':modules:apm') + implementation project(':libs:elasticsearch-logging') } diff --git a/test/external-modules/apm-integration/src/javaRestTest/java/org/elasticsearch/test/apmintegration/MetricsApmIT.java b/test/external-modules/apm-integration/src/javaRestTest/java/org/elasticsearch/test/apmintegration/MetricsApmIT.java index 9980c0a25a5dd..e8d8e20727c0a 100644 --- a/test/external-modules/apm-integration/src/javaRestTest/java/org/elasticsearch/test/apmintegration/MetricsApmIT.java +++ b/test/external-modules/apm-integration/src/javaRestTest/java/org/elasticsearch/test/apmintegration/MetricsApmIT.java @@ -89,10 +89,17 @@ public void testApmIntegration() throws Exception { var metricset = (Map<String, Object>) apmMessage.get("metricset"); var samples = (Map<String, Object>) metricset.get("samples"); - samples.entrySet().forEach(sampleEntry -> { - var assertion = sampleAssertions.get(sampleEntry.getKey());// sample name - if (assertion != null && assertion.test((Map<String, Object>) sampleEntry.getValue())) {// sample object - sampleAssertions.remove(sampleEntry.getKey()); + samples.forEach((key, value) -> { + var assertion = sampleAssertions.get(key);// sample name + if (assertion != null) { + logger.info("Matched {}", key); + var sampleObject = (Map<String, Object>) value; + if (assertion.test(sampleObject)) {// sample object + logger.info("{} assertion PASSED", key); + sampleAssertions.remove(key); + } else { + logger.error("{} assertion FAILED: {}", key, sampleObject.get("value")); + } } }); } @@ -106,10 +113,8 @@ public void testApmIntegration() throws Exception { client().performRequest(new Request("GET", "/_use_apm_metrics")); - assertTrue( - "Timeout when waiting for assertions to complete. Remaining assertions to match: " + sampleAssertions, - finished.await(30, TimeUnit.SECONDS) - ); + var completed = finished.await(30, TimeUnit.SECONDS); + assertTrue("Timeout when waiting for assertions to complete. Remaining assertions to match: " + sampleAssertions, completed); } private Map.Entry<String, Predicate<Map<String, Object>>> assertion( diff --git a/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestApmIntegrationRestHandler.java b/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestApmIntegrationRestHandler.java index b4f6708cc71e8..e667fa5ddcbfb 100644 --- a/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestApmIntegrationRestHandler.java +++ b/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestApmIntegrationRestHandler.java @@ -22,7 +22,7 @@ public class TestApmIntegrationRestHandler extends BaseRestHandler { - private SetOnce<TestMeterUsages> testMeterUsages = new SetOnce<>(); + private final SetOnce<TestMeterUsages> testMeterUsages = new SetOnce<>(); TestApmIntegrationRestHandler() {} diff --git a/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestMeterUsages.java b/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestMeterUsages.java index 9c23ce371e044..07d43090f70ab 100644 --- a/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestMeterUsages.java +++ b/test/external-modules/apm-integration/src/main/java/org/elasticsearch/test/apmintegration/TestMeterUsages.java @@ -8,6 +8,8 @@ package org.elasticsearch.test.apmintegration; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.telemetry.metric.DoubleCounter; import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.DoubleWithAttributes; @@ -20,6 +22,8 @@ public class TestMeterUsages { + private static final Logger logger = LogManager.getLogger(TestMeterUsages.class); + private final DoubleCounter doubleCounter; private final DoubleCounter longCounter; private final DoubleHistogram doubleHistogram; @@ -32,14 +36,30 @@ public TestMeterUsages(MeterRegistry meterRegistry) { this.longCounter = meterRegistry.registerDoubleCounter("es.test.double_counter.total", "test", "unit"); this.doubleHistogram = meterRegistry.registerDoubleHistogram("es.test.double_histogram.histogram", "test", "unit"); this.longHistogram = meterRegistry.registerLongHistogram("es.test.long_histogram.histogram", "test", "unit"); - meterRegistry.registerDoubleGauge("es.test.double_gauge.current", "test", "unit", doubleWithAttributes::get); - meterRegistry.registerLongGauge("es.test.long_gauge.current", "test", "unit", longWithAttributes::get); - - meterRegistry.registerLongAsyncCounter("es.test.async_long_counter.total", "test", "unit", longWithAttributes::get); - meterRegistry.registerDoubleAsyncCounter("es.test.async_double_counter.total", "test", "unit", doubleWithAttributes::get); + meterRegistry.registerDoubleGauge("es.test.double_gauge.current",
"test", "unit", () -> { + var value = doubleWithAttributes.get(); + logger.info("[es.test.double_gauge.current] callback with value [{}]", value); + return value; + }); + meterRegistry.registerLongGauge("es.test.long_gauge.current", "test", "unit", () -> { + var value = longWithAttributes.get(); + logger.info("[es.test.long_gauge.current] callback with value [{}]", value); + return value; + }); + meterRegistry.registerLongAsyncCounter("es.test.async_long_counter.total", "test", "unit", () -> { + var value = longWithAttributes.get(); + logger.info("[es.test.async_long_counter.total] callback with value [{}]", value); + return value; + }); + meterRegistry.registerDoubleAsyncCounter("es.test.async_double_counter.total", "test", "unit", () -> { + var value = doubleWithAttributes.get(); + logger.info("[es.test.async_double_counter.total] callback with value [{}]", value); + return value; + }); } public void testUponRequest() { + logger.info("setting counters"); doubleCounter.increment(); longCounter.increment(); doubleHistogram.record(1.0); @@ -48,6 +68,7 @@ public void testUponRequest() { longHistogram.record(2); // triggers gauges and async counters + logger.info("setting async counters"); doubleWithAttributes.set(new DoubleWithAttributes(1.0, Map.of())); longWithAttributes.set(new LongWithAttributes(1, Map.of())); } From b8af2a066ee67d3d8b4a252e88e0a54dca5f347d Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 30 Jul 2024 07:33:19 +0100 Subject: [PATCH 21/22] Remove usages of more test-only request builders (#111400) Deprecates for removal the following methods from `ClusterAdminClient`: - `prepareSearchShards` - `preparePutStoredScript` - `prepareDeleteStoredScript` - `prepareGetStoredScript` Also replaces all usages of these methods with more suitable test utilities. This will permit their removal, and the removal of the corresponding `RequestBuilder` objects, in a followup. 
Relates #107984 --- .../datastreams/DataStreamIT.java | 6 +- .../ingest/common/IngestRestartIT.java | 5 +- .../script/expression/StoredExpressionIT.java | 9 ++- .../script/mustache/SearchTemplateIT.java | 58 +++++++++------- .../script/mustache/SearchUsageStatsIT.java | 8 +-- .../SearchProgressActionListenerIT.java | 8 ++- .../action/termvectors/GetTermVectorsIT.java | 7 +- .../cluster/routing/ShardRoutingRoleIT.java | 10 +-- .../cluster/shards/ClusterSearchShardsIT.java | 41 ++++++++---- .../elasticsearch/script/StoredScriptsIT.java | 66 ++++++++++++++----- .../metrics/ScriptedMetricIT.java | 19 +++--- .../aggregations/pipeline/BucketScriptIT.java | 17 ++--- .../SnapshotCustomPluginStateIT.java | 26 ++++---- .../client/internal/ClusterAdminClient.java | 5 +- .../AbstractClientHeadersTestCase.java | 10 +-- .../StoredScriptIntegTestUtils.java | 33 ++++++++++ .../elasticsearch/test/ESIntegTestCase.java | 12 ++++ .../test/ESSingleNodeTestCase.java | 12 ++++ .../org/elasticsearch/test/ESTestCase.java | 13 ++++ .../downsample/ILMDownsampleDisruptionIT.java | 13 ++-- .../integration/DlsFlsRequestCacheTests.java | 6 +- .../test/integration/BasicWatcherTests.java | 28 ++++---- .../transform/TransformIntegrationTests.java | 8 +-- 23 files changed, 279 insertions(+), 141 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java index f79eea8676b3e..637c48813de35 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java @@ -14,6 +14,8 @@ import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.RequestBuilder; +import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; +import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; @@ -583,7 +585,7 @@ public void testResolvabilityOfDataStreamsInAPIs() throws Exception { verifyResolvability(dataStreamName, indicesAdmin().prepareOpen(dataStreamName), false); verifyResolvability(dataStreamName, indicesAdmin().prepareClose(dataStreamName), true); verifyResolvability(aliasToDataStream, indicesAdmin().prepareClose(aliasToDataStream), true); - verifyResolvability(dataStreamName, clusterAdmin().prepareSearchShards(dataStreamName), false); + verifyResolvability(client().execute(TransportClusterSearchShardsAction.TYPE, new ClusterSearchShardsRequest(dataStreamName))); verifyResolvability(client().execute(TransportIndicesShardStoresAction.TYPE, new IndicesShardStoresRequest(dataStreamName))); request = new CreateDataStreamAction.Request("logs-barbaz"); @@ -627,7 +629,7 @@ public void testResolvabilityOfDataStreamsInAPIs() throws Exception { verifyResolvability(wildcardExpression, indicesAdmin().prepareGetIndex().addIndices(wildcardExpression), false); verifyResolvability(wildcardExpression, indicesAdmin().prepareOpen(wildcardExpression), false); verifyResolvability(wildcardExpression, 
indicesAdmin().prepareClose(wildcardExpression), false); - verifyResolvability(wildcardExpression, clusterAdmin().prepareSearchShards(wildcardExpression), false); + verifyResolvability(client().execute(TransportClusterSearchShardsAction.TYPE, new ClusterSearchShardsRequest(wildcardExpression))); verifyResolvability(client().execute(TransportIndicesShardStoresAction.TYPE, new IndicesShardStoresRequest(wildcardExpression))); } diff --git a/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/ingest/common/IngestRestartIT.java b/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/ingest/common/IngestRestartIT.java index 9f9d53d4d5081..f1c592e6e8345 100644 --- a/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/ingest/common/IngestRestartIT.java +++ b/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/ingest/common/IngestRestartIT.java @@ -49,6 +49,7 @@ import java.util.stream.IntStream; import static org.elasticsearch.action.admin.cluster.node.stats.NodesStatsRequestParameters.Metric.INGEST; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.test.NodeRoles.onlyRole; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -193,9 +194,9 @@ public Settings onNodeStopped(String nodeName) { public void testPipelineWithScriptProcessorThatHasStoredScript() throws Exception { internalCluster().startNode(); - clusterAdmin().preparePutStoredScript().setId("1").setContent(new BytesArray(Strings.format(""" + putJsonStoredScript("1", Strings.format(""" {"script": {"lang": "%s", "source": "my_script"} } - """, MockScriptEngine.NAME)), XContentType.JSON).get(); + """, MockScriptEngine.NAME)); BytesReference pipeline = new BytesArray(""" { "processors" : [ diff --git a/modules/lang-expression/src/internalClusterTest/java/org/elasticsearch/script/expression/StoredExpressionIT.java b/modules/lang-expression/src/internalClusterTest/java/org/elasticsearch/script/expression/StoredExpressionIT.java index 121a6b01ea792..c41fea6b86ceb 100644 --- a/modules/lang-expression/src/internalClusterTest/java/org/elasticsearch/script/expression/StoredExpressionIT.java +++ b/modules/lang-expression/src/internalClusterTest/java/org/elasticsearch/script/expression/StoredExpressionIT.java @@ -8,7 +8,6 @@ package org.elasticsearch.script.expression; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.script.Script; @@ -18,10 +17,10 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.XContentType; -import java.io.IOException; import java.util.Collection; import java.util.Collections; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.hamcrest.Matchers.containsString; //TODO: please convert to unit tests! 
@@ -38,9 +37,9 @@ protected Collection> nodePlugins() { return Collections.singleton(ExpressionPlugin.class); } - public void testAllOpsDisabledIndexedScripts() throws IOException { - clusterAdmin().preparePutStoredScript().setId("script1").setContent(new BytesArray(""" - {"script": {"lang": "expression", "source": "2"} }"""), XContentType.JSON).get(); + public void testAllOpsDisabledIndexedScripts() { + putJsonStoredScript("script1", """ + {"script": {"lang": "expression", "source": "2"} }"""); prepareIndex("test").setId("1").setSource("{\"theField\":\"foo\"}", XContentType.JSON).get(); try { client().prepareUpdate("test", "1").setScript(new Script(ScriptType.STORED, null, "script1", Collections.emptyMap())).get(); diff --git a/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java b/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java index e17fb4b26cd28..46b75d89c5b0e 100644 --- a/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java +++ b/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java @@ -8,7 +8,13 @@ package org.elasticsearch.script.mustache; import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; +import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptResponse; +import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction; +import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.common.bytes.BytesArray; @@ -155,8 +161,8 @@ public void testTemplateQueryAsEscapedStringWithConditionalClauseAtEnd() throws ); } - public void testIndexedTemplateClient() throws Exception { - assertAcked(clusterAdmin().preparePutStoredScript().setId("testTemplate").setContent(new BytesArray(""" + public void testIndexedTemplateClient() { + putJsonStoredScript("testTemplate", """ { "script": { "lang": "mustache", @@ -168,9 +174,9 @@ public void testIndexedTemplateClient() throws Exception { } } } - }"""), XContentType.JSON)); + }"""); - GetStoredScriptResponse getResponse = clusterAdmin().prepareGetStoredScript("testTemplate").get(); + GetStoredScriptResponse getResponse = safeExecute(GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest("testTemplate")); assertNotNull(getResponse.getSource()); BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); @@ -193,9 +199,9 @@ public void testIndexedTemplateClient() throws Exception { 4 ); - assertAcked(clusterAdmin().prepareDeleteStoredScript("testTemplate")); + assertAcked(safeExecute(TransportDeleteStoredScriptAction.TYPE, new DeleteStoredScriptRequest("testTemplate"))); - getResponse = clusterAdmin().prepareGetStoredScript("testTemplate").get(); + getResponse = safeExecute(GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest("testTemplate")); assertNull(getResponse.getSource()); } @@ -250,7 +256,7 @@ public void testBadTemplate() { } } - public void testIndexedTemplate() throws 
Exception { + public void testIndexedTemplate() { String script = """ { @@ -267,9 +273,9 @@ public void testIndexedTemplate() throws Exception { } """; - assertAcked(clusterAdmin().preparePutStoredScript().setId("1a").setContent(new BytesArray(script), XContentType.JSON)); - assertAcked(clusterAdmin().preparePutStoredScript().setId("2").setContent(new BytesArray(script), XContentType.JSON)); - assertAcked(clusterAdmin().preparePutStoredScript().setId("3").setContent(new BytesArray(script), XContentType.JSON)); + putJsonStoredScript("1a", script); + putJsonStoredScript("2", script); + putJsonStoredScript("3", script); BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); bulkRequestBuilder.add(prepareIndex("test").setId("1").setSource("{\"theField\":\"foo\"}", XContentType.JSON)); @@ -335,13 +341,9 @@ public void testIndexedTemplateOverwrite() throws Exception { } }"""; for (int i = 1; i < iterations; i++) { - assertAcked( - clusterAdmin().preparePutStoredScript() - .setId("git01") - .setContent(new BytesArray(query.replace("{{slop}}", Integer.toString(-1))), XContentType.JSON) - ); + putJsonStoredScript("git01", query.replace("{{slop}}", Integer.toString(-1))); - GetStoredScriptResponse getResponse = clusterAdmin().prepareGetStoredScript("git01").get(); + GetStoredScriptResponse getResponse = safeExecute(GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest("git01")); assertNotNull(getResponse.getSource()); Map templateParams = new HashMap<>(); @@ -357,11 +359,8 @@ public void testIndexedTemplateOverwrite() throws Exception { ); assertThat(e.getMessage(), containsString("No negative slop allowed")); - assertAcked( - clusterAdmin().preparePutStoredScript() - .setId("git01") - .setContent(new BytesArray(query.replace("{{slop}}", Integer.toString(0))), XContentType.JSON) - ); + putJsonStoredScript("git01", query.replace("{{slop}}", Integer.toString(0))); + assertHitCount( new SearchTemplateRequestBuilder(client()).setRequest(new SearchRequest("testindex")) .setScript("git01") @@ -373,8 +372,8 @@ public void testIndexedTemplateOverwrite() throws Exception { } } - public void testIndexedTemplateWithArray() throws Exception { - String multiQuery = """ + public void testIndexedTemplateWithArray() { + putJsonStoredScript("4", """ { "script": { "lang": "mustache", @@ -390,8 +389,8 @@ public void testIndexedTemplateWithArray() throws Exception { } } } - }"""; - assertAcked(clusterAdmin().preparePutStoredScript().setId("4").setContent(new BytesArray(multiQuery), XContentType.JSON)); + }"""); + BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); bulkRequestBuilder.add(prepareIndex("test").setId("1").setSource("{\"theField\":\"foo\"}", XContentType.JSON)); bulkRequestBuilder.add(prepareIndex("test").setId("2").setSource("{\"theField\":\"foo 2\"}", XContentType.JSON)); @@ -454,4 +453,13 @@ public void testCCSCheckCompatibility() throws Exception { public static void assertHitCount(SearchTemplateRequestBuilder requestBuilder, long expectedHitCount) { assertResponse(requestBuilder, response -> ElasticsearchAssertions.assertHitCount(response.getResponse(), expectedHitCount)); } + + private void putJsonStoredScript(String id, String jsonContent) { + assertAcked( + safeExecute( + TransportPutStoredScriptAction.TYPE, + new PutStoredScriptRequest().id(id).content(new BytesArray(jsonContent), XContentType.JSON) + ) + ); + } } diff --git a/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchUsageStatsIT.java 
b/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchUsageStatsIT.java index b14ca7ea7cfa2..e51a4822c67ba 100644 --- a/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchUsageStatsIT.java +++ b/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchUsageStatsIT.java @@ -10,16 +10,14 @@ import org.elasticsearch.action.admin.cluster.stats.SearchUsageStats; import org.elasticsearch.client.Request; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.Collection; import java.util.List; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 1) public class SearchUsageStatsIT extends ESIntegTestCase { @@ -62,7 +60,7 @@ public void testSearchUsageStats() throws IOException { getRestClient().performRequest(request); } { - assertAcked(clusterAdmin().preparePutStoredScript().setId("testTemplate").setContent(new BytesArray(""" + putJsonStoredScript("testTemplate", """ { "script": { "lang": "mustache", @@ -74,7 +72,7 @@ public void testSearchUsageStats() throws IOException { } } } - }"""), XContentType.JSON)); + }"""); Request request = new Request("GET", "/_search/template"); request.setJsonEntity(""" { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java index e5dca62a97494..872d0cad0b2cb 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java @@ -9,7 +9,9 @@ package org.elasticsearch.action.search; import org.apache.lucene.search.TotalHits; +import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.client.internal.Client; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.core.Strings; @@ -197,7 +199,11 @@ private static List createRandomIndices(Client client) { client.prepareIndex(indexName).setSource("number", i, "foo", "bar").get(); } client.admin().indices().prepareRefresh("index-*").get(); - ClusterSearchShardsResponse resp = client.admin().cluster().prepareSearchShards("index-*").get(); + ClusterSearchShardsResponse resp = safeExecute( + client, + TransportClusterSearchShardsAction.TYPE, + new ClusterSearchShardsRequest("index-*") + ); return Arrays.stream(resp.getGroups()).map(e -> new SearchShard(null, e.getShardId())).sorted().toList(); } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java index cf8decc5655ec..75ced16124d8b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java +++ 
b/server/src/internalClusterTest/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java @@ -16,7 +16,9 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.ActionFuture; +import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.common.Strings; @@ -1013,7 +1015,10 @@ public void testArtificialDocWithPreference() throws InterruptedException, IOExc indexRandom(true, prepareIndex("test").setId("1").setSource("field1", "random permutation")); // Get search shards - ClusterSearchShardsResponse searchShardsResponse = clusterAdmin().prepareSearchShards("test").get(); + ClusterSearchShardsResponse searchShardsResponse = safeExecute( + TransportClusterSearchShardsAction.TYPE, + new ClusterSearchShardsRequest("test") + ); List shardIds = Arrays.stream(searchShardsResponse.getGroups()).map(s -> s.getShardId().id()).toList(); // request termvectors of artificial document from each shard diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/ShardRoutingRoleIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/ShardRoutingRoleIT.java index 8b551e00caeeb..6b33f017c9e10 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/ShardRoutingRoleIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/ShardRoutingRoleIT.java @@ -13,6 +13,8 @@ import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteUtils; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsGroup; +import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; +import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.action.admin.indices.refresh.TransportUnpromotableShardRefreshAction; import org.elasticsearch.action.search.ClosePointInTimeRequest; import org.elasticsearch.action.search.OpenPointInTimeRequest; @@ -548,15 +550,15 @@ public void testSearchRouting() throws Exception { } // search-shards API for (int i = 0; i < 10; i++) { - final var search = clusterAdmin().prepareSearchShards(INDEX_NAME); + final var search = new ClusterSearchShardsRequest(INDEX_NAME); switch (randomIntBetween(0, 2)) { - case 0 -> search.setRouting(randomAlphaOfLength(10)); - case 1 -> search.setRouting(randomSearchPreference(routingTableWatcher.numShards, internalCluster().getNodeNames())); + case 0 -> search.routing(randomAlphaOfLength(10)); + case 1 -> search.routing(randomSearchPreference(routingTableWatcher.numShards, internalCluster().getNodeNames())); default -> { // do nothing } } - ClusterSearchShardsGroup[] groups = search.get().getGroups(); + ClusterSearchShardsGroup[] groups = safeExecute(client(), TransportClusterSearchShardsAction.TYPE, search).getGroups(); for (ClusterSearchShardsGroup group : groups) { for (ShardRouting shr : group.getShards()) { String profileKey = "[" + shr.currentNodeId() + "][" + INDEX_NAME + "][" + shr.id() + "]"; diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java 
b/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java index d52498043366a..82e4ccf75a182 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/shards/ClusterSearchShardsIT.java @@ -7,9 +7,13 @@ */ package org.elasticsearch.cluster.shards; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsGroup; +import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions; +import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.common.Priority; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; @@ -38,10 +42,10 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { }; } - public void testSingleShardAllocation() throws Exception { + public void testSingleShardAllocation() { indicesAdmin().prepareCreate("test").setSettings(indexSettings(1, 0).put("index.routing.allocation.include.tag", "A")).get(); ensureGreen(); - ClusterSearchShardsResponse response = clusterAdmin().prepareSearchShards("test").get(); + ClusterSearchShardsResponse response = safeExecute(new ClusterSearchShardsRequest("test")); assertThat(response.getGroups().length, equalTo(1)); assertThat(response.getGroups()[0].getShardId().getIndexName(), equalTo("test")); assertThat(response.getGroups()[0].getShardId().getId(), equalTo(0)); @@ -49,7 +53,7 @@ public void testSingleShardAllocation() throws Exception { assertThat(response.getNodes().length, equalTo(1)); assertThat(response.getGroups()[0].getShards()[0].currentNodeId(), equalTo(response.getNodes()[0].getId())); - response = clusterAdmin().prepareSearchShards("test").setRouting("A").get(); + response = safeExecute(new ClusterSearchShardsRequest("test").routing("A")); assertThat(response.getGroups().length, equalTo(1)); assertThat(response.getGroups()[0].getShardId().getIndexName(), equalTo("test")); assertThat(response.getGroups()[0].getShardId().getId(), equalTo(0)); @@ -59,25 +63,25 @@ public void testSingleShardAllocation() throws Exception { } - public void testMultipleShardsSingleNodeAllocation() throws Exception { + public void testMultipleShardsSingleNodeAllocation() { indicesAdmin().prepareCreate("test").setSettings(indexSettings(4, 0).put("index.routing.allocation.include.tag", "A")).get(); ensureGreen(); - ClusterSearchShardsResponse response = clusterAdmin().prepareSearchShards("test").get(); + ClusterSearchShardsResponse response = safeExecute(new ClusterSearchShardsRequest("test")); assertThat(response.getGroups().length, equalTo(4)); assertThat(response.getGroups()[0].getShardId().getIndexName(), equalTo("test")); assertThat(response.getNodes().length, equalTo(1)); assertThat(response.getGroups()[0].getShards()[0].currentNodeId(), equalTo(response.getNodes()[0].getId())); - response = clusterAdmin().prepareSearchShards("test").setRouting("ABC").get(); + response = safeExecute(new ClusterSearchShardsRequest("test").routing("ABC")); assertThat(response.getGroups().length, equalTo(1)); - response = clusterAdmin().prepareSearchShards("test").setPreference("_shards:2").get(); + 
response = safeExecute(new ClusterSearchShardsRequest("test").preference("_shards:2")); assertThat(response.getGroups().length, equalTo(1)); assertThat(response.getGroups()[0].getShardId().getId(), equalTo(2)); } - public void testMultipleIndicesAllocation() throws Exception { + public void testMultipleIndicesAllocation() { createIndex("test1", 4, 1); createIndex("test2", 4, 1); indicesAdmin().prepareAliases() @@ -86,7 +90,7 @@ public void testMultipleIndicesAllocation() throws Exception { .get(); clusterAdmin().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().get(); - ClusterSearchShardsResponse response = clusterAdmin().prepareSearchShards("routing_alias").get(); + ClusterSearchShardsResponse response = safeExecute(new ClusterSearchShardsRequest("routing_alias")); assertThat(response.getGroups().length, equalTo(2)); assertThat(response.getGroups()[0].getShards().length, equalTo(2)); assertThat(response.getGroups()[1].getShards().length, equalTo(2)); @@ -128,7 +132,7 @@ public void testClusterSearchShardsWithBlocks() { )) { try { enableIndexBlock("test-blocks", blockSetting); - ClusterSearchShardsResponse response = clusterAdmin().prepareSearchShards("test-blocks").get(); + ClusterSearchShardsResponse response = safeExecute(new ClusterSearchShardsRequest("test-blocks")); assertThat(response.getGroups().length, equalTo(numShards.numPrimaries)); } finally { disableIndexBlock("test-blocks", blockSetting); @@ -138,9 +142,24 @@ public void testClusterSearchShardsWithBlocks() { // Request is blocked try { enableIndexBlock("test-blocks", SETTING_BLOCKS_METADATA); - assertBlocked(clusterAdmin().prepareSearchShards("test-blocks")); + assertBlocked( + null, + asInstanceOf( + ClusterBlockException.class, + ExceptionsHelper.unwrapCause( + safeAwaitFailure( + ClusterSearchShardsResponse.class, + l -> client().execute(TransportClusterSearchShardsAction.TYPE, new ClusterSearchShardsRequest("test-blocks"), l) + ) + ) + ) + ); } finally { disableIndexBlock("test-blocks", SETTING_BLOCKS_METADATA); } } + + private static ClusterSearchShardsResponse safeExecute(ClusterSearchShardsRequest request) { + return safeExecute(TransportClusterSearchShardsAction.TYPE, request); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java index 619e7c9d9edec..abb58474d84c5 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java @@ -7,6 +7,14 @@ */ package org.elasticsearch.script; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; +import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction; +import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; @@ -20,6 +28,7 @@ import java.util.Map; import java.util.function.Function; +import static 
org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; public class StoredScriptsIT extends ESIntegTestCase { @@ -41,34 +50,57 @@ protected Collection> nodePlugins() { } public void testBasics() { - assertAcked(clusterAdmin().preparePutStoredScript().setId("foobar").setContent(new BytesArray(Strings.format(""" + putJsonStoredScript("foobar", Strings.format(""" {"script": {"lang": "%s", "source": "1"} } - """, LANG)), XContentType.JSON)); - String script = clusterAdmin().prepareGetStoredScript("foobar").get().getSource().getSource(); + """, LANG)); + String script = safeExecute(GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest("foobar")).getSource().getSource(); assertNotNull(script); assertEquals("1", script); - assertAcked(clusterAdmin().prepareDeleteStoredScript("foobar")); - StoredScriptSource source = clusterAdmin().prepareGetStoredScript("foobar").get().getSource(); + assertAcked(safeExecute(TransportDeleteStoredScriptAction.TYPE, new DeleteStoredScriptRequest("foobar"))); + StoredScriptSource source = safeExecute(GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest("foobar")).getSource(); assertNull(source); - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - clusterAdmin().preparePutStoredScript().setId("id#").setContent(new BytesArray(Strings.format(""" - {"script": {"lang": "%s", "source": "1"} } - """, LANG)), XContentType.JSON) + assertEquals( + "Validation Failed: 1: id cannot contain '#' for stored script;", + asInstanceOf( + IllegalArgumentException.class, + ExceptionsHelper.unwrapCause( + safeAwaitFailure( + AcknowledgedResponse.class, + l -> client().execute( + TransportPutStoredScriptAction.TYPE, + new PutStoredScriptRequest().id("id#").content(new BytesArray(Strings.format(""" + {"script": {"lang": "%s", "source": "1"} } + """, LANG)), XContentType.JSON), + l + ) + ) + ) + ).getMessage() ); - assertEquals("Validation Failed: 1: id cannot contain '#' for stored script;", e.getMessage()); } public void testMaxScriptSize() { - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - clusterAdmin().preparePutStoredScript().setId("foobar").setContent(new BytesArray(Strings.format(""" - {"script": { "lang": "%s", "source":"0123456789abcdef"} }\ - """, LANG)), XContentType.JSON) + assertEquals( + "exceeded max allowed stored script size in bytes [64] with size [65] for script [foobar]", + asInstanceOf( + IllegalArgumentException.class, + ExceptionsHelper.unwrapCause( + safeAwaitFailure( + AcknowledgedResponse.class, + l -> client().execute( + TransportPutStoredScriptAction.TYPE, + new PutStoredScriptRequest().id("foobar").content(new BytesArray(Strings.format(""" + {"script": { "lang": "%s", "source":"0123456789abcdef"} }\ + """, LANG)), XContentType.JSON), + l + ) + + ) + ) + ).getMessage() ); - assertEquals("exceeded max allowed stored script size in bytes [64] with size [65] for script [foobar]", e.getMessage()); } public static class CustomScriptPlugin extends MockScriptPlugin { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ScriptedMetricIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ScriptedMetricIT.java index eeee745b32f92..5fcc2cf858ab2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ScriptedMetricIT.java +++ 
b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ScriptedMetricIT.java @@ -10,7 +10,6 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Strings; @@ -27,7 +26,6 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; -import org.elasticsearch.xcontent.XContentType; import org.junit.Before; import java.io.IOException; @@ -42,6 +40,7 @@ import java.util.function.Consumer; import java.util.function.Function; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.search.aggregations.AggregationBuilders.global; import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram; @@ -300,21 +299,21 @@ public void setupSuiteScopeCluster() throws Exception { // When using the MockScriptPlugin we can map Stored scripts to inline scripts: // the id of the stored script is used in test method while the source of the stored script // must match a predefined script from CustomScriptPlugin.pluginScripts() method - assertAcked(clusterAdmin().preparePutStoredScript().setId("initScript_stored").setContent(new BytesArray(Strings.format(""" + putJsonStoredScript("initScript_stored", Strings.format(""" {"script": {"lang": "%s", "source": "vars.multiplier = 3"} } - """, MockScriptPlugin.NAME)), XContentType.JSON)); + """, MockScriptPlugin.NAME)); - assertAcked(clusterAdmin().preparePutStoredScript().setId("mapScript_stored").setContent(new BytesArray(Strings.format(""" + putJsonStoredScript("mapScript_stored", Strings.format(""" {"script": {"lang": "%s", "source": "state.list.add(vars.multiplier)"} } - """, MockScriptPlugin.NAME)), XContentType.JSON)); + """, MockScriptPlugin.NAME)); - assertAcked(clusterAdmin().preparePutStoredScript().setId("combineScript_stored").setContent(new BytesArray(Strings.format(""" + putJsonStoredScript("combineScript_stored", Strings.format(""" {"script": {"lang": "%s", "source": "sum state values as a new aggregation"} } - """, MockScriptPlugin.NAME)), XContentType.JSON)); + """, MockScriptPlugin.NAME)); - assertAcked(clusterAdmin().preparePutStoredScript().setId("reduceScript_stored").setContent(new BytesArray(Strings.format(""" + putJsonStoredScript("reduceScript_stored", Strings.format(""" {"script": {"lang": "%s", "source": "sum all states (lists) values as a new aggregation"} } - """, MockScriptPlugin.NAME)), XContentType.JSON)); + """, MockScriptPlugin.NAME)); indexRandom(true, builders); ensureSearchable(); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/pipeline/BucketScriptIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/pipeline/BucketScriptIT.java index dc612d6bad5ce..bc8142a629f7c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/pipeline/BucketScriptIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/pipeline/BucketScriptIT.java @@ -11,7 +11,6 @@ import org.elasticsearch.ExceptionsHelper; import 
org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.script.MockScriptPlugin; import org.elasticsearch.script.Script; @@ -24,7 +23,6 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.ArrayList; @@ -35,12 +33,12 @@ import java.util.Map; import java.util.function.Function; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.search.aggregations.AggregationBuilders.dateRange; import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram; import static org.elasticsearch.search.aggregations.AggregationBuilders.percentiles; import static org.elasticsearch.search.aggregations.AggregationBuilders.sum; import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; @@ -514,14 +512,11 @@ public void testInlineScriptReturnNull() { } public void testStoredScript() { - assertAcked( - clusterAdmin().preparePutStoredScript() - .setId("my_script") - // Script source is not interpreted but it references a pre-defined script from CustomScriptPlugin - .setContent( - new BytesArray("{ \"script\": {\"lang\": \"" + CustomScriptPlugin.NAME + "\", \"source\": \"my_script\" } }"), - XContentType.JSON - ) + + putJsonStoredScript( + "my_script", + // Script source is not interpreted but it references a pre-defined script from CustomScriptPlugin + "{ \"script\": {\"lang\": \"" + CustomScriptPlugin.NAME + "\", \"source\": \"my_script\" } }" ); assertNoFailuresAndResponse( diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotCustomPluginStateIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotCustomPluginStateIT.java index b0c5e73de5859..77fb19a241105 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotCustomPluginStateIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotCustomPluginStateIT.java @@ -12,11 +12,14 @@ import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse; +import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; +import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptResponse; +import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction; import org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesResponse; import org.elasticsearch.action.ingest.DeletePipelineRequest; import 
org.elasticsearch.action.ingest.GetPipelineResponse; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.ingest.IngestTestPlugin; import org.elasticsearch.plugins.Plugin; @@ -29,6 +32,7 @@ import java.util.Collection; import java.util.Collections; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertIndexTemplateExists; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertIndexTemplateMissing; @@ -96,14 +100,7 @@ public void testIncludeGlobalState() throws Exception { if (testScript) { logger.info("--> creating test script"); - assertAcked( - clusterAdmin().preparePutStoredScript() - .setId("foobar") - .setContent( - new BytesArray("{\"script\": { \"lang\": \"" + MockScriptEngine.NAME + "\", \"source\": \"1\"} }"), - XContentType.JSON - ) - ); + putJsonStoredScript("foobar", "{\"script\": { \"lang\": \"" + MockScriptEngine.NAME + "\", \"source\": \"1\"} }"); } logger.info("--> snapshot without global state"); @@ -152,7 +149,7 @@ public void testIncludeGlobalState() throws Exception { if (testScript) { logger.info("--> delete test script"); - assertAcked(clusterAdmin().prepareDeleteStoredScript("foobar").get()); + assertAcked(safeExecute(TransportDeleteStoredScriptAction.TYPE, new DeleteStoredScriptRequest("foobar"))); } logger.info("--> try restoring from snapshot without global state"); @@ -188,7 +185,10 @@ public void testIncludeGlobalState() throws Exception { if (testScript) { logger.info("--> check that script is restored"); - GetStoredScriptResponse getStoredScriptResponse = clusterAdmin().prepareGetStoredScript("foobar").get(); + GetStoredScriptResponse getStoredScriptResponse = safeExecute( + GetStoredScriptAction.INSTANCE, + new GetStoredScriptRequest("foobar") + ); assertNotNull(getStoredScriptResponse.getSource()); } @@ -217,7 +217,7 @@ public void testIncludeGlobalState() throws Exception { } if (testScript) { - assertAcked(clusterAdmin().prepareDeleteStoredScript("foobar").get()); + assertAcked(safeExecute(TransportDeleteStoredScriptAction.TYPE, new DeleteStoredScriptRequest("foobar"))); } getIndexTemplatesResponse = indicesAdmin().prepareGetTemplates().get(); @@ -236,7 +236,7 @@ public void testIncludeGlobalState() throws Exception { getIndexTemplatesResponse = indicesAdmin().prepareGetTemplates().get(); assertIndexTemplateMissing(getIndexTemplatesResponse, "test-template"); assertFalse(clusterAdmin().prepareGetPipeline("barbaz").get().isFound()); - assertNull(clusterAdmin().prepareGetStoredScript("foobar").get().getSource()); + assertNull(safeExecute(GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest("foobar")).getSource()); assertDocCount("test-idx", 100L); } } diff --git a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java index f14a2f6fb5247..20351c8e28909 100644 --- a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java +++ b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java @@ -290,6 +290,7 @@ public void searchShards(final ClusterSearchShardsRequest request, final ActionL execute(TransportClusterSearchShardsAction.TYPE, request, listener); } + @Deprecated(forRemoval = true) // 
temporary compatibility shim public ClusterSearchShardsRequestBuilder prepareSearchShards(String... indices) { return new ClusterSearchShardsRequestBuilder(this).setIndices(indices); } @@ -476,6 +477,7 @@ public SimulatePipelineRequestBuilder prepareSimulatePipeline(BytesReference sou return new SimulatePipelineRequestBuilder(this, source, xContentType); } + @Deprecated(forRemoval = true) // temporary compatibility shim public PutStoredScriptRequestBuilder preparePutStoredScript() { return new PutStoredScriptRequestBuilder(this); } @@ -484,15 +486,16 @@ public void deleteStoredScript(DeleteStoredScriptRequest request, ActionListener execute(TransportDeleteStoredScriptAction.TYPE, request, listener); } + @Deprecated(forRemoval = true) // temporary compatibility shim public DeleteStoredScriptRequestBuilder prepareDeleteStoredScript(String id) { return new DeleteStoredScriptRequestBuilder(client).setId(id); } public void putStoredScript(final PutStoredScriptRequest request, ActionListener<AcknowledgedResponse> listener) { execute(TransportPutStoredScriptAction.TYPE, request, listener); - } + @Deprecated(forRemoval = true) // temporary compatibility shim public GetStoredScriptRequestBuilder prepareGetStoredScript(String id) { return new GetStoredScriptRequestBuilder(this).setId(id); } diff --git a/server/src/test/java/org/elasticsearch/client/internal/AbstractClientHeadersTestCase.java b/server/src/test/java/org/elasticsearch/client/internal/AbstractClientHeadersTestCase.java index 39e424adecfce..3052338f2549f 100644 --- a/server/src/test/java/org/elasticsearch/client/internal/AbstractClientHeadersTestCase.java +++ b/server/src/test/java/org/elasticsearch/client/internal/AbstractClientHeadersTestCase.java @@ -15,6 +15,7 @@ import org.elasticsearch.action.admin.cluster.reroute.TransportClusterRerouteAction; import org.elasticsearch.action.admin.cluster.snapshots.create.TransportCreateSnapshotAction; import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; +import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction; import org.elasticsearch.action.admin.indices.cache.clear.TransportClearIndicesCacheAction; import org.elasticsearch.action.admin.indices.create.TransportCreateIndexAction; @@ -101,10 +102,11 @@ public void testActions() { client.prepareGet("idx", "id").execute(new AssertingActionListener<>(TransportGetAction.TYPE.name(), client.threadPool())); client.prepareSearch().execute(new AssertingActionListener<>(TransportSearchAction.TYPE.name(), client.threadPool())); client.prepareDelete("idx", "id").execute(new AssertingActionListener<>(TransportDeleteAction.NAME, client.threadPool())); - client.admin() - .cluster() - .prepareDeleteStoredScript("id") - .execute(new AssertingActionListener<>(TransportDeleteStoredScriptAction.TYPE.name(), client.threadPool())); + client.execute( + TransportDeleteStoredScriptAction.TYPE, + new DeleteStoredScriptRequest("id"), + new AssertingActionListener<>(TransportDeleteStoredScriptAction.TYPE.name(), client.threadPool()) + ); client.prepareIndex("idx") .setId("id") .setSource("source", XContentType.JSON) diff --git a/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java b/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java new file mode 100644 index 0000000000000..e140c2bfbf986 --- /dev/null +++
b/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.storedscripts; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xcontent.XContentType; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +public class StoredScriptIntegTestUtils { + private StoredScriptIntegTestUtils() {/* no instances */} + + public static void putJsonStoredScript(String id, String jsonContent) { + putJsonStoredScript(id, new BytesArray(jsonContent)); + } + + public static void putJsonStoredScript(String id, BytesReference jsonContent) { + assertAcked( + ESIntegTestCase.safeExecute( + TransportPutStoredScriptAction.TYPE, + new PutStoredScriptRequest().id(id).content(jsonContent, XContentType.JSON) + ) + ); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 33d36ed5e2cdb..f1b012d757926 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -23,6 +23,9 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; @@ -667,6 +670,15 @@ public static Client dataNodeClient() { return client; } + /** + * Execute the given {@link ActionRequest} using the given {@link ActionType} and a default node client, wait for it to complete with + * a timeout of {@link #SAFE_AWAIT_TIMEOUT}, and then return the result. An exceptional response, timeout or interrupt triggers a test + * failure. 
+ */ + public static <T extends ActionResponse> T safeExecute(ActionType<T> action, ActionRequest request) { + return safeExecute(client(), action, request); + } + public static Iterable<Client> clients() { return cluster().getClients(); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java index a538c39704a73..2fc4d63d0120a 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java @@ -9,6 +9,9 @@ import com.carrotsearch.randomizedtesting.RandomizedContext; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; @@ -292,6 +295,15 @@ public Client client() { return wrapClient(NODE.client()); } + /** + * Execute the given {@link ActionRequest} using the given {@link ActionType} and the default node client, wait for it to complete with + * a timeout of {@link #SAFE_AWAIT_TIMEOUT}, and then return the result. An exceptional response, timeout or interrupt triggers a test + * failure. + */ + public <T extends ActionResponse> T safeExecute(ActionType<T> action, ActionRequest request) { + return safeExecute(client(), action, request); + } + /** * Returns an admin client. */ diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index b5c03d118a43b..ac9ff92329b39 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -42,11 +42,15 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; import org.elasticsearch.action.RequestBuilder; import org.elasticsearch.action.support.ActionTestUtils; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.TestPlainActionFuture; import org.elasticsearch.bootstrap.BootstrapForTesting; +import org.elasticsearch.client.internal.ElasticsearchClient; import org.elasticsearch.client.internal.Requests; import org.elasticsearch.cluster.ClusterModule; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -2319,6 +2323,15 @@ public static <T> T safeAwait(CheckedConsumer<ActionListener<T>, ?> consumer) { return safeAwait(SubscribableListener.newForked(consumer)); } + /** + * Execute the given {@link ActionRequest} using the given {@link ActionType} and the given {@link ElasticsearchClient}, wait for + * it to complete with a timeout of {@link #SAFE_AWAIT_TIMEOUT}, and then return the result. An exceptional response, timeout or + * interrupt triggers a test failure. + */ + public static <T extends ActionResponse> T safeExecute(ElasticsearchClient client, ActionType<T> action, ActionRequest request) { + return safeAwait(l -> client.execute(action, request, l)); + } + /** * Wait for the successful completion of the given {@link Future}, with a timeout of {@link #SAFE_AWAIT_TIMEOUT}, preserving the * thread's interrupt status flag and converting all exceptions into an {@link AssertionError} to trigger a test failure. diff --git a/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/ILMDownsampleDisruptionIT.java b/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/ILMDownsampleDisruptionIT.java index 8a0d9edae4993..16daed5e0faab 100644 --- a/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/ILMDownsampleDisruptionIT.java +++ b/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/ILMDownsampleDisruptionIT.java @@ -10,6 +10,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.action.DocWriteRequest; +import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; +import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.elasticsearch.action.admin.indices.get.GetIndexRequest; import org.elasticsearch.action.admin.indices.get.GetIndexResponse; import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest; @@ -324,12 +326,11 @@ private Disruptor( public void run() { listener.disruptionStart(); try { - final String candidateNode = cluster.client(clientNode) - .admin() - .cluster() - .prepareSearchShards(sourceIndex) - .get() - .getNodes()[0].getName(); + final String candidateNode = safeExecute( + cluster.client(clientNode), + TransportClusterSearchShardsAction.TYPE, + new ClusterSearchShardsRequest(sourceIndex) + ).getNodes()[0].getName(); logger.info("Candidate node [" + candidateNode + "]"); disruption.accept(candidateNode); ensureGreen(sourceIndex); diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java index 3fbcd00690e82..4973b54c5b658 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java @@ -8,6 +8,8 @@ package org.elasticsearch.integration; import org.elasticsearch.ElasticsearchSecurityException; +import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; +import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.support.broadcast.BroadcastResponse; @@ -348,8 +350,8 @@ public void testRequestCacheWithTemplateRoleQuery() { private void prepareIndices() { final Client client = client(); - assertAcked(client.admin().cluster().preparePutStoredScript().setId("my-script").setContent(new BytesArray(""" - {"script":{"source":"{\\"match\\":{\\"username\\":\\"{{_user.username}}\\"}}","lang":"mustache"}}"""), XContentType.JSON)); + assertAcked(safeExecute(TransportPutStoredScriptAction.TYPE, new
PutStoredScriptRequest().id("my-script").content(new BytesArray(""" + {"script":{"source":"{\\"match\\":{\\"username\\":\\"{{_user.username}}\\"}}","lang":"mustache"}}"""), XContentType.JSON))); assertAcked(indicesAdmin().prepareCreate(DLS_INDEX).addAlias(new Alias("dls-alias")).get()); client.prepareIndex(DLS_INDEX).setId("101").setSource("number", 101, "letter", "A").get(); diff --git a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/BasicWatcherTests.java b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/BasicWatcherTests.java index c8dd4d42ac4a1..f59123fd66593 100644 --- a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/BasicWatcherTests.java +++ b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/BasicWatcherTests.java @@ -21,7 +21,6 @@ import org.elasticsearch.search.searchafter.SearchAfterBuilder; import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.watcher.client.WatchSourceBuilder; import org.elasticsearch.xpack.core.watcher.support.xcontent.XContentSource; import org.elasticsearch.xpack.core.watcher.transport.actions.QueryWatchesAction; @@ -45,6 +44,7 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; import static org.elasticsearch.index.query.QueryBuilders.termQuery; @@ -221,21 +221,17 @@ public void testConditionSearchWithSource() throws Exception { public void testConditionSearchWithIndexedTemplate() throws Exception { SearchSourceBuilder searchSourceBuilder = searchSource().query(matchQuery("level", "a")); - assertAcked( - clusterAdmin().preparePutStoredScript() - .setId("my-template") - .setContent( - BytesReference.bytes( - jsonBuilder().startObject() - .startObject("script") - .field("lang", "mustache") - .field("source") - .value(searchSourceBuilder) - .endObject() - .endObject() - ), - XContentType.JSON - ) + putJsonStoredScript( + "my-template", + BytesReference.bytes( + jsonBuilder().startObject() + .startObject("script") + .field("lang", "mustache") + .field("source") + .value(searchSourceBuilder) + .endObject() + .endObject() + ) ); Script template = new Script(ScriptType.STORED, null, "my-template", Collections.emptyMap()); diff --git a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/transform/TransformIntegrationTests.java b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/transform/TransformIntegrationTests.java index bdc040ff2eca7..92ac91a63e097 100644 --- a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/transform/TransformIntegrationTests.java +++ b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/transform/TransformIntegrationTests.java @@ -6,7 +6,6 @@ */ package org.elasticsearch.xpack.watcher.transform; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Strings; @@ -15,7 +14,6 @@ 
 import org.elasticsearch.script.MockScriptPlugin;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptType;
-import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xpack.core.watcher.transport.actions.execute.ExecuteWatchRequestBuilder;
 import org.elasticsearch.xpack.core.watcher.transport.actions.put.PutWatchRequestBuilder;
 import org.elasticsearch.xpack.watcher.support.search.WatcherSearchTemplateRequest;
@@ -33,9 +31,9 @@
 import java.util.function.Function;
 
 import static java.util.Collections.singletonMap;
+import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript;
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
 import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
 import static org.elasticsearch.xpack.watcher.actions.ActionBuilders.indexAction;
 import static org.elasticsearch.xpack.watcher.client.WatchSourceBuilders.watchBuilder;
@@ -106,13 +104,13 @@ public void testScriptTransform() throws Exception {
             script = mockScript("['key3' : ctx.payload.key1 + ctx.payload.key2]");
         } else {
             logger.info("testing script transform with an indexed script");
-            assertAcked(clusterAdmin().preparePutStoredScript().setId("my-script").setContent(new BytesArray(Strings.format("""
+            putJsonStoredScript("my-script", Strings.format("""
                 {
                   "script": {
                     "lang": "%s",
                     "source": "['key3' : ctx.payload.key1 + ctx.payload.key2]"
                   }
-                }""", MockScriptPlugin.NAME)), XContentType.JSON).get());
+                }""", MockScriptPlugin.NAME));
             script = new Script(ScriptType.STORED, null, "my-script", Collections.emptyMap());
         }
 

From 72571df1ae5955772557daf5ac2f8d00fbd998f0 Mon Sep 17 00:00:00 2001
From: David Turner
Date: Tue, 30 Jul 2024 07:33:36 +0100
Subject: [PATCH 22/22] Remove deprecated `ClusterAdminClient` methods
 (#111418)

These methods are no longer used in ES or any of its dependent code.
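Callers that previously relied on the removed shims now spell out the
master-node and ack timeouts at the call site. A minimal migration sketch
(the repository and snapshot names and the 30s timeouts are placeholders,
not taken from this change):

    // before: removed shim, implicit trappy default timeouts
    // clusterAdmin().prepareDeleteRepository("my-repo").get();
    // after: timeouts passed explicitly, matching the surviving overloads
    clusterAdmin().prepareDeleteRepository(TimeValue.timeValueSeconds(30), TimeValue.timeValueSeconds(30), "my-repo").get();
    clusterAdmin().prepareDeleteSnapshot(TimeValue.timeValueSeconds(30), "my-repo", "snap-1").get();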
Relates #109828
Relates #107984
---
 .../client/internal/ClusterAdminClient.java   | 40 -------------------
 1 file changed, 40 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java
index 20351c8e28909..76b666c9f918f 100644
--- a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java
+++ b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java
@@ -122,9 +122,7 @@
 import org.elasticsearch.action.ingest.SimulatePipelineRequest;
 import org.elasticsearch.action.ingest.SimulatePipelineRequestBuilder;
 import org.elasticsearch.action.ingest.SimulatePipelineResponse;
-import org.elasticsearch.action.support.master.AcknowledgedRequest;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
-import org.elasticsearch.action.support.master.MasterNodeRequest;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.tasks.TaskId;
@@ -303,15 +301,6 @@ public PutRepositoryRequestBuilder preparePutRepository(TimeValue masterNodeTime
         return new PutRepositoryRequestBuilder(this, masterNodeTimeout, ackTimeout, name);
     }
 
-    @Deprecated(forRemoval = true) // temporary compatibility shim
-    public PutRepositoryRequestBuilder preparePutRepository(String name) {
-        return preparePutRepository(
-            MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT,
-            AcknowledgedRequest.DEFAULT_ACK_TIMEOUT,
-            name
-        );
-    }
-
     public void deleteRepository(DeleteRepositoryRequest request, ActionListener<AcknowledgedResponse> listener) {
         execute(TransportDeleteRepositoryAction.TYPE, request, listener);
     }
@@ -320,15 +309,6 @@ public DeleteRepositoryRequestBuilder prepareDeleteRepository(TimeValue masterNo
         return new DeleteRepositoryRequestBuilder(this, masterNodeTimeout, ackTimeout, name);
     }
 
-    @Deprecated(forRemoval = true) // temporary compatibility shim
-    public DeleteRepositoryRequestBuilder prepareDeleteRepository(String name) {
-        return prepareDeleteRepository(
-            MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT,
-            AcknowledgedRequest.DEFAULT_ACK_TIMEOUT,
-            name
-        );
-    }
-
     public void getRepositories(GetRepositoriesRequest request, ActionListener<GetRepositoriesResponse> listener) {
         execute(GetRepositoriesAction.INSTANCE, request, listener);
     }
@@ -361,11 +341,6 @@ public void createSnapshot(CreateSnapshotRequest request, ActionListener<Create
     public void deleteSnapshot(DeleteSnapshotRequest request, ActionListener<AcknowledgedResponse> listener) {
         execute(TransportDeleteSnapshotAction.TYPE, request, listener);
     }
 
-    @Deprecated(forRemoval = true) // temporary compatibility shim
-    public DeleteSnapshotRequestBuilder prepareDeleteSnapshot(String repository, String... names) {
-        return prepareDeleteSnapshot(MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, repository, names);
-    }
-
     public DeleteSnapshotRequestBuilder prepareDeleteSnapshot(TimeValue masterNodeTimeout, String repository, String... names) {
         return new DeleteSnapshotRequestBuilder(this, masterNodeTimeout, repository, names);
     }
@@ -412,11 +377,6 @@ public void restoreSnapshot(RestoreSnapshotRequest request, ActionListener
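As a usage sketch of the `safeExecute` helper added earlier in this series,
mirroring the ILMDownsampleDisruptionIT hunk above ("my-index" is a
placeholder index name, not taken from these patches):

    // run the action through the default node client and fail the test on
    // error, timeout or interrupt, instead of blocking on an unbounded get()
    var response = safeExecute(
        TransportClusterSearchShardsAction.TYPE,
        new ClusterSearchShardsRequest("my-index")
    );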