From f0982c7c736928831bed7fca6fbfa8dd0b2f6c11 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Wed, 28 Aug 2024 15:03:55 +0300 Subject: [PATCH 001/144] Removing trace logging for SearchProgressActionListenerIT (#112275) --- .../action/search/SearchProgressActionListenerIT.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java index 428e116ecd1ca..88d934973fc49 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/search/SearchProgressActionListenerIT.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.tasks.TaskId; import org.elasticsearch.test.ESSingleNodeTestCase; -import org.elasticsearch.test.junit.annotations.TestIssueLogging; import java.util.ArrayList; import java.util.Arrays; @@ -41,10 +40,6 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; -@TestIssueLogging( - issueUrl = "https://github.com/elastic/elasticsearch/issues/109830", - value = "org.elasticsearch.action.search:TRACE," + "org.elasticsearch.search.SearchService:TRACE" -) public class SearchProgressActionListenerIT extends ESSingleNodeTestCase { private List shards; From 2ad66007861714234a9262edbd9376e559eae785 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:24:40 +1000 Subject: [PATCH 002/144] Mute org.elasticsearch.search.query.ScriptScoreQueryTests testScriptTermStatsAvailable #112278 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 5199221c25aaf..26bded1d09dc8 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -163,6 +163,9 @@ tests: - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=indices.create/20_synthetic_source/stored field under object with store_array_source} issue: https://github.com/elastic/elasticsearch/issues/112264 +- class: org.elasticsearch.search.query.ScriptScoreQueryTests + method: testScriptTermStatsAvailable + issue: https://github.com/elastic/elasticsearch/issues/112278 # Examples: # From defda1900ebd987bc30218d1e33509dc21fb2551 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Wed, 28 Aug 2024 13:38:23 +0100 Subject: [PATCH 003/144] Fix toReleaseVersion() when called on the current version id (#112242) --- docs/changelog/112242.yaml | 5 +++++ .../src/main/java/org/elasticsearch/ReleaseVersions.java | 9 ++++++--- .../main/java/org/elasticsearch/TransportVersions.java | 2 +- .../main/java/org/elasticsearch/index/IndexVersions.java | 2 +- .../java/org/elasticsearch/ReleaseVersionsTests.java | 7 ++++--- .../java/org/elasticsearch/TransportVersionTests.java | 4 ++++ 6 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 docs/changelog/112242.yaml diff --git a/docs/changelog/112242.yaml b/docs/changelog/112242.yaml new file mode 100644 index 0000000000000..7292a00166de2 --- /dev/null +++ b/docs/changelog/112242.yaml @@ -0,0 +1,5 @@ +pr: 112242 +summary: Fix toReleaseVersion() when called on the current version id +area: Infra/Core +type: bug +issues: [111900] diff --git a/server/src/main/java/org/elasticsearch/ReleaseVersions.java 
b/server/src/main/java/org/elasticsearch/ReleaseVersions.java index 7b5c8d1d42382..cacdca1c5b528 100644 --- a/server/src/main/java/org/elasticsearch/ReleaseVersions.java +++ b/server/src/main/java/org/elasticsearch/ReleaseVersions.java @@ -41,7 +41,7 @@ public class ReleaseVersions { private static final Pattern VERSION_LINE = Pattern.compile("(\\d+\\.\\d+\\.\\d+),(\\d+)"); - public static IntFunction generateVersionsLookup(Class versionContainer) { + public static IntFunction generateVersionsLookup(Class versionContainer, int current) { if (USES_VERSIONS == false) return Integer::toString; try { @@ -52,6 +52,9 @@ public static IntFunction generateVersionsLookup(Class versionContain } NavigableMap> versions = new TreeMap<>(); + // add the current version id, which won't be in the csv + versions.put(current, List.of(Version.CURRENT)); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(versionsFile, StandardCharsets.UTF_8))) { String line; while ((line = reader.readLine()) != null) { @@ -121,8 +124,8 @@ private static IntFunction lookupFunction(NavigableMap getAllVersions() { return VERSION_IDS.values(); } - static final IntFunction VERSION_LOOKUP = ReleaseVersions.generateVersionsLookup(TransportVersions.class); + static final IntFunction VERSION_LOOKUP = ReleaseVersions.generateVersionsLookup(TransportVersions.class, LATEST_DEFINED.id()); // no instance private TransportVersions() {} diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index fa40c0316fdcc..608d88fdef664 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -221,7 +221,7 @@ static Collection getAllVersions() { return VERSION_IDS.values(); } - static final IntFunction VERSION_LOOKUP = ReleaseVersions.generateVersionsLookup(IndexVersions.class); + static final IntFunction VERSION_LOOKUP = ReleaseVersions.generateVersionsLookup(IndexVersions.class, LATEST_DEFINED.id()); // no instance private IndexVersions() {} diff --git a/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java b/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java index b80e953bd8aea..3b5f5eea57f66 100644 --- a/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java +++ b/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java @@ -17,19 +17,20 @@ public class ReleaseVersionsTests extends ESTestCase { public void testReleaseVersions() { - IntFunction versions = ReleaseVersions.generateVersionsLookup(ReleaseVersionsTests.class); + IntFunction versions = ReleaseVersions.generateVersionsLookup(ReleaseVersionsTests.class, 23); assertThat(versions.apply(10), equalTo("8.0.0")); assertThat(versions.apply(14), equalTo("8.1.0-8.1.1")); assertThat(versions.apply(21), equalTo("8.2.0")); assertThat(versions.apply(22), equalTo("8.2.1")); + assertThat(versions.apply(23), equalTo(Version.CURRENT.toString())); } public void testReturnsRange() { - IntFunction versions = ReleaseVersions.generateVersionsLookup(ReleaseVersionsTests.class); + IntFunction versions = ReleaseVersions.generateVersionsLookup(ReleaseVersionsTests.class, 23); assertThat(versions.apply(17), equalTo("8.1.2-8.2.0")); assertThat(versions.apply(9), equalTo("0.0.0")); - assertThat(versions.apply(24), equalTo("8.2.2-snapshot[24]")); + assertThat(versions.apply(24), equalTo(new Version(Version.CURRENT.id + 100) + "-[24]")); } } diff --git 
a/server/src/test/java/org/elasticsearch/TransportVersionTests.java b/server/src/test/java/org/elasticsearch/TransportVersionTests.java index 2de973622248b..a3728f20a23d4 100644 --- a/server/src/test/java/org/elasticsearch/TransportVersionTests.java +++ b/server/src/test/java/org/elasticsearch/TransportVersionTests.java @@ -186,6 +186,10 @@ public void testCURRENTIsLatest() { assertThat(Collections.max(TransportVersions.getAllVersions()), is(TransportVersion.current())); } + public void testToReleaseVersion() { + assertThat(TransportVersion.current().toReleaseVersion(), equalTo(Version.CURRENT.toString())); + } + public void testToString() { assertEquals("5000099", TransportVersion.fromId(5_00_00_99).toString()); assertEquals("2030099", TransportVersion.fromId(2_03_00_99).toString()); From 7c5c471904c042b5a548ac1aa454ab9f2bce8b00 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 28 Aug 2024 14:31:25 +0100 Subject: [PATCH 004/144] Upgrade `repository-azure` dependencies (#112277) `azure-core-1.51.0` includes fixes for a handful of bugs we reported. This commit upgrades the `azure-storage-blob` and `azure-identity` dependencies to pick up this new fixed version of `azure-core`. --- docs/changelog/112277.yaml | 5 +++ gradle/verification-metadata.xml | 65 ++++++++++++++------------- modules/repository-azure/build.gradle | 22 ++++----- 3 files changed, 51 insertions(+), 41 deletions(-) create mode 100644 docs/changelog/112277.yaml diff --git a/docs/changelog/112277.yaml b/docs/changelog/112277.yaml new file mode 100644 index 0000000000000..eac474555999a --- /dev/null +++ b/docs/changelog/112277.yaml @@ -0,0 +1,5 @@ +pr: 112277 +summary: Upgrade `repository-azure` dependencies +area: Snapshot/Restore +type: upgrade +issues: [] diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 261e210cdbe11..fd514103449c1 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -119,44 +119,44 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + @@ -411,9 +411,9 @@ - - - + + + @@ -901,9 +901,9 @@ - - - + + + @@ -3394,6 +3394,11 @@ + + + + + diff --git a/modules/repository-azure/build.gradle b/modules/repository-azure/build.gradle index 9c63304e8267b..6334e5ae6a195 100644 --- a/modules/repository-azure/build.gradle +++ b/modules/repository-azure/build.gradle @@ -24,16 +24,16 @@ versions << [ dependencies { // Microsoft - api "com.azure:azure-core-http-netty:1.15.1" - api "com.azure:azure-core:1.50.0" - api "com.azure:azure-identity:1.13.1" - api "com.azure:azure-json:1.1.0" - api "com.azure:azure-storage-blob:12.26.1" - api "com.azure:azure-storage-common:12.26.0" - api "com.azure:azure-storage-internal-avro:12.11.1" - api "com.azure:azure-xml:1.0.0" + api "com.azure:azure-core-http-netty:1.15.3" + api "com.azure:azure-core:1.51.0" + api "com.azure:azure-identity:1.13.2" + api "com.azure:azure-json:1.2.0" + api "com.azure:azure-storage-blob:12.27.1" + api "com.azure:azure-storage-common:12.26.1" + api "com.azure:azure-storage-internal-avro:12.12.1" + api "com.azure:azure-xml:1.1.0" api "com.microsoft.azure:msal4j-persistence-extension:1.3.0" - api "com.microsoft.azure:msal4j:1.16.1" + api "com.microsoft.azure:msal4j:1.16.2" // Jackson api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" @@ -57,7 +57,7 @@ dependencies { api "org.reactivestreams:reactive-streams:1.0.4" // Others - api "com.fasterxml.woodstox:woodstox-core:6.4.0" + api "com.fasterxml.woodstox:woodstox-core:6.7.0" api 
"com.github.stephenc.jcip:jcip-annotations:1.0-1" api "com.nimbusds:content-type:2.3" api "com.nimbusds:lang-tag:1.7" @@ -69,7 +69,7 @@ dependencies { api "net.java.dev.jna:jna:${versions.jna}" // Maven says 5.14.0 but this aligns with the Elasticsearch-wide version api "net.minidev:accessors-smart:2.5.0" api "net.minidev:json-smart:2.5.0" - api "org.codehaus.woodstox:stax2-api:4.2.1" + api "org.codehaus.woodstox:stax2-api:4.2.2" api "org.ow2.asm:asm:9.3" runtimeOnly "com.google.crypto.tink:tink:1.14.0" From 046d6ee7544bac7a5ced7d38431b4c6374efa279 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 28 Aug 2024 07:59:13 -0700 Subject: [PATCH 005/144] Avoid wrapping rejection exception in exchange (#112178) We should avoid wrapping EsRejectedExecutionException in an ElasticsearchException as it would change the status code from 429 to 500. Ideally, we should avoid wrapping exceptions altogether, but that would require bigger changes. Closes #112106 --- docs/changelog/112178.yaml | 6 ++ .../compute/operator/AsyncOperator.java | 2 +- .../exchange/ExchangeSourceHandler.java | 2 +- .../xpack/esql/action/EnrichIT.java | 25 ++++++++ .../xpack/esql/action/ManyShardsIT.java | 60 +++++++++++++++++++ 5 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/112178.yaml diff --git a/docs/changelog/112178.yaml b/docs/changelog/112178.yaml new file mode 100644 index 0000000000000..f1011291542b8 --- /dev/null +++ b/docs/changelog/112178.yaml @@ -0,0 +1,6 @@ +pr: 112178 +summary: Avoid wrapping rejection exception in exchange +area: ES|QL +type: bug +issues: + - 112106 diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java index 92213eca7b477..2c36b42dee277 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java @@ -146,7 +146,7 @@ private void checkFailure() { Exception e = failureCollector.getFailure(); if (e != null) { discardPages(); - throw ExceptionsHelper.convertToElastic(e); + throw ExceptionsHelper.convertToRuntime(e); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java index 406dc4494208c..e3fc0e26e34e0 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java @@ -54,7 +54,7 @@ private class ExchangeSourceImpl implements ExchangeSource { private void checkFailure() { Exception e = failure.getFailure(); if (e != null) { - throw ExceptionsHelper.convertToElastic(e); + throw ExceptionsHelper.convertToRuntime(e); } } diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EnrichIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EnrichIT.java index e7bb054221c89..dab99a0f719dd 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EnrichIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EnrichIT.java @@ -7,6 +7,7 @@ package 
org.elasticsearch.xpack.esql.action; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.TransportAction; @@ -16,6 +17,7 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.operator.DriverProfile; import org.elasticsearch.compute.operator.DriverStatus; @@ -30,6 +32,9 @@ import org.elasticsearch.protocol.xpack.XPackInfoRequest; import org.elasticsearch.protocol.xpack.XPackInfoResponse; import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.transport.RemoteTransportException; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; import org.elasticsearch.xpack.core.XPackSettings; @@ -43,6 +48,7 @@ import org.elasticsearch.xpack.enrich.EnrichPlugin; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; import org.junit.After; @@ -82,6 +88,7 @@ protected Collection> nodePlugins() { plugins.add(IngestCommonPlugin.class); plugins.add(ReindexPlugin.class); plugins.add(InternalTransportSettingPlugin.class); + plugins.add(MockTransportService.TestPlugin.class); return plugins; } @@ -420,6 +427,24 @@ public void testManyDocuments() { } } + public void testRejection() { + for (var ts : internalCluster().getInstances(TransportService.class)) { + ((MockTransportService) ts).addRequestHandlingBehavior(EnrichLookupService.LOOKUP_ACTION_NAME, (h, r, channel, t) -> { + EsRejectedExecutionException ex = new EsRejectedExecutionException("test", false); + channel.sendResponse(new RemoteTransportException("test", ex)); + }); + } + try { + String query = "FROM listen* | " + enrichSongCommand(); + Exception error = expectThrows(Exception.class, () -> run(query).close()); + assertThat(ExceptionsHelper.status(error), equalTo(RestStatus.TOO_MANY_REQUESTS)); + } finally { + for (var ts : internalCluster().getInstances(TransportService.class)) { + ((MockTransportService) ts).clearAllRules(); + } + } + } + public static class LocalStateEnrich extends LocalStateCompositeXPackPlugin { public LocalStateEnrich(final Settings settings, final Path configPath) throws Exception { diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java index fb598cb855013..1ce92ded8acc6 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java @@ -8,14 +8,24 @@ package org.elasticsearch.xpack.esql.action; import org.apache.lucene.tests.util.LuceneTestCase; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.bulk.BulkRequestBuilder; import 
org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; +import org.elasticsearch.compute.operator.exchange.ExchangeService; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.MockSearchService; import org.elasticsearch.search.SearchService; +import org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.transport.RemoteTransportException; +import org.elasticsearch.transport.TransportChannel; +import org.elasticsearch.transport.TransportResponse; +import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.hamcrest.Matchers; import org.junit.Before; @@ -27,6 +37,10 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; /** * Make sures that we can run many concurrent requests with large number of shards with any data_partitioning. @@ -38,6 +52,7 @@ public class ManyShardsIT extends AbstractEsqlIntegTestCase { protected Collection> getMockPlugins() { var plugins = new ArrayList<>(super.getMockPlugins()); plugins.add(MockSearchService.TestPlugin.class); + plugins.add(MockTransportService.TestPlugin.class); return plugins; } @@ -97,6 +112,51 @@ public void testConcurrentQueries() throws Exception { } } + public void testRejection() throws Exception { + String[] nodes = internalCluster().getNodeNames(); + for (String node : nodes) { + MockTransportService ts = (MockTransportService) internalCluster().getInstance(TransportService.class, node); + ts.addRequestHandlingBehavior(ExchangeService.EXCHANGE_ACTION_NAME, (handler, request, channel, task) -> { + handler.messageReceived(request, new TransportChannel() { + @Override + public String getProfileName() { + return channel.getProfileName(); + } + + @Override + public void sendResponse(TransportResponse response) { + channel.sendResponse(new RemoteTransportException("simulated", new EsRejectedExecutionException("test queue"))); + } + + @Override + public void sendResponse(Exception exception) { + channel.sendResponse(exception); + } + }, task); + }); + } + try { + AtomicReference failure = new AtomicReference<>(); + EsqlQueryRequest request = new EsqlQueryRequest(); + request.query("from test-* | stats count(user) by tags"); + request.acceptedPragmaRisks(true); + request.pragmas(randomPragmas()); + CountDownLatch queryLatch = new CountDownLatch(1); + client().execute(EsqlQueryAction.INSTANCE, request, ActionListener.runAfter(ActionListener.wrap(r -> { + r.close(); + throw new AssertionError("expected failure"); + }, failure::set), queryLatch::countDown)); + assertTrue(queryLatch.await(10, TimeUnit.SECONDS)); + assertThat(failure.get(), instanceOf(EsRejectedExecutionException.class)); + assertThat(ExceptionsHelper.status(failure.get()), equalTo(RestStatus.TOO_MANY_REQUESTS)); + assertThat(failure.get().getMessage(), equalTo("test queue")); + } finally { + for (String node : nodes) { + ((MockTransportService) internalCluster().getInstance(TransportService.class, node)).clearAllRules(); + } + } + } + static class SearchContextCounter { private final 
int maxAllowed; private final AtomicInteger current = new AtomicInteger(); From 50871a3d28b413ff846497393459a3af1e937d3a Mon Sep 17 00:00:00 2001 From: Patrick Doyle <810052+prdoyle@users.noreply.github.com> Date: Wed, 28 Aug 2024 11:13:47 -0400 Subject: [PATCH 006/144] New injector (#111722) * Initial new injector * Allow createComponents to return classes * Downsample injection * Remove more vestiges of subtype handling * Lowercase logger * Respond to code review comments * Only one object per class * Some additional cleanup incl spotless * PR feedback * Missed one * Rename workQueue * Remove Injector.addRecordContents * TelemetryProvider requires us to inject an object using a supertype * Address Simon's comments * Clarify the reason for SuppressForbidden * Make log indentation code less intrusive --- server/src/main/java/module-info.java | 1 + .../org/elasticsearch/injection/Injector.java | 314 ++++++++++++++++++ .../injection/PlanInterpreter.java | 108 ++++++ .../org/elasticsearch/injection/Planner.java | 128 +++++++ .../elasticsearch/injection/api/Inject.java | 23 ++ .../elasticsearch/injection/package-info.java | 41 +++ .../injection/spec/ExistingInstanceSpec.java | 17 + .../injection/spec/InjectionSpec.java | 13 + .../injection/spec/MethodHandleSpec.java | 30 ++ .../injection/spec/ParameterSpec.java | 24 ++ .../injection/spec/package-info.java | 25 ++ .../injection/step/InjectionStep.java | 11 + .../injection/step/InstantiateStep.java | 17 + .../injection/step/package-info.java | 15 + .../elasticsearch/node/NodeConstruction.java | 70 ++-- .../node/PluginServiceInstances.java | 52 +++ .../injection/InjectorTests.java | 154 +++++++++ .../xpack/downsample/Downsample.java | 2 +- .../xpack/downsample/DownsampleMetrics.java | 5 +- 19 files changed, 1025 insertions(+), 25 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/injection/Injector.java create mode 100644 server/src/main/java/org/elasticsearch/injection/PlanInterpreter.java create mode 100644 server/src/main/java/org/elasticsearch/injection/Planner.java create mode 100644 server/src/main/java/org/elasticsearch/injection/api/Inject.java create mode 100644 server/src/main/java/org/elasticsearch/injection/package-info.java create mode 100644 server/src/main/java/org/elasticsearch/injection/spec/ExistingInstanceSpec.java create mode 100644 server/src/main/java/org/elasticsearch/injection/spec/InjectionSpec.java create mode 100644 server/src/main/java/org/elasticsearch/injection/spec/MethodHandleSpec.java create mode 100644 server/src/main/java/org/elasticsearch/injection/spec/ParameterSpec.java create mode 100644 server/src/main/java/org/elasticsearch/injection/spec/package-info.java create mode 100644 server/src/main/java/org/elasticsearch/injection/step/InjectionStep.java create mode 100644 server/src/main/java/org/elasticsearch/injection/step/InstantiateStep.java create mode 100644 server/src/main/java/org/elasticsearch/injection/step/package-info.java create mode 100644 server/src/main/java/org/elasticsearch/node/PluginServiceInstances.java create mode 100644 server/src/test/java/org/elasticsearch/injection/InjectorTests.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index d412748ed4e57..086bfece87172 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -190,6 +190,7 @@ exports org.elasticsearch.common.file; exports org.elasticsearch.common.geo; exports org.elasticsearch.common.hash; + exports 
org.elasticsearch.injection.api; exports org.elasticsearch.injection.guice; exports org.elasticsearch.injection.guice.binder; exports org.elasticsearch.injection.guice.internal; diff --git a/server/src/main/java/org/elasticsearch/injection/Injector.java b/server/src/main/java/org/elasticsearch/injection/Injector.java new file mode 100644 index 0000000000000..03fcf18509fcc --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/Injector.java @@ -0,0 +1,314 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection; + +import org.elasticsearch.injection.api.Inject; +import org.elasticsearch.injection.spec.ExistingInstanceSpec; +import org.elasticsearch.injection.spec.InjectionSpec; +import org.elasticsearch.injection.spec.MethodHandleSpec; +import org.elasticsearch.injection.spec.ParameterSpec; +import org.elasticsearch.injection.step.InjectionStep; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Constructor; +import java.util.ArrayDeque; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Stream; + +import static java.util.function.Predicate.not; +import static java.util.stream.Collectors.joining; +import static java.util.stream.Collectors.toCollection; +import static java.util.stream.Collectors.toMap; + +/** + * The main object for dependency injection. + *
+ * Allows the user to specify the requirements, then call {@link #inject} to create an object plus all its dependencies. + *
+ * Implementation note: this class itself contains logic for specifying the injection requirements; + * the actual injection operations are performed in other classes like {@link Planner} and {@link PlanInterpreter}, + */ +public final class Injector { + private static final Logger logger = LogManager.getLogger(Injector.class); + + /** + * The specifications supplied by the user, as opposed to those inferred by the injector. + */ + private final Map, InjectionSpec> seedSpecs; + + Injector(Map, InjectionSpec> seedSpecs) { + this.seedSpecs = seedSpecs; + } + + public static Injector create() { + return new Injector(new LinkedHashMap<>()); + } + + /** + * Instructs the injector to instantiate classToProcess + * in accordance with whatever annotations may be present on that class. + *
+     * There are only three ways the injector can find out that it must instantiate some class:
+     * <ol>
+     *     <li>This method</li>
+     *     <li>The parameter passed to {@link #inject}</li>
+     *     <li>A constructor parameter of some other class being instantiated,
+     *         having exactly the right class (not a supertype)</li>
+     * </ol>
+ * + * @return this + */ + public Injector addClass(Class classToProcess) { + MethodHandleSpec methodHandleSpec = methodHandleSpecFor(classToProcess); + var existing = seedSpecs.put(classToProcess, methodHandleSpec); + if (existing != null) { + throw new IllegalArgumentException("class " + classToProcess.getSimpleName() + " has already been added"); + } + return this; + } + + /** + * Equivalent to multiple chained calls to {@link #addClass}. + */ + public Injector addClasses(Collection> classesToProcess) { + classesToProcess.forEach(this::addClass); + return this; + } + + /** + * Equivalent to {@link #addInstance addInstance(object.getClass(), object)}. + */ + public Injector addInstance(Object object) { + @SuppressWarnings("unchecked") + Class actualClass = (Class) object.getClass(); // Whatever the runtime type is, it's represented by T + return addInstance(actualClass, actualClass.cast(object)); + } + + /** + * Equivalent to multiple calls to {@link #addInstance(Object)}. + */ + public Injector addInstances(Collection objects) { + for (var x : objects) { + addInstance(x); + } + return this; + } + + /** + * Indicates that object is to be injected for parameters of type type. + * The given object is treated as though it had been instantiated by the injector. + */ + public Injector addInstance(Class type, T object) { + assert type.isInstance(object); // No unchecked casting shenanigans allowed + var existing = seedSpecs.put(type, new ExistingInstanceSpec(type, object)); + if (existing != null) { + throw new IllegalStateException("There's already an object for " + type); + } + return this; + } + + /** + * Main entry point. Causes objects to be constructed. + * @return {@link Map} whose keys are all the requested resultTypes and whose values are all the instances of those types. + */ + public Map, Object> inject(Collection> resultTypes) { + resultTypes.forEach(this::ensureClassIsSpecified); + PlanInterpreter i = doInjection(); + return resultTypes.stream().collect(toMap(c -> c, i::theInstanceOf)); + } + + private void ensureClassIsSpecified(Class resultType) { + if (seedSpecs.containsKey(resultType) == false) { + addClass(resultType); + } + } + + private PlanInterpreter doInjection() { + logger.debug("Starting injection"); + Map, InjectionSpec> specMap = specClosure(seedSpecs); + Map, Object> existingInstances = new LinkedHashMap<>(); + specMap.values().forEach((spec) -> { + if (spec instanceof ExistingInstanceSpec e) { + existingInstances.put(e.requestedType(), e.instance()); + } + }); + PlanInterpreter interpreter = new PlanInterpreter(existingInstances); + interpreter.executePlan(injectionPlan(seedSpecs.keySet(), specMap)); + logger.debug("Done injection"); + return interpreter; + } + + /** + * Finds an {@link InjectionSpec} for every class the injector is capable of injecting. + *
+ * We do this once the injector is fully configured, with all calls to {@link #addClass} and {@link #addInstance} finished, + * so that we can easily build the complete picture of how injection should occur. + *
+ * This is not part of the planning process; it's just discovering all the things + * the injector needs to know about. This logic isn't concerned with ordering or dependency cycles. + * + * @param seedMap the injections the user explicitly asked for + * @return an {@link InjectionSpec} for every class the injector is capable of injecting. + */ + private static Map, InjectionSpec> specClosure(Map, InjectionSpec> seedMap) { + assert seedMapIsValid(seedMap); + + // For convenience, we pretend there's a gigantic method out there that takes + // all the seed types as parameters. + Queue workQueue = seedMap.values() + .stream() + .map(InjectionSpec::requestedType) + .map(Injector::syntheticParameterSpec) + .collect(toCollection(ArrayDeque::new)); + + // This map doubles as a checklist of classes we're already finished processing + Map, InjectionSpec> result = new LinkedHashMap<>(); + + ParameterSpec p; + while ((p = workQueue.poll()) != null) { + Class c = p.injectableType(); + InjectionSpec existingResult = result.get(c); + if (existingResult != null) { + logger.trace("Spec for {} already exists", c.getSimpleName()); + continue; + } + + InjectionSpec spec = seedMap.get(c); + if (spec instanceof ExistingInstanceSpec) { + // simple! + result.put(c, spec); + continue; + } + + // At this point, we know we'll need a MethodHandleSpec + MethodHandleSpec methodHandleSpec; + if (spec == null) { + // The user didn't specify this class; we must infer it now + spec = methodHandleSpec = methodHandleSpecFor(c); + } else if (spec instanceof MethodHandleSpec m) { + methodHandleSpec = m; + } else { + throw new AssertionError("Unexpected spec: " + spec); + } + + logger.trace("Inspecting parameters for constructor of {}", c); + for (var ps : methodHandleSpec.parameters()) { + logger.trace("Enqueue {}", ps); + workQueue.add(ps); + } + + registerSpec(spec, result); + } + + if (logger.isTraceEnabled()) { + logger.trace("Specs: {}", result.values().stream().map(Object::toString).collect(joining("\n\t", "\n\t", ""))); + } + return result; + } + + private static MethodHandleSpec methodHandleSpecFor(Class c) { + Constructor constructor = getSuitableConstructorIfAny(c); + if (constructor == null) { + throw new IllegalStateException("No suitable constructor for " + c); + } + + MethodHandle ctorHandle; + try { + ctorHandle = lookup().unreflectConstructor(constructor); + } catch (IllegalAccessException e) { + throw new IllegalStateException(e); + } + + List parameters = Stream.of(constructor.getParameters()).map(ParameterSpec::from).toList(); + + return new MethodHandleSpec(c, ctorHandle, parameters); + } + + /** + * @return true (unless an assertion fails). Never returns false. 
+ */ + private static boolean seedMapIsValid(Map, InjectionSpec> seed) { + seed.forEach( + (c, s) -> { assert s.requestedType().equals(c) : "Spec must be associated with its requestedType, not " + c + ": " + s; } + ); + return true; + } + + /** + * For the classes we've been explicitly asked to inject, + * pretend there's some massive method taking all of them as parameters + */ + private static ParameterSpec syntheticParameterSpec(Class c) { + return new ParameterSpec("synthetic_" + c.getSimpleName(), c, c); + } + + private static Constructor getSuitableConstructorIfAny(Class type) { + var constructors = Stream.of(type.getConstructors()).filter(not(Constructor::isSynthetic)).toList(); + if (constructors.size() == 1) { + return constructors.get(0); + } + var injectConstructors = constructors.stream().filter(c -> c.isAnnotationPresent(Inject.class)).toList(); + if (injectConstructors.size() == 1) { + return injectConstructors.get(0); + } + logger.trace("No suitable constructor for {}", type); + return null; + } + + private static void registerSpec(InjectionSpec spec, Map, InjectionSpec> specsByClass) { + Class requestedType = spec.requestedType(); + var existing = specsByClass.put(requestedType, spec); + if (existing == null || existing.equals(spec)) { + logger.trace("Register spec: {}", spec); + } else { + throw new IllegalStateException("Ambiguous specifications for " + requestedType + ": " + existing + " and " + spec); + } + } + + private List injectionPlan(Set> requiredClasses, Map, InjectionSpec> specsByClass) { + logger.trace("Constructing instantiation plan"); + Set> allParameterTypes = new HashSet<>(); + specsByClass.values().forEach(spec -> { + if (spec instanceof MethodHandleSpec m) { + m.parameters().stream().map(ParameterSpec::injectableType).forEachOrdered(allParameterTypes::add); + } + }); + + var plan = new Planner(specsByClass, requiredClasses, allParameterTypes).injectionPlan(); + if (logger.isDebugEnabled()) { + logger.debug("Injection plan: {}", plan.stream().map(Object::toString).collect(joining("\n\t", "\n\t", ""))); + } + return plan; + } + + /** + * Evolution note: there may be cases in the where we allow the user to + * supply a {@link java.lang.invoke.MethodHandles.Lookup} for convenience, + * so that they aren't required to make things public just to participate in injection. + */ + private static MethodHandles.Lookup lookup() { + return MethodHandles.publicLookup(); + } + +} diff --git a/server/src/main/java/org/elasticsearch/injection/PlanInterpreter.java b/server/src/main/java/org/elasticsearch/injection/PlanInterpreter.java new file mode 100644 index 0000000000000..cf38dbcb24b7d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/PlanInterpreter.java @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.injection; + +import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.injection.spec.MethodHandleSpec; +import org.elasticsearch.injection.spec.ParameterSpec; +import org.elasticsearch.injection.step.InjectionStep; +import org.elasticsearch.injection.step.InstantiateStep; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Performs the actual injection operations by running the {@link InjectionStep}s. + *
+ * The intent is that this logic is as simple as possible so that we don't run complex injection + * logic alongside the user-supplied constructor logic. All the injector complexity is already + * supposed to have happened in the planning phase. In particular, no injection-related errors + * are supposed to be detected during execution; they should be detected during planning and validation. + * All exceptions thrown during execution are supposed to be caused by user-supplied code. + * + *
+ * Execution model: + * The state of the injector during injection comprises a map from classes to objects. + * Before any steps execute, the map is pre-populated by object instances added via + * {@link Injector#addInstance(Object)} Injector.addInstance}, + * and then the steps begin to execute, reading and writing from this map. + * Some steps create objects and add them to this map; others manipulate the map itself. + */ +final class PlanInterpreter { + private static final Logger logger = LogManager.getLogger(PlanInterpreter.class); + private final Map, Object> instances = new LinkedHashMap<>(); + + PlanInterpreter(Map, Object> existingInstances) { + existingInstances.forEach(this::addInstance); + } + + /** + * Main entry point. Contains the implementation logic for each {@link InjectionStep}. + */ + void executePlan(List plan) { + int numConstructorCalls = 0; + for (InjectionStep step : plan) { + if (step instanceof InstantiateStep i) { + MethodHandleSpec spec = i.spec(); + logger.trace("Instantiating {}", spec.requestedType().getSimpleName()); + addInstance(spec.requestedType(), instantiate(spec)); + ++numConstructorCalls; + } else { + // TODO: switch patterns would make this unnecessary + assert false : "Unexpected step type: " + step.getClass().getSimpleName(); + throw new IllegalStateException("Unexpected step type: " + step.getClass().getSimpleName()); + } + } + logger.debug("Instantiated {} objects", numConstructorCalls); + } + + /** + * @return the list element corresponding to instances.get(type).get(0), + * assuming that instances.get(type) has exactly one element. + * @throws IllegalStateException if instances.get(type) does not have exactly one element + */ + public T theInstanceOf(Class type) { + Object instance = instances.get(type); + if (instance == null) { + throw new IllegalStateException("No object of type " + type.getSimpleName()); + } + return type.cast(instance); + } + + private void addInstance(Class requestedType, Object instance) { + Object old = instances.put(requestedType, instance); + if (old != null) { + throw new IllegalStateException("Multiple objects for " + requestedType); + } + } + + /** + * @throws IllegalStateException if the MethodHandle throws. + */ + @SuppressForbidden( + reason = "Can't call invokeExact because we don't know the method argument types statically, " + + "since each constructor has a different signature" + ) + private Object instantiate(MethodHandleSpec spec) { + Object[] args = spec.parameters().stream().map(this::parameterValue).toArray(); + try { + return spec.methodHandle().invokeWithArguments(args); + } catch (Throwable e) { + throw new IllegalStateException("Unexpected exception while instantiating {}" + spec, e); + } + } + + private Object parameterValue(ParameterSpec parameterSpec) { + return theInstanceOf(parameterSpec.formalType()); + } + +} diff --git a/server/src/main/java/org/elasticsearch/injection/Planner.java b/server/src/main/java/org/elasticsearch/injection/Planner.java new file mode 100644 index 0000000000000..4b6af05d57c04 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/Planner.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.injection; + +import org.elasticsearch.injection.spec.ExistingInstanceSpec; +import org.elasticsearch.injection.spec.InjectionSpec; +import org.elasticsearch.injection.spec.MethodHandleSpec; +import org.elasticsearch.injection.step.InjectionStep; +import org.elasticsearch.injection.step.InstantiateStep; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; + +import static java.util.Collections.unmodifiableMap; +import static java.util.Collections.unmodifiableSet; + +/** + * Evolution note: the intent is to plan one domain/subsystem at a time. + */ +final class Planner { + private static final Logger logger = LogManager.getLogger(Planner.class); + + final List plan; + final Map, InjectionSpec> specsByClass; + final Set> requiredTypes; // The injector's job is to ensure there is an instance of these; this is like the "root set" + final Set> allParameterTypes; // All the injectable types in all dependencies (recursively) of all required types + final Set startedPlanning; + final Set finishedPlanning; + final Set> alreadyProxied; + + /** + * @param specsByClass an {@link InjectionSpec} indicating how each class should be injected + * @param requiredTypes the classes of which we need instances + * @param allParameterTypes the classes that appear as the type of any parameter of any constructor we might call + */ + Planner(Map, InjectionSpec> specsByClass, Set> requiredTypes, Set> allParameterTypes) { + this.requiredTypes = requiredTypes; + this.plan = new ArrayList<>(); + this.specsByClass = unmodifiableMap(specsByClass); + this.allParameterTypes = unmodifiableSet(allParameterTypes); + this.startedPlanning = new HashSet<>(); + this.finishedPlanning = new HashSet<>(); + this.alreadyProxied = new HashSet<>(); + } + + /** + * Intended to be called once. + *
+ * Note that not all proxies are resolved once this plan has been executed. + *
+ * + * Evolution note: in a world with multiple domains/subsystems, + * it will become necessary to defer proxy resolution until after other plans + * have been executed, because they could create additional objects that ought + * to be included in the proxies created by this plan. + * + * @return the {@link InjectionStep} objects listed in execution order. + */ + List injectionPlan() { + for (Class c : requiredTypes) { + planForClass(c, 0); + } + return plan; + } + + /** + * Recursive procedure that determines what effect requestedClass + * should have on the plan under construction. + * + * @param depth is used just for indenting the logs + */ + private void planForClass(Class requestedClass, int depth) { + InjectionSpec spec = specsByClass.get(requestedClass); + if (spec == null) { + throw new IllegalStateException("Cannot instantiate " + requestedClass + ": no specification provided"); + } + planForSpec(spec, depth); + } + + private void planForSpec(InjectionSpec spec, int depth) { + if (finishedPlanning.contains(spec)) { + logger.trace("{}Already planned {}", indent(depth), spec); + return; + } + + logger.trace("{}Planning for {}", indent(depth), spec); + if (startedPlanning.add(spec) == false) { + // TODO: Better cycle detection and reporting. Use SCCs + throw new IllegalStateException("Cyclic dependency involving " + spec); + } + + if (spec instanceof MethodHandleSpec m) { + for (var p : m.parameters()) { + logger.trace("{}- Recursing into {} for actual parameter {}", indent(depth), p.injectableType(), p); + planForClass(p.injectableType(), depth + 1); + } + addStep(new InstantiateStep(m), depth); + } else if (spec instanceof ExistingInstanceSpec e) { + logger.trace("{}- Plan {}", indent(depth), e); + // Nothing to do. The injector will already have the required object. + } else { + throw new AssertionError("Unexpected injection spec: " + spec); + } + + finishedPlanning.add(spec); + } + + private void addStep(InjectionStep newStep, int depth) { + logger.trace("{}- Add step {}", indent(depth), newStep); + plan.add(newStep); + } + + private static Supplier indent(int depth) { + return () -> "\t".repeat(depth); + } +} diff --git a/server/src/main/java/org/elasticsearch/injection/api/Inject.java b/server/src/main/java/org/elasticsearch/injection/api/Inject.java new file mode 100644 index 0000000000000..d5c57d1e5e2e2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/api/Inject.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.api; + +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +import static java.lang.annotation.ElementType.CONSTRUCTOR; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +/** + * Designates a constructor to be called by the injector. 
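+ * <p>
+ * A minimal sketch of the intended use, where a class has more than one public constructor
+ * (the class and parameter names here are illustrative, not part of this change):
+ * <pre>{@code
+ * public class ExampleComponent {
+ *     @Inject
+ *     public ExampleComponent(ExampleDependency dependency) {
+ *         // the injector calls this constructor, because it is annotated
+ *     }
+ *
+ *     public ExampleComponent() {
+ *         // not used by the injector while the annotated constructor is present
+ *     }
+ * }
+ * }</pre>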
+ */ +@Target(CONSTRUCTOR) +@Retention(RUNTIME) +public @interface Inject { +} diff --git a/server/src/main/java/org/elasticsearch/injection/package-info.java b/server/src/main/java/org/elasticsearch/injection/package-info.java new file mode 100644 index 0000000000000..01dd1e878651c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/package-info.java @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +/** + * Our dependency injection technologies: our bespoke injector, plus our legacy vendored version of Google Guice. + *
+ * <h2>Usage</h2>
+ * The new injector is {@link org.elasticsearch.injection.Injector}. + * You create an instance using {@link org.elasticsearch.injection.Injector#create()}, + * call various methods like {@link org.elasticsearch.injection.Injector#addClass} to configure it, + * then call {@link org.elasticsearch.injection.Injector#inject} to cause the constructors to be called. + * + *
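+ * <p>
+ * A minimal usage sketch ({@code SomeService}, {@code Settings} and the {@code settings} instance
+ * are hypothetical stand-ins, not part of this change):
+ * <pre>{@code
+ * Injector injector = Injector.create();
+ * injector.addClass(SomeService.class);            // instantiated by calling its constructor
+ * injector.addInstance(Settings.class, settings);  // supplied ready-made, injected as-is
+ * Map<Class<?>, Object> components = injector.inject(List.of(SomeService.class));
+ * SomeService service = (SomeService) components.get(SomeService.class);
+ * }</pre>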
+ * <h2>Operation</h2>
+ * Injection proceeds in three phases:
+ * <ol>
+ *     <li>Configuration: the {@link org.elasticsearch.injection.Injector} captures the user's
+ *         intent in the form of {@link org.elasticsearch.injection.spec.InjectionSpec} objects,
+ *         one for each class.</li>
+ *     <li>Planning: the {@link org.elasticsearch.injection.Planner} analyzes the
+ *         {@link org.elasticsearch.injection.spec.InjectionSpec} objects, validates them,
+ *         and generates a plan in the form of a list of {@link org.elasticsearch.injection.step.InjectionStep} objects.</li>
+ *     <li>Execution: the {@link org.elasticsearch.injection.PlanInterpreter} runs
+ *         the steps in the plan, in sequence, to actually instantiate the objects and pass them
+ *         to each other's constructors.</li>
+ * </ol>
+ * + *
+ * <h2>Google Guice</h2>
+ * The older injector, based on Google Guice, is in the {@code guice} package. + * The new injector is unrelated to Guice, and is intended to replace Guice eventually. + */ +package org.elasticsearch.injection; diff --git a/server/src/main/java/org/elasticsearch/injection/spec/ExistingInstanceSpec.java b/server/src/main/java/org/elasticsearch/injection/spec/ExistingInstanceSpec.java new file mode 100644 index 0000000000000..f443e045442c9 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/spec/ExistingInstanceSpec.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.spec; + +public record ExistingInstanceSpec(Class requestedType, Object instance) implements InjectionSpec { + @Override + public String toString() { + // Don't call instance.toString; who knows what that will return + return "ExistingInstanceSpec[" + "requestedType=" + requestedType + ']'; + } +} diff --git a/server/src/main/java/org/elasticsearch/injection/spec/InjectionSpec.java b/server/src/main/java/org/elasticsearch/injection/spec/InjectionSpec.java new file mode 100644 index 0000000000000..552d2c2ba9ebb --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/spec/InjectionSpec.java @@ -0,0 +1,13 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.spec; + +public sealed interface InjectionSpec permits MethodHandleSpec, ExistingInstanceSpec { + Class requestedType(); +} diff --git a/server/src/main/java/org/elasticsearch/injection/spec/MethodHandleSpec.java b/server/src/main/java/org/elasticsearch/injection/spec/MethodHandleSpec.java new file mode 100644 index 0000000000000..06c4cd0faac63 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/spec/MethodHandleSpec.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.spec; + +import java.lang.invoke.MethodHandle; +import java.util.List; +import java.util.Objects; + +/** + * Indicates that a type should be instantiated by calling the given {@link java.lang.invoke.MethodHandle}. + *
+ * Design note: the intent is that the semantics are fully specified by this record, + * and no additional reflection logic is required to determine how the object should be injected. + * Roughly speaking: all the reflection should be finished, and the results should be stored in this object. + */ +public record MethodHandleSpec(Class requestedType, MethodHandle methodHandle, List parameters) implements InjectionSpec { + public MethodHandleSpec { + assert Objects.equals(methodHandle.type().parameterList(), parameters.stream().map(ParameterSpec::formalType).toList()) + : "MethodHandle parameter types must match the supplied parameter info; " + + methodHandle.type().parameterList() + + " vs " + + parameters; + } +} diff --git a/server/src/main/java/org/elasticsearch/injection/spec/ParameterSpec.java b/server/src/main/java/org/elasticsearch/injection/spec/ParameterSpec.java new file mode 100644 index 0000000000000..da15bd024fbf4 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/spec/ParameterSpec.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.spec; + +import java.lang.reflect.Parameter; + +/** + * Captures the pertinent info required to inject one of the arguments of a constructor. + * @param name is for troubleshooting; it's not strictly needed + * @param formalType is the declared class of the parameter + * @param injectableType is the target type of the injection dependency + */ +public record ParameterSpec(String name, Class formalType, Class injectableType) { + public static ParameterSpec from(Parameter parameter) { + // We currently have no cases where the formal and injectable types are different. + return new ParameterSpec(parameter.getName(), parameter.getType(), parameter.getType()); + } +} diff --git a/server/src/main/java/org/elasticsearch/injection/spec/package-info.java b/server/src/main/java/org/elasticsearch/injection/spec/package-info.java new file mode 100644 index 0000000000000..26cb1e8ff8543 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/spec/package-info.java @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +/** + * Objects that describe the means by which an object instance is created for (or associated with) some given type. + *
+ * The hierarchy is rooted at {@link org.elasticsearch.injection.spec.InjectionSpec}. + *
+ * Differs from {@link org.elasticsearch.injection.step.InjectionStep InjectionStep} in that:
+ * <ul>
+ *     <li>this describes the requirements, while InjectionStep describes the solution</li>
+ *     <li>this is declarative, while InjectionStep is imperative</li>
+ * </ul>
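+ * <p>
+ * As a sketch (the {@code Foo} and {@code Bar} classes and {@code fooConstructorHandle} are
+ * hypothetical): a constructor {@code Foo(Bar bar)} is described by
+ * <pre>{@code
+ * new MethodHandleSpec(Foo.class, fooConstructorHandle, List.of(new ParameterSpec("bar", Bar.class, Bar.class)))
+ * }</pre>
+ * while an object registered up front via {@code Injector.addInstance} is described by an
+ * {@code ExistingInstanceSpec} holding the instance itself.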
+ */ +package org.elasticsearch.injection.spec; diff --git a/server/src/main/java/org/elasticsearch/injection/step/InjectionStep.java b/server/src/main/java/org/elasticsearch/injection/step/InjectionStep.java new file mode 100644 index 0000000000000..6e27f45b4f4df --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/step/InjectionStep.java @@ -0,0 +1,11 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.step; + +public sealed interface InjectionStep permits InstantiateStep {} diff --git a/server/src/main/java/org/elasticsearch/injection/step/InstantiateStep.java b/server/src/main/java/org/elasticsearch/injection/step/InstantiateStep.java new file mode 100644 index 0000000000000..2342978dcfdb0 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/step/InstantiateStep.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.injection.step; + +import org.elasticsearch.injection.spec.MethodHandleSpec; + +/** + * Constructs a new object by invoking a {@link java.lang.invoke.MethodHandle} + * as specified by a given {@link MethodHandleSpec}. + */ +public record InstantiateStep(MethodHandleSpec spec) implements InjectionStep {} diff --git a/server/src/main/java/org/elasticsearch/injection/step/package-info.java b/server/src/main/java/org/elasticsearch/injection/step/package-info.java new file mode 100644 index 0000000000000..c0a3e05cb53f6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/injection/step/package-info.java @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +/** + * Objects that describe one operation to be performed by the PlanInterpreter. + * Injection is achieved by executing the steps in order. + *
+ * <p>
+ * See PlanInterpreter for more details on the execution model. + */ +package org.elasticsearch.injection.step; diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index ec0d293dc0064..eb9ef08b329ab 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -80,6 +80,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; import org.elasticsearch.discovery.DiscoveryModule; @@ -216,6 +217,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.IdentityHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; @@ -228,6 +230,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static java.lang.invoke.MethodHandles.lookup; +import static java.util.Collections.newSetFromMap; +import static java.util.function.Predicate.not; import static org.elasticsearch.core.Types.forciblyCast; /** @@ -831,27 +836,6 @@ private void construct( metadataCreateIndexService ); - record PluginServiceInstances( - Client client, - ClusterService clusterService, - RerouteService rerouteService, - ThreadPool threadPool, - ResourceWatcherService resourceWatcherService, - ScriptService scriptService, - NamedXContentRegistry xContentRegistry, - Environment environment, - NodeEnvironment nodeEnvironment, - NamedWriteableRegistry namedWriteableRegistry, - IndexNameExpressionResolver indexNameExpressionResolver, - RepositoriesService repositoriesService, - TelemetryProvider telemetryProvider, - AllocationService allocationService, - IndicesService indicesService, - FeatureService featureService, - SystemIndices systemIndices, - DataStreamGlobalRetentionSettings dataStreamGlobalRetentionSettings, - DocumentParsingProvider documentParsingProvider - ) implements Plugin.PluginServices {} PluginServiceInstances pluginServices = new PluginServiceInstances( client, clusterService, @@ -874,7 +858,30 @@ record PluginServiceInstances( documentParsingProvider ); - Collection pluginComponents = pluginsService.flatMap(p -> p.createComponents(pluginServices)).toList(); + Collection pluginComponents = pluginsService.flatMap(plugin -> { + Collection allItems = plugin.createComponents(pluginServices); + List componentObjects = allItems.stream().filter(not(x -> x instanceof Class)).toList(); + List> classes = allItems.stream().filter(x -> x instanceof Class).map(x -> (Class) x).toList(); + + // Then, injection + Collection componentsFromInjector; + if (classes.isEmpty()) { + componentsFromInjector = Set.of(); + } else { + logger.debug("Using injector to instantiate classes for {}: {}", plugin.getClass().getSimpleName(), classes); + var injector = org.elasticsearch.injection.Injector.create(); + injector.addInstances(componentObjects); + addRecordContents(injector, pluginServices); + var resultMap = injector.inject(classes); + // For now, assume we want all components added to the Guice injector + var distinctObjects = newSetFromMap(new IdentityHashMap<>()); + distinctObjects.addAll(resultMap.values()); + componentsFromInjector = distinctObjects; + } + + // Return both + return Stream.of(componentObjects, 
componentsFromInjector).flatMap(Collection::stream).toList(); + }).toList(); var terminationHandlers = pluginsService.loadServiceProviders(TerminationHandlerProvider.class) .stream() @@ -1175,6 +1182,24 @@ record PluginServiceInstances( postInjection(clusterModule, actionModule, clusterService, transportService, featureService); } + /** + * For each "component" (getter) c of a {@link Record}, + * calls {@link org.elasticsearch.injection.Injector#addInstance(Object) Injector.addInstance} + * to register the value with the component's declared type. + */ + @SuppressForbidden(reason = "Can't call invokeExact because we don't know the exact Record subtype statically") + private static void addRecordContents(org.elasticsearch.injection.Injector injector, Record r) { + for (var c : r.getClass().getRecordComponents()) { + try { + @SuppressWarnings("unchecked") + Class type = (Class) c.getType(); // T represents the declared type of the record component, whatever it is + injector.addInstance(type, type.cast(lookup().unreflect(c.getAccessor()).invoke(r))); + } catch (Throwable e) { + throw new IllegalStateException("Unable to read record component " + c, e); + } + } + } + private ClusterService createClusterService(SettingsModule settingsModule, ThreadPool threadPool, TaskManager taskManager) { ClusterService clusterService = new ClusterService( settingsModule.getSettings(), @@ -1595,4 +1620,5 @@ private Module loadPersistentTasksService( b.bind(PersistentTasksClusterService.class).toInstance(persistentTasksClusterService); }; } + } diff --git a/server/src/main/java/org/elasticsearch/node/PluginServiceInstances.java b/server/src/main/java/org/elasticsearch/node/PluginServiceInstances.java new file mode 100644 index 0000000000000..7c8775502fd64 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/node/PluginServiceInstances.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.node; + +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.metadata.DataStreamGlobalRetentionSettings; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.routing.RerouteService; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.NodeEnvironment; +import org.elasticsearch.features.FeatureService; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.indices.SystemIndices; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.internal.DocumentParsingProvider; +import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.script.ScriptService; +import org.elasticsearch.telemetry.TelemetryProvider; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.watcher.ResourceWatcherService; +import org.elasticsearch.xcontent.NamedXContentRegistry; + +public record PluginServiceInstances( + Client client, + ClusterService clusterService, + RerouteService rerouteService, + ThreadPool threadPool, + ResourceWatcherService resourceWatcherService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + Environment environment, + NodeEnvironment nodeEnvironment, + NamedWriteableRegistry namedWriteableRegistry, + IndexNameExpressionResolver indexNameExpressionResolver, + RepositoriesService repositoriesService, + TelemetryProvider telemetryProvider, + AllocationService allocationService, + IndicesService indicesService, + FeatureService featureService, + SystemIndices systemIndices, + DataStreamGlobalRetentionSettings dataStreamGlobalRetentionSettings, + DocumentParsingProvider documentParsingProvider +) implements Plugin.PluginServices {} diff --git a/server/src/test/java/org/elasticsearch/injection/InjectorTests.java b/server/src/test/java/org/elasticsearch/injection/InjectorTests.java new file mode 100644 index 0000000000000..025596e640896 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/injection/InjectorTests.java @@ -0,0 +1,154 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.injection; + +import org.elasticsearch.test.ESTestCase; + +import java.lang.invoke.MethodHandles; +import java.util.List; +import java.util.Set; + +public class InjectorTests extends ESTestCase { + + public record First() {} + + public record Second(First first) {} + + public record Third(First first, Second second) {} + + public record ExistingInstances(First first, Second second) {} + + public void testMultipleResultsMap() { + Injector injector = Injector.create().addClasses(List.of(Service1.class, Component3.class)); + var resultMap = injector.inject(List.of(Service1.class, Component3.class)); + assertEquals(Set.of(Service1.class, Component3.class), resultMap.keySet()); + Service1 service1 = (Service1) resultMap.get(Service1.class); + Component3 component3 = (Component3) resultMap.get(Component3.class); + assertSame(service1, component3.service1()); + } + + /** + * In most cases, if there are two objects that are instances of a class, that's ambiguous. + * However, if a concrete (non-abstract) superclass is configured directly, that is not ambiguous: + * the instance of that superclass takes precedence over any instances of any subclasses. + */ + public void testConcreteSubclass() { + MethodHandles.lookup(); + assertEquals( + Superclass.class, + Injector.create() + .addClasses(List.of(Superclass.class, Subclass.class)) // Superclass first + .inject(List.of(Superclass.class)) + .get(Superclass.class) + .getClass() + ); + MethodHandles.lookup(); + assertEquals( + Superclass.class, + Injector.create() + .addClasses(List.of(Subclass.class, Superclass.class)) // Subclass first + .inject(List.of(Superclass.class)) + .get(Superclass.class) + .getClass() + ); + MethodHandles.lookup(); + assertEquals( + Superclass.class, + Injector.create() + .addClasses(List.of(Subclass.class)) + .inject(List.of(Superclass.class)) // Superclass is not mentioned until here + .get(Superclass.class) + .getClass() + ); + } + + // + // Sad paths + // + + public void testBadInterfaceClass() { + assertThrows(IllegalStateException.class, () -> { + MethodHandles.lookup(); + Injector.create().addClass(Listener.class).inject(List.of()); + }); + } + + public void testBadUnknownType() { + // Injector knows only about Component4, discovers Listener, but can't find any subtypes + MethodHandles.lookup(); + Injector injector = Injector.create().addClass(Component4.class); + + assertThrows(IllegalStateException.class, () -> injector.inject(List.of())); + } + + public void testBadCircularDependency() { + assertThrows(IllegalStateException.class, () -> { + MethodHandles.lookup(); + Injector injector = Injector.create(); + injector.addClasses(List.of(Circular1.class, Circular2.class)).inject(List.of()); + }); + } + + /** + * For this one, we don't explicitly tell the injector about the classes involved in the cycle; + * it finds them on its own. 
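+ * (Discovery happens through constructor parameter types: {@code UsesCircular1} references
+ * {@code Circular1}, which refers back to itself via {@code Circular2}.)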
+ */ + public void testBadCircularDependencyViaParameter() { + record UsesCircular1(Circular1 circular1) {} + assertThrows(IllegalStateException.class, () -> { + MethodHandles.lookup(); + Injector.create().addClass(UsesCircular1.class).inject(List.of()); + }); + } + + public void testBadCircularDependencyViaSupertype() { + interface Service1 {} + record Service2(Service1 service1) {} + record Service3(Service2 service2) implements Service1 {} + assertThrows(IllegalStateException.class, () -> { + MethodHandles.lookup(); + Injector injector = Injector.create(); + injector.addClasses(List.of(Service2.class, Service3.class)).inject(List.of()); + }); + } + + // Common injectable things + + public record Service1() {} + + public interface Listener {} + + public record Component1() implements Listener {} + + public record Component2(Component1 component1) {} + + public record Component3(Service1 service1) {} + + public record Component4(Listener listener) {} + + public record GoodService(List components) {} + + public record BadService(List components) { + public BadService { + // Shouldn't be using the component list here! + assert components.isEmpty() == false; + } + } + + public record MultiService(List component1s, List component2s) {} + + public record Circular1(Circular2 service2) {} + + public record Circular2(Circular1 service2) {} + + public static class Superclass {} + + public static class Subclass extends Superclass {} + +} diff --git a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/Downsample.java b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/Downsample.java index a6ba4346b1a25..7dcda9c2b0032 100644 --- a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/Downsample.java +++ b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/Downsample.java @@ -137,6 +137,6 @@ public List getNamedWriteables() { @Override public Collection createComponents(PluginServices services) { - return List.of(new DownsampleMetrics(services.telemetryProvider().getMeterRegistry())); + return List.of(DownsampleMetrics.class); } } diff --git a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleMetrics.java b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleMetrics.java index c950658b411ed..b5ac4b0ae37a3 100644 --- a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleMetrics.java +++ b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DownsampleMetrics.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.downsample; import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.telemetry.TelemetryProvider; import org.elasticsearch.telemetry.metric.MeterRegistry; import java.io.IOException; @@ -36,8 +37,8 @@ public class DownsampleMetrics extends AbstractLifecycleComponent { private final MeterRegistry meterRegistry; - public DownsampleMetrics(MeterRegistry meterRegistry) { - this.meterRegistry = meterRegistry; + public DownsampleMetrics(TelemetryProvider telemetryProvider) { + this.meterRegistry = telemetryProvider.getMeterRegistry(); } @Override From 3dc21f99b6ceb0a4c233c3723a5a81ec40c0a9d6 Mon Sep 17 00:00:00 2001 From: Henning Andersen <33268011+henningandersen@users.noreply.github.com> Date: Wed, 28 Aug 2024 17:20:54 +0200 Subject: [PATCH 007/144] Avoid cache io field volatile accesses (#112184) The blob cache has an io field per region that is declared 
volatile, since it is originally null and then later initialized. However, during "tryRead" we do not need the volatile access. This commit changes the field to be non-volatile and use proper volatile accesses only when needed. --- .../shared/SharedBlobCacheService.java | 159 +++++++++++------- .../shared/SharedBlobCacheServiceTests.java | 9 +- 2 files changed, 105 insertions(+), 63 deletions(-) diff --git a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java index 6a55738b864d1..3dfece0a9b20e 100644 --- a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java +++ b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java @@ -311,9 +311,9 @@ private CacheEntry(T chunk) { private final int numRegions; private final ConcurrentLinkedQueue freeRegions = new ConcurrentLinkedQueue<>(); - private final Cache cache; + private final Cache> cache; - private final ConcurrentHashMap regionOwners; // to assert exclusive access of regions + private final ConcurrentHashMap> regionOwners; // to assert exclusive access of regions private final LongAdder writeCount = new LongAdder(); private final LongAdder writeBytes = new LongAdder(); @@ -471,7 +471,7 @@ public int getRegionSize() { return regionSize; } - CacheFileRegion get(KeyType cacheKey, long fileLength, int region) { + CacheFileRegion get(KeyType cacheKey, long fileLength, int region) { return cache.get(cacheKey, fileLength, region).chunk; } @@ -516,7 +516,7 @@ public boolean maybeFetchFullEntry( return true; } final ActionListener regionListener = refCountingListener.acquire(ignored -> {}); - final CacheFileRegion entry; + final CacheFileRegion entry; try { entry = get(cacheKey, length, region); } catch (AlreadyClosedException e) { @@ -583,7 +583,7 @@ public void maybeFetchRegion( listener.onResponse(false); return; } - final CacheFileRegion entry = get(cacheKey, blobLength, region); + final CacheFileRegion entry = get(cacheKey, blobLength, region); entry.populate(regionRange, writer, fetchExecutor, listener); } catch (Exception e) { listener.onFailure(e); @@ -631,7 +631,7 @@ public void maybeFetchRange( listener.onResponse(false); return; } - final CacheFileRegion entry = get(cacheKey, blobLength, region); + final CacheFileRegion entry = get(cacheKey, blobLength, region); entry.populate( regionRange, writerWithOffset(writer, Math.toIntExact(range.start() - getRegionStart(region))), @@ -705,7 +705,7 @@ public int forceEvict(Predicate cacheKeyPredicate) { } // used by tests - int getFreq(CacheFileRegion cacheFileRegion) { + int getFreq(CacheFileRegion cacheFileRegion) { if (cache instanceof LFUCache lfuCache) { return lfuCache.getFreq(cacheFileRegion); } @@ -787,25 +787,45 @@ protected boolean assertOffsetsWithinFileLength(long offset, long length, long f /** * While this class has incRef and tryIncRef methods, incRefEnsureOpen and tryIncrefEnsureOpen should * always be used, ensuring the right ordering between incRef/tryIncRef and ensureOpen - * (see {@link LFUCache#maybeEvictAndTakeForFrequency(Runnable, int)}) + * (see {@link SharedBlobCacheService.LFUCache#maybeEvictAndTakeForFrequency(Runnable, int)}) */ - class CacheFileRegion extends EvictableRefCounted { + static class CacheFileRegion extends EvictableRefCounted { + + private static final VarHandle VH_IO = findIOVarHandle(); + + private static 
VarHandle findIOVarHandle() { + try { + return MethodHandles.lookup().in(CacheFileRegion.class).findVarHandle(CacheFileRegion.class, "io", SharedBytes.IO.class); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + final SharedBlobCacheService blobCacheService; final RegionKey regionKey; final SparseFileTracker tracker; // io can be null when not init'ed or after evict/take - volatile SharedBytes.IO io = null; - - CacheFileRegion(RegionKey regionKey, int regionSize) { + // io does not need volatile access on the read path, since it goes from null to a single value (and then possbily back to null). + // "cache.get" never returns a `CacheFileRegion` without checking the value is non-null (with a volatile read, ensuring the value is + // visible in that thread). + // We assume any IndexInput passing among threads is done with proper happens-before semantics (otherwise they'd themselves break). + // In general, assertions should use `nonVolatileIO` (when they can) to access this over `volatileIO` to avoid memory visibility + // side effects + private SharedBytes.IO io = null; + + CacheFileRegion(SharedBlobCacheService blobCacheService, RegionKey regionKey, int regionSize) { + this.blobCacheService = blobCacheService; this.regionKey = regionKey; assert regionSize > 0; // NOTE we use a constant string for description to avoid consume extra heap space tracker = new SparseFileTracker("file", regionSize); } - public long physicalStartOffset() { - var ioRef = io; - return ioRef == null ? -1L : (long) regionKey.region * regionSize; + // only used for logging + private long physicalStartOffset() { + var ioRef = nonVolatileIO(); + return ioRef == null ? -1L : (long) regionKey.region * blobCacheService.regionSize; } public boolean tryIncRefEnsureOpen() { @@ -832,10 +852,10 @@ private void ensureOpenOrDecRef() { // tries to evict this chunk if noone is holding onto its resources anymore // visible for tests. boolean tryEvict() { - assert Thread.holdsLock(SharedBlobCacheService.this) : "must hold lock when evicting"; + assert Thread.holdsLock(blobCacheService) : "must hold lock when evicting"; if (refCount() <= 1 && evict()) { logger.trace("evicted {} with channel offset {}", regionKey, physicalStartOffset()); - evictCount.increment(); + blobCacheService.evictCount.increment(); decRef(); return true; } @@ -843,10 +863,10 @@ boolean tryEvict() { } boolean tryEvictNoDecRef() { - assert Thread.holdsLock(SharedBlobCacheService.this) : "must hold lock when evicting"; + assert Thread.holdsLock(blobCacheService) : "must hold lock when evicting"; if (refCount() <= 1 && evict()) { logger.trace("evicted and take {} with channel offset {}", regionKey, physicalStartOffset()); - evictCount.increment(); + blobCacheService.evictCount.increment(); return true; } @@ -854,10 +874,10 @@ boolean tryEvictNoDecRef() { } public boolean forceEvict() { - assert Thread.holdsLock(SharedBlobCacheService.this) : "must hold lock when evicting"; + assert Thread.holdsLock(blobCacheService) : "must hold lock when evicting"; if (evict()) { logger.trace("force evicted {} with channel offset {}", regionKey, physicalStartOffset()); - evictCount.increment(); + blobCacheService.evictCount.increment(); decRef(); return true; } @@ -868,9 +888,10 @@ public boolean forceEvict() { protected void closeInternal() { // now actually free the region associated with this chunk // we held the "this" lock when this was evicted, hence if io is not filled in, chunk will never be registered. 
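+ // (This volatile read pairs with the volatile write in assignToSlot: a null io here means
+ // the region was never published, so there is nothing to hand back to freeRegions.)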
+ SharedBytes.IO io = volatileIO(); if (io != null) { - assert regionOwners.remove(io) == this; - freeRegions.add(io); + assert blobCacheService.regionOwners.remove(io) == this; + blobCacheService.freeRegions.add(io); } logger.trace("closed {} with channel offset {}", regionKey, physicalStartOffset()); } @@ -879,14 +900,31 @@ private static void throwAlreadyEvicted() { throwAlreadyClosed("File chunk is evicted"); } + private SharedBytes.IO volatileIO() { + return (SharedBytes.IO) VH_IO.getVolatile(this); + } + + private void volatileIO(SharedBytes.IO io) { + VH_IO.setVolatile(this, io); + } + + private SharedBytes.IO nonVolatileIO() { + return io; + } + + // for use in tests *only* + SharedBytes.IO testOnlyNonVolatileIO() { + return io; + } + /** * Optimistically try to read from the region * @return true if successful, i.e., not evicted and data available, false if evicted */ boolean tryRead(ByteBuffer buf, long offset) throws IOException { - SharedBytes.IO ioRef = this.io; + SharedBytes.IO ioRef = nonVolatileIO(); if (ioRef != null) { - int readBytes = ioRef.read(buf, getRegionRelativePosition(offset)); + int readBytes = ioRef.read(buf, blobCacheService.getRegionRelativePosition(offset)); if (isEvicted()) { buf.position(buf.position() - readBytes); return false; @@ -922,7 +960,7 @@ void populate( rangeToWrite, rangeToWrite, Assertions.ENABLED ? ActionListener.releaseAfter(ActionListener.running(() -> { - assert regionOwners.get(io) == this; + assert blobCacheService.regionOwners.get(nonVolatileIO()) == this; }), refs.acquire()) : refs.acquireListener() ); if (gaps.isEmpty()) { @@ -958,8 +996,8 @@ void populateAndRead( rangeToWrite, rangeToRead, ActionListener.releaseAfter(listener, refs.acquire()).delegateFailureAndWrap((l, success) -> { - var ioRef = io; - assert regionOwners.get(ioRef) == this; + var ioRef = nonVolatileIO(); + assert blobCacheService.regionOwners.get(ioRef) == this; final int start = Math.toIntExact(rangeToRead.start()); final int read = reader.onRangeAvailable(ioRef, start, start, Math.toIntExact(rangeToRead.length())); assert read == rangeToRead.length() @@ -970,7 +1008,7 @@ void populateAndRead( + '-' + rangeToRead.start() + ']'; - readCount.increment(); + blobCacheService.readCount.increment(); l.onResponse(read); }) ); @@ -1016,8 +1054,8 @@ private Runnable fillGapRunnable( ActionListener listener ) { return () -> ActionListener.run(listener, l -> { - var ioRef = io; - assert regionOwners.get(ioRef) == CacheFileRegion.this; + var ioRef = nonVolatileIO(); + assert blobCacheService.regionOwners.get(ioRef) == CacheFileRegion.this; assert CacheFileRegion.this.hasReferences() : CacheFileRegion.this; int start = Math.toIntExact(gap.start()); writer.fillCacheRange( @@ -1028,9 +1066,9 @@ private Runnable fillGapRunnable( Math.toIntExact(gap.end() - start), progress -> gap.onProgress(start + progress), l.map(unused -> { - assert regionOwners.get(ioRef) == CacheFileRegion.this; + assert blobCacheService.regionOwners.get(ioRef) == CacheFileRegion.this; assert CacheFileRegion.this.hasReferences() : CacheFileRegion.this; - writeCount.increment(); + blobCacheService.writeCount.increment(); gap.onCompletion(); return null; }).delegateResponse((delegate, e) -> failGapAndListener(gap, delegate, e)) @@ -1058,7 +1096,7 @@ public class CacheFile { private final KeyType cacheKey; private final long length; - private CacheEntry lastAccessedRegion; + private CacheEntry> lastAccessedRegion; private CacheFile(KeyType cacheKey, long length) { this.cacheKey = cacheKey; @@ -1161,7 +1199,7 @@ 
private int readSingleRegion( int region ) throws InterruptedException, ExecutionException { final PlainActionFuture readFuture = new PlainActionFuture<>(); - final CacheFileRegion fileRegion = get(cacheKey, length, region); + final CacheFileRegion fileRegion = get(cacheKey, length, region); final long regionStart = getRegionStart(region); fileRegion.populateAndRead( mapSubRangeToRegion(rangeToWrite, region), @@ -1193,7 +1231,7 @@ private int readMultiRegions( } ActionListener listener = listeners.acquire(i -> bytesRead.updateAndGet(j -> Math.addExact(i, j))); try { - final CacheFileRegion fileRegion = get(cacheKey, length, region); + final CacheFileRegion fileRegion = get(cacheKey, length, region); final long regionStart = getRegionStart(region); fileRegion.populateAndRead( mapSubRangeToRegion(rangeToWrite, region), @@ -1213,7 +1251,7 @@ private int readMultiRegions( return bytesRead.get(); } - private RangeMissingHandler writerWithOffset(RangeMissingHandler writer, CacheFileRegion fileRegion, int writeOffset) { + private RangeMissingHandler writerWithOffset(RangeMissingHandler writer, CacheFileRegion fileRegion, int writeOffset) { final RangeMissingHandler adjustedWriter; if (writeOffset == 0) { // no need to allocate a new capturing lambda if the offset isn't adjusted @@ -1263,8 +1301,8 @@ public void fillCacheRange( len, progressUpdater, Assertions.ENABLED ? ActionListener.runBefore(completionListener, () -> { - assert regionOwners.get(fileRegion.io) == fileRegion - : "File chunk [" + fileRegion.regionKey + "] no longer owns IO [" + fileRegion.io + "]"; + assert regionOwners.get(fileRegion.nonVolatileIO()) == fileRegion + : "File chunk [" + fileRegion.regionKey + "] no longer owns IO [" + fileRegion.nonVolatileIO() + "]"; }) : completionListener ); } @@ -1274,7 +1312,7 @@ public void fillCacheRange( return adjustedWriter; } - private RangeAvailableHandler readerWithOffset(RangeAvailableHandler reader, CacheFileRegion fileRegion, int readOffset) { + private RangeAvailableHandler readerWithOffset(RangeAvailableHandler reader, CacheFileRegion fileRegion, int readOffset) { final RangeAvailableHandler adjustedReader = (channel, channelPos, relativePos, len) -> reader.onRangeAvailable( channel, channelPos, @@ -1285,18 +1323,18 @@ private RangeAvailableHandler readerWithOffset(RangeAvailableHandler reader, Cac return (channel, channelPos, relativePos, len) -> { assert assertValidRegionAndLength(fileRegion, channelPos, len); final int bytesRead = adjustedReader.onRangeAvailable(channel, channelPos, relativePos, len); - assert regionOwners.get(fileRegion.io) == fileRegion - : "File chunk [" + fileRegion.regionKey + "] no longer owns IO [" + fileRegion.io + "]"; + assert regionOwners.get(fileRegion.nonVolatileIO()) == fileRegion + : "File chunk [" + fileRegion.regionKey + "] no longer owns IO [" + fileRegion.nonVolatileIO() + "]"; return bytesRead; }; } return adjustedReader; } - private boolean assertValidRegionAndLength(CacheFileRegion fileRegion, int channelPos, int len) { - assert fileRegion.io != null; + private boolean assertValidRegionAndLength(CacheFileRegion fileRegion, int channelPos, int len) { + assert fileRegion.nonVolatileIO() != null; assert fileRegion.hasReferences(); - assert regionOwners.get(fileRegion.io) == fileRegion; + assert regionOwners.get(fileRegion.nonVolatileIO()) == fileRegion; assert channelPos >= 0 && channelPos + len <= regionSize; return true; } @@ -1421,15 +1459,15 @@ public record Stats( public static final Stats EMPTY = new Stats(0, 0L, 0L, 0L, 0L, 0L, 0L, 
0L); } - private class LFUCache implements Cache { + private class LFUCache implements Cache> { - class LFUCacheEntry extends CacheEntry { + class LFUCacheEntry extends CacheEntry> { LFUCacheEntry prev; LFUCacheEntry next; int freq; volatile long lastAccessedEpoch; - LFUCacheEntry(CacheFileRegion chunk, long lastAccessed) { + LFUCacheEntry(CacheFileRegion chunk, long lastAccessed) { super(chunk); this.lastAccessedEpoch = lastAccessed; // todo: consider whether freq=1 is still right for new entries. @@ -1467,7 +1505,7 @@ public void close() { decayAndNewEpochTask.close(); } - int getFreq(CacheFileRegion cacheFileRegion) { + int getFreq(CacheFileRegion cacheFileRegion) { return keyMapping.get(cacheFileRegion.regionKey).freq; } @@ -1480,12 +1518,15 @@ public LFUCacheEntry get(KeyType cacheKey, long fileLength, int region) { var entry = keyMapping.get(regionKey); if (entry == null) { final int effectiveRegionSize = computeCacheFileRegionSize(fileLength, region); - entry = keyMapping.computeIfAbsent(regionKey, key -> new LFUCacheEntry(new CacheFileRegion(key, effectiveRegionSize), now)); + entry = keyMapping.computeIfAbsent( + regionKey, + key -> new LFUCacheEntry(new CacheFileRegion(SharedBlobCacheService.this, key, effectiveRegionSize), now) + ); } - // io is volatile, double locking is fine, as long as we assign it last. - if (entry.chunk.io == null) { + // checks using volatile, double locking is fine, as long as we assign io last. + if (entry.chunk.volatileIO() == null) { synchronized (entry.chunk) { - if (entry.chunk.io == null && entry.chunk.isEvicted() == false) { + if (entry.chunk.volatileIO() == null && entry.chunk.isEvicted() == false) { return initChunk(entry); } } @@ -1515,7 +1556,7 @@ public int forceEvict(Predicate cacheKeyPredicate) { for (LFUCacheEntry entry : matchingEntries) { int frequency = entry.freq; boolean evicted = entry.chunk.forceEvict(); - if (evicted && entry.chunk.io != null) { + if (evicted && entry.chunk.volatileIO() != null) { unlink(entry); keyMapping.remove(entry.chunk.regionKey, entry); evictedCount++; @@ -1576,7 +1617,7 @@ private void assignToSlot(LFUCacheEntry entry, SharedBytes.IO freeSlot) { } pushEntryToBack(entry); // assign io only when chunk is ready for use. Under lock to avoid concurrent tryEvict. - entry.chunk.io = freeSlot; + entry.chunk.volatileIO(freeSlot); } } @@ -1641,7 +1682,7 @@ private boolean assertChunkActiveOrEvicted(LFUCacheEntry entry) { assert entry.prev != null || entry.chunk.isEvicted(); } - SharedBytes.IO io = entry.chunk.io; + SharedBytes.IO io = entry.chunk.nonVolatileIO(); assert io != null || entry.chunk.isEvicted(); assert io == null || regionOwners.get(io) == entry.chunk || entry.chunk.isEvicted(); return true; @@ -1764,13 +1805,13 @@ private SharedBytes.IO maybeEvictAndTakeForFrequency(Runnable evictedNotificatio boolean evicted = entry.chunk.tryEvictNoDecRef(); if (evicted) { try { - SharedBytes.IO ioRef = entry.chunk.io; + SharedBytes.IO ioRef = entry.chunk.volatileIO(); if (ioRef != null) { try { if (entry.chunk.refCount() == 1) { // we own that one refcount (since we CAS'ed evicted to 1) // grab io, rely on incref'ers also checking evicted field. 
- entry.chunk.io = null; + entry.chunk.volatileIO(null); assert regionOwners.remove(ioRef) == entry.chunk; return ioRef; } @@ -1809,7 +1850,7 @@ public boolean maybeEvictLeastUsed() { synchronized (SharedBlobCacheService.this) { for (LFUCacheEntry entry = freqs[0]; entry != null; entry = entry.next) { boolean evicted = entry.chunk.tryEvict(); - if (evicted && entry.chunk.io != null) { + if (evicted && entry.chunk.volatileIO() != null) { unlink(entry); keyMapping.remove(entry.chunk.regionKey, entry); return true; diff --git a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java index 597180a1d1c31..d7e8ad19382e5 100644 --- a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java +++ b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java @@ -149,7 +149,7 @@ public void testBasicEviction() throws IOException { } } - private static boolean tryEvict(SharedBlobCacheService.CacheFileRegion region1) { + private static boolean tryEvict(SharedBlobCacheService.CacheFileRegion region1) { if (randomBoolean()) { return region1.tryEvict(); } else { @@ -486,7 +486,7 @@ public void testGetMultiThreaded() throws IOException { ready.await(); for (int i = 0; i < iterations; ++i) { try { - SharedBlobCacheService.CacheFileRegion cacheFileRegion; + SharedBlobCacheService.CacheFileRegion cacheFileRegion; try { cacheFileRegion = cacheService.get(cacheKeys[i], fileLength, regions[i]); } catch (AlreadyClosedException e) { @@ -497,6 +497,7 @@ public void testGetMultiThreaded() throws IOException { if (yield[i] == 0) { Thread.yield(); } + assertNotNull(cacheFileRegion.testOnlyNonVolatileIO()); cacheFileRegion.decRef(); } if (evict[i] == 0) { @@ -865,7 +866,7 @@ public void testMaybeEvictLeastUsed() throws Exception { final DeterministicTaskQueue taskQueue = new DeterministicTaskQueue(); try ( NodeEnvironment environment = new NodeEnvironment(settings, TestEnvironment.newEnvironment(settings)); - var cacheService = new SharedBlobCacheService<>( + var cacheService = new SharedBlobCacheService( environment, settings, taskQueue.getThreadPool(), @@ -873,7 +874,7 @@ public void testMaybeEvictLeastUsed() throws Exception { BlobCacheMetrics.NOOP ) ) { - final Map.CacheFileRegion> cacheEntries = new HashMap<>(); + final Map> cacheEntries = new HashMap<>(); assertThat("All regions are free", cacheService.freeRegionCount(), equalTo(numRegions)); assertThat("Cache has no entries", cacheService.maybeEvictLeastUsed(), is(false)); From b20b8012d5437e51f966a4ac5abc05a2710367ba Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 01:32:17 +1000 Subject: [PATCH 008/144] Mute org.elasticsearch.search.query.ScriptScoreQueryTests testScriptTermStatsNotAvailable #112290 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 26bded1d09dc8..6498413e33cf5 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -166,6 +166,9 @@ tests: - class: org.elasticsearch.search.query.ScriptScoreQueryTests method: testScriptTermStatsAvailable issue: https://github.com/elastic/elasticsearch/issues/112278 +- class: org.elasticsearch.search.query.ScriptScoreQueryTests + method: testScriptTermStatsNotAvailable + issue: 
https://github.com/elastic/elasticsearch/issues/112290 # Examples: # From d832e6ee0cda82ecffde9fc28c9dd90c629e29e1 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 28 Aug 2024 17:36:29 +0100 Subject: [PATCH 009/144] Extract `BlobStoreCorruptionUtils` (#112229) Makes these utility methods available to other test suites (to be added in future PRs). Relates #111954 --- .../blobstore/BlobStoreCorruptionIT.java | 68 +------------- .../blobstore/BlobStoreCorruptionUtils.java | 89 +++++++++++++++++++ 2 files changed, 90 insertions(+), 67 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionUtils.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionIT.java index 422696d6b61c6..4665dc486a904 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionIT.java @@ -8,7 +8,6 @@ package org.elasticsearch.repositories.blobstore; -import org.apache.lucene.tests.mockfile.ExtrasFS; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; @@ -23,18 +22,10 @@ import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.snapshots.SnapshotState; -import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; import org.junit.Before; -import java.io.IOException; -import java.nio.file.FileVisitResult; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.SimpleFileVisitor; -import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; -import java.util.Base64; import java.util.List; public class BlobStoreCorruptionIT extends AbstractSnapshotIntegTestCase { @@ -57,7 +48,7 @@ public void testCorruptionDetection() throws Exception { flushAndRefresh(indexName); createSnapshot(repositoryName, snapshotName, List.of(indexName)); - final var corruptedFile = corruptRandomFile(repositoryRootPath); + final var corruptedFile = BlobStoreCorruptionUtils.corruptRandomFile(repositoryRootPath); final var corruptedFileType = RepositoryFileType.getRepositoryFileType(repositoryRootPath, corruptedFile); final var corruptionDetectors = new ArrayList, ?>>(); @@ -126,61 +117,4 @@ public void testCorruptionDetection() throws Exception { logger.info(Strings.format("--> corrupted [%s] and caught exception", corruptedFile), exception); } } - - private static Path corruptRandomFile(Path repositoryRootPath) throws IOException { - final var corruptedFileType = getRandomCorruptibleFileType(); - final var corruptedFile = getRandomFileToCorrupt(repositoryRootPath, corruptedFileType); - if (randomBoolean()) { - logger.info("--> deleting [{}]", corruptedFile); - Files.delete(corruptedFile); - } else { - corruptFileContents(corruptedFile); - } - return corruptedFile; - } - - private static void corruptFileContents(Path fileToCorrupt) throws IOException { - final var oldFileContents = Files.readAllBytes(fileToCorrupt); - logger.info("--> contents of [{}] before corruption: [{}]", fileToCorrupt, Base64.getEncoder().encodeToString(oldFileContents)); - final byte[] 
newFileContents = new byte[randomBoolean() ? oldFileContents.length : between(0, oldFileContents.length)]; - System.arraycopy(oldFileContents, 0, newFileContents, 0, newFileContents.length); - if (newFileContents.length == oldFileContents.length) { - final var corruptionPosition = between(0, newFileContents.length - 1); - newFileContents[corruptionPosition] = randomValueOtherThan(oldFileContents[corruptionPosition], ESTestCase::randomByte); - logger.info( - "--> updating byte at position [{}] from [{}] to [{}]", - corruptionPosition, - oldFileContents[corruptionPosition], - newFileContents[corruptionPosition] - ); - } else { - logger.info("--> truncating file from length [{}] to length [{}]", oldFileContents.length, newFileContents.length); - } - Files.write(fileToCorrupt, newFileContents); - logger.info("--> contents of [{}] after corruption: [{}]", fileToCorrupt, Base64.getEncoder().encodeToString(newFileContents)); - } - - private static RepositoryFileType getRandomCorruptibleFileType() { - return randomValueOtherThanMany( - // these blob types do not have reliable corruption detection, so we must skip them - t -> t == RepositoryFileType.ROOT_INDEX_N || t == RepositoryFileType.ROOT_INDEX_LATEST, - () -> randomFrom(RepositoryFileType.values()) - ); - } - - private static Path getRandomFileToCorrupt(Path repositoryRootPath, RepositoryFileType corruptedFileType) throws IOException { - final var corruptibleFiles = new ArrayList(); - Files.walkFileTree(repositoryRootPath, new SimpleFileVisitor<>() { - @Override - public FileVisitResult visitFile(Path filePath, BasicFileAttributes attrs) throws IOException { - if (ExtrasFS.isExtra(filePath.getFileName().toString()) == false - && RepositoryFileType.getRepositoryFileType(repositoryRootPath, filePath) == corruptedFileType) { - corruptibleFiles.add(filePath); - } - return super.visitFile(filePath, attrs); - } - }); - return randomFrom(corruptibleFiles); - } - } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionUtils.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionUtils.java new file mode 100644 index 0000000000000..3670013f571e0 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionUtils.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.repositories.blobstore; + +import org.apache.lucene.tests.mockfile.ExtrasFS; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.ArrayList; +import java.util.Base64; + +import static org.elasticsearch.test.ESTestCase.between; +import static org.elasticsearch.test.ESTestCase.randomBoolean; +import static org.elasticsearch.test.ESTestCase.randomFrom; +import static org.elasticsearch.test.ESTestCase.randomValueOtherThan; +import static org.elasticsearch.test.ESTestCase.randomValueOtherThanMany; + +public class BlobStoreCorruptionUtils { + private static final Logger logger = LogManager.getLogger(BlobStoreCorruptionUtils.class); + + public static Path corruptRandomFile(Path repositoryRootPath) throws IOException { + final var corruptedFileType = getRandomCorruptibleFileType(); + final var corruptedFile = getRandomFileToCorrupt(repositoryRootPath, corruptedFileType); + if (randomBoolean()) { + logger.info("--> deleting [{}]", corruptedFile); + Files.delete(corruptedFile); + } else { + corruptFileContents(corruptedFile); + } + return corruptedFile; + } + + public static void corruptFileContents(Path fileToCorrupt) throws IOException { + final var oldFileContents = Files.readAllBytes(fileToCorrupt); + logger.info("--> contents of [{}] before corruption: [{}]", fileToCorrupt, Base64.getEncoder().encodeToString(oldFileContents)); + final byte[] newFileContents = new byte[randomBoolean() ? oldFileContents.length : between(0, oldFileContents.length)]; + System.arraycopy(oldFileContents, 0, newFileContents, 0, newFileContents.length); + if (newFileContents.length == oldFileContents.length) { + final var corruptionPosition = between(0, newFileContents.length - 1); + newFileContents[corruptionPosition] = randomValueOtherThan(oldFileContents[corruptionPosition], ESTestCase::randomByte); + logger.info( + "--> updating byte at position [{}] from [{}] to [{}]", + corruptionPosition, + oldFileContents[corruptionPosition], + newFileContents[corruptionPosition] + ); + } else { + logger.info("--> truncating file from length [{}] to length [{}]", oldFileContents.length, newFileContents.length); + } + Files.write(fileToCorrupt, newFileContents); + logger.info("--> contents of [{}] after corruption: [{}]", fileToCorrupt, Base64.getEncoder().encodeToString(newFileContents)); + } + + public static RepositoryFileType getRandomCorruptibleFileType() { + return randomValueOtherThanMany( + // these blob types do not have reliable corruption detection, so we must skip them + t -> t == RepositoryFileType.ROOT_INDEX_N || t == RepositoryFileType.ROOT_INDEX_LATEST, + () -> randomFrom(RepositoryFileType.values()) + ); + } + + public static Path getRandomFileToCorrupt(Path repositoryRootPath, RepositoryFileType corruptedFileType) throws IOException { + final var corruptibleFiles = new ArrayList(); + Files.walkFileTree(repositoryRootPath, new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path filePath, BasicFileAttributes attrs) throws IOException { + if (ExtrasFS.isExtra(filePath.getFileName().toString()) == false + && RepositoryFileType.getRepositoryFileType(repositoryRootPath, filePath) == corruptedFileType) { + corruptibleFiles.add(filePath); + } + return 
super.visitFile(filePath, attrs); + } + }); + return randomFrom(corruptibleFiles); + } +} From 32374dbfe046cbf7283af6b02b333b45fe3dfc90 Mon Sep 17 00:00:00 2001 From: Ankita Kumar Date: Wed, 28 Aug 2024 13:05:58 -0400 Subject: [PATCH 010/144] Metrics for Reindexing (#111845) This PR adds metrics for the Reindexing plugin, to measure the end-to-end time taken by a reindex request, update-by-query request and delete-by-query request. --- .../index/reindex/ReindexPluginMetricsIT.java | 216 ++++++++++++++++++ .../reindex/DeleteByQueryMetrics.java | 33 +++ .../elasticsearch/reindex/ReindexMetrics.java | 32 +++ .../elasticsearch/reindex/ReindexPlugin.java | 8 +- .../org/elasticsearch/reindex/Reindexer.java | 16 +- .../reindex/TransportDeleteByQueryAction.java | 25 +- .../reindex/TransportReindexAction.java | 12 +- .../reindex/TransportUpdateByQueryAction.java | 25 +- .../reindex/UpdateByQueryMetrics.java | 33 +++ .../reindex/DeleteByQueryMetricsTests.java | 39 ++++ .../reindex/ReindexMetricsTests.java | 40 ++++ .../reindex/UpdateByQueryMetricsTests.java | 40 ++++ .../reindex/UpdateByQueryWithScriptTests.java | 1 + .../action/TransportEnrichReindexAction.java | 3 +- 14 files changed, 507 insertions(+), 16 deletions(-) create mode 100644 modules/reindex/src/internalClusterTest/java/org/elasticsearch/index/reindex/ReindexPluginMetricsIT.java create mode 100644 modules/reindex/src/main/java/org/elasticsearch/reindex/DeleteByQueryMetrics.java create mode 100644 modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexMetrics.java create mode 100644 modules/reindex/src/main/java/org/elasticsearch/reindex/UpdateByQueryMetrics.java create mode 100644 modules/reindex/src/test/java/org/elasticsearch/reindex/DeleteByQueryMetricsTests.java create mode 100644 modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexMetricsTests.java create mode 100644 modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryMetricsTests.java diff --git a/modules/reindex/src/internalClusterTest/java/org/elasticsearch/index/reindex/ReindexPluginMetricsIT.java b/modules/reindex/src/internalClusterTest/java/org/elasticsearch/index/reindex/ReindexPluginMetricsIT.java new file mode 100644 index 0000000000000..e7d26b0808a48 --- /dev/null +++ b/modules/reindex/src/internalClusterTest/java/org/elasticsearch/index/reindex/ReindexPluginMetricsIT.java @@ -0,0 +1,216 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.reindex; + +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.PluginsService; +import org.elasticsearch.reindex.BulkIndexByScrollResponseMatcher; +import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.TestTelemetryPlugin; +import org.elasticsearch.test.ESIntegTestCase; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.elasticsearch.index.query.QueryBuilders.termQuery; +import static org.elasticsearch.reindex.DeleteByQueryMetrics.DELETE_BY_QUERY_TIME_HISTOGRAM; +import static org.elasticsearch.reindex.ReindexMetrics.REINDEX_TIME_HISTOGRAM; +import static org.elasticsearch.reindex.UpdateByQueryMetrics.UPDATE_BY_QUERY_TIME_HISTOGRAM; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.equalTo; + +@ESIntegTestCase.ClusterScope(numDataNodes = 0, numClientNodes = 0, scope = ESIntegTestCase.Scope.TEST) +public class ReindexPluginMetricsIT extends ESIntegTestCase { + @Override + protected Collection> nodePlugins() { + return Arrays.asList(ReindexPlugin.class, TestTelemetryPlugin.class); + } + + protected ReindexRequestBuilder reindex() { + return new ReindexRequestBuilder(client()); + } + + protected UpdateByQueryRequestBuilder updateByQuery() { + return new UpdateByQueryRequestBuilder(client()); + } + + protected DeleteByQueryRequestBuilder deleteByQuery() { + return new DeleteByQueryRequestBuilder(client()); + } + + public static BulkIndexByScrollResponseMatcher matcher() { + return new BulkIndexByScrollResponseMatcher(); + } + + public void testReindexMetrics() throws Exception { + final String dataNodeName = internalCluster().startNode(); + + indexRandom( + true, + prepareIndex("source").setId("1").setSource("foo", "a"), + prepareIndex("source").setId("2").setSource("foo", "a"), + prepareIndex("source").setId("3").setSource("foo", "b"), + prepareIndex("source").setId("4").setSource("foo", "c") + ); + assertHitCount(prepareSearch("source").setSize(0), 4); + + final TestTelemetryPlugin testTelemetryPlugin = internalCluster().getInstance(PluginsService.class, dataNodeName) + .filterPlugins(TestTelemetryPlugin.class) + .findFirst() + .orElseThrow(); + + // Copy all the docs + reindex().source("source").destination("dest").get(); + // Use assertBusy to wait for all threads to complete so we get deterministic results + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(REINDEX_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(1)); + }); + + // Now none of them + createIndex("none"); + reindex().source("source").destination("none").filter(termQuery("foo", "no_match")).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(REINDEX_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(2)); + }); + + // Now half of them + reindex().source("source").destination("dest_half").filter(termQuery("foo", "a")).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(REINDEX_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(3)); + }); + + // Limit with maxDocs + 
reindex().source("source").destination("dest_size_one").maxDocs(1).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(REINDEX_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(4)); + }); + } + + public void testDeleteByQueryMetrics() throws Exception { + final String dataNodeName = internalCluster().startNode(); + + indexRandom( + true, + prepareIndex("test").setId("1").setSource("foo", "a"), + prepareIndex("test").setId("2").setSource("foo", "a"), + prepareIndex("test").setId("3").setSource("foo", "b"), + prepareIndex("test").setId("4").setSource("foo", "c"), + prepareIndex("test").setId("5").setSource("foo", "d"), + prepareIndex("test").setId("6").setSource("foo", "e"), + prepareIndex("test").setId("7").setSource("foo", "f") + ); + + assertHitCount(prepareSearch("test").setSize(0), 7); + + final TestTelemetryPlugin testTelemetryPlugin = internalCluster().getInstance(PluginsService.class, dataNodeName) + .filterPlugins(TestTelemetryPlugin.class) + .findFirst() + .orElseThrow(); + + // Deletes two docs that matches "foo:a" + deleteByQuery().source("test").filter(termQuery("foo", "a")).refresh(true).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(DELETE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(1)); + }); + + // Deletes the two first docs with limit by size + DeleteByQueryRequestBuilder request = deleteByQuery().source("test").filter(QueryBuilders.matchAllQuery()).size(2).refresh(true); + request.source().addSort("foo.keyword", SortOrder.ASC); + request.get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(DELETE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(2)); + }); + + // Deletes but match no docs + deleteByQuery().source("test").filter(termQuery("foo", "no_match")).refresh(true).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(DELETE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(3)); + }); + + // Deletes all remaining docs + deleteByQuery().source("test").filter(QueryBuilders.matchAllQuery()).refresh(true).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(DELETE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(4)); + }); + } + + public void testUpdateByQueryMetrics() throws Exception { + final String dataNodeName = internalCluster().startNode(); + + indexRandom( + true, + prepareIndex("test").setId("1").setSource("foo", "a"), + prepareIndex("test").setId("2").setSource("foo", "a"), + prepareIndex("test").setId("3").setSource("foo", "b"), + prepareIndex("test").setId("4").setSource("foo", "c") + ); + assertHitCount(prepareSearch("test").setSize(0), 4); + assertEquals(1, client().prepareGet("test", "1").get().getVersion()); + assertEquals(1, client().prepareGet("test", "4").get().getVersion()); + + final TestTelemetryPlugin testTelemetryPlugin = internalCluster().getInstance(PluginsService.class, dataNodeName) + .filterPlugins(TestTelemetryPlugin.class) + .findFirst() + .orElseThrow(); + + // Reindex all the docs + updateByQuery().source("test").refresh(true).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = 
testTelemetryPlugin.getLongHistogramMeasurement(UPDATE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(1)); + }); + + // Now none of them + updateByQuery().source("test").filter(termQuery("foo", "no_match")).refresh(true).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(UPDATE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(2)); + }); + + // Now half of them + updateByQuery().source("test").filter(termQuery("foo", "a")).refresh(true).get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(UPDATE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(3)); + }); + + // Limit with size + UpdateByQueryRequestBuilder request = updateByQuery().source("test").size(3).refresh(true); + request.source().addSort("foo.keyword", SortOrder.ASC); + request.get(); + assertBusy(() -> { + testTelemetryPlugin.collect(); + List measurements = testTelemetryPlugin.getLongHistogramMeasurement(UPDATE_BY_QUERY_TIME_HISTOGRAM); + assertThat(measurements.size(), equalTo(4)); + }); + } +} diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/DeleteByQueryMetrics.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/DeleteByQueryMetrics.java new file mode 100644 index 0000000000000..2cedf0d5f5823 --- /dev/null +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/DeleteByQueryMetrics.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.reindex; + +import org.elasticsearch.telemetry.metric.LongHistogram; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +public class DeleteByQueryMetrics { + public static final String DELETE_BY_QUERY_TIME_HISTOGRAM = "es.delete_by_query.duration.histogram"; + + private final LongHistogram deleteByQueryTimeSecsHistogram; + + public DeleteByQueryMetrics(MeterRegistry meterRegistry) { + this( + meterRegistry.registerLongHistogram(DELETE_BY_QUERY_TIME_HISTOGRAM, "Time taken to execute Delete by Query request", "seconds") + ); + } + + private DeleteByQueryMetrics(LongHistogram deleteByQueryTimeSecsHistogram) { + this.deleteByQueryTimeSecsHistogram = deleteByQueryTimeSecsHistogram; + } + + public long recordTookTime(long tookTime) { + deleteByQueryTimeSecsHistogram.record(tookTime); + return tookTime; + } +} diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexMetrics.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexMetrics.java new file mode 100644 index 0000000000000..3025357aa6538 --- /dev/null +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexMetrics.java @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + + package org.elasticsearch.reindex; + + import org.elasticsearch.telemetry.metric.LongHistogram; + import org.elasticsearch.telemetry.metric.MeterRegistry; + + public class ReindexMetrics { + + public static final String REINDEX_TIME_HISTOGRAM = "es.reindex.duration.histogram"; + + private final LongHistogram reindexTimeSecsHistogram; + + public ReindexMetrics(MeterRegistry meterRegistry) { + this(meterRegistry.registerLongHistogram(REINDEX_TIME_HISTOGRAM, "Time to reindex by search", "seconds")); + } + + private ReindexMetrics(LongHistogram reindexTimeSecsHistogram) { + this.reindexTimeSecsHistogram = reindexTimeSecsHistogram; + } + + public long recordTookTime(long tookTime) { + reindexTimeSecsHistogram.record(tookTime); + return tookTime; + } +} diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexPlugin.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexPlugin.java index 1a40f77250e5f..3169d4c4ee1fb 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexPlugin.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexPlugin.java @@ -34,7 +34,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.List; import java.util.function.Predicate; import java.util.function.Supplier; @@ -85,8 +84,11 @@ public List getRestHandlers( @Override public Collection createComponents(PluginServices services) { - return Collections.singletonList( - new ReindexSslConfig(services.environment().settings(), services.environment(), services.resourceWatcherService()) + return List.of( + new ReindexSslConfig(services.environment().settings(), services.environment(), services.resourceWatcherService()), + new ReindexMetrics(services.telemetryProvider().getMeterRegistry()), + new UpdateByQueryMetrics(services.telemetryProvider().getMeterRegistry()), + new DeleteByQueryMetrics(services.telemetryProvider().getMeterRegistry()) ); } diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java index dbe1968bb076a..cb393a42f52a1 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/Reindexer.java @@ -37,6 +37,7 @@ import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.VersionType; @@ -65,6 +66,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; import java.util.function.LongSupplier; @@ -82,19 +84,22 @@ public class Reindexer { private final ThreadPool threadPool; private final ScriptService scriptService; private final ReindexSslConfig reindexSslConfig; + private final ReindexMetrics reindexMetrics; Reindexer( ClusterService clusterService, Client client, ThreadPool threadPool, ScriptService scriptService, - ReindexSslConfig reindexSslConfig + ReindexSslConfig reindexSslConfig, + @Nullable ReindexMetrics reindexMetrics ) { this.clusterService = clusterService; this.client = client; this.threadPool = threadPool; this.scriptService = scriptService;
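+        // reindexMetrics may be null: callers outside the reindex plugin (such as the enrich
+        // module's TransportEnrichReindexAction and some unit tests later in this patch) pass
+        // null, which is why every recording site below guards with a null check.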
this.reindexSslConfig = reindexSslConfig; + this.reindexMetrics = reindexMetrics; } public void initTask(BulkByScrollTask task, ReindexRequest request, ActionListener listener) { @@ -102,6 +107,8 @@ public void initTask(BulkByScrollTask task, ReindexRequest request, ActionListen } public void execute(BulkByScrollTask task, ReindexRequest request, Client bulkClient, ActionListener listener) { + long startTime = System.nanoTime(); + BulkByScrollParallelizationHelper.executeSlicedAction( task, request, @@ -122,7 +129,12 @@ public void execute(BulkByScrollTask task, ReindexRequest request, Client bulkCl clusterService.state(), reindexSslConfig, request, - listener + ActionListener.runAfter(listener, () -> { + long elapsedTime = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startTime); + if (reindexMetrics != null) { + reindexMetrics.recordTookTime(elapsedTime); + } + }) ); searchAction.start(); } diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportDeleteByQueryAction.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportDeleteByQueryAction.java index 755587feb47d3..53381c33d7f78 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportDeleteByQueryAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportDeleteByQueryAction.java @@ -15,6 +15,7 @@ import org.elasticsearch.client.internal.ParentTaskAssigningClient; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.reindex.BulkByScrollResponse; import org.elasticsearch.index.reindex.BulkByScrollTask; import org.elasticsearch.index.reindex.DeleteByQueryAction; @@ -25,12 +26,15 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; +import java.util.concurrent.TimeUnit; + public class TransportDeleteByQueryAction extends HandledTransportAction { private final ThreadPool threadPool; private final Client client; private final ScriptService scriptService; private final ClusterService clusterService; + private final DeleteByQueryMetrics deleteByQueryMetrics; @Inject public TransportDeleteByQueryAction( @@ -39,18 +43,21 @@ public TransportDeleteByQueryAction( Client client, TransportService transportService, ScriptService scriptService, - ClusterService clusterService + ClusterService clusterService, + @Nullable DeleteByQueryMetrics deleteByQueryMetrics ) { super(DeleteByQueryAction.NAME, transportService, actionFilters, DeleteByQueryRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.threadPool = threadPool; this.client = client; this.scriptService = scriptService; this.clusterService = clusterService; + this.deleteByQueryMetrics = deleteByQueryMetrics; } @Override public void doExecute(Task task, DeleteByQueryRequest request, ActionListener listener) { BulkByScrollTask bulkByScrollTask = (BulkByScrollTask) task; + long startTime = System.nanoTime(); BulkByScrollParallelizationHelper.startSlicedAction( request, bulkByScrollTask, @@ -64,8 +71,20 @@ public void doExecute(Task task, DeleteByQueryRequest request, ActionListener { + long elapsedTime = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startTime); + if (deleteByQueryMetrics != null) { + deleteByQueryMetrics.recordTookTime(elapsedTime); + } + }) + ).start(); } ); } diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportReindexAction.java 
b/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportReindexAction.java index a86af2ca2b83e..821a137ac7566 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportReindexAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportReindexAction.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.reindex.BulkByScrollResponse; import org.elasticsearch.index.reindex.BulkByScrollTask; import org.elasticsearch.index.reindex.ReindexAction; @@ -53,7 +54,8 @@ public TransportReindexAction( AutoCreateIndex autoCreateIndex, Client client, TransportService transportService, - ReindexSslConfig sslConfig + ReindexSslConfig sslConfig, + @Nullable ReindexMetrics reindexMetrics ) { this( ReindexAction.NAME, @@ -66,7 +68,8 @@ public TransportReindexAction( autoCreateIndex, client, transportService, - sslConfig + sslConfig, + reindexMetrics ); } @@ -81,12 +84,13 @@ protected TransportReindexAction( AutoCreateIndex autoCreateIndex, Client client, TransportService transportService, - ReindexSslConfig sslConfig + ReindexSslConfig sslConfig, + @Nullable ReindexMetrics reindexMetrics ) { super(name, transportService, actionFilters, ReindexRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.client = client; this.reindexValidator = new ReindexValidator(settings, clusterService, indexNameExpressionResolver, autoCreateIndex); - this.reindexer = new Reindexer(clusterService, client, threadPool, scriptService, sslConfig); + this.reindexer = new Reindexer(clusterService, client, threadPool, scriptService, sslConfig, reindexMetrics); } @Override diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportUpdateByQueryAction.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportUpdateByQueryAction.java index fc0bfa3c8a214..997d4d32fe042 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportUpdateByQueryAction.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/TransportUpdateByQueryAction.java @@ -18,6 +18,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.reindex.BulkByScrollResponse; import org.elasticsearch.index.reindex.BulkByScrollTask; import org.elasticsearch.index.reindex.ScrollableHitSource; @@ -35,6 +36,7 @@ import org.elasticsearch.transport.TransportService; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.function.BiFunction; import java.util.function.LongSupplier; @@ -44,6 +46,7 @@ public class TransportUpdateByQueryAction extends HandledTransportAction listener) { BulkByScrollTask bulkByScrollTask = (BulkByScrollTask) task; + long startTime = System.nanoTime(); BulkByScrollParallelizationHelper.startSlicedAction( request, bulkByScrollTask, @@ -78,8 +84,21 @@ protected void doExecute(Task task, UpdateByQueryRequest request, ActionListener clusterService.localNode(), bulkByScrollTask ); - new AsyncIndexBySearchAction(bulkByScrollTask, logger, assigningClient, threadPool, scriptService, request, state, listener) - .start(); + new AsyncIndexBySearchAction( + bulkByScrollTask, + logger, + assigningClient, + threadPool, + scriptService, + request, + 
state, + ActionListener.runAfter(listener, () -> { + long elapsedTime = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startTime); + if (updateByQueryMetrics != null) { + updateByQueryMetrics.recordTookTime(elapsedTime); + } + }) + ).start(); } ); } diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/UpdateByQueryMetrics.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/UpdateByQueryMetrics.java new file mode 100644 index 0000000000000..6ca52769a1ba9 --- /dev/null +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/UpdateByQueryMetrics.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.reindex; + +import org.elasticsearch.telemetry.metric.LongHistogram; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +public class UpdateByQueryMetrics { + public static final String UPDATE_BY_QUERY_TIME_HISTOGRAM = "es.update_by_query.duration.histogram"; + + private final LongHistogram updateByQueryTimeSecsHistogram; + + public UpdateByQueryMetrics(MeterRegistry meterRegistry) { + this( + meterRegistry.registerLongHistogram(UPDATE_BY_QUERY_TIME_HISTOGRAM, "Time taken to execute Update by Query request", "seconds") + ); + } + + private UpdateByQueryMetrics(LongHistogram updateByQueryTimeSecsHistogram) { + this.updateByQueryTimeSecsHistogram = updateByQueryTimeSecsHistogram; + } + + public long recordTookTime(long tookTime) { + updateByQueryTimeSecsHistogram.record(tookTime); + return tookTime; + } +} diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/DeleteByQueryMetricsTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/DeleteByQueryMetricsTests.java new file mode 100644 index 0000000000000..58adc6aebaa9b --- /dev/null +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/DeleteByQueryMetricsTests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.reindex; + +import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.RecordingMeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.List; + +import static org.elasticsearch.reindex.DeleteByQueryMetrics.DELETE_BY_QUERY_TIME_HISTOGRAM; + +public class DeleteByQueryMetricsTests extends ESTestCase { + private RecordingMeterRegistry recordingMeterRegistry; + private DeleteByQueryMetrics metrics; + + @Before + public void createMetrics() { + recordingMeterRegistry = new RecordingMeterRegistry(); + metrics = new DeleteByQueryMetrics(recordingMeterRegistry); + } + + public void testRecordTookTime() { + int secondsTaken = randomIntBetween(1, 50); + metrics.recordTookTime(secondsTaken); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_HISTOGRAM, DELETE_BY_QUERY_TIME_HISTOGRAM); + assertEquals(measurements.size(), 1); + assertEquals(measurements.get(0).getLong(), secondsTaken); + } +} diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexMetricsTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexMetricsTests.java new file mode 100644 index 0000000000000..4711530585817 --- /dev/null +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexMetricsTests.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.reindex; + +import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.RecordingMeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.List; + +import static org.elasticsearch.reindex.ReindexMetrics.REINDEX_TIME_HISTOGRAM; + +public class ReindexMetricsTests extends ESTestCase { + + private RecordingMeterRegistry recordingMeterRegistry; + private ReindexMetrics metrics; + + @Before + public void createMetrics() { + recordingMeterRegistry = new RecordingMeterRegistry(); + metrics = new ReindexMetrics(recordingMeterRegistry); + } + + public void testRecordTookTime() { + int secondsTaken = randomIntBetween(1, 50); + metrics.recordTookTime(secondsTaken); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_HISTOGRAM, REINDEX_TIME_HISTOGRAM); + assertEquals(measurements.size(), 1); + assertEquals(measurements.get(0).getLong(), secondsTaken); + } +} diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryMetricsTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryMetricsTests.java new file mode 100644 index 0000000000000..548d18d202984 --- /dev/null +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryMetricsTests.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.reindex; + +import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.RecordingMeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.List; + +import static org.elasticsearch.reindex.UpdateByQueryMetrics.UPDATE_BY_QUERY_TIME_HISTOGRAM; + +public class UpdateByQueryMetricsTests extends ESTestCase { + + private RecordingMeterRegistry recordingMeterRegistry; + private UpdateByQueryMetrics metrics; + + @Before + public void createMetrics() { + recordingMeterRegistry = new RecordingMeterRegistry(); + metrics = new UpdateByQueryMetrics(recordingMeterRegistry); + } + + public void testRecordTookTime() { + int secondsTaken = randomIntBetween(1, 50); + metrics.recordTookTime(secondsTaken); + List measurements = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_HISTOGRAM, UPDATE_BY_QUERY_TIME_HISTOGRAM); + assertEquals(measurements.size(), 1); + assertEquals(measurements.get(0).getLong(), secondsTaken); + } +} diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryWithScriptTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryWithScriptTests.java index 876ddefda161b..c4d591f804750 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryWithScriptTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryWithScriptTests.java @@ -60,6 +60,7 @@ protected TransportUpdateByQueryAction.AsyncIndexBySearchAction action(ScriptSer null, transportService, scriptService, + null, null ); return new TransportUpdateByQueryAction.AsyncIndexBySearchAction( diff --git a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/TransportEnrichReindexAction.java b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/TransportEnrichReindexAction.java index 0eeb85f4574f7..cc42199ab1019 100644 --- a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/TransportEnrichReindexAction.java +++ b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/TransportEnrichReindexAction.java @@ -61,7 +61,8 @@ public TransportEnrichReindexAction( autoCreateIndex, client, transportService, - new ReindexSslConfig(settings, environment, watcherService) + new ReindexSslConfig(settings, environment, watcherService), + null ); this.bulkClient = new OriginSettingClient(client, ENRICH_ORIGIN); } From e7c0ba953a2f03ecdea9e848478356054689c137 Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Wed, 28 Aug 2024 19:18:58 +0200 Subject: [PATCH 011/144] Update IronBank docker image base to ubi:9.4 (#111743) --- distribution/docker/src/docker/Dockerfile | 2 +- .../docker/src/docker/iron_bank/hardening_manifest.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/distribution/docker/src/docker/Dockerfile b/distribution/docker/src/docker/Dockerfile index 32f35b05015b9..2a2a77a6df820 100644 --- a/distribution/docker/src/docker/Dockerfile +++ b/distribution/docker/src/docker/Dockerfile @@ -22,7 +22,7 @@ <% if (docker_base == 'iron_bank') { %> ARG BASE_REGISTRY=registry1.dso.mil ARG BASE_IMAGE=ironbank/redhat/ubi/ubi9 -ARG BASE_TAG=9.3 +ARG 
BASE_TAG=9.4 <% } %> ################################################################################ diff --git a/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml b/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml index 38ce16a413af2..f4364c5008c09 100644 --- a/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml +++ b/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml @@ -14,7 +14,7 @@ tags: # Build args passed to Dockerfile ARGs args: BASE_IMAGE: "redhat/ubi/ubi9" - BASE_TAG: "9.3" + BASE_TAG: "9.4" # Docker image labels labels: From 68b7b7f7fb520056ec9fcbbe5b6b66bce51027d2 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 28 Aug 2024 13:42:31 -0400 Subject: [PATCH 012/144] ESQL: Migrate more physical plan writeable (#112248) Migrates a few more of our physical plan nodes to `NamedWriteable`. --- .../xpack/esql/io/stream/PlanNamedTypes.java | 76 +-------------- .../xpack/esql/plan/logical/EsRelation.java | 21 ++++- .../xpack/esql/plan/physical/EsQueryExec.java | 60 +++++++++++- .../esql/plan/physical/EsSourceExec.java | 5 +- .../xpack/esql/plan/physical/EvalExec.java | 28 ++++++ .../esql/plan/physical/PhysicalPlan.java | 2 +- .../esql/io/stream/PlanNamedTypesTests.java | 68 -------------- ...bstractPhysicalPlanSerializationTests.java | 4 +- .../EsQueryExecSerializationTests.java | 94 +++++++++++++++++++ .../physical/EvalExecSerializationTests.java | 61 ++++++++++++ 10 files changed, 268 insertions(+), 151 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExecSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EvalExecSerializationTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java index 77d982453203c..af82ceb4bf809 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java @@ -12,16 +12,13 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.util.iterable.Iterables; -import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.expression.Order; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.Grok; @@ -56,8 +53,6 @@ import java.util.Set; import static org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry.Entry.of; -import static org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry.PlanReader.readerFromPlanReader; -import static org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry.PlanWriter.writerFromPlanWriter; /** * A utility class that consists solely of static methods that describe how to serialize 
and @@ -93,9 +88,9 @@ public static List namedTypeEntries() { // Physical Plan Nodes of(PhysicalPlan.class, AggregateExec.ENTRY), of(PhysicalPlan.class, DissectExec.ENTRY), - of(PhysicalPlan.class, EsQueryExec.class, PlanNamedTypes::writeEsQueryExec, PlanNamedTypes::readEsQueryExec), + of(PhysicalPlan.class, EsQueryExec.ENTRY), of(PhysicalPlan.class, EsSourceExec.ENTRY), - of(PhysicalPlan.class, EvalExec.class, PlanNamedTypes::writeEvalExec, PlanNamedTypes::readEvalExec), + of(PhysicalPlan.class, EvalExec.ENTRY), of(PhysicalPlan.class, EnrichExec.class, PlanNamedTypes::writeEnrichExec, PlanNamedTypes::readEnrichExec), of(PhysicalPlan.class, ExchangeExec.class, PlanNamedTypes::writeExchangeExec, PlanNamedTypes::readExchangeExec), of(PhysicalPlan.class, ExchangeSinkExec.class, PlanNamedTypes::writeExchangeSinkExec, PlanNamedTypes::readExchangeSinkExec), @@ -123,57 +118,6 @@ public static List namedTypeEntries() { } // -- physical plan nodes - static EsQueryExec readEsQueryExec(PlanStreamInput in) throws IOException { - return new EsQueryExec( - Source.readFrom(in), - new EsIndex(in), - readIndexMode(in), - in.readNamedWriteableCollectionAsList(Attribute.class), - in.readOptionalNamedWriteable(QueryBuilder.class), - in.readOptionalNamed(Expression.class), - in.readOptionalCollectionAsList(readerFromPlanReader(PlanNamedTypes::readFieldSort)), - in.readOptionalVInt() - ); - } - - static void writeEsQueryExec(PlanStreamOutput out, EsQueryExec esQueryExec) throws IOException { - assert esQueryExec.children().size() == 0; - Source.EMPTY.writeTo(out); - esQueryExec.index().writeTo(out); - writeIndexMode(out, esQueryExec.indexMode()); - out.writeNamedWriteableCollection(esQueryExec.output()); - out.writeOptionalNamedWriteable(esQueryExec.query()); - out.writeOptionalNamedWriteable(esQueryExec.limit()); - out.writeOptionalCollection(esQueryExec.sorts(), writerFromPlanWriter(PlanNamedTypes::writeFieldSort)); - out.writeOptionalInt(esQueryExec.estimatedRowSize()); - } - - public static IndexMode readIndexMode(StreamInput in) throws IOException { - if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_ADD_INDEX_MODE_TO_SOURCE)) { - return IndexMode.fromString(in.readString()); - } else { - return IndexMode.STANDARD; - } - } - - public static void writeIndexMode(StreamOutput out, IndexMode indexMode) throws IOException { - if (out.getTransportVersion().onOrAfter(TransportVersions.ESQL_ADD_INDEX_MODE_TO_SOURCE)) { - out.writeString(indexMode.getName()); - } else if (indexMode != IndexMode.STANDARD) { - throw new IllegalStateException("not ready to support index mode [" + indexMode + "]"); - } - } - - static EvalExec readEvalExec(PlanStreamInput in) throws IOException { - return new EvalExec(Source.readFrom(in), in.readPhysicalPlanNode(), in.readCollectionAsList(Alias::new)); - } - - static void writeEvalExec(PlanStreamOutput out, EvalExec evalExec) throws IOException { - Source.EMPTY.writeTo(out); - out.writePhysicalPlanNode(evalExec.child()); - out.writeCollection(evalExec.fields()); - } - static EnrichExec readEnrichExec(PlanStreamInput in) throws IOException { final Source source = Source.readFrom(in); final PhysicalPlan child = in.readPhysicalPlanNode(); @@ -426,20 +370,4 @@ static void writeTopNExec(PlanStreamOutput out, TopNExec topNExec) throws IOExce out.writeNamedWriteable(topNExec.limit()); out.writeOptionalVInt(topNExec.estimatedRowSize()); } - - // -- ancillary supporting classes of plan nodes, etc - - static EsQueryExec.FieldSort readFieldSort(PlanStreamInput in) throws IOException { 
- return new EsQueryExec.FieldSort( - FieldAttribute.readFrom(in), - in.readEnum(Order.OrderDirection.class), - in.readEnum(Order.NullsPosition.class) - ); - } - - static void writeFieldSort(PlanStreamOutput out, EsQueryExec.FieldSort fieldSort) throws IOException { - fieldSort.field().writeTo(out); - out.writeEnum(fieldSort.direction()); - out.writeEnum(fieldSort.nulls()); - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java index 56c253f166762..b080c425d2312 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java @@ -19,7 +19,6 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.index.EsIndex; -import org.elasticsearch.xpack.esql.io.stream.PlanNamedTypes; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import java.io.IOException; @@ -67,7 +66,7 @@ private static EsRelation readFrom(StreamInput in) throws IOException { in.readOptionalString(); in.readOptionalString(); } - IndexMode indexMode = PlanNamedTypes.readIndexMode(in); + IndexMode indexMode = readIndexMode(in); boolean frozen = in.readBoolean(); return new EsRelation(source, esIndex, attributes, indexMode, frozen); } @@ -83,7 +82,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(null); out.writeOptionalString(null); } - PlanNamedTypes.writeIndexMode(out, indexMode()); + writeIndexMode(out, indexMode()); out.writeBoolean(frozen()); } @@ -174,4 +173,20 @@ public boolean equals(Object obj) { public String nodeString() { return nodeName() + "[" + index + "]" + NodeUtils.limitedToString(attrs); } + + public static IndexMode readIndexMode(StreamInput in) throws IOException { + if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_ADD_INDEX_MODE_TO_SOURCE)) { + return IndexMode.fromString(in.readString()); + } else { + return IndexMode.STANDARD; + } + } + + public static void writeIndexMode(StreamOutput out, IndexMode indexMode) throws IOException { + if (out.getTransportVersion().onOrAfter(TransportVersions.ESQL_ADD_INDEX_MODE_TO_SOURCE)) { + out.writeString(indexMode.getName()); + } else if (indexMode != IndexMode.STANDARD) { + throw new IllegalStateException("not ready to support index mode [" + indexMode + "]"); + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java index 5901d42abbc82..21aa2cb7d1860 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java @@ -8,6 +8,10 @@ package org.elasticsearch.xpack.esql.plan.physical; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.sort.FieldSortBuilder; @@ -22,12 +26,21 @@ import 
org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.expression.Order; import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Objects; public class EsQueryExec extends LeafExec implements EstimatesRowSize { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + PhysicalPlan.class, + "EsQueryExec", + EsQueryExec::new + ); + public static final EsField DOC_ID_FIELD = new EsField("_doc", DataType.DOC_DATA_TYPE, Map.of(), false); private final EsIndex index; @@ -43,7 +56,7 @@ public class EsQueryExec extends LeafExec implements EstimatesRowSize { */ private final Integer estimatedRowSize; - public record FieldSort(FieldAttribute field, Order.OrderDirection direction, Order.NullsPosition nulls) { + public record FieldSort(FieldAttribute field, Order.OrderDirection direction, Order.NullsPosition nulls) implements Writeable { public FieldSortBuilder fieldSortBuilder() { FieldSortBuilder builder = new FieldSortBuilder(field.name()); builder.order(Direction.from(direction).asOrder()); @@ -51,6 +64,21 @@ public FieldSortBuilder fieldSortBuilder() { builder.unmappedType(field.dataType().esType()); return builder; } + + private static FieldSort readFrom(StreamInput in) throws IOException { + return new EsQueryExec.FieldSort( + FieldAttribute.readFrom(in), + in.readEnum(Order.OrderDirection.class), + in.readEnum(Order.NullsPosition.class) + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + field().writeTo(out); + out.writeEnum(direction()); + out.writeEnum(nulls()); + } } public EsQueryExec(Source source, EsIndex index, IndexMode indexMode, List attributes, QueryBuilder query) { @@ -77,6 +105,36 @@ public EsQueryExec( this.estimatedRowSize = estimatedRowSize; } + private EsQueryExec(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + new EsIndex(in), + EsRelation.readIndexMode(in), + in.readNamedWriteableCollectionAsList(Attribute.class), + in.readOptionalNamedWriteable(QueryBuilder.class), + in.readOptionalNamedWriteable(Expression.class), + in.readOptionalCollectionAsList(FieldSort::readFrom), + in.readOptionalVInt() + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + Source.EMPTY.writeTo(out); + index().writeTo(out); + EsRelation.writeIndexMode(out, indexMode()); + out.writeNamedWriteableCollection(output()); + out.writeOptionalNamedWriteable(query()); + out.writeOptionalNamedWriteable(limit()); + out.writeOptionalCollection(sorts()); + out.writeOptionalVInt(estimatedRowSize()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + public static boolean isSourceAttribute(Attribute attr) { return DOC_ID_FIELD.getName().equals(attr.name()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsSourceExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsSourceExec.java index 275f1182ff97c..cd167b4683493 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsSourceExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsSourceExec.java @@ -17,7 +17,6 @@ import org.elasticsearch.xpack.esql.core.tree.NodeUtils; import 
org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.index.EsIndex; -import org.elasticsearch.xpack.esql.io.stream.PlanNamedTypes; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; @@ -55,7 +54,7 @@ private EsSourceExec(StreamInput in) throws IOException { new EsIndex(in), in.readNamedWriteableCollectionAsList(Attribute.class), in.readOptionalNamedWriteable(QueryBuilder.class), - PlanNamedTypes.readIndexMode(in) + EsRelation.readIndexMode(in) ); } @@ -65,7 +64,7 @@ public void writeTo(StreamOutput out) throws IOException { index().writeTo(out); out.writeNamedWriteableCollection(output()); out.writeOptionalNamedWriteable(query()); - PlanNamedTypes.writeIndexMode(out, indexMode()); + EsRelation.writeIndexMode(out, indexMode()); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EvalExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EvalExec.java index 3876891b27752..97b81914f8889 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EvalExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EvalExec.java @@ -7,17 +7,29 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import java.io.IOException; import java.util.List; import java.util.Objects; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; public class EvalExec extends UnaryExec implements EstimatesRowSize { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + PhysicalPlan.class, + "EvalExec", + EvalExec::new + ); + private final List fields; public EvalExec(Source source, PhysicalPlan child, List fields) { @@ -25,6 +37,22 @@ public EvalExec(Source source, PhysicalPlan child, List fields) { this.fields = fields; } + private EvalExec(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), ((PlanStreamInput) in).readPhysicalPlanNode(), in.readCollectionAsList(Alias::new)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + Source.EMPTY.writeTo(out); + ((PlanStreamOutput) out).writePhysicalPlanNode(child()); + out.writeCollection(fields()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + public List fields() { return fields; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java index 42a97802038a2..60e44a5140dfa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java @@ -23,7 +23,7 @@ */ public abstract class PhysicalPlan extends QueryPlan { public static List 
getNamedWriteables() { - return List.of(AggregateExec.ENTRY, DissectExec.ENTRY, EsSourceExec.ENTRY); + return List.of(AggregateExec.ENTRY, DissectExec.ENTRY, EsQueryExec.ENTRY, EsSourceExec.ENTRY, EvalExec.ENTRY); } public PhysicalPlan(Source source, List children) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java index 56ab1bd41693e..a3d1e70e558d6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java @@ -20,24 +20,9 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.Nullability; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.KeywordEsField; -import org.elasticsearch.xpack.esql.expression.Order; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mod; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThanOrEqual; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThan; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThanOrEqual; -import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NotEquals; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.DissectExec; import org.elasticsearch.xpack.esql.plan.physical.EnrichExec; @@ -137,15 +122,6 @@ public void testWrappedStreamSimple() throws IOException { assertThat(in.readVInt(), equalTo(11_345)); } - public void testFieldSortSimple() throws IOException { - var orig = new EsQueryExec.FieldSort(field("val", DataType.LONG), Order.OrderDirection.ASC, Order.NullsPosition.FIRST); - BytesStreamOutput bso = new BytesStreamOutput(); - PlanStreamOutput out = new PlanStreamOutput(bso, planNameRegistry, null); - PlanNamedTypes.writeFieldSort(out, orig); - var deser = PlanNamedTypes.readFieldSort(planStreamInput(bso)); - EqualsHashCodeTestUtils.checkEqualsAndHashCode(orig, unused -> deser); - } - static FieldAttribute randomFieldAttributeOrNull() { return randomBoolean() ? 
randomFieldAttribute() : null; } @@ -163,46 +139,6 @@ static FieldAttribute randomFieldAttribute() { ); } - static KeywordEsField randomKeywordEsField() { - return new KeywordEsField( - randomAlphaOfLength(randomIntBetween(1, 25)), // name - randomProperties(), - randomBoolean(), // hasDocValues - randomIntBetween(1, 12), // precision - randomBoolean(), // normalized - randomBoolean() // alias - ); - } - - static EsqlBinaryComparison randomBinaryComparison() { - int v = randomIntBetween(0, 5); - var left = field(randomName(), randomDataType()); - var right = field(randomName(), randomDataType()); - return switch (v) { - case 0 -> new Equals(Source.EMPTY, left, right); - case 1 -> new NotEquals(Source.EMPTY, left, right); - case 2 -> new GreaterThan(Source.EMPTY, left, right); - case 3 -> new GreaterThanOrEqual(Source.EMPTY, left, right); - case 4 -> new LessThan(Source.EMPTY, left, right); - case 5 -> new LessThanOrEqual(Source.EMPTY, left, right); - default -> throw new AssertionError(v); - }; - } - - static ArithmeticOperation randomArithmeticOperation() { - int v = randomIntBetween(0, 4); - var left = field(randomName(), randomDataType()); - var right = field(randomName(), randomDataType()); - return switch (v) { - case 0 -> new Add(Source.EMPTY, left, right); - case 1 -> new Sub(Source.EMPTY, left, right); - case 2 -> new Mul(Source.EMPTY, left, right); - case 3 -> new Div(Source.EMPTY, left, right); - case 4 -> new Mod(Source.EMPTY, left, right); - default -> throw new AssertionError(v); - }; - } - static NameId nameIdOrNull() { return randomBoolean() ? new NameId() : null; } @@ -231,10 +167,6 @@ static EsField randomEsField(int depth) { ); } - static Map randomProperties() { - return randomProperties(0); - } - static Map randomProperties(int depth) { if (depth > 2) { return Map.of(); // prevent infinite recursion (between EsField and properties) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java index 7a0d125ad85ba..2a05c472328e5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests; import java.util.ArrayList; @@ -47,7 +48,8 @@ protected final NamedWriteableRegistry getNamedWriteableRegistry() { entries.addAll(Attribute.getNamedWriteables()); entries.addAll(Block.getNamedWriteables()); entries.addAll(NamedExpression.getNamedWriteables()); - entries.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); + entries.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); // Query builders + entries.add(Add.ENTRY); // Used by the eval tests return new NamedWriteableRegistry(entries); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExecSerializationTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExecSerializationTests.java new file mode 100644 index 0000000000000..6bb5111b154e6 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExecSerializationTests.java @@ -0,0 +1,94 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plan.physical; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.TermQueryBuilder; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.Order; +import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.EsIndexSerializationTests; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.xpack.esql.plan.logical.AbstractLogicalPlanSerializationTests.randomFieldAttributes; + +public class EsQueryExecSerializationTests extends AbstractPhysicalPlanSerializationTests { + public static EsQueryExec randomEsQueryExec() { + Source source = randomSource(); + EsIndex index = EsIndexSerializationTests.randomEsIndex(); + IndexMode indexMode = randomFrom(IndexMode.values()); + List attrs = randomFieldAttributes(1, 10, false); + QueryBuilder query = randomQuery(); + Expression limit = new Literal(randomSource(), between(0, Integer.MAX_VALUE), DataType.INTEGER); + List sorts = randomFieldSorts(); + Integer estimatedRowSize = randomEstimatedRowSize(); + return new EsQueryExec(source, index, indexMode, attrs, query, limit, sorts, estimatedRowSize); + } + + public static QueryBuilder randomQuery() { + return randomBoolean() ? 
new MatchAllQueryBuilder() : new TermQueryBuilder(randomAlphaOfLength(4), randomAlphaOfLength(4)); + } + + public static List randomFieldSorts() { + return randomList(0, 4, EsQueryExecSerializationTests::randomFieldSort); + } + + public static EsQueryExec.FieldSort randomFieldSort() { + FieldAttribute field = FieldAttributeTests.createFieldAttribute(0, false); + Order.OrderDirection direction = randomFrom(Order.OrderDirection.values()); + Order.NullsPosition nulls = randomFrom(Order.NullsPosition.values()); + return new EsQueryExec.FieldSort(field, direction, nulls); + } + + @Override + protected EsQueryExec createTestInstance() { + return randomEsQueryExec(); + } + + @Override + protected EsQueryExec mutateInstance(EsQueryExec instance) throws IOException { + EsIndex index = instance.index(); + IndexMode indexMode = instance.indexMode(); + List attrs = instance.attrs(); + QueryBuilder query = instance.query(); + Expression limit = instance.limit(); + List sorts = instance.sorts(); + Integer estimatedRowSize = instance.estimatedRowSize(); + switch (between(0, 6)) { + case 0 -> index = randomValueOtherThan(index, EsIndexSerializationTests::randomEsIndex); + case 1 -> indexMode = randomValueOtherThan(indexMode, () -> randomFrom(IndexMode.values())); + case 2 -> attrs = randomValueOtherThan(attrs, () -> randomFieldAttributes(1, 10, false)); + case 3 -> query = randomValueOtherThan(query, EsQueryExecSerializationTests::randomQuery); + case 4 -> limit = randomValueOtherThan( + limit, + () -> new Literal(randomSource(), between(0, Integer.MAX_VALUE), DataType.INTEGER) + ); + case 5 -> sorts = randomValueOtherThan(sorts, EsQueryExecSerializationTests::randomFieldSorts); + case 6 -> estimatedRowSize = randomValueOtherThan( + estimatedRowSize, + AbstractPhysicalPlanSerializationTests::randomEstimatedRowSize + ); + } + return new EsQueryExec(instance.source(), index, indexMode, attrs, query, limit, sorts, estimatedRowSize); + } + + @Override + protected boolean alwaysEmptySource() { + return true; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EvalExecSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EvalExecSerializationTests.java new file mode 100644 index 0000000000000..45baf4822b1d2 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/EvalExecSerializationTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.plan.physical; + +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; + +import java.io.IOException; +import java.util.List; + +public class EvalExecSerializationTests extends AbstractPhysicalPlanSerializationTests { + public static EvalExec randomEvalExec(int depth) { + Source source = randomSource(); + PhysicalPlan child = randomChild(depth); + List fields = randomFields(); + return new EvalExec(source, child, fields); + } + + public static List randomFields() { + return randomList(1, 10, EvalExecSerializationTests::randomField); + } + + public static Alias randomField() { + Expression child = new Add( + randomSource(), + FieldAttributeTests.createFieldAttribute(0, true), + FieldAttributeTests.createFieldAttribute(0, true) + ); + return new Alias(randomSource(), randomAlphaOfLength(5), child); + } + + @Override + protected EvalExec createTestInstance() { + return randomEvalExec(0); + } + + @Override + protected EvalExec mutateInstance(EvalExec instance) throws IOException { + PhysicalPlan child = instance.child(); + List fields = instance.fields(); + if (randomBoolean()) { + child = randomValueOtherThan(child, () -> randomChild(0)); + } else { + fields = randomValueOtherThan(fields, EvalExecSerializationTests::randomFields); + } + return new EvalExec(instance.source(), child, fields); + } + + @Override + protected boolean alwaysEmptySource() { + return true; + } +} From 3cbb5264f5e7a2cc5d85b27b1ec72834ef85bcfc Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Wed, 28 Aug 2024 10:46:36 -0700 Subject: [PATCH 013/144] ESQL: use this. prefix in grammar semantic predicate (#112291) Fix #112285 --- .../esql/src/main/antlr/EsqlBaseLexer.g4 | 14 ++++++------- .../esql/src/main/antlr/EsqlBaseParser.g4 | 10 +++++----- .../xpack/esql/parser/EsqlBaseLexer.java | 10 +++++----- .../xpack/esql/parser/EsqlBaseParser.java | 20 +++++++++---------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4 b/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4 index 897bfa5e1ce15..6570a25469971 100644 --- a/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4 +++ b/x-pack/plugin/esql/src/main/antlr/EsqlBaseLexer.g4 @@ -29,7 +29,7 @@ options { * * Since the tokens/modes are in development, simply define them under the * "// in development section" and follow the section comments in that section. - * That is use the DEV_ prefix and use the {isDevVersion()}? conditional. + * That is use the DEV_ prefix and use the {this.isDevVersion()}? conditional. * They are defined at the end of the file, to minimize the impact on the existing * token types. * @@ -80,15 +80,15 @@ WHERE : 'where' -> pushMode(EXPRESSION_MODE); // Before adding a new in-development command, to sandbox the behavior when running in production environments // // For example: to add myCommand use the following declaration: -// DEV_MYCOMMAND : {isDevVersion()}? 'mycommand' -> ... +// DEV_MYCOMMAND : {this.isDevVersion()}? 'mycommand' -> ... // // Once the command has been stabilized, remove the DEV_ prefix and the {}? conditional and move the command to the // main section while preserving alphabetical order: // MYCOMMAND : 'mycommand' -> ... 
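// Note: the Java expression inside a {...}? semantic predicate is emitted verbatim into the
// generated sempred methods of EsqlBaseLexer.java/EsqlBaseParser.java (updated further below
// in this patch), so it must read as valid Java in that generated context — hence the
// explicit this. prefix on isDevVersion().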
-DEV_INLINESTATS : {isDevVersion()}? 'inlinestats' -> pushMode(EXPRESSION_MODE); -DEV_LOOKUP : {isDevVersion()}? 'lookup' -> pushMode(LOOKUP_MODE); -DEV_MATCH : {isDevVersion()}? 'match' -> pushMode(EXPRESSION_MODE); -DEV_METRICS : {isDevVersion()}? 'metrics' -> pushMode(METRICS_MODE); +DEV_INLINESTATS : {this.isDevVersion()}? 'inlinestats' -> pushMode(EXPRESSION_MODE); +DEV_LOOKUP : {this.isDevVersion()}? 'lookup' -> pushMode(LOOKUP_MODE); +DEV_MATCH : {this.isDevVersion()}? 'match' -> pushMode(EXPRESSION_MODE); +DEV_METRICS : {this.isDevVersion()}? 'metrics' -> pushMode(METRICS_MODE); // // Catch-all for unrecognized commands - don't define any beyond this line @@ -211,7 +211,7 @@ SLASH : '/'; PERCENT : '%'; // move it in the main section if the feature gets promoted -DEV_MATCH_OP : {isDevVersion()}? DEV_MATCH -> type(DEV_MATCH); +DEV_MATCH_OP : {this.isDevVersion()}? DEV_MATCH -> type(DEV_MATCH); NAMED_OR_POSITIONAL_PARAM : PARAM (LETTER | UNDERSCORE) UNQUOTED_ID_BODY* diff --git a/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4 b/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4 index ce748b3af03d1..a3ef2471d4e56 100644 --- a/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4 +++ b/x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4 @@ -36,7 +36,7 @@ sourceCommand | rowCommand | showCommand // in development - | {isDevVersion()}? metricsCommand + | {this.isDevVersion()}? metricsCommand ; processingCommand @@ -53,9 +53,9 @@ processingCommand | enrichCommand | mvExpandCommand // in development - | {isDevVersion()}? inlinestatsCommand - | {isDevVersion()}? lookupCommand - | {isDevVersion()}? matchCommand + | {this.isDevVersion()}? inlinestatsCommand + | {this.isDevVersion()}? lookupCommand + | {this.isDevVersion()}? matchCommand ; whereCommand @@ -70,7 +70,7 @@ booleanExpression | left=booleanExpression operator=OR right=booleanExpression #logicalBinary | valueExpression (NOT)? IN LP valueExpression (COMMA valueExpression)* RP #logicalIn | valueExpression IS NOT? NULL #isNull - | {isDevVersion()}? matchBooleanExpression #matchExpression + | {this.isDevVersion()}? 
matchBooleanExpression #matchExpression ; regexBooleanExpression diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseLexer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseLexer.java index 5fc5ab20810a6..a746a0d49004f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseLexer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseLexer.java @@ -238,35 +238,35 @@ public boolean sempred(RuleContext _localctx, int ruleIndex, int predIndex) { private boolean DEV_INLINESTATS_sempred(RuleContext _localctx, int predIndex) { switch (predIndex) { case 0: - return isDevVersion(); + return this.isDevVersion(); } return true; } private boolean DEV_LOOKUP_sempred(RuleContext _localctx, int predIndex) { switch (predIndex) { case 1: - return isDevVersion(); + return this.isDevVersion(); } return true; } private boolean DEV_MATCH_sempred(RuleContext _localctx, int predIndex) { switch (predIndex) { case 2: - return isDevVersion(); + return this.isDevVersion(); } return true; } private boolean DEV_METRICS_sempred(RuleContext _localctx, int predIndex) { switch (predIndex) { case 3: - return isDevVersion(); + return this.isDevVersion(); } return true; } private boolean DEV_MATCH_OP_sempred(RuleContext _localctx, int predIndex) { switch (predIndex) { case 4: - return isDevVersion(); + return this.isDevVersion(); } return true; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java index 359abbc701dd3..fb63e31a37c90 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java @@ -447,7 +447,7 @@ public final SourceCommandContext sourceCommand() throws RecognitionException { enterOuterAlt(_localctx, 6); { setState(141); - if (!(isDevVersion())) throw new FailedPredicateException(this, "isDevVersion()"); + if (!(this.isDevVersion())) throw new FailedPredicateException(this, "this.isDevVersion()"); setState(142); metricsCommand(); } @@ -627,7 +627,7 @@ public final ProcessingCommandContext processingCommand() throws RecognitionExce enterOuterAlt(_localctx, 13); { setState(157); - if (!(isDevVersion())) throw new FailedPredicateException(this, "isDevVersion()"); + if (!(this.isDevVersion())) throw new FailedPredicateException(this, "this.isDevVersion()"); setState(158); inlinestatsCommand(); } @@ -636,7 +636,7 @@ public final ProcessingCommandContext processingCommand() throws RecognitionExce enterOuterAlt(_localctx, 14); { setState(159); - if (!(isDevVersion())) throw new FailedPredicateException(this, "isDevVersion()"); + if (!(this.isDevVersion())) throw new FailedPredicateException(this, "this.isDevVersion()"); setState(160); lookupCommand(); } @@ -645,7 +645,7 @@ public final ProcessingCommandContext processingCommand() throws RecognitionExce enterOuterAlt(_localctx, 15); { setState(161); - if (!(isDevVersion())) throw new FailedPredicateException(this, "isDevVersion()"); + if (!(this.isDevVersion())) throw new FailedPredicateException(this, "this.isDevVersion()"); setState(162); matchCommand(); } @@ -1018,7 +1018,7 @@ private BooleanExpressionContext booleanExpression(int _p) throws RecognitionExc _ctx = _localctx; _prevctx = _localctx; setState(196); - if (!(isDevVersion())) throw new 
FailedPredicateException(this, "isDevVersion()"); + if (!(this.isDevVersion())) throw new FailedPredicateException(this, "this.isDevVersion()"); setState(197); matchBooleanExpression(); } @@ -5339,25 +5339,25 @@ private boolean query_sempred(QueryContext _localctx, int predIndex) { private boolean sourceCommand_sempred(SourceCommandContext _localctx, int predIndex) { switch (predIndex) { case 1: - return isDevVersion(); + return this.isDevVersion(); } return true; } private boolean processingCommand_sempred(ProcessingCommandContext _localctx, int predIndex) { switch (predIndex) { case 2: - return isDevVersion(); + return this.isDevVersion(); case 3: - return isDevVersion(); + return this.isDevVersion(); case 4: - return isDevVersion(); + return this.isDevVersion(); } return true; } private boolean booleanExpression_sempred(BooleanExpressionContext _localctx, int predIndex) { switch (predIndex) { case 5: - return isDevVersion(); + return this.isDevVersion(); case 6: return precpred(_ctx, 5); case 7: From 7b4443016fbd15b431a6382200c0043d57e682cc Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Wed, 28 Aug 2024 12:10:24 -0700 Subject: [PATCH 014/144] Use test util for finding platform dir (#112286) The native platform dir can be found using a TestUtil method, but benchmarks was trying to construct it on its own. This commit switches to using the util method. --- benchmarks/build.gradle | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index e2511438e7f95..b16621aaaa471 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -1,4 +1,5 @@ import org.elasticsearch.gradle.internal.info.BuildParams +import org.elasticsearch.gradle.internal.test.TestUtil /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one @@ -29,6 +30,7 @@ tasks.named("javadoc").configure { enabled = false } configurations { expression painless + nativeLib } dependencies { @@ -45,6 +47,7 @@ dependencies { implementation project(path: ':libs:elasticsearch-simdvec') expression(project(path: ':modules:lang-expression', configuration: 'zip')) painless(project(path: ':modules:lang-painless', configuration: 'zip')) + nativeLib(project(':libs:elasticsearch-native')) api "org.openjdk.jmh:jmh-core:$versions.jmh" annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh" // Dependencies of JMH @@ -76,17 +79,8 @@ tasks.register("copyPainless", Copy) { tasks.named("run").configure { executable = "${BuildParams.runtimeJavaHome}/bin/java" args << "-Dplugins.dir=${buildDir}/plugins" << "-Dtests.index=${buildDir}/index" - dependsOn "copyExpression", "copyPainless" - systemProperty 'es.nativelibs.path', file("../libs/native/libraries/build/platform/${platformName()}-${os.arch}") -} - -String platformName() { - String name = System.getProperty("os.name"); - if (name.startsWith("Mac")) { - return "darwin"; - } else { - return name.toLowerCase(Locale.ROOT); - } + dependsOn "copyExpression", "copyPainless", configurations.nativeLib + systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../libs/native/libraries/build/platform/").toString()) } spotless { From b776cf6460c136fd1ae67810c042dfde6a675c52 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 28 Aug 2024 14:18:50 -0500 Subject: [PATCH 015/144] Do not treat replica as unassigned if new and below time threshold. (#112066) Changes the way we calculate if all replicas are unassigned when primary is recently created. 
This change will only be used in serverless, not in stateful. When a primary is new, if the primary is active, but the replica is unassigned for less than a buffer time period, do not treat it as unassigned. Control the time period through the health.shards_availability.replica_unassigned_buffer_time setting. --- docs/changelog/112066.yaml | 6 + ...rdsAvailabilityHealthIndicatorService.java | 69 ++++- .../common/settings/ClusterSettings.java | 4 +- .../ShardsAvailabilityActionGuideTests.java | 14 +- ...ailabilityHealthIndicatorServiceTests.java | 258 +++++++++++++----- 5 files changed, 271 insertions(+), 80 deletions(-) create mode 100644 docs/changelog/112066.yaml diff --git a/docs/changelog/112066.yaml b/docs/changelog/112066.yaml new file mode 100644 index 0000000000000..5dd846766bc8e --- /dev/null +++ b/docs/changelog/112066.yaml @@ -0,0 +1,6 @@ +pr: 112066 +summary: Do not treat replica as unassigned if primary recently created and unassigned + time is below a threshold +area: Health +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorService.java index 8fb91d89417e0..b6c19f331c712 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorService.java @@ -40,9 +40,11 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.health.Diagnosis; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorImpact; @@ -56,6 +58,7 @@ import org.elasticsearch.snapshots.SearchableSnapshotsSettings; import org.elasticsearch.snapshots.SnapshotShardSizeInfo; +import java.time.Instant; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -108,11 +111,29 @@ public class ShardsAvailabilityHealthIndicatorService implements HealthIndicator private static final String DATA_TIER_ALLOCATION_DECIDER_NAME = "data_tier"; + /** + * Changes the behavior of isNewlyCreatedAndInitializingReplica so that the + * shard_availability health indicator returns YELLOW if a primary + * is STARTED, but a replica is still INITIALIZING and the replica has been + * unassigned for less than the value of this setting. This function is + * only used in serverless, so this setting has no effect in stateful.
+ */ + public static final Setting REPLICA_UNASSIGNED_BUFFER_TIME = Setting.timeSetting( + "health.shards_availability.replica_unassigned_buffer_time", + TimeValue.timeValueSeconds(3), + TimeValue.timeValueSeconds(0), + TimeValue.timeValueSeconds(20), + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private final ClusterService clusterService; private final AllocationService allocationService; private final SystemIndices systemIndices; + private volatile TimeValue replicaUnassignedBufferTime = TimeValue.timeValueSeconds(0); + public ShardsAvailabilityHealthIndicatorService( ClusterService clusterService, AllocationService allocationService, @@ -121,6 +142,11 @@ public ShardsAvailabilityHealthIndicatorService( this.clusterService = clusterService; this.allocationService = allocationService; this.systemIndices = systemIndices; + clusterService.getClusterSettings().addSettingsUpdateConsumer(REPLICA_UNASSIGNED_BUFFER_TIME, this::setReplicaUnassignedBufferTime); + } + + private void setReplicaUnassignedBufferTime(TimeValue replicaUnassignedBufferTime) { + this.replicaUnassignedBufferTime = replicaUnassignedBufferTime; } @Override @@ -144,7 +170,7 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources var state = clusterService.state(); var shutdown = state.getMetadata().custom(NodesShutdownMetadata.TYPE, NodesShutdownMetadata.EMPTY); var status = createNewStatus(state.getMetadata()); - updateShardAllocationStatus(status, state, shutdown, verbose); + updateShardAllocationStatus(status, state, shutdown, verbose, replicaUnassignedBufferTime); return createIndicator( status.getStatus(), status.getSymptom(), @@ -158,14 +184,15 @@ static void updateShardAllocationStatus( ShardAllocationStatus status, ClusterState state, NodesShutdownMetadata shutdown, - boolean verbose + boolean verbose, + TimeValue replicaUnassignedBufferTime ) { for (IndexRoutingTable indexShardRouting : state.routingTable()) { for (int i = 0; i < indexShardRouting.size(); i++) { IndexShardRoutingTable shardRouting = indexShardRouting.shard(i); status.addPrimary(shardRouting.primaryShard(), state, shutdown, verbose); for (ShardRouting replicaShard : shardRouting.replicaShards()) { - status.addReplica(replicaShard, state, shutdown, verbose); + status.addReplica(replicaShard, state, shutdown, verbose, replicaUnassignedBufferTime); } } } @@ -438,11 +465,18 @@ public class ShardAllocationCounts { public SearchableSnapshotsState searchableSnapshotsState = new SearchableSnapshotsState(); final Map> diagnosisDefinitions = new HashMap<>(); - public void increment(ShardRouting routing, ClusterState state, NodesShutdownMetadata shutdowns, boolean verbose) { + public void increment( + ShardRouting routing, + ClusterState state, + NodesShutdownMetadata shutdowns, + boolean verbose, + TimeValue replicaUnassignedBufferTime + ) { boolean isNew = isUnassignedDueToNewInitialization(routing, state); boolean isRestarting = isUnassignedDueToTimelyRestart(routing, shutdowns); + long replicaUnassignedCutoffTime = Instant.now().toEpochMilli() - replicaUnassignedBufferTime.millis(); boolean allUnavailable = areAllShardsOfThisTypeUnavailable(routing, state) - && isNewlyCreatedAndInitializingReplica(routing, state) == false; + && isNewlyCreatedAndInitializingReplica(routing, state, replicaUnassignedCutoffTime) == false; if (allUnavailable) { indicesWithAllShardsUnavailable.add(routing.getIndexName()); } @@ -520,7 +554,7 @@ boolean areAllShardsOfThisTypeUnavailable(ShardRouting routing, ClusterState sta * (a newly 
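// A quick illustration (not part of this patch) of how the setting declared
// above behaves: Setting.timeSetting falls back to the 3s default when unset,
// and rejects values outside the declared 0s..20s bounds. Standalone sketch:
import org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;

class ReplicaUnassignedBufferTimeSketch {
    static void demo() {
        var setting = ShardsAvailabilityHealthIndicatorService.REPLICA_UNASSIGNED_BUFFER_TIME;
        // Unset: the 3s default declared above applies.
        TimeValue byDefault = setting.get(Settings.EMPTY);
        assert byDefault.equals(TimeValue.timeValueSeconds(3));
        // Explicit value; the setting is declared Dynamic, so it can also be
        // changed at runtime through the cluster settings API.
        Settings configured = Settings.builder()
            .put("health.shards_availability.replica_unassigned_buffer_time", "5s")
            .build();
        assert setting.get(configured).equals(TimeValue.timeValueSeconds(5));
        // A value above the 20s maximum (or below 0s) is rejected by the
        // setting's validator with an IllegalArgumentException.
    }
}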
created index having unassigned replicas for example), we don't want the cluster * to turn "unhealthy" for the tiny amount of time before the shards are allocated. */ - static boolean isNewlyCreatedAndInitializingReplica(ShardRouting routing, ClusterState state) { + static boolean isNewlyCreatedAndInitializingReplica(ShardRouting routing, ClusterState state, long replicaUnassignedCutoffTime) { if (routing.active()) { return false; } @@ -528,10 +562,15 @@ static boolean isNewlyCreatedAndInitializingReplica(ShardRouting routing, Cluste return false; } ShardRouting primary = state.routingTable().shardRoutingTable(routing.shardId()).primaryShard(); - if (primary.active()) { - return false; + if (primary.active() == false) { + return ClusterShardHealth.getInactivePrimaryHealth(primary) == ClusterHealthStatus.YELLOW; } - return ClusterShardHealth.getInactivePrimaryHealth(primary) == ClusterHealthStatus.YELLOW; + + Optional ui = Optional.ofNullable(routing.unassignedInfo()); + return ui.filter(info -> info.failedAllocations() == 0) + .filter(info -> info.lastAllocationStatus() != UnassignedInfo.AllocationStatus.DECIDERS_NO) + .filter(info -> info.unassignedTimeMillis() > replicaUnassignedCutoffTime) + .isPresent(); } private static boolean isUnassignedDueToTimelyRestart(ShardRouting routing, NodesShutdownMetadata shutdowns) { @@ -910,11 +949,17 @@ public ShardAllocationStatus(Metadata clusterMetadata) { } void addPrimary(ShardRouting routing, ClusterState state, NodesShutdownMetadata shutdowns, boolean verbose) { - primaries.increment(routing, state, shutdowns, verbose); + primaries.increment(routing, state, shutdowns, verbose, TimeValue.MINUS_ONE); } - void addReplica(ShardRouting routing, ClusterState state, NodesShutdownMetadata shutdowns, boolean verbose) { - replicas.increment(routing, state, shutdowns, verbose); + void addReplica( + ShardRouting routing, + ClusterState state, + NodesShutdownMetadata shutdowns, + boolean verbose, + TimeValue replicaUnassignedBufferTime + ) { + replicas.increment(routing, state, shutdowns, verbose, replicaUnassignedBufferTime); } void updateSearchableSnapshotsOfAvailableIndices() { diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 8d9d8452b12bb..3c60d63f78991 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -55,6 +55,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService; import org.elasticsearch.cluster.service.ClusterApplierService; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.MasterService; @@ -598,6 +599,7 @@ public void apply(Settings value, Settings current, Settings previous) { MergePolicyConfig.DEFAULT_MAX_TIME_BASED_MERGED_SEGMENT_SETTING, TransportService.ENABLE_STACK_OVERFLOW_AVOIDANCE, DataStreamGlobalRetentionSettings.DATA_STREAMS_DEFAULT_RETENTION_SETTING, - DataStreamGlobalRetentionSettings.DATA_STREAMS_MAX_RETENTION_SETTING + DataStreamGlobalRetentionSettings.DATA_STREAMS_MAX_RETENTION_SETTING, + 
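// To make the cutoff arithmetic above concrete, here is a condensed,
// self-contained rendering of the replica-buffer check (simplified types;
// the real predicate additionally consults the primary's health and the
// replica's allocation status, as shown in the diff). Illustration only:
import java.time.Instant;

class ReplicaBufferWindowSketch {
    // Mirrors: replicaUnassignedCutoffTime = now - replicaUnassignedBufferTime
    static boolean withinBufferWindow(long unassignedTimeMillis, long bufferMillis) {
        long cutoff = Instant.now().toEpochMilli() - bufferMillis;
        // A replica that became unassigned *after* the cutoff is still "new"
        // and is not yet reported as unavailable.
        return unassignedTimeMillis > cutoff;
    }

    static void demo() {
        long now = Instant.now().toEpochMilli();
        assert withinBufferWindow(now - 1_000, 3_000);           // 1s ago, 3s buffer: ignored
        assert withinBufferWindow(now - 10_000, 3_000) == false; // 10s ago, 3s buffer: counted
    }
}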
ShardsAvailabilityHealthIndicatorService.REPLICA_UNASSIGNED_BUFFER_TIME ); } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityActionGuideTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityActionGuideTests.java index b731fd79c82fe..994e892e3ac3c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityActionGuideTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityActionGuideTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.indices.SystemIndices; import org.elasticsearch.test.ESTestCase; @@ -33,14 +34,17 @@ import static org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorService.TIER_CAPACITY_ACTION_GUIDE; import static org.hamcrest.Matchers.is; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class ShardsAvailabilityActionGuideTests extends ESTestCase { - private final ShardsAvailabilityHealthIndicatorService service = new ShardsAvailabilityHealthIndicatorService( - mock(ClusterService.class), - mock(AllocationService.class), - mock(SystemIndices.class) - ); + private final ShardsAvailabilityHealthIndicatorService service; + + public ShardsAvailabilityActionGuideTests() { + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn(ClusterSettings.createBuiltInClusterSettings()); + service = new ShardsAvailabilityHealthIndicatorService(clusterService, mock(AllocationService.class), mock(SystemIndices.class)); + } public void testRestoreFromSnapshotAction() { assertThat(ACTION_RESTORE_FROM_SNAPSHOT.helpURL(), is(RESTORE_FROM_SNAPSHOT_ACTION_GUIDE)); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorServiceTests.java index 0e3041dda9853..ad30c79a01334 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/shards/ShardsAvailabilityHealthIndicatorServiceTests.java @@ -42,6 +42,8 @@ import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; import org.elasticsearch.health.Diagnosis; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorImpact; @@ -61,9 +63,9 @@ import org.elasticsearch.snapshots.SearchableSnapshotsSettings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; -import org.mockito.Mockito; import org.mockito.stubbing.Answer; +import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -104,6 +106,7 @@ import static org.elasticsearch.cluster.routing.allocation.shards.ShardsAvailabilityHealthIndicatorServiceTests.ShardState.UNAVAILABLE; import static org.elasticsearch.common.util.CollectionUtils.concatLists; import 
static org.elasticsearch.core.TimeValue.timeValueSeconds; +import static org.elasticsearch.core.Tuple.tuple; import static org.elasticsearch.health.Diagnosis.Resource.Type.FEATURE_STATE; import static org.elasticsearch.health.Diagnosis.Resource.Type.INDEX; import static org.elasticsearch.health.HealthStatus.GREEN; @@ -337,7 +340,8 @@ public void testAllReplicasUnassigned() { status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); assertFalse(status.replicas.doAnyIndicesHaveAllUnavailable()); } @@ -359,7 +363,8 @@ public void testAllReplicasUnassigned() { status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); assertFalse(status.replicas.doAnyIndicesHaveAllUnavailable()); } @@ -381,7 +386,8 @@ public void testAllReplicasUnassigned() { status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); assertTrue(status.replicas.doAnyIndicesHaveAllUnavailable()); } @@ -398,13 +404,15 @@ public void testAllReplicasUnassigned() { ), List.of() ); + var service = createShardsAvailabilityIndicatorService(clusterState); ShardAllocationStatus status = service.createNewStatus(clusterState.metadata()); ShardsAvailabilityHealthIndicatorService.updateShardAllocationStatus( status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); assertTrue(status.replicas.doAnyIndicesHaveAllUnavailable()); } @@ -440,7 +448,8 @@ public void testAllReplicasUnassigned() { status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); // Here because the replica is unassigned due to the primary being created, it's treated as though the replica can be ignored. assertFalse( @@ -469,7 +478,8 @@ public void testAllReplicasUnassigned() { status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); var shardRouting = routingTable.shardsWithState(ShardRoutingState.UNASSIGNED).get(0); assertTrue(service.areAllShardsOfThisTypeUnavailable(shardRouting, clusterState)); @@ -492,7 +502,8 @@ public void testAllReplicasUnassigned() { status, clusterState, NodesShutdownMetadata.EMPTY, - randomBoolean() + randomBoolean(), + timeValueSeconds(0) ); var shardRouting = clusterState.routingTable().index("myindex").shardsWithState(ShardRoutingState.UNASSIGNED).get(0); assertFalse(service.areAllShardsOfThisTypeUnavailable(shardRouting, clusterState)); @@ -922,7 +933,7 @@ public void testRestoreFromSnapshotReportsFeatureStates() { ); HealthIndicatorResult result = service.calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertThat(result.status(), is(HealthStatus.RED)); + assertThat(result.status(), is(RED)); assertThat(result.diagnosisList().size(), is(1)); Diagnosis diagnosis = result.diagnosisList().get(0); List affectedResources = diagnosis.affectedResources(); @@ -1925,7 +1936,7 @@ private SystemIndices getSystemIndices( // We expose the indicator name and the diagnoses in the x-pack usage API. In order to index them properly in a telemetry index // they need to be declared in the health-api-indexer.edn in the telemetry repository. 
public void testMappedFieldsForTelemetry() { - assertThat(ShardsAvailabilityHealthIndicatorService.NAME, equalTo("shards_availability")); + assertThat(NAME, equalTo("shards_availability")); assertThat( ACTION_RESTORE_FROM_SNAPSHOT.getUniqueId(), equalTo("elasticsearch:health:shards_availability:diagnosis:restore_from_snapshot") @@ -1970,8 +1981,10 @@ public void testMappedFieldsForTelemetry() { DIAGNOSIS_WAIT_FOR_INITIALIZATION.getUniqueId(), equalTo("elasticsearch:health:shards_availability:diagnosis:initializing_shards") ); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn(ClusterSettings.createBuiltInClusterSettings()); var service = new ShardsAvailabilityHealthIndicatorService( - mock(ClusterService.class), + clusterService, mock(AllocationService.class), mock(SystemIndices.class) ); @@ -2004,6 +2017,7 @@ public void testMappedFieldsForTelemetry() { } public void testIsNewlyCreatedAndInitializingReplica() { + ShardId id = new ShardId("index", "uuid", 0); IndexMetadata idxMeta = IndexMetadata.builder("index") .numberOfShards(1) @@ -2017,56 +2031,156 @@ public void testIsNewlyCreatedAndInitializingReplica() { .build() ) .build(); - ShardRouting primary = createShardRouting(id, true, new ShardAllocation("node", AVAILABLE)); - var state = createClusterStateWith(List.of(index("index", new ShardAllocation("node", AVAILABLE))), List.of()); - assertFalse(ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica(primary, state)); - - ShardRouting replica = createShardRouting(id, false, new ShardAllocation("node", AVAILABLE)); - state = createClusterStateWith(List.of(index("index", new ShardAllocation("node", AVAILABLE))), List.of()); - assertFalse(ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica(replica, state)); - - ShardRouting unassignedReplica = createShardRouting(id, false, new ShardAllocation("node", UNAVAILABLE)); - state = createClusterStateWith( - List.of(idxMeta), - List.of(index("index", "uuid", new ShardAllocation("node", UNAVAILABLE))), - List.of(), - List.of() - ); - assertFalse(ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica(unassignedReplica, state)); - UnassignedInfo.Reason reason = randomFrom(UnassignedInfo.Reason.NODE_LEFT, UnassignedInfo.Reason.NODE_RESTARTING); - ShardAllocation allocation = new ShardAllocation( - "node", - UNAVAILABLE, - new UnassignedInfo( - reason, - "message", - null, - 0, - 0, - 0, - randomBoolean(), - randomFrom(UnassignedInfo.AllocationStatus.values()), - Set.of(), - reason == UnassignedInfo.Reason.NODE_LEFT ? 
null : randomAlphaOfLength(20) - ) - ); - ShardRouting unallocatedReplica = createShardRouting(id, false, allocation); - state = createClusterStateWith( - List.of(idxMeta), - List.of(index(idxMeta, new ShardAllocation("node", UNAVAILABLE), allocation)), - List.of(), - List.of() - ); - assertFalse(ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica(unallocatedReplica, state)); + ClusterState state; - state = createClusterStateWith( - List.of(idxMeta), - List.of(index(idxMeta, new ShardAllocation("node", CREATING), allocation)), - List.of(), - List.of() - ); - assertTrue(ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica(unallocatedReplica, state)); + // --------- Test conditions that don't depend on threshold --------- + + TimeValue replicaUnassignedThreshold = randomFrom(timeValueSeconds(3), timeValueSeconds(0)); + { + // active, whether primary or replica + boolean primary = randomBoolean(); + ShardAllocation primaryAllocation = new ShardAllocation("node", AVAILABLE); + ShardRouting shard = createShardRouting(id, primary, primaryAllocation); + state = createClusterStateWith(List.of(index("index", primaryAllocation)), List.of()); + assertFalse( + ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica( + shard, + state, + Instant.now().toEpochMilli() - replicaUnassignedThreshold.millis() + ) + ); + } + + { // primary, but not active + var primaryAllocation = new ShardAllocation("node", INITIALIZING); + ShardRouting primary = createShardRouting(id, true, primaryAllocation); + state = createClusterStateWith(List.of(index("index", primaryAllocation)), List.of()); + assertFalse( + ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica( + primary, + state, + Instant.now().toEpochMilli() - replicaUnassignedThreshold.millis() + ) + ); + } + + // --------- Test conditions that depend on threshold, but with threshold of 0 --------- + replicaUnassignedThreshold = timeValueSeconds(0); + long now = Instant.now().toEpochMilli(); + TimeValue afterCutoffTime = TimeValue.timeValueMillis(now); + { + var unassignedInfo = randomFrom(decidersNo(afterCutoffTime), unassignedInfoNoFailures(afterCutoffTime)); + var replicaAllocation = new ShardAllocation("node", UNAVAILABLE, unassignedInfo); + var primaryAllocation = new ShardAllocation("node", randomFrom(INITIALIZING, UNAVAILABLE, AVAILABLE, RESTARTING)); + + ShardRouting unallocatedReplica = createShardRouting(id, false, replicaAllocation); + state = createClusterStateWith( + List.of(idxMeta), + List.of(index(idxMeta, primaryAllocation, replicaAllocation)), + List.of(), + List.of() + ); + assertFalse( + ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica( + unallocatedReplica, + state, + now - replicaUnassignedThreshold.millis() + ) + ); + } + + { + var unassignedInfo = randomFrom(decidersNo(afterCutoffTime), unassignedInfoNoFailures(afterCutoffTime)); + var replicaAllocation = new ShardAllocation("node", UNAVAILABLE, unassignedInfo); + var primaryAllocation = new ShardAllocation("node", CREATING); + + ShardRouting unallocatedReplica = createShardRouting(id, false, replicaAllocation); + state = createClusterStateWith( + List.of(idxMeta), + List.of(index(idxMeta, primaryAllocation, replicaAllocation)), + List.of(), + List.of() + ); + assertTrue( + ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica( + unallocatedReplica, + state, + now - replicaUnassignedThreshold.millis() + ) + ); + } + + // --------- Test 
conditions that do depend on threshold, but with non-zero threshold --------- + + replicaUnassignedThreshold = timeValueSeconds(3); + afterCutoffTime = TimeValue.timeValueMillis(now - 3000); + TimeValue beforeCutoffTime = TimeValue.timeValueMillis(now - 2999); + { + List> configs = new ArrayList<>(); + + // return false if primary is not creating and if unassigned info has failed allocations or is after cutoff + var uis = List.of(decidersNo(afterCutoffTime), decidersNo(beforeCutoffTime), unassignedInfoNoFailures(afterCutoffTime)); + var shardStates = List.of(UNAVAILABLE, INITIALIZING, RESTARTING, AVAILABLE); + for (var shardState : shardStates) { + for (var ui : uis) { + configs.add(tuple(shardState, ui)); + } + } + // return false if primary is not creating or available and unassigned time is before cutoff + for (var shardState : List.of(UNAVAILABLE, INITIALIZING, RESTARTING)) { + configs.add(tuple(shardState, unassignedInfoNoFailures(beforeCutoffTime))); + } + + for (var config : configs) { + var replicaAllocation = new ShardAllocation("node", UNAVAILABLE, config.v2()); + var primaryAllocation = new ShardAllocation("node", config.v1()); + ShardRouting unallocatedReplica = createShardRouting(id, false, replicaAllocation); + state = createClusterStateWith( + List.of(idxMeta), + List.of(index(idxMeta, primaryAllocation, replicaAllocation)), + List.of(), + List.of() + ); + assertFalse( + ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica( + unallocatedReplica, + state, + now - replicaUnassignedThreshold.millis() + ) + ); + } + } + + { + var configs = List.of( + // return true because primary is still creating + tuple(CREATING, decidersNo(afterCutoffTime)), + tuple(CREATING, decidersNo(beforeCutoffTime)), + tuple(CREATING, unassignedInfoNoFailures(afterCutoffTime)), + tuple(CREATING, unassignedInfoNoFailures(beforeCutoffTime)), + + // returns true because unassigned time is before cutoff, and no failedAllocations + tuple(AVAILABLE, unassignedInfoNoFailures(beforeCutoffTime)) + ); + + for (var config : configs) { + var replicaAllocation = new ShardAllocation("node", UNAVAILABLE, config.v2()); + var primaryAllocation = new ShardAllocation("node", config.v1()); + + ShardRouting unallocatedReplica = createShardRouting(id, false, replicaAllocation); + IndexRoutingTable index = index(idxMeta, primaryAllocation, replicaAllocation); + + state = createClusterStateWith(List.of(idxMeta), List.of(index), List.of(), List.of()); + assertTrue( + ShardsAvailabilityHealthIndicatorService.isNewlyCreatedAndInitializingReplica( + unallocatedReplica, + state, + now - replicaUnassignedThreshold.millis() + ) + ); + } + } } private HealthIndicatorResult createExpectedResult( @@ -2373,14 +2487,34 @@ private static UnassignedInfo nodeLeft() { ); } + private static UnassignedInfo unassignedInfoNoFailures(TimeValue unassignedTime) { + UnassignedInfo.Reason reason = randomFrom(UnassignedInfo.Reason.NODE_LEFT, UnassignedInfo.Reason.NODE_RESTARTING); + return new UnassignedInfo( + reason, + "message", + null, + 0, + unassignedTime.nanos(), + unassignedTime.millis(), + randomBoolean(), + randomValueOtherThan(UnassignedInfo.AllocationStatus.DECIDERS_NO, () -> randomFrom(UnassignedInfo.AllocationStatus.values())), + Set.of(), + reason == UnassignedInfo.Reason.NODE_LEFT ? 
null : randomAlphaOfLength(20) + ); + } + private static UnassignedInfo decidersNo() { + return decidersNo(TimeValue.timeValueMillis(0)); + } + + private static UnassignedInfo decidersNo(TimeValue unassignedTime) { return new UnassignedInfo( UnassignedInfo.Reason.ALLOCATION_FAILED, null, null, 1, - 0, - 0, + unassignedTime.nanos(), + unassignedTime.millis(), false, UnassignedInfo.AllocationStatus.DECIDERS_NO, Collections.emptySet(), @@ -2423,7 +2557,7 @@ private static ShardsAvailabilityHealthIndicatorService createAllocationHealthIn when(clusterService.state()).thenReturn(clusterState); var clusterSettings = new ClusterSettings(nodeSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); when(clusterService.getClusterSettings()).thenReturn(clusterSettings); - var allocationService = Mockito.mock(AllocationService.class); + var allocationService = mock(AllocationService.class); when(allocationService.explainShardAllocation(any(), any())).thenAnswer((Answer) invocation -> { ShardRouting shardRouting = invocation.getArgument(0); var key = new ShardRoutingKey(shardRouting.getIndexName(), shardRouting.getId(), shardRouting.primary()); From 67d2380cbd94bf300f686b00d7ea94d707f2f0af Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Wed, 28 Aug 2024 13:36:16 -0600 Subject: [PATCH 016/144] Collecting CCS usage telemetry stats (#111905) * This creates the new CCSUsage and CCSUsageTelemetry classes and wires them up to the UsageService. An initial set of telemetry metrics is now being gathered in TransportSearchAction. Many more will be added later to meet all the requirements for the CCS Telemetry epic of work. Co-authored-by: Michael Peterson --- .../search/ccs/CCSUsageTelemetryIT.java | 708 ++++++++++++++++++ .../cluster/stats/CCSTelemetrySnapshot.java | 404 ++++++++++ .../action/admin/cluster/stats/CCSUsage.java | 246 ++++++ .../cluster/stats/CCSUsageTelemetry.java | 246 ++++++ .../admin/cluster/stats/LongMetric.java | 126 ++++ .../action/search/SearchResponse.java | 9 + .../action/search/TransportSearchAction.java | 246 ++++-- .../org/elasticsearch/usage/UsageService.java | 7 + .../cluster/stats/ApproximateMatcher.java | 46 ++ .../stats/CCSTelemetrySnapshotTests.java | 324 ++++++++ .../cluster/stats/CCSUsageTelemetryTests.java | 342 +++++++++ .../search/TransportSearchActionTests.java | 4 +- .../snapshots/SnapshotResiliencyTests.java | 7 +- .../admin/cluster/stats/telemetry_test.json | 67 ++ .../CCSUsageTelemetryAsyncSearchIT.java | 370 +++++++++ 15 files changed, 3092 insertions(+), 60 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsage.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/LongMetric.java create mode 100644 server/src/test/java/org/elasticsearch/action/admin/cluster/stats/ApproximateMatcher.java create mode 100644 server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java create mode 100644 server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetryTests.java create mode 100644 server/src/test/resources/org/elasticsearch/action/admin/cluster/stats/telemetry_test.json create mode 100644
x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java new file mode 100644 index 0000000000000..40d98b2b5ea71 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java @@ -0,0 +1,708 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.search.ccs; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.admin.cluster.stats.CCSTelemetrySnapshot; +import org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.Result; +import org.elasticsearch.action.search.ClosePointInTimeRequest; +import org.elasticsearch.action.search.OpenPointInTimeRequest; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.TransportClosePointInTimeAction; +import org.elasticsearch.action.search.TransportOpenPointInTimeAction; +import org.elasticsearch.action.search.TransportSearchAction; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.builder.PointInTimeBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.query.SlowRunningQueryBuilder; +import org.elasticsearch.search.query.ThrowingQueryBuilder; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.usage.UsageService; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runners.model.Statement; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.ASYNC_FEATURE; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.MRT_FEATURE; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.WILDCARD_FEATURE; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static 
org.hamcrest.Matchers.equalTo; + +public class CCSUsageTelemetryIT extends AbstractMultiClustersTestCase { + private static final Logger LOGGER = LogManager.getLogger(CCSUsageTelemetryIT.class); + private static final String REMOTE1 = "cluster-a"; + private static final String REMOTE2 = "cluster-b"; + + @Override + protected boolean reuseClusters() { + return false; + } + + @Override + protected Collection remoteClusterAlias() { + return List.of(REMOTE1, REMOTE2); + } + + @Rule + public SkipUnavailableRule skipOverride = new SkipUnavailableRule(REMOTE1, REMOTE2); + + @Override + protected Map skipUnavailableForRemoteClusters() { + var map = skipOverride.getMap(); + LOGGER.info("Using skip_unavailable map: [{}]", map); + return map; + } + + @Override + protected Collection> nodePlugins(String clusterAlias) { + return CollectionUtils.appendToCopy(super.nodePlugins(clusterAlias), CrossClusterSearchIT.TestQueryBuilderPlugin.class); + } + + private SearchRequest makeSearchRequest(String... indices) { + SearchRequest searchRequest = new SearchRequest(indices); + searchRequest.allowPartialSearchResults(false); + searchRequest.setBatchedReduceSize(randomIntBetween(3, 20)); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + if (randomBoolean()) { + searchRequest.setPreFilterShardSize(1); + } + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(10)); + return searchRequest; + } + + /** + * Run search request and get telemetry from it + */ + private CCSTelemetrySnapshot getTelemetryFromSearch(SearchRequest searchRequest) throws ExecutionException, InterruptedException { + // We want to send search to a specific node (we don't care which one) so that we could + // collect the CCS telemetry from it later + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + // We don't care here too much about the response, we just want to trigger the telemetry collection. + // So we check it's not null and leave the rest to other tests. + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + return getTelemetrySnapshot(nodeName); + } + + private CCSTelemetrySnapshot getTelemetryFromFailedSearch(SearchRequest searchRequest) throws Exception { + // We want to send search to a specific node (we don't care which one) so that we could + // collect the CCS telemetry from it later + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + PlainActionFuture queryFuture = new PlainActionFuture<>(); + cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest, queryFuture); + assertBusy(() -> assertTrue(queryFuture.isDone())); + + // We expect failure, but we don't care too much which failure it is in this test + ExecutionException ee = expectThrows(ExecutionException.class, queryFuture::get); + assertNotNull(ee.getCause()); + + return getTelemetrySnapshot(nodeName); + } + + /** + * Create search request for indices and get telemetry from it + */ + private CCSTelemetrySnapshot getTelemetryFromSearch(String... 
indices) throws ExecutionException, InterruptedException { + return getTelemetryFromSearch(makeSearchRequest(indices)); + } + + /** + * Search on all remotes + */ + public void testAllRemotesSearch() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(localIndex, "*:" + remoteIndex); + boolean minimizeRoundtrips = TransportSearchAction.shouldMinimizeRoundtrips(searchRequest); + + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + assertResponse( + cluster(LOCAL_CLUSTER).client(nodeName) + .filterWithHeader(Map.of(Task.X_ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER, "kibana")) + .search(searchRequest), + Assert::assertNotNull + ); + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + assertThat(telemetry.getFailureReasons().size(), equalTo(0)); + assertThat(telemetry.getTook().count(), equalTo(1L)); + assertThat(telemetry.getTookMrtTrue().count(), equalTo(minimizeRoundtrips ? 1L : 0L)); + assertThat(telemetry.getTookMrtFalse().count(), equalTo(minimizeRoundtrips ? 0L : 1L)); + assertThat(telemetry.getRemotesPerSearchAvg(), equalTo(2.0)); + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(2L)); + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(0L)); + assertThat(telemetry.getClientCounts().size(), equalTo(1)); + assertThat(telemetry.getClientCounts().get("kibana"), equalTo(1L)); + if (minimizeRoundtrips) { + assertThat(telemetry.getFeatureCounts().get(MRT_FEATURE), equalTo(1L)); + } else { + assertThat(telemetry.getFeatureCounts().get(MRT_FEATURE), equalTo(null)); + } + assertThat(telemetry.getFeatureCounts().get(ASYNC_FEATURE), equalTo(null)); + + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(3)); + for (String clusterAlias : remoteClusterAlias()) { + var clusterTelemetry = perCluster.get(clusterAlias); + assertThat(clusterTelemetry.getCount(), equalTo(1L)); + assertThat(clusterTelemetry.getSkippedCount(), equalTo(0L)); + assertThat(clusterTelemetry.getTook().count(), equalTo(1L)); + } + + // another search + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(2L)); + assertThat(telemetry.getSuccessCount(), equalTo(2L)); + assertThat(telemetry.getFailureReasons().size(), equalTo(0)); + assertThat(telemetry.getTook().count(), equalTo(2L)); + assertThat(telemetry.getTookMrtTrue().count(), equalTo(minimizeRoundtrips ? 2L : 0L)); + assertThat(telemetry.getTookMrtFalse().count(), equalTo(minimizeRoundtrips ? 
0L : 2L)); + assertThat(telemetry.getRemotesPerSearchAvg(), equalTo(2.0)); + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(2L)); + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(0L)); + assertThat(telemetry.getClientCounts().size(), equalTo(1)); + assertThat(telemetry.getClientCounts().get("kibana"), equalTo(1L)); + perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(3)); + for (String clusterAlias : remoteClusterAlias()) { + var clusterTelemetry = perCluster.get(clusterAlias); + assertThat(clusterTelemetry.getCount(), equalTo(2L)); + assertThat(clusterTelemetry.getSkippedCount(), equalTo(0L)); + assertThat(clusterTelemetry.getTook().count(), equalTo(2L)); + } + } + + /** + * Search on a specific remote + */ + public void testOneRemoteSearch() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + // Make request to cluster a + SearchRequest searchRequest = makeSearchRequest(localIndex, REMOTE1 + ":" + remoteIndex); + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(2)); + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2), equalTo(null)); + assertThat(telemetry.getClientCounts().size(), equalTo(0)); + + // Make request to cluster b + searchRequest = makeSearchRequest(localIndex, REMOTE2 + ":" + remoteIndex); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(2L)); + assertThat(telemetry.getSuccessCount(), equalTo(2L)); + perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(3)); + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2).getTook().count(), equalTo(1L)); + } + + /** + * Local search should not produce any telemetry at all + */ + public void testLocalOnlySearch() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + + CCSTelemetrySnapshot telemetry = getTelemetryFromSearch(localIndex); + assertThat(telemetry.getTotalCount(), equalTo(0L)); + } + + /** + * Search on remotes only, without local index + */ + public void testRemoteOnlySearch() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupClusters(); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + CCSTelemetrySnapshot telemetry = getTelemetryFromSearch("*:" + remoteIndex); + var perCluster = telemetry.getByRemoteCluster(); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + assertThat(telemetry.getFailureReasons().size(), equalTo(0)); + assertThat(telemetry.getTook().count(), equalTo(1L)); + assertThat(perCluster.size(), equalTo(2)); + 
assertThat(telemetry.getClientCounts().size(), equalTo(0)); + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getSkippedCount(), equalTo(0L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2).getSkippedCount(), equalTo(0L)); + assertThat(perCluster.get(REMOTE2).getTook().count(), equalTo(1L)); + } + + /** + * Count wildcard searches. Only wildcards in index names (not in cluster names) are counted. + */ + public void testWildcardSearch() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(localIndex, "*:" + remoteIndex); + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getFeatureCounts().get(WILDCARD_FEATURE), equalTo(null)); + + searchRequest = makeSearchRequest("*", REMOTE1 + ":" + remoteIndex); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(2L)); + assertThat(telemetry.getFeatureCounts().get(WILDCARD_FEATURE), equalTo(1L)); + + searchRequest = makeSearchRequest(localIndex, REMOTE2 + ":*"); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(3L)); + assertThat(telemetry.getFeatureCounts().get(WILDCARD_FEATURE), equalTo(2L)); + + // Wildcards in cluster name do not count + searchRequest = makeSearchRequest(localIndex, "*:" + remoteIndex); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(4L)); + assertThat(telemetry.getFeatureCounts().get(WILDCARD_FEATURE), equalTo(2L)); + + // Wildcard in the middle of the index name counts + searchRequest = makeSearchRequest(localIndex, REMOTE2 + ":rem*"); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(5L)); + assertThat(telemetry.getFeatureCounts().get(WILDCARD_FEATURE), equalTo(3L)); + + // Wildcard only counted once per search + searchRequest = makeSearchRequest("*", REMOTE1 + ":rem*", REMOTE2 + ":remote*"); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(6L)); + assertThat(telemetry.getFeatureCounts().get(WILDCARD_FEATURE), equalTo(4L)); + } + + /** + * Test complete search failure + */ + public void testFailedSearch() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = 
makeSearchRequest(localIndex, "*:" + remoteIndex); + // shardId -1 means to throw the Exception on all shards, so should result in complete search failure + ThrowingQueryBuilder queryBuilder = new ThrowingQueryBuilder(randomLong(), new IllegalStateException("index corrupted"), -1); + searchRequest.source(new SearchSourceBuilder().query(queryBuilder).size(10)); + searchRequest.allowPartialSearchResults(true); + + CCSTelemetrySnapshot telemetry = getTelemetryFromFailedSearch(searchRequest); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(0L)); + assertThat(telemetry.getTook().count(), equalTo(0L)); + assertThat(telemetry.getTookMrtTrue().count(), equalTo(0L)); + assertThat(telemetry.getTookMrtFalse().count(), equalTo(0L)); + Map expectedFailures = Map.of(Result.UNKNOWN.getName(), 1L); + assertThat(telemetry.getFailureReasons(), equalTo(expectedFailures)); + } + + /** + * Search when all the remotes failed and skipped + */ + public void testSkippedAllRemotesSearch() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(localIndex, "*:" + remoteIndex); + // throw Exception on all shards of remoteIndex, but not against localIndex + ThrowingQueryBuilder queryBuilder = new ThrowingQueryBuilder( + randomLong(), + new IllegalStateException("index corrupted"), + remoteIndex + ); + searchRequest.source(new SearchSourceBuilder().query(queryBuilder).size(10)); + searchRequest.allowPartialSearchResults(true); + + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + // Note that this counts how many searches had skipped remotes, not how many remotes are skipped + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(1L)); + // Still count the remote that failed + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(2L)); + assertThat(telemetry.getTook().count(), equalTo(1L)); + // Each remote will have its skipped count bumped + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(3)); + for (String remote : remoteClusterAlias()) { + assertThat(perCluster.get(remote).getCount(), equalTo(0L)); + assertThat(perCluster.get(remote).getSkippedCount(), equalTo(1L)); + assertThat(perCluster.get(remote).getTook().count(), equalTo(0L)); + } + } + + public void testSkippedOneRemoteSearch() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + // Remote1 will fail, Remote2 will just do nothing but it counts as success + SearchRequest searchRequest = makeSearchRequest(localIndex, REMOTE1 + ":" + remoteIndex, REMOTE2 + ":" + "nosuchindex*"); + // throw Exception on all shards of remoteIndex, but not against localIndex + ThrowingQueryBuilder queryBuilder = new ThrowingQueryBuilder( + randomLong(), + new IllegalStateException("index corrupted"), + remoteIndex + ); + searchRequest.source(new SearchSourceBuilder().query(queryBuilder).size(10)); + searchRequest.allowPartialSearchResults(true); + + 
String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + assertResponse(cluster(LOCAL_CLUSTER).client(nodeName).search(searchRequest), Assert::assertNotNull); + + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + // Note that this counts how many searches had skipped remotes, not how many remotes are skipped + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(1L)); + // Still count the remote that failed + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(2L)); + assertThat(telemetry.getTook().count(), equalTo(1L)); + // Each remote will have its skipped count bumped + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(3)); + // This one is skipped + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(0L)); + assertThat(perCluster.get(REMOTE1).getSkippedCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(0L)); + // This one is OK + assertThat(perCluster.get(REMOTE2).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2).getSkippedCount(), equalTo(0L)); + assertThat(perCluster.get(REMOTE2).getTook().count(), equalTo(1L)); + } + + /** + * Test what happens if remote times out - it should be skipped + */ + public void testRemoteTimesOut() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(localIndex, REMOTE1 + ":" + remoteIndex); + // This works only with minimize_roundtrips enabled, since otherwise timed out shards will be counted as + // partial failure, and we disable partial results.. + searchRequest.setCcsMinimizeRoundtrips(true); + + TimeValue searchTimeout = new TimeValue(200, TimeUnit.MILLISECONDS); + // query builder that will sleep for the specified amount of time in the query phase + SlowRunningQueryBuilder slowRunningQueryBuilder = new SlowRunningQueryBuilder(searchTimeout.millis() * 5, remoteIndex); + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(slowRunningQueryBuilder).timeout(searchTimeout); + searchRequest.source(sourceBuilder); + + CCSTelemetrySnapshot telemetry = getTelemetryFromSearch(searchRequest); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(1L)); + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(1L)); + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(2)); + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(0L)); + assertThat(perCluster.get(REMOTE1).getSkippedCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(0L)); + assertThat(perCluster.get(REMOTE2), equalTo(null)); + } + + /** + * Test what happens if remote times out and there's no local - it should be skipped + */ + public void testRemoteOnlyTimesOut() throws Exception { + Map testClusterInfo = setupClusters(); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(REMOTE1 + ":" + remoteIndex); + // This works only with minimize_roundtrips enabled, since otherwise timed out shards will be counted as + // partial failure, and we disable partial results... 
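// For reference: the getTelemetrySnapshot(nodeName) helper used throughout
// these tests is defined further down the file, beyond this excerpt. Per the
// commit message, the metrics are gathered in TransportSearchAction and held
// by the UsageService, so a plausible sketch of the helper (the two accessor
// names below are assumptions, not taken verbatim from the patch) is:
private CCSTelemetrySnapshot getTelemetrySnapshot(String nodeName) {
    var usage = cluster(LOCAL_CLUSTER).getInstance(UsageService.class, nodeName);
    return usage.getCcsUsageHolder().getCCSTelemetrySnapshot();
}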
+ searchRequest.setCcsMinimizeRoundtrips(true); + + TimeValue searchTimeout = new TimeValue(100, TimeUnit.MILLISECONDS); + // query builder that will sleep for the specified amount of time in the query phase + SlowRunningQueryBuilder slowRunningQueryBuilder = new SlowRunningQueryBuilder(searchTimeout.millis() * 5, remoteIndex); + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(slowRunningQueryBuilder).timeout(searchTimeout); + searchRequest.source(sourceBuilder); + + CCSTelemetrySnapshot telemetry = getTelemetryFromSearch(searchRequest); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(1L)); + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(1L)); + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(1)); + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(0L)); + assertThat(perCluster.get(REMOTE1).getSkippedCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(0L)); + assertThat(perCluster.get(REMOTE2), equalTo(null)); + } + + @SkipOverride(aliases = { REMOTE1 }) + public void testRemoteTimesOutFailure() throws Exception { + Map testClusterInfo = setupClusters(); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(REMOTE1 + ":" + remoteIndex); + + TimeValue searchTimeout = new TimeValue(100, TimeUnit.MILLISECONDS); + // query builder that will sleep for the specified amount of time in the query phase + SlowRunningQueryBuilder slowRunningQueryBuilder = new SlowRunningQueryBuilder(searchTimeout.millis() * 5, remoteIndex); + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(slowRunningQueryBuilder).timeout(searchTimeout); + searchRequest.source(sourceBuilder); + + CCSTelemetrySnapshot telemetry = getTelemetryFromFailedSearch(searchRequest); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(0L)); + // Failure is not skipping + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(0L)); + // Still count the remote that failed + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(1L)); + assertThat(telemetry.getTook().count(), equalTo(0L)); + Map expectedFailure = Map.of(Result.TIMEOUT.getName(), 1L); + assertThat(telemetry.getFailureReasons(), equalTo(expectedFailure)); + // No per-cluster data on total failure + assertThat(telemetry.getByRemoteCluster().size(), equalTo(0)); + } + + /** + * Search when all the remotes failed and not skipped + */ + @SkipOverride(aliases = { REMOTE1, REMOTE2 }) + public void testFailedAllRemotesSearch() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SearchRequest searchRequest = makeSearchRequest(localIndex, "*:" + remoteIndex); + // throw Exception on all shards of remoteIndex, but not against localIndex + ThrowingQueryBuilder queryBuilder = new ThrowingQueryBuilder( + randomLong(), + new IllegalStateException("index corrupted"), + remoteIndex + ); + searchRequest.source(new SearchSourceBuilder().query(queryBuilder).size(10)); + + CCSTelemetrySnapshot telemetry = getTelemetryFromFailedSearch(searchRequest); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(0L)); + // Failure is not 
skipping + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(0L)); + // Still count the remote that failed + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(2L)); + assertThat(telemetry.getTook().count(), equalTo(0L)); + Map expectedFailure = Map.of(Result.REMOTES_UNAVAILABLE.getName(), 1L); + assertThat(telemetry.getFailureReasons(), equalTo(expectedFailure)); + // No per-cluster data on total failure + assertThat(telemetry.getByRemoteCluster().size(), equalTo(0)); + } + + /** + * Test that we're still counting remote search even if remote cluster has no such index + */ + public void testRemoteHasNoIndex() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + + CCSTelemetrySnapshot telemetry = getTelemetryFromSearch(localIndex, REMOTE1 + ":" + "no_such_index*"); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(2)); + assertThat(perCluster.get(REMOTE1).getCount(), equalTo(1L)); + assertThat(perCluster.get(REMOTE1).getTook().count(), equalTo(1L)); + assertThat(perCluster.get(REMOTE2), equalTo(null)); + } + + /** + * Test that we're still counting remote search even if remote cluster has no such index + */ + @SkipOverride(aliases = { REMOTE1 }) + public void testRemoteHasNoIndexFailure() throws Exception { + SearchRequest searchRequest = makeSearchRequest(REMOTE1 + ":no_such_index"); + CCSTelemetrySnapshot telemetry = getTelemetryFromFailedSearch(searchRequest); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(0L)); + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(0)); + Map expectedFailure = Map.of(Result.NOT_FOUND.getName(), 1L); + assertThat(telemetry.getFailureReasons(), equalTo(expectedFailure)); + } + + public void testPITSearch() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + OpenPointInTimeRequest openPITRequest = new OpenPointInTimeRequest(localIndex, "*:" + remoteIndex).keepAlive( + TimeValue.timeValueMinutes(5) + ); + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + var client = cluster(LOCAL_CLUSTER).client(nodeName); + BytesReference pitID = client.execute(TransportOpenPointInTimeAction.TYPE, openPITRequest).actionGet().getPointInTimeId(); + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder(pitID).setKeepAlive(TimeValue.timeValueMinutes(5))) + .sort("@timestamp") + .size(10) + ); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + + assertResponse(client.search(searchRequest), Assert::assertNotNull); + // do it again + assertResponse(client.search(searchRequest), Assert::assertNotNull); + client.execute(TransportClosePointInTimeAction.TYPE, new ClosePointInTimeRequest(pitID)).actionGet(); + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + + assertThat(telemetry.getTotalCount(), equalTo(2L)); + assertThat(telemetry.getSuccessCount(), equalTo(2L)); + } + + private CCSTelemetrySnapshot getTelemetrySnapshot(String nodeName) { + var usage = cluster(LOCAL_CLUSTER).getInstance(UsageService.class, nodeName); + return 
usage.getCcsUsageHolder().getCCSTelemetrySnapshot(); + } + + private Map setupClusters() { + String localIndex = "demo"; + int numShardsLocal = randomIntBetween(2, 10); + Settings localSettings = indexSettings(numShardsLocal, randomIntBetween(0, 1)).build(); + assertAcked( + client(LOCAL_CLUSTER).admin() + .indices() + .prepareCreate(localIndex) + .setSettings(localSettings) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + indexDocs(client(LOCAL_CLUSTER), localIndex); + + String remoteIndex = "prod"; + int numShardsRemote = randomIntBetween(2, 10); + for (String clusterAlias : remoteClusterAlias()) { + final InternalTestCluster remoteCluster = cluster(clusterAlias); + remoteCluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); + assertAcked( + client(clusterAlias).admin() + .indices() + .prepareCreate(remoteIndex) + .setSettings(indexSettings(numShardsRemote, randomIntBetween(0, 1))) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + assertFalse( + client(clusterAlias).admin() + .cluster() + .prepareHealth(remoteIndex) + .setWaitForYellowStatus() + .setTimeout(TimeValue.timeValueSeconds(10)) + .get() + .isTimedOut() + ); + indexDocs(client(clusterAlias), remoteIndex); + } + + Map clusterInfo = new HashMap<>(); + clusterInfo.put("local.index", localIndex); + clusterInfo.put("remote.index", remoteIndex); + return clusterInfo; + } + + private int indexDocs(Client client, String index) { + int numDocs = between(5, 20); + for (int i = 0; i < numDocs; i++) { + client.prepareIndex(index).setSource("f", "v", "@timestamp", randomNonNegativeLong()).get(); + } + client.admin().indices().prepareRefresh(index).get(); + return numDocs; + } + + /** + * Annotation to mark specific cluster in a test as not to be skipped when unavailable + */ + @Retention(RetentionPolicy.RUNTIME) + @Target(ElementType.METHOD) + @interface SkipOverride { + String[] aliases(); + } + + /** + * Test rule to process skip annotations + */ + static class SkipUnavailableRule implements TestRule { + private final Map skipMap; + + SkipUnavailableRule(String... clusterAliases) { + this.skipMap = Arrays.stream(clusterAliases).collect(Collectors.toMap(Function.identity(), alias -> true)); + } + + public Map getMap() { + return skipMap; + } + + @Override + public Statement apply(Statement base, Description description) { + // Check for annotation named "SkipOverride" and set the overrides accordingly + var aliases = description.getAnnotation(SkipOverride.class); + if (aliases != null) { + for (String alias : aliases.aliases()) { + skipMap.put(alias, false); + } + } + return base; + } + + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java new file mode 100644 index 0000000000000..fe1da86dd54c7 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshot.java @@ -0,0 +1,404 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.action.admin.cluster.stats.LongMetric.LongMetricValue; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.transport.RemoteClusterAware; +import org.elasticsearch.xcontent.ToXContentFragment; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Holds a snapshot of the CCS telemetry statistics from {@link CCSUsageTelemetry}. + * Used to hold the stats for a single node that's part of a {@link ClusterStatsNodeResponse}, as well as to + * accumulate stats for the entire cluster and return them as part of the {@link ClusterStatsResponse}. + *
+ * Theory of operation: + * - The snapshot is created on each particular node with the stats for the node, and is sent to the coordinating node + * - Coordinating node creates an empty snapshot and merges all the node snapshots into it using add() + *
+ * The snapshot contains {@link LongMetricValue}s for latencies, which currently contain full histograms (since you can't + * produce p90 from a set of node p90s, you need the full histogram for that). To avoid excessive copying (histogram weighs several KB), + * the snapshot is designed to be mutable, so that you can add multiple snapshots to it without copying the histograms all the time. + * It is not the intent to mutate the snapshot objects otherwise. + *
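+ * An illustrative sketch of that merge flow on the coordinating node (variable names here are hypothetical):
+ *
+ * CCSTelemetrySnapshot total = new CCSTelemetrySnapshot(); // empty, mutable accumulator
+ * for (CCSTelemetrySnapshot nodeSnapshot : nodeSnapshots) {
+ * total.add(nodeSnapshot); // merges counters and histograms into this instance
+ * }
+ *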
+ */ +public final class CCSTelemetrySnapshot implements Writeable, ToXContentFragment { + public static final String CCS_TELEMETRY_FIELD_NAME = "_search"; + private long totalCount; + private long successCount; + private final Map failureReasons; + + /** + * Latency metrics, overall. + */ + private final LongMetricValue took; + /** + * Latency metrics with minimize_roundtrips=true + */ + private final LongMetricValue tookMrtTrue; + /** + * Latency metrics with minimize_roundtrips=false + */ + private final LongMetricValue tookMrtFalse; + private long remotesPerSearchMax; + private double remotesPerSearchAvg; + private long skippedRemotes; + + private final Map featureCounts; + + private final Map clientCounts; + private final Map byRemoteCluster; + + /** + * Creates a new stats instance with the provided info. + */ + public CCSTelemetrySnapshot( + long totalCount, + long successCount, + Map failureReasons, + LongMetricValue took, + LongMetricValue tookMrtTrue, + LongMetricValue tookMrtFalse, + long remotesPerSearchMax, + double remotesPerSearchAvg, + long skippedRemotes, + Map featureCounts, + Map clientCounts, + Map byRemoteCluster + ) { + this.totalCount = totalCount; + this.successCount = successCount; + this.failureReasons = failureReasons; + this.took = took; + this.tookMrtTrue = tookMrtTrue; + this.tookMrtFalse = tookMrtFalse; + this.remotesPerSearchMax = remotesPerSearchMax; + this.remotesPerSearchAvg = remotesPerSearchAvg; + this.skippedRemotes = skippedRemotes; + this.featureCounts = featureCounts; + this.clientCounts = clientCounts; + this.byRemoteCluster = byRemoteCluster; + } + + /** + * Creates a new empty stats instance, that will get additional stats added through {@link #add(CCSTelemetrySnapshot)} + */ + public CCSTelemetrySnapshot() { + // Note this produces modifiable maps, so other snapshots can be merged into it + failureReasons = new HashMap<>(); + featureCounts = new HashMap<>(); + clientCounts = new HashMap<>(); + byRemoteCluster = new HashMap<>(); + took = new LongMetricValue(); + tookMrtTrue = new LongMetricValue(); + tookMrtFalse = new LongMetricValue(); + } + + public CCSTelemetrySnapshot(StreamInput in) throws IOException { + this.totalCount = in.readVLong(); + this.successCount = in.readVLong(); + this.failureReasons = in.readMap(StreamInput::readLong); + this.took = LongMetricValue.fromStream(in); + this.tookMrtTrue = LongMetricValue.fromStream(in); + this.tookMrtFalse = LongMetricValue.fromStream(in); + this.remotesPerSearchMax = in.readVLong(); + this.remotesPerSearchAvg = in.readDouble(); + this.skippedRemotes = in.readVLong(); + this.featureCounts = in.readMap(StreamInput::readLong); + this.clientCounts = in.readMap(StreamInput::readLong); + this.byRemoteCluster = in.readMap(PerClusterCCSTelemetry::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(totalCount); + out.writeVLong(successCount); + out.writeMap(failureReasons, StreamOutput::writeLong); + took.writeTo(out); + tookMrtTrue.writeTo(out); + tookMrtFalse.writeTo(out); + out.writeVLong(remotesPerSearchMax); + out.writeDouble(remotesPerSearchAvg); + out.writeVLong(skippedRemotes); + out.writeMap(featureCounts, StreamOutput::writeLong); + out.writeMap(clientCounts, StreamOutput::writeLong); + out.writeMap(byRemoteCluster, StreamOutput::writeWriteable); + } + + public long getTotalCount() { + return totalCount; + } + + public long getSuccessCount() { + return successCount; + } + + public Map getFailureReasons() { + return 
Collections.unmodifiableMap(failureReasons); + } + + public LongMetricValue getTook() { + return took; + } + + public LongMetricValue getTookMrtTrue() { + return tookMrtTrue; + } + + public LongMetricValue getTookMrtFalse() { + return tookMrtFalse; + } + + public long getRemotesPerSearchMax() { + return remotesPerSearchMax; + } + + public double getRemotesPerSearchAvg() { + return remotesPerSearchAvg; + } + + public long getSearchCountWithSkippedRemotes() { + return skippedRemotes; + } + + public Map getFeatureCounts() { + return Collections.unmodifiableMap(featureCounts); + } + + public Map getClientCounts() { + return Collections.unmodifiableMap(clientCounts); + } + + public Map getByRemoteCluster() { + return Collections.unmodifiableMap(byRemoteCluster); + } + + public static class PerClusterCCSTelemetry implements Writeable, ToXContentFragment { + private long count; + private long skippedCount; + private final LongMetricValue took; + + public PerClusterCCSTelemetry() { + took = new LongMetricValue(); + } + + public PerClusterCCSTelemetry(long count, long skippedCount, LongMetricValue took) { + this.took = took; + this.skippedCount = skippedCount; + this.count = count; + } + + public PerClusterCCSTelemetry(PerClusterCCSTelemetry other) { + this.count = other.count; + this.skippedCount = other.skippedCount; + this.took = new LongMetricValue(other.took); + } + + public PerClusterCCSTelemetry(StreamInput in) throws IOException { + this.count = in.readVLong(); + this.skippedCount = in.readVLong(); + this.took = LongMetricValue.fromStream(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(count); + out.writeVLong(skippedCount); + took.writeTo(out); + } + + public PerClusterCCSTelemetry add(PerClusterCCSTelemetry v) { + count += v.count; + skippedCount += v.skippedCount; + took.add(v.took); + return this; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("total", count); + builder.field("skipped", skippedCount); + publishLatency(builder, "took", took); + builder.endObject(); + return builder; + } + + public long getCount() { + return count; + } + + public long getSkippedCount() { + return skippedCount; + } + + public LongMetricValue getTook() { + return took; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PerClusterCCSTelemetry that = (PerClusterCCSTelemetry) o; + return count == that.count && skippedCount == that.skippedCount && Objects.equals(took, that.took); + } + + @Override + public int hashCode() { + return Objects.hash(count, skippedCount, took); + } + } + + /** + * Add the provided stats to the ones held by the current instance, effectively merging the two. + * @param stats the other stats object to add to this one + */ + public void add(CCSTelemetrySnapshot stats) { + // This should be called in ClusterStatsResponse ctor only, so we don't need to worry about concurrency + if (stats.totalCount == 0) { + // Just ignore the empty stats. + // This could happen if the node is brand new or if the stats are not available, e.g. because it runs an old version. 
+ return;
+ }
+ long oldCount = totalCount;
+ totalCount += stats.totalCount;
+ successCount += stats.successCount;
+ skippedRemotes += stats.skippedRemotes;
+ stats.failureReasons.forEach((k, v) -> failureReasons.merge(k, v, Long::sum));
+ stats.featureCounts.forEach((k, v) -> featureCounts.merge(k, v, Long::sum));
+ stats.clientCounts.forEach((k, v) -> clientCounts.merge(k, v, Long::sum));
+ took.add(stats.took);
+ tookMrtTrue.add(stats.tookMrtTrue);
+ tookMrtFalse.add(stats.tookMrtFalse);
+ remotesPerSearchMax = Math.max(remotesPerSearchMax, stats.remotesPerSearchMax);
+ if (totalCount > 0 && oldCount > 0) {
+ // Weighted average
+ remotesPerSearchAvg = (remotesPerSearchAvg * oldCount + stats.remotesPerSearchAvg * stats.totalCount) / totalCount;
+ } else {
+ // If we didn't have any old value, we just take the new one
+ remotesPerSearchAvg = stats.remotesPerSearchAvg;
+ }
+ // we copy the object here since we'll be modifying it later on subsequent adds
+ // TODO: this may be sub-optimal, as we'll be copying histograms when adding the first snapshot to an empty container,
+ // which we could probably have avoided.
+ stats.byRemoteCluster.forEach((r, v) -> byRemoteCluster.merge(r, new PerClusterCCSTelemetry(v), PerClusterCCSTelemetry::add));
+ }
+
+ /**
+ * Publishes the latency statistics to the provided {@link XContentBuilder}.
+ * Example:
+ * "took": {
+ * "max": 345032,
+ * "avg": 1620,
+ * "p90": 2570
+ * }
+ */
+ public static void publishLatency(XContentBuilder builder, String name, LongMetricValue took) throws IOException {
+ builder.startObject(name);
+ {
+ builder.field("max", took.max());
+ builder.field("avg", took.avg());
+ builder.field("p90", took.p90());
+ }
+ builder.endObject();
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(CCS_TELEMETRY_FIELD_NAME);
+ {
+ builder.field("total", totalCount);
+ builder.field("success", successCount);
+ builder.field("skipped", skippedRemotes);
+ publishLatency(builder, "took", took);
+ publishLatency(builder, "took_mrt_true", tookMrtTrue);
+ publishLatency(builder, "took_mrt_false", tookMrtFalse);
+ builder.field("remotes_per_search_max", remotesPerSearchMax);
+ builder.field("remotes_per_search_avg", remotesPerSearchAvg);
+ builder.field("failure_reasons", failureReasons);
+ builder.field("features", featureCounts);
+ builder.field("clients", clientCounts);
+ builder.startObject("clusters");
+ {
+ for (var entry : byRemoteCluster.entrySet()) {
+ String remoteName = entry.getKey();
+ if (RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY.equals(remoteName)) {
+ remoteName = SearchResponse.LOCAL_CLUSTER_NAME_REPRESENTATION;
+ }
+ builder.field(remoteName, entry.getValue());
+ }
+ }
+ builder.endObject();
+ }
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ CCSTelemetrySnapshot that = (CCSTelemetrySnapshot) o;
+ return totalCount == that.totalCount
+ && successCount == that.successCount
+ && skippedRemotes == that.skippedRemotes
+ && Objects.equals(failureReasons, that.failureReasons)
+ && Objects.equals(took, that.took)
+ && Objects.equals(tookMrtTrue, that.tookMrtTrue)
+ && Objects.equals(tookMrtFalse, that.tookMrtFalse)
+ && Objects.equals(remotesPerSearchMax, that.remotesPerSearchMax)
+ && Objects.equals(remotesPerSearchAvg, that.remotesPerSearchAvg)
+ && Objects.equals(featureCounts, that.featureCounts)
+
&& Objects.equals(clientCounts, that.clientCounts) + && Objects.equals(byRemoteCluster, that.byRemoteCluster); + } + + @Override + public int hashCode() { + return Objects.hash( + totalCount, + successCount, + failureReasons, + took, + tookMrtTrue, + tookMrtFalse, + remotesPerSearchMax, + remotesPerSearchAvg, + skippedRemotes, + featureCounts, + clientCounts, + byRemoteCluster + ); + } + + @Override + public String toString() { + return Strings.toString(this, true, true); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsage.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsage.java new file mode 100644 index 0000000000000..b2d75ac8f61f3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsage.java @@ -0,0 +1,246 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.ElasticsearchSecurityException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ShardOperationFailedException; +import org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.Result; +import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.action.search.TransportSearchAction; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.search.SearchShardTarget; +import org.elasticsearch.search.query.SearchTimeoutException; +import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.NoSeedNodeLeftException; +import org.elasticsearch.transport.NoSuchRemoteClusterException; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.transport.RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; + +/** + * This is a container for telemetry data from an individual cross-cluster search for _search or _async_search (or + * other search endpoints that use the {@link TransportSearchAction} such as _msearch). 
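+ *
+ * An illustrative sketch of how a search might populate it via the builder (tookMillis is a hypothetical variable):
+ *
+ * CCSUsage usage = new CCSUsage.Builder()
+ * .setRemotesCount(2)
+ * .setClient("kibana")
+ * .took(tookMillis)
+ * .build();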
+ */
+public class CCSUsage {
+ private final long took;
+ private final Result status;
+ private final Set features;
+ private final int remotesCount;
+
+ private final String client;
+
+ private final Set skippedRemotes;
+ private final Map perClusterUsage;
+
+ public static class Builder {
+ private long took;
+ private final Set features;
+ private Result status = Result.SUCCESS;
+ private int remotesCount;
+ private String client;
+ private final Set skippedRemotes;
+ private final Map perClusterUsage;
+
+ public Builder() {
+ features = new HashSet<>();
+ skippedRemotes = new HashSet<>();
+ perClusterUsage = new HashMap<>();
+ }
+
+ public Builder took(long took) {
+ this.took = took;
+ return this;
+ }
+
+ public Builder setFailure(Result failureType) {
+ this.status = failureType;
+ return this;
+ }
+
+ public Builder setFailure(Exception e) {
+ return setFailure(getFailureType(e));
+ }
+
+ public Builder setFeature(String feature) {
+ this.features.add(feature);
+ return this;
+ }
+
+ public Builder setClient(String client) {
+ this.client = client;
+ return this;
+ }
+
+ public Builder skippedRemote(String remote) {
+ this.skippedRemotes.add(remote);
+ return this;
+ }
+
+ public Builder perClusterUsage(String remote, TimeValue took) {
+ this.perClusterUsage.put(remote, new PerClusterUsage(took));
+ return this;
+ }
+
+ public CCSUsage build() {
+ return new CCSUsage(took, status, remotesCount, skippedRemotes, features, client, perClusterUsage);
+ }
+
+ public Builder setRemotesCount(int remotesCount) {
+ this.remotesCount = remotesCount;
+ return this;
+ }
+
+ public int getRemotesCount() {
+ return remotesCount;
+ }
+
+ /**
+ * Get failure type as {@link Result} from the search failure exception.
+ */
+ public static Result getFailureType(Exception e) {
+ var unwrapped = ExceptionsHelper.unwrapCause(e);
+ if (unwrapped instanceof Exception) {
+ e = (Exception) unwrapped;
+ }
+ if (isRemoteUnavailable(e)) {
+ return Result.REMOTES_UNAVAILABLE;
+ }
+ if (ExceptionsHelper.unwrap(e, ResourceNotFoundException.class) != null) {
+ return Result.NOT_FOUND;
+ }
+ if (e instanceof TaskCancelledException || (ExceptionsHelper.unwrap(e, TaskCancelledException.class) != null)) {
+ return Result.CANCELED;
+ }
+ if (ExceptionsHelper.unwrap(e, SearchTimeoutException.class) != null) {
+ return Result.TIMEOUT;
+ }
+ if (ExceptionsHelper.unwrap(e, ElasticsearchSecurityException.class) != null) {
+ return Result.SECURITY;
+ }
+ if (ExceptionsHelper.unwrapCorruption(e) != null) {
+ return Result.CORRUPTION;
+ }
+ // This is a last-resort check: if we still don't know the reason but all shard failures are remote,
+ // we assume the remote is somehow at fault.
+ if (e instanceof SearchPhaseExecutionException spe) {
+ // If this is a failure that happened because of remote failures only
+ var groupedFails = ExceptionsHelper.groupBy(spe.shardFailures());
+ if (Arrays.stream(groupedFails).allMatch(Builder::isRemoteFailure)) {
+ return Result.REMOTES_UNAVAILABLE;
+ }
+ }
+ // OK, we don't know what happened
+ return Result.UNKNOWN;
+ }
+
+ /**
+ * Is this failure exception because the remote was unavailable?
+ * See also: TransportResolveClusterAction#notConnectedError
+ */
+ static boolean isRemoteUnavailable(Exception e) {
+ if (ExceptionsHelper.unwrap(
+ e,
+ ConnectTransportException.class,
+ NoSuchRemoteClusterException.class,
+ NoSeedNodeLeftException.class
+ ) != null) {
+ return true;
+ }
+ Throwable ill = ExceptionsHelper.unwrap(e, IllegalStateException.class, IllegalArgumentException.class);
+ if (ill != null && (ill.getMessage().contains("Unable to open any connections") || ill.getMessage().contains("unknown host"))) {
+ return true;
+ }
+ // OK, this doesn't look like any of the known remote exceptions
+ return false;
+ }
+
+ /**
+ * Is this failure coming from a remote cluster?
+ */
+ static boolean isRemoteFailure(ShardOperationFailedException failure) {
+ if (failure instanceof ShardSearchFailure shardFailure) {
+ SearchShardTarget shard = shardFailure.shard();
+ return shard != null && shard.getClusterAlias() != null && LOCAL_CLUSTER_GROUP_KEY.equals(shard.getClusterAlias()) == false;
+ }
+ return false;
+ }
+ }
+
+ private CCSUsage(
+ long took,
+ Result status,
+ int remotesCount,
+ Set skippedRemotes,
+ Set features,
+ String client,
+ Map perClusterUsage
+ ) {
+ this.status = status;
+ this.remotesCount = remotesCount;
+ this.features = features;
+ this.client = client;
+ this.took = took;
+ this.skippedRemotes = skippedRemotes;
+ this.perClusterUsage = perClusterUsage;
+ }
+
+ public Map getPerClusterUsage() {
+ return perClusterUsage;
+ }
+
+ public Result getStatus() {
+ return status;
+ }
+
+ public Set getFeatures() {
+ return features;
+ }
+
+ public long getRemotesCount() {
+ return remotesCount;
+ }
+
+ public String getClient() {
+ return client;
+ }
+
+ public long getTook() {
+ return took;
+ }
+
+ public Set getSkippedRemotes() {
+ return skippedRemotes;
+ }
+
+ public static class PerClusterUsage {
+
+ // if MRT=true, the took time on the remote cluster; otherwise the overall took time
+ private long took;
+
+ public PerClusterUsage(TimeValue took) {
+ if (took != null) {
+ this.took = took.millis();
+ }
+ }
+
+ public long getTook() {
+ return took;
+ }
+ }
+
+}
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java
new file mode 100644
index 0000000000000..60766bd4068e3
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetry.java
@@ -0,0 +1,246 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.action.admin.cluster.stats;
+
+import org.elasticsearch.common.util.Maps;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Service holding accumulated CCS search usage statistics. Individual cross-cluster searches will pass
+ * CCSUsage data here to have it collated and aggregated. Snapshots of the current CCS Telemetry Usage
+ * can be obtained by getting {@link CCSTelemetrySnapshot} objects.
+ *
+ * Theory of operation: + * Each search creates a {@link CCSUsage.Builder}, which can be updated during the progress of the search request, + * and then it instantiates a {@link CCSUsage} object when the request is finished. + * That object is passed to {@link #updateUsage(CCSUsage)} on the request processing end (whether successful or not). + * The {@link #updateUsage(CCSUsage)} method will then update the internal counters and metrics. + *
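+ * A condensed sketch of that life cycle (illustrative only; remotes, tookMillis and ccsUsageTelemetry are hypothetical names):
+ *
+ * CCSUsage.Builder builder = new CCSUsage.Builder().setRemotesCount(remotes);
+ * // ... the search runs; features, skipped remotes and per-cluster took times get recorded on the builder ...
+ * ccsUsageTelemetry.updateUsage(builder.took(tookMillis).build());
+ *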
+ * When we need to return the current state of the telemetry, we can call {@link #getCCSTelemetrySnapshot()} which produces + * a snapshot of the current state of the telemetry as {@link CCSTelemetrySnapshot}. These snapshots are additive so + * when collecting the snapshots from multiple nodes, an empty snapshot is created and then all the node's snapshots are added + * to it to obtain the summary telemetry. + */ +public class CCSUsageTelemetry { + + /** + * Result of the request execution. + * Either "success" or a failure reason. + */ + public enum Result { + SUCCESS("success"), + REMOTES_UNAVAILABLE("remotes_unavailable"), + CANCELED("canceled"), + NOT_FOUND("not_found"), + TIMEOUT("timeout"), + CORRUPTION("corruption"), + SECURITY("security"), + // May be helpful if there's a lot of other reasons, and it may be hard to calculate the unknowns for some clients. + UNKNOWN("other"); + + private final String name; + + Result(String name) { + this.name = name; + } + + public String getName() { + return name; + } + } + + // Not enum because we won't mind other places adding their own features + public static final String MRT_FEATURE = "mrt_on"; + public static final String ASYNC_FEATURE = "async"; + public static final String WILDCARD_FEATURE = "wildcards"; + + // The list of known Elastic clients. May be incomplete. + public static final Set KNOWN_CLIENTS = Set.of( + "kibana", + "cloud", + "logstash", + "beats", + "fleet", + "ml", + "security", + "observability", + "enterprise-search", + "elasticsearch", + "connectors", + "connectors-cli" + ); + + private final LongAdder totalCount; + private final LongAdder successCount; + private final Map failureReasons; + + /** + * Latency metrics overall + */ + private final LongMetric took; + /** + * Latency metrics with minimize_roundtrips=true + */ + private final LongMetric tookMrtTrue; + /** + * Latency metrics with minimize_roundtrips=false + */ + private final LongMetric tookMrtFalse; + private final LongMetric remotesPerSearch; + private final LongAdder skippedRemotes; + + private final Map featureCounts; + + private final Map clientCounts; + private final Map byRemoteCluster; + + public CCSUsageTelemetry() { + this.byRemoteCluster = new ConcurrentHashMap<>(); + totalCount = new LongAdder(); + successCount = new LongAdder(); + failureReasons = new ConcurrentHashMap<>(); + took = new LongMetric(); + tookMrtTrue = new LongMetric(); + tookMrtFalse = new LongMetric(); + remotesPerSearch = new LongMetric(); + skippedRemotes = new LongAdder(); + featureCounts = new ConcurrentHashMap<>(); + clientCounts = new ConcurrentHashMap<>(); + } + + public void updateUsage(CCSUsage ccsUsage) { + assert ccsUsage.getRemotesCount() > 0 : "Expected at least one remote cluster in CCSUsage"; + // TODO: fork this to a background thread? + doUpdate(ccsUsage); + } + + // This is not synchronized, instead we ensure that every metric in the class is thread-safe. 
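+ // (LongAdder counters, ConcurrentHashMap-backed maps and the ConcurrentHistogram inside LongMetric each
+ // tolerate concurrent updates, so racing searches can record into them without a shared lock.)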
+ private void doUpdate(CCSUsage ccsUsage) {
+ totalCount.increment();
+ long searchTook = ccsUsage.getTook();
+ if (isSuccess(ccsUsage)) {
+ successCount.increment();
+ took.record(searchTook);
+ if (isMRT(ccsUsage)) {
+ tookMrtTrue.record(searchTook);
+ } else {
+ tookMrtFalse.record(searchTook);
+ }
+ ccsUsage.getPerClusterUsage().forEach((r, u) -> byRemoteCluster.computeIfAbsent(r, PerClusterCCSTelemetry::new).update(u));
+ } else {
+ failureReasons.computeIfAbsent(ccsUsage.getStatus(), k -> new LongAdder()).increment();
+ }
+
+ remotesPerSearch.record(ccsUsage.getRemotesCount());
+ if (ccsUsage.getSkippedRemotes().isEmpty() == false) {
+ skippedRemotes.increment();
+ ccsUsage.getSkippedRemotes().forEach(remote -> byRemoteCluster.computeIfAbsent(remote, PerClusterCCSTelemetry::new).skipped());
+ }
+ ccsUsage.getFeatures().forEach(f -> featureCounts.computeIfAbsent(f, k -> new LongAdder()).increment());
+ String client = ccsUsage.getClient();
+ if (client != null && KNOWN_CLIENTS.contains(client)) {
+ // We count only known clients for now
+ clientCounts.computeIfAbsent(ccsUsage.getClient(), k -> new LongAdder()).increment();
+ }
+ }
+
+ private boolean isMRT(CCSUsage ccsUsage) {
+ return ccsUsage.getFeatures().contains(MRT_FEATURE);
+ }
+
+ private boolean isSuccess(CCSUsage ccsUsage) {
+ return ccsUsage.getStatus() == Result.SUCCESS;
+ }
+
+ public Map getTelemetryByCluster() {
+ return byRemoteCluster;
+ }
+
+ /**
+ * Telemetry of each remote involved in cross cluster searches
+ */
+ public static class PerClusterCCSTelemetry {
+ private final String clusterAlias;
+ // The number of successful (not skipped) requests to this cluster.
+ private final LongAdder count;
+ private final LongAdder skippedCount;
+ // This is only over the successful requests, skipped ones do not count here.
+ private final LongMetric took; + + PerClusterCCSTelemetry(String clusterAlias) { + this.clusterAlias = clusterAlias; + this.count = new LongAdder(); + took = new LongMetric(); + this.skippedCount = new LongAdder(); + } + + void update(CCSUsage.PerClusterUsage remoteUsage) { + count.increment(); + took.record(remoteUsage.getTook()); + } + + void skipped() { + skippedCount.increment(); + } + + public long getCount() { + return count.longValue(); + } + + @Override + public String toString() { + return "PerClusterCCSTelemetry{" + + "clusterAlias='" + + clusterAlias + + '\'' + + ", count=" + + count + + ", latency=" + + took.toString() + + '}'; + } + + public long getSkippedCount() { + return skippedCount.longValue(); + } + + public CCSTelemetrySnapshot.PerClusterCCSTelemetry getSnapshot() { + return new CCSTelemetrySnapshot.PerClusterCCSTelemetry(count.longValue(), skippedCount.longValue(), took.getValue()); + } + + } + + public CCSTelemetrySnapshot getCCSTelemetrySnapshot() { + Map reasonsMap = Maps.newMapWithExpectedSize(failureReasons.size()); + failureReasons.forEach((k, v) -> reasonsMap.put(k.getName(), v.longValue())); + + LongMetric.LongMetricValue remotes = remotesPerSearch.getValue(); + + // Maps returned here are unmodifiable, but the empty ctor produces modifiable maps + return new CCSTelemetrySnapshot( + totalCount.longValue(), + successCount.longValue(), + Collections.unmodifiableMap(reasonsMap), + took.getValue(), + tookMrtTrue.getValue(), + tookMrtFalse.getValue(), + remotes.max(), + remotes.avg(), + skippedRemotes.longValue(), + Collections.unmodifiableMap(Maps.transformValues(featureCounts, LongAdder::longValue)), + Collections.unmodifiableMap(Maps.transformValues(clientCounts, LongAdder::longValue)), + Collections.unmodifiableMap(Maps.transformValues(byRemoteCluster, PerClusterCCSTelemetry::getSnapshot)) + ); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/LongMetric.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/LongMetric.java new file mode 100644 index 0000000000000..f3bb936b108c0 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/LongMetric.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.HdrHistogram.ConcurrentHistogram; +import org.HdrHistogram.Histogram; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Objects; +import java.util.zip.DataFormatException; + +/** + * Metric class that accepts longs and provides count, average, max and percentiles. + * Abstracts out the details of how exactly the values are stored and calculated. + * {@link LongMetricValue} is a snapshot of the current state of the metric. 
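+ *
+ * A minimal usage sketch (illustrative):
+ * LongMetric latency = new LongMetric();
+ * latency.record(42); // thread-safe; backed by a ConcurrentHistogram
+ * LongMetricValue snapshot = latency.getValue(); // point-in-time copy exposing count()/max()/avg()/p90()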
+ */
+public class LongMetric {
+ private final Histogram values;
+ private static final int SIGNIFICANT_DIGITS = 2;
+
+ LongMetric() {
+ values = new ConcurrentHistogram(SIGNIFICANT_DIGITS);
+ }
+
+ void record(long v) {
+ values.recordValue(v);
+ }
+
+ LongMetricValue getValue() {
+ return new LongMetricValue(values);
+ }
+
+ /**
+ * Snapshot of {@link LongMetric} value that provides the current state of the metric.
+ * Can be added with another {@link LongMetricValue} object.
+ */
+ public static final class LongMetricValue implements Writeable {
+ // We have to carry the full histogram around since we might need to calculate aggregate percentiles
+ // after collecting individual stats from the nodes, and we can't do that without having the full histogram.
+ // This costs about 2K per metric, which was deemed acceptable.
+ private final Histogram values;
+
+ public LongMetricValue(Histogram values) {
+ // Copy here since we don't want the snapshot value to change if somebody updates the original one
+ this.values = values.copy();
+ }
+
+ public LongMetricValue(LongMetricValue v) {
+ this.values = v.values.copy();
+ }
+
+ LongMetricValue() {
+ this.values = new Histogram(SIGNIFICANT_DIGITS);
+ }
+
+ public void add(LongMetricValue v) {
+ this.values.add(v.values);
+ }
+
+ public static LongMetricValue fromStream(StreamInput in) throws IOException {
+ byte[] b = in.readByteArray();
+ ByteBuffer bb = ByteBuffer.wrap(b);
+ try {
+ // TODO: not sure what a good value for minBarForHighestToLowestValueRatio is here
+ Histogram dh = Histogram.decodeFromCompressedByteBuffer(bb, 1);
+ return new LongMetricValue(dh);
+ } catch (DataFormatException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ ByteBuffer b = ByteBuffer.allocate(values.getNeededByteBufferCapacity());
+ values.encodeIntoCompressedByteBuffer(b);
+ int size = b.position();
+ out.writeVInt(size);
+ out.writeBytes(b.array(), 0, size);
+ }
+
+ public long count() {
+ return values.getTotalCount();
+ }
+
+ public long max() {
+ return values.getMaxValue();
+ }
+
+ public long avg() {
+ return (long) Math.ceil(values.getMean());
+ }
+
+ public long p90() {
+ return values.getValueAtPercentile(90);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == this) return true;
+ if (obj == null || obj.getClass() != this.getClass()) return false;
+ var that = (LongMetricValue) obj;
+ return this.values.equals(that.values);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(values);
+ }
+
+ @Override
+ public String toString() {
+ return "LongMetricValue[count=" + count() + ", " + "max=" + max() + ", " + "avg=" + avg() + "]";
+ }
+
+ }
+}
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java b/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java
index 45cb118691082..8d70e2dd6bb66 100644
--- a/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java
+++ b/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java
@@ -47,6 +47,7 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.function.BiFunction;
 import java.util.function.Predicate;
 import java.util.function.Supplier;
@@ -701,6 +702,13 @@ public Cluster getCluster(String clusterAlias) {
 return clusterInfo.get(clusterAlias);
 }
 
+ /**
+ * @return collection of cluster aliases in the search response (including "(local)" if it was searched).
+ */ + public Set getClusterAliases() { + return clusterInfo.keySet(); + } + /** * Utility to swap a Cluster object. Guidelines for the remapping function: *
    @@ -803,6 +811,7 @@ public boolean hasClusterObjects() { public boolean hasRemoteClusters() { return total > 1 || clusterInfo.keySet().stream().anyMatch(alias -> alias != RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); } + } /** diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 11e767df9c010..6e1645c1ed711 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -23,6 +23,8 @@ import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; +import org.elasticsearch.action.admin.cluster.stats.CCSUsage; +import org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.HandledTransportAction; import org.elasticsearch.action.support.IndicesOptions; @@ -46,6 +48,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.util.ArrayUtils; @@ -84,6 +87,7 @@ import org.elasticsearch.transport.Transport; import org.elasticsearch.transport.TransportRequestOptions; import org.elasticsearch.transport.TransportService; +import org.elasticsearch.usage.UsageService; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentFactory; @@ -156,6 +160,7 @@ public class TransportSearchAction extends HandledTransportAction buildPerIndexOriginalIndices( @@ -305,43 +312,7 @@ public long buildTookInMillis() { @Override protected void doExecute(Task task, SearchRequest searchRequest, ActionListener listener) { - ActionListener loggingAndMetrics = new ActionListener<>() { - @Override - public void onResponse(SearchResponse searchResponse) { - try { - searchResponseMetrics.recordTookTime(searchResponse.getTookInMillis()); - SearchResponseMetrics.ResponseCountTotalStatus responseCountTotalStatus = - SearchResponseMetrics.ResponseCountTotalStatus.SUCCESS; - if (searchResponse.getShardFailures() != null && searchResponse.getShardFailures().length > 0) { - // Deduplicate failures by exception message and index - ShardOperationFailedException[] groupedFailures = ExceptionsHelper.groupBy(searchResponse.getShardFailures()); - for (ShardOperationFailedException f : groupedFailures) { - boolean causeHas500Status = false; - if (f.getCause() != null) { - causeHas500Status = ExceptionsHelper.status(f.getCause()).getStatus() >= 500; - } - if ((f.status().getStatus() >= 500 || causeHas500Status) - && ExceptionsHelper.isNodeOrShardUnavailableTypeException(f.getCause()) == false) { - logger.warn("TransportSearchAction shard failure (partial results response)", f); - responseCountTotalStatus = SearchResponseMetrics.ResponseCountTotalStatus.PARTIAL_FAILURE; - } - } - } - listener.onResponse(searchResponse); - // increment after the delegated onResponse to ensure we don't - // record both a success and a failure if there is an exception - 
searchResponseMetrics.incrementResponseCount(responseCountTotalStatus); - } catch (Exception e) { - onFailure(e); - } - } - - @Override - public void onFailure(Exception e) { - searchResponseMetrics.incrementResponseCount(SearchResponseMetrics.ResponseCountTotalStatus.FAILURE); - listener.onFailure(e); - } - }; + ActionListener loggingAndMetrics = new SearchResponseActionListener((SearchTask) task, listener); executeRequest((SearchTask) task, searchRequest, loggingAndMetrics, AsyncSearchActionProvider::new); } @@ -396,8 +367,32 @@ void executeRequest( searchPhaseProvider.apply(delegate) ); } else { + if ((listener instanceof TelemetryListener tl) && CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { + tl.setRemotes(resolvedIndices.getRemoteClusterIndices().size()); + if (isAsyncSearchTask(task)) { + tl.setFeature(CCSUsageTelemetry.ASYNC_FEATURE); + } + String client = task.getHeader(Task.X_ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER); + if (client != null) { + tl.setClient(client); + } + // Check if any of the index patterns are wildcard patterns + var localIndices = resolvedIndices.getLocalIndices(); + if (localIndices != null && Arrays.stream(localIndices.indices()).anyMatch(Regex::isSimpleMatchPattern)) { + tl.setFeature(CCSUsageTelemetry.WILDCARD_FEATURE); + } + if (resolvedIndices.getRemoteClusterIndices() + .values() + .stream() + .anyMatch(indices -> Arrays.stream(indices.indices()).anyMatch(Regex::isSimpleMatchPattern))) { + tl.setFeature(CCSUsageTelemetry.WILDCARD_FEATURE); + } + } final TaskId parentTaskId = task.taskInfo(clusterService.localNode().getId(), false).taskId(); if (shouldMinimizeRoundtrips(rewritten)) { + if ((listener instanceof TelemetryListener tl) && CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { + tl.setFeature(CCSUsageTelemetry.MRT_FEATURE); + } final AggregationReduceContext.Builder aggregationReduceContextBuilder = rewritten.source() != null && rewritten.source().aggregations() != null ? 
searchService.aggReduceContextBuilder(task::isCancelled, rewritten.source().aggregations())
@@ -805,27 +800,26 @@ static void collectSearchShards(
 for (Map.Entry entry : remoteIndicesByCluster.entrySet()) {
 final String clusterAlias = entry.getKey();
 boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias);
- TransportSearchAction.CCSActionListener> singleListener =
- new TransportSearchAction.CCSActionListener<>(
- clusterAlias,
- skipUnavailable,
- responsesCountDown,
- exceptions,
- clusters,
- listener
- ) {
- @Override
- void innerOnResponse(SearchShardsResponse searchShardsResponse) {
- assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SEARCH_COORDINATION);
- ccsClusterInfoUpdate(searchShardsResponse, clusters, clusterAlias, timeProvider);
- searchShardsResponses.put(clusterAlias, searchShardsResponse);
- }
+ CCSActionListener> singleListener = new CCSActionListener<>(
+ clusterAlias,
+ skipUnavailable,
+ responsesCountDown,
+ exceptions,
+ clusters,
+ listener
+ ) {
+ @Override
+ void innerOnResponse(SearchShardsResponse searchShardsResponse) {
+ assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SEARCH_COORDINATION);
+ ccsClusterInfoUpdate(searchShardsResponse, clusters, clusterAlias, timeProvider);
+ searchShardsResponses.put(clusterAlias, searchShardsResponse);
+ }
- @Override
- Map createFinalResponse() {
- return searchShardsResponses;
- }
- };
+ @Override
+ Map createFinalResponse() {
+ return searchShardsResponses;
+ }
+ };
 remoteClusterService.maybeEnsureConnectedAndGetConnection(
 clusterAlias,
 skipUnavailable == false,
@@ -1520,6 +1514,34 @@ public SearchPhase newSearchPhase(
 }
 }
 
+ /**
+ * TransportSearchAction cannot access async-search code, so it can't check whether the Task
+ * is an instance of AsyncSearchTask, so this roundabout method is used
+ * @param searchTask SearchTask to analyze
+ * @return true if this is an async search task; false if a synchronous search task
+ */
+ private boolean isAsyncSearchTask(SearchTask searchTask) {
+ assert assertAsyncSearchTaskListener(searchTask) : "AsyncSearchTask SearchProgressListener is not one of the expected types";
+ // AsyncSearchTask will not return SearchProgressListener.NOOP, since it uses its own progress listener
+ // which delegates to CCSSingleCoordinatorSearchProgressListener when minimizing roundtrips.
+ // Only synchronous SearchTask uses SearchProgressListener.NOOP or CCSSingleCoordinatorSearchProgressListener directly + return searchTask.getProgressListener() != SearchProgressListener.NOOP + && searchTask.getProgressListener() instanceof CCSSingleCoordinatorSearchProgressListener == false; + } + + /** + * @param searchTask SearchTask to analyze + * @return true if AsyncSearchTask still uses its own special listener, not one of the two that synchronous SearchTask uses + */ + private boolean assertAsyncSearchTaskListener(SearchTask searchTask) { + if (searchTask.getClass().getSimpleName().contains("AsyncSearchTask")) { + SearchProgressListener progressListener = searchTask.getProgressListener(); + return progressListener != SearchProgressListener.NOOP + && progressListener instanceof CCSSingleCoordinatorSearchProgressListener == false; + } + return true; + } + private static void validateAndResolveWaitForCheckpoint( ClusterState clusterState, IndexNameExpressionResolver resolver, @@ -1824,4 +1846,112 @@ List getLocalShardsIterator( // the returned list must support in-place sorting, so this is the most memory efficient we can do here return Arrays.asList(list); } + + private interface TelemetryListener { + void setRemotes(int count); + + void setFeature(String feature); + + void setClient(String client); + } + + private class SearchResponseActionListener implements ActionListener, TelemetryListener { + private final SearchTask task; + private final ActionListener listener; + private final CCSUsage.Builder usageBuilder; + + SearchResponseActionListener(SearchTask task, ActionListener listener) { + this.task = task; + this.listener = listener; + usageBuilder = new CCSUsage.Builder(); + } + + /** + * Should we collect telemetry for this search? + */ + private boolean collectTelemetry() { + return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() && usageBuilder.getRemotesCount() > 0; + } + + public void setRemotes(int count) { + usageBuilder.setRemotesCount(count); + } + + @Override + public void setFeature(String feature) { + usageBuilder.setFeature(feature); + } + + @Override + public void setClient(String client) { + usageBuilder.setClient(client); + } + + @Override + public void onResponse(SearchResponse searchResponse) { + try { + searchResponseMetrics.recordTookTime(searchResponse.getTookInMillis()); + SearchResponseMetrics.ResponseCountTotalStatus responseCountTotalStatus = + SearchResponseMetrics.ResponseCountTotalStatus.SUCCESS; + if (searchResponse.getShardFailures() != null && searchResponse.getShardFailures().length > 0) { + // Deduplicate failures by exception message and index + ShardOperationFailedException[] groupedFailures = ExceptionsHelper.groupBy(searchResponse.getShardFailures()); + for (ShardOperationFailedException f : groupedFailures) { + boolean causeHas500Status = false; + if (f.getCause() != null) { + causeHas500Status = ExceptionsHelper.status(f.getCause()).getStatus() >= 500; + } + if ((f.status().getStatus() >= 500 || causeHas500Status) + && ExceptionsHelper.isNodeOrShardUnavailableTypeException(f.getCause()) == false) { + logger.warn("TransportSearchAction shard failure (partial results response)", f); + responseCountTotalStatus = SearchResponseMetrics.ResponseCountTotalStatus.PARTIAL_FAILURE; + } + } + } + searchResponseMetrics.incrementResponseCount(responseCountTotalStatus); + + if (collectTelemetry()) { + extractCCSTelemetry(searchResponse); + recordTelemetry(); + } + } catch (Exception e) { + onFailure(e); + return; + } + // This is last because we want to collect 
telemetry before returning the response.
+ listener.onResponse(searchResponse);
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ searchResponseMetrics.incrementResponseCount(SearchResponseMetrics.ResponseCountTotalStatus.FAILURE);
+ if (collectTelemetry()) {
+ usageBuilder.setFailure(e);
+ recordTelemetry();
+ }
+ listener.onFailure(e);
+ }
+
+ private void recordTelemetry() {
+ usageService.getCcsUsageHolder().updateUsage(usageBuilder.build());
+ }
+
+ /**
+ * Extract telemetry data from the search response.
+ * @param searchResponse The final response from the search.
+ */
+ private void extractCCSTelemetry(SearchResponse searchResponse) {
+ usageBuilder.took(searchResponse.getTookInMillis());
+ for (String clusterAlias : searchResponse.getClusters().getClusterAliases()) {
+ SearchResponse.Cluster cluster = searchResponse.getClusters().getCluster(clusterAlias);
+ if (cluster.getStatus() == SearchResponse.Cluster.Status.SKIPPED) {
+ usageBuilder.skippedRemote(clusterAlias);
+ } else {
+ usageBuilder.perClusterUsage(clusterAlias, cluster.getTook());
+ }
+ }
+
+ }
+
+ }
}
diff --git a/server/src/main/java/org/elasticsearch/usage/UsageService.java b/server/src/main/java/org/elasticsearch/usage/UsageService.java
index e11b343c7055a..573332060f55d 100644
--- a/server/src/main/java/org/elasticsearch/usage/UsageService.java
+++ b/server/src/main/java/org/elasticsearch/usage/UsageService.java
@@ -9,6 +9,7 @@
 package org.elasticsearch.usage;
 
 import org.elasticsearch.action.admin.cluster.node.usage.NodeUsage;
+import org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry;
 import org.elasticsearch.rest.BaseRestHandler;
 
 import java.util.HashMap;
@@ -23,10 +24,12 @@ public class UsageService {
 
 private final Map handlers;
 private final SearchUsageHolder searchUsageHolder;
+ private final CCSUsageTelemetry ccsUsageHolder;
 
 public UsageService() {
 this.handlers = new HashMap<>();
 this.searchUsageHolder = new SearchUsageHolder();
+ this.ccsUsageHolder = new CCSUsageTelemetry();
 }
 
 /**
@@ -81,4 +84,8 @@ public Map getRestUsageStats() {
 public SearchUsageHolder getSearchUsageHolder() {
 return searchUsageHolder;
 }
+
+ public CCSUsageTelemetry getCcsUsageHolder() {
+ return ccsUsageHolder;
+ }
 }
diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/ApproximateMatcher.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/ApproximateMatcher.java
new file mode 100644
index 0000000000000..3ceda1c7f4651
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/ApproximateMatcher.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.action.admin.cluster.stats;
+
+import org.hamcrest.Description;
+import org.hamcrest.TypeSafeMatcher;
+
+/**
+ * Matches a value that is within a given range (currently 1%) of an expected value.
+ *
+ * We need this because histograms do not store exact values, but only value ranges.
+ * Since we have 2 significant digits, the value should be within 1% of the expected value.
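+ * For example, closeTo(1000L) accepts any value in the inclusive range [990, 1010].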
+ */
+public class ApproximateMatcher extends TypeSafeMatcher<Long> {
+    public static final double ACCURACY = 0.01;
+    private final long expectedValue;
+
+    public ApproximateMatcher(long expectedValue) {
+        this.expectedValue = expectedValue;
+    }
+
+    @Override
+    protected boolean matchesSafely(Long actualValue) {
+        double lowerBound = Math.floor(expectedValue * (1.00 - ACCURACY));
+        double upperBound = Math.ceil(expectedValue * (1.00 + ACCURACY));
+        return actualValue >= lowerBound && actualValue <= upperBound;
+    }
+
+    @Override
+    public void describeTo(Description description) {
+        description.appendText("a long value within 1% of ").appendValue(expectedValue);
+    }
+
+    /**
+     * Matches a value that is within a given range (currently 1%) of an expected value.
+     */
+    public static ApproximateMatcher closeTo(long expectedValue) {
+        return new ApproximateMatcher(expectedValue);
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java
new file mode 100644
index 0000000000000..9f08934503b69
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java
@@ -0,0 +1,324 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.action.admin.cluster.stats;
+
+import org.elasticsearch.action.admin.cluster.stats.CCSTelemetrySnapshot.PerClusterCCSTelemetry;
+import org.elasticsearch.action.admin.cluster.stats.LongMetric.LongMetricValue;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.test.AbstractWireSerializingTestCase;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+
+public class CCSTelemetrySnapshotTests extends AbstractWireSerializingTestCase<CCSTelemetrySnapshot> {
+
+    private LongMetricValue randomLongMetricValue() {
+        LongMetric v = new LongMetric();
+        for (int i = 0; i < randomIntBetween(1, 10); i++) {
+            v.record(randomIntBetween(0, 1_000_000));
+        }
+        return v.getValue();
+    }
+
+    private PerClusterCCSTelemetry randomPerClusterCCSTelemetry() {
+        return new PerClusterCCSTelemetry(randomLongBetween(0, 1_000_000), randomLongBetween(0, 1_000_000), randomLongMetricValue());
+    }
+
+    @Override
+    protected CCSTelemetrySnapshot createTestInstance() {
+        if (randomBoolean()) {
+            return new CCSTelemetrySnapshot();
+        } else {
+            return randomCCSTelemetrySnapshot();
+        }
+    }
+
+    private CCSTelemetrySnapshot randomCCSTelemetrySnapshot() {
+        return new CCSTelemetrySnapshot(
+            randomLongBetween(0, 1_000_000),
+            randomLongBetween(0, 1_000_000),
+            Map.of(),
+            randomLongMetricValue(),
+            randomLongMetricValue(),
+            randomLongMetricValue(),
+            randomLongBetween(0, 1_000_000),
+            randomDoubleBetween(0.0, 100.0, false),
+            randomLongBetween(0, 1_000_000),
+            Map.of(),
+            Map.of(),
+            randomMap(1, 10, () -> new Tuple<>(randomAlphaOfLengthBetween(5, 10), randomPerClusterCCSTelemetry()))
+        );
+    }
+
+    @Override
+    protected Writeable.Reader<CCSTelemetrySnapshot> 
instanceReader() { + return CCSTelemetrySnapshot::new; + } + + @Override + protected CCSTelemetrySnapshot mutateInstance(CCSTelemetrySnapshot instance) throws IOException { + // create a copy of CCSTelemetrySnapshot by extracting each field and mutating it + long totalCount = instance.getTotalCount(); + long successCount = instance.getSuccessCount(); + var failureReasons = instance.getFailureReasons(); + LongMetricValue took = instance.getTook(); + LongMetricValue tookMrtTrue = instance.getTookMrtTrue(); + LongMetricValue tookMrtFalse = instance.getTookMrtFalse(); + long skippedRemotes = instance.getSearchCountWithSkippedRemotes(); + long remotesPerSearchMax = instance.getRemotesPerSearchMax(); + double remotesPerSearchAvg = instance.getRemotesPerSearchAvg(); + var featureCounts = instance.getFeatureCounts(); + var clientCounts = instance.getClientCounts(); + var perClusterCCSTelemetries = instance.getByRemoteCluster(); + + // Mutate values + int i = randomInt(11); + switch (i) { + case 0: + totalCount += randomNonNegativeLong(); + break; + case 1: + successCount += randomNonNegativeLong(); + break; + case 2: + failureReasons = new HashMap<>(failureReasons); + if (failureReasons.isEmpty() || randomBoolean()) { + failureReasons.put(randomAlphaOfLengthBetween(5, 10), randomNonNegativeLong()); + } else { + // modify random element of the map + String key = randomFrom(failureReasons.keySet()); + failureReasons.put(key, randomNonNegativeLong()); + } + break; + case 3: + took = randomLongMetricValue(); + break; + case 4: + tookMrtTrue = randomLongMetricValue(); + break; + case 5: + tookMrtFalse = randomLongMetricValue(); + break; + case 6: + skippedRemotes += randomNonNegativeLong(); + break; + case 7: + remotesPerSearchMax += randomNonNegativeLong(); + break; + case 8: + remotesPerSearchAvg = randomDoubleBetween(0.0, 100.0, false); + break; + case 9: + featureCounts = new HashMap<>(featureCounts); + if (featureCounts.isEmpty() || randomBoolean()) { + featureCounts.put(randomAlphaOfLengthBetween(5, 10), randomNonNegativeLong()); + } else { + // modify random element of the map + String key = randomFrom(featureCounts.keySet()); + featureCounts.put(key, randomNonNegativeLong()); + } + break; + case 10: + clientCounts = new HashMap<>(clientCounts); + if (clientCounts.isEmpty() || randomBoolean()) { + clientCounts.put(randomAlphaOfLengthBetween(5, 10), randomNonNegativeLong()); + } else { + // modify random element of the map + String key = randomFrom(clientCounts.keySet()); + clientCounts.put(key, randomNonNegativeLong()); + } + break; + case 11: + perClusterCCSTelemetries = new HashMap<>(perClusterCCSTelemetries); + if (perClusterCCSTelemetries.isEmpty() || randomBoolean()) { + perClusterCCSTelemetries.put(randomAlphaOfLengthBetween(5, 10), randomPerClusterCCSTelemetry()); + } else { + // modify random element of the map + String key = randomFrom(perClusterCCSTelemetries.keySet()); + perClusterCCSTelemetries.put(key, randomPerClusterCCSTelemetry()); + } + break; + } + // Return new instance + return new CCSTelemetrySnapshot( + totalCount, + successCount, + failureReasons, + took, + tookMrtTrue, + tookMrtFalse, + remotesPerSearchMax, + remotesPerSearchAvg, + skippedRemotes, + featureCounts, + clientCounts, + perClusterCCSTelemetries + ); + } + + public void testAdd() { + CCSTelemetrySnapshot empty = new CCSTelemetrySnapshot(); + CCSTelemetrySnapshot full = randomCCSTelemetrySnapshot(); + empty.add(full); + assertThat(empty, equalTo(full)); + // Add again + empty.add(full); + 
assertThat(empty.getTotalCount(), equalTo(full.getTotalCount() * 2)); + assertThat(empty.getSuccessCount(), equalTo(full.getSuccessCount() * 2)); + // check that each element of the map is doubled + empty.getFailureReasons().forEach((k, v) -> assertThat(v, equalTo(full.getFailureReasons().get(k) * 2))); + assertThat(empty.getTook().count(), equalTo(full.getTook().count() * 2)); + assertThat(empty.getTookMrtTrue().count(), equalTo(full.getTookMrtTrue().count() * 2)); + assertThat(empty.getTookMrtFalse().count(), equalTo(full.getTookMrtFalse().count() * 2)); + assertThat(empty.getSearchCountWithSkippedRemotes(), equalTo(full.getSearchCountWithSkippedRemotes() * 2)); + assertThat(empty.getRemotesPerSearchMax(), equalTo(full.getRemotesPerSearchMax())); + assertThat(empty.getRemotesPerSearchAvg(), closeTo(full.getRemotesPerSearchAvg(), 0.01)); + empty.getFeatureCounts().forEach((k, v) -> assertThat(v, equalTo(full.getFeatureCounts().get(k) * 2))); + empty.getClientCounts().forEach((k, v) -> assertThat(v, equalTo(full.getClientCounts().get(k) * 2))); + empty.getByRemoteCluster().forEach((k, v) -> { + assertThat(v.getCount(), equalTo(full.getByRemoteCluster().get(k).getCount() * 2)); + assertThat(v.getSkippedCount(), equalTo(full.getByRemoteCluster().get(k).getSkippedCount() * 2)); + assertThat(v.getTook().count(), equalTo(full.getByRemoteCluster().get(k).getTook().count() * 2)); + }); + } + + public void testAddTwo() { + CCSTelemetrySnapshot empty = new CCSTelemetrySnapshot(); + CCSTelemetrySnapshot full = randomCCSTelemetrySnapshot(); + CCSTelemetrySnapshot full2 = randomCCSTelemetrySnapshot(); + + empty.add(full); + empty.add(full2); + assertThat(empty.getTotalCount(), equalTo(full.getTotalCount() + full2.getTotalCount())); + assertThat(empty.getSuccessCount(), equalTo(full.getSuccessCount() + full2.getSuccessCount())); + empty.getFailureReasons() + .forEach( + (k, v) -> assertThat( + v, + equalTo(full.getFailureReasons().getOrDefault(k, 0L) + full2.getFailureReasons().getOrDefault(k, 0L)) + ) + ); + assertThat(empty.getTook().count(), equalTo(full.getTook().count() + full2.getTook().count())); + assertThat(empty.getTookMrtTrue().count(), equalTo(full.getTookMrtTrue().count() + full2.getTookMrtTrue().count())); + assertThat(empty.getTookMrtFalse().count(), equalTo(full.getTookMrtFalse().count() + full2.getTookMrtFalse().count())); + assertThat( + empty.getSearchCountWithSkippedRemotes(), + equalTo(full.getSearchCountWithSkippedRemotes() + full2.getSearchCountWithSkippedRemotes()) + ); + assertThat(empty.getRemotesPerSearchMax(), equalTo(Math.max(full.getRemotesPerSearchMax(), full2.getRemotesPerSearchMax()))); + double expectedAvg = (full.getRemotesPerSearchAvg() * full.getTotalCount() + full2.getRemotesPerSearchAvg() * full2.getTotalCount()) + / empty.getTotalCount(); + assertThat(empty.getRemotesPerSearchAvg(), closeTo(expectedAvg, 0.01)); + empty.getFeatureCounts() + .forEach( + (k, v) -> assertThat(v, equalTo(full.getFeatureCounts().getOrDefault(k, 0L) + full2.getFeatureCounts().getOrDefault(k, 0L))) + ); + empty.getClientCounts() + .forEach( + (k, v) -> assertThat(v, equalTo(full.getClientCounts().getOrDefault(k, 0L) + full2.getClientCounts().getOrDefault(k, 0L))) + ); + PerClusterCCSTelemetry zeroDummy = new PerClusterCCSTelemetry(); + empty.getByRemoteCluster().forEach((k, v) -> { + assertThat( + v.getCount(), + equalTo( + full.getByRemoteCluster().getOrDefault(k, zeroDummy).getCount() + full2.getByRemoteCluster() + .getOrDefault(k, zeroDummy) + .getCount() + ) + ); + assertThat( + 
v.getSkippedCount(), + equalTo( + full.getByRemoteCluster().getOrDefault(k, zeroDummy).getSkippedCount() + full2.getByRemoteCluster() + .getOrDefault(k, zeroDummy) + .getSkippedCount() + ) + ); + assertThat( + v.getTook().count(), + equalTo( + full.getByRemoteCluster().getOrDefault(k, zeroDummy).getTook().count() + full2.getByRemoteCluster() + .getOrDefault(k, zeroDummy) + .getTook() + .count() + ) + ); + }); + } + + private LongMetricValue manyValuesHistogram(long startingWith) { + LongMetric metric = new LongMetric(); + // Produce 100 values from startingWith to 2 * startingWith with equal intervals + // We need to space values relative to initial value, otherwise the histogram would put them all in one bucket + for (long i = startingWith; i < 2 * startingWith; i += startingWith / 100) { + metric.record(i); + } + return metric.getValue(); + } + + public void testToXContent() throws IOException { + long totalCount = 10; + long successCount = 20; + // Using TreeMap's here to ensure consistent ordering in the JSON output + var failureReasons = new TreeMap<>(Map.of("reason1", 1L, "reason2", 2L, "unknown", 3L)); + LongMetricValue took = manyValuesHistogram(1000); + LongMetricValue tookMrtTrue = manyValuesHistogram(5000); + LongMetricValue tookMrtFalse = manyValuesHistogram(10000); + long skippedRemotes = 5; + long remotesPerSearchMax = 6; + double remotesPerSearchAvg = 7.89; + var featureCounts = new TreeMap<>(Map.of("async", 10L, "mrt", 20L, "wildcard", 30L)); + var clientCounts = new TreeMap<>(Map.of("kibana", 40L, "other", 500L)); + var perClusterCCSTelemetries = new TreeMap<>( + Map.of( + "", + new PerClusterCCSTelemetry(12, 0, manyValuesHistogram(2000)), + "remote1", + new PerClusterCCSTelemetry(100, 22, manyValuesHistogram(2000)), + "remote2", + new PerClusterCCSTelemetry(300, 42, manyValuesHistogram(500000)) + ) + ); + + var snapshot = new CCSTelemetrySnapshot( + totalCount, + successCount, + failureReasons, + took, + tookMrtTrue, + tookMrtFalse, + remotesPerSearchMax, + remotesPerSearchAvg, + skippedRemotes, + featureCounts, + clientCounts, + perClusterCCSTelemetries + ); + String expected = readJSONFromResource("telemetry_test.json"); + assertEquals(expected, snapshot.toString()); + } + + private String readJSONFromResource(String fileName) throws IOException { + try (InputStream inputStream = getClass().getResourceAsStream("/org/elasticsearch/action/admin/cluster/stats/" + fileName)) { + if (inputStream == null) { + throw new IOException("Resource not found: " + fileName); + } + return new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetryTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetryTests.java new file mode 100644 index 0000000000000..bd36f89f38e4d --- /dev/null +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSUsageTelemetryTests.java @@ -0,0 +1,342 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.test.ESTestCase; + +import static org.elasticsearch.action.admin.cluster.stats.ApproximateMatcher.closeTo; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.ASYNC_FEATURE; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.KNOWN_CLIENTS; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.MRT_FEATURE; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.Result.CANCELED; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.WILDCARD_FEATURE; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +public class CCSUsageTelemetryTests extends ESTestCase { + + public void testSuccessfulSearchResults() { + CCSUsageTelemetry ccsUsageHolder = new CCSUsageTelemetry(); + + long expectedAsyncCount = 0L; + long expectedMinRTCount = 0L; + long expectedSearchesWithSkippedRemotes = 0L; + long took1 = 0L; + long took1Remote1 = 0L; + + // first search + { + boolean minimizeRoundTrips = randomBoolean(); + boolean async = randomBoolean(); + took1 = randomLongBetween(5, 10000); + boolean skippedRemote = randomBoolean(); + expectedSearchesWithSkippedRemotes = skippedRemote ? 1 : 0; + expectedAsyncCount = async ? 1 : 0; + expectedMinRTCount = minimizeRoundTrips ? 1 : 0; + + // per cluster telemetry + long tookLocal = randomLongBetween(2, 8000); + took1Remote1 = randomLongBetween(2, 8000); + + CCSUsage.Builder builder = new CCSUsage.Builder(); + builder.took(took1).setRemotesCount(1); + if (async) { + builder.setFeature(ASYNC_FEATURE); + } + if (minimizeRoundTrips) { + builder.setFeature(MRT_FEATURE); + } + if (skippedRemote) { + builder.skippedRemote("remote1"); + } + builder.perClusterUsage("(local)", new TimeValue(tookLocal)); + builder.perClusterUsage("remote1", new TimeValue(took1Remote1)); + + CCSUsage ccsUsage = builder.build(); + ccsUsageHolder.updateUsage(ccsUsage); + + CCSTelemetrySnapshot snapshot = ccsUsageHolder.getCCSTelemetrySnapshot(); + + assertThat(snapshot.getTotalCount(), equalTo(1L)); + assertThat(snapshot.getSuccessCount(), equalTo(1L)); + assertThat(snapshot.getFeatureCounts().getOrDefault(ASYNC_FEATURE, 0L), equalTo(expectedAsyncCount)); + assertThat(snapshot.getFeatureCounts().getOrDefault(MRT_FEATURE, 0L), equalTo(expectedMinRTCount)); + assertThat(snapshot.getSearchCountWithSkippedRemotes(), equalTo(expectedSearchesWithSkippedRemotes)); + assertThat(snapshot.getTook().avg(), greaterThan(0L)); + // Expect it to be within 1% of the actual value + assertThat(snapshot.getTook().avg(), closeTo(took1)); + assertThat(snapshot.getTook().max(), closeTo(took1)); + if (minimizeRoundTrips) { + assertThat(snapshot.getTookMrtTrue().count(), equalTo(1L)); + assertThat(snapshot.getTookMrtTrue().avg(), greaterThan(0L)); + assertThat(snapshot.getTookMrtTrue().avg(), closeTo(took1)); + assertThat(snapshot.getTookMrtFalse().count(), equalTo(0L)); + assertThat(snapshot.getTookMrtFalse().max(), equalTo(0L)); + } else { + assertThat(snapshot.getTookMrtFalse().count(), equalTo(1L)); + assertThat(snapshot.getTookMrtFalse().avg(), greaterThan(0L)); + assertThat(snapshot.getTookMrtFalse().avg(), closeTo(took1)); + assertThat(snapshot.getTookMrtTrue().count(), equalTo(0L)); + assertThat(snapshot.getTookMrtTrue().max(), equalTo(0L)); + } + // We currently don't count unknown clients + 
assertThat(snapshot.getClientCounts().size(), equalTo(0)); + + // per cluster telemetry asserts + + var telemetryByCluster = snapshot.getByRemoteCluster(); + assertThat(telemetryByCluster.size(), equalTo(2)); + var localClusterTelemetry = telemetryByCluster.get("(local)"); + assertNotNull(localClusterTelemetry); + assertThat(localClusterTelemetry.getCount(), equalTo(1L)); + assertThat(localClusterTelemetry.getSkippedCount(), equalTo(0L)); + assertThat(localClusterTelemetry.getTook().count(), equalTo(1L)); + assertThat(localClusterTelemetry.getTook().avg(), greaterThan(0L)); + assertThat(localClusterTelemetry.getTook().avg(), closeTo(tookLocal)); + // assertThat(localClusterTelemetry.getTook().max(), greaterThanOrEqualTo(tookLocal)); + + var remote1ClusterTelemetry = telemetryByCluster.get("remote1"); + assertNotNull(remote1ClusterTelemetry); + assertThat(remote1ClusterTelemetry.getCount(), equalTo(1L)); + assertThat(remote1ClusterTelemetry.getSkippedCount(), equalTo(expectedSearchesWithSkippedRemotes)); + assertThat(remote1ClusterTelemetry.getTook().avg(), greaterThan(0L)); + assertThat(remote1ClusterTelemetry.getTook().count(), equalTo(1L)); + assertThat(remote1ClusterTelemetry.getTook().avg(), greaterThan(0L)); + assertThat(remote1ClusterTelemetry.getTook().avg(), closeTo(took1Remote1)); + // assertThat(remote1ClusterTelemetry.getTook().max(), greaterThanOrEqualTo(took1Remote1)); + } + + // second search + { + boolean minimizeRoundTrips = randomBoolean(); + boolean async = randomBoolean(); + expectedAsyncCount += async ? 1 : 0; + expectedMinRTCount += minimizeRoundTrips ? 1 : 0; + long took2 = randomLongBetween(5, 10000); + boolean skippedRemote = randomBoolean(); + expectedSearchesWithSkippedRemotes += skippedRemote ? 1 : 0; + long took2Remote1 = randomLongBetween(2, 8000); + + CCSUsage.Builder builder = new CCSUsage.Builder(); + builder.took(took2).setRemotesCount(1).setClient("kibana"); + if (async) { + builder.setFeature(ASYNC_FEATURE); + } + if (minimizeRoundTrips) { + builder.setFeature(MRT_FEATURE); + } + if (skippedRemote) { + builder.skippedRemote("remote1"); + } + builder.perClusterUsage("remote1", new TimeValue(took2Remote1)); + + CCSUsage ccsUsage = builder.build(); + ccsUsageHolder.updateUsage(ccsUsage); + + CCSTelemetrySnapshot snapshot = ccsUsageHolder.getCCSTelemetrySnapshot(); + + assertThat(snapshot.getTotalCount(), equalTo(2L)); + assertThat(snapshot.getSuccessCount(), equalTo(2L)); + assertThat(snapshot.getFeatureCounts().getOrDefault(ASYNC_FEATURE, 0L), equalTo(expectedAsyncCount)); + assertThat(snapshot.getFeatureCounts().getOrDefault(MRT_FEATURE, 0L), equalTo(expectedMinRTCount)); + assertThat(snapshot.getSearchCountWithSkippedRemotes(), equalTo(expectedSearchesWithSkippedRemotes)); + assertThat(snapshot.getTook().avg(), greaterThan(0L)); + assertThat(snapshot.getTook().avg(), closeTo((took1 + took2) / 2)); + // assertThat(snapshot.getTook().max(), greaterThanOrEqualTo(Math.max(took1, took2))); + + // Counting only known clients + assertThat(snapshot.getClientCounts().get("kibana"), equalTo(1L)); + assertThat(snapshot.getClientCounts().size(), equalTo(1)); + + // per cluster telemetry asserts + + var telemetryByCluster = snapshot.getByRemoteCluster(); + assertThat(telemetryByCluster.size(), equalTo(2)); + var localClusterTelemetry = telemetryByCluster.get("(local)"); + assertNotNull(localClusterTelemetry); + assertThat(localClusterTelemetry.getCount(), equalTo(1L)); + assertThat(localClusterTelemetry.getSkippedCount(), equalTo(0L)); + 
assertThat(localClusterTelemetry.getTook().count(), equalTo(1L)); + + var remote1ClusterTelemetry = telemetryByCluster.get("remote1"); + assertNotNull(remote1ClusterTelemetry); + assertThat(remote1ClusterTelemetry.getCount(), equalTo(2L)); + assertThat(remote1ClusterTelemetry.getSkippedCount(), equalTo(expectedSearchesWithSkippedRemotes)); + assertThat(remote1ClusterTelemetry.getTook().avg(), greaterThan(0L)); + assertThat(remote1ClusterTelemetry.getTook().count(), equalTo(2L)); + assertThat(remote1ClusterTelemetry.getTook().avg(), greaterThan(0L)); + assertThat(remote1ClusterTelemetry.getTook().avg(), closeTo((took1Remote1 + took2Remote1) / 2)); + // assertThat(remote1ClusterTelemetry.getTook().max(), greaterThanOrEqualTo(Math.max(took1Remote1, took2Remote1))); + } + } + + public void testClientsLimit() { + CCSUsageTelemetry ccsUsageHolder = new CCSUsageTelemetry(); + // Add known clients + for (String knownClient : KNOWN_CLIENTS) { + CCSUsage.Builder builder = new CCSUsage.Builder(); + builder.took(randomLongBetween(5, 10000)).setRemotesCount(1).setClient(knownClient); + CCSUsage ccsUsage = builder.build(); + ccsUsageHolder.updateUsage(ccsUsage); + } + var counts = ccsUsageHolder.getCCSTelemetrySnapshot().getClientCounts(); + for (String knownClient : KNOWN_CLIENTS) { + assertThat(counts.get(knownClient), equalTo(1L)); + } + // Check that knowns are counted + for (String knownClient : KNOWN_CLIENTS) { + CCSUsage.Builder builder = new CCSUsage.Builder(); + builder.took(randomLongBetween(5, 10000)).setRemotesCount(1).setClient(knownClient); + CCSUsage ccsUsage = builder.build(); + ccsUsageHolder.updateUsage(ccsUsage); + } + counts = ccsUsageHolder.getCCSTelemetrySnapshot().getClientCounts(); + for (String knownClient : KNOWN_CLIENTS) { + assertThat(counts.get(knownClient), equalTo(2L)); + } + // Check that new clients are not counted + CCSUsage.Builder builder = new CCSUsage.Builder(); + String randomClient = randomAlphaOfLength(10); + builder.took(randomLongBetween(5, 10000)).setRemotesCount(1).setClient(randomClient); + CCSUsage ccsUsage = builder.build(); + ccsUsageHolder.updateUsage(ccsUsage); + counts = ccsUsageHolder.getCCSTelemetrySnapshot().getClientCounts(); + assertThat(counts.get(randomClient), equalTo(null)); + } + + public void testFailures() { + CCSUsageTelemetry ccsUsageHolder = new CCSUsageTelemetry(); + + // first search + { + boolean skippedRemote = randomBoolean(); + boolean minimizeRoundTrips = randomBoolean(); + boolean async = randomBoolean(); + + CCSUsage.Builder builder = new CCSUsage.Builder(); + builder.setRemotesCount(1).took(10L); + if (skippedRemote) { + builder.skippedRemote("remote1"); + } + builder.perClusterUsage("(local)", new TimeValue(1)); + builder.perClusterUsage("remote1", new TimeValue(2)); + builder.setFailure(CANCELED); + if (async) { + builder.setFeature(ASYNC_FEATURE); + } + if (minimizeRoundTrips) { + builder.setFeature(MRT_FEATURE); + } + + CCSUsage ccsUsage = builder.build(); + ccsUsageHolder.updateUsage(ccsUsage); + + CCSTelemetrySnapshot snapshot = ccsUsageHolder.getCCSTelemetrySnapshot(); + + assertThat(snapshot.getTotalCount(), equalTo(1L)); + assertThat(snapshot.getSuccessCount(), equalTo(0L)); + assertThat(snapshot.getSearchCountWithSkippedRemotes(), equalTo(skippedRemote ? 
1L : 0L));
+            assertThat(snapshot.getTook().count(), equalTo(0L));
+            assertThat(snapshot.getFailureReasons().size(), equalTo(1));
+            assertThat(snapshot.getFailureReasons().get(CANCELED.getName()), equalTo(1L));
+            // still counting features on failure
+            assertThat(snapshot.getFeatureCounts().getOrDefault(ASYNC_FEATURE, 0L), equalTo(async ? 1L : 0L));
+            assertThat(snapshot.getFeatureCounts().getOrDefault(MRT_FEATURE, 0L), equalTo(minimizeRoundTrips ? 1L : 0L));
+        }
+
+        // second search
+        {
+            CCSUsage.Builder builder = new CCSUsage.Builder();
+            boolean skippedRemote = randomBoolean();
+            builder.setRemotesCount(1).took(10L).setClient("kibana");
+            if (skippedRemote) {
+                builder.skippedRemote("remote1");
+            }
+            builder.setFailure(CANCELED);
+            CCSUsage ccsUsage = builder.build();
+
+            ccsUsageHolder.updateUsage(ccsUsage);
+
+            CCSTelemetrySnapshot snapshot = ccsUsageHolder.getCCSTelemetrySnapshot();
+
+            assertThat(snapshot.getTotalCount(), equalTo(2L));
+            assertThat(snapshot.getSuccessCount(), equalTo(0L));
+            assertThat(snapshot.getTook().count(), equalTo(0L));
+            assertThat(snapshot.getFailureReasons().size(), equalTo(1));
+            assertThat(snapshot.getFailureReasons().get(CANCELED.getName()), equalTo(2L));
+            assertThat(snapshot.getClientCounts().get("kibana"), equalTo(1L));
+        }
+    }
+
+    public void testConcurrentUpdates() throws InterruptedException {
+        CCSUsageTelemetry ccsUsageHolder = new CCSUsageTelemetry();
+        CCSUsageTelemetry expectedHolder = new CCSUsageTelemetry();
+        int numSearches = randomIntBetween(1000, 5000);
+        int numThreads = randomIntBetween(10, 20);
+        Thread[] threads = new Thread[numThreads];
+        CCSUsage[] ccsUsages = new CCSUsage[numSearches];
+
+        // Make random usage objects
+        for (int i = 0; i < numSearches; i++) {
+            CCSUsage.Builder builder = new CCSUsage.Builder();
+            builder.took(randomLongBetween(5, 10000)).setRemotesCount(randomIntBetween(1, 10));
+            if (randomBoolean()) {
+                builder.setFeature(ASYNC_FEATURE);
+            }
+            if (randomBoolean()) {
+                builder.setFeature(WILDCARD_FEATURE);
+            }
+            if (randomBoolean()) {
+                builder.setFeature(MRT_FEATURE);
+            }
+            if (randomBoolean()) {
+                builder.setClient("kibana");
+            }
+            if (randomInt(20) == 7) {
+                // roughly 5% of requests will fail
+                builder.setFailure(randomFrom(CCSUsageTelemetry.Result.values()));
+                ccsUsages[i] = builder.build();
+                continue;
+            }
+            builder.perClusterUsage("", new TimeValue(randomLongBetween(1, 10000)));
+            if (randomBoolean()) {
+                builder.skippedRemote("remote1");
+            } else {
+                builder.perClusterUsage("remote1", new TimeValue(randomLongBetween(1, 10000)));
+            }
+            builder.perClusterUsage(randomFrom("remote2", "remote3", "remote4"), new TimeValue(randomLongBetween(1, 10000)));
+            ccsUsages[i] = builder.build();
+        }
+
+        // Add each of the search objects to the telemetry holder in a different thread
+        for (int i = 0; i < numThreads; i++) {
+            final int threadNo = i;
+            threads[i] = new Thread(() -> {
+                for (int j = threadNo; j < numSearches; j += numThreads) {
+                    ccsUsageHolder.updateUsage(ccsUsages[j]);
+                }
+            });
+            threads[i].start();
+        }
+
+        for (int i = 0; i < numThreads; i++) {
+            threads[i].join();
+        }
+
+        // Add the same search objects to the expected holder in a single thread
+        for (int i = 0; i < numSearches; i++) {
+            expectedHolder.updateUsage(ccsUsages[i]);
+        }
+
+        CCSTelemetrySnapshot snapshot = ccsUsageHolder.getCCSTelemetrySnapshot();
+        CCSTelemetrySnapshot expectedSnapshot = expectedHolder.getCCSTelemetrySnapshot();
+        assertThat(snapshot, equalTo(expectedSnapshot));
+    }
+}
diff --git 
a/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java b/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java index 487d8c6f3a7ee..f68e5f06bcf08 100644 --- a/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java @@ -99,6 +99,7 @@ import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportRequestOptions; import org.elasticsearch.transport.TransportService; +import org.elasticsearch.usage.UsageService; import java.io.IOException; import java.util.ArrayList; @@ -1765,7 +1766,8 @@ protected void doWriteTo(StreamOutput out) throws IOException { null, new SearchTransportAPMMetrics(TelemetryProvider.NOOP.getMeterRegistry()), new SearchResponseMetrics(TelemetryProvider.NOOP.getMeterRegistry()), - client + client, + new UsageService() ); CountDownLatch latch = new CountDownLatch(1); diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index c6086a8259fbb..f5e69a65a6d06 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -196,6 +196,7 @@ import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportRequestHandler; import org.elasticsearch.transport.TransportService; +import org.elasticsearch.usage.UsageService; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.junit.After; import org.junit.Before; @@ -2059,6 +2060,8 @@ private final class TestClusterNode { private final BigArrays bigArrays; + private final UsageService usageService; + private Coordinator coordinator; TestClusterNode(DiscoveryNode node, TransportInterceptorFactory transportInterceptorFactory) throws IOException { @@ -2069,6 +2072,7 @@ private final class TestClusterNode { masterService = new FakeThreadPoolMasterService(node.getName(), threadPool, deterministicTaskQueue::scheduleNow); final Settings settings = environment.settings(); client = new NodeClient(settings, threadPool); + this.usageService = new UsageService(); final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); clusterService = new ClusterService( settings, @@ -2486,7 +2490,8 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { EmptySystemIndices.INSTANCE.getExecutorSelector(), new SearchTransportAPMMetrics(TelemetryProvider.NOOP.getMeterRegistry()), new SearchResponseMetrics(TelemetryProvider.NOOP.getMeterRegistry()), - client + client, + usageService ) ); actions.put( diff --git a/server/src/test/resources/org/elasticsearch/action/admin/cluster/stats/telemetry_test.json b/server/src/test/resources/org/elasticsearch/action/admin/cluster/stats/telemetry_test.json new file mode 100644 index 0000000000000..fe9c77cb2a183 --- /dev/null +++ b/server/src/test/resources/org/elasticsearch/action/admin/cluster/stats/telemetry_test.json @@ -0,0 +1,67 @@ +{ + "_search" : { + "total" : 10, + "success" : 20, + "skipped" : 5, + "took" : { + "max" : 1991, + "avg" : 1496, + "p90" : 1895 + }, + "took_mrt_true" : { + "max" : 9983, + "avg" : 7476, + "p90" : 9471 + }, + "took_mrt_false" : { + "max" : 19967, + "avg" : 14952, + "p90" : 18943 + }, + "remotes_per_search_max" : 6, + "remotes_per_search_avg" : 7.89, + 
"failure_reasons" : { + "reason1" : 1, + "reason2" : 2, + "unknown" : 3 + }, + "features" : { + "async" : 10, + "mrt" : 20, + "wildcard" : 30 + }, + "clients" : { + "kibana" : 40, + "other" : 500 + }, + "clusters" : { + "(local)" : { + "total" : 12, + "skipped" : 0, + "took" : { + "max" : 3983, + "avg" : 2992, + "p90" : 3791 + } + }, + "remote1" : { + "total" : 100, + "skipped" : 22, + "took" : { + "max" : 3983, + "avg" : 2992, + "p90" : 3791 + } + }, + "remote2" : { + "total" : 300, + "skipped" : 42, + "took" : { + "max" : 995327, + "avg" : 747531, + "p90" : 946175 + } + } + } + } +} \ No newline at end of file diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java new file mode 100644 index 0000000000000..ac0b26cb4f4cd --- /dev/null +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java @@ -0,0 +1,370 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.search; + +import org.elasticsearch.action.ActionFuture; +import org.elasticsearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequest; +import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse; +import org.elasticsearch.action.admin.cluster.stats.CCSTelemetrySnapshot; +import org.elasticsearch.action.search.TransportSearchAction; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.TaskInfo; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.usage.UsageService; +import org.elasticsearch.xpack.async.AsyncResultsIndexPlugin; +import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.elasticsearch.xpack.core.async.GetAsyncResultRequest; +import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; +import org.elasticsearch.xpack.core.search.action.GetAsyncSearchAction; +import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchAction; +import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchRequest; +import org.hamcrest.Matchers; +import org.junit.Before; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.ASYNC_FEATURE; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.MRT_FEATURE; +import static org.elasticsearch.action.admin.cluster.stats.CCSUsageTelemetry.Result.CANCELED; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static 
org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +public class CCSUsageTelemetryAsyncSearchIT extends AbstractMultiClustersTestCase { + private static final String REMOTE1 = "cluster-a"; + private static final String REMOTE2 = "cluster-b"; + + @Override + protected boolean reuseClusters() { + return false; + } + + @Override + protected Collection remoteClusterAlias() { + return List.of(REMOTE1, REMOTE2); + } + + @Override + protected Map skipUnavailableForRemoteClusters() { + return Map.of(REMOTE1, true, REMOTE2, true); + } + + @Override + protected Collection> nodePlugins(String clusterAlias) { + List> plugs = Arrays.asList( + CrossClusterAsyncSearchIT.SearchListenerPlugin.class, + AsyncSearch.class, + AsyncResultsIndexPlugin.class, + LocalStateCompositeXPackPlugin.class, + CrossClusterAsyncSearchIT.TestQueryBuilderPlugin.class + ); + return Stream.concat(super.nodePlugins(clusterAlias).stream(), plugs.stream()).collect(Collectors.toList()); + } + + @Before + public void resetSearchListenerPlugin() { + CrossClusterAsyncSearchIT.SearchListenerPlugin.reset(); + } + + private SubmitAsyncSearchRequest makeSearchRequest(String... indices) { + CrossClusterAsyncSearchIT.SearchListenerPlugin.blockQueryPhase(); + + SubmitAsyncSearchRequest request = new SubmitAsyncSearchRequest(indices); + request.setCcsMinimizeRoundtrips(randomBoolean()); + request.setWaitForCompletionTimeout(TimeValue.timeValueMillis(1)); + request.setKeepOnCompletion(true); + request.getSearchRequest().allowPartialSearchResults(false); + request.getSearchRequest().source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(10)); + if (randomBoolean()) { + request.setBatchedReduceSize(randomIntBetween(2, 256)); + } + + return request; + } + + /** + * Run async search request and get telemetry from it + */ + private CCSTelemetrySnapshot getTelemetryFromSearch(SubmitAsyncSearchRequest searchRequest) throws Exception { + // We want to send search to a specific node (we don't care which one) so that we could + // collect the CCS telemetry from it later + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + final AsyncSearchResponse response = cluster(LOCAL_CLUSTER).client(nodeName) + .execute(SubmitAsyncSearchAction.INSTANCE, searchRequest) + .get(); + // We don't care here too much about the response, we just want to trigger the telemetry collection. + // So we check it's not null and leave the rest to other tests. 
+ final String responseId; + try { + assertNotNull(response.getSearchResponse()); + responseId = response.getId(); + } finally { + response.decRef(); + } + waitForSearchTasksToFinish(); + final AsyncSearchResponse finishedResponse = cluster(LOCAL_CLUSTER).client(nodeName) + .execute(GetAsyncSearchAction.INSTANCE, new GetAsyncResultRequest(responseId)) + .get(); + try { + assertNotNull(finishedResponse.getSearchResponse()); + } finally { + finishedResponse.decRef(); + } + return getTelemetrySnapshot(nodeName); + + } + + private void waitForSearchTasksToFinish() throws Exception { + assertBusy(() -> { + ListTasksResponse listTasksResponse = client(LOCAL_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setActions(TransportSearchAction.TYPE.name()) + .get(); + List tasks = listTasksResponse.getTasks(); + assertThat(tasks.size(), equalTo(0)); + + for (String clusterAlias : remoteClusterAlias()) { + ListTasksResponse remoteTasksResponse = client(clusterAlias).admin() + .cluster() + .prepareListTasks() + .setActions(TransportSearchAction.TYPE.name()) + .get(); + List remoteTasks = remoteTasksResponse.getTasks(); + assertThat(remoteTasks.size(), equalTo(0)); + } + }); + + assertBusy(() -> { + for (String clusterAlias : remoteClusterAlias()) { + final Iterable transportServices = cluster(clusterAlias).getInstances(TransportService.class); + for (TransportService transportService : transportServices) { + assertThat(transportService.getTaskManager().getBannedTaskIds(), Matchers.empty()); + } + } + }); + } + + /** + * Create search request for indices and get telemetry from it + */ + private CCSTelemetrySnapshot getTelemetryFromSearch(String... indices) throws Exception { + return getTelemetryFromSearch(makeSearchRequest(indices)); + } + + /** + * Async search on all remotes + */ + public void testAllRemotesSearch() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SubmitAsyncSearchRequest searchRequest = makeSearchRequest(localIndex, "*:" + remoteIndex); + boolean minimizeRoundtrips = TransportSearchAction.shouldMinimizeRoundtrips(searchRequest.getSearchRequest()); + CrossClusterAsyncSearchIT.SearchListenerPlugin.negate(); + + CCSTelemetrySnapshot telemetry = getTelemetryFromSearch(searchRequest); + + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(1L)); + assertThat(telemetry.getFailureReasons().size(), equalTo(0)); + assertThat(telemetry.getTook().count(), equalTo(1L)); + assertThat(telemetry.getTookMrtTrue().count(), equalTo(minimizeRoundtrips ? 1L : 0L)); + assertThat(telemetry.getTookMrtFalse().count(), equalTo(minimizeRoundtrips ? 
0L : 1L)); + assertThat(telemetry.getRemotesPerSearchAvg(), equalTo(2.0)); + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(2L)); + assertThat(telemetry.getSearchCountWithSkippedRemotes(), equalTo(0L)); + assertThat(telemetry.getFeatureCounts().get(ASYNC_FEATURE), equalTo(1L)); + if (minimizeRoundtrips) { + assertThat(telemetry.getFeatureCounts().get(MRT_FEATURE), equalTo(1L)); + } else { + assertThat(telemetry.getFeatureCounts().get(MRT_FEATURE), equalTo(null)); + } + var perCluster = telemetry.getByRemoteCluster(); + assertThat(perCluster.size(), equalTo(3)); + for (String clusterAlias : remoteClusterAlias()) { + var clusterTelemetry = perCluster.get(clusterAlias); + assertThat(clusterTelemetry.getCount(), equalTo(1L)); + assertThat(clusterTelemetry.getSkippedCount(), equalTo(0L)); + assertThat(clusterTelemetry.getTook().count(), equalTo(1L)); + } + } + + /** + * Search that is cancelled + */ + public void testCancelledSearch() throws Exception { + Map testClusterInfo = setupClusters(); + String localIndex = (String) testClusterInfo.get("local.index"); + String remoteIndex = (String) testClusterInfo.get("remote.index"); + + SubmitAsyncSearchRequest searchRequest = makeSearchRequest(localIndex, REMOTE1 + ":" + remoteIndex); + CrossClusterAsyncSearchIT.SearchListenerPlugin.blockQueryPhase(); + + String nodeName = cluster(LOCAL_CLUSTER).getRandomNodeName(); + final AsyncSearchResponse response = cluster(LOCAL_CLUSTER).client(nodeName) + .execute(SubmitAsyncSearchAction.INSTANCE, searchRequest) + .get(); + try { + assertNotNull(response.getSearchResponse()); + } finally { + response.decRef(); + assertTrue(response.isRunning()); + } + CrossClusterAsyncSearchIT.SearchListenerPlugin.waitSearchStarted(); + + ActionFuture cancelFuture; + try { + ListTasksResponse listTasksResponse = client(LOCAL_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setActions(TransportSearchAction.TYPE.name()) + .get(); + List tasks = listTasksResponse.getTasks(); + assertThat(tasks.size(), equalTo(1)); + final TaskInfo rootTask = tasks.get(0); + + AtomicReference> remoteClusterSearchTasks = new AtomicReference<>(); + assertBusy(() -> { + List remoteSearchTasks = client(REMOTE1).admin() + .cluster() + .prepareListTasks() + .get() + .getTasks() + .stream() + .filter(t -> t.action().contains(TransportSearchAction.TYPE.name())) + .collect(Collectors.toList()); + assertThat(remoteSearchTasks.size(), greaterThan(0)); + remoteClusterSearchTasks.set(remoteSearchTasks); + }); + + for (TaskInfo taskInfo : remoteClusterSearchTasks.get()) { + assertFalse("taskInfo on remote cluster should not be cancelled yet: " + taskInfo, taskInfo.cancelled()); + } + + final CancelTasksRequest cancelRequest = new CancelTasksRequest().setTargetTaskId(rootTask.taskId()); + cancelRequest.setWaitForCompletion(randomBoolean()); + cancelFuture = client().admin().cluster().cancelTasks(cancelRequest); + assertBusy(() -> { + final Iterable transportServices = cluster(REMOTE1).getInstances(TransportService.class); + for (TransportService transportService : transportServices) { + Collection cancellableTasks = transportService.getTaskManager().getCancellableTasks().values(); + for (CancellableTask cancellableTask : cancellableTasks) { + if (cancellableTask.getAction().contains(TransportSearchAction.TYPE.name())) { + assertTrue(cancellableTask.getDescription(), cancellableTask.isCancelled()); + } + } + } + }); + + List remoteSearchTasksAfterCancellation = client(REMOTE1).admin() + .cluster() + .prepareListTasks() + .get() + .getTasks() + 
.stream() + .filter(t -> t.action().contains(TransportSearchAction.TYPE.name())) + .toList(); + for (TaskInfo taskInfo : remoteSearchTasksAfterCancellation) { + assertTrue(taskInfo.description(), taskInfo.cancelled()); + } + } finally { + CrossClusterAsyncSearchIT.SearchListenerPlugin.allowQueryPhase(); + } + + assertBusy(() -> assertTrue(cancelFuture.isDone())); + waitForSearchTasksToFinish(); + + CCSTelemetrySnapshot telemetry = getTelemetrySnapshot(nodeName); + assertThat(telemetry.getTotalCount(), equalTo(1L)); + assertThat(telemetry.getSuccessCount(), equalTo(0L)); + assertThat(telemetry.getFailureReasons().size(), equalTo(1)); + assertThat(telemetry.getFailureReasons().get(CANCELED.getName()), equalTo(1L)); + assertThat(telemetry.getTook().count(), equalTo(0L)); + assertThat(telemetry.getRemotesPerSearchAvg(), equalTo(1.0)); + assertThat(telemetry.getRemotesPerSearchMax(), equalTo(1L)); + // Still counts as async search + assertThat(telemetry.getFeatureCounts().get(ASYNC_FEATURE), equalTo(1L)); + } + + private CCSTelemetrySnapshot getTelemetrySnapshot(String nodeName) { + var usage = cluster(LOCAL_CLUSTER).getInstance(UsageService.class, nodeName); + return usage.getCcsUsageHolder().getCCSTelemetrySnapshot(); + } + + private Map setupClusters() { + String localIndex = "demo"; + int numShardsLocal = randomIntBetween(2, 10); + Settings localSettings = indexSettings(numShardsLocal, randomIntBetween(0, 1)).build(); + assertAcked( + client(LOCAL_CLUSTER).admin() + .indices() + .prepareCreate(localIndex) + .setSettings(localSettings) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + indexDocs(client(LOCAL_CLUSTER), localIndex); + + String remoteIndex = "prod"; + int numShardsRemote = randomIntBetween(2, 10); + for (String clusterAlias : remoteClusterAlias()) { + final InternalTestCluster remoteCluster = cluster(clusterAlias); + remoteCluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); + assertAcked( + client(clusterAlias).admin() + .indices() + .prepareCreate(remoteIndex) + .setSettings(indexSettings(numShardsRemote, randomIntBetween(0, 1))) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + assertFalse( + client(clusterAlias).admin() + .cluster() + .prepareHealth(remoteIndex) + .setWaitForYellowStatus() + .setTimeout(TimeValue.timeValueSeconds(10)) + .get() + .isTimedOut() + ); + indexDocs(client(clusterAlias), remoteIndex); + } + + Map clusterInfo = new HashMap<>(); + clusterInfo.put("local.num_shards", numShardsLocal); + clusterInfo.put("local.index", localIndex); + clusterInfo.put("remote.num_shards", numShardsRemote); + clusterInfo.put("remote.index", remoteIndex); + clusterInfo.put("remote.skip_unavailable", true); + return clusterInfo; + } + + private int indexDocs(Client client, String index) { + int numDocs = between(5, 20); + for (int i = 0; i < numDocs; i++) { + client.prepareIndex(index).setSource("f", "v", "@timestamp", randomNonNegativeLong()).get(); + } + client.admin().indices().prepareRefresh(index).get(); + return numDocs; + } +} From 0fe220a075f4594cedf248e1ec1fe8acaefef165 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Wed, 28 Aug 2024 12:59:46 -0700 Subject: [PATCH 017/144] Hide new test behind a feature (#112301) --- muted-tests.yml | 3 --- .../rest-api-spec/test/indices.create/20_synthetic_source.yml | 4 ++-- .../java/org/elasticsearch/index/mapper/MapperFeatures.java | 3 ++- .../org/elasticsearch/index/mapper/SourceFieldMapper.java | 3 +++ 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml 
b/muted-tests.yml index 6498413e33cf5..602a377ce5c9f 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -160,9 +160,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAsync issue: https://github.com/elastic/elasticsearch/issues/112212 -- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT - method: test {p0=indices.create/20_synthetic_source/stored field under object with store_array_source} - issue: https://github.com/elastic/elasticsearch/issues/112264 - class: org.elasticsearch.search.query.ScriptScoreQueryTests method: testScriptTermStatsAvailable issue: https://github.com/elastic/elasticsearch/issues/112278 diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index a696f3b2b3224..fa08efe402b43 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1342,8 +1342,8 @@ subobjects auto: # 112156 stored field under object with store_array_source: - requires: - cluster_features: ["mapper.track_ignored_source"] - reason: requires tracking ignored source + cluster_features: ["mapper.source.synthetic_source_stored_fields_advance_fix"] + reason: requires bug fix to be implemented - do: indices.create: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 7810fcdc64773..6dce9d6c7b86e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -32,7 +32,8 @@ public Set getFeatures() { IndexModeFieldMapper.QUERYING_INDEX_MODE, NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS, BooleanFieldMapper.BOOLEAN_DIMENSION, - ObjectMapper.SUBOBJECTS_AUTO + ObjectMapper.SUBOBJECTS_AUTO, + SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index 908108bce31da..8d34d3188a388 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -38,6 +38,9 @@ public class SourceFieldMapper extends MetadataFieldMapper { public static final NodeFeature SYNTHETIC_SOURCE_FALLBACK = new NodeFeature("mapper.source.synthetic_source_fallback"); + public static final NodeFeature SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX = new NodeFeature( + "mapper.source.synthetic_source_stored_fields_advance_fix" + ); public static final String NAME = "_source"; public static final String RECOVERY_SOURCE_NAME = "_recovery_source"; From 8f6697e6da28653abc3e123f7dc8c935ef9e2ac8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 06:10:26 +1000 Subject: [PATCH 018/144] Mute org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests testEqualsAndHashcode #112312 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 602a377ce5c9f..231b3e044379a 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -166,6 +166,9 @@ tests: - class: 
org.elasticsearch.search.query.ScriptScoreQueryTests method: testScriptTermStatsNotAvailable issue: https://github.com/elastic/elasticsearch/issues/112290 +- class: org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests + method: testEqualsAndHashcode + issue: https://github.com/elastic/elasticsearch/issues/112312 # Examples: # From a8fbc10fe10257c1616325cdf0c6c5add6d7c53a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 06:20:29 +1000 Subject: [PATCH 019/144] Mute org.elasticsearch.blobcache.shared.SharedBlobCacheServiceTests testGetMultiThreaded #112314 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 231b3e044379a..7feefa1255f48 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -169,6 +169,9 @@ tests: - class: org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests method: testEqualsAndHashcode issue: https://github.com/elastic/elasticsearch/issues/112312 +- class: org.elasticsearch.blobcache.shared.SharedBlobCacheServiceTests + method: testGetMultiThreaded + issue: https://github.com/elastic/elasticsearch/issues/112314 # Examples: # From 9bf5f61fbb98e5fad6fbf4a2f9945131b67d8046 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Wed, 28 Aug 2024 14:48:25 -0600 Subject: [PATCH 020/144] Mute SharedBlobCacheServiceTests.testGetMultiThreaded (#112315) Relates to https://github.com/elastic/elasticsearch/issues/112305 --- .../blobcache/shared/SharedBlobCacheServiceTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java index d7e8ad19382e5..0f3804baef42b 100644 --- a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java +++ b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java @@ -444,6 +444,7 @@ public void testMassiveDecay() throws IOException { * Exercise SharedBlobCacheService#get in multiple threads to trigger any assertion errors. 
     * @throws IOException
      */
+    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/112305")
     public void testGetMultiThreaded() throws IOException {
         final int threads = between(2, 10);
         final int regionCount = between(1, 20);

From 56a1ae5c14a4f7ee48837d4216f08895051f6fdb Mon Sep 17 00:00:00 2001
From: Lee Hinman
Date: Wed, 28 Aug 2024 15:02:13 -0600
Subject: [PATCH 021/144] Mute ThreadContextTests.testDropWarningsExceedingMaxSettings (#112316)

Relates to https://github.com/elastic/elasticsearch/issues/112256

---
 .../elasticsearch/common/util/concurrent/ThreadContextTests.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/src/test/java/org/elasticsearch/common/util/concurrent/ThreadContextTests.java b/server/src/test/java/org/elasticsearch/common/util/concurrent/ThreadContextTests.java
index 88e3125655df0..568fa3e36c769 100644
--- a/server/src/test/java/org/elasticsearch/common/util/concurrent/ThreadContextTests.java
+++ b/server/src/test/java/org/elasticsearch/common/util/concurrent/ThreadContextTests.java
@@ -638,6 +638,7 @@ public void testResponseHeaders() {
         }
     }
 
+    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/112256")
     public void testDropWarningsExceedingMaxSettings() {
         Settings settings = Settings.builder()
             .put(HttpTransportSettings.SETTING_HTTP_MAX_WARNING_HEADER_COUNT.getKey(), 1)

From 889e641d9c58283e02791acfeb714f670e990402 Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Wed, 28 Aug 2024 23:30:57 +0200
Subject: [PATCH 022/144] Fix DLS over Runtime Fields (#112260)

There is a DLS query referencing a runtime field loaded from _source;
when we create the collector manager we retrieve numDocs, which triggers
going through all segments and executing the script for each document.
StoredFieldSourceProvider relies on leaf ordinals to build an array, but
those ordinals are not populated when computing numDocs via
BaseCompositeReader, because that goes through the subreader contexts
and not the top-level context leaves (a subtle difference that bites us
there).
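To make the subtlety concrete, here is a minimal editor's sketch
(illustrative only, not code from this change; the class and method
names are invented for the example) of how the same segment can surface
as two distinct LeafReaderContext objects once a composite reader wraps
another composite reader, which is why an ord-indexed array sized from
one context tree can be wrong for the other, and why the fix below keys
the per-segment cache by ctx.id() instead:

    import java.io.IOException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.MultiReader;
    import org.apache.lucene.store.Directory;

    class LeafContextSketch {
        // Sketch only; assumes plain Lucene readers over an already-built index.
        static void demo(Directory directory) throws IOException {
            DirectoryReader inner = DirectoryReader.open(directory);
            try (MultiReader outer = new MultiReader(inner)) { // composite wrapping a composite
                LeafReaderContext viaOuter = outer.leaves().get(0); // context built for the outer tree
                LeafReaderContext viaInner = inner.leaves().get(0); // inner tree, same segment
                assert viaOuter.reader() == viaInner.reader();      // same underlying segment reader
                assert viaOuter != viaInner;                        // but distinct contexts: parent (and
                                                                    // possibly ord) differ between trees
            }
        }
    }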
Fixes #111637
---
 docs/changelog/112260.yaml | 6 +++
 .../lookup/StoredFieldSourceProvider.java | 37 ++++-----
 .../DocumentLevelSecurityRandomTests.java | 46 +++++++++++++++++--
 3 files changed, 59 insertions(+), 30 deletions(-)
 create mode 100644 docs/changelog/112260.yaml

diff --git a/docs/changelog/112260.yaml b/docs/changelog/112260.yaml
new file mode 100644
index 0000000000000..3f5642188a367
--- /dev/null
+++ b/docs/changelog/112260.yaml
@@ -0,0 +1,6 @@
+pr: 112260
+summary: Fix DLS over Runtime Fields
+area: "Authorization"
+type: bug
+issues:
+  - 111637
diff --git a/server/src/main/java/org/elasticsearch/search/lookup/StoredFieldSourceProvider.java b/server/src/main/java/org/elasticsearch/search/lookup/StoredFieldSourceProvider.java
index 7516ab93f75a5..6f38669edf716 100644
--- a/server/src/main/java/org/elasticsearch/search/lookup/StoredFieldSourceProvider.java
+++ b/server/src/main/java/org/elasticsearch/search/lookup/StoredFieldSourceProvider.java
@@ -8,12 +8,13 @@
 
 package org.elasticsearch.search.lookup;
 
-import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.LeafReaderContext;
+import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
 import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
 import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
 
 import java.io.IOException;
+import java.util.Map;
 
 // NB This is written under the assumption that individual segments are accessed by a single
 // thread, even if separate segments may be searched concurrently. If we ever implement
@@ -21,7 +22,7 @@ class StoredFieldSourceProvider implements SourceProvider {
 
     private final StoredFieldLoader storedFieldLoader;
-    private volatile LeafStoredFieldSourceProvider[] leaves;
+    private final Map<Object, LeafStoredFieldSourceProvider> leaves = ConcurrentCollections.newConcurrentMap();
 
     StoredFieldSourceProvider(StoredFieldLoader storedFieldLoader) {
         this.storedFieldLoader = storedFieldLoader;
@@ -29,32 +30,14 @@ class StoredFieldSourceProvider implements SourceProvider {
 
     @Override
     public Source getSource(LeafReaderContext ctx, int doc) throws IOException {
-        LeafStoredFieldSourceProvider[] leaves = getLeavesUnderLock(findParentContext(ctx));
-        if (leaves[ctx.ord] == null) {
-            // individual segments are currently only accessed on one thread so there's no need
- leaves[ctx.ord] = new LeafStoredFieldSourceProvider(storedFieldLoader.getLoader(ctx, null)); + final Object id = ctx.id(); + var provider = leaves.get(id); + if (provider == null) { + provider = new LeafStoredFieldSourceProvider(storedFieldLoader.getLoader(ctx, null)); + var existing = leaves.put(id, provider); + assert existing == null : "unexpected source provider [" + existing + "]"; } - return leaves[ctx.ord].getSource(doc); - } - - private static IndexReaderContext findParentContext(LeafReaderContext ctx) { - if (ctx.parent != null) { - return ctx.parent; - } - assert ctx.isTopLevel; - return ctx; - } - - private LeafStoredFieldSourceProvider[] getLeavesUnderLock(IndexReaderContext parentCtx) { - if (leaves == null) { - synchronized (this) { - if (leaves == null) { - leaves = new LeafStoredFieldSourceProvider[parentCtx.leaves().size()]; - } - } - } - return leaves; + return provider.getSource(doc); } private static class LeafStoredFieldSourceProvider { diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java index 73897fc38633a..fb74631970813 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java @@ -13,13 +13,16 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.test.SecurityIntegTestCase; import org.elasticsearch.test.SecuritySettingsSourceField; +import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xpack.core.XPackSettings; +import org.junit.BeforeClass; import java.util.ArrayList; import java.util.Collections; import java.util.List; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken.BASIC_AUTH_HEADER; import static org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken.basicAuthHeaderValue; @@ -29,9 +32,12 @@ public class DocumentLevelSecurityRandomTests extends SecurityIntegTestCase { protected static final SecureString USERS_PASSWD = SecuritySettingsSourceField.TEST_PASSWORD_SECURE_STRING; - // can't add a second test method, because each test run creates a new instance of this class and that will will result - // in a new random value: - private final int numberOfRoles = scaledRandomIntBetween(3, 99); + private static volatile int numberOfRoles; + + @BeforeClass + public static void setupRoleCount() throws Exception { + numberOfRoles = scaledRandomIntBetween(3, 99); + } @Override protected String configUsers() { @@ -119,4 +125,38 @@ public void testDuelWithAliasFilters() throws Exception { } } + public void testWithRuntimeFields() throws Exception { + assertAcked( + indicesAdmin().prepareCreate("test") + .setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("runtime") + .startObject("field1") + .field("type", "keyword") + .endObject() + .endObject() + .startObject("properties") + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + .endObject() + ) + ); + List requests = 
new ArrayList<>(47);
+        for (int i = 1; i <= 42; i++) {
+            requests.add(prepareIndex("test").setSource("field1", "value1", "field2", "foo" + i));
+        }
+        for (int i = 42; i <= 57; i++) {
+            requests.add(prepareIndex("test").setSource("field1", "value2", "field2", "foo" + i));
+        }
+        indexRandom(true, requests);
+        assertHitCount(
+            client().filterWithHeader(Collections.singletonMap(BASIC_AUTH_HEADER, basicAuthHeaderValue("user1", USERS_PASSWD)))
+                .prepareSearch("test"),
+            42L
+        );
+    }
+
 }

From 80f602f44b171236c26f2db1e7e84d12e1b9cb04 Mon Sep 17 00:00:00 2001
From: Lee Hinman
Date: Wed, 28 Aug 2024 16:42:21 -0600
Subject: [PATCH 023/144] Fix typo in rest-api-spec capabilities documentation
 (#112304)

---
 .../yamlRestTest/resources/rest-api-spec/test/README.asciidoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc
index 5716afdd205c0..0ddac662e73ef 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc
@@ -138,7 +138,7 @@ other test runners to skip tests if they do not support the capabilities API yet
       path: /_api
       parameters: [param1, param2]
       capabilities: [cap1, cap2]
-      test_runner_feature: [capabilities]
+      test_runner_features: [capabilities]
       reason: Capability required to run test
   - do: ... test definitions ...

From 98fe686da4c5cb82d4b03719977be428dc7934e7 Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Wed, 28 Aug 2024 15:59:12 -0700
Subject: [PATCH 024/144] Upgrade xcontent to Jackson 2.17.2 (#112320)

Avoid FasterXML/jackson-core#1256

---
 docs/changelog/112320.yaml       |  5 +++++
 gradle/verification-metadata.xml | 20 ++++++++++++++++++++
 libs/x-content/impl/build.gradle |  2 +-
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelog/112320.yaml

diff --git a/docs/changelog/112320.yaml b/docs/changelog/112320.yaml
new file mode 100644
index 0000000000000..d35a08dfa4e91
--- /dev/null
+++ b/docs/changelog/112320.yaml
@@ -0,0 +1,5 @@
+pr: 112320
+summary: Upgrade xcontent to Jackson 2.17.2
+area: Infra/Core
+type: upgrade
+issues: []

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index fd514103449c1..a27e2083a0849 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -311,6 +311,11 @@
+ [five-line <component> entry for a Jackson 2.17.2 artifact; XML markup and sha256 checksum not recoverable]
@@ -346,6 +351,11 @@
+ [five-line <component> entry for a Jackson 2.17.2 artifact; XML markup and sha256 checksum not recoverable]
@@ -361,6 +371,11 @@
+ [five-line <component> entry for a Jackson 2.17.2 artifact; XML markup and sha256 checksum not recoverable]
@@ -381,6 +396,11 @@
+ [five-line <component> entry for a Jackson 2.17.2 artifact; XML markup and sha256 checksum not recoverable]

diff --git a/libs/x-content/impl/build.gradle b/libs/x-content/impl/build.gradle
index 829b75524baeb..6cf278e826d4c 100644
--- a/libs/x-content/impl/build.gradle
+++ b/libs/x-content/impl/build.gradle
@@ -12,7 +12,7 @@ base {
   archivesName = "x-content-impl"
 }

-String jacksonVersion = "2.17.0"
+String jacksonVersion = "2.17.2"

 dependencies {
   compileOnly project(':libs:elasticsearch-core')

From b2d400ccf17b6e04012e3c667baa94626e199728 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Thu, 29 Aug 2024 14:29:25 +1000
Subject: [PATCH 025/144] Mute
 org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT
 testRankDocsRetrieverWithCollapse #112254

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index 7feefa1255f48..ec2a846f71c44 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -172,6 +172,9 @@ tests:
- class:
org.elasticsearch.blobcache.shared.SharedBlobCacheServiceTests method: testGetMultiThreaded issue: https://github.com/elastic/elasticsearch/issues/112314 +- class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT + method: testRankDocsRetrieverWithCollapse + issue: https://github.com/elastic/elasticsearch/issues/112254 # Examples: # From 633f5f9fe37618e1a998e397cdb006db4af55610 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:41:59 +1000 Subject: [PATCH 026/144] Mute org.elasticsearch.search.ccs.CCSUsageTelemetryIT org.elasticsearch.search.ccs.CCSUsageTelemetryIT #112324 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index ec2a846f71c44..71a3479201783 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -175,6 +175,8 @@ tests: - class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT method: testRankDocsRetrieverWithCollapse issue: https://github.com/elastic/elasticsearch/issues/112254 +- class: org.elasticsearch.search.ccs.CCSUsageTelemetryIT + issue: https://github.com/elastic/elasticsearch/issues/112324 # Examples: # From 9344f173d32231f1c47e5ef994bffa27b61da876 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 05:54:13 +0100 Subject: [PATCH 027/144] Add general read/write optional support (#112276) Today `StreamOutput#writeOptionalWriteable` allows to write a possibly-null value that implements `Writeable` and therefore carries its own serialization, but sometimes we want to write an optional value and provide a custom `Writer` too. This commit adds `StreamOutput#writeOptional` and a corresponding `StreamInput#readOptional` to support this. --- .../action/bulk/BulkItemRequest.java | 10 +++--- .../action/bulk/BulkItemResponse.java | 32 +++++++------------ .../action/bulk/BulkShardRequest.java | 9 +----- .../action/bulk/BulkShardResponse.java | 2 +- .../common/io/stream/StreamInput.java | 15 +++++++++ .../common/io/stream/StreamOutput.java | 21 ++++++++++++ .../bucket/range/InternalBinaryRange.java | 14 +++----- .../common/io/stream/AbstractStreamTests.java | 11 +++++++ .../core/rollup/job/RollupJobStatus.java | 9 +++--- .../actions/execute/ExecuteWatchRequest.java | 18 +++-------- 10 files changed, 77 insertions(+), 64 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java index 425461d1f4ba1..7c1304f92eefd 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java @@ -101,11 +101,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(primaryResponse); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - DocWriteRequest.writeDocumentRequestThin(out, request); - out.writeOptionalWriteable(primaryResponse == null ? 
null : primaryResponse::writeThin); - } + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + DocWriteRequest.writeDocumentRequestThin(out, item.request); + out.writeOptional(BulkItemResponse.THIN_WRITER, item.primaryResponse); + }; @Override public long ramBytesUsed() { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java index 151e8795d0f82..d3e550eaf05b3 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java @@ -264,7 +264,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(shardId, in); - failure = in.readBoolean() ? new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -272,7 +272,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(in); - failure = in.readBoolean() ? new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -384,31 +384,21 @@ public void writeTo(StreamOutput out) throws IOException { writeResponseType(out); response.writeTo(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } + out.writeOptionalWriteable(failure); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - out.writeByte(opType.getId()); + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + out.writeByte(item.opType.getId()); - if (response == null) { + if (item.response == null) { out.writeByte((byte) 2); } else { - writeResponseType(out); - response.writeThin(out); + item.writeResponseType(out); + item.response.writeThin(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } - } + out.writeOptionalWriteable(item.failure); + }; private void writeResponseType(StreamOutput out) throws IOException { if (response instanceof SimulateIndexResponse) { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java index 0d2942e688382..f7860c47d8b73 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java @@ -130,14 +130,7 @@ public void writeTo(StreamOutput out) throws IOException { throw new IllegalStateException("Inference metadata should have been consumed before writing to the stream"); } super.writeTo(out); - out.writeArray((o, item) -> { - if (item != null) { - o.writeBoolean(true); - item.writeThin(o); - } else { - o.writeBoolean(false); - } - }, items); + out.writeArray((o, item) -> o.writeOptional(BulkItemRequest.THIN_WRITER, item), items); if (out.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_VALIDATES_MAPPINGS)) { out.writeBoolean(isSimulated); } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java index 3eeb96546c9b0..eb1bb0468c9bb 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java @@ -56,6 +56,6 @@ 
public void setForcedRefresh(boolean forcedRefresh) { public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); shardId.writeTo(out); - out.writeArray((o, item) -> item.writeThin(o), responses); + out.writeArray(BulkItemResponse.THIN_WRITER, responses); } } diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index ec0edb2d07e5a..497028ef37c69 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -1095,8 +1095,23 @@ public T[] readOptionalArray(Writeable.Reader reader, IntFunction ar return readBoolean() ? readArray(reader, arraySupplier) : null; } + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. + * + * @see StreamOutput#writeOptionalWriteable + */ + // just an alias for readOptional() since we don't actually care whether T extends Writeable @Nullable public T readOptionalWriteable(Writeable.Reader reader) throws IOException { + return readOptional(reader); + } + + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. + * + * @see StreamOutput#writeOptional + */ + public T readOptional(Writeable.Reader reader) throws IOException { if (readBoolean()) { T t = reader.read(this); if (t == null) { diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index c65ae2e3463d4..5780885473b00 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -1015,6 +1015,12 @@ public void writeOptionalArray(@Nullable T[] array) throws writeOptionalArray(StreamOutput::writeWriteable, array); } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. + * + * @see StreamInput#readOptionalWriteable + */ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOException { if (writeable != null) { writeBoolean(true); @@ -1024,6 +1030,21 @@ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOExcep } } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. 
+ * + * @see StreamInput#readOptional + */ + public void writeOptional(Writer writer, @Nullable T maybeItem) throws IOException { + if (maybeItem != null) { + writeBoolean(true); + writer.write(this, maybeItem); + } else { + writeBoolean(false); + } + } + /** * This method allow to use a method reference when writing collection elements such as * {@code out.writeMap(map, StreamOutput::writeString, StreamOutput::writeWriteable)} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java index 2b5bcd9931f6e..528c37de7a4a8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java @@ -72,8 +72,8 @@ private static Bucket createFromStream(StreamInput in, DocValueFormat format, bo String key = in.getTransportVersion().equals(TransportVersions.V_8_0_0) ? in.readString() : in.getTransportVersion().onOrAfter(TransportVersions.V_7_17_1) ? in.readOptionalString() : in.readString(); - BytesRef from = in.readBoolean() ? in.readBytesRef() : null; - BytesRef to = in.readBoolean() ? in.readBytesRef() : null; + BytesRef from = in.readOptional(StreamInput::readBytesRef); + BytesRef to = in.readOptional(StreamInput::readBytesRef); long docCount = in.readLong(); InternalAggregations aggregations = InternalAggregations.readFrom(in); @@ -89,14 +89,8 @@ public void writeTo(StreamOutput out) throws IOException { } else { out.writeString(key == null ? generateKey(from, to, format) : key); } - out.writeBoolean(from != null); - if (from != null) { - out.writeBytesRef(from); - } - out.writeBoolean(to != null); - if (to != null) { - out.writeBytesRef(to); - } + out.writeOptional(StreamOutput::writeBytesRef, from); + out.writeOptional(StreamOutput::writeBytesRef, to); out.writeLong(docCount); aggregations.writeTo(out); } diff --git a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java index b1104a72400ea..ae686afcbb296 100644 --- a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java +++ b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java @@ -761,6 +761,17 @@ public void checkZonedDateTimeSerialization(TransportVersion tv) throws IOExcept } } + public void testOptional() throws IOException { + try (var output = new BytesStreamOutput()) { + output.writeOptional(StreamOutput::writeString, "not-null"); + output.writeOptional(StreamOutput::writeString, null); + + final var input = getStreamInput(output.bytes()); + assertEquals("not-null", input.readOptional(StreamInput::readString)); + assertNull(input.readOptional(StreamInput::readString)); + } + } + private void assertSerialization( CheckedConsumer outputAssertions, CheckedConsumer inputAssertions, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java index 1ba625a507a46..f7ad1f65628b2 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java @@ -74,7 +74,7 @@ public RollupJobStatus(IndexerState state, @Nullable 
Map positio public RollupJobStatus(StreamInput in) throws IOException { state = IndexerState.fromStream(in); - currentPosition = in.readBoolean() ? new TreeMap<>(in.readGenericMap()) : null; + currentPosition = in.readOptional(CURRENT_POSITION_READER); if (in.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes serialize `upgradedDocumentID` flag. We don't need it anymore, but // we need to pull it off the stream @@ -83,6 +83,8 @@ public RollupJobStatus(StreamInput in) throws IOException { } } + private static final Reader> CURRENT_POSITION_READER = in -> new TreeMap<>(in.readGenericMap()); + public IndexerState getIndexerState() { return state; } @@ -118,10 +120,7 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { state.writeTo(out); - out.writeBoolean(currentPosition != null); - if (currentPosition != null) { - out.writeGenericMap(currentPosition); - } + out.writeOptional(StreamOutput::writeGenericMap, currentPosition); if (out.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes expect a boolean `upgradedDocumentID` flag. We don't have it anymore, // but we need to tell them we are upgraded in case there is a mixed cluster diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java index 681b004dd1d28..2f2617f956ed9 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java @@ -59,12 +59,8 @@ public ExecuteWatchRequest(StreamInput in) throws IOException { id = in.readOptionalString(); ignoreCondition = in.readBoolean(); recordExecution = in.readBoolean(); - if (in.readBoolean()) { - alternativeInput = in.readGenericMap(); - } - if (in.readBoolean()) { - triggerData = in.readGenericMap(); - } + alternativeInput = in.readOptional(StreamInput::readGenericMap); + triggerData = in.readOptional(StreamInput::readGenericMap); long actionModesCount = in.readLong(); actionModes = new HashMap<>(); for (int i = 0; i < actionModesCount; i++) { @@ -83,14 +79,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(id); out.writeBoolean(ignoreCondition); out.writeBoolean(recordExecution); - out.writeBoolean(alternativeInput != null); - if (alternativeInput != null) { - out.writeGenericMap(alternativeInput); - } - out.writeBoolean(triggerData != null); - if (triggerData != null) { - out.writeGenericMap(triggerData); - } + out.writeOptional(StreamOutput::writeGenericMap, alternativeInput); + out.writeOptional(StreamOutput::writeGenericMap, triggerData); out.writeLong(actionModes.size()); for (Map.Entry entry : actionModes.entrySet()) { out.writeString(entry.getKey()); From 59a42ed41b72ea92e62e4522b83d9f9f48955203 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 06:03:13 +0100 Subject: [PATCH 028/144] Include network disconnect info in troubleshooting docs (#112323) A misplaced `//end::` tag meant that the docs added in #112271 are only included in the page on fault detection and not the equivalent troubleshooting docs. This commit fixes the problem. 
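For readers unfamiliar with the mechanism: the troubleshooting page reuses this section through asciidoc tagged regions, so only the lines between the opening and closing markers are pulled in. A minimal sketch of how the pairing works (the include path here is illustrative, not the repository's exact line):

    // In fault-detection.asciidoc, everything between the markers is shared:
    // tag::troubleshooting[]
    ...diagnosis steps, including the new network-disconnection guidance...
    // end::troubleshooting[]

    // The troubleshooting page then includes just that region:
    include::fault-detection.asciidoc[tag=troubleshooting]

Moving `//end::troubleshooting[]` below the final paragraph is therefore enough to make the whole section appear in both places.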
--- docs/reference/modules/discovery/fault-detection.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index 89c8a78eccbc6..d12985b70597c 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -300,7 +300,6 @@ To reconstruct the output, base64-decode the data and decompress it using ---- cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ---- -//end::troubleshooting[] [discrete] ===== Diagnosing other network disconnections @@ -345,3 +344,4 @@ packet capture simultaneously from the nodes at both ends of an unstable connection and analyse it alongside the {es} logs from those nodes to determine if traffic between the nodes is being disrupted by another device on the network. +//end::troubleshooting[] From aa67bdb5ca8abebcee8a50ebb58e6160d134230c Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 29 Aug 2024 09:53:09 +0300 Subject: [PATCH 029/144] ES|QL: EsqlAsyncSecurityIT workaround for lazy .async-search indexing (#112287) --- muted-tests.yml | 3 -- .../xpack/esql/EsqlAsyncSecurityIT.java | 30 ++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 71a3479201783..508403ee6238c 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -137,9 +137,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAfterMissingIndex issue: https://github.com/elastic/elasticsearch/issues/112088 -- class: org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT - method: testLimitedPrivilege - issue: https://github.com/elastic/elasticsearch/issues/112110 - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {stats.ByTwoCalculatedSecondOverwrites SYNC} issue: https://github.com/elastic/elasticsearch/issues/112117 diff --git a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java index 0806e41186395..f2633dfffb0fe 100644 --- a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java +++ b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java @@ -67,7 +67,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user1", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id, true)); // resource not found exception if the authenticated user is not the creator of the original task assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); @@ -85,7 +85,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user2", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id, true)); assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); error = expectThrows(ResponseException.class, () -> runAsyncDelete("user1", id)); @@ -117,6 +117,10 @@ private 
Response runAsync(String user, String command) throws IOException { } private Response runAsyncGet(String user, String id) throws IOException { + return runAsyncGet(user, id, false); + } + + private Response runAsyncGet(String user, String id, boolean isAsyncIdNotFound_Expected) throws IOException { int tries = 0; while (tries < 10) { // Sometimes we get 404s fetching the task status. @@ -129,22 +133,32 @@ private Response runAsyncGet(String user, String id) throws IOException { logResponse(response); return response; } catch (ResponseException e) { - if (e.getResponse().getStatusLine().getStatusCode() == 404 - && EntityUtils.toString(e.getResponse().getEntity()).contains("no such index [.async-search]")) { - /* - * Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search - * index may not exist when we try the fetch, but it should exist on next attempt. - */ + var statusCode = e.getResponse().getStatusLine().getStatusCode(); + var message = EntityUtils.toString(e.getResponse().getEntity()); + + if (statusCode == 404 && message.contains("no such index [.async-search]")) { + // Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search + // index may not exist when we try the fetch, but it should exist on next attempt. logger.warn("async-search index does not exist", e); try { Thread.sleep(1000); } catch (InterruptedException ex) { throw new RuntimeException(ex); } + } else if (statusCode == 404 && false == isAsyncIdNotFound_Expected && message.contains("resource_not_found_exception")) { + // Work around for https://github.com/elastic/elasticsearch/issues/112110 + // The async id is not indexed quickly enough in .async-search index for us to retrieve it. + logger.warn("async id not found", e); + try { + Thread.sleep(500); + } catch (InterruptedException ex) { + throw new RuntimeException(ex); + } } else { throw e; } tries++; + logger.warn("retry [" + tries + "] for GET /_query/async/" + id); } } throw new IllegalStateException("couldn't find task status"); From b9dea69b5ca5b34600d1fc51badc3a9b163107b2 Mon Sep 17 00:00:00 2001 From: weizijun Date: Thu, 29 Aug 2024 15:17:27 +0800 Subject: [PATCH 030/144] [Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API (#112273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- docs/changelog/112273.yaml | 5 + .../inference/inference-apis.asciidoc | 1 + .../inference/put-inference.asciidoc | 1 + .../service-alibabacloud-ai-search.asciidoc | 184 ++++++++++++++++++ .../semantic-search-inference.asciidoc | 1 + .../infer-api-ingest-pipeline-widget.asciidoc | 17 ++ .../infer-api-ingest-pipeline.asciidoc | 26 +++ .../infer-api-mapping-widget.asciidoc | 17 ++ .../inference-api/infer-api-mapping.asciidoc | 32 +++ .../infer-api-reindex-widget.asciidoc | 17 ++ .../inference-api/infer-api-reindex.asciidoc | 23 +++ .../infer-api-requirements-widget.asciidoc | 17 ++ .../infer-api-requirements.asciidoc | 6 + .../infer-api-search-widget.asciidoc | 17 ++ .../inference-api/infer-api-search.asciidoc | 65 +++++++ .../infer-api-task-widget.asciidoc | 17 ++ .../inference-api/infer-api-task.asciidoc | 29 +++ 17 files changed, 475 insertions(+) create mode 100644 docs/changelog/112273.yaml create mode 100644 docs/reference/inference/service-alibabacloud-ai-search.asciidoc diff --git a/docs/changelog/112273.yaml b/docs/changelog/112273.yaml new file mode 100644 index 0000000000000..3182a1884a145 --- /dev/null +++ 
b/docs/changelog/112273.yaml
@@ -0,0 +1,5 @@
+pr: 111181
+summary: "[Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API"
+area: Machine Learning
+type: enhancement
+issues: [ ]

diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc
index 33db148755d8e..8fdf8aecc2ae5 100644
--- a/docs/reference/inference/inference-apis.asciidoc
+++ b/docs/reference/inference/inference-apis.asciidoc
@@ -39,6 +39,7 @@ include::delete-inference.asciidoc[]
 include::get-inference.asciidoc[]
 include::post-inference.asciidoc[]
 include::put-inference.asciidoc[]
+include::service-alibabacloud-ai-search.asciidoc[]
 include::service-amazon-bedrock.asciidoc[]
 include::service-anthropic.asciidoc[]
 include::service-azure-ai-studio.asciidoc[]

diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc
index 57485e0720cca..ba26a563541fc 100644
--- a/docs/reference/inference/put-inference.asciidoc
+++ b/docs/reference/inference/put-inference.asciidoc
@@ -39,6 +39,7 @@ The create {infer} API enables you to create an {infer} endpoint and configure a
 The following services are available through the {infer} API, click the links to review the configuration details of the services:

+* <<infer-service-alibabacloud-ai-search,AlibabaCloud AI Search>>
 * <<infer-service-amazon-bedrock,Amazon Bedrock>>
 * <<infer-service-anthropic,Anthropic>>
 * <<infer-service-azure-ai-studio,Azure AI Studio>>

diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc
new file mode 100644
index 0000000000000..df5220573d9e4
--- /dev/null
+++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc
@@ -0,0 +1,184 @@
+[[infer-service-alibabacloud-ai-search]]
+=== AlibabaCloud AI Search {infer} service
+
+Creates an {infer} endpoint to perform an {infer} task with the `alibabacloud-ai-search` service.
+
+[discrete]
+[[infer-service-alibabacloud-ai-search-api-request]]
+==== {api-request-title}
+
+`PUT /_inference/<task_type>/<inference_id>`
+
+[discrete]
+[[infer-service-alibabacloud-ai-search-api-path-params]]
+==== {api-path-parms-title}
+
+`<inference_id>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=inference-id]
+
+`<task_type>`::
+(Required, string)
+include::inference-shared.asciidoc[tag=task-type]
++
+--
+Available task types:
+
+* `text_embedding`,
+* `sparse_embedding`.
+* `rerank`.
+--
+
+[discrete]
+[[infer-service-alibabacloud-ai-search-api-request-body]]
+==== {api-request-body-title}
+
+`service`::
+(Required, string) The type of service supported for the specified task type.
+In this case,
+`alibabacloud-ai-search`.
+
+`service_settings`::
+(Required, object)
+include::inference-shared.asciidoc[tag=service-settings]
++
+--
+These settings are specific to the `alibabacloud-ai-search` service.
+--
+
+`api_key`:::
+(Required, string)
+A valid API key for the AlibabaCloud AI Search API.
+
+`service_id`:::
+(Required, string)
+The name of the model service to use for the {infer} task.
++
+--
+Available service_ids for the `text_embedding` task:
+
+* `ops-text-embedding-001`
+* `ops-text-embedding-zh-001`
+* `ops-text-embedding-en-001`
+* `ops-text-embedding-002`
+
+For the supported `text_embedding` service_ids, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[documentation].
+
+Available service_id for the `sparse_embedding` task:
+
+* `ops-text-sparse-embedding-001`
+
+For the supported `sparse_embedding` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-sparse-embedding-api-details[documentation].
+ +Available service_id for the `rerank` task is: + +* `ops-bge-reranker-larger` + +For the supported `rerank` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/ranker-api-details[documentation]. +-- + +`host`::: +(Required, string) +The name of the host address used for the {infer} task. You can find the host address at https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[ the API keys section] of the documentation. + +`workspace`::: +(Required, string) +The name of the workspace used for the {infer} task. + +`rate_limit`::: +(Optional, object) +By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`. +This helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + + +`task_settings`:: +(Optional, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `text_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. +===== ++ +.`task_settings` for the `sparse_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. + +`return_token`::: +(Optional, boolean) +If `true`, the token name will be returned in the response. Defaults to `false` which means only the token ID will be returned in the response. +===== + +[discrete] +[[inference-example-alibabacloud-ai-search]] +==== AlibabaCloud AI Search service examples + +The following example shows how to create an {infer} endpoint called `alibabacloud_ai_search_embeddings` to perform a `text_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The following example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_sparse` to perform a `sparse_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-sparse-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The next example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_rerank` to perform a `rerank` task type. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/alibabacloud_ai_search_rerank +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-bge-reranker-larger", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index f74bc65e31bf0..719aeb070fc7c 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -17,6 +17,7 @@ Azure based examples use models available through https://ai.azure.com/explore/m or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API]. Amazon Bedrock examples use the `amazon.titan-embed-text-v1` model from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[the Amazon Bedrock base models]. +AlibabaCloud AI Search examples use the `ops-text-embedding-zh-001` model from https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[the AlibabaCloud AI Search base models]. Click the name of the service you want to use on any of the widgets below to review the corresponding instructions. diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index 997dbbe8a20e6..3a686e27cf580 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-ingest-amazon-bedrock"> Amazon Bedrock +
+<button role="tab"
+        aria-selected="false"
+        aria-controls="infer-api-ingest-alibabacloud-ai-search-tab"
+        id="infer-api-ingest-alibabacloud-ai-search">
+  AlibabaCloud AI Search
+</button>
    diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc index 6adf3d2ebbf46..6678b60fabc40 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc @@ -216,3 +216,29 @@ PUT _ingest/pipeline/amazon_bedrock_embeddings and the `output_field` that will contain the {infer} results. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/alibabacloud_ai_search_embeddings +{ + "processors": [ + { + "inference": { + "model_id": "alibabacloud_ai_search_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc index 4e3a453a7bbea..66b790bdd57a5 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-mapping-amazon-bedrock"> Amazon Bedrock +
+<button role="tab"
+        aria-selected="false"
+        aria-controls="infer-api-mapping-alibabacloud-ai-search-tab"
+        id="infer-api-mapping-alibabacloud-ai-search">
+  AlibabaCloud AI Search
+</button>
    diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc index abeeb87f03e75..c86538ceb9c87 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc @@ -270,3 +270,35 @@ the {infer} pipeline configuration in the next step. <6> The field type which is text in this example. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT alibabacloud-ai-search-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 1024, <3> + "element_type": "float" + }, + "content": { <4> + "type": "text" <5> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `dense_vector` field. +<3> The output dimensions of the model. This value may be different depending on the underlying model used. +See the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[AlibabaCloud AI Search embedding model] documentation. +<4> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<5> The field type which is text in this example. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc index 45cb9fc51b9f1..86f52fee2063c 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-reindex-amazon-bedrock"> Amazon Bedrock +
+<button role="tab"
+        aria-selected="false"
+        aria-controls="infer-api-reindex-alibabacloud-ai-search-tab"
+        id="infer-api-reindex-alibabacloud-ai-search">
+  AlibabaCloud AI Search
+</button>
    diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc index d961ec8bd39bd..25d4023c650c0 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc @@ -200,3 +200,26 @@ number makes the update of the reindexing process quicker which enables you to follow the progress closely and detect errors early. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "alibabacloud-ai-search-embeddings", + "pipeline": "alibabacloud_ai_search_embeddings" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc index c867b39b88e3b..fb686a2d8be12 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-requirements-amazon-bedrock"> Amazon Bedrock +
+<button role="tab"
+        aria-selected="false"
+        aria-controls="infer-api-requirements-alibabacloud-ai-search-tab"
+        id="infer-api-requirements-alibabacloud-ai-search">
+  AlibabaCloud AI Search
+</button>
    diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index 603cd85a8f93d..c9e7ca8b80ba6 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -52,3 +52,9 @@ You can apply for access to Azure OpenAI by completing the form at https://aka.m * A pair of access and secret keys used to access Amazon Bedrock // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] +* An AlibabaCloud Account with https://console.aliyun.com[AlibabaCloud] access +* An API key generated for your account from the https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section] + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc index fa4a11c59a158..996148d80a4bd 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-search-amazon-bedrock"> Amazon Bedrock +
+<button role="tab"
+        aria-selected="false"
+        aria-controls="infer-api-search-alibabacloud-ai-search-tab"
+        id="infer-api-search-alibabacloud-ai-search">
+  AlibabaCloud AI Search
+</button>
    diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc index f23ed1dfef05d..fe1f58b6bd1a9 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc @@ -531,3 +531,68 @@ query from the `amazon-bedrock-embeddings` index sorted by their proximity to th // NOTCONSOLE // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +GET alibabacloud-ai-search-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "alibabacloud_ai_search_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `alibabacloud-ai-search-embeddings` index sorted by their proximity to the query: + +[source,consol-result] +-------------------------------------------------- +"hits": [ + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "DDd5OowBHxQKHyc3TDSC", + "_score": 0.83704096, + "_source": { + "id": 862114, + "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "ajd5OowBHxQKHyc3TDSC", + "_score": 0.8345704, + "_source": { + "id": 820622, + "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "Djd5OowBHxQKHyc3TDSC", + "_score": 0.8327426, + "_source": { + "id": 8202683, + "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc index f12be341d866d..1dfa6077553fe 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-task-amazon-bedrock"> Amazon Bedrock +
+<button role="tab"
+        aria-selected="false"
+        aria-controls="infer-api-task-alibabacloud-ai-search-tab"
+        id="infer-api-task-alibabacloud-ai-search">
+  AlibabaCloud AI Search
+</button>
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc
index b186b2c58ccc5..2b4aa1a200102 100644
--- a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc
+++ b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc
@@ -223,3 +223,32 @@ PUT _inference/text_embedding/amazon_bedrock_embeddings <1>
<6> The model ID or ARN of the model to use.

// end::amazon-bedrock[]
+
+// tag::alibabacloud-ai-search[]
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/alibabacloud_ai_search_embeddings <1>
+{
+  "service": "alibabacloud-ai-search",
+  "service_settings": {
+    "api_key": "<api_key>", <2>
+    "service_id": "<service_id>", <3>
+    "host": "<host>", <4>
+    "workspace": "<workspace>" <5>
+  }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> The task type is `text_embedding` in the path and the `inference_id` which is the unique identifier of the {infer} endpoint is `alibabacloud_ai_search_embeddings`.
+<2> The API key for accessing the AlibabaCloud AI Search API. You can find your API keys in
+your AlibabaCloud account under the
+https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section]. You need to provide
+your API key only once. The <<get-inference-api>> does not return your API
+key.
+<3> The AlibabaCloud AI Search embeddings model name, for example `ops-text-embedding-zh-001`.
+<4> The name of your AlibabaCloud AI Search host address.
+<5> The name of your AlibabaCloud AI Search workspace.
+
+// end::alibabacloud-ai-search[]
+

From 569184871bc0006ba55c20bcbc4500e98e853aea Mon Sep 17 00:00:00 2001
From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com>
Date: Thu, 29 Aug 2024 08:23:34 +0100
Subject: [PATCH 031/144] Add UpdateForV10 annotation (#112281)

In preparation for the next major release of Elasticsearch, this commit
adds the UpdateForV10 annotation.

---
 .../org/elasticsearch/core/UpdateForV10.java | 23 +++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java

diff --git a/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java
new file mode 100644
index 0000000000000..0fe816bd3721d
--- /dev/null
+++ b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */ + +package org.elasticsearch.core; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to identify a block of code (a whole class, a method, or a field) that needs to be reviewed (for cleanup, remove or change) + * before releasing 10.0 + */ +@Retention(RetentionPolicy.SOURCE) +@Target({ ElementType.LOCAL_VARIABLE, ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE }) +public @interface UpdateForV10 { +} From 727f1e72c6d930ad763ca307f622eadbbdfff112 Mon Sep 17 00:00:00 2001 From: Dominique Clarke Date: Thu, 29 Aug 2024 03:46:44 -0400 Subject: [PATCH 032/144] [Observability] add .slo-observability.* index privileges to built in editor and viewer roles (#111984) Today, the `editor` and `viewer` roles do not contain the appropriate index privileges for SLO users. This PR updates the index privileges to include the `.slo-observability.*` indices. --------- Co-authored-by: Slobodan Adamovic --- .../authz/store/ReservedRolesStore.java | 9 ++++ .../authz/store/ReservedRolesStoreTests.java | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java index 4f3d7a245fc8f..74434adf61fbb 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @@ -868,6 +868,11 @@ private static RoleDescriptor buildViewerRoleDescriptor() { .indices("/~(([.]|ilm-history-).*)/") .privileges("read", "view_index_metadata") .build(), + // Observability + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) @@ -915,6 +920,10 @@ private static RoleDescriptor buildEditorRoleDescriptor() { .indices("observability-annotations") .privileges("read", "view_index_metadata", "write") .build(), + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata", "write", "manage") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index f0676f35ae316..0cdf7de63ca99 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.action.admin.indices.get.GetIndexAction; import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsAction; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsAction; +import 
org.elasticsearch.action.admin.indices.mapping.put.TransportAutoPutMappingAction; import org.elasticsearch.action.admin.indices.mapping.put.TransportPutMappingAction; import org.elasticsearch.action.admin.indices.recovery.RecoveryAction; import org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction; @@ -3662,6 +3663,9 @@ public void testPredefinedViewerRole() { assertOnlyReadAllowed(role, ".profiling-" + randomIntBetween(0, 5)); assertOnlyReadAllowed(role, randomAlphaOfLength(5)); + assertOnlyReadAllowed(role, ".slo-observability." + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." + randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3740,6 +3744,9 @@ public void testPredefinedEditorRole() { assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".preview.alerts-" + randomIntBetween(0, 5)); assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".internal.preview.alerts-" + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertReadWriteAndManage(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." + randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3865,6 +3872,41 @@ private void assertReadWriteDocsButNotDeleteIndexAllowed(Role role, String index role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), is(false) ); + + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportUpdateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportDeleteAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportBulkAction.NAME).test(mockIndexAbstraction(index)), is(true)); + } + + private void assertReadWriteAndManage(Role role, String index) { + assertThat( + role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportFieldCapabilitiesAction.NAME + "*").test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportCreateIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportUpdateSettingsAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(GetRollupIndexCapsAction.NAME + "*").test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher("indices:admin/*").test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher("indices:monitor/*").test(mockIndexAbstraction(index)), is(true)); + 
assertThat( + role.indices().allowedIndicesMatcher(TransportAutoPutMappingAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(AutoCreateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); From 55ed03fddfa8c77c354a2db2910593b40d2be890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 28 Aug 2024 19:21:00 +0200 Subject: [PATCH 033/144] Remove Scorable#docID implementations This method was removed in https://github.com/apache/lucene/pull/12407 so we also need to remove it in implementations of Scorable. --- .../painless/ScriptedMetricAggContextsTests.java | 5 ----- .../join/aggregations/ParentJoinAggregator.java | 5 ----- .../aggregations/bucket/nested/NestedAggregator.java | 7 ------- .../bucket/sampler/BestDocsDeferringCollector.java | 8 -------- .../search/aggregations/MultiBucketCollectorTests.java | 10 ++-------- .../search/query/QueryPhaseCollectorTests.java | 10 ---------- .../search/sort/BucketedSortForFloatsTests.java | 6 ------ 7 files changed, 2 insertions(+), 49 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java index 2d3f09fc7243a..8eae139eb8226 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java @@ -73,11 +73,6 @@ public void testMapBasic() throws IOException { Map state = new HashMap<>(); Scorable scorer = new Scorable() { - @Override - public int docID() { - return 0; - } - @Override public float score() { return 0.5f; diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index 9c6a788ea2f77..ed4dcf2072b8d 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -133,11 +133,6 @@ protected void prepareSubAggs(long[] ordsToCollect) throws IOException { public float score() { return 1f; } - - @Override - public int docID() { - return childDocsIter.docID(); - } }); final Bits liveDocs = ctx.reader().getLiveDocs(); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 39dfd6e4aac3a..28e010f541a74 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -206,19 +206,12 @@ void processBufferedChildBuckets() throws IOException { } private static class CachedScorable extends Scorable { - int doc; float score; @Override public final float score() { return score; } - - 
@Override - public int docID() { - return doc; - } - } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index 1344604a8d39c..c72c4b29a478f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -213,7 +213,6 @@ class PerSegmentCollects extends Scorable { private final AggregationExecutionContext aggCtx; int maxDocId = Integer.MIN_VALUE; private float currentScore; - private int currentDocId = -1; private Scorable currentScorer; PerSegmentCollects(AggregationExecutionContext aggCtx) throws IOException { @@ -248,7 +247,6 @@ public void replayRelatedMatches(List sd) throws IOException { leafCollector.setScorer(this); currentScore = 0; - currentDocId = -1; if (maxDocId < 0) { return; } @@ -258,7 +256,6 @@ public void replayRelatedMatches(List sd) throws IOException { int rebased = scoreDoc.doc - aggCtx.getLeafReaderContext().docBase; if ((rebased >= 0) && (rebased <= maxDocId)) { currentScore = scoreDoc.score; - currentDocId = rebased; // We stored the bucket ID in Lucene's shardIndex property // for convenience. leafCollector.collect(rebased, scoreDoc.shardIndex); @@ -275,11 +272,6 @@ public float score() throws IOException { return currentScore; } - @Override - public int docID() { - return currentDocId; - } - public void collect(int docId, long parentBucket) throws IOException { perBucketSamples = bigArrays.grow(perBucketSamples, parentBucket + 1); PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java index cfb9c4bb83249..ff4ad059559fc 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java @@ -32,14 +32,8 @@ import static org.hamcrest.Matchers.equalTo; public class MultiBucketCollectorTests extends ESTestCase { - private static class ScoreAndDoc extends Scorable { + private static class Score extends Scorable { float score; - int doc = -1; - - @Override - public int docID() { - return doc; - } @Override public float score() { @@ -246,7 +240,7 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { collector1 = new TerminateAfterBucketCollector(collector1, 1); collector2 = new TerminateAfterBucketCollector(collector2, 2); - Scorable scorer = new ScoreAndDoc(); + Scorable scorer = new Score(); List collectors = Arrays.asList(collector1, collector2); Collections.shuffle(collectors, random()); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java index f222e697488d2..dbfd9d83ee887 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java @@ -1138,11 +1138,6 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { public float score() { return 0; } - - @Override - public int docID() { - return 0; - } }; 
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector( @@ -1472,11 +1467,6 @@ public float score() throws IOException { return 0; } - @Override - public int docID() { - return 0; - } - @Override public void setMinCompetitiveScore(float minScore) { setMinCompetitiveScoreCalled = true; diff --git a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java index 0f088d2948fcb..7f136a097e24a 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java @@ -120,18 +120,12 @@ public void testScorer() throws IOException { } private class MockScorable extends Scorable { - private int doc; private float score; @Override public float score() throws IOException { return score; } - - @Override - public int docID() { - return doc; - } } /** From 5e455db10ecbb1a31cad58ecb1120a66fc50079f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 29 Aug 2024 10:04:27 +0200 Subject: [PATCH 034/144] Revert "Remove Scorable#docID implementations" This reverts commit 55ed03fddfa8c77c354a2db2910593b40d2be890. --- .../painless/ScriptedMetricAggContextsTests.java | 5 +++++ .../join/aggregations/ParentJoinAggregator.java | 5 +++++ .../aggregations/bucket/nested/NestedAggregator.java | 7 +++++++ .../bucket/sampler/BestDocsDeferringCollector.java | 8 ++++++++ .../search/aggregations/MultiBucketCollectorTests.java | 10 ++++++++-- .../search/query/QueryPhaseCollectorTests.java | 10 ++++++++++ .../search/sort/BucketedSortForFloatsTests.java | 6 ++++++ 7 files changed, 49 insertions(+), 2 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java index 8eae139eb8226..2d3f09fc7243a 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java @@ -73,6 +73,11 @@ public void testMapBasic() throws IOException { Map state = new HashMap<>(); Scorable scorer = new Scorable() { + @Override + public int docID() { + return 0; + } + @Override public float score() { return 0.5f; diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index ed4dcf2072b8d..9c6a788ea2f77 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -133,6 +133,11 @@ protected void prepareSubAggs(long[] ordsToCollect) throws IOException { public float score() { return 1f; } + + @Override + public int docID() { + return childDocsIter.docID(); + } }); final Bits liveDocs = ctx.reader().getLiveDocs(); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 28e010f541a74..39dfd6e4aac3a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -206,12 +206,19 @@ void processBufferedChildBuckets() throws IOException { } private static class CachedScorable extends Scorable { + int doc; float score; @Override public final float score() { return score; } + + @Override + public int docID() { + return doc; + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index c72c4b29a478f..1344604a8d39c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -213,6 +213,7 @@ class PerSegmentCollects extends Scorable { private final AggregationExecutionContext aggCtx; int maxDocId = Integer.MIN_VALUE; private float currentScore; + private int currentDocId = -1; private Scorable currentScorer; PerSegmentCollects(AggregationExecutionContext aggCtx) throws IOException { @@ -247,6 +248,7 @@ public void replayRelatedMatches(List sd) throws IOException { leafCollector.setScorer(this); currentScore = 0; + currentDocId = -1; if (maxDocId < 0) { return; } @@ -256,6 +258,7 @@ public void replayRelatedMatches(List sd) throws IOException { int rebased = scoreDoc.doc - aggCtx.getLeafReaderContext().docBase; if ((rebased >= 0) && (rebased <= maxDocId)) { currentScore = scoreDoc.score; + currentDocId = rebased; // We stored the bucket ID in Lucene's shardIndex property // for convenience. leafCollector.collect(rebased, scoreDoc.shardIndex); @@ -272,6 +275,11 @@ public float score() throws IOException { return currentScore; } + @Override + public int docID() { + return currentDocId; + } + public void collect(int docId, long parentBucket) throws IOException { perBucketSamples = bigArrays.grow(perBucketSamples, parentBucket + 1); PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java index ff4ad059559fc..cfb9c4bb83249 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java @@ -32,8 +32,14 @@ import static org.hamcrest.Matchers.equalTo; public class MultiBucketCollectorTests extends ESTestCase { - private static class Score extends Scorable { + private static class ScoreAndDoc extends Scorable { float score; + int doc = -1; + + @Override + public int docID() { + return doc; + } @Override public float score() { @@ -240,7 +246,7 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { collector1 = new TerminateAfterBucketCollector(collector1, 1); collector2 = new TerminateAfterBucketCollector(collector2, 2); - Scorable scorer = new Score(); + Scorable scorer = new ScoreAndDoc(); List collectors = Arrays.asList(collector1, collector2); Collections.shuffle(collectors, random()); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java index dbfd9d83ee887..f222e697488d2 100644 --- 
a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java @@ -1138,6 +1138,11 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { public float score() { return 0; } + + @Override + public int docID() { + return 0; + } }; QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector( @@ -1467,6 +1472,11 @@ public float score() throws IOException { return 0; } + @Override + public int docID() { + return 0; + } + @Override public void setMinCompetitiveScore(float minScore) { setMinCompetitiveScoreCalled = true; diff --git a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java index 7f136a097e24a..0f088d2948fcb 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java @@ -120,12 +120,18 @@ public void testScorer() throws IOException { } private class MockScorable extends Scorable { + private int doc; private float score; @Override public float score() throws IOException { return score; } + + @Override + public int docID() { + return doc; + } } /** From aa57a1553e3371158c23faed7a5f7c5833a6e18d Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:13:30 +0100 Subject: [PATCH 035/144] [DOCS] Rewrite "What is Elasticsearch?" (Part 1) (#112213) --- docs/reference/intro.asciidoc | 132 ++++++++++-------- .../search-your-data/near-real-time.asciidoc | 2 +- 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 3fc23b44994a7..cd9c126e7b1fd 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -1,42 +1,70 @@ [[elasticsearch-intro]] == What is {es}? -_**You know, for search (and analysis)**_ - -{es} is the distributed search and analytics engine at the heart of -the {stack}. {ls} and {beats} facilitate collecting, aggregating, and -enriching your data and storing it in {es}. {kib} enables you to -interactively explore, visualize, and share insights into your data and manage -and monitor the stack. {es} is where the indexing, search, and analysis -magic happens. - -{es} provides near real-time search and analytics for all types of data. Whether you -have structured or unstructured text, numerical data, or geospatial data, -{es} can efficiently store and index it in a way that supports fast searches. -You can go far beyond simple data retrieval and aggregate information to discover -trends and patterns in your data. And as your data and query volume grows, the -distributed nature of {es} enables your deployment to grow seamlessly right -along with it. 
- -While not _every_ problem is a search problem, {es} offers speed and flexibility -to handle data in a wide variety of use cases: - -* Add a search box to an app or website -* Store and analyze logs, metrics, and security event data -* Use machine learning to automatically model the behavior of your data in real - time -* Use {es} as a vector database to create, store, and search vector embeddings -* Automate business workflows using {es} as a storage engine -* Manage, integrate, and analyze spatial information using {es} as a geographic - information system (GIS) -* Store and process genetic data using {es} as a bioinformatics research tool - -We’re continually amazed by the novel ways people use search. But whether -your use case is similar to one of these, or you're using {es} to tackle a new -problem, the way you work with your data, documents, and indices in {es} is -the same. + +{es-repo}[{es}] is a distributed search and analytics engine, scalable data store, and vector database built on Apache Lucene. +It's optimized for speed and relevance on production-scale workloads. +Use {es} to search, index, store, and analyze data of all shapes and sizes in near real time. + +[TIP] +==== +{es} has a lot of features. Explore the full list on the https://www.elastic.co/elasticsearch/features[product webpage^]. +==== + +{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack] and powers the Elastic https://www.elastic.co/enterprise-search[Search], https://www.elastic.co/observability[Observability] and https://www.elastic.co/security[Security] solutions. + +{es} is used for a wide and growing range of use cases. Here are a few examples: + +* *Monitor log and event data*. Store logs, metrics, and event data for observability and security information and event management (SIEM). +* *Build search applications*. Add search capabilities to apps or websites, or build enterprise search engines over your organization's internal data sources. +* *Vector database*. Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. +* *Retrieval augmented generation (RAG)*. Use {es} as a retrieval engine to augment Generative AI models. +* *Application and security monitoring*. Monitor and analyze application performance and security data effectively. +* *Machine learning*. Use {ml} to automatically model the behavior of your data in real-time. + +This is just a sample of search, observability, and security use cases enabled by {es}. +Refer to our https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. +// Link to demos, search labs chatbots + +[discrete] +[[elasticsearch-intro-elastic-stack]] +.What is the Elastic Stack? +******************************* +{es} is the core component of the Elastic Stack, a suite of products for collecting, storing, searching, and visualizing data. +https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current/stack-components.html[Learn more about the Elastic Stack]. +******************************* +// TODO: Remove once we've moved Stack Overview to a subpage? + +[discrete] +[[elasticsearch-intro-deploy]] +=== Deployment options + +To use {es}, you need a running instance of the {es} service. +You can deploy {es} in various ways: + +* <>. Get started quickly with a minimal local Docker setup. +* {cloud}/ec-getting-started-trial.html[*Elastic Cloud*]. 
{es} is available as part of our hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14 day free trial]. +* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]. Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14 day free trial]. + +**Advanced deployment options** + +* <>. Install, configure, and run {es} on your own premises. +* {ece-ref}/Elastic-Cloud-Enterprise-overview.html[*Elastic Cloud Enterprise*]. Deploy Elastic Cloud on public or private clouds, virtual machines, or your own premises. +* {eck-ref}/k8s-overview.html[*Elastic Cloud on Kubernetes*]. Deploy Elastic Cloud on Kubernetes. + +[discrete] +[[elasticsearch-next-steps]] +=== Learn more + +Here are some resources to help you get started: + +* <>. A beginner's guide to deploying your first {es} instance, indexing data, and running queries. +* https://elastic.co/webinars/getting-started-elasticsearch[Webinar: Introduction to {es}]. Register for our live webinars to learn directly from {es} experts. +* https://www.elastic.co/search-labs[Elastic Search Labs]. Tutorials and blogs that explore AI-powered search using the latest {es} features. +** Follow our tutorial https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[to build a hybrid search solution in Python]. +** Check out the https://github.com/elastic/elasticsearch-labs?tab=readme-ov-file#elasticsearch-examples--apps[`elasticsearch-labs` repository] for a range of Python notebooks and apps for various use cases. [[documents-indices]] -=== Data in: documents and indices +=== Documents and indices {es} is a distributed document store. Instead of storing information as rows of columnar data, {es} stores complex data structures that have been serialized @@ -65,8 +93,7 @@ behavior makes it easy to index and explore your data--just start indexing documents and {es} will detect and map booleans, floating point and integer values, dates, and strings to the appropriate {es} data types. -Ultimately, however, you know more about your data and how you want to use it -than {es} can. You can define rules to control dynamic mapping and explicitly +You can define rules to control dynamic mapping and explicitly define mappings to take full control of how fields are stored and indexed. Defining your own mappings enables you to: @@ -89,7 +116,7 @@ used at search time. When you query a full-text field, the query text undergoes the same analysis before the terms are looked up in the index. [[search-analyze]] -=== Information out: search and analyze +=== Search and analyze While you can use {es} as a document store and retrieve documents and their metadata, the real power comes from being able to easily access the full suite @@ -160,27 +187,8 @@ size 70 needles, you’re displaying a count of the size 70 needles that match your users' search criteria--for example, all size 70 _non-stick embroidery_ needles. -[discrete] -[[more-features]] -===== But wait, there’s more - -Want to automate the analysis of your time series data? You can use -{ml-docs}/ml-ad-overview.html[machine learning] features to create accurate -baselines of normal behavior in your data and identify anomalous patterns. 
With -machine learning, you can detect: - -* Anomalies related to temporal deviations in values, counts, or frequencies -* Statistical rarity -* Unusual behaviors for a member of a population - -And the best part? You can do this without having to specify algorithms, models, -or other data science-related configurations. - [[scalability]] -=== Scalability and resilience: clusters, nodes, and shards -++++ -Scalability and resilience -++++ +=== Scalability and resilience {es} is built to be always available and to scale with your needs. It does this by being distributed by nature. You can add servers (nodes) to a cluster to @@ -209,7 +217,7 @@ interrupting indexing or query operations. [discrete] [[it-depends]] -==== It depends... +==== Shard size and number of shards There are a number of performance considerations and trade offs with respect to shard size and the number of primary shards configured for an index. The more @@ -237,7 +245,7 @@ testing with your own data and queries]. [discrete] [[disaster-ccr]] -==== In case of disaster +==== Disaster recovery A cluster's nodes need good, reliable connections to each other. To provide better connections, you typically co-locate the nodes in the same data center or @@ -257,7 +265,7 @@ secondary clusters are read-only followers. [discrete] [[admin]] -==== Care and feeding +==== Security, management, and monitoring As with any enterprise system, you need tools to secure, manage, and monitor your {es} clusters. Security, monitoring, and administrative features @@ -265,3 +273,5 @@ that are integrated into {es} enable you to use {kibana-ref}/introduction.html[{ as a control center for managing a cluster. Features like <> and <> help you intelligently manage your data over time. + +Refer to <> for more information. \ No newline at end of file diff --git a/docs/reference/search/search-your-data/near-real-time.asciidoc b/docs/reference/search/search-your-data/near-real-time.asciidoc index 46a996c237c38..47618ecd9fd7a 100644 --- a/docs/reference/search/search-your-data/near-real-time.asciidoc +++ b/docs/reference/search/search-your-data/near-real-time.asciidoc @@ -2,7 +2,7 @@ [[near-real-time]] === Near real-time search -The overview of <> indicates that when a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? +When a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? Lucene, the Java libraries on which {es} is based, introduced the concept of per-segment search. A _segment_ is similar to an inverted index, but the word _index_ in Lucene means "a collection of segments plus a commit point". After a commit, a new segment is added to the commit point and the buffer is cleared. 
From 320ccbc24748809feecc42df1f7bab6c4d6fd4cc Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:25:04 +0300 Subject: [PATCH 036/144] Reduce load for stress test to avoid oom (#112331) Fixes #112326 --- .../logsdb/datageneration/DataGeneratorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java index db3b81891e87e..4a4ffca0f37aa 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java @@ -113,13 +113,13 @@ protected Collection getPlugins() { } public void testDataGeneratorStressTest() throws IOException { - // Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object). + // Let's generate 125000 fields to test an extreme case (2 levels of objects + 1 leaf level with 50 fields per object). var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() { private int generatedFields = 0; @Override public int generateChildFieldCount() { - return 100; + return 50; } @Override From 2c29a3ae0a6e743c2df72df5895e90aa56dd2683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 29 Aug 2024 12:43:10 +0200 Subject: [PATCH 037/144] [DOCS] Highlights auto-chunking in intro of semantic text. (#111836) --- docs/reference/mapping/types/semantic-text.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 522a0c54c8aad..a006f288dc66d 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -7,8 +7,8 @@ beta[] -The `semantic_text` field type automatically generates embeddings for text -content using an inference endpoint. +The `semantic_text` field type automatically generates embeddings for text content using an inference endpoint. +Long passages are <> to smaller sections to enable the processing of larger corpuses of text. The `semantic_text` field type specifies an inference endpoint identifier that will be used to generate embeddings. You can create the inference endpoint by using the <>. From 35fe3a9c47500ab21735f7c40f7184fb7d724f9c Mon Sep 17 00:00:00 2001 From: weizijun Date: Thu, 29 Aug 2024 19:46:58 +0800 Subject: [PATCH 038/144] Minor inference docs fixes (#112332) --- .../inference/service-alibabacloud-ai-search.asciidoc | 2 +- docs/reference/inference/service-amazon-bedrock.asciidoc | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc index df5220573d9e4..23a3d532635ac 100644 --- a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc +++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc @@ -25,7 +25,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `text_embedding`, -* `sparse_embedding`. +* `sparse_embedding`,
-- diff --git a/docs/reference/inference/service-amazon-bedrock.asciidoc b/docs/reference/inference/service-amazon-bedrock.asciidoc index 4ffa368613a0e..dbffd5c26fbcc 100644 --- a/docs/reference/inference/service-amazon-bedrock.asciidoc +++ b/docs/reference/inference/service-amazon-bedrock.asciidoc @@ -122,14 +122,6 @@ Only available for `anthropic`, `cohere`, and `mistral` providers. Alternative to `temperature`. Limits samples to the top-K most likely words, balancing coherence and variability. Should not be used if `temperature` is specified. -===== -+ -.`task_settings` for the `text_embedding` task type -[%collapsible%closed] -===== - -There are no `task_settings` available for the `text_embedding` task type. - ===== [discrete] From b4c8fa362dc88d9d1220c7466ec2c0219a258433 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 29 Aug 2024 13:50:24 +0200 Subject: [PATCH 039/144] Reenable 26_aggs_bucket EsqlClientYamlIT (#112343) Reenable 26_aggs_bucket EsqlClientYamlIT, fixed in #111897. Fixes #111901, fixes #111902. --- muted-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 508403ee6238c..e4c2f62d2617f 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -122,12 +122,6 @@ tests: - class: org.elasticsearch.xpack.restart.CoreFullClusterRestartIT method: testSnapshotRestore {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/111799 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval hourly: #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111901 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval: monthly #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111902 - class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT method: testScaledFloat issue: https://github.com/elastic/elasticsearch/issues/112003 From a97b0e226e3d7ea5e27eb565ae05d01ca22b06a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Thu, 29 Aug 2024 13:59:16 +0200 Subject: [PATCH 040/144] Fix test failures in ScriptScoreQueryTests (#112334) --- muted-tests.yml | 6 ------ .../elasticsearch/search/query/ScriptScoreQueryTests.java | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index e4c2f62d2617f..e80a39040a4ef 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -151,12 +151,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAsync issue: https://github.com/elastic/elasticsearch/issues/112212 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsAvailable - issue: https://github.com/elastic/elasticsearch/issues/112278 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsNotAvailable - issue: https://github.com/elastic/elasticsearch/issues/112290 - class: org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests method: testEqualsAndHashcode issue: https://github.com/elastic/elasticsearch/issues/112312 diff --git a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java index d6b1da9f76b42..177968b9a1326 100644 --- a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java @@ -72,7 +72,7 @@ public void initSearcher() throws IOException { w.commit(); reader = DirectoryReader.open(w); searcher = newSearcher(reader); - leafReaderContext = reader.leaves().get(0); + leafReaderContext = searcher.getTopReaderContext().leaves().get(0); } @After From a69f8e19ed4513d552b24a655f45b38098336b26 Mon Sep 17 00:00:00 2001 From: Albert Zaharovits Date: Thu, 29 Aug 2024 15:09:28 +0300 Subject: [PATCH 041/144] Avoid redundant cluster state build (#112340) Avoid redundant cluster state build when creating index --- .../cluster/metadata/MetadataCreateIndexService.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b5ee0ebd7e387..b1a19d99dcb19 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -1249,11 +1249,10 @@ static ClusterState clusterStateCreateIndex( ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); blocks.updateBlocks(indexMetadata); - ClusterState updatedState = ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexName)); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, updatedState.routingTable()) - .addAsNew(updatedState.metadata().index(indexName)); - return ClusterState.builder(updatedState).routingTable(routingTableBuilder.build()).build(); + return ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( From cefe358b4197332aca6b4d15d440851033134d61 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 29 Aug 2024 14:15:29 +0200 Subject: [PATCH 042/144] Fix DLS using runtime fields and synthetic source (#112341) Somewhat of a tortured test, but this applies the same fix from #112260 to synthetic source, which was running into the same bug as a stored field source.
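In essence, the change below stops indexing the per-segment loader cache by `ctx.ord`, which is only meaningful relative to one top-level reader context (and DLS can present the same leaves under a differently wrapped reader), and instead keys it on the leaf's identity object. A simplified sketch of the resulting pattern, condensed from the diff that follows rather than the verbatim code:

[source,java]
----
// One loader per leaf, keyed on LeafReaderContext#id(), which identifies the
// leaf itself rather than its position within a particular top-level reader.
private final Map<Object, SyntheticSourceLeafLoader> leaves = ConcurrentCollections.newConcurrentMap();

public Source getSource(LeafReaderContext ctx, int doc) throws IOException {
    var loader = leaves.get(ctx.id());
    if (loader == null) {
        // Per the class's threading assumption, an individual leaf is
        // populated by a single thread, so a plain get/put suffices here.
        loader = new SyntheticSourceLeafLoader(ctx);
        leaves.put(ctx.id(), loader);
    }
    return loader.getSource(doc);
}
----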
--- docs/changelog/112341.yaml | 5 +++ .../lookup/SyntheticSourceProvider.java | 36 +++++------------- .../DocumentLevelSecurityRandomTests.java | 38 ++++++++++++++++++- 3 files changed, 52 insertions(+), 27 deletions(-) create mode 100644 docs/changelog/112341.yaml diff --git a/docs/changelog/112341.yaml b/docs/changelog/112341.yaml new file mode 100644 index 0000000000000..8f44b53ad9998 --- /dev/null +++ b/docs/changelog/112341.yaml @@ -0,0 +1,5 @@ +pr: 112341 +summary: Fix DLS using runtime fields and synthetic source +area: Authorization +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java index bccfc22dc7e95..a4549f0814a06 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java @@ -8,13 +8,14 @@ package org.elasticsearch.search.lookup; -import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.SourceLoader; import java.io.IOException; +import java.util.Map; // NB This is written under the assumption that individual segments are accessed by a single // thread, even if separate segments may be searched concurrently. If we ever implement @@ -22,7 +23,7 @@ class SyntheticSourceProvider implements SourceProvider { private final SourceLoader sourceLoader; - private volatile SyntheticSourceLeafLoader[] leafLoaders; + private final Map leaves = ConcurrentCollections.newConcurrentMap(); SyntheticSourceProvider(SourceLoader sourceLoader) { this.sourceLoader = sourceLoader; @@ -30,31 +31,14 @@ class SyntheticSourceProvider implements SourceProvider { @Override public Source getSource(LeafReaderContext ctx, int doc) throws IOException { - maybeInit(ctx); - if (leafLoaders[ctx.ord] == null) { - // individual segments are currently only accessed on one thread so there's no need - // for locking here. 
- leafLoaders[ctx.ord] = new SyntheticSourceLeafLoader(ctx); + final Object id = ctx.id(); + var provider = leaves.get(id); + if (provider == null) { + provider = new SyntheticSourceLeafLoader(ctx); + var existing = leaves.put(id, provider); + assert existing == null : "unexpected source provider [" + existing + "]"; } - return leafLoaders[ctx.ord].getSource(doc); - } - - private void maybeInit(LeafReaderContext ctx) { - if (leafLoaders == null) { - synchronized (this) { - if (leafLoaders == null) { - leafLoaders = new SyntheticSourceLeafLoader[findParentContext(ctx).leaves().size()]; - } - } - } - } - - private IndexReaderContext findParentContext(LeafReaderContext ctx) { - if (ctx.parent != null) { - return ctx.parent; - } - assert ctx.isTopLevel; - return ctx; + return provider.getSource(doc); } private class SyntheticSourceLeafLoader { diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java index fb74631970813..1bf7d89347755 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java @@ -144,6 +144,43 @@ public void testWithRuntimeFields() throws Exception { .endObject() ) ); + doTestWithRuntimeFieldsInTestIndex(); + } + + public void testWithRuntimeFieldsAndSyntheticSource() throws Exception { + assertAcked( + indicesAdmin().prepareCreate("test") + .setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .field("mode", "synthetic") + .endObject() + .startObject("runtime") + .startObject("field1") + .field("type", "keyword") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + .startObject("properties") + .startObject("field1") + .field("type", "text") + .field("store", true) + .endObject() + .startObject("field2") + .field("type", "text") + .field("store", true) + .endObject() + .endObject() + .endObject() + ) + ); + doTestWithRuntimeFieldsInTestIndex(); + } + + private void doTestWithRuntimeFieldsInTestIndex() { List requests = new ArrayList<>(47); for (int i = 1; i <= 42; i++) { requests.add(prepareIndex("test").setSource("field1", "value1", "field2", "foo" + i)); @@ -158,5 +195,4 @@ public void testWithRuntimeFields() throws Exception { 42L ); } - } From 9387ce335757194da1986722a98f95338a45a873 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 13:16:37 +0100 Subject: [PATCH 043/144] Deduplicate unstable-cluster troubleshooting docs (#112333) We duplicated these docs in order to avoid breaking older links, but this makes it confusing and hard to link to the right copy of the information. This commit removes the duplication by replacing the docs at the old locations with stubs that link to the new locations. 
--- .../discovery/fault-detection.asciidoc | 295 +--------------- .../troubleshooting-unstable-cluster.asciidoc | 314 +++++++++++++++++- .../common/reference-docs-links.json | 4 +- 3 files changed, 321 insertions(+), 292 deletions(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index d12985b70597c..21f4ae2317e6a 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -35,313 +35,30 @@ starting from the beginning of the cluster state update. Refer to [[cluster-fault-detection-troubleshooting]] ==== Troubleshooting an unstable cluster -//tag::troubleshooting[] -Normally, a node will only leave a cluster if deliberately shut down. If a node -leaves the cluster unexpectedly, it's important to address the cause. A cluster -in which nodes leave unexpectedly is unstable and can create several issues. -For instance: -* The cluster health may be yellow or red. - -* Some shards will be initializing and other shards may be failing. - -* Search, indexing, and monitoring operations may fail and report exceptions in -logs. - -* The `.security` index may be unavailable, blocking access to the cluster. - -* The master may appear busy due to frequent cluster state updates. - -To troubleshoot a cluster in this state, first ensure the cluster has a -<>. Next, focus on the nodes -unexpectedly leaving the cluster ahead of all other issues. It will not be -possible to solve other issues until the cluster has a stable master node and -stable node membership. - -Diagnostics and statistics are usually not useful in an unstable cluster. These -tools only offer a view of the state of the cluster at a single point in time. -Instead, look at the cluster logs to see the pattern of behaviour over time. -Focus particularly on logs from the elected master. When a node leaves the -cluster, logs for the elected master include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] - node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [disconnected] ----- - -This message says that the `NodeLeftExecutor` on the elected master -(`instance-0000000000`) processed a `node-left` task, identifying the node that -was removed and the reason for its removal. When the node joins the cluster -again, logs for the elected master will include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] - node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [joining after restart, removed [24s] ago with reason [disconnected]] ----- - -This message says that the `NodeJoinExecutor` on the elected master -(`instance-0000000000`) processed a `node-join` task, identifying the node that -was added to the cluster and the reason for the task. 
- -Other nodes may log similar messages, but report fewer details: - -[source,text] ----- -[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] - [instance-0000000001] removed { - {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} - {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} - }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} ----- - -These messages are not especially useful for troubleshooting, so focus on the -ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted -on the elected master and which contain more details. If you don't see the -messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that: - -* You're looking at the logs for the elected master node. - -* The logs cover the correct time period. - -* Logging is enabled at `INFO` level. - -Nodes will also log a message containing `master node changed` whenever they -start or stop following the elected master. You can use these messages to -determine each node's view of the state of the master over time. - -If a node restarts, it will leave the cluster and then join the cluster again. -When it rejoins, the `NodeJoinExecutor` will log that it processed a -`node-join` task indicating that the node is `joining after restart`. If a node -is unexpectedly restarting, look at the node's logs to see why it is shutting -down. - -The <> API on the affected node will also provide some useful -information about the situation. - -If the node did not restart then you should look at the reason for its -departure more closely. Each reason has different troubleshooting steps, -described below. There are three possible reasons: - -* `disconnected`: The connection from the master node to the removed node was -closed. - -* `lagging`: The master published a cluster state update, but the removed node -did not apply it within the permitted timeout. By default, this timeout is 2 -minutes. Refer to <> for information about the -settings which control this mechanism. - -* `followers check retry count exceeded`: The master sent a number of -consecutive health checks to the removed node. These checks were rejected or -timed out. By default, each health check times out after 10 seconds and {es} -removes the node removed after three consecutively failed health checks. Refer -to <> for information about the settings which -control this mechanism. +See <>. [discrete] ===== Diagnosing `disconnected` nodes -Nodes typically leave the cluster with reason `disconnected` when they shut -down, but if they rejoin the cluster without restarting then there is some -other problem. - -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. - -The connections from the elected master node to every other node in the cluster -are particularly important. The elected master never spontaneously closes its -outbound connections to other nodes. Similarly, once an inbound connection is -fully established, a node never spontaneously it unless the node is shutting -down. 
- -If you see a node unexpectedly leave the cluster with the `disconnected` -reason, something other than {es} likely caused the connection to close. A -common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. +See <>. [discrete] ===== Diagnosing `lagging` nodes -{es} needs every node to process cluster state updates reasonably quickly. If a -node takes too long to process a cluster state update, it can be harmful to the -cluster. The master will remove these nodes with the `lagging` reason. Refer to -<> for information about the settings which control -this mechanism. - -Lagging is typically caused by performance issues on the removed node. However, -a node may also lag due to severe network delays. To rule out network delays, -ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more -information about the root cause. - -If you're an advanced user, you can get more detailed information about what -the node was doing when it was removed by configuring the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API on the faulty node and report the results in -the logs on the elected master. The results are compressed, encoded, and split -into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. 
[discrete] ===== Diagnosing `follower check retry count exceeded` nodes -Nodes sometimes leave the cluster with reason `follower check retry count -exceeded` when they shut down, but if they rejoin the cluster without -restarting then there is some other problem. - -{es} needs every node to respond to network messages successfully and -reasonably quickly. If a node rejects requests or does not respond at all then -it can be harmful to the cluster. If enough consecutive checks fail then the -master will remove the node with reason `follower check retry count exceeded` -and will indicate in the `node-left` message how many of the consecutive -unsuccessful checks failed and how many of them timed out. Refer to -<> for information about the settings which control -this mechanism. - -Timeouts and failures may be due to network delays or performance problems on -the affected nodes. Ensure that `net.ipv4.tcp_retries2` is -<> to eliminate network delays as -a possible cause for this kind of instability. Log messages containing -`warn threshold` may give further clues about the cause of the instability. - -If the last check failed with an exception then the exception is reported, and -typically indicates the problem that needs to be addressed. If any of the -checks timed out then narrow down the problem as follows. - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] - -By default the follower checks will time out after 30s, so if node departures -are unpredictable then capture stack dumps every 15s to be sure that at least -one stack dump was taken at the right time. +See <>. [discrete] ===== Diagnosing `ShardLockObtainFailedException` failures -If a node leaves and rejoins the cluster then {es} will usually shut down and -re-initialize its shards. If the shards do not shut down quickly enough then -{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. - -To gather more information about the reason for shards shutting down slowly, -configure the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.env.NodeEnvironment: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API whenever it encounters a -`ShardLockObtainFailedException`. The results are compressed, encoded, and -split into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. 
For instance, on Unix-like systems: - -[source,sh] ----- -cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. [discrete] ===== Diagnosing other network disconnections -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. - -{es} nodes will only actively close an outbound connection to another node if -the other node leaves the cluster. See -<> for further information about -identifying and troubleshooting this situation. If an outbound connection -closes for some other reason, nodes will log a message such as the following: - -[source,text] ----- -[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote ----- - -Similarly, once an inbound connection is fully established, a node never -spontaneously closes it unless the node is shutting down. - -Therefore if you see a node report that a connection to another node closed -unexpectedly, something other than {es} likely caused the connection to close. -A common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. -//end::troubleshooting[] +See <>. diff --git a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc index 387ebcdcd43c0..cbb35f7731034 100644 --- a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc +++ b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc @@ -1,4 +1,316 @@ [[troubleshooting-unstable-cluster]] == Troubleshooting an unstable cluster -include::../modules/discovery/fault-detection.asciidoc[tag=troubleshooting,leveloffset=-2] \ No newline at end of file +Normally, a node will only leave a cluster if deliberately shut down. If a node +leaves the cluster unexpectedly, it's important to address the cause. A cluster +in which nodes leave unexpectedly is unstable and can create several issues. +For instance: + +* The cluster health may be yellow or red. + +* Some shards will be initializing and other shards may be failing. + +* Search, indexing, and monitoring operations may fail and report exceptions in +logs. + +* The `.security` index may be unavailable, blocking access to the cluster. + +* The master may appear busy due to frequent cluster state updates. + +To troubleshoot a cluster in this state, first ensure the cluster has a +<>. 
Next, focus on the nodes +unexpectedly leaving the cluster ahead of all other issues. It will not be +possible to solve other issues until the cluster has a stable master node and +stable node membership. + +Diagnostics and statistics are usually not useful in an unstable cluster. These +tools only offer a view of the state of the cluster at a single point in time. +Instead, look at the cluster logs to see the pattern of behaviour over time. +Focus particularly on logs from the elected master. When a node leaves the +cluster, logs for the elected master include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] + node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [disconnected] +---- + +This message says that the `NodeLeftExecutor` on the elected master +(`instance-0000000000`) processed a `node-left` task, identifying the node that +was removed and the reason for its removal. When the node joins the cluster +again, logs for the elected master will include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] + node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [joining after restart, removed [24s] ago with reason [disconnected]] +---- + +This message says that the `NodeJoinExecutor` on the elected master +(`instance-0000000000`) processed a `node-join` task, identifying the node that +was added to the cluster and the reason for the task. + +Other nodes may log similar messages, but report fewer details: + +[source,text] +---- +[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] + [instance-0000000001] removed { + {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} + {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} + }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} +---- + +These messages are not especially useful for troubleshooting, so focus on the +ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted +on the elected master and which contain more details. If you don't see the +messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that: + +* You're looking at the logs for the elected master node. + +* The logs cover the correct time period. + +* Logging is enabled at `INFO` level. + +Nodes will also log a message containing `master node changed` whenever they +start or stop following the elected master. You can use these messages to +determine each node's view of the state of the master over time. + +If a node restarts, it will leave the cluster and then join the cluster again. +When it rejoins, the `NodeJoinExecutor` will log that it processed a +`node-join` task indicating that the node is `joining after restart`. If a node +is unexpectedly restarting, look at the node's logs to see why it is shutting +down. + +The <> API on the affected node will also provide some useful +information about the situation. + +If the node did not restart then you should look at the reason for its +departure more closely. Each reason has different troubleshooting steps, +described below. 
There are three possible reasons: + +* `disconnected`: The connection from the master node to the removed node was +closed. + +* `lagging`: The master published a cluster state update, but the removed node +did not apply it within the permitted timeout. By default, this timeout is 2 +minutes. Refer to <> for information about the +settings which control this mechanism. + +* `followers check retry count exceeded`: The master sent a number of +consecutive health checks to the removed node. These checks were rejected or +timed out. By default, each health check times out after 10 seconds and {es} +removes the node after three consecutively failed health checks. Refer +to <> for information about the settings which +control this mechanism. + +[discrete] +[[troubleshooting-unstable-cluster-disconnected]] +=== Diagnosing `disconnected` nodes + +Nodes typically leave the cluster with reason `disconnected` when they shut +down, but if they rejoin the cluster without restarting then there is some +other problem. + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. + +The connections from the elected master node to every other node in the cluster +are particularly important. The elected master never spontaneously closes its +outbound connections to other nodes. Similarly, once an inbound connection is +fully established, a node never spontaneously closes it unless the node is +shutting down. + +If you see a node unexpectedly leave the cluster with the `disconnected` +reason, something other than {es} likely caused the connection to close. A +common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. + +[discrete] +[[troubleshooting-unstable-cluster-lagging]] +=== Diagnosing `lagging` nodes + +{es} needs every node to process cluster state updates reasonably quickly. If a +node takes too long to process a cluster state update, it can be harmful to the +cluster. The master will remove these nodes with the `lagging` reason. Refer to +<> for information about the settings which control +this mechanism. + +Lagging is typically caused by performance issues on the removed node. However, +a node may also lag due to severe network delays. To rule out network delays, +ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more +information about the root cause.
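+ +For example, on a Linux host you can inspect and adjust this kernel setting with `sysctl`. The following is a minimal sketch rather than official guidance, and the value shown is illustrative; use the value recommended by the linked configuration advice: + +[source,sh] +---- +# show the current TCP retransmission setting +sysctl net.ipv4.tcp_retries2 +# lower it on the running system (illustrative value) +sysctl -w net.ipv4.tcp_retries2=5 +# persist the change across reboots +echo 'net.ipv4.tcp_retries2=5' >> /etc/sysctl.conf +----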
+ +If you're an advanced user, you can get more detailed information about what +the node was doing when it was removed by configuring the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API on the faulty node and report the results in +the logs on the elected master. The results are compressed, encoded, and split +into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-follower-check]] +=== Diagnosing `follower check retry count exceeded` nodes + +Nodes sometimes leave the cluster with reason `follower check retry count +exceeded` when they shut down, but if they rejoin the cluster without +restarting then there is some other problem. + +{es} needs every node to respond to network messages successfully and +reasonably quickly. If a node rejects requests or does not respond at all then +it can be harmful to the cluster. If enough consecutive checks fail then the +master will remove the node with reason `follower check retry count exceeded` +and will indicate in the `node-left` message how many of the consecutive +unsuccessful checks failed and how many of them timed out. Refer to +<> for information about the settings which control +this mechanism. + +Timeouts and failures may be due to network delays or performance problems on +the affected nodes. Ensure that `net.ipv4.tcp_retries2` is +<> to eliminate network delays as +a possible cause for this kind of instability. Log messages containing +`warn threshold` may give further clues about the cause of the instability. + +If the last check failed with an exception then the exception is reported, and +typically indicates the problem that needs to be addressed. If any of the +checks timed out then narrow down the problem as follows. 
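+ +One way to do this is to capture stack dumps from the affected node on a repeating schedule, so that at least one dump lands while a check is in flight. This is a minimal sketch, not an {es} tool: it assumes a Unix-like system, that `jstack` from the node's JDK is on the `PATH`, and that the hypothetical variable `ES_PID` holds the {es} process id: + +[source,sh] +---- +# take a stack dump of the Elasticsearch JVM every 15 seconds +# (ES_PID is assumed to hold the process id; adjust to your setup) +while true; do jstack "$ES_PID" > "stackdump-$(date +%s).txt"; sleep 15; done +---- + +The reasoning behind the 15 second interval is explained below.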
+ +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] + +By default the follower checks will time out after 30s, so if node departures +are unpredictable then capture stack dumps every 15s to be sure that at least +one stack dump was taken at the right time. + +[discrete] +[[troubleshooting-unstable-cluster-shardlockobtainfailedexception]] +=== Diagnosing `ShardLockObtainFailedException` failures + +If a node leaves and rejoins the cluster then {es} will usually shut down and +re-initialize its shards. If the shards do not shut down quickly enough then +{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. + +To gather more information about the reason for shards shutting down slowly, +configure the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.env.NodeEnvironment: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API whenever it encounters a +`ShardLockObtainFailedException`. The results are compressed, encoded, and +split into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-network]] +=== Diagnosing other network disconnections + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. + +{es} nodes will only actively close an outbound connection to another node if +the other node leaves the cluster. See +<> for further information about +identifying and troubleshooting this situation. If an outbound connection +closes for some other reason, nodes will log a message such as the following: + +[source,text] +---- +[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote +---- + +Similarly, once an inbound connection is fully established, a node never +spontaneously closes it unless the node is shutting down. + +Therefore if you see a node report that a connection to another node closed +unexpectedly, something other than {es} likely caused the connection to close. 
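+ +To locate these close events quickly, you can search the server logs on each node for the connection manager messages shown above. This is a minimal sketch that assumes the default JSON log layout under the node's `logs` directory; adjust the file pattern if your logging is customized: + +[source,sh] +---- +# find transport connection close events in the server logs +# (*_server.json is the default server log file name pattern) +grep 'ClusterConnectionManager' logs/*_server.json +----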
A common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 3eb8939c22a65..cc0bc5e2257c8 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -2,8 +2,8 @@ "INITIAL_MASTER_NODES": "important-settings.html#initial_master_nodes", "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html", "UNSTABLE_CLUSTER_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html", - "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_lagging_nodes_2", - "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_shardlockobtainfailedexception_failures_2", + "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-lagging", + "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-shardlockobtainfailedexception", "CONCURRENT_REPOSITORY_WRITERS": "diagnosing-corrupted-repositories.html", "ARCHIVE_INDICES": "archive-indices.html", "HTTP_TRACER": "modules-network.html#http-rest-request-tracer", From 5ac4d8c71e06880624a9a91bfec4ae310d9cab2f Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Thu, 29 Aug 2024 14:48:15 +0200 Subject: [PATCH 044/144] Fix union-types where one index is missing the field (#111932) * Fix union-types where one index is missing the field When none of the indexes has the field, a validation error is correctly thrown, and when all indexes have the field, union-types works as normal. But when some indexes have the field and some do not, we were getting an internal error. We treat this case similarly to when some documents are missing the field, in which case `null` values are produced. So now a multi-index query where some indexes are missing the field will produce nulls for the documents coming from those indexes.
* Update docs/changelog/111932.yaml * Added capability for this fix (missing-field) --- docs/changelog/111932.yaml | 6 ++ .../xpack/esql/CsvTestsDataLoader.java | 6 ++ .../mapping-missing_ip_sample_data.json | 13 ++++ .../main/resources/missing_ip_sample_data.csv | 8 +++ .../src/main/resources/union_types.csv-spec | 68 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 ++ .../planner/EsPhysicalOperationProviders.java | 4 +- 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/111932.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv diff --git a/docs/changelog/111932.yaml b/docs/changelog/111932.yaml new file mode 100644 index 0000000000000..ce840ecebcff0 --- /dev/null +++ b/docs/changelog/111932.yaml @@ -0,0 +1,6 @@ +pr: 111932 +summary: Fix union-types where one index is missing the field +area: ES|QL +type: bug +issues: + - 111912 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index b20e3bb0d5409..9ee22113a4244 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -68,6 +68,11 @@ public class CsvTestsDataLoader { "mapping-sample_data_ts_long.json", "sample_data_ts_long.csv" ); + private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset( + "missing_ip_sample_data", + "mapping-missing_ip_sample_data.json", + "missing_ip_sample_data.csv" + ); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); @@ -112,6 +117,7 @@ public class CsvTestsDataLoader { Map.entry(ALERTS.indexName, ALERTS), Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), + Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json new file mode 100644 index 0000000000000..6f3796dd7715d --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv new file mode 100644 index 0000000000000..e8e9ddcaee83b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv @@ -0,0 +1,8 @@ +@timestamp:date,event_duration:long,message:keyword 
+2023-10-23T13:55:01.543Z,1756467,Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z,5033755,Connection error +2023-10-23T13:52:55.015Z,8268153,Connection error +2023-10-23T13:51:54.732Z,725448,Connection error +2023-10-23T13:33:34.937Z,1232382,Disconnected +2023-10-23T12:27:28.948Z,2764889,Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 6819727be0131..c6a2d47a78dc9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -405,6 +405,74 @@ count:long | message:keyword 2 | Connected to 10.1.0.3 ; +multiIndexMissingIpToString +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_STRING(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:keyword | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexMissingIpToIp +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 
1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + multiIndexTsLong required_capability: union_types required_capability: metadata_fields diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 81b2ba71b8808..120323ebeb7a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -183,6 +183,11 @@ public enum Cap { */ UNION_TYPES_FIX_RENAME_RESOLUTION, + /** + * Fix for union-types when some indexes are missing the required field. Done in #111932. + */ + UNION_TYPES_MISSING_FIELD, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. * see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 8fddb7407a02a..04be731484267 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -138,7 +138,9 @@ private BlockLoader getBlockLoaderFor( if (unionTypes != null) { String indexName = shardContext.ctx.index().getName(); Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); - return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); + return conversion == null + ? 
BlockLoader.CONSTANT_NULLS + : new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); } return blockLoader; } From 5c200afb9e3a02400cfdf45c4469c30bd1417223 Mon Sep 17 00:00:00 2001 From: "Mark J. Hoy" Date: Thu, 29 Aug 2024 09:12:03 -0400 Subject: [PATCH 045/144] [ML] Adds Explain Functionality to LTR Rescoring (#112155) --- .../integration/LearningToRankRescorerIT.java | 433 ++++++++++-------- .../inference/ltr/LearningToRankRescorer.java | 55 ++- 2 files changed, 303 insertions(+), 185 deletions(-) diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java index b2a0b60aed7ba..4a703117c6551 100644 --- a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java +++ b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java @@ -16,7 +16,9 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -28,189 +30,11 @@ public class LearningToRankRescorerIT extends InferenceTestCase { @Before public void setupModelAndData() throws IOException { - putRegressionModel(MODEL_ID, """ - { - "description": "super complex model for tests", - "inference_config": { - "learning_to_rank": { - "feature_extractors": [ - { - "query_extractor": { - "feature_name": "cost", - "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} - } - }, - { - "query_extractor": { - "feature_name": "type_tv", - "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_vcr", - "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_laptop", - "query": {"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "two", - "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } - } - }, - { - "query_extractor": { - "feature_name": "product_bm25", - "query": { "term": { "product": "{{keyword}}" } } - } - } - ] - } - }, - "definition": { - "trained_model": { - "ensemble": { - "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], - "target_type": "regression", - "trained_models": [ - { - "tree": { - "feature_names": [ - "cost" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 400, - "decision_type": "lte", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 5.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "type_tv" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 12.0 - } - ], - 
"target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "two" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "product_bm25" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 4.0 - } - ], - "target_type": "regression" - } - } - ] - } - } - } - } - """); - createIndex(INDEX_NAME, Settings.EMPTY, """ - "properties":{ - "product":{"type": "keyword"}, - "cost":{"type": "integer"} - }"""); - indexData("{ \"product\": \"TV\", \"cost\": 300}"); - indexData("{ \"product\": \"TV\", \"cost\": 400}"); - indexData("{ \"product\": \"TV\", \"cost\": 600}"); - indexData("{ \"product\": \"VCR\", \"cost\": 15}"); - indexData("{ \"product\": \"VCR\", \"cost\": 350}"); - indexData("{ \"product\": \"VCR\", \"cost\": 580}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 100}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 300}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 500}"); + putRegressionModel(MODEL_ID, testRegressionModel); + createIndex(INDEX_NAME, Settings.EMPTY, testIndexDefinition); + for (String testDataItem : testIndexData) { + indexData(testDataItem); + } adminClient().performRequest(new Request("POST", INDEX_NAME + "/_refresh")); } @@ -249,6 +73,19 @@ public void testLearningToRankRescore() throws Exception { assertHitScores(client().performRequest(request), List.of(9.0, 9.0, 6.0)); } + public void testLearningToRankRescoreWithExplain() throws Exception { + Request request = new Request("GET", "store/_search?size=3&explain=true&error_trace"); + request.setJsonEntity(""" + { + "rescore": { + "window_size": 10, + "learning_to_rank": { "model_id": "ltr-model" } + } + }"""); + var response = client().performRequest(request); + assertExplainExtractedFeatures(response, List.of("type_tv", "cost", "two")); + } + public void testLearningToRankRescoreSmallWindow() throws Exception { Request request = new Request("GET", "store/_search?size=5"); request.setJsonEntity(""" @@ -336,4 +173,234 @@ private void indexData(String data) throws IOException { private static void assertHitScores(Response response, List expectedScores) throws IOException { assertThat((List) XContentMapValues.extractValue("hits.hits._score", responseAsMap(response)), equalTo(expectedScores)); } + + @SuppressWarnings("unchecked") + private static void assertExplainExtractedFeatures(Response response, List expectedFeatures) throws IOException { + var explainValues = (ArrayList>) XContentMapValues.extractValue( + "hits.hits._explanation", + responseAsMap(response) + ); + + assertThat(explainValues.size(), equalTo(3)); + for (Map hit : explainValues) { + assertThat(hit.get("description"), equalTo("rescored using LTR model ltr-model")); + + var queryDetails = (ArrayList>) hit.get("details"); + assertThat(queryDetails.size(), equalTo(2)); + + assertThat(queryDetails.get(0).get("description"), equalTo("first pass query score")); + assertThat(queryDetails.get(1).get("description"), equalTo("extracted features")); + + var featureDetails = new 
ArrayList<>((ArrayList>) queryDetails.get(1).get("details")); + assertThat(featureDetails.size(), equalTo(3)); + + var missingKeys = new ArrayList(); + for (String expectedFeature : expectedFeatures) { + var expectedDescription = Strings.format("feature value for [%s]", expectedFeature); + + var wasFound = false; + for (Map detailItem : featureDetails) { + if (detailItem.get("description").equals(expectedDescription)) { + featureDetails.remove(detailItem); + wasFound = true; + break; + } + } + + if (wasFound == false) { + missingKeys.add(expectedFeature); + } + } + + assertThat(Strings.format("Could not find features: [%s]", String.join(", ", missingKeys)), featureDetails.size(), equalTo(0)); + } + } + + private static String testIndexDefinition = """ + "properties":{ + "product":{"type": "keyword"}, + "cost":{"type": "integer"} + }"""; + + private static List testIndexData = List.of( + "{ \"product\": \"TV\", \"cost\": 300}", + "{ \"product\": \"TV\", \"cost\": 400}", + "{ \"product\": \"TV\", \"cost\": 600}", + "{ \"product\": \"VCR\", \"cost\": 15}", + "{ \"product\": \"VCR\", \"cost\": 350}", + "{ \"product\": \"VCR\", \"cost\": 580}", + "{ \"product\": \"Laptop\", \"cost\": 100}", + "{ \"product\": \"Laptop\", \"cost\": 300}", + "{ \"product\": \"Laptop\", \"cost\": 500}" + ); + + private static String testRegressionModel = """ + { + "description": "super complex model for tests", + "inference_config": { + "learning_to_rank": { + "feature_extractors": [ + { + "query_extractor": { + "feature_name": "cost", + "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} + } + }, + { + "query_extractor": { + "feature_name": "type_tv", + "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_vcr", + "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_laptop", + "query": {"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "two", + "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } + } + }, + { + "query_extractor": { + "feature_name": "product_bm25", + "query": { "term": { "product": "{{keyword}}" } } + } + } + ] + } + }, + "definition": { + "trained_model": { + "ensemble": { + "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], + "target_type": "regression", + "trained_models": [ + { + "tree": { + "feature_names": [ + "cost" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 400, + "decision_type": "lte", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 5.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "type_tv" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 12.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "two" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + 
"decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "product_bm25" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 4.0 + } + ], + "target_type": "regression" + } + } + ] + } + } + } + } + """; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java index 8a310ba2719f2..70d0b980bb3bf 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java @@ -28,6 +28,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import static java.util.stream.Collectors.toUnmodifiableSet; @@ -129,8 +130,58 @@ public TopDocs rescore(TopDocs topDocs, IndexSearcher searcher, RescoreContext r @Override public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext, Explanation sourceExplanation) throws IOException { - // TODO: Call infer again but with individual feature importance values and explaining the model (which features are used, etc.) - return null; + if (sourceExplanation == null) { + return Explanation.noMatch("no match found"); + } + + LearningToRankRescorerContext ltrContext = (LearningToRankRescorerContext) rescoreContext; + LocalModel localModelDefinition = ltrContext.regressionModelDefinition; + + if (localModelDefinition == null) { + throw new IllegalStateException("local model reference is null, missing rewriteAndFetch before rescore phase?"); + } + + List leaves = ltrContext.executionContext.searcher().getIndexReader().leaves(); + + int endDoc = 0; + int readerUpto = -1; + LeafReaderContext currentSegment = null; + + while (topLevelDocId >= endDoc) { + readerUpto++; + currentSegment = leaves.get(readerUpto); + endDoc = currentSegment.docBase + currentSegment.reader().maxDoc(); + } + + assert currentSegment != null : "Unexpected null segment"; + + int targetDoc = topLevelDocId - currentSegment.docBase; + + List featureExtractors = ltrContext.buildFeatureExtractors(searcher); + int featureSize = featureExtractors.stream().mapToInt(fe -> fe.featureNames().size()).sum(); + + Map features = Maps.newMapWithExpectedSize(featureSize); + + for (FeatureExtractor featureExtractor : featureExtractors) { + featureExtractor.setNextReader(currentSegment); + featureExtractor.addFeatures(features, targetDoc); + } + + // Predicting the value + var ltrScore = ((Number) localModelDefinition.inferLtr(features, ltrContext.learningToRankConfig).predictedValue()).floatValue(); + + List featureExplanations = new ArrayList<>(); + for (String featureName : features.keySet()) { + Number featureValue = Objects.requireNonNullElse((Number) features.get(featureName), 0); + featureExplanations.add(Explanation.match(featureValue, "feature value for [" + featureName + "]")); + } + + return Explanation.match( + ltrScore, + "rescored using LTR model " + 
ltrContext.regressionModelDefinition.getModelId(), + Explanation.match(sourceExplanation.getValue(), "first pass query score", sourceExplanation), + Explanation.match(0f, "extracted features", featureExplanations) + ); } /** Returns a new {@link TopDocs} with the topN from the incoming one, or the same TopDocs if the number of hits is already <= From e36b5551ab418860e9db84cdaa96dc2e0df30d6b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 29 Aug 2024 09:44:15 -0400 Subject: [PATCH 046/144] ESQL: Method to convert BooleanBlock to a "mask" (#112253) This adds a method, `BooleanBlock#toMask` to convert `BooleanBlock`s into a "mask" for use with `keepMask`. --- .../compute/data/BooleanArrayBlock.java | 21 ++++++ .../compute/data/BooleanBigArrayBlock.java | 21 ++++++ .../compute/data/BooleanBlock.java | 7 ++ .../compute/data/BooleanVectorBlock.java | 6 ++ .../compute/data/ConstantNullBlock.java | 5 ++ .../elasticsearch/compute/data/ToMask.java | 22 +++++++ .../compute/data/X-ArrayBlock.java.st | 22 +++++++ .../compute/data/X-BigArrayBlock.java.st | 23 +++++++ .../compute/data/X-Block.java.st | 10 ++- .../compute/data/X-Vector.java.st | 2 +- .../compute/data/X-VectorBlock.java.st | 9 ++- .../compute/data/BasicBlockTests.java | 41 +++++++++++- .../data/BigArrayBlockBuilderTests.java | 66 +++++++++++++++++++ .../compute/data/BigArrayVectorTests.java | 6 ++ .../compute/data/BlockMultiValuedTests.java | 49 ++++++++++++++ 15 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java index 14f6c9591ed10..3d600bec1bd65 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java @@ -85,6 +85,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java index 5342728af4fee..f353512eb93b7 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java @@ -86,6 +86,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try 
(BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java index 566b8fbed445c..5d2d6c97a11f1 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java @@ -37,6 +37,13 @@ public sealed interface BooleanBlock extends Block permits BooleanArrayBlock, Bo @Override BooleanVector asVector(); + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + @Override BooleanBlock filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java index ca2fc58bf0bb5..1544cc3355cd0 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java @@ -31,6 +31,12 @@ public BooleanVector asVector() { return vector; } + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index fc4cdc1d41f46..3d61613ba70e9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -48,6 +48,11 @@ public OrdinalBytesRefBlock asOrdinals() { return null; } + @Override + public ToMask toMask() { + return new ToMask(blockFactory.newConstantBooleanVector(false, positionCount), false); + } + @Override public boolean isNull(int position) { return true; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java new file mode 100644 index 0000000000000..5b71679048e21 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.data; + +import org.elasticsearch.core.Releasable; + +/** + * Result from calling {@link BooleanBlock#toMask}. {@link #close closing} this will + * close the contained {@link #mask()}. If you want to keep a reference to it then you'll + * have to {@link Block#incRef()} it. + */ +public record ToMask(BooleanVector mask, boolean hadMultivaluedFields) implements Releasable { + @Override + public void close() { + mask.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st index 750de95e7b8d7..e855e6d6296d8 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st @@ -101,6 +101,28 @@ $if(BytesRef)$ public OrdinalBytesRefBlock asOrdinals() { return null; } + +$elseif(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } $endif$ @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st index bf9e6fec18726..23632bf41349c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st @@ -86,6 +86,29 @@ public final class $Type$BigArrayBlock extends AbstractArrayBlock implements $Ty return null; } +$if(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } +$endif$ + @Override public $type$ get$Type$(int valueIndex) { return vector.get$Type$(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st index da0769af2d185..67e4ac4bb334f 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st @@ -63,8 +63,16 @@ $if(BytesRef)$ * returns null. Callers must not release the returned block as no extra reference is retained by this method. 
*/ OrdinalBytesRefBlock asOrdinals(); -$endif$ +$elseif(boolean)$ + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + +$endif$ @Override $Type$Block filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st index 09f11f3504393..e19c1788cdb6b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st @@ -51,8 +51,8 @@ $if(BytesRef)$ * returns null. Callers must not release the returned vector as no extra reference is retained by this method. */ OrdinalBytesRefVector asOrdinals(); -$endif$ +$endif$ @Override $Type$Vector filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st index eec75f62f22f8..d4c6859e64b2a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st @@ -44,8 +44,15 @@ $if(BytesRef)$ return null; } } -$endif$ +$elseif(boolean)$ + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + +$endif$ @Override $if(BytesRef)$ public BytesRef getBytesRef(int valueIndex, BytesRef dest) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index e8401048af011..ad372da47d6b8 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -800,6 +800,12 @@ public void testBooleanBlock() { } assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); assertEmptyLookup(blockFactory, block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(p % 10 == 0)); + } + } try (BooleanBlock.Builder blockBuilder = blockFactory.newBooleanBlockBuilder(1)) { BooleanBlock copy = blockBuilder.copyFrom(block, 0, block.getPositionCount()).build(); @@ -826,6 +832,7 @@ public void testBooleanBlock() { IntStream.range(0, positionCount).mapToObj(ii -> randomBoolean()).forEach(vectorBuilder::appendBoolean); BooleanVector vector = vectorBuilder.build(); assertSingleValueDenseBlock(vector.asBlock()); + assertToMask(vector); releaseAndAssertBreaker(vector.asBlock()); } } @@ -1358,6 +1365,19 @@ void assertNullValues( assertTrue(block.isNull(randomNullPosition)); assertFalse(block.isNull(randomNonNullPosition)); releaseAndAssertBreaker(block); + if (block instanceof BooleanBlock bb) { + try (ToMask mask = bb.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(nullsMask.get(p) == false && p % 10 == 0)); + } + } + } + } + + void 
assertZeroPositionsAndRelease(BooleanBlock block) { + assertToMaskZeroPositions(block); + assertZeroPositionsAndRelease((Block) block); } void assertZeroPositionsAndRelease(Block block) { @@ -1366,6 +1386,11 @@ void assertZeroPositionsAndRelease(Block block) { releaseAndAssertBreaker(block); } + void assertZeroPositionsAndRelease(BooleanVector vector) { + assertToMask(vector); + assertZeroPositionsAndRelease((Vector) vector); + } + void assertZeroPositionsAndRelease(Vector vector) { assertThat(vector.getPositionCount(), is(0)); assertKeepMaskEmpty(vector); @@ -1386,6 +1411,20 @@ static void assertKeepMaskEmpty(Vector vector) { } } + static void assertToMaskZeroPositions(BooleanBlock block) { + try (ToMask mask = block.toMask()) { + assertThat(mask.mask().getPositionCount(), equalTo(0)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + + static void assertToMask(BooleanVector vector) { + try (ToMask mask = vector.asBlock().toMask()) { + assertThat(mask.mask(), sameInstance(vector)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + void releaseAndAssertBreaker(Block... blocks) { assertThat(breaker.getUsed(), greaterThan(0L)); Page[] pages = Arrays.stream(blocks).map(Page::new).toArray(Page[]::new); @@ -1836,7 +1875,7 @@ static void assertKeepMask(Block block) { /** * Build a random valid "mask" of single valued boolean fields that. */ - private static BooleanVector randomMask(int positions) { + static BooleanVector randomMask(int positions) { try (BooleanVector.Builder builder = TestBlockFactory.getNonBreakingInstance().newBooleanVectorFixedBuilder(positions)) { for (int i = 0; i < positions; i++) { builder.appendBoolean(randomBoolean()); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java index df32dcaddd927..34d591cd87d84 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java @@ -164,6 +164,12 @@ public void testBooleanVector() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[p])); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanVectorBlock.class)); assertThat(block.asVector(), instanceOf(BooleanArrayVector.class)); @@ -224,6 +230,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanArrayBlock.class)); assertNull(copy.asVector()); @@ -253,6 +265,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + 
assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanBigArrayBlock.class)); assertNull(block.asVector()); @@ -266,4 +284,52 @@ public void testBooleanBlock() throws IOException { } assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); } + + /** + * Tests a block with one value being multivalued and the rest are single valued. + */ + public void testBooleanBlockOneMv() { + int mvCount = between(2, 10); + int positionCount = randomIntBetween(1000, 5000); + blockFactory = new BlockFactory(blockFactory.breaker(), blockFactory.bigArrays(), ByteSizeValue.ofBytes(1)); + try (var builder = blockFactory.newBooleanBlockBuilder(between(1, mvCount + positionCount))) { + boolean[] elements = new boolean[positionCount + mvCount]; + builder.beginPositionEntry(); + for (int i = 0; i < mvCount; i++) { + elements[i] = randomBoolean(); + builder.appendBoolean(elements[i]); + } + builder.endPositionEntry(); + for (int p = 1; p < positionCount; p++) { + elements[mvCount + p] = randomBoolean(); + builder.appendBoolean(elements[mvCount + p]); + } + try (var block = builder.build()) { + assertThat(block, instanceOf(BooleanBigArrayBlock.class)); + assertNull(block.asVector()); + assertThat(block.getPositionCount(), equalTo(positionCount)); + assertThat(block.getValueCount(0), equalTo(mvCount)); + for (int i = 0; i < mvCount; i++) { + assertThat(block.getBoolean(block.getFirstValueIndex(0) + i), equalTo(elements[i])); + } + for (int p = 1; p < positionCount; p++) { + assertThat(block.getValueCount(p), equalTo(1)); + assertThat(block.getBoolean(block.getFirstValueIndex(p)), equalTo(elements[mvCount + p])); + } + assertKeepMask(block); + try (ToMask mask = block.toMask()) { + /* + * NOTE: this test is customized to the layout above where we don't make + * any fields with 0 values. 
+ */ + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + assertThat(mask.mask().getBoolean(0), equalTo(false)); + for (int p = 1; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[mvCount + p])); + } + } + } + } + assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java index af4c643a90625..aab8b86f9b795 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java @@ -72,6 +72,12 @@ public void testBoolean() throws IOException { assertEmptyLookup(blockFactory, vector.asBlock()); assertSerialization(block); assertThat(vector.toString(), containsString("BooleanBigArrayVector[positions=" + positionCount)); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < values.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(values[p])); + } + } } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java index c5e130726844d..e37b2638b56f7 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java @@ -31,6 +31,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.nullValue; public class BlockMultiValuedTests extends ESTestCase { @ParametersFactory @@ -122,6 +123,54 @@ public void testLookupFromSingleManyPages() { assertLookup(ByteSizeValue.ofBytes(1), between(1, 32), p -> 1); } + public void testToMask() { + if (elementType != ElementType.BOOLEAN) { + return; + } + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 2, 10, 0, 0); + try (ToMask mask = ((BooleanBlock) b.block()).toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < b.values().size(); p++) { + List v = b.values().get(p); + if (v == null) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + if (v.size() != 1) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + assertThat(mask.mask().getBoolean(p), equalTo(v.get(0))); + } + } finally { + b.block().close(); + } + } + + public void testMask() { + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); + try ( + BooleanVector mask = BasicBlockTests.randomMask(b.values().size() + between(0, 1000)); + Block masked = b.block().keepMask(mask) + ) { + for (int p = 0; p < b.values().size(); p++) { + List inputValues = b.values().get(p); + List valuesAtPosition = BasicBlockTests.valuesAtPositions(masked, p, p + 1).get(0); + if (inputValues == null || mask.getBoolean(p) == false) { + assertThat(masked.isNull(p), equalTo(true)); + assertThat(valuesAtPosition, nullValue()); + continue; + } + assertThat(masked.isNull(p), equalTo(false)); + 
assertThat(valuesAtPosition, equalTo(inputValues)); + } + } finally { + b.block().close(); + } + } + private void assertFiltered(boolean all, boolean shuffled) { int positionCount = randomIntBetween(1, 16 * 1024); var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); From 68b211e025f2222704e50b2f6b6890dbf8f94515 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:01:56 +0200 Subject: [PATCH 047/144] Store original source for keywords using a normalizer (#112151) Using a normalizer for a keyword field might result in not being able to reconstruct the original source when using synthetic source. Here if synthetic source is enabled and a normalizer is configured we store the original value in a stored field which is later used at document reconstruction time to reconstruct the field value as it was in the original document. We use the same fallback solution we use in other places like `ignore_malformed`. --- docs/changelog/112151.yaml | 5 ++ .../test/mget/90_synthetic_source.yml | 88 +++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 19 ++-- .../index/mapper/MapperFeatures.java | 1 + .../KeywordFieldSyntheticSourceSupport.java | 9 +- 5 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 docs/changelog/112151.yaml diff --git a/docs/changelog/112151.yaml b/docs/changelog/112151.yaml new file mode 100644 index 0000000000000..f5cbfd8da07c2 --- /dev/null +++ b/docs/changelog/112151.yaml @@ -0,0 +1,5 @@ +pr: 112151 +summary: Store original source for keywords using a normalizer +area: Logs +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml index 2935c0c1c41b5..ff17a92ed0fcc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml @@ -46,6 +46,94 @@ keyword: docs.1._source: kwd: bar +--- +keyword with normalizer: + - requires: + cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ] + reason: support for normalizer on keyword fields + - do: + indices.create: + index: test-keyword-with-normalizer + body: + settings: + analysis: + normalizer: + lowercase: + type: custom + filter: + - lowercase + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + normalizer: lowercase + keyword_with_ignore_above: + type: keyword + normalizer: lowercase + ignore_above: 10 + keyword_without_doc_values: + type: keyword + normalizer: lowercase + doc_values: false + + - do: + index: + index: test-keyword-with-normalizer + id: 1 + body: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - do: + index: + index: test-keyword-with-normalizer + id: 2 + body: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - do: + index: + index: test-keyword-with-normalizer + id: 3 + body: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, 
There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + + - do: + mget: + index: test-keyword-with-normalizer + body: + ids: [ 1, 2, 3 ] + - match: { docs.0._index: "test-keyword-with-normalizer" } + - match: { docs.0._id: "1" } + - match: + docs.0._source: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - match: { docs.1._index: "test-keyword-with-normalizer" } + - match: { docs.1._id: "2" } + - match: + docs.1._source: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - match: { docs.2._index: "test-keyword-with-normalizer" } + - match: { docs.2._id: "3" } + - match: + docs.2._source: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + --- stored text: - requires: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 9645b4397df4f..d130f37c3e8eb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above"); + static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source"); public static class Defaults { public static final FieldType FIELD_TYPE; @@ -856,7 +857,7 @@ public boolean hasNormalizer() { private final Script script; private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - private final boolean storeIgnored; + private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; @@ -866,7 +867,7 @@ private KeywordFieldMapper( KeywordFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, - boolean storeIgnored, + boolean isSyntheticSource, Builder builder ) { super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue()); @@ -881,7 +882,7 @@ private KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; - this.storeIgnored = storeIgnored; + this.isSyntheticSource = isSyntheticSource; } @Override @@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) { if (value.length() > fieldType().ignoreAbove()) { context.addIgnoredField(fullPath()); - if (storeIgnored) { + if (isSyntheticSource) { // Save a copy of the field so synthetic source can load it context.doc().add(new StoredField(originalName(), new BytesRef(value))); } @@ -1026,6 +1027,11 @@ private String originalName() { @Override protected SyntheticSourceMode syntheticSourceMode() { + if (hasNormalizer()) { + // NOTE: no matter if we have doc values or not we use a 
stored field to reconstruct the original value + // whose doc values would be altered by the normalizer + return SyntheticSourceMode.FALLBACK; + } if (fieldType.stored() || hasDocValues) { return SyntheticSourceMode.NATIVE; } @@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - if (hasNormalizer()) { - throw new IllegalArgumentException( - "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer" - ); - } if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) { return super.syntheticFieldLoader(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 6dce9d6c7b86e..63bbef061c61f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -33,6 +33,7 @@ public Set getFeatures() { NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS, BooleanFieldMapper.BOOLEAN_DIMENSION, ObjectMapper.SUBOBJECTS_AUTO, + KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE, SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX ); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java index 6abe923851318..2f452161b10ca 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java @@ -21,8 +21,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.hamcrest.Matchers.equalTo; - public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport { private final Integer ignoreAbove; private final boolean allIgnored; @@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException { @Override public List invalidExample() throws IOException { - return List.of( - new MapperTestCase.SyntheticSourceInvalidExample( - equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"), - b -> b.field("type", "keyword").field("normalizer", "lowercase") - ) - ); + return List.of(); } } From 1be4f65da2ac35d971626353b8a076aa75a7b693 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 15:20:03 +0100 Subject: [PATCH 048/144] Add constants for UUID lengths (#112353) Our UUID strings have fixed lengths (depending on the type of UUID). Sometimes we might want code to rely on knowing these lengths rather than doing some other string manipulations to look for a boundary. This commit exposes constants for these things. 
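For reviewers, a minimal sketch of the length arithmetic, assuming nothing beyond
unpadded URL-safe base64 (it mirrors the getUnpaddedBase64StringLength helper this
patch adds to UUIDTests; the standalone method below is illustrative only):

    // Unpadded base64 emits 4 chars per 3-byte group, rounding the tail up.
    static int unpaddedBase64StringLength(int sizeInBytes) {
        return (int) Math.ceil(sizeInBytes * 4.0 / 3.0);
    }
    // unpaddedBase64StringLength(15) == 20, the new TIME_BASED_UUID_STRING_LENGTH
    // unpaddedBase64StringLength(16) == 22, the new RANDOM_BASED_UUID_STRING_LENGTH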
--- .../common/RandomBasedUUIDGenerator.java | 4 +- .../common/TimeBasedUUIDGenerator.java | 4 +- .../java/org/elasticsearch/common/UUIDs.java | 40 +++++++++++++++---- .../org/elasticsearch/common/UUIDTests.java | 16 ++++++++ .../blobstore/RepositoryFileType.java | 5 ++- 5 files changed, 57 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java index e731cf3bc58be..58c23ab9aa398 100644 --- a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java @@ -56,8 +56,10 @@ public static String getBase64UUID(Random random) { return Base64.getUrlEncoder().withoutPadding().encodeToString(getUUIDBytes(random)); } + static final int SIZE_IN_BYTES = 16; + private static byte[] getUUIDBytes(Random random) { - final byte[] randomBytes = new byte[16]; + final byte[] randomBytes = new byte[SIZE_IN_BYTES]; random.nextBytes(randomBytes); /* Set the version to version 4 (see http://www.ietf.org/rfc/rfc4122.txt) * The randomly or pseudo-randomly generated version. diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java index f7f7f520fec90..d66b0f579ce3e 100644 --- a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java @@ -47,6 +47,8 @@ protected byte[] macAddress() { return SECURE_MUNGED_ADDRESS; } + static final int SIZE_IN_BYTES = 15; + @Override public String getBase64UUID() { final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff; @@ -61,7 +63,7 @@ public String getBase64UUID() { sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max ); - final byte[] uuidBytes = new byte[15]; + final byte[] uuidBytes = new byte[SIZE_IN_BYTES]; int i = 0; // We have auto-generated ids, which are usually used for append-only workloads. diff --git a/server/src/main/java/org/elasticsearch/common/UUIDs.java b/server/src/main/java/org/elasticsearch/common/UUIDs.java index 43a232e82510e..ebc0978f38d49 100644 --- a/server/src/main/java/org/elasticsearch/common/UUIDs.java +++ b/server/src/main/java/org/elasticsearch/common/UUIDs.java @@ -17,26 +17,50 @@ public class UUIDs { private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator(); private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(); - /** Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as - * primary key. The id is opaque and the implementation is free to change at any time! */ + /** + * The length of a UUID string generated by {@link #base64UUID}. + */ + // A 15-byte time-based UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding). + public static final int TIME_BASED_UUID_STRING_LENGTH = 20; + + /** + * Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as + * primary key. The id is opaque and the implementation is free to change at any time! + * The resulting string has length {@link #TIME_BASED_UUID_STRING_LENGTH}. 
+ */ public static String base64UUID() { return TIME_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using the - * provided {@code Random} instance */ + /** + * The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}. + */ + // A 16-byte v4 UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding) plus another byte (becomes 2 chars). + public static final int RANDOM_BASED_UUID_STRING_LENGTH = 22; + + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using the + * provided {@code Random} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static String randomBase64UUID(Random random) { return RandomBasedUUIDGenerator.getBase64UUID(random); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using a - * private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using a + * private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static String randomBase64UUID() { return RANDOM_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded {@link SecureString} of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, - * using a private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded {@link SecureString} representing a RFC4122 version 4 + * UUID, using a private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. 
+ */ public static SecureString randomBase64UUIDSecureString() { return RandomBasedUUIDGenerator.getBase64UUIDSecureString(); } diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index 5af036ce0648f..3229049b67b4c 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -176,4 +176,20 @@ protected byte[] macAddress() { ); return bytesPerDoc; } + + public void testStringLength() { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(RandomBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID().length()); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID(random()).length()); + try (var secureString = UUIDs.randomBase64UUIDSecureString()) { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, secureString.toString().length()); + } + + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(TimeBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, UUIDs.base64UUID().length()); + } + + private static int getUnpaddedBase64StringLength(int sizeInBytes) { + return (int) Math.ceil(sizeInBytes * 4.0 / 3.0); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java index 014cbcd2bcc3a..8d20cce33bbb4 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java @@ -9,6 +9,7 @@ package org.elasticsearch.repositories.blobstore; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.UUIDs; import java.nio.file.Path; import java.util.regex.Pattern; @@ -38,9 +39,9 @@ public enum RepositoryFileType { // decimal numbers .replace("NUM", "(0|[1-9][0-9]*)") // 15-byte UUIDS from TimeBasedUUIDGenerator - .replace("SHORTUUID", "[0-9a-zA-Z_-]{20}") + .replace("SHORTUUID", "[0-9a-zA-Z_-]{" + UUIDs.TIME_BASED_UUID_STRING_LENGTH + "}") // 16-byte UUIDs from RandomBasedUUIDGenerator - .replace("UUID", "[0-9a-zA-Z_-]{22}") + .replace("UUID", "[0-9a-zA-Z_-]{" + UUIDs.RANDOM_BASED_UUID_STRING_LENGTH + "}") + ")$" ); } From 9e01181f0dd9bf6ea919164f8929f0874ad6e26d Mon Sep 17 00:00:00 2001 From: Albert Zaharovits Date: Thu, 29 Aug 2024 17:33:05 +0300 Subject: [PATCH 049/144] Remove unused cluster blocks in create index (#112352) Remove unused cluster blocks in create index --- .../CreateIndexClusterStateUpdateRequest.java | 9 ------- .../metadata/MetadataCreateIndexService.java | 24 ++++--------------- .../MetadataCreateIndexServiceTests.java | 10 +------- 3 files changed, 6 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java index 8a46daa45e73b..948199fbe74f4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java @@ -12,7 +12,6 @@ import 
org.elasticsearch.action.admin.indices.shrink.ResizeType; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ack.ClusterStateUpdateRequest; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.metadata.ComposableIndexTemplate; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; @@ -43,8 +42,6 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ private final Set aliases = new HashSet<>(); - private final Set blocks = new HashSet<>(); - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; private boolean performReroute = true; @@ -125,10 +122,6 @@ public Set aliases() { return aliases; } - public Set blocks() { - return blocks; - } - public Index recoverFrom() { return recoverFrom; } @@ -229,8 +222,6 @@ public String toString() { + settings + ", aliases=" + aliases - + ", blocks=" - + blocks + ", waitForActiveShards=" + waitForActiveShards + ", systemDataStreamDescriptor=" diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b1a19d99dcb19..07dcb7baf0777 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.AckedClusterStateUpdateTask; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -514,7 +513,6 @@ private ClusterState applyCreateIndexWithTemporaryService( ClusterState updated = clusterStateCreateIndex( currentState, - request.blocks(), indexMetadata, metadataTransformer, allocationService.getShardRoutingRoleStrategy() @@ -1231,7 +1229,6 @@ public static List resolveAndValidateAliases( */ static ClusterState clusterStateCreateIndex( ClusterState currentState, - Set clusterBlocks, IndexMetadata indexMetadata, BiConsumer metadataTransformer, ShardRoutingRoleStrategy shardRoutingRoleStrategy @@ -1245,14 +1242,13 @@ static ClusterState clusterStateCreateIndex( newMetadata = currentState.metadata().withAddedIndex(indexMetadata); } - String indexName = indexMetadata.getIndex().getName(); - ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); - blocks.updateBlocks(indexMetadata); + var blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); + blocksBuilder.updateBlocks(indexMetadata); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) - .addAsNew(newMetadata.index(indexName)); + var routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexMetadata.getIndex().getName())); - return ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).routingTable(routingTableBuilder).build(); + return ClusterState.builder(currentState).blocks(blocksBuilder).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( @@ -1325,16 +1321,6 @@ private static 
IndexMetadata.Builder createIndexMetadataBuilder( return builder; } - private static ClusterBlocks.Builder createClusterBlocksBuilder(ClusterState currentState, String index, Set blocks) { - ClusterBlocks.Builder blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); - if (blocks.isEmpty() == false) { - for (ClusterBlock block : blocks) { - blocksBuilder.addIndexBlock(index, block); - } - } - return blocksBuilder; - } - private static void updateIndexMappingsAndBuildSortOrder( IndexService indexService, CreateIndexClusterStateUpdateRequest request, diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 8a487e5653627..f7d343b43b29c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -967,13 +967,7 @@ public void testClusterStateCreateIndexThrowsWriteIndexValidationException() thr assertThat( expectThrows( IllegalStateException.class, - () -> clusterStateCreateIndex( - currentClusterState, - Set.of(), - newIndex, - null, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY - ) + () -> clusterStateCreateIndex(currentClusterState, newIndex, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY) ).getMessage(), startsWith("alias [alias1] has more than one write index [") ); @@ -991,7 +985,6 @@ public void testClusterStateCreateIndex() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY @@ -1037,7 +1030,6 @@ public void testClusterStateCreateIndexWithMetadataTransaction() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, metadataTransformer, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY From cf0e18872878cce9332722c491b5cc7749106ae4 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 29 Aug 2024 08:56:00 -0600 Subject: [PATCH 050/144] Add isAsync() to SearcTask and eliminate code for async detection from TransportSearchAction (#112311) --- .../action/search/SearchTask.java | 7 +++++ .../action/search/TransportSearchAction.java | 30 +------------------ .../xpack/search/AsyncSearchTask.java | 5 ++++ 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java index 3bf72313c4c21..cc5d60ad0b0c0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java @@ -69,4 +69,11 @@ public Supplier getSearchResponseMergerSupplier() { public void setSearchResponseMergerSupplier(Supplier supplier) { this.searchResponseMergerSupplier = supplier; } + + /** + * Is this async search? 
+ */ + public boolean isAsync() { + return false; + } } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 6e1645c1ed711..32ee9c331295c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -369,7 +369,7 @@ void executeRequest( } else { if ((listener instanceof TelemetryListener tl) && CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { tl.setRemotes(resolvedIndices.getRemoteClusterIndices().size()); - if (isAsyncSearchTask(task)) { + if (task.isAsync()) { tl.setFeature(CCSUsageTelemetry.ASYNC_FEATURE); } String client = task.getHeader(Task.X_ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER); @@ -1514,34 +1514,6 @@ public SearchPhase newSearchPhase( } } - /** - * TransportSearchAction cannot access async-search code, so can't check whether this the Task - * is an instance of AsyncSearchTask, so this roundabout method is used - * @param searchTask SearchTask to analyze - * @return true if this is an async search task; false if a synchronous search task - */ - private boolean isAsyncSearchTask(SearchTask searchTask) { - assert assertAsyncSearchTaskListener(searchTask) : "AsyncSearchTask SearchProgressListener is not one of the expected types"; - // AsyncSearchTask will not return SearchProgressListener.NOOP, since it uses its own progress listener - // which delegates to CCSSingleCoordinatorSearchProgressListener when minimizing roundtrips. - // Only synchronous SearchTask uses SearchProgressListener.NOOP or CCSSingleCoordinatorSearchProgressListener directly - return searchTask.getProgressListener() != SearchProgressListener.NOOP - && searchTask.getProgressListener() instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - - /** - * @param searchTask SearchTask to analyze - * @return true if AsyncSearchTask still uses its own special listener, not one of the two that synchronous SearchTask uses - */ - private boolean assertAsyncSearchTaskListener(SearchTask searchTask) { - if (searchTask.getClass().getSimpleName().contains("AsyncSearchTask")) { - SearchProgressListener progressListener = searchTask.getProgressListener(); - return progressListener != SearchProgressListener.NOOP - && progressListener instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - return true; - } - private static void validateAndResolveWaitForCheckpoint( ClusterState clusterState, IndexNameExpressionResolver resolver, diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index c0305f873327d..5068ac69e462a 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -545,4 +545,9 @@ public void onFailure(Exception exc) { executeCompletionListeners(); } } + + @Override + public boolean isAsync() { + return true; + } } From 8f526098dbfcb109b8e5b01ee436e09491169025 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 29 Aug 2024 16:57:51 +0200 Subject: [PATCH 051/144] Fix SearchServiceTests not waiting for scroll clear (#111547) We were not waiting on the response here but assume the scrolls are cleared in the following lines. 
This worked as long as the transport action wasn't forking but is broken now that we fork to generic. Fixed by just waiting. closes #111529 --- .../test/java/org/elasticsearch/search/SearchServiceTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java index bdddea58b713f..2617f82b09f08 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java @@ -1548,7 +1548,7 @@ public void testMaxOpenScrollContexts() throws Exception { ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); clearScrollRequest.setScrollIds(clearScrollIds); - client().clearScroll(clearScrollRequest); + client().clearScroll(clearScrollRequest).get(); for (int i = 0; i < clearScrollIds.size(); i++) { client().prepareSearch("index").setSize(1).setScroll(TimeValue.timeValueMinutes(1)).get().decRef(); From e966d0d9da74cb24c97a17d174f1b65324411e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Thu, 29 Aug 2024 17:30:16 +0200 Subject: [PATCH 052/144] Removing the feature flag mechanism for LTR. (#112358) --- .../ml/DefaultMachineLearningExtension.java | 5 ----- .../elasticsearch/xpack/ml/MachineLearning.java | 14 +++++--------- .../xpack/ml/MachineLearningExtension.java | 4 ---- .../ml/LocalStateMachineLearningAdOnly.java | 2 +- .../ml/LocalStateMachineLearningDfaOnly.java | 2 +- .../ml/LocalStateMachineLearningNlpOnly.java | 2 +- .../MachineLearningInfoTransportActionTests.java | 9 +-------- .../xpack/ml/MachineLearningTests.java | 16 ++++------------ 8 files changed, 13 insertions(+), 41 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java index 66f4797ef707c..fa94bf96c1167 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java @@ -51,11 +51,6 @@ public boolean isNlpEnabled() { return true; } - @Override - public boolean isLearningToRankEnabled() { - return true; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index c4bf92401be9d..5876836185ba3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -902,7 +902,7 @@ private static void reportClashingNodeAttribute(String attrName) { @Override public List> getRescorers() { - if (enabled && machineLearningExtension.get().isLearningToRankEnabled()) { + if (enabled) { return List.of( new RescorerSpec<>( LearningToRankRescorerBuilder.NAME, @@ -1864,10 +1864,8 @@ public List getNamedXContent() { ) ); namedXContent.addAll(new CorrelationNamedContentProvider().getNamedXContentParsers()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); 
- } + namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); + return namedXContent; } @@ -1958,10 +1956,8 @@ public List getNamedWriteables() { namedWriteables.addAll(MlAutoscalingNamedWritableProvider.getNamedWriteables()); namedWriteables.addAll(new CorrelationNamedContentProvider().getNamedWriteables()); namedWriteables.addAll(new ChangePointNamedContentProvider().getNamedWriteables()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); - } + namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); + return namedWriteables; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java index 528883439ef2f..f46652978753c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java @@ -25,10 +25,6 @@ default void configure(Settings settings) {} boolean isNlpEnabled(); - default boolean isLearningToRankEnabled() { - return true; - } - default boolean disableInferenceProcessCache() { return false; } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java index 175a035a70f7e..3ff3a4a404f97 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningAdOnly extends LocalStateMachineLearning { public LocalStateMachineLearningAdOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java index f054e52dc29ec..1a72f27865d8a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningDfaOnly extends LocalStateMachineLearning { public LocalStateMachineLearningDfaOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java index a3d684011e932..0f11e8033b83d 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java +++ 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningNlpOnly extends LocalStateMachineLearning { public LocalStateMachineLearningNlpOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index afa372fb94527..e5575abfeb020 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -160,14 +160,7 @@ private MachineLearningUsageTransportAction newUsageAction( licenseState, jobManagerHolder, new MachineLearningExtensionHolder( - new MachineLearningTests.MlTestExtension( - true, - true, - isAnomalyDetectionEnabled, - isDataFrameAnalyticsEnabled, - isNlpEnabled, - true - ) + new MachineLearningTests.MlTestExtension(true, true, isAnomalyDetectionEnabled, isDataFrameAnalyticsEnabled, isNlpEnabled) ) ); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java index c35b9da7b2bd2..8a05537917abe 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java @@ -220,7 +220,7 @@ public void testNoAttributes_givenClash() throws IOException { public void testAnomalyDetectionOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, true, false, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, true, false, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -240,7 +240,7 @@ public void testAnomalyDetectionOnly() throws IOException { public void testDataFrameAnalyticsOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -260,7 +260,7 @@ public void testDataFrameAnalyticsOnly() throws IOException { public void testNlpOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader 
= new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -287,22 +287,19 @@ public static class MlTestExtension implements MachineLearningExtension { private final boolean isAnomalyDetectionEnabled; private final boolean isDataFrameAnalyticsEnabled; private final boolean isNlpEnabled; - private final boolean isLearningToRankEnabled; MlTestExtension( boolean useIlm, boolean includeNodeInfo, boolean isAnomalyDetectionEnabled, boolean isDataFrameAnalyticsEnabled, - boolean isNlpEnabled, - boolean isLearningToRankEnabled + boolean isNlpEnabled ) { this.useIlm = useIlm; this.includeNodeInfo = includeNodeInfo; this.isAnomalyDetectionEnabled = isAnomalyDetectionEnabled; this.isDataFrameAnalyticsEnabled = isDataFrameAnalyticsEnabled; this.isNlpEnabled = isNlpEnabled; - this.isLearningToRankEnabled = isLearningToRankEnabled; } @Override @@ -330,11 +327,6 @@ public boolean isNlpEnabled() { return isNlpEnabled; } - @Override - public boolean isLearningToRankEnabled() { - return isLearningToRankEnabled; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; From 592858281400cfa608080aca5dac8331d3ea1f5c Mon Sep 17 00:00:00 2001 From: Fang Xing <155562079+fang-xing-esql@users.noreply.github.com> Date: Thu, 29 Aug 2024 12:03:03 -0400 Subject: [PATCH 053/144] [ES|QL] Combine 3 commonTypes into one (#112220) Combine 3 commonTypes into one. 
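The three duplicate implementations (DataTypeConverter.commonType,
EsqlDataTypeRegistry.commonType and EsqlDataTypeConverter.commonType) now route
through the single EsqlDataTypeConverter.commonType. A sketch of the promotion
rules the surviving method is expected to preserve, read off the removed
DataTypeConverter version below (the assertions are illustrative, not part of
the patch):

    // NULL yields the other operand's type
    assert commonType(NULL, LONG) == LONG;
    // among strings, TEXT wins over KEYWORD
    assert commonType(KEYWORD, TEXT) == TEXT;
    // whole numbers promote to the widest, with UNSIGNED_LONG taking precedence
    assert commonType(INTEGER, LONG) == LONG;
    assert commonType(LONG, UNSIGNED_LONG) == UNSIGNED_LONG;
    // a rational operand beats any whole number
    assert commonType(LONG, DOUBLE) == DOUBLE;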
--- .../esql/core/type/DataTypeConverter.java | 80 --------- .../predicate/operator/arithmetic/Add.java | 1 - .../arithmetic/ArithmeticOperation.java | 7 +- .../BinaryComparisonInversible.java | 2 +- .../predicate/operator/arithmetic/Div.java | 1 - .../arithmetic/EsqlArithmeticOperation.java | 5 +- .../predicate/operator/arithmetic/Mul.java | 1 - .../predicate/operator/arithmetic/Sub.java | 1 - .../comparison/EsqlBinaryComparison.java | 4 +- .../predicate/operator/comparison/In.java | 4 +- .../rules/SimplifyComparisonsArithmetics.java | 4 +- .../esql/type/EsqlDataTypeConverter.java | 77 +++++++-- .../xpack/esql/type/EsqlDataTypeRegistry.java | 24 --- .../esql/type/DataTypeConversionTests.java | 20 --- .../esql/type/EsqlDataTypeConverterTests.java | 158 ++++++++++++++++++ 15 files changed, 234 insertions(+), 155 deletions(-) rename x-pack/plugin/{esql-core/src/main/java/org/elasticsearch/xpack/esql/core => esql/src/main/java/org/elasticsearch/xpack/esql}/expression/predicate/operator/arithmetic/ArithmeticOperation.java (80%) rename x-pack/plugin/{esql-core/src/main/java/org/elasticsearch/xpack/esql/core => esql/src/main/java/org/elasticsearch/xpack/esql}/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java (91%) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java index 1e68d63ef7bb1..78b395503e700 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java @@ -38,7 +38,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.UNSIGNED_LONG_MAX; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.inUnsignedLongRange; @@ -51,85 +50,6 @@ public final class DataTypeConverter { private DataTypeConverter() {} - /** - * Returns the type compatible with both left and right types - *
    - * If one of the types is null - returns another type - * If both types are numeric - returns type with the highest precision int < long < float < double - * If one of the types is string and another numeric - returns numeric - */ - public static DataType commonType(DataType left, DataType right) { - if (left == right) { - return left; - } - if (left == NULL) { - return right; - } - if (right == NULL) { - return left; - } - if (isString(left) && isString(right)) { - if (left == TEXT || right == TEXT) { - return TEXT; - } - if (left == KEYWORD) { - return KEYWORD; - } - return right; - } - if (left.isNumeric() && right.isNumeric()) { - int lsize = left.estimatedSize().orElseThrow(); - int rsize = right.estimatedSize().orElseThrow(); - // if one is int - if (left.isWholeNumber()) { - // promote the highest int - if (right.isWholeNumber()) { - if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { - return UNSIGNED_LONG; - } - return lsize > rsize ? left : right; - } - // promote the rational - return right; - } - // try the other side - if (right.isWholeNumber()) { - return left; - } - // promote the highest rational - return lsize > rsize ? left : right; - } - if (isString(left)) { - if (right.isNumeric()) { - return right; - } - } - if (isString(right)) { - if (left.isNumeric()) { - return left; - } - } - - if (isDateTime(left) && isDateTime(right)) { - return DATETIME; - } - - // none found - return null; - } - - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - /** * Get the conversion from one type to another. 
*/ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java index b6ec9b6fd0e23..8f8d885ee379b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java similarity index 80% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java index 8dc0f58083179..cb7e7c4643fb9 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java @@ -4,16 +4,17 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; +import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class ArithmeticOperation extends BinaryOperator { @@ -36,7 +37,7 @@ public ArithmeticOperation swapLeftAndRight() { @Override public DataType dataType() { if (dataType == null) { - dataType = DataTypeConverter.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java similarity index 91% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java index 358ad59ec6356..b0ab4c48d970e 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java index 0e4c506a90d85..f1e197cf350b6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java index 647071c44cfd3..400e70b641111 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java @@ -13,14 +13,12 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.util.List; @@ -31,6 +29,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlArithmeticOperation extends ArithmeticOperation implements EvaluatorMapper { public static List getNamedWriteables() { @@ -133,7 +132,7 @@ public Object fold() { public DataType dataType() { if (dataType == null) { - dataType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java index a73562ff153b2..03981a821f52d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java index ee2ccc3b7107a..27f5579129cc9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java index 52d4c111b2eae..b50d70e69819d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java @@ -22,7 +22,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.time.ZoneId; @@ -32,6 +31,7 @@ import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlBinaryComparison extends BinaryComparison implements EvaluatorMapper { public static List getNamedWriteables() { @@ -172,7 +172,7 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator( Function toEvaluator ) { // Our type is always boolean, so figure out the evaluator type from the inputs - DataType commonType = 
EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + DataType commonType = commonType(left().dataType(), right().dataType()); EvalOperator.ExpressionEvaluator.Factory lhs; EvalOperator.ExpressionEvaluator.Factory rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java index 636b31fcc691b..333f32e82c579 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java @@ -27,7 +27,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; import java.io.IOException; import java.util.BitSet; @@ -269,7 +269,7 @@ private DataType commonType() { break; } } - commonType = EsqlDataTypeRegistry.INSTANCE.commonType(commonType, e.dataType()); + commonType = EsqlDataTypeConverter.commonType(commonType, e.dataType()); } return commonType; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java index 4ef069ea16d04..fe83aeb647bf9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java @@ -9,10 +9,10 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.ArithmeticOperation; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 1572f8950e0ac..b090708a64ad3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -58,6 +58,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; @@ -67,9 +68,14 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeDoubleToLong; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeToInt; @@ -107,18 +113,6 @@ public class EsqlDataTypeConverter { entry(VERSION, ToVersion::new) ); - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - public static Converter converterFor(DataType from, DataType to) { // TODO move EXPRESSION_TO_LONG here if there is no regression if (isString(from)) { @@ -230,8 +224,63 @@ public static Object convert(Object value, DataType dataType) { return converter.convert(value); } + /** + * Returns the type compatible with both left and right types + *
<p>
    + * If one of the types is null - returns the other type + * If both types are numeric - returns the type with the highest precision: int < long < float < double + */ public static DataType commonType(DataType left, DataType right) { - return DataTypeConverter.commonType(left, right); + if (left == right) { + return left; + } + if (left == NULL) { + return right; + } + if (right == NULL) { + return left; + } + if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { + if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { + return DATETIME; + } + if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { + return TIME_DURATION; + } + if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { + return DATE_PERIOD; + } + } + if (isString(left) && isString(right)) { + if (left == TEXT || right == TEXT) { + return TEXT; + } + return right; + } + if (left.isNumeric() && right.isNumeric()) { + int lsize = left.estimatedSize().orElseThrow(); + int rsize = right.estimatedSize().orElseThrow(); + // if one is int + if (left.isWholeNumber()) { + // promote the highest int + if (right.isWholeNumber()) { + if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { + return UNSIGNED_LONG; + } + return lsize > rsize ? left : right; + } + // promote the rational + return right; + } + // try the other side + if (right.isWholeNumber()) { + return left; + } + // promote the highest rational + return lsize > rsize ? left : right; + } + // none found + return null; } // generally supporting abbreviations from https://en.wikipedia.org/wiki/Unit_of_time diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java index 96e206b82cf0c..f8e8cd37dc8b2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java @@ -10,15 +10,6 @@ import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.xpack.esql.core.type.DataType; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; -import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; - public class EsqlDataTypeRegistry { public static final EsqlDataTypeRegistry INSTANCE = new EsqlDataTypeRegistry(); @@ -35,19 +26,4 @@ public DataType fromEs(String typeName, TimeSeriesParams.MetricType metricType) */ return metricType == TimeSeriesParams.MetricType.COUNTER ?
type.widenSmallNumeric().counter() : type; } - - public DataType commonType(DataType left, DataType right) { - if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { - if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { - return DATETIME; - } - if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { - return TIME_DURATION; - } - if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { - return DATE_PERIOD; - } - } - return EsqlDataTypeConverter.commonType(left, right); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java index 9f8c8f91b7037..871bf632adcc6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java @@ -35,7 +35,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.commonType; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.converterFor; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; @@ -522,25 +521,6 @@ public void testConversionToIdentity() { assertEquals(10, conversion.convert(10)); } - public void testCommonType() { - assertEquals(BOOLEAN, commonType(BOOLEAN, NULL)); - assertEquals(BOOLEAN, commonType(NULL, BOOLEAN)); - assertEquals(BOOLEAN, commonType(BOOLEAN, BOOLEAN)); - assertEquals(NULL, commonType(NULL, NULL)); - assertEquals(INTEGER, commonType(INTEGER, KEYWORD)); - assertEquals(LONG, commonType(TEXT, LONG)); - assertEquals(SHORT, commonType(SHORT, BYTE)); - assertEquals(FLOAT, commonType(BYTE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, INTEGER)); - assertEquals(UNSIGNED_LONG, commonType(UNSIGNED_LONG, LONG)); - assertEquals(DOUBLE, commonType(DOUBLE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, UNSIGNED_LONG)); - - // strings - assertEquals(TEXT, commonType(TEXT, KEYWORD)); - assertEquals(TEXT, commonType(KEYWORD, TEXT)); - } - public void testEsDataTypes() { for (DataType type : DataType.types()) { assertEquals(type, DataType.fromTypeName(type.typeName())); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java index 0997c88aac2b0..8ad083683f696 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java @@ -8,6 +8,44 @@ package org.elasticsearch.xpack.esql.type; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; +import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; +import static org.elasticsearch.xpack.esql.core.type.DataType.PARTIAL_AGG; +import static org.elasticsearch.xpack.esql.core.type.DataType.SCALED_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SOURCE; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TSID_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public class EsqlDataTypeConverterTests extends ESTestCase { @@ -16,4 +54,124 @@ public void testNanoTimeToString() { long actual = EsqlDataTypeConverter.dateNanosToLong(EsqlDataTypeConverter.nanoTimeToString(expected)); assertEquals(expected, actual); } + + public void testCommonTypeNull() { + for (DataType dataType : DataType.values()) { + assertEqualsCommonType(dataType, NULL, dataType); + } + } + + public void testCommonTypeStrings() { + List STRINGS = Arrays.stream(DataType.values()).filter(DataType::isString).toList(); + for (DataType dataType1 : STRINGS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if ((isString(dataType1) && isString(dataType2))) { + if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertEqualsCommonType(dataType1, dataType2, TEXT); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeDateTimeIntervals() { + List DATE_TIME_INTERVALS = Arrays.stream(DataType.values()).filter(DataType::isDateTimeOrTemporal).toList(); + for (DataType dataType1 : DATE_TIME_INTERVALS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if (isDateTimeOrTemporal(dataType2)) { + if 
(isDateTime(dataType1) || isDateTime(dataType2)) { + assertEqualsCommonType(dataType1, dataType2, DATETIME); + } else if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeNumeric() { + // whole numbers + commonNumericType(BYTE, List.of(NULL, BYTE)); + commonNumericType(SHORT, List.of(NULL, BYTE, SHORT)); + commonNumericType(INTEGER, List.of(NULL, BYTE, SHORT, INTEGER)); + commonNumericType(LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG)); + commonNumericType(UNSIGNED_LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG)); + // floats + commonNumericType(HALF_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT)); + commonNumericType(FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, FLOAT, HALF_FLOAT)); + commonNumericType(DOUBLE, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, DOUBLE, SCALED_FLOAT)); + commonNumericType(SCALED_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, SCALED_FLOAT, DOUBLE)); + } + + /** + * The first argument and the second argument(s) have the first argument as a common type. + */ + private static void commonNumericType(DataType numericType, List lowerTypes) { + List NUMERICS = Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList(); + List DOUBLES = Arrays.stream(DataType.values()).filter(DataType::isRationalNumber).toList(); + for (DataType dataType : DataType.values()) { + if (DOUBLES.containsAll(List.of(numericType, dataType)) && (dataType.estimatedSize().equals(numericType.estimatedSize()))) { + assertEquals(numericType, commonType(dataType, numericType)); + } else if (lowerTypes.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, numericType); + } else if (NUMERICS.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, dataType); + } else { + assertNullCommonType(numericType, dataType); + } + } + } + + public void testCommonTypeMiscellaneous() { + List MISCELLANEOUS = List.of( + COUNTER_INTEGER, + COUNTER_LONG, + COUNTER_DOUBLE, + UNSUPPORTED, + OBJECT, + SOURCE, + DATE_NANOS, + DOC_DATA_TYPE, + TSID_DATA_TYPE, + PARTIAL_AGG, + IP, + VERSION, + GEO_POINT, + GEO_SHAPE, + CARTESIAN_POINT, + CARTESIAN_SHAPE, + BOOLEAN + ); + for (DataType dataType1 : MISCELLANEOUS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL || dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + private static void assertEqualsCommonType(DataType dataType1, DataType dataType2, DataType commonType) { + assertEquals(commonType, commonType(dataType1, dataType2)); + assertEquals(commonType, commonType(dataType2, dataType1)); + } + + private static void assertNullCommonType(DataType dataType1, DataType dataType2) { + assertNull(commonType(dataType1, dataType2)); + assertNull(commonType(dataType2, dataType1)); + } } From e3e562ffbfb981014bdd71bf663bb6f972f5e352 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 29 Aug 2024 17:18:54 +0100 Subject: [PATCH 054/144] [ML] Support sparse embedding models in the elasticsearch inference service (#112270) For a sparse embedding model created with the ml trained models APIs --- docs/changelog/112270.yaml | 5 + .../inference/service-elasticsearch.asciidoc | 3 +- 
.../xpack/inference/CustomElandModelIT.java | 134 +++++++++ .../xpack/inference/RerankingIT.java | 8 +- .../BaseElasticsearchInternalService.java | 6 +- .../ElasticsearchInternalService.java | 151 +++------- .../services/elser/ElserInternalService.java | 28 -- .../ElasticsearchInternalServiceTests.java | 278 +++++++++++------- 8 files changed, 363 insertions(+), 250 deletions(-) create mode 100644 docs/changelog/112270.yaml create mode 100644 x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java diff --git a/docs/changelog/112270.yaml b/docs/changelog/112270.yaml new file mode 100644 index 0000000000000..1e6b9c7fc9290 --- /dev/null +++ b/docs/changelog/112270.yaml @@ -0,0 +1,5 @@ +pr: 112270 +summary: Support sparse embedding models in the elasticsearch inference service +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 99fd41ee2db65..572cad591fba6 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -31,6 +31,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `rerank`, +* `sparse_embedding`, * `text_embedding`. -- @@ -182,4 +183,4 @@ PUT _inference/text_embedding/my-e5-model } } ------------------------------------------------------------ -// TEST[skip:TBD] \ No newline at end of file +// TEST[skip:TBD] diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java new file mode 100644 index 0000000000000..65b7a138e7e1e --- /dev/null +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference; + +import org.elasticsearch.client.Request; +import org.elasticsearch.core.Strings; +import org.elasticsearch.inference.TaskType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.stream.Collectors; + +public class CustomElandModelIT extends InferenceBaseRestTest { + + // The model definition is taken from org.elasticsearch.xpack.ml.integration.TextExpansionQueryIT + + static final String BASE_64_ENCODED_MODEL = "UEsDBAAACAgAAAAAAAAAAAAAAAAAA" + + "AAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VG" + + "V4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29" + + "rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxl" + + "bW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7" + + "/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77" + + "KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVc" + + "tJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdF" + + "ZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26Qu" + + "Zy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9" + + "UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/e" + + "UuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZ" + + "W1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNL" + + "Fgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vL" + + "qbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLkt" + + "gNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjg" + + "hg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0Y" + + "gbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSR" + + "rhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9" + + "ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRgl" + + "XIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2" + + "dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/" + + "whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAA" + + "BkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgI" + + "AAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAA" + + "AACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLA" + + "QIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19f" + + "dG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVt" + + "b2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAA" + + "AAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAA" + + "AAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAA" + + "AAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA="; + 
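+    // The static block below decodes the blob once so RAW_MODEL_SIZE holds the raw
+    // (decoded) definition length; putModelDefinition later reports this value as
+    // total_definition_length, which must match the un-encoded model size rather than
+    // the length of the base64 string.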
+ static final long RAW_MODEL_SIZE; // size of the model before base64 encoding + static { + RAW_MODEL_SIZE = Base64.getDecoder().decode(BASE_64_ENCODED_MODEL).length; + } + + // Test a sparse embedding model deployed with the ml trained models APIs + public void testSparse() throws IOException { + String modelId = "custom-text-expansion-model"; + + createTextExpansionModel(modelId); + putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE); + putVocabulary( + List.of("these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"), + modelId + ); + + var inferenceConfig = """ + { + "service": "elasticsearch", + "service_settings": { + "model_id": "custom-text-expansion-model", + "num_allocations": 1, + "num_threads": 1 + } + } + """; + + var inferenceId = "sparse-inf"; + putModel(inferenceId, inferenceConfig, TaskType.SPARSE_EMBEDDING); + var results = inferOnMockService(inferenceId, List.of("washing", "machine")); + deleteModel(inferenceId); + assertNotNull(results.get("sparse_embedding")); + } + + protected void createTextExpansionModel(String modelId) throws IOException { + // with_special_tokens: false for this test with limited vocab + Request request = new Request("PUT", "/_ml/trained_models/" + modelId); + request.setJsonEntity(""" + { + "description": "a text expansion model", + "model_type": "pytorch", + "inference_config": { + "text_expansion": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + }"""); + client().performRequest(request); + } + + protected void putVocabulary(List vocabulary, String modelId) throws IOException { + List vocabularyWithPad = new ArrayList<>(); + vocabularyWithPad.add("[PAD]"); + vocabularyWithPad.add("[UNK]"); + vocabularyWithPad.addAll(vocabulary); + String quotedWords = vocabularyWithPad.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/vocabulary"); + request.setJsonEntity(Strings.format(""" + { "vocabulary": [%s] } + """, quotedWords)); + client().performRequest(request); + } + + protected void putModelDefinition(String modelId, String base64EncodedModel, long unencodedModelSize) throws IOException { + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/definition/0"); + String body = Strings.format(""" + {"total_definition_length":%s,"definition": "%s","total_parts": 1}""", unencodedModelSize, base64EncodedModel); + request.setJsonEntity(body); + client().performRequest(request); + } +} diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java index 77251ada4c488..893d3fb3e9b80 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java @@ -35,7 +35,7 @@ private String putCohereRerankEndpoint() throws IOException { "api_key": "" } } - """);// TODO remove key + """); return endpointID; } @@ -61,7 +61,7 @@ private String putCohereRerankEndpointWithDocuments() throws IOException { "return_documents": true } } - """);// TODO remove key + """); return endpointID; } @@ -81,13 +81,13 @@ private String putCohereRerankEndpointWithTop2() throws 
IOException { "service": "cohere", "service_settings": { "model_id": "rerank-english-v2.0", - "api_key": "8TNPBvpBO7oN97009HQHzQbBhNrxmREbcJrZCwkK" + "api_key": "" }, "task_settings": { "top_n": 2 } } - """);// TODO remove key + """); return endpointID; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 574ca77d4587e..457416370e559 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -154,10 +154,10 @@ public void isModelDownloaded(Model model, ActionListener listener) { executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); } else { listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" + new IllegalStateException( + "Can not check the download status of the model used by [" + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." + + "] as the model_id cannot be found." ) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index c3a0111562319..cca8ae63e974c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; @@ -27,19 +25,18 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import 
org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; @@ -53,8 +50,6 @@ import java.util.Set; import java.util.function.Function; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; @@ -71,15 +66,13 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 ); - private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); - public ElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { super(context); } @Override protected EnumSet supportedTaskTypes() { - return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING); + return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING); } @Override @@ -161,6 +154,12 @@ private static CustomElandModel createCustomElandModel( NAME, CustomElandInternalTextEmbeddingServiceSettings.fromMap(serviceSettings, context) ); + case SPARSE_EMBEDDING -> new CustomElandModel( + inferenceEntityId, + taskType, + NAME, + elandServiceSettings(serviceSettings, context) + ); case RERANK -> new CustomElandRerankModel( inferenceEntityId, taskType, @@ -334,6 +333,8 @@ public void infer( inferTextEmbedding(model, input, inputType, timeout, listener); } else if (TaskType.RERANK.equals(taskType)) { inferRerank(model, query, input, inputType, timeout, taskSettings, listener); + } else if (TaskType.SPARSE_EMBEDDING.equals(taskType)) { + inferSparseEmbedding(model, input, inputType, timeout, listener); } else { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); } @@ -364,6 +365,31 @@ public void inferTextEmbedding( ); } + public void inferSparseEmbedding( + Model model, + List inputs, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + var request = buildInferenceRequest( + model.getConfigurations().getInferenceEntityId(), + TextExpansionConfigUpdate.EMPTY_UPDATE, + inputs, + inputType, + timeout, + false + ); + + client.execute( + InferModelAction.INSTANCE, + request, + listener.delegateFailureAndWrap( + (l, inferenceResult) -> l.onResponse(SparseEmbeddingResults.of(inferenceResult.getInferenceResults())) + ) + ); + } + public void inferRerank( Model model, String query, @@ -422,7 +448,7 @@ public void chunkedInfer( TimeValue timeout, ActionListener> listener ) { - if (TaskType.TEXT_EMBEDDING.isAnyOrSame(model.getTaskType()) == false) { + if ((TaskType.TEXT_EMBEDDING.equals(model.getTaskType()) || TaskType.SPARSE_EMBEDDING.equals(model.getTaskType())) == false) { listener.onFailure( new 
ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(model.getTaskType(), NAME), RestStatus.BAD_REQUEST) ); @@ -464,6 +490,8 @@ private static List translateToChunkedResults(Li private static ChunkedInferenceServiceResults translateToChunkedResult(InferenceResults inferenceResult) { if (inferenceResult instanceof MlChunkedTextEmbeddingFloatResults mlChunkedResult) { return InferenceChunkedTextEmbeddingFloatResults.ofMlResults(mlChunkedResult); + } else if (inferenceResult instanceof MlChunkedTextExpansionResults mlChunkedResult) { + return InferenceChunkedSparseEmbeddingResults.ofMlResult(mlChunkedResult); } else if (inferenceResult instanceof ErrorInferenceResults error) { return new ErrorChunkedInferenceResults(error.getException()); } else { @@ -471,103 +499,6 @@ private static ChunkedInferenceServiceResults translateToChunkedResult(Inference } } - @Override - public void start(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - return; - } - - if (model.getTaskType() != TaskType.TEXT_EMBEDDING && model.getTaskType() != TaskType.RERANK) { - listener.onFailure( - new IllegalStateException(TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), NAME)) - ); - return; - } - - var startRequest = ((ElasticsearchInternalModel) model).getStartTrainedModelDeploymentActionRequest(); - var responseListener = ((ElasticsearchInternalModel) model).getCreateTrainedModelAssignmentActionListener(model, listener); - - client.execute(StartTrainedModelDeploymentAction.INSTANCE, startRequest, responseListener); - } - - @Override - public void stop(String inferenceEntityId, ActionListener listener) { - var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId); - request.setForce(true); - client.execute( - StopTrainedModelDeploymentAction.INSTANCE, - request, - listener.delegateFailureAndWrap((delegatedResponseListener, response) -> delegatedResponseListener.onResponse(Boolean.TRUE)) - ); - } - - @Override - public void putModel(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - return; - } else if (model instanceof MultilingualE5SmallModel e5Model) { - String modelId = e5Model.getServiceSettings().modelId(); - var input = new TrainedModelInput(List.of("text_field")); // by convention text_field is used - var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build(); - PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true); - executeAsyncWithOrigin( - client, - INFERENCE_ORIGIN, - PutTrainedModelAction.INSTANCE, - putRequest, - ActionListener.wrap(response -> listener.onResponse(Boolean.TRUE), e -> { - if (e instanceof ElasticsearchStatusException esException - && esException.getMessage().contains(PutTrainedModelAction.MODEL_ALREADY_EXISTS_ERROR_MESSAGE_FRAGMENT)) { - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure(e); - } - }) - ); - } else if (model instanceof CustomElandModel) { - logger.info("Custom eland model detected, model must have been already loaded into the cluster with eland."); - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through 
the trained models API or with eland." - ) - ); - return; - } - } - - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if (response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model.getServiceSettings() instanceof ElasticsearchInternalServiceSettings internalServiceSettings) { - String modelId = internalServiceSettings.modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - } else { - listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" - + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." - ) - ); - } - } - @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_14_0; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java index 775ddca160463..948117954a63f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java @@ -28,7 +28,6 @@ import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; @@ -43,8 +42,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; import static org.elasticsearch.xpack.inference.services.elser.ElserModels.ELSER_V2_MODEL; @@ -242,31 +239,6 @@ private void checkCompatibleTaskType(TaskType taskType) { } } - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if (response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model instanceof ElserInternalModel elserModel) { - String modelId = elserModel.getServiceSettings().modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, 
getRequest, getModelsResponseListener); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." - ) - ); - } - } - private static ElserMlNodeTaskSettings taskSettingsFromMap(TaskType taskType, Map config) { if (taskType != TaskType.SPARSE_EMBEDDING) { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index e6fd725a50198..257616033f080 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.InputType; @@ -31,6 +32,7 @@ import org.elasticsearch.xpack.core.action.util.QueryPage; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; @@ -39,8 +41,10 @@ import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; +import org.elasticsearch.xpack.core.ml.inference.results.InferenceChunkedTextExpansionResultsTests; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResultsTests; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; @@ -52,12 +56,10 @@ import org.mockito.Mockito; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -76,7 +78,6 @@ public class ElasticsearchInternalServiceTests extends ESTestCase { - TaskType taskType = TaskType.TEXT_EMBEDDING; String randomInferenceEntityId = 
randomAlphaOfLength(10); private static ThreadPool threadPool; @@ -92,7 +93,25 @@ public void shutdownThreadPool() { } public void testParseRequestConfig() { + var service = createService(mock(Client.class)); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) + ) + ); + ActionListener modelListener = ActionListener.wrap( + model -> fail("Model parsing should have failed"), + e -> assertThat(e, instanceOf(IllegalArgumentException.class)) + ); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); + service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + } + + public void testParseRequestConfig_Misconfigured() { // Null model variant { var service = createService(mock(Client.class)); @@ -109,43 +128,10 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(IllegalArgumentException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Valid model variant - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID - ) - ) - ); - - var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( - 1, - 4, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, - null - ); - - service.parseRequestConfig( - randomInferenceEntityId, - taskType, - settings, - Set.of(), - getModelVerificationActionListener(e5ServiceSettings) - ); - } - // Invalid config map { var service = createService(mock(Client.class)); @@ -163,10 +149,12 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } + } - // Invalid service settings + public void testParseRequestConfig_E5() { { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -179,52 +167,28 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "not_a_valid_service_setting", - randomAlphaOfLength(10) + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID ) ) ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) - ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); - } - - // Extra service settings - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - 
ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "extra_setting_that_should_not_be_here", - randomAlphaOfLength(10) - ) - ) + var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( + 1, + 4, + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + null ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) + service.parseRequestConfig( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + settings, + Set.of(), + getModelVerificationActionListener(e5ServiceSettings) ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Extra settings + // Invalid service settings { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -237,19 +201,19 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID // we can't directly test the eland case until we mock - // the threadpool within the client + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + "not_a_valid_service_setting", + randomAlphaOfLength(10) ) ) ); - settings.put("extra_setting_that_should_not_be_here", randomAlphaOfLength(10)); ActionListener modelListener = ActionListener.wrap( model -> fail("Model parsing should have failed"), e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + service.parseRequestConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings, Set.of(), modelListener); } } @@ -342,10 +306,53 @@ public void testParseRequestConfig_Rerank_DefaultTaskSettings() { } } + @SuppressWarnings("unchecked") + public void testParseRequestConfig_SparseEmbedding() { + var client = mock(Client.class); + doAnswer(invocation -> { + var listener = (ActionListener) invocation.getArguments()[2]; + listener.onResponse( + new GetTrainedModelsAction.Response(new QueryPage<>(List.of(mock(TrainedModelConfig.class)), 1, mock(ParseField.class))) + ); + return null; + }).when(client).execute(Mockito.same(GetTrainedModelsAction.INSTANCE), any(), any()); + + when(client.threadPool()).thenReturn(threadPool); + + var service = createService(client); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + "foo" + ) + ) + ); + + ActionListener modelListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(CustomElandModel.class)); + assertThat(model.getTaskSettings(), instanceOf(EmptyTaskSettings.class)); + assertThat(model.getServiceSettings(), instanceOf(CustomElandInternalServiceSettings.class)); + }, e -> { fail("Model parsing failed " + e.getMessage()); }); + + service.parseRequestConfig(randomInferenceEntityId, TaskType.SPARSE_EMBEDDING, settings, Set.of(), modelListener); + } + private ActionListener 
getModelVerificationActionListener(MultilingualE5SmallInternalServiceSettings e5ServiceSettings) { return ActionListener.wrap(model -> { assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), model ); }, e -> { fail("Model parsing failed " + e.getMessage()); }); @@ -371,7 +378,10 @@ public void testParsePersistedConfig() { ) ); - expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); + expectThrows( + IllegalArgumentException.class, + () -> service.parsePersistedConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings) + ); } @@ -397,12 +407,17 @@ public void testParsePersistedConfig() { CustomElandEmbeddingModel parsedModel = (CustomElandEmbeddingModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); var elandServiceSettings = new CustomElandInternalTextEmbeddingServiceSettings(1, 4, "invalid", null); assertEquals( - new CustomElandEmbeddingModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, elandServiceSettings), + new CustomElandEmbeddingModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + elandServiceSettings + ), parsedModel ); } @@ -436,11 +451,16 @@ public void testParsePersistedConfig() { MultilingualE5SmallModel parsedModel = (MultilingualE5SmallModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), parsedModel ); } @@ -456,6 +476,8 @@ public void testParsePersistedConfig() { ) ); settings.put("not_a_valid_config_setting", randomAlphaOfLength(10)); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } @@ -476,12 +498,13 @@ public void testParsePersistedConfig() { ) ) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } } @SuppressWarnings("unchecked") - public void testChunkInfer() { + public void testChunkInfer_e5() { var mlTrainedModelResults = new ArrayList(); mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); @@ -568,6 +591,63 @@ public void testChunkInfer() { assertTrue("Listener not called", gotResults.get()); } + @SuppressWarnings("unchecked") + public void testChunkInfer_Sparse() { + var mlTrainedModelResults = new ArrayList(); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(new ErrorInferenceResults(new RuntimeException("boom"))); + var response = new 
InferModelAction.Response(mlTrainedModelResults, "foo", true); + + ThreadPool threadpool = new TestThreadPool("test"); + Client client = mock(Client.class); + when(client.threadPool()).thenReturn(threadpool); + doAnswer(invocationOnMock -> { + var listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(response); + return null; + }).when(client).execute(same(InferModelAction.INSTANCE), any(InferModelAction.Request.class), any(ActionListener.class)); + + var model = new CustomElandModel( + "foo", + TaskType.SPARSE_EMBEDDING, + "elasticsearch", + new ElasticsearchInternalServiceSettings(1, 1, "model-id", null) + ); + var service = createService(client); + + var gotResults = new AtomicBoolean(); + var resultsListener = ActionListener.>wrap(chunkedResponse -> { + assertThat(chunkedResponse, hasSize(3)); + assertThat(chunkedResponse.get(0), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result1 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(0); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(0)).getChunks(), result1.getChunkedResults()); + assertThat(chunkedResponse.get(1), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result2 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(1); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(1)).getChunks(), result2.getChunkedResults()); + var result3 = (ErrorChunkedInferenceResults) chunkedResponse.get(2); + assertThat(result3.getException(), instanceOf(RuntimeException.class)); + assertThat(result3.getException().getMessage(), containsString("boom")); + gotResults.set(true); + }, ESTestCase::fail); + + service.chunkedInfer( + model, + null, + List.of("foo", "bar"), + Map.of(), + InputType.SEARCH, + new ChunkingOptions(null, null), + InferenceAction.Request.DEFAULT_TIMEOUT, + ActionListener.runAfter(resultsListener, () -> terminate(threadpool)) + ); + + if (gotResults.get() == false) { + terminate(threadpool); + } + assertTrue("Listener not called", gotResults.get()); + } + @SuppressWarnings("unchecked") public void testChunkInferSetsTokenization() { var expectedSpan = new AtomicInteger(); @@ -711,7 +791,7 @@ public void testParseRequestConfigEland_PreservesTaskType() { ) ); - var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING)); + var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING)); CustomElandModel expectedModel = getCustomElandModel(taskType); PlainActionFuture listener = new PlainActionFuture<>(); @@ -739,6 +819,13 @@ private CustomElandModel getCustomElandModel(TaskType taskType) { ElasticsearchInternalService.NAME, serviceSettings ); + } else if (taskType == TaskType.SPARSE_EMBEDDING) { + expectedModel = new CustomElandModel( + randomInferenceEntityId, + taskType, + ElasticsearchInternalService.NAME, + new CustomElandInternalServiceSettings(1, 4, "custom-model", null) + ); } return expectedModel; } @@ -867,21 +954,4 @@ private ElasticsearchInternalService createService(Client client) { var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client); return new ElasticsearchInternalService(context); } - - public static Model randomModelConfig(String inferenceEntityId) { - List givenList = Arrays.asList("MultilingualE5SmallModel"); - Random rand = org.elasticsearch.common.Randomness.get(); - String model = givenList.get(rand.nextInt(givenList.size())); - - return switch (model) { - case 
"MultilingualE5SmallModel" -> new MultilingualE5SmallModel( - inferenceEntityId, - TaskType.TEXT_EMBEDDING, - ElasticsearchInternalService.NAME, - MultilingualE5SmallInternalServiceSettingsTests.createRandom() - ); - default -> throw new IllegalArgumentException("model " + model + " is not supported for testing"); - }; - } - } From 70261214641bfacada301bc5dd9392e0bc2a4594 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 29 Aug 2024 12:51:24 -0400 Subject: [PATCH 055/144] ESQL: Move Exchange stuff to NamedWriteable (#112236) This moves all of our `Exchange` style `PhysicalPlan`s to `NamedWriteable` to line up with the rest of Elasticsearch. --- .../xpack/esql/io/stream/PlanNamedTypes.java | 52 +----------- .../esql/plan/physical/ExchangeExec.java | 50 ++++++++++- .../esql/plan/physical/ExchangeSinkExec.java | 33 +++++++ .../plan/physical/ExchangeSourceExec.java | 26 ++++++ .../esql/plan/physical/PhysicalPlan.java | 11 ++- .../xpack/esql/planner/PlannerUtils.java | 4 +- .../LocalPhysicalPlanOptimizerTests.java | 2 +- .../optimizer/PhysicalPlanOptimizerTests.java | 2 +- .../plan/AbstractNodeSerializationTests.java | 7 ++ ...AbstractLogicalPlanSerializationTests.java | 5 -- ...bstractPhysicalPlanSerializationTests.java | 15 +++- .../ExchangeExecSerializationTests.java | 47 ++++++++++ .../ExchangeSinkExecSerializationTests.java | 85 +++++++------------ .../ExchangeSourceExecSerializationTests.java | 40 +++++++++ 14 files changed, 260 insertions(+), 119 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExecSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExecSerializationTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java index af82ceb4bf809..56eab73cd042b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java @@ -92,14 +92,9 @@ public static List namedTypeEntries() { of(PhysicalPlan.class, EsSourceExec.ENTRY), of(PhysicalPlan.class, EvalExec.ENTRY), of(PhysicalPlan.class, EnrichExec.class, PlanNamedTypes::writeEnrichExec, PlanNamedTypes::readEnrichExec), - of(PhysicalPlan.class, ExchangeExec.class, PlanNamedTypes::writeExchangeExec, PlanNamedTypes::readExchangeExec), - of(PhysicalPlan.class, ExchangeSinkExec.class, PlanNamedTypes::writeExchangeSinkExec, PlanNamedTypes::readExchangeSinkExec), - of( - PhysicalPlan.class, - ExchangeSourceExec.class, - PlanNamedTypes::writeExchangeSourceExec, - PlanNamedTypes::readExchangeSourceExec - ), + of(PhysicalPlan.class, ExchangeExec.ENTRY), + of(PhysicalPlan.class, ExchangeSinkExec.ENTRY), + of(PhysicalPlan.class, ExchangeSourceExec.ENTRY), of(PhysicalPlan.class, FieldExtractExec.class, PlanNamedTypes::writeFieldExtractExec, PlanNamedTypes::readFieldExtractExec), of(PhysicalPlan.class, FilterExec.class, PlanNamedTypes::writeFilterExec, PlanNamedTypes::readFilterExec), of(PhysicalPlan.class, FragmentExec.class, PlanNamedTypes::writeFragmentExec, PlanNamedTypes::readFragmentExec), @@ -174,47 +169,6 @@ static void writeEnrichExec(PlanStreamOutput out, EnrichExec enrich) throws IOEx out.writeNamedWriteableCollection(enrich.enrichFields()); } - static ExchangeExec readExchangeExec(PlanStreamInput in) throws 
IOException { - return new ExchangeExec( - Source.readFrom(in), - in.readNamedWriteableCollectionAsList(Attribute.class), - in.readBoolean(), - in.readPhysicalPlanNode() - ); - } - - static void writeExchangeExec(PlanStreamOutput out, ExchangeExec exchangeExec) throws IOException { - Source.EMPTY.writeTo(out); - out.writeNamedWriteableCollection(exchangeExec.output()); - out.writeBoolean(exchangeExec.isInBetweenAggs()); - out.writePhysicalPlanNode(exchangeExec.child()); - } - - static ExchangeSinkExec readExchangeSinkExec(PlanStreamInput in) throws IOException { - return new ExchangeSinkExec( - Source.readFrom(in), - in.readNamedWriteableCollectionAsList(Attribute.class), - in.readBoolean(), - in.readPhysicalPlanNode() - ); - } - - static void writeExchangeSinkExec(PlanStreamOutput out, ExchangeSinkExec exchangeSinkExec) throws IOException { - Source.EMPTY.writeTo(out); - out.writeNamedWriteableCollection(exchangeSinkExec.output()); - out.writeBoolean(exchangeSinkExec.isIntermediateAgg()); - out.writePhysicalPlanNode(exchangeSinkExec.child()); - } - - static ExchangeSourceExec readExchangeSourceExec(PlanStreamInput in) throws IOException { - return new ExchangeSourceExec(Source.readFrom(in), in.readNamedWriteableCollectionAsList(Attribute.class), in.readBoolean()); - } - - static void writeExchangeSourceExec(PlanStreamOutput out, ExchangeSourceExec exchangeSourceExec) throws IOException { - out.writeNamedWriteableCollection(exchangeSourceExec.output()); - out.writeBoolean(exchangeSourceExec.isIntermediateAgg()); - } - static FieldExtractExec readFieldExtractExec(PlanStreamInput in) throws IOException { return new FieldExtractExec(Source.readFrom(in), in.readPhysicalPlanNode(), in.readNamedWriteableCollectionAsList(Attribute.class)); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java index 61c65c484059e..f20b218f28efb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExec.java @@ -7,15 +7,27 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import java.io.IOException; import java.util.List; +import java.util.Objects; import static java.util.Collections.emptyList; public class ExchangeExec extends UnaryExec { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + PhysicalPlan.class, + "ExchangeExec", + ExchangeExec::new + ); private final List output; private final boolean inBetweenAggs; @@ -30,12 +42,34 @@ public ExchangeExec(Source source, List output, boolean inBetweenAggs this.inBetweenAggs = inBetweenAggs; } + private ExchangeExec(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteableCollectionAsList(Attribute.class), + in.readBoolean(), + ((PlanStreamInput) in).readPhysicalPlanNode() + ); + } + + @Override + public 
void writeTo(StreamOutput out) throws IOException { + Source.EMPTY.writeTo(out); + out.writeNamedWriteableCollection(output); + out.writeBoolean(inBetweenAggs()); + ((PlanStreamOutput) out).writePhysicalPlanNode(child()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + @Override public List output() { return output.isEmpty() ? super.output() : output; } - public boolean isInBetweenAggs() { + public boolean inBetweenAggs() { return inBetweenAggs; } @@ -48,4 +82,18 @@ public UnaryExec replaceChild(PhysicalPlan newChild) { protected NodeInfo info() { return NodeInfo.create(this, ExchangeExec::new, output, inBetweenAggs, child()); } + + @Override + public boolean equals(Object obj) { + if (super.equals(obj) == false) { + return false; + } + ExchangeExec other = (ExchangeExec) obj; + return output.equals(other.output) && inBetweenAggs == other.inBetweenAggs; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), output, inBetweenAggs); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExec.java index 2f7c4a93eec71..2992619da75ef 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExec.java @@ -7,14 +7,25 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import java.io.IOException; import java.util.List; import java.util.Objects; public class ExchangeSinkExec extends UnaryExec { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + PhysicalPlan.class, + "ExchangeSinkExec", + ExchangeSinkExec::new + ); private final List output; private final boolean intermediateAgg; @@ -25,6 +36,28 @@ public ExchangeSinkExec(Source source, List output, boolean intermedi this.intermediateAgg = intermediateAgg; } + private ExchangeSinkExec(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteableCollectionAsList(Attribute.class), + in.readBoolean(), + ((PlanStreamInput) in).readPhysicalPlanNode() + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + Source.EMPTY.writeTo(out); + out.writeNamedWriteableCollection(output()); + out.writeBoolean(isIntermediateAgg()); + ((PlanStreamOutput) out).writePhysicalPlanNode(child()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + @Override public List output() { return output; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExec.java index 44c9b38feee48..e679a9b131d1d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExec.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExec.java @@ -7,14 +7,24 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import java.io.IOException; import java.util.List; import java.util.Objects; public class ExchangeSourceExec extends LeafExec { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + PhysicalPlan.class, + "ExchangeSourceExec", + ExchangeSourceExec::new + ); private final List output; private final boolean intermediateAgg; @@ -25,6 +35,22 @@ public ExchangeSourceExec(Source source, List output, boolean interme this.intermediateAgg = intermediateAgg; } + private ExchangeSourceExec(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteableCollectionAsList(Attribute.class), in.readBoolean()); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteableCollection(output()); + out.writeBoolean(isIntermediateAgg()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + @Override public List output() { return output; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java index 60e44a5140dfa..6fcdf573564ff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/PhysicalPlan.java @@ -23,7 +23,16 @@ */ public abstract class PhysicalPlan extends QueryPlan { public static List getNamedWriteables() { - return List.of(AggregateExec.ENTRY, DissectExec.ENTRY, EsQueryExec.ENTRY, EsSourceExec.ENTRY, EvalExec.ENTRY); + return List.of( + AggregateExec.ENTRY, + DissectExec.ENTRY, + EsQueryExec.ENTRY, + EsSourceExec.ENTRY, + EvalExec.ENTRY, + ExchangeExec.ENTRY, + ExchangeSinkExec.ENTRY, + ExchangeSourceExec.ENTRY + ); } public PhysicalPlan(Source source, List children) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index 7187de4676fe7..29be49e60ad37 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -73,9 +73,9 @@ public static Tuple breakPlanBetweenCoordinatorAndDa PhysicalPlan coordinatorPlan = plan.transformUp(ExchangeExec.class, e -> { // remember the datanode subplan and wire it to a sink var subplan = e.child(); - dataNodePlan.set(new ExchangeSinkExec(e.source(), e.output(), e.isInBetweenAggs(), subplan)); + dataNodePlan.set(new ExchangeSinkExec(e.source(), e.output(), e.inBetweenAggs(), subplan)); - return new ExchangeSourceExec(e.source(), e.output(), e.isInBetweenAggs()); + return new ExchangeSourceExec(e.source(), e.output(), e.inBetweenAggs()); }); return new 
Tuple<>(coordinatorPlan, dataNodePlan.get()); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 5fba11c13561c..ee66e3043b62f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -504,7 +504,7 @@ public boolean exists(String field) { assertThat(agg.getMode(), is(FINAL)); assertThat(Expressions.names(agg.aggregates()), contains("c")); var exchange = as(agg.child(), ExchangeExec.class); - assertThat(exchange.isInBetweenAggs(), is(true)); + assertThat(exchange.inBetweenAggs(), is(true)); var localSource = as(exchange.child(), LocalSourceExec.class); assertThat(Expressions.names(localSource.output()), contains("count", "seen")); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 7dfa36011bf1f..5b8a465b05f1c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -4603,7 +4603,7 @@ static PhysicalPlan localRelationshipAlignment(PhysicalPlan l) { // handle local reduction alignment return l.transformUp(ExchangeExec.class, exg -> { PhysicalPlan pl = exg; - if (exg.isInBetweenAggs() && exg.child() instanceof LocalSourceExec lse) { + if (exg.inBetweenAggs() && exg.child() instanceof LocalSourceExec lse) { var output = exg.output(); if (lse.output().equals(output) == false) { pl = exg.replaceChild(new LocalSourceExec(lse.source(), output, lse.supplier())); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/AbstractNodeSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/AbstractNodeSerializationTests.java index 40dc05dd64cc0..e6f5d6e4fac70 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/AbstractNodeSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/AbstractNodeSerializationTests.java @@ -9,8 +9,10 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.test.AbstractWireTestCase; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; import org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; @@ -18,6 +20,7 @@ import org.junit.Before; import java.io.IOException; +import java.util.List; import java.util.Map; import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; @@ -44,6 +47,10 @@ public static Source randomSource() { return new Source(lineNumber + 1, offset, text); } + public static List randomFieldAttributes(int min, int max, boolean onlyRepresentable) { + return randomList(min, max, () -> FieldAttributeTests.createFieldAttribute(0, onlyRepresentable)); + } + @Override 
protected final T copyInstance(T instance, TransportVersion version) throws IOException { return copyInstance( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java index 1b9df46a1c842..6936c96a143d4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java @@ -13,7 +13,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Node; -import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelationSerializationTests; @@ -30,10 +29,6 @@ public static LogicalPlan randomChild(int depth) { return randomBoolean() ? EsRelationSerializationTests.randomEsRelation() : LocalRelationSerializationTests.randomLocalRelation(); } - public static List randomFieldAttributes(int min, int max, boolean onlyRepresentable) { - return randomList(min, max, () -> FieldAttributeTests.createFieldAttribute(0, onlyRepresentable)); - } - @Override protected final NamedWriteableRegistry getNamedWriteableRegistry() { List entries = new ArrayList<>(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java index 2a05c472328e5..4b74114a0e01c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/AbstractPhysicalPlanSerializationTests.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import java.util.ArrayList; import java.util.List; @@ -25,14 +26,21 @@ import static org.elasticsearch.xpack.esql.plan.physical.AggregateExecSerializationTests.randomAggregateExec; import static org.elasticsearch.xpack.esql.plan.physical.DissectExecSerializationTests.randomDissectExec; import static org.elasticsearch.xpack.esql.plan.physical.EsSourceExecSerializationTests.randomEsSourceExec; +import static org.elasticsearch.xpack.esql.plan.physical.ExchangeExecSerializationTests.randomExchangeExec; +import static org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExecSerializationTests.randomExchangeSinkExec; +import static org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExecSerializationTests.randomExchangeSourceExec; public abstract class AbstractPhysicalPlanSerializationTests extends AbstractNodeSerializationTests { public static PhysicalPlan randomChild(int depth) { if (randomBoolean() && depth < 4) { - // TODO more random options - return randomBoolean() ? 
randomDissectExec(depth + 1) : randomAggregateExec(depth + 1);
+            return switch (between(0, 3)) {
+                case 0 -> randomDissectExec(depth + 1);
+                case 1 -> randomExchangeExec(depth + 1);
+                case 2 -> randomExchangeSinkExec(depth + 1);
+                default -> randomAggregateExec(depth + 1);
+            };
         }
-        return randomEsSourceExec();
+        return randomBoolean() ? randomExchangeSourceExec() : randomEsSourceExec();
     }
 
     public static Integer randomEstimatedRowSize() {
@@ -43,6 +51,7 @@ public static Integer randomEstimatedRowSize() {
     protected final NamedWriteableRegistry getNamedWriteableRegistry() {
         List<NamedWriteableRegistry.Entry> entries = new ArrayList<>();
         entries.addAll(PhysicalPlan.getNamedWriteables());
+        entries.addAll(LogicalPlan.getNamedWriteables());
         entries.addAll(AggregateFunction.getNamedWriteables());
         entries.addAll(Expression.getNamedWriteables());
         entries.addAll(Attribute.getNamedWriteables());
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExecSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExecSerializationTests.java
new file mode 100644
index 0000000000000..572eac0ff409c
--- /dev/null
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeExecSerializationTests.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.plan.physical;
+
+import org.elasticsearch.xpack.esql.core.expression.Attribute;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+
+import java.io.IOException;
+import java.util.List;
+
+public class ExchangeExecSerializationTests extends AbstractPhysicalPlanSerializationTests<ExchangeExec> {
+    static ExchangeExec randomExchangeExec(int depth) {
+        Source source = randomSource();
+        List<Attribute> output = randomFieldAttributes(1, 5, false);
+        boolean inBetweenAggs = randomBoolean();
+        PhysicalPlan child = randomChild(depth);
+        return new ExchangeExec(source, output, inBetweenAggs, child);
+    }
+
+    @Override
+    protected ExchangeExec createTestInstance() {
+        return randomExchangeExec(0);
+    }
+
+    @Override
+    protected ExchangeExec mutateInstance(ExchangeExec instance) throws IOException {
+        List<Attribute> output = instance.output();
+        boolean inBetweenAggs = instance.inBetweenAggs();
+        PhysicalPlan child = instance.child();
+        switch (between(0, 2)) {
+            case 0 -> output = randomValueOtherThan(output, () -> randomFieldAttributes(1, 5, false));
+            case 1 -> inBetweenAggs = false == inBetweenAggs;
+            case 2 -> child = randomValueOtherThan(child, () -> randomChild(0));
+        }
+        return new ExchangeExec(instance.source(), output, inBetweenAggs, child);
+    }
+
+    @Override
+    protected boolean alwaysEmptySource() {
+        return true;
+    }
+}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExecSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExecSerializationTests.java
index ae58c49eade17..be30ac20df64e 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExecSerializationTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSinkExecSerializationTests.java
@@ -8,21 +8,13 @@
 package org.elasticsearch.xpack.esql.plan.physical;
 
 import 
org.elasticsearch.common.io.stream.BytesStreamOutput; -import org.elasticsearch.common.io.stream.NamedWriteableRegistry; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.compute.data.Block; import org.elasticsearch.index.IndexMode; -import org.elasticsearch.search.SearchModule; -import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.analysis.Analyzer; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.EsIndexSerializationTests; import org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry; @@ -30,31 +22,44 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Limit; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; -import org.elasticsearch.xpack.esql.session.Configuration; -import org.junit.Before; import java.io.IOException; -import java.util.ArrayList; import java.util.List; -import java.util.Map; import static org.elasticsearch.test.ByteSizeEqualsMatcher.byteSizeEquals; -import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; import static org.hamcrest.Matchers.equalTo; -public class ExchangeSinkExecSerializationTests extends ESTestCase { - // TODO port this to AbstractPhysicalPlanSerializationTests when implementing NamedWriteable - private Configuration config; +public class ExchangeSinkExecSerializationTests extends AbstractPhysicalPlanSerializationTests { + static ExchangeSinkExec randomExchangeSinkExec(int depth) { + Source source = randomSource(); + List output = randomFieldAttributes(1, 5, false); + boolean intermediateAgg = randomBoolean(); + PhysicalPlan child = randomChild(depth); + return new ExchangeSinkExec(source, output, intermediateAgg, child); + } + + @Override + protected ExchangeSinkExec createTestInstance() { + return randomExchangeSinkExec(0); + } - public static Source randomSource() { - int lineNumber = between(0, EXAMPLE_QUERY.length - 1); - String line = EXAMPLE_QUERY[lineNumber]; - int offset = between(0, line.length() - 2); - int length = between(1, line.length() - offset - 1); - String text = line.substring(offset, offset + length); - return new Source(lineNumber + 1, offset, text); + @Override + protected ExchangeSinkExec mutateInstance(ExchangeSinkExec instance) throws IOException { + List output = instance.output(); + boolean intermediateAgg = instance.isIntermediateAgg(); + PhysicalPlan child = instance.child(); + switch (between(0, 2)) { + case 0 -> output = randomValueOtherThan(output, () -> randomFieldAttributes(1, 5, false)); + case 1 -> intermediateAgg = false == intermediateAgg; + case 2 -> child = randomValueOtherThan(child, () -> randomChild(0)); + } + return new ExchangeSinkExec(instance.source(), output, intermediateAgg, child); + } + + @Override + protected boolean alwaysEmptySource() { + return true; } /** @@ -128,36 +133,4 @@ 
private void testManyTypeConflicts(boolean withParent, ByteSizeValue expected) t } } } - - private NamedWriteableRegistry getNamedWriteableRegistry() { - List entries = new ArrayList<>(); - entries.addAll(PhysicalPlan.getNamedWriteables()); - entries.addAll(LogicalPlan.getNamedWriteables()); - entries.addAll(AggregateFunction.getNamedWriteables()); - entries.addAll(Expression.getNamedWriteables()); - entries.addAll(Attribute.getNamedWriteables()); - entries.addAll(Block.getNamedWriteables()); - entries.addAll(NamedExpression.getNamedWriteables()); - entries.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); - return new NamedWriteableRegistry(entries); - } - - private Configuration configuration() { - return config; - } - - private static final String[] EXAMPLE_QUERY = new String[] { - "I am the very model of a modern Major-Gineral,", - "I've information vegetable, animal, and mineral,", - "I know the kings of England, and I quote the fights historical", - "From Marathon to Waterloo, in order categorical;", - "I'm very well acquainted, too, with matters mathematical,", - "I understand equations, both the simple and quadratical,", - "About binomial theorem I'm teeming with a lot o' news,", - "With many cheerful facts about the square of the hypotenuse." }; - - @Before - public void initConfig() { - config = randomConfiguration(String.join("\n", EXAMPLE_QUERY), Map.of()); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExecSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExecSerializationTests.java new file mode 100644 index 0000000000000..a1a890f3addd6 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/ExchangeSourceExecSerializationTests.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.esql.plan.physical;
+
+import org.elasticsearch.xpack.esql.core.expression.Attribute;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+
+import java.io.IOException;
+import java.util.List;
+
+public class ExchangeSourceExecSerializationTests extends AbstractPhysicalPlanSerializationTests<ExchangeSourceExec> {
+    static ExchangeSourceExec randomExchangeSourceExec() {
+        Source source = randomSource();
+        List<Attribute> output = randomFieldAttributes(1, 5, false);
+        boolean intermediateAgg = randomBoolean();
+        return new ExchangeSourceExec(source, output, intermediateAgg);
+    }
+
+    @Override
+    protected ExchangeSourceExec createTestInstance() {
+        return randomExchangeSourceExec();
+    }
+
+    @Override
+    protected ExchangeSourceExec mutateInstance(ExchangeSourceExec instance) throws IOException {
+        List<Attribute> output = instance.output();
+        boolean intermediateAgg = instance.isIntermediateAgg();
+        if (randomBoolean()) {
+            output = randomValueOtherThan(output, () -> randomFieldAttributes(1, 5, false));
+        } else {
+            intermediateAgg = false == intermediateAgg;
+        }
+        return new ExchangeSourceExec(instance.source(), output, intermediateAgg);
+    }
+}

From 84bd31eb8eba90d568cf213cdef475d8d03c2bdd Mon Sep 17 00:00:00 2001
From: Ryan Ernst
Date: Thu, 29 Aug 2024 10:20:43 -0700
Subject: [PATCH 056/144] Fix shutdown race condition in server start (#112300)

When the server CLI is preparing to start Elasticsearch, it may receive
a SIGTERM. That signal causes the close method of the CLI to be
invoked, which checks for a non-null server process, and stops it if it
exists. Since shutdown occurs in a different thread, it is possible
close is invoked, sees the server process is null, so skips stopping
it, but then before close finishes the server process is started.

Normally the above case is ok; the close method will cause Java to
exit, and the child process will be sent a SIGTERM. However, in the
case of serverless, close is wrapped so that the CLI can wait on
uploading possible heapdumps. This presents the possibility that close
thinks the child process has been stopped, but it hasn't, so the
wrapped close waits indefinitely on the child process that was started
up during the race.

This commit guards against the startup race by locking the critical
sections that create the child process and stop it. Either the child
process is created first, and will then be stopped immediately, or
close is invoked first, marking that the CLI is shutting down, and
creating the child process will be skipped.
---
 .../elasticsearch/server/cli/ServerCli.java  | 19 ++++-
 .../server/cli/ServerCliTests.java           | 78 +++++++++++++++----
 2 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java
index 7b904d4cb5a89..bea7fbb7f63e8 100644
--- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java
+++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/ServerCli.java
@@ -32,6 +32,7 @@
 import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.Locale;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 /**
  * The main CLI for running Elasticsearch.
@@ -44,6 +45,8 @@ class ServerCli extends EnvironmentAwareCommand {
     private final OptionSpecBuilder quietOption;
     private final OptionSpec<String> enrollmentTokenOption;
 
+    // flag for indicating shutdown has begun. 
we use an AtomicBoolean to double as a synchronization object + private final AtomicBoolean shuttingDown = new AtomicBoolean(false); private volatile ServerProcess server; // visible for testing @@ -98,7 +101,14 @@ public void execute(Terminal terminal, OptionSet options, Environment env, Proce syncPlugins(terminal, env, processInfo); ServerArgs args = createArgs(options, env, secrets, processInfo); - this.server = startServer(terminal, processInfo, args); + synchronized (shuttingDown) { + // if we are shutting down there is no reason to start the server + if (shuttingDown.get()) { + terminal.println("CLI is shutting down, skipping starting server process"); + return; + } + this.server = startServer(terminal, processInfo, args); + } } if (options.has(daemonizeOption)) { @@ -233,8 +243,11 @@ private ServerArgs createArgs(OptionSet options, Environment env, SecureSettings @Override public void close() throws IOException { - if (server != null) { - server.stop(); + synchronized (shuttingDown) { + shuttingDown.set(true); + if (server != null) { + server.stop(); + } } } diff --git a/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/ServerCliTests.java b/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/ServerCliTests.java index 38a64a778fc27..e603790051c0c 100644 --- a/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/ServerCliTests.java +++ b/distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/ServerCliTests.java @@ -36,6 +36,8 @@ import java.util.List; import java.util.Locale; import java.util.Optional; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; @@ -50,6 +52,7 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.matchesRegex; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.sameInstance; public class ServerCliTests extends CommandTestCase { @@ -383,6 +386,52 @@ public void testSecureSettingsLoaderWithNullPassword() throws Exception { assertEquals("", loader.password); } + public void testProcessCreationRace() throws Exception { + for (int i = 0; i < 10; ++i) { + CyclicBarrier raceStart = new CyclicBarrier(2); + TestServerCli cli = new TestServerCli() { + @Override + void syncPlugins(Terminal terminal, Environment env, ProcessInfo processInfo) throws Exception { + super.syncPlugins(terminal, env, processInfo); + raceStart.await(); + } + + @Override + public void close() throws IOException { + try { + raceStart.await(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new AssertionError(ie); + } catch (BrokenBarrierException e) { + throw new AssertionError(e); + } + super.close(); + } + }; + Thread closeThread = new Thread(() -> { + try { + cli.close(); + } catch (IOException e) { + throw new AssertionError(e); + } + }); + closeThread.start(); + cli.main(new String[] {}, terminal, new ProcessInfo(sysprops, envVars, esHomeDir)); + closeThread.join(); + + if (cli.getServer() == null) { + // close won the race, so server should never have been started + assertThat(cli.startServerCalled, is(false)); + } else { + // creation won the race, so check we correctly waited on it and stopped + assertThat(cli.getServer(), sameInstance(mockServer)); + assertThat(mockServer.waitForCalled, is(true)); + assertThat(mockServer.stopCalled, 
is(true)); + } + } + } + private MockSecureSettingsLoader loadWithMockSecureSettingsLoader() throws Exception { var loader = new MockSecureSettingsLoader(); this.mockSecureSettingsLoader = loader; @@ -465,9 +514,9 @@ public void execute(Terminal terminal, OptionSet options, Environment env, Proce } private class MockServerProcess extends ServerProcess { - boolean detachCalled = false; - boolean waitForCalled = false; - boolean stopCalled = false; + volatile boolean detachCalled = false; + volatile boolean waitForCalled = false; + volatile boolean stopCalled = false; MockServerProcess() { super(null, null); @@ -505,6 +554,8 @@ void reset() { } private class TestServerCli extends ServerCli { + boolean startServerCalled = false; + @Override protected Command loadTool(String toolname, String libs) { if (toolname.equals("auto-configure-node")) { @@ -551,20 +602,21 @@ protected SecureSettingsLoader secureSettingsLoader(Environment env) { return new KeystoreSecureSettingsLoader(); } + + @Override + protected ServerProcess startServer(Terminal terminal, ProcessInfo processInfo, ServerArgs args) throws Exception { + startServerCalled = true; + if (argsValidator != null) { + argsValidator.accept(args); + } + mockServer.reset(); + return mockServer; + } } @Override protected Command newCommand() { - return new TestServerCli() { - @Override - protected ServerProcess startServer(Terminal terminal, ProcessInfo processInfo, ServerArgs args) { - if (argsValidator != null) { - argsValidator.accept(args); - } - mockServer.reset(); - return mockServer; - } - }; + return new TestServerCli(); } static class MockSecureSettingsLoader implements SecureSettingsLoader { From bc1706f47906ec52fb7cf08a53d5ee002f8c94eb Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 29 Aug 2024 11:29:24 -0700 Subject: [PATCH 057/144] Add index mode to internal field-caps response (#112368) We need the index mode from resolved indices for the METRICS command and future LOGS command in ES|QL. This change adds the index mode to the internal field-caps index response, which is not user-facing. ES|QL will use this output, and the overhead should be minimal, as we serialize one index_mode per mapping_hash group. 
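Roughly, the grouped encoding looks like the following sketch (simplified,
not the literal implementation; `groups` stands in for the per-mapping-hash
grouping, and the authoritative logic is in FieldCapabilitiesIndexResponse
and IndexMode in this change):

    // sketch: responses sharing a mapping hash form one compressed group,
    // so the index mode costs one byte per group rather than one per index
    for (List<FieldCapabilitiesIndexResponse> group : groups) {
        out.writeCollection(group, (o, r) -> o.writeString(r.getIndexName()));
        IndexMode.writeTo(group.get(0).getIndexMode(), out); // single shared byte
        out.writeString(group.get(0).getIndexMappingHash());
        // field caps of the first response are written once for the whole group
    }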
--- .../search/fieldcaps/FieldCapabilitiesIT.java | 58 +++++++++++++++++++ .../org/elasticsearch/TransportVersions.java | 2 + .../fieldcaps/FieldCapabilitiesFetcher.java | 10 ++-- .../FieldCapabilitiesIndexResponse.java | 31 ++++++++-- .../TransportFieldCapabilitiesAction.java | 24 +++++++- .../org/elasticsearch/index/IndexMode.java | 21 +++++++ .../FieldCapabilitiesIndexResponseTests.java | 7 ++- .../FieldCapabilitiesNodeResponseTests.java | 36 +++++++++--- .../FieldCapabilitiesResponseTests.java | 12 ++-- .../fieldcaps/RequestDispatcherTests.java | 9 ++- .../xpack/esql/analysis/AnalyzerTests.java | 4 +- .../enrich/EnrichPolicyResolverTests.java | 3 +- .../esql/stats/PlanExecutorMetricsTests.java | 4 +- .../esql/type/EsqlDataTypeRegistryTests.java | 4 +- 14 files changed, 194 insertions(+), 31 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java index 0bce9ecb178d0..cc272042d5384 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java @@ -89,6 +89,7 @@ import static org.hamcrest.Matchers.array; import static org.hamcrest.Matchers.arrayContainingInAnyOrder; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasKey; @@ -711,6 +712,63 @@ public void testCancel() throws Exception { } } + public void testIndexMode() throws Exception { + Map indexModes = new HashMap<>(); + // metrics + { + final String metricsMapping = """ + { + "properties": { + "@timestamp": { "type": "date" }, + "hostname": { "type": "keyword", "time_series_dimension": true }, + "request_count" : { "type" : "long", "time_series_metric" : "counter" }, + "cluster": {"type": "keyword"} + } + } + """; + Settings settings = Settings.builder().put("mode", "time_series").putList("routing_path", List.of("hostname")).build(); + int numIndices = between(1, 5); + for (int i = 0; i < numIndices; i++) { + assertAcked(indicesAdmin().prepareCreate("test_metrics_" + i).setSettings(settings).setMapping(metricsMapping).get()); + indexModes.put("test_metrics_" + i, IndexMode.TIME_SERIES); + assertAcked(indicesAdmin().prepareCreate("test_old_metrics_" + i).setMapping(metricsMapping).get()); + indexModes.put("test_old_metrics_" + i, IndexMode.STANDARD); + } + } + // logsdb + { + final String logsMapping = """ + { + "properties": { + "@timestamp": { "type": "date" }, + "hostname": { "type": "keyword"}, + "request_count" : { "type" : "long"}, + "cluster": {"type": "keyword"} + } + } + """; + Settings settings = Settings.builder().put("mode", "logsdb").build(); + int numIndices = between(1, 5); + for (int i = 0; i < numIndices; i++) { + assertAcked(indicesAdmin().prepareCreate("test_logs_" + i).setSettings(settings).setMapping(logsMapping).get()); + indexModes.put("test_logs_" + i, IndexMode.LOGSDB); + assertAcked(indicesAdmin().prepareCreate("test_old_logs_" + i).setMapping(logsMapping).get()); + indexModes.put("test_old_logs_" + i, IndexMode.STANDARD); + } + } + FieldCapabilitiesRequest request = new FieldCapabilitiesRequest(); + request.setMergeResults(false); + request.indices("test_*"); + request.fields(randomFrom("*", "@timestamp", "host*")); 
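+        // merging is disabled above, so the response keeps one entry per backing
+        // index, letting the test assert the index mode each index was created with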
+ var resp = client().fieldCaps(request).get(); + assertThat(resp.getFailures(), empty()); + Map actualIndexModes = new HashMap<>(); + for (var indexResp : resp.getIndexResponses()) { + actualIndexModes.put(indexResp.getIndexName(), indexResp.getIndexMode()); + } + assertThat(actualIndexModes, equalTo(indexModes)); + } + private void assertIndices(FieldCapabilitiesResponse response, String... indices) { assertNotNull(response.getIndices()); Arrays.sort(indices); diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 5963e3c8058de..7581ee7649696 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -202,6 +202,8 @@ static TransportVersion def(int id) { public static final TransportVersion REPOSITORIES_TELEMETRY = def(8_732_00_0); public static final TransportVersion ML_INFERENCE_ALIBABACLOUD_SEARCH_ADDED = def(8_733_00_0); + public static final TransportVersion FIELD_CAPS_RESPONSE_INDEX_MODE = def(8_734_00_0); + /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java index 51cb05f981177..e435655668882 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java @@ -108,15 +108,15 @@ private FieldCapabilitiesIndexResponse doFetch( null, runtimeFields ); - + var indexMode = searchExecutionContext.getIndexSettings().getMode(); if (searcher != null && canMatchShard(shardId, indexFilter, nowInMillis, searchExecutionContext) == false) { - return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), null, Collections.emptyMap(), false); + return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), null, Collections.emptyMap(), false, indexMode); } final MappingMetadata mapping = indexService.getMetadata().mapping(); String indexMappingHash; if (includeEmptyFields || enableFieldHasValue == false) { - indexMappingHash = mapping != null ? mapping.getSha256() : null; + indexMappingHash = mapping != null ? 
mapping.getSha256() + indexMode : null; } else { // even if the mapping is the same if we return only fields with values we need // to make sure that we consider all the shard-mappings pair, that is why we @@ -129,7 +129,7 @@ private FieldCapabilitiesIndexResponse doFetch( indexMappingHash = fieldPredicate.modifyHash(indexMappingHash); final Map existing = indexMappingHashToResponses.get(indexMappingHash); if (existing != null) { - return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), indexMappingHash, existing, true); + return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), indexMappingHash, existing, true, indexMode); } } task.ensureNotCancelled(); @@ -145,7 +145,7 @@ private FieldCapabilitiesIndexResponse doFetch( if (indexMappingHash != null) { indexMappingHashToResponses.put(indexMappingHash, responseMap); } - return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), indexMappingHash, responseMap, true); + return new FieldCapabilitiesIndexResponse(shardId.getIndexName(), indexMappingHash, responseMap, true, indexMode); } static Map retrieveFieldCaps( diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponse.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponse.java index cc72dd80dceac..5a50ed4c9f573 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponse.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponse.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.IndexMode; import java.io.IOException; import java.util.ArrayList; @@ -33,18 +34,21 @@ public final class FieldCapabilitiesIndexResponse implements Writeable { private final Map responseMap; private final boolean canMatch; private final transient TransportVersion originVersion; + private final IndexMode indexMode; public FieldCapabilitiesIndexResponse( String indexName, @Nullable String indexMappingHash, Map responseMap, - boolean canMatch + boolean canMatch, + IndexMode indexMode ) { this.indexName = indexName; this.indexMappingHash = indexMappingHash; this.responseMap = responseMap; this.canMatch = canMatch; this.originVersion = TransportVersion.current(); + this.indexMode = indexMode; } FieldCapabilitiesIndexResponse(StreamInput in) throws IOException { @@ -57,6 +61,11 @@ public FieldCapabilitiesIndexResponse( } else { this.indexMappingHash = null; } + if (in.getTransportVersion().onOrAfter(TransportVersions.FIELD_CAPS_RESPONSE_INDEX_MODE)) { + this.indexMode = IndexMode.readFrom(in); + } else { + this.indexMode = IndexMode.STANDARD; + } } @Override @@ -67,9 +76,12 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(MAPPING_HASH_VERSION)) { out.writeOptionalString(indexMappingHash); } + if (out.getTransportVersion().onOrAfter(TransportVersions.FIELD_CAPS_RESPONSE_INDEX_MODE)) { + IndexMode.writeTo(indexMode, out); + } } - private record CompressedGroup(String[] indices, String mappingHash, int[] fields) {} + private record CompressedGroup(String[] indices, IndexMode indexMode, String mappingHash, int[] fields) {} static List readList(StreamInput input) throws IOException { if (input.getTransportVersion().before(MAPPING_HASH_VERSION)) { @@ -92,10 +104,12 @@ static List readList(StreamInput input) throws I private static void 
collectCompressedResponses(StreamInput input, int groups, ArrayList responses) throws IOException { final CompressedGroup[] compressedGroups = new CompressedGroup[groups]; + final boolean readIndexMode = input.getTransportVersion().onOrAfter(TransportVersions.FIELD_CAPS_RESPONSE_INDEX_MODE); for (int i = 0; i < groups; i++) { final String[] indices = input.readStringArray(); + final IndexMode indexMode = readIndexMode ? IndexMode.readFrom(input) : IndexMode.STANDARD; final String mappingHash = input.readString(); - compressedGroups[i] = new CompressedGroup(indices, mappingHash, input.readIntArray()); + compressedGroups[i] = new CompressedGroup(indices, indexMode, mappingHash, input.readIntArray()); } final IndexFieldCapabilities[] ifcLookup = input.readArray(IndexFieldCapabilities::readFrom, IndexFieldCapabilities[]::new); for (CompressedGroup compressedGroup : compressedGroups) { @@ -105,7 +119,7 @@ private static void collectCompressedResponses(StreamInput input, int groups, Ar ifc.put(val.name(), val); } for (String index : compressedGroup.indices) { - responses.add(new FieldCapabilitiesIndexResponse(index, compressedGroup.mappingHash, ifc, true)); + responses.add(new FieldCapabilitiesIndexResponse(index, compressedGroup.mappingHash, ifc, true, compressedGroup.indexMode)); } } } @@ -117,7 +131,7 @@ private static void collectResponsesLegacyFormat(StreamInput input, int groups, final String mappingHash = input.readString(); final Map ifc = input.readMap(IndexFieldCapabilities::readFrom); for (String index : indices) { - responses.add(new FieldCapabilitiesIndexResponse(index, mappingHash, ifc, true)); + responses.add(new FieldCapabilitiesIndexResponse(index, mappingHash, ifc, true, IndexMode.STANDARD)); } } } @@ -164,6 +178,9 @@ private static void writeCompressedResponses(StreamOutput output, Map { o.writeCollection(fieldCapabilitiesIndexResponses, (oo, r) -> oo.writeString(r.indexName)); var first = fieldCapabilitiesIndexResponses.get(0); + if (output.getTransportVersion().onOrAfter(TransportVersions.FIELD_CAPS_RESPONSE_INDEX_MODE)) { + IndexMode.writeTo(first.indexMode, o); + } o.writeString(first.indexMappingHash); o.writeVInt(first.responseMap.size()); for (IndexFieldCapabilities ifc : first.responseMap.values()) { @@ -192,6 +209,10 @@ public String getIndexMappingHash() { return indexMappingHash; } + public IndexMode getIndexMode() { + return indexMode; + } + public boolean canMatch() { return canMatch; } diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java index b9bf3bb37c7b4..bb97b0dc48c42 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java @@ -174,7 +174,13 @@ private void doExecuteForked( if (resp.canMatch() && resp.getIndexMappingHash() != null) { FieldCapabilitiesIndexResponse curr = indexMappingHashToResponses.putIfAbsent(resp.getIndexMappingHash(), resp); if (curr != null) { - resp = new FieldCapabilitiesIndexResponse(resp.getIndexName(), curr.getIndexMappingHash(), curr.get(), true); + resp = new FieldCapabilitiesIndexResponse( + resp.getIndexName(), + curr.getIndexMappingHash(), + curr.get(), + true, + curr.getIndexMode() + ); } } if (request.includeEmptyFields()) { @@ -186,7 +192,13 @@ private void doExecuteForked( } Map mergedCaps = new HashMap<>(a.get()); 
mergedCaps.putAll(b.get()); - return new FieldCapabilitiesIndexResponse(a.getIndexName(), a.getIndexMappingHash(), mergedCaps, true); + return new FieldCapabilitiesIndexResponse( + a.getIndexName(), + a.getIndexMappingHash(), + mergedCaps, + true, + a.getIndexMode() + ); }); } if (fieldCapTask.isCancelled()) { @@ -249,7 +261,13 @@ private void doExecuteForked( for (FieldCapabilitiesIndexResponse resp : response.getIndexResponses()) { String indexName = RemoteClusterAware.buildRemoteIndexName(clusterAlias, resp.getIndexName()); handleIndexResponse.accept( - new FieldCapabilitiesIndexResponse(indexName, resp.getIndexMappingHash(), resp.get(), resp.canMatch()) + new FieldCapabilitiesIndexResponse( + indexName, + resp.getIndexMappingHash(), + resp.get(), + resp.canMatch(), + resp.getIndexMode() + ) ); } for (FieldCapabilitiesFailure failure : response.getFailures()) { diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java index b137cfe27a514..96598ba38a3fe 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexMode.java +++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java @@ -12,6 +12,8 @@ import org.elasticsearch.cluster.metadata.MetadataCreateDataStreamService; import org.elasticsearch.cluster.routing.IndexRouting; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; @@ -494,6 +496,25 @@ public static IndexMode fromString(String value) { }; } + public static IndexMode readFrom(StreamInput in) throws IOException { + int mode = in.readByte(); + return switch (mode) { + case 0 -> STANDARD; + case 1 -> TIME_SERIES; + case 2 -> LOGSDB; + default -> throw new IllegalStateException("unexpected index mode [" + mode + "]"); + }; + } + + public static void writeTo(IndexMode indexMode, StreamOutput out) throws IOException { + final int code = switch (indexMode) { + case STANDARD -> 0; + case TIME_SERIES -> 1; + case LOGSDB -> 2; + }; + out.writeByte((byte) code); + } + @Override public String toString() { return getName(); diff --git a/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponseTests.java b/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponseTests.java index 0f5ba959c5ed8..d487f584a20bb 100644 --- a/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesIndexResponseTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.action.fieldcaps; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.test.ESTestCase; @@ -60,9 +61,10 @@ static List randomIndexResponsesWithMappingHash( final List responses = new ArrayList<>(); for (Map.Entry> e : mappingHashToIndices.entrySet()) { Map fieldCaps = randomFieldCaps(); + var indexMode = randomFrom(IndexMode.values()); String mappingHash = e.getKey(); for (String index : e.getValue()) { - responses.add(new FieldCapabilitiesIndexResponse(index, mappingHash, fieldCaps, true)); + responses.add(new FieldCapabilitiesIndexResponse(index, mappingHash, fieldCaps, true, indexMode)); } } return responses; @@ -73,7 +75,8 @@ static List 
randomIndexResponsesWithoutMappingHa int numIndices = between(0, 10); for (int i = 0; i < numIndices; i++) { String index = "index_without_mapping_hash_" + i; - responses.add(new FieldCapabilitiesIndexResponse(index, null, randomFieldCaps(), randomBoolean())); + var indexMode = randomFrom(IndexMode.values()); + responses.add(new FieldCapabilitiesIndexResponse(index, null, randomFieldCaps(), randomBoolean(), indexMode)); } return responses; } diff --git a/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesNodeResponseTests.java b/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesNodeResponseTests.java index 0802e498c43a7..c39cc6ebfd665 100644 --- a/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesNodeResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesNodeResponseTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.AbstractWireSerializingTestCase; import org.elasticsearch.test.TransportVersionUtils; @@ -49,7 +50,9 @@ protected FieldCapabilitiesNodeResponse createTestInstance() { List responses = new ArrayList<>(); int numResponse = randomIntBetween(0, 10); for (int i = 0; i < numResponse; i++) { - responses.add(new FieldCapabilitiesIndexResponse("index_" + i, null, randomFieldCaps(), randomBoolean())); + responses.add( + new FieldCapabilitiesIndexResponse("index_" + i, null, randomFieldCaps(), randomBoolean(), randomFrom(IndexMode.values())) + ); } int numUnmatched = randomIntBetween(0, 3); Set shardIds = new HashSet<>(); @@ -69,21 +72,38 @@ protected FieldCapabilitiesNodeResponse mutateInstance(FieldCapabilitiesNodeResp List newResponses = new ArrayList<>(response.getIndexResponses()); int mutation = response.getIndexResponses().isEmpty() ? 
0 : randomIntBetween(0, 3); switch (mutation) { - case 0 -> newResponses.add(new FieldCapabilitiesIndexResponse("extra_index", null, randomFieldCaps(), randomBoolean())); + case 0 -> newResponses.add( + new FieldCapabilitiesIndexResponse("extra_index", null, randomFieldCaps(), randomBoolean(), randomFrom(IndexMode.values())) + ); case 1 -> { int toRemove = randomInt(newResponses.size() - 1); newResponses.remove(toRemove); } case 2 -> { int toReplace = randomInt(newResponses.size() - 1); - newResponses.set(toReplace, new FieldCapabilitiesIndexResponse("new_index", null, randomFieldCaps(), randomBoolean())); + newResponses.set( + toReplace, + new FieldCapabilitiesIndexResponse( + "new_index", + null, + randomFieldCaps(), + randomBoolean(), + randomFrom(IndexMode.values()) + ) + ); } case 3 -> { int toReplace = randomInt(newResponses.size() - 1); FieldCapabilitiesIndexResponse resp = newResponses.get(toReplace); newResponses.set( toReplace, - new FieldCapabilitiesIndexResponse(resp.getIndexName(), UUIDs.randomBase64UUID(), resp.get(), true) + new FieldCapabilitiesIndexResponse( + resp.getIndexName(), + UUIDs.randomBase64UUID(), + resp.get(), + true, + randomFrom(IndexMode.values()) + ) ); } } @@ -194,9 +214,10 @@ public void testReadNodeResponseFromPre82() throws Exception { "blue_field", new IndexFieldCapabilities("blue_field", "long", false, true, true, false, null, Map.of()) ), - true + true, + IndexMode.STANDARD ), - new FieldCapabilitiesIndexResponse("index_02", null, Map.of(), false), + new FieldCapabilitiesIndexResponse("index_02", null, Map.of(), false, IndexMode.STANDARD), new FieldCapabilitiesIndexResponse( "index_03", null, @@ -206,7 +227,8 @@ public void testReadNodeResponseFromPre82() throws Exception { "_seq_no", new IndexFieldCapabilities("_seq_no", "long", true, true, true, false, null, Map.of()) ), - true + true, + IndexMode.STANDARD ) ) ); diff --git a/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesResponseTests.java b/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesResponseTests.java index 461000fc22b02..cc4d4de1e0f39 100644 --- a/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesResponseTests.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.ChunkedToXContent; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.AbstractWireSerializingTestCase; import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xcontent.ToXContent; @@ -54,7 +55,8 @@ protected FieldCapabilitiesResponse createTestInstance() { int numResponse = randomIntBetween(0, 10); for (int i = 0; i < numResponse; i++) { Map fieldCaps = FieldCapabilitiesIndexResponseTests.randomFieldCaps(); - responses.add(new FieldCapabilitiesIndexResponse("index_" + i, null, fieldCaps, randomBoolean())); + var indexMode = randomFrom(IndexMode.values()); + responses.add(new FieldCapabilitiesIndexResponse("index_" + i, null, fieldCaps, randomBoolean(), indexMode)); } randomResponse = new FieldCapabilitiesResponse(responses, Collections.emptyList()); return randomResponse; @@ -267,9 +269,10 @@ public void testReadCCSResponseFromPre82() throws Exception { "blue_field", new IndexFieldCapabilities("blue_field", "long", false, true, true, false, null, Map.of()) ), - true + true, + IndexMode.STANDARD ), - new 
FieldCapabilitiesIndexResponse("index_02", null, Map.of(), false), + new FieldCapabilitiesIndexResponse("index_02", null, Map.of(), false, IndexMode.STANDARD), new FieldCapabilitiesIndexResponse( "index_03", null, @@ -279,7 +282,8 @@ public void testReadCCSResponseFromPre82() throws Exception { "_seq_no", new IndexFieldCapabilities("_seq_no", "long", true, true, true, false, null, Map.of()) ), - true + true, + IndexMode.STANDARD ) ) ); diff --git a/server/src/test/java/org/elasticsearch/action/fieldcaps/RequestDispatcherTests.java b/server/src/test/java/org/elasticsearch/action/fieldcaps/RequestDispatcherTests.java index f5f35c52044d7..96e615afb498f 100644 --- a/server/src/test/java/org/elasticsearch/action/fieldcaps/RequestDispatcherTests.java +++ b/server/src/test/java/org/elasticsearch/action/fieldcaps/RequestDispatcherTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder; @@ -890,7 +891,13 @@ static FieldCapabilitiesNodeResponse randomNodeResponse( indicesWithMappingHash.computeIfAbsent(index, k -> new ArrayList<>()).add(index); } else { indexResponses.add( - new FieldCapabilitiesIndexResponse(index, null, FieldCapabilitiesIndexResponseTests.randomFieldCaps(), true) + new FieldCapabilitiesIndexResponse( + index, + null, + FieldCapabilitiesIndexResponseTests.randomFieldCaps(), + true, + randomFrom(IndexMode.values()) + ) ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 72a905f4b37a4..e4872b24558bc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2122,7 +2122,9 @@ protected List filteredWarnings() { } private static LogicalPlan analyzeWithEmptyFieldCapsResponse(String query) throws IOException { - List idxResponses = List.of(new FieldCapabilitiesIndexResponse("idx", "idx", Map.of(), true)); + List idxResponses = List.of( + new FieldCapabilitiesIndexResponse("idx", "idx", Map.of(), true, IndexMode.STANDARD) + ); FieldCapabilitiesResponse caps = new FieldCapabilitiesResponse(idxResponses, List.of()); IndexResolution resolution = new IndexResolver(null).mergedMappings("test*", caps); var analyzer = analyzer(resolution, TEST_VERIFIER, configuration(query)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java index 45623a39da936..ebad8e6e13b8c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.client.NoOpClient; 
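
For reference, every construction site in these tests now has to pass the mode explicitly; a minimal example of the updated five-argument constructor, compiling only against this patch (the wrapper class and method names are illustrative):

    import java.util.Map;

    import org.elasticsearch.action.fieldcaps.FieldCapabilitiesIndexResponse;
    import org.elasticsearch.index.IndexMode;

    class ConstructionSketch {
        static FieldCapabilitiesIndexResponse minimalResponse() {
            // index name, mapping hash (null = not shared), field caps, canMatch, index mode
            return new FieldCapabilitiesIndexResponse("index_01", null, Map.of(), true, IndexMode.STANDARD);
        }
    }
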
import org.elasticsearch.test.transport.MockTransportService; @@ -491,7 +492,7 @@ protected void var f = new IndexFieldCapabilities(e.getKey(), e.getValue(), false, false, false, false, null, Map.of()); fieldCaps.put(e.getKey(), f); } - var indexResponse = new FieldCapabilitiesIndexResponse(alias, null, fieldCaps, true); + var indexResponse = new FieldCapabilitiesIndexResponse(alias, null, fieldCaps, true, IndexMode.STANDARD); response = new FieldCapabilitiesResponse(List.of(indexResponse), List.of()); } else { response = new FieldCapabilitiesResponse(List.of(), List.of()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java index 66fd95c4f7726..d3795c9e9d953 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.action.fieldcaps.IndexFieldCapabilities; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; @@ -151,7 +152,8 @@ private List indexFieldCapabilities(String[] ind Map.entry("foo", new IndexFieldCapabilities("foo", "integer", false, true, true, false, null, Map.of())), Map.entry("bar", new IndexFieldCapabilities("bar", "long", false, true, true, false, null, Map.of())) ), - true + true, + IndexMode.STANDARD ) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java index 27be2fe941393..e4e10a5c6af19 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilitiesIndexResponse; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.action.fieldcaps.IndexFieldCapabilities; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -46,7 +47,8 @@ private void resolve(String esTypeName, TimeSeriesParams.MetricType metricType, idx, idx, Map.of(field, new IndexFieldCapabilities(field, esTypeName, false, true, true, false, metricType, Map.of())), - true + true, + IndexMode.TIME_SERIES ) ); From 2dae0533a7bdddd25707624190aa71774221462d Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Thu, 29 Aug 2024 12:22:29 -0700 Subject: [PATCH 058/144] LogsDB QA tests - add dynamic mapping support (#112321) --- .../logsdb/qa/AbstractChallengeRestTest.java | 4 +- ...ardVersusLogsIndexModeChallengeRestIT.java | 52 +++++------ ...ogsIndexModeRandomDataChallengeRestIT.java | 87 +++++++++---------- .../logsdb/qa/matchers/ListEqualMatcher.java | 2 +- .../matchers/source/DynamicFieldMatcher.java | 87 +++++++++++++++++++ .../matchers/source/FieldSpecificMatcher.java | 2 +- .../qa/matchers/source/MappingTransforms.java | 44 ++++++++-- 
.../qa/matchers/source/SourceMatcher.java | 69 ++++++++------- .../qa/matchers/source/SourceTransforms.java | 13 ++- .../logsdb/datageneration/DataGenerator.java | 6 +- .../DataGeneratorSpecification.java | 10 +++ .../logsdb/datageneration/FieldType.java | 29 ++++++- .../datasource/DataSourceRequest.java | 3 +- .../datasource/DataSourceResponse.java | 6 +- .../DefaultMappingParametersHandler.java | 24 ++--- .../DefaultObjectGenerationHandler.java | 37 ++++++-- .../DefaultPrimitiveTypesHandler.java | 3 +- .../logsdb/datageneration/fields/Context.java | 66 ++++++++++---- .../datageneration/fields/DynamicMapping.java | 15 ++++ .../GenericSubObjectFieldDataGenerator.java | 53 ++++------- .../fields/NestedFieldDataGenerator.java | 3 +- .../fields/ObjectFieldDataGenerator.java | 3 +- .../fields/PredefinedField.java | 32 ++++++- .../TopLevelObjectFieldDataGenerator.java | 39 +++++++-- .../DataGeneratorSnapshotTests.java | 11 ++- .../datageneration/DataGeneratorTests.java | 40 ++++++++- 26 files changed, 532 insertions(+), 208 deletions(-) create mode 100644 modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/DynamicFieldMatcher.java create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/DynamicMapping.java diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java index 6292b06d44c9a..1d36a04657e9c 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/AbstractChallengeRestTest.java @@ -233,9 +233,11 @@ private Response indexDocuments( final CheckedSupplier, IOException> documentsSupplier ) throws IOException { final StringBuilder sb = new StringBuilder(); + int id = 0; for (var document : documentsSupplier.get()) { - sb.append("{ \"create\": {} }").append("\n"); + sb.append(Strings.format("{ \"create\": { \"_id\" : \"%d\" } }", id)).append("\n"); sb.append(Strings.toString(document)).append("\n"); + id++; } var request = new Request("POST", "/" + dataStreamName + "/_bulk"); request.setJsonEntity(sb.toString()); diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java index 5824f8fa764f4..9bf1c394f9105 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java @@ -34,8 +34,11 @@ import java.io.IOException; import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; import java.time.temporal.ChronoUnit; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.Map; @@ -178,11 +181,8 @@ protected static void waitForLogs(RestClient client) throws Exception { } public void testMatchAllQuery() throws IOException { - final List documents = new ArrayList<>(); int numberOfDocuments = ESTestCase.randomIntBetween(100, 200); - for (int i = 0; i < numberOfDocuments; i++) { - 
documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); - } + final List documents = generateDocuments(numberOfDocuments); assertDocumentIndexing(documents); @@ -199,11 +199,8 @@ public void testMatchAllQuery() throws IOException { } public void testTermsQuery() throws IOException { - final List documents = new ArrayList<>(); - int numberOfDocuments = randomIntBetween(100, 200); - for (int i = 0; i < numberOfDocuments; i++) { - documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); - } + int numberOfDocuments = ESTestCase.randomIntBetween(100, 200); + final List documents = generateDocuments(numberOfDocuments); assertDocumentIndexing(documents); @@ -220,11 +217,8 @@ public void testTermsQuery() throws IOException { } public void testHistogramAggregation() throws IOException { - final List documents = new ArrayList<>(); - int numberOfDocuments = randomIntBetween(100, 200); - for (int i = 0; i < numberOfDocuments; i++) { - documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); - } + int numberOfDocuments = ESTestCase.randomIntBetween(100, 200); + final List documents = generateDocuments(numberOfDocuments); assertDocumentIndexing(documents); @@ -241,11 +235,8 @@ public void testHistogramAggregation() throws IOException { } public void testTermsAggregation() throws IOException { - final List documents = new ArrayList<>(); - int numberOfDocuments = randomIntBetween(100, 200); - for (int i = 0; i < numberOfDocuments; i++) { - documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); - } + int numberOfDocuments = ESTestCase.randomIntBetween(100, 200); + final List documents = generateDocuments(numberOfDocuments); assertDocumentIndexing(documents); @@ -262,11 +253,8 @@ public void testTermsAggregation() throws IOException { } public void testDateHistogramAggregation() throws IOException { - final List documents = new ArrayList<>(); - int numberOfDocuments = randomIntBetween(100, 200); - for (int i = 0; i < numberOfDocuments; i++) { - documents.add(generateDocument(Instant.now().plus(i, ChronoUnit.SECONDS))); - } + int numberOfDocuments = ESTestCase.randomIntBetween(100, 200); + final List documents = generateDocuments(numberOfDocuments); assertDocumentIndexing(documents); @@ -282,6 +270,17 @@ public void testDateHistogramAggregation() throws IOException { assertTrue(matchResult.getMessage(), matchResult.isMatch()); } + private List generateDocuments(int numberOfDocuments) throws IOException { + final List documents = new ArrayList<>(); + // This is static in order to be able to identify documents between test runs. 
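
Pinning the first timestamp instead of calling Instant.now() is what keeps a document's @timestamp and _id stable across the baseline and contender runs. A standalone sketch of the same idea (plain JDK, class name illustrative):

    import java.time.Instant;
    import java.time.ZoneId;
    import java.time.ZonedDateTime;
    import java.time.temporal.ChronoUnit;

    class ReproducibleTimestampsSketch {
        // Document i always receives the same timestamp: a fixed epoch plus i seconds.
        static Instant timestampFor(int documentIndex) {
            Instant start = ZonedDateTime.of(2024, 1, 1, 10, 0, 0, 0, ZoneId.of("UTC")).toInstant();
            return start.plus(documentIndex, ChronoUnit.SECONDS);
        }
    }
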
+ var startingPoint = ZonedDateTime.of(2024, 1, 1, 10, 0, 0, 0, ZoneId.of("UTC")).toInstant(); + for (int i = 0; i < numberOfDocuments; i++) { + documents.add(generateDocument(startingPoint.plus(i, ChronoUnit.SECONDS))); + } + + return documents; + } + protected XContentBuilder generateDocument(final Instant timestamp) throws IOException { return XContentFactory.jsonBuilder() .startObject() @@ -301,7 +300,10 @@ private static List> getQueryHits(final Response response) t final List> hitsList = (List>) hitsMap.get("hits"); assertThat(hitsList.size(), greaterThan(0)); - return hitsList.stream().map(hit -> (Map) hit.get("_source")).toList(); + return hitsList.stream() + .sorted(Comparator.comparingInt((Map hit) -> Integer.parseInt((String) hit.get("_id")))) + .map(hit -> (Map) hit.get("_source")) + .toList(); } @SuppressWarnings("unchecked") diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java index f53fdcb6e8600..8bd62480f333d 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java @@ -10,9 +10,11 @@ import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.FormatNames; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.index.mapper.ObjectMapper; import org.elasticsearch.logsdb.datageneration.DataGenerator; import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; +import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; import org.elasticsearch.logsdb.datageneration.FieldType; import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler; import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest; @@ -26,40 +28,27 @@ import java.time.Instant; import java.util.HashMap; import java.util.List; +import java.util.Map; /** * Challenge test (see {@link StandardVersusLogsIndexModeChallengeRestIT}) that uses randomly generated * mapping and documents in order to cover more code paths and permutations. */ public class StandardVersusLogsIndexModeRandomDataChallengeRestIT extends StandardVersusLogsIndexModeChallengeRestIT { - private final boolean fullyDynamicMapping; private final ObjectMapper.Subobjects subobjects; private final DataGenerator dataGenerator; public StandardVersusLogsIndexModeRandomDataChallengeRestIT() { super(); - this.fullyDynamicMapping = randomBoolean(); this.subobjects = randomFrom(ObjectMapper.Subobjects.values()); - var specificationBuilder = DataGeneratorSpecification.builder(); + var specificationBuilder = DataGeneratorSpecification.builder().withFullyDynamicMapping(randomBoolean()); if (subobjects != ObjectMapper.Subobjects.ENABLED) { specificationBuilder = specificationBuilder.withNestedFieldsLimit(0); } this.dataGenerator = new DataGenerator(specificationBuilder.withDataSourceHandlers(List.of(new DataSourceHandler() { @Override - public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) { - // Unsigned long is not used with dynamic mapping - // since it can initially look like long - // but later fail to parse once big values arrive. 
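
The removed comment above names a real hazard: dynamic mapping infers long for integer JSON, so an unsigned long beyond Long.MAX_VALUE later fails to parse. A quick plain-JDK illustration of why the type had to be excluded:

    import java.math.BigInteger;

    class UnsignedLongOverflowSketch {
        public static void main(String[] args) {
            // Largest unsigned 64-bit value: 2^64 - 1 = 18446744073709551615
            BigInteger unsignedMax = BigInteger.TWO.pow(64).subtract(BigInteger.ONE);
            try {
                Long.parseLong(unsignedMax.toString()); // out of range for a signed long
            } catch (NumberFormatException e) {
                System.out.println("rejected once the field was dynamically mapped as long: " + e.getMessage());
            }
        }
    }
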
- // Double is not used since it maps to float with dynamic mapping - // resulting in precision loss compared to original source. - var excluded = fullyDynamicMapping ? List.of(FieldType.DOUBLE, FieldType.SCALED_FLOAT, FieldType.UNSIGNED_LONG) : List.of(); - return new DataSourceResponse.FieldTypeGenerator( - () -> randomValueOtherThanMany(excluded::contains, () -> randomFrom(FieldType.values())) - ); - } - public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequest.ObjectMappingParametersGenerator request) { if (subobjects == ObjectMapper.Subobjects.ENABLED) { // Use default behavior @@ -82,42 +71,52 @@ public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequ return parameters; }); } - })).withPredefinedFields(List.of(new PredefinedField("host.name", FieldType.KEYWORD))).build()); + })) + .withPredefinedFields( + List.of( + new PredefinedField.WithType("host.name", FieldType.KEYWORD), + // Needed for terms query + new PredefinedField.WithGenerator("method", new FieldDataGenerator() { + @Override + public CheckedConsumer mappingWriter() { + return b -> b.startObject().field("type", "keyword").endObject(); + } + + @Override + public CheckedConsumer fieldValueGenerator() { + return b -> b.value(randomFrom("put", "post", "get")); + } + }), + + // Needed for histogram aggregation + new PredefinedField.WithGenerator("memory_usage_bytes", new FieldDataGenerator() { + @Override + public CheckedConsumer mappingWriter() { + return b -> b.startObject().field("type", "long").endObject(); + } + + @Override + public CheckedConsumer fieldValueGenerator() { + // We can generate this using standard long field but we would get "too many buckets" + return b -> b.value(randomLongBetween(1000, 2000)); + } + }) + ) + ) + .build()); } @Override public void baselineMappings(XContentBuilder builder) throws IOException { - if (fullyDynamicMapping == false) { - dataGenerator.writeMapping(builder); - } else { - // We want dynamic mapping, but we need host.name to be a keyword instead of text to support aggregations. 
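
Likewise, the double-to-float caveat in the removed comment is now handled by a matcher (DynamicFieldMatcher, below) rather than by excluding the type. The precision loss it refers to, in plain JDK terms:

    class DoubleToFloatSketch {
        public static void main(String[] args) {
            double original = 0.1234567890123456;
            // A dynamically mapped floating point field is typed float, so a value
            // round-trips with only about 7 significant decimal digits preserved.
            double roundTripped = (float) original;
            System.out.println(original == roundTripped); // false: precision was lost
        }
    }
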
- builder.startObject() - .startObject("properties") - - .startObject("host.name") - .field("type", "keyword") - .field("ignore_above", randomIntBetween(1000, 1200)) - .endObject() - - .endObject() - .endObject(); - } + dataGenerator.writeMapping(builder); } @Override public void contenderMappings(XContentBuilder builder) throws IOException { - if (fullyDynamicMapping == false) { - if (subobjects != ObjectMapper.Subobjects.ENABLED) { - dataGenerator.writeMapping(builder, b -> builder.field("subobjects", subobjects.toString())); - } else { - dataGenerator.writeMapping(builder); - } + if (subobjects != ObjectMapper.Subobjects.ENABLED) { + dataGenerator.writeMapping(builder, Map.of("subobjects", subobjects.toString())); } else { - builder.startObject(); - if (subobjects != ObjectMapper.Subobjects.ENABLED) { - builder.field("subobjects", subobjects.toString()); - } - builder.endObject(); + dataGenerator.writeMapping(builder); } } @@ -126,10 +125,6 @@ protected XContentBuilder generateDocument(final Instant timestamp) throws IOExc var document = XContentFactory.jsonBuilder(); dataGenerator.generateDocument(document, doc -> { doc.field("@timestamp", DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(timestamp)); - // Needed for terms query - doc.field("method", randomFrom("put", "post", "get")); - // We can generate this but we would get "too many buckets" - doc.field("memory_usage_bytes", randomLongBetween(1000, 2000)); }); return document; diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java index bb5751b8873f2..ae18129a77111 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/ListEqualMatcher.java @@ -43,7 +43,7 @@ private MatchResult matchListEquals(final List actualList, final List match(List actual, List expected) { + if (expected == null) { + return Optional.empty(); + } + + // Floating point values are always mapped as float with dynamic mapping. + var isDouble = expected.stream().filter(Objects::nonNull).findFirst().map(o -> o instanceof Double).orElse(false); + if (isDouble) { + assert expected.stream().allMatch(o -> o == null || o instanceof Double); + + var normalizedActual = normalizeDoubles(actual); + var normalizedExpected = normalizeDoubles(expected); + + var matchResult = normalizedActual.equals(normalizedExpected) + ? MatchResult.match() + : MatchResult.noMatch( + formatErrorMessage( + actualMappings, + actualSettings, + expectedMappings, + expectedSettings, + "Values of dynamically mapped field containing double values don't match after normalization, normalized " + + prettyPrintCollections(normalizedActual, normalizedExpected) + ) + ); + return Optional.of(matchResult); + } + + return Optional.empty(); + } + + private static Set normalizeDoubles(List values) { + if (values == null) { + return Set.of(); + } + + Function toFloat = (o) -> o instanceof Number n ? 
n.floatValue() : Float.parseFloat((String) o);
+        return values.stream().filter(Objects::nonNull).map(toFloat).collect(Collectors.toSet());
+    }
+}
diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/FieldSpecificMatcher.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/FieldSpecificMatcher.java
index 10b1922e1e217..253fb4b0e9688 100644
--- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/FieldSpecificMatcher.java
+++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/FieldSpecificMatcher.java
@@ -198,7 +198,7 @@ public MatchResult match(
                         actualSettings,
                         expectedMappings,
                         expectedSettings,
-                        "Values of type [scaled_float] don't match after normalization, normalized "
+                        "Values of type [unsigned_long] don't match after normalization, normalized "
                             + prettyPrintCollections(actualNormalized, expectedNormalized)
                     )
                 );
diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/MappingTransforms.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/MappingTransforms.java
index 4ca3142310b44..eade6f10e48fe 100644
--- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/MappingTransforms.java
+++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/MappingTransforms.java
@@ -8,10 +8,21 @@
 
 package org.elasticsearch.datastreams.logsdb.qa.matchers.source;
 
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 class MappingTransforms {
+    /**
+     * Container for mapping of a field. Contains field mapping parameters and mapping parameters of parent fields (if present)
+     * in order of increasing distance (direct parent first).
+     * This is needed because some parent mapping parameters influence how source of the field is stored (e.g. `enabled: false`).
+     * @param mappingParameters mapping parameters of the field itself
+     * @param parentMappingParameters mapping parameters of parent fields, ordered from the direct parent upwards
+     */
+    record FieldMapping(Map<String, Object> mappingParameters, List<Map<String, Object>> parentMappingParameters) {}
+
     /**
      * Normalize mapping to have the same structure as normalized source and enable field mapping lookup.
      * Similar to {@link SourceTransforms#normalize(Map)} but needs to get rid of intermediate nodes
@@ -20,8 +31,8 @@ class MappingTransforms {
      * @param map raw mapping document converted to map
      * @return map from normalized field name (like a.b.c) to a map of mapping parameters (like type)
      */
-    public static Map<String, Map<String, Object>> normalizeMapping(Map<String, Object> map) {
-        var flattened = new HashMap<String, Map<String, Object>>();
+    public static Map<String, FieldMapping> normalizeMapping(Map<String, Object> map) {
+        var flattened = new HashMap<String, FieldMapping>();
 
         descend(null, map, flattened);
@@ -29,21 +40,36 @@ public static Map<String, Map<String, Object>> normalizeMapping(Map<String, Object> map) {
-    private static void descend(String pathFromRoot, Map<String, Object> currentLevel, Map<String, Map<String, Object>> flattened) {
+    private static void descend(String pathFromRoot, Map<String, Object> currentLevel, Map<String, FieldMapping> flattened) {
         for (var entry : currentLevel.entrySet()) {
             if (entry.getKey().equals("_doc") || entry.getKey().equals("properties")) {
                 descend(pathFromRoot, (Map<String, Object>) entry.getValue(), flattened);
             } else {
                 if (entry.getValue() instanceof Map map) {
                     var pathToField = pathFromRoot == null ? entry.getKey() : pathFromRoot + "."
+ entry.getKey(); - descend(pathToField, (Map) map, flattened); - } else { - if (pathFromRoot == null) { - // Ignore top level mapping parameters for now - continue; + + // Descending to subobject, we need to remember parent mapping + if (pathFromRoot != null) { + var parentMapping = flattened.computeIfAbsent( + pathFromRoot, + k -> new FieldMapping(new HashMap<>(), new ArrayList<>()) + ); + var childMapping = flattened.computeIfAbsent( + pathToField, + k -> new FieldMapping(new HashMap<>(), new ArrayList<>()) + ); + childMapping.parentMappingParameters.add(parentMapping.mappingParameters); + childMapping.parentMappingParameters.addAll(parentMapping.parentMappingParameters); } - flattened.computeIfAbsent(pathFromRoot, k -> new HashMap<>()).put(entry.getKey(), entry.getValue()); + descend(pathToField, (Map) map, flattened); + } else { + var pathToField = pathFromRoot == null ? "_doc" : pathFromRoot; + // We are either at the lowest level of mapping or it's a leaf field of top level object + flattened.computeIfAbsent(pathToField, k -> new FieldMapping(new HashMap<>(), new ArrayList<>())).mappingParameters.put( + entry.getKey(), + entry.getValue() + ); } } } diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/SourceMatcher.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/SourceMatcher.java index f0e188a17631f..5eb93cee67d74 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/SourceMatcher.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/matchers/source/SourceMatcher.java @@ -10,15 +10,12 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.time.DateFormatter; -import org.elasticsearch.common.time.FormatNames; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.datastreams.logsdb.qa.matchers.GenericEqualsMatcher; import org.elasticsearch.datastreams.logsdb.qa.matchers.ListEqualMatcher; import org.elasticsearch.datastreams.logsdb.qa.matchers.MatchResult; import org.elasticsearch.xcontent.XContentBuilder; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -28,10 +25,11 @@ import static org.elasticsearch.datastreams.logsdb.qa.matchers.Messages.prettyPrintCollections; public class SourceMatcher extends GenericEqualsMatcher>> { - private final Map> actualNormalizedMapping; - private final Map> expectedNormalizedMapping; + private final Map actualNormalizedMapping; + private final Map expectedNormalizedMapping; private final Map fieldSpecificMatchers; + private final DynamicFieldMatcher dynamicFieldMatcher; public SourceMatcher( final XContentBuilder actualMappings, @@ -60,6 +58,7 @@ public SourceMatcher( "unsigned_long", new FieldSpecificMatcher.UnsignedLongMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings) ); + this.dynamicFieldMatcher = new DynamicFieldMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); } @Override @@ -76,14 +75,8 @@ public MatchResult match() { ); } - var sortedAndFlattenedActual = actual.stream() - .sorted(Comparator.comparing((Map m) -> parseTimestampToEpochMillis(m.get("@timestamp")))) - .map(SourceTransforms::normalize) - .toList(); - var sortedAndFlattenedExpected = expected.stream() - .sorted(Comparator.comparing((Map m) -> 
parseTimestampToEpochMillis(m.get("@timestamp")))) - .map(SourceTransforms::normalize) - .toList(); + var sortedAndFlattenedActual = actual.stream().map(SourceTransforms::normalize).toList(); + var sortedAndFlattenedExpected = expected.stream().map(SourceTransforms::normalize).toList(); for (int i = 0; i < sortedAndFlattenedActual.size(); i++) { var actual = sortedAndFlattenedActual.get(i); @@ -91,7 +84,8 @@ public MatchResult match() { var result = compareSource(actual, expected); if (result.isMatch() == false) { - return result; + var message = "Source matching failed at document id [" + i + "]. " + result.getMessage(); + return MatchResult.noMatch(message); } } @@ -105,12 +99,20 @@ private MatchResult compareSource(Map> actual, Map matchWithGenericMatcher(actualValues, expectedValues) - ); + // There are cases when field values are stored in ignored source + // so we try to match them as is first and then apply field specific matcher. + // This is temporary, we should be able to tell when source is exact using mappings. + // See #111916. + var genericMatchResult = matchWithGenericMatcher(actualValues, expectedValues); + if (genericMatchResult.isMatch()) { + return genericMatchResult; + } - if (fieldMatch.isMatch() == false) { - var message = "Source documents don't match for field [" + name + "]: " + fieldMatch.getMessage(); + var matchIncludingFieldSpecificMatchers = matchWithFieldSpecificMatcher(name, actualValues, expectedValues).orElse( + genericMatchResult + ); + if (matchIncludingFieldSpecificMatchers.isMatch() == false) { + var message = "Source documents don't match for field [" + name + "]: " + matchIncludingFieldSpecificMatchers.getMessage(); return MatchResult.noMatch(message); } } @@ -130,11 +132,11 @@ private Optional matchWithFieldSpecificMatcher(String fieldName, Li ); } - // Dynamic mapping, nothing to do - return Optional.empty(); + // Field is dynamically mapped + return dynamicFieldMatcher.match(actualValues, expectedValues); } - var actualFieldType = (String) actualFieldMapping.get("type"); + var actualFieldType = (String) actualFieldMapping.mappingParameters().get("type"); if (actualFieldType == null) { throw new IllegalStateException("Field type is missing from leaf field Leaf field [" + fieldName + "] mapping parameters"); } @@ -143,7 +145,7 @@ private Optional matchWithFieldSpecificMatcher(String fieldName, Li if (expectedFieldMapping == null) { throw new IllegalStateException("Leaf field [" + fieldName + "] is present in actual mapping but absent in expected mapping"); } else { - var expectedFieldType = expectedFieldMapping.get("type"); + var expectedFieldType = expectedFieldMapping.mappingParameters().get("type"); if (Objects.equals(actualFieldType, expectedFieldType) == false) { throw new IllegalStateException( "Leaf field [" @@ -157,15 +159,29 @@ private Optional matchWithFieldSpecificMatcher(String fieldName, Li } } + if (sourceMatchesExactly(expectedFieldMapping, expectedValues)) { + return Optional.empty(); + } + var fieldSpecificMatcher = fieldSpecificMatchers.get(actualFieldType); if (fieldSpecificMatcher == null) { return Optional.empty(); } - MatchResult matched = fieldSpecificMatcher.match(actualValues, expectedValues, expectedFieldMapping, actualFieldMapping); + MatchResult matched = fieldSpecificMatcher.match( + actualValues, + expectedValues, + actualFieldMapping.mappingParameters(), + expectedFieldMapping.mappingParameters() + ); return Optional.of(matched); } + // Checks for scenarios when source is stored exactly and therefore can be compared 
without special logic. + private boolean sourceMatchesExactly(MappingTransforms.FieldMapping mapping, List expectedValues) { + return mapping.parentMappingParameters().stream().anyMatch(m -> m.getOrDefault("enabled", "true").equals("false")); + } + private MatchResult matchWithGenericMatcher(List actualValues, List expectedValues) { var genericListMatcher = new ListEqualMatcher( actualMappings, @@ -179,9 +195,4 @@ private MatchResult matchWithGenericMatcher(List actualValues, List List normalizeValues(List values) { return Collections.emptyList(); } + return normalizeValues(values, Function.identity()); + } + + public static List normalizeValues(List values, Function transform) { + if (values == null) { + return Collections.emptyList(); + } + // Synthetic source modifications: // * null values are not present // * duplicates are removed - return new ArrayList<>(values.stream().filter(v -> v != null && Objects.equals(v, "null") == false).collect(Collectors.toSet())); + return new ArrayList<>( + values.stream().filter(v -> v != null && Objects.equals(v, "null") == false).map(transform).collect(Collectors.toSet()) + ); } private static void descend(String pathFromRoot, Map currentLevel, Map> flattened) { diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java index e6cfba2138882..ed2d968a78818 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java @@ -13,6 +13,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.Map; /** * Entry point of data generation logic. @@ -33,7 +34,7 @@ public DataGenerator(DataGeneratorSpecification specification) { */ public void writeMapping(XContentBuilder mapping) throws IOException { mapping.startObject().field("_doc"); - topLevelGenerator.mappingWriter(b -> {}).accept(mapping); + topLevelGenerator.mappingWriter(Map.of()).accept(mapping); mapping.endObject(); } @@ -44,8 +45,7 @@ public void writeMapping(XContentBuilder mapping) throws IOException { * @param customMappingParameters writer of custom mapping parameters of top level object mapping * @throws IOException */ - public void writeMapping(XContentBuilder mapping, CheckedConsumer customMappingParameters) - throws IOException { + public void writeMapping(XContentBuilder mapping, Map customMappingParameters) throws IOException { mapping.startObject().field("_doc"); topLevelGenerator.mappingWriter(customMappingParameters).accept(mapping); mapping.endObject(); diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java index 57bf9f12ccef1..04c30ae484f44 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java @@ -23,6 +23,7 @@ * Applies to subobjects. 
* @param maxObjectDepth maximum depth of nested objects * @param nestedFieldsLimit how many total nested fields can be present in a produced mapping + * @param fullyDynamicMapping if the mapping is fully dynamic, meaning none of the fields are mapped (essentially mapping is empty) * @param predefinedFields predefined fields that must be present in mapping and documents. Only top level fields are supported. */ public record DataGeneratorSpecification( @@ -30,6 +31,7 @@ public record DataGeneratorSpecification( int maxFieldCountPerLevel, int maxObjectDepth, int nestedFieldsLimit, + boolean fullyDynamicMapping, List predefinedFields ) { @@ -46,6 +48,7 @@ public static class Builder { private int maxFieldCountPerLevel; private int maxObjectDepth; private int nestedFieldsLimit; + private boolean fullyDynamicMapping; private List predefinedFields; public Builder() { @@ -55,6 +58,7 @@ public Builder() { this.maxObjectDepth = 2; // Default value of index.mapping.nested_fields.limit this.nestedFieldsLimit = 50; + fullyDynamicMapping = false; this.predefinedFields = new ArrayList<>(); } @@ -78,6 +82,11 @@ public Builder withNestedFieldsLimit(int nestedFieldsLimit) { return this; } + public Builder withFullyDynamicMapping(boolean fullyDynamicMapping) { + this.fullyDynamicMapping = fullyDynamicMapping; + return this; + } + public Builder withPredefinedFields(List predefinedFields) { this.predefinedFields = predefinedFields; return this; @@ -89,6 +98,7 @@ public DataGeneratorSpecification build() { maxFieldCountPerLevel, maxObjectDepth, nestedFieldsLimit, + fullyDynamicMapping, predefinedFields ); } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java index c8821c087d084..4ed36ea685238 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java @@ -8,6 +8,18 @@ package org.elasticsearch.logsdb.datageneration; +import org.elasticsearch.logsdb.datageneration.datasource.DataSource; +import org.elasticsearch.logsdb.datageneration.fields.leaf.ByteFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.DoubleFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.FloatFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.HalfFloatFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.IntegerFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.KeywordFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.LongFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.ScaledFloatFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.ShortFieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.fields.leaf.UnsignedLongFieldDataGenerator; + /** * Lists all leaf field types that are supported for data generation. 
*/ @@ -21,5 +33,20 @@ public enum FieldType { DOUBLE, FLOAT, HALF_FLOAT, - SCALED_FLOAT + SCALED_FLOAT; + + public FieldDataGenerator generator(String fieldName, DataSource dataSource) { + return switch (this) { + case KEYWORD -> new KeywordFieldDataGenerator(fieldName, dataSource); + case LONG -> new LongFieldDataGenerator(fieldName, dataSource); + case UNSIGNED_LONG -> new UnsignedLongFieldDataGenerator(fieldName, dataSource); + case INTEGER -> new IntegerFieldDataGenerator(fieldName, dataSource); + case SHORT -> new ShortFieldDataGenerator(fieldName, dataSource); + case BYTE -> new ByteFieldDataGenerator(fieldName, dataSource); + case DOUBLE -> new DoubleFieldDataGenerator(fieldName, dataSource); + case FLOAT -> new FloatFieldDataGenerator(fieldName, dataSource); + case HALF_FLOAT -> new HalfFloatFieldDataGenerator(fieldName, dataSource); + case SCALED_FLOAT -> new ScaledFloatFieldDataGenerator(fieldName, dataSource); + }; + } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java index df3adc458829e..fadf51ee3ea10 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java @@ -10,6 +10,7 @@ import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; import org.elasticsearch.logsdb.datageneration.FieldType; +import org.elasticsearch.logsdb.datageneration.fields.DynamicMapping; public interface DataSourceRequest { TResponse accept(DataSourceHandler handler); @@ -88,7 +89,7 @@ public DataSourceResponse.ChildFieldGenerator accept(DataSourceHandler handler) } } - record FieldTypeGenerator() implements DataSourceRequest { + record FieldTypeGenerator(DynamicMapping dynamicMapping) implements DataSourceRequest { public DataSourceResponse.FieldTypeGenerator accept(DataSourceHandler handler) { return handler.handle(this); } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java index 2386c4c32ab6c..9093799ade41d 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java @@ -41,6 +41,8 @@ record ArrayWrapper(Function, Supplier> wrapper) implem interface ChildFieldGenerator extends DataSourceResponse { int generateChildFieldCount(); + boolean generateDynamicSubObject(); + boolean generateNestedSubObject(); boolean generateRegularSubObject(); @@ -48,7 +50,9 @@ interface ChildFieldGenerator extends DataSourceResponse { String generateFieldName(); } - record FieldTypeGenerator(Supplier generator) implements DataSourceResponse {} + record FieldTypeGenerator(Supplier generator) implements DataSourceResponse { + public record FieldTypeInfo(FieldType fieldType, boolean dynamic) {} + } record ObjectArrayGenerator(Supplier> lengthGenerator) implements DataSourceResponse {} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java index 
9eea4e6ae932f..e57257f69da20 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java
@@ -56,29 +56,23 @@ private Supplier<Map<String, Object>> scaledFloatMapping() {
 
     @Override
     public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequest.ObjectMappingParametersGenerator request) {
         if (request.isNested()) {
-            return new DataSourceResponse.ObjectMappingParametersGenerator(
-                // TODO enable "false" and "strict"
-                // It is disabled because it hits a bug in synthetic source.
-                () -> {
-                    var parameters = new HashMap<String, Object>();
-                    if (ESTestCase.randomBoolean()) {
-                        parameters.put("dynamic", "true");
-                    }
-
-                    return parameters;
+            return new DataSourceResponse.ObjectMappingParametersGenerator(() -> {
+                var parameters = new HashMap<String, Object>();
+                if (ESTestCase.randomBoolean()) {
+                    parameters.put("dynamic", ESTestCase.randomFrom("true", "false", "strict"));
                 }
-            );
+
+                return parameters;
+            });
         }
 
-        // TODO enable "enabled: false" and "dynamic: false/runtime"
-        // It is disabled because it hits a bug in synthetic source.
         return new DataSourceResponse.ObjectMappingParametersGenerator(() -> {
             var parameters = new HashMap<String, Object>();
             if (ESTestCase.randomBoolean()) {
-                parameters.put("dynamic", ESTestCase.randomFrom("true", "strict"));
+                parameters.put("dynamic", ESTestCase.randomFrom("true", "false", "strict", "runtime"));
             }
             if (ESTestCase.randomBoolean()) {
-                parameters.put("enabled", "true");
+                parameters.put("enabled", ESTestCase.randomFrom("true", "false"));
             }
 
             return parameters;
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java
index 45e4b0b6d6624..4ad9ac61a9158 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java
@@ -12,10 +12,11 @@
 import org.elasticsearch.test.ESTestCase;
 
 import java.util.Optional;
+import java.util.Set;
+import java.util.function.Supplier;
 
 import static org.elasticsearch.test.ESTestCase.randomAlphaOfLengthBetween;
 import static org.elasticsearch.test.ESTestCase.randomDouble;
-import static org.elasticsearch.test.ESTestCase.randomFrom;
 import static org.elasticsearch.test.ESTestCase.randomIntBetween;
 
 public class DefaultObjectGenerationHandler implements DataSourceHandler {
@@ -27,16 +28,22 @@ public int generateChildFieldCount() {
         return ESTestCase.randomIntBetween(0, request.specification().maxFieldCountPerLevel());
     }
 
+    @Override
+    public boolean generateDynamicSubObject() {
+        // Using a static 5% chance, this is just a chosen value that can be tweaked.
+        return randomDouble() <= 0.05;
+    }
+
     @Override
     public boolean generateNestedSubObject() {
-        // Using a static 10% change, this is just a chosen value that can be tweaked.
-        return randomDouble() <= 0.1;
+        // Using a static 5% chance, this is just a chosen value that can be tweaked.
+        return randomDouble() <= 0.05;
     }
 
     @Override
     public boolean generateRegularSubObject() {
-        // Using a static 10% change, this is just a chosen value that can be tweaked.
-        return randomDouble() <= 0.1;
+        // Using a static 5% chance, this is just a chosen value that can be tweaked.
+ return randomDouble() <= 0.05; } @Override @@ -46,9 +53,27 @@ public String generateFieldName() { }; } + // UNSIGNED_LONG is excluded because it is mapped as long + // and values larger than long fail to parse. + private static final Set EXCLUDED_FROM_DYNAMIC_MAPPING = Set.of(FieldType.UNSIGNED_LONG); + @Override public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) { - return new DataSourceResponse.FieldTypeGenerator(() -> randomFrom(FieldType.values())); + Supplier generator = switch (request.dynamicMapping()) { + case FORBIDDEN -> () -> generateFieldTypeInfo(false); + case FORCED -> () -> generateFieldTypeInfo(true); + case SUPPORTED -> () -> generateFieldTypeInfo(ESTestCase.randomBoolean()); + }; + + return new DataSourceResponse.FieldTypeGenerator(generator); + } + + private static DataSourceResponse.FieldTypeGenerator.FieldTypeInfo generateFieldTypeInfo(boolean isDynamic) { + var excluded = isDynamic ? EXCLUDED_FROM_DYNAMIC_MAPPING : Set.of(); + + var fieldType = ESTestCase.randomValueOtherThanMany(excluded::contains, () -> ESTestCase.randomFrom(FieldType.values())); + + return new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(fieldType, isDynamic); } @Override diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java index 68bb628cc8b27..2514cbd51e21c 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java @@ -20,8 +20,7 @@ public DataSourceResponse.LongGenerator handle(DataSourceRequest.LongGenerator r @Override public DataSourceResponse.UnsignedLongGenerator handle(DataSourceRequest.UnsignedLongGenerator request) { - // TODO there is currently an issue with handling BigInteger in some synthetic source scenarios - return new DataSourceResponse.UnsignedLongGenerator(() -> new BigInteger(64, ESTestCase.random()).toString()); + return new DataSourceResponse.UnsignedLongGenerator(() -> new BigInteger(64, ESTestCase.random())); } @Override diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java index 62130967508f6..ef83ced13f0dc 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java @@ -12,6 +12,7 @@ import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest; import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse; +import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; @@ -19,23 +20,28 @@ class Context { private final DataGeneratorSpecification specification; private final DataSourceResponse.ChildFieldGenerator childFieldGenerator; - private final DataSourceResponse.FieldTypeGenerator fieldTypeGenerator; private final DataSourceResponse.ObjectArrayGenerator objectArrayGenerator; private final int objectDepth; // We don't need atomicity, but we need to pass counter by reference to accumulate total value from sub-objects. 
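
The comment above is worth unpacking: AtomicInteger serves purely as a mutable box that every Context of one generation run shares, so the nested-field budget is enforced globally rather than per branch. A small plain-JDK sketch of that sharing (names illustrative):

    import java.util.concurrent.atomic.AtomicInteger;

    class NestedBudgetSketch {
        private final AtomicInteger nestedFieldsCount; // shared across all contexts of one run
        private final int limit;

        NestedBudgetSketch(AtomicInteger sharedCounter, int limit) {
            this.nestedFieldsCount = sharedCounter;
            this.limit = limit;
        }

        // Consumes one unit of the global budget if any is left.
        boolean tryAddNestedField() {
            if (nestedFieldsCount.get() >= limit) {
                return false;
            }
            nestedFieldsCount.incrementAndGet();
            return true;
        }
    }
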
private final AtomicInteger nestedFieldsCount; + private final DynamicMapping parentDynamicMapping; - Context(DataGeneratorSpecification specification) { - this(specification, 0, new AtomicInteger(0)); + Context(DataGeneratorSpecification specification, DynamicMapping parentDynamicMapping) { + this(specification, 0, new AtomicInteger(0), parentDynamicMapping); } - private Context(DataGeneratorSpecification specification, int objectDepth, AtomicInteger nestedFieldsCount) { + private Context( + DataGeneratorSpecification specification, + int objectDepth, + AtomicInteger nestedFieldsCount, + DynamicMapping parentDynamicMapping + ) { this.specification = specification; this.childFieldGenerator = specification.dataSource().get(new DataSourceRequest.ChildFieldGenerator(specification)); - this.fieldTypeGenerator = specification.dataSource().get(new DataSourceRequest.FieldTypeGenerator()); this.objectArrayGenerator = specification.dataSource().get(new DataSourceRequest.ObjectArrayGenerator()); this.objectDepth = objectDepth; this.nestedFieldsCount = nestedFieldsCount; + this.parentDynamicMapping = parentDynamicMapping; } public DataGeneratorSpecification specification() { @@ -46,27 +52,43 @@ public DataSourceResponse.ChildFieldGenerator childFieldGenerator() { return childFieldGenerator; } - public DataSourceResponse.FieldTypeGenerator fieldTypeGenerator() { - return fieldTypeGenerator; + public DataSourceResponse.FieldTypeGenerator fieldTypeGenerator(DynamicMapping dynamicMapping) { + return specification.dataSource().get(new DataSourceRequest.FieldTypeGenerator(dynamicMapping)); } - public Context subObject() { - return new Context(specification, objectDepth + 1, nestedFieldsCount); + public Context subObject(DynamicMapping dynamicMapping) { + return new Context(specification, objectDepth + 1, nestedFieldsCount, dynamicMapping); } - public Context nestedObject() { + public Context nestedObject(DynamicMapping dynamicMapping) { nestedFieldsCount.incrementAndGet(); - return new Context(specification, objectDepth + 1, nestedFieldsCount); + return new Context(specification, objectDepth + 1, nestedFieldsCount, dynamicMapping); + } + + public boolean shouldAddDynamicObjectField(DynamicMapping dynamicMapping) { + if (objectDepth >= specification.maxObjectDepth() || dynamicMapping == DynamicMapping.FORBIDDEN) { + return false; + } + + return childFieldGenerator.generateDynamicSubObject(); } public boolean shouldAddObjectField() { - return childFieldGenerator.generateRegularSubObject() && objectDepth < specification.maxObjectDepth(); + if (objectDepth >= specification.maxObjectDepth() || parentDynamicMapping == DynamicMapping.FORCED) { + return false; + } + + return childFieldGenerator.generateRegularSubObject(); } public boolean shouldAddNestedField() { - return childFieldGenerator.generateNestedSubObject() - && objectDepth < specification.maxObjectDepth() - && nestedFieldsCount.get() < specification.nestedFieldsLimit(); + if (objectDepth >= specification.maxObjectDepth() + || nestedFieldsCount.get() >= specification.nestedFieldsLimit() + || parentDynamicMapping == DynamicMapping.FORCED) { + return false; + } + + return childFieldGenerator.generateNestedSubObject(); } public Optional generateObjectArray() { @@ -76,4 +98,18 @@ public Optional generateObjectArray() { return objectArrayGenerator.lengthGenerator().get(); } + + public DynamicMapping determineDynamicMapping(Map mappingParameters) { + if (parentDynamicMapping == DynamicMapping.FORCED) { + return DynamicMapping.FORCED; + } + + var 
dynamicParameter = mappingParameters.get("dynamic"); + // Inherited from parent + if (dynamicParameter == null) { + return parentDynamicMapping; + } + + return dynamicParameter.equals("strict") ? DynamicMapping.FORBIDDEN : DynamicMapping.SUPPORTED; + } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/DynamicMapping.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/DynamicMapping.java new file mode 100644 index 0000000000000..ea74080c03610 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/DynamicMapping.java @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.logsdb.datageneration.fields; + +public enum DynamicMapping { + SUPPORTED, + FORBIDDEN, + FORCED +} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java index 8a6a8939c7ddb..e8f9724fee269 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java @@ -10,17 +10,6 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.FieldType; -import org.elasticsearch.logsdb.datageneration.fields.leaf.ByteFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.DoubleFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.FloatFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.HalfFloatFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.IntegerFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.KeywordFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.LongFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.ScaledFloatFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.ShortFieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.fields.leaf.UnsignedLongFieldDataGenerator; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -39,7 +28,7 @@ public class GenericSubObjectFieldDataGenerator { this.context = context; } - List generateChildFields() { + List generateChildFields(DynamicMapping dynamicMapping) { var existingFieldNames = new HashSet(); // no child fields is legal var childFieldsCount = context.childFieldGenerator().generateChildFieldCount(); @@ -48,13 +37,16 @@ List generateChildFields() { for (int i = 0; i < childFieldsCount; i++) { var fieldName = generateFieldName(existingFieldNames); - if (context.shouldAddObjectField()) { - result.add(new ChildField(fieldName, new ObjectFieldDataGenerator(context.subObject()), false)); + if (context.shouldAddDynamicObjectField(dynamicMapping)) { + result.add(new ChildField(fieldName, new 
ObjectFieldDataGenerator(context.subObject(DynamicMapping.FORCED)), true)); + } else if (context.shouldAddObjectField()) { + result.add(new ChildField(fieldName, new ObjectFieldDataGenerator(context.subObject(dynamicMapping)), false)); } else if (context.shouldAddNestedField()) { - result.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject()), false)); + result.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject(dynamicMapping)), false)); } else { - var fieldType = context.fieldTypeGenerator().generator().get(); - result.add(leafField(fieldType, fieldName)); + var fieldTypeInfo = context.fieldTypeGenerator(dynamicMapping).generator().get(); + var generator = fieldTypeInfo.fieldType().generator(fieldName, context.specification().dataSource()); + result.add(new ChildField(fieldName, generator, fieldTypeInfo.dynamic())); } } @@ -62,13 +54,17 @@ List generateChildFields() { } List generateChildFields(List predefinedFields) { - return predefinedFields.stream().map(pf -> leafField(pf.fieldType(), pf.fieldName())).toList(); + return predefinedFields.stream() + .map(pf -> new ChildField(pf.name(), pf.generator(context.specification().dataSource()), false)) + .toList(); } static void writeChildFieldsMapping(XContentBuilder mapping, List childFields) throws IOException { for (var childField : childFields) { - mapping.field(childField.fieldName); - childField.generator.mappingWriter().accept(mapping); + if (childField.dynamic() == false) { + mapping.field(childField.fieldName); + childField.generator.mappingWriter().accept(mapping); + } } } @@ -101,23 +97,6 @@ static void writeChildFieldsData(XContentBuilder document, Iterable } } - private ChildField leafField(FieldType type, String fieldName) { - var generator = switch (type) { - case KEYWORD -> new KeywordFieldDataGenerator(fieldName, context.specification().dataSource()); - case LONG -> new LongFieldDataGenerator(fieldName, context.specification().dataSource()); - case UNSIGNED_LONG -> new UnsignedLongFieldDataGenerator(fieldName, context.specification().dataSource()); - case INTEGER -> new IntegerFieldDataGenerator(fieldName, context.specification().dataSource()); - case SHORT -> new ShortFieldDataGenerator(fieldName, context.specification().dataSource()); - case BYTE -> new ByteFieldDataGenerator(fieldName, context.specification().dataSource()); - case DOUBLE -> new DoubleFieldDataGenerator(fieldName, context.specification().dataSource()); - case FLOAT -> new FloatFieldDataGenerator(fieldName, context.specification().dataSource()); - case HALF_FLOAT -> new HalfFloatFieldDataGenerator(fieldName, context.specification().dataSource()); - case SCALED_FLOAT -> new ScaledFloatFieldDataGenerator(fieldName, context.specification().dataSource()); - }; - - return new ChildField(fieldName, generator, false); - } - private String generateFieldName(Set existingFields) { var fieldName = context.childFieldGenerator().generateFieldName(); while (existingFields.contains(fieldName)) { diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java index 3ba220a64d4fd..5b9891ab15920 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java @@ -30,9 +30,10 @@ public class 
NestedFieldDataGenerator implements FieldDataGenerator { .get(new DataSourceRequest.ObjectMappingParametersGenerator(true)) .mappingGenerator() .get(); + var dynamicMapping = context.determineDynamicMapping(mappingParameters); var genericGenerator = new GenericSubObjectFieldDataGenerator(context); - this.childFields = genericGenerator.generateChildFields(); + this.childFields = genericGenerator.generateChildFields(dynamicMapping); } @Override diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java index 00f2977c8af33..51732b71b73d0 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java @@ -30,9 +30,10 @@ public class ObjectFieldDataGenerator implements FieldDataGenerator { .get(new DataSourceRequest.ObjectMappingParametersGenerator(false)) .mappingGenerator() .get(); + var dynamicMapping = context.determineDynamicMapping(mappingParameters); var genericGenerator = new GenericSubObjectFieldDataGenerator(context); - this.childFields = genericGenerator.generateChildFields(); + this.childFields = genericGenerator.generateChildFields(dynamicMapping); } @Override diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java index 6adae35dc909c..81b73e4407bd7 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/PredefinedField.java @@ -8,6 +8,36 @@ package org.elasticsearch.logsdb.datageneration.fields; +import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; import org.elasticsearch.logsdb.datageneration.FieldType; +import org.elasticsearch.logsdb.datageneration.datasource.DataSource; -public record PredefinedField(String fieldName, FieldType fieldType) {} +public interface PredefinedField { + String name(); + + FieldDataGenerator generator(DataSource dataSource); + + record WithType(String fieldName, FieldType fieldType) implements PredefinedField { + @Override + public String name() { + return fieldName; + } + + @Override + public FieldDataGenerator generator(DataSource dataSource) { + return fieldType().generator(fieldName, dataSource); + } + } + + record WithGenerator(String fieldName, FieldDataGenerator generator) implements PredefinedField { + @Override + public String name() { + return fieldName; + } + + @Override + public FieldDataGenerator generator(DataSource dataSource) { + return generator; + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java index 645b7ca16e291..f2c209fb73862 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java @@ -10,32 +10,59 @@ import org.elasticsearch.core.CheckedConsumer; import 
org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; +import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; public class TopLevelObjectFieldDataGenerator { private final Context context; + private final Map mappingParameters; + // Child fields of top level object that were explicitly requested, they have predefined name and type. private final List predefinedFields; // Child fields of top level object that are generated and merged with predefined fields. private final List generatedChildFields; public TopLevelObjectFieldDataGenerator(DataGeneratorSpecification specification) { - this.context = new Context(specification); + DynamicMapping dynamicMapping; + if (specification.fullyDynamicMapping()) { + dynamicMapping = DynamicMapping.FORCED; + this.mappingParameters = Map.of(); + } else { + this.mappingParameters = new HashMap<>( + specification.dataSource().get(new DataSourceRequest.ObjectMappingParametersGenerator(false)).mappingGenerator().get() + ); + // Top-level object can't be disabled because @timestamp is a required field in data streams. + this.mappingParameters.remove("enabled"); + + dynamicMapping = mappingParameters.getOrDefault("dynamic", "true").equals("strict") + ? DynamicMapping.FORBIDDEN + : DynamicMapping.SUPPORTED; + } + this.context = new Context(specification, dynamicMapping); var genericGenerator = new GenericSubObjectFieldDataGenerator(context); + this.predefinedFields = genericGenerator.generateChildFields(specification.predefinedFields()); - this.generatedChildFields = genericGenerator.generateChildFields(); + this.generatedChildFields = genericGenerator.generateChildFields(dynamicMapping); } - public CheckedConsumer mappingWriter( - CheckedConsumer customMappingParameters - ) { + public CheckedConsumer mappingWriter(Map customMappingParameters) { return b -> { b.startObject(); - customMappingParameters.accept(b); + var mergedParameters = Stream.of(this.mappingParameters, customMappingParameters) + .flatMap(map -> map.entrySet().stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (l, r) -> r)); + + for (var entry : mergedParameters.entrySet()) { + b.field(entry.getKey(), entry.getValue()); + } b.startObject("properties"); GenericSubObjectFieldDataGenerator.writeChildFieldsMapping(b, predefinedFields); diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java index 8ff5998a31d45..81d6028182d21 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java @@ -39,6 +39,7 @@ public void testSnapshot() throws Exception { var expectedMapping = """ { "_doc" : { + "dynamic" : "false", "properties" : { "f1" : { "dynamic" : "false", @@ -184,7 +185,6 @@ public DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper req @Override public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) { - return childFieldGenerator; } @@ -205,11 +205,11 @@ public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeG return new 
DataSourceResponse.FieldTypeGenerator(() -> { if (fieldType == FieldType.KEYWORD) { fieldType = FieldType.LONG; - return FieldType.KEYWORD; + return new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(FieldType.KEYWORD, false); } fieldType = FieldType.KEYWORD; - return FieldType.LONG; + return new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(FieldType.LONG, false); }); } @@ -240,6 +240,11 @@ public int generateChildFieldCount() { return 2; } + @Override + public boolean generateDynamicSubObject() { + return false; + } + @Override public boolean generateNestedSubObject() { return generatedFields > 6 && generatedFields < 12; diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java index 4a4ffca0f37aa..e8535263d387c 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler; import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest; import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse; +import org.elasticsearch.logsdb.datageneration.fields.DynamicMapping; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentBuilder; @@ -42,6 +43,7 @@ public void testDataGeneratorSanity() throws IOException { public void testDataGeneratorProducesValidMappingAndDocument() throws IOException { // Make sure objects, nested objects and all field types are covered. var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() { + private boolean dynamicSubObjectCovered = false; private boolean subObjectCovered = false; private boolean nestedCovered = false; private int generatedFields = 0; @@ -52,6 +54,16 @@ public int generateChildFieldCount() { return 20; } + @Override + public boolean generateDynamicSubObject() { + if (dynamicSubObjectCovered == false) { + dynamicSubObjectCovered = true; + return true; + } + + return false; + } + @Override public boolean generateNestedSubObject() { if (nestedCovered == false) { @@ -88,7 +100,24 @@ public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFiel @Override public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) { - return new DataSourceResponse.FieldTypeGenerator(() -> FieldType.values()[generatedFields++ % FieldType.values().length]); + if (request.dynamicMapping() == DynamicMapping.FORBIDDEN || request.dynamicMapping() == DynamicMapping.SUPPORTED) { + return new DataSourceResponse.FieldTypeGenerator( + () -> new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo( + FieldType.values()[generatedFields++ % FieldType.values().length], + false + ) + ); + } + + return new DataSourceResponse.FieldTypeGenerator(() -> { + var fieldType = FieldType.values()[generatedFields++ % FieldType.values().length]; + // Does not really work with dynamic mapping. 
+ if (fieldType == FieldType.UNSIGNED_LONG) { + fieldType = FieldType.values()[generatedFields++ % FieldType.values().length]; + } + + return new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(fieldType, true); + }); } }; @@ -122,6 +151,11 @@ public int generateChildFieldCount() { return 50; } + @Override + public boolean generateDynamicSubObject() { + return false; + } + @Override public boolean generateNestedSubObject() { return false; @@ -151,7 +185,9 @@ public DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectAr @Override public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) { - return new DataSourceResponse.FieldTypeGenerator(() -> FieldType.LONG); + return new DataSourceResponse.FieldTypeGenerator( + () -> new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(FieldType.LONG, false) + ); } }; From 55a185b933ee508812a20ecd94d394974dad6ac1 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Thu, 29 Aug 2024 16:07:31 -0400 Subject: [PATCH 059/144] Bump spotless version to 6.25.0 (#112366) --- gradle/build.versions.toml | 2 +- gradle/verification-metadata.xml | 295 ++++++------------ .../health/node/DiskHealthInfo.java | 1 + .../xpack/ml/inference/nlp/NerProcessor.java | 1 + 4 files changed, 107 insertions(+), 192 deletions(-) diff --git a/gradle/build.versions.toml b/gradle/build.versions.toml index 792330fd3613b..12f7776add17b 100644 --- a/gradle/build.versions.toml +++ b/gradle/build.versions.toml @@ -44,6 +44,6 @@ snakeyaml = { group = "org.yaml", name = "snakeyaml", version = { strictly = "2. spock-core = { group = "org.spockframework", name="spock-core", version.ref="spock" } spock-junit4 = { group = "org.spockframework", name="spock-junit4", version.ref="spock" } spock-platform = { group = "org.spockframework", name="spock-bom", version.ref="spock" } -spotless-plugin = "com.diffplug.spotless:spotless-plugin-gradle:6.22.0" +spotless-plugin = "com.diffplug.spotless:spotless-plugin-gradle:6.25.0" wiremock = "com.github.tomakehurst:wiremock-jre8-standalone:2.23.2" xmlunit-core = "org.xmlunit:xmlunit-core:2.8.2" diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a27e2083a0849..3a4f5ef9d240c 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml [295 lines of dependency-checksum hunks unrecoverable: the XML entries were stripped to bare +/- markers, leaving only the diffstat above; they update checksum metadata for the spotless 6.25.0 upgrade] diff --git a/server/src/main/java/org/elasticsearch/health/node/DiskHealthInfo.java b/server/src/main/java/org/elasticsearch/health/node/DiskHealthInfo.java index f1e085482b72a..244ed03bf098c 100644 --- 
a/server/src/main/java/org/elasticsearch/health/node/DiskHealthInfo.java +++ b/server/src/main/java/org/elasticsearch/health/node/DiskHealthInfo.java @@ -20,6 +20,7 @@ * The health status of the disk space of this node along with the cause. */ public record DiskHealthInfo(HealthStatus healthStatus, @Nullable Cause cause) implements Writeable { + public DiskHealthInfo(HealthStatus healthStatus) { this(healthStatus, null); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessor.java index 3dbf941c8120d..05b97422bfb97 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/NerProcessor.java @@ -172,6 +172,7 @@ static String buildAnnotatedText(String seq, List entiti } record NerResultProcessor(IobTag[] iobMap, String resultsField, boolean ignoreCase) implements NlpTask.ResultProcessor { + NerResultProcessor(IobTag[] iobMap, String resultsField, boolean ignoreCase) { this.iobMap = iobMap; this.resultsField = Optional.ofNullable(resultsField).orElse(DEFAULT_RESULTS_FIELD); From f1ef280c19ce3203532015368f2dbbe5112721b5 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Fri, 30 Aug 2024 06:10:43 +1000 Subject: [PATCH 060/144] Mute org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT testMatchAllQuery #112374 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index e80a39040a4ef..94ff4b5d509cd 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -162,6 +162,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/112254 - class: org.elasticsearch.search.ccs.CCSUsageTelemetryIT issue: https://github.com/elastic/elasticsearch/issues/112324 +- class: org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT + method: testMatchAllQuery + issue: https://github.com/elastic/elasticsearch/issues/112374 # Examples: # From 09615aa49c9f21741adb56c957bae20b4d0f56e4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Fri, 30 Aug 2024 06:51:01 +1000 Subject: [PATCH 061/144] Mute org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT test {yaml=reference/rest-api/watcher/put-watch/line_120} #99517 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 94ff4b5d509cd..c50234a8eaa9d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -165,6 +165,9 @@ tests: - class: org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT method: testMatchAllQuery issue: https://github.com/elastic/elasticsearch/issues/112374 +- class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT + method: test {yaml=reference/rest-api/watcher/put-watch/line_120} + issue: https://github.com/elastic/elasticsearch/issues/99517 # Examples: # From 5fb3c1dfbec669c6c6192de81cbb0af64791e810 Mon Sep 17 00:00:00 2001 From: Volodymyr Krasnikov <129072588+volodk85@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:20:34 -0700 Subject: [PATCH 062/144] Write multiple cache gaps for warming service in one go (#112071) * Write multiple cache gaps for warming service in one go --- .../shared/SharedBlobCacheService.java | 51 
++++++++++++++++--- .../shared/SharedBlobCacheServiceTests.java | 28 ++++++---- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java index 3dfece0a9b20e..09fcd1cbeba92 100644 --- a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java +++ b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java @@ -967,12 +967,48 @@ void populate( listener.onResponse(false); return; } - try (var gapsListener = new RefCountingListener(listener.map(unused -> true))) { - assert writer.sharedInputStreamFactory(gaps) == null; - for (SparseFileTracker.Gap gap : gaps) { - executor.execute( - fillGapRunnable(gap, writer, null, ActionListener.releaseAfter(gapsListener.acquire(), refs.acquire())) - ); + final SourceInputStreamFactory streamFactory = writer.sharedInputStreamFactory(gaps); + logger.trace( + () -> Strings.format( + "fill gaps %s %s shared input stream factory", + gaps, + streamFactory == null ? "without" : "with" + ) + ); + if (streamFactory == null) { + try (var parallelGapsListener = new RefCountingListener(listener.map(unused -> true))) { + for (SparseFileTracker.Gap gap : gaps) { + executor.execute( + fillGapRunnable( + gap, + writer, + null, + ActionListener.releaseAfter(parallelGapsListener.acquire(), refs.acquire()) + ) + ); + } + } + } else { + try ( + var sequentialGapsListener = new RefCountingListener( + ActionListener.runBefore(listener.map(unused -> true), streamFactory::close) + ) + ) { + final List gapFillingTasks = gaps.stream() + .map( + gap -> fillGapRunnable( + gap, + writer, + streamFactory, + ActionListener.releaseAfter(sequentialGapsListener.acquire(), refs.acquire()) + ) + ) + .toList(); + executor.execute(() -> { + // Fill the gaps in order. If a gap fails to fill for whatever reason, the task for filling the next + // gap will still be executed. + gapFillingTasks.forEach(Runnable::run); + }); } } } @@ -1019,8 +1055,7 @@ void populateAndRead( () -> Strings.format( "fill gaps %s %s shared input stream factory", gaps, - (streamFactory == null ? "without" : "with"), - (streamFactory == null ? "" : " " + streamFactory) + streamFactory == null ? 
"without" : "with" ) ); if (streamFactory == null) { diff --git a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java index 0f3804baef42b..e405ed578b5a5 100644 --- a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java +++ b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java @@ -1422,7 +1422,7 @@ protected int computeCacheFileRegionSize(long fileLength, int region) { } } - public void testSharedSourceInputStreamFactory() throws Exception { + public void testUsageSharedSourceInputStreamFactoryInCachePopulation() throws Exception { final long regionSizeInBytes = size(100); final Settings settings = Settings.builder() .put(NODE_NAME_SETTING.getKey(), "node") @@ -1519,16 +1519,22 @@ public void fillCacheRange( }; final var range = ByteRange.of(0, regionSizeInBytes); - final PlainActionFuture future = new PlainActionFuture<>(); - region.populateAndRead( - range, - range, - (channel, channelPos, relativePos, length) -> length, - rangeMissingHandler, - threadPool.generic(), - future - ); - safeGet(future); + if (randomBoolean()) { + final PlainActionFuture future = new PlainActionFuture<>(); + region.populateAndRead( + range, + range, + (channel, channelPos, relativePos, length) -> length, + rangeMissingHandler, + threadPool.generic(), + future + ); + assertThat(safeGet(future).longValue(), equalTo(regionSizeInBytes)); + } else { + final PlainActionFuture future = new PlainActionFuture<>(); + region.populate(range, rangeMissingHandler, threadPool.generic(), future); + assertThat(safeGet(future), equalTo(true)); + } assertThat(invocationCounter.get(), equalTo(numberGaps)); assertThat(region.tracker.checkAvailable(regionSizeInBytes), is(true)); assertBusy(() -> assertThat(factoryClosed.get(), is(true))); From cb4d7ff28131650e84ae9aa7b94f1ce86758d3cc Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 29 Aug 2024 15:54:08 -0600 Subject: [PATCH 063/144] Skip CCS Usage telemetry ITs if feature flags is not enabled. 
(#112365) --- .../elasticsearch/search/ccs/CCSUsageTelemetryIT.java | 10 +++++++++- .../xpack/search/CCSUsageTelemetryAsyncSearchIT.java | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java index 40d98b2b5ea71..bb18b8f1b702d 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CCSUsageTelemetryIT.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.plugins.Plugin; @@ -36,6 +37,7 @@ import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.usage.UsageService; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.rules.TestRule; import org.junit.runner.Description; @@ -66,6 +68,7 @@ public class CCSUsageTelemetryIT extends AbstractMultiClustersTestCase { private static final Logger LOGGER = LogManager.getLogger(CCSUsageTelemetryIT.class); private static final String REMOTE1 = "cluster-a"; private static final String REMOTE2 = "cluster-b"; + private static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); @Override protected boolean reuseClusters() { @@ -80,6 +83,11 @@ protected Collection remoteClusterAlias() { @Rule public SkipUnavailableRule skipOverride = new SkipUnavailableRule(REMOTE1, REMOTE2); + @BeforeClass + protected static void skipIfTelemetryDisabled() { + assumeTrue("Skipping test as CCS_TELEMETRY_FEATURE_FLAG is disabled", CCS_TELEMETRY_FEATURE_FLAG.isEnabled()); + } + @Override protected Map skipUnavailableForRemoteClusters() { var map = skipOverride.getMap(); @@ -443,7 +451,7 @@ public void testRemoteTimesOut() throws Exception { // partial failure, and we disable partial results.. 
searchRequest.setCcsMinimizeRoundtrips(true); - TimeValue searchTimeout = new TimeValue(200, TimeUnit.MILLISECONDS); + TimeValue searchTimeout = new TimeValue(500, TimeUnit.MILLISECONDS); // query builder that will sleep for the specified amount of time in the query phase SlowRunningQueryBuilder slowRunningQueryBuilder = new SlowRunningQueryBuilder(searchTimeout.millis() * 5, remoteIndex); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(slowRunningQueryBuilder).timeout(searchTimeout); diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java index ac0b26cb4f4cd..4f8fa122047e1 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/CCSUsageTelemetryAsyncSearchIT.java @@ -14,6 +14,7 @@ import org.elasticsearch.action.search.TransportSearchAction; import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.plugins.Plugin; @@ -33,6 +34,7 @@ import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchRequest; import org.hamcrest.Matchers; import org.junit.Before; +import org.junit.BeforeClass; import java.util.Arrays; import java.util.Collection; @@ -53,6 +55,12 @@ public class CCSUsageTelemetryAsyncSearchIT extends AbstractMultiClustersTestCase { private static final String REMOTE1 = "cluster-a"; private static final String REMOTE2 = "cluster-b"; + private static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); + + @BeforeClass + protected static void skipIfTelemetryDisabled() { + assumeTrue("Skipping test as CCS_TELEMETRY_FEATURE_FLAG is disabled", CCS_TELEMETRY_FEATURE_FLAG.isEnabled()); + } @Override protected boolean reuseClusters() { From ad0292cc836ebcafbaf9c11c1a1192ae2a33b7ad Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Fri, 30 Aug 2024 09:09:19 +1000 Subject: [PATCH 064/144] Mute org.elasticsearch.xpack.ml.integration.MlJobIT testMultiIndexDelete #112381 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index c50234a8eaa9d..356f48e8eb252 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -168,6 +168,9 @@ tests: - class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT method: test {yaml=reference/rest-api/watcher/put-watch/line_120} issue: https://github.com/elastic/elasticsearch/issues/99517 +- class: org.elasticsearch.xpack.ml.integration.MlJobIT + method: testMultiIndexDelete + issue: https://github.com/elastic/elasticsearch/issues/112381 # Examples: # From b5da668ad9400a92073869f09cf8fd89eadc6882 Mon Sep 17 00:00:00 2001 From: Athena Brown Date: Thu, 29 Aug 2024 18:05:40 -0600 Subject: [PATCH 065/144] Add tier preference to security index settings allowlist and update default tier preference (#111818) This commit allows tier preference for the security system indices to be set using the Security Settings API, and adds validation to prevent using the `data_frozen` tier for security system indices. 
Also updates the default tier preference to `data_hot,data_content`. --- docs/changelog/111818.yaml | 5 + .../UpdateSecuritySettingsAction.java | 68 ++++++++++---- .../UpdateSecuritySettingsActionTests.java | 92 ++++++++++++++++--- .../xpack/security/SecuritySettingsIT.java | 51 +++++++++- .../security/profile/ProfileIntegTests.java | 2 +- .../TransportGetSecuritySettingsAction.java | 2 +- .../support/SecuritySystemIndices.java | 4 + 7 files changed, 192 insertions(+), 32 deletions(-) create mode 100644 docs/changelog/111818.yaml diff --git a/docs/changelog/111818.yaml b/docs/changelog/111818.yaml new file mode 100644 index 0000000000000..c3a632861aae6 --- /dev/null +++ b/docs/changelog/111818.yaml @@ -0,0 +1,5 @@ +pr: 111818 +summary: Add tier preference to security index settings allowlist +area: Security +type: enhancement +issues: [] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsAction.java index 2d59911ec7ecb..c2bf1466fd41f 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsAction.java @@ -14,9 +14,9 @@ import org.elasticsearch.action.support.master.AcknowledgedRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.routing.allocation.DataTier; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.xcontent.ConstructingObjectParser; @@ -28,6 +28,8 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.function.BiFunction; +import java.util.stream.Collectors; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -42,10 +44,35 @@ public class UpdateSecuritySettingsAction { public static final String TOKENS_INDEX_NAME = "security-tokens"; public static final String PROFILES_INDEX_NAME = "security-profile"; - public static final Set ALLOWED_SETTING_KEYS = Set.of( - IndexMetadata.SETTING_NUMBER_OF_REPLICAS, - IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS - ); + /** + * A map of allowed settings to validators for those settings. Values should take the value which is being assigned to the setting + * and an existing {@link ActionRequestValidationException}, to which they should add if the value is disallowed. 
+ */ + public static final Map< + String, + BiFunction> ALLOWED_SETTING_VALIDATORS = Map.of( + IndexMetadata.SETTING_NUMBER_OF_REPLICAS, + (it, ex) -> ex, // no additional validation + IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, + (it, ex) -> ex, // no additional validation + DataTier.TIER_PREFERENCE, + (it, ex) -> { + Set allowedTiers = Set.of(DataTier.DATA_CONTENT, DataTier.DATA_HOT, DataTier.DATA_WARM, DataTier.DATA_COLD); + if (it instanceof String preference) { + String disallowedTiers = DataTier.parseTierList(preference) + .stream() + .filter(tier -> allowedTiers.contains(tier) == false) + .collect(Collectors.joining(",")); + if (disallowedTiers.isEmpty() == false) { + return ValidateActions.addValidationError( + "disallowed data tiers [" + disallowedTiers + "] found, allowed tiers are [" + String.join(",", allowedTiers), + ex + ); + } + } + return ex; + } + ); private UpdateSecuritySettingsAction() {/* no instances */} @@ -154,19 +181,26 @@ private static ActionRequestValidationException validateIndexSettings( String indexName, ActionRequestValidationException existingExceptions ) { - Set forbiddenSettings = Sets.difference(indexSettings.keySet(), ALLOWED_SETTING_KEYS); - if (forbiddenSettings.size() > 0) { - return ValidateActions.addValidationError( - "illegal settings for index [" - + indexName - + "]: " - + forbiddenSettings - + ", these settings may not be configured. Only the following settings may be configured for that index: " - + ALLOWED_SETTING_KEYS, - existingExceptions - ); + ActionRequestValidationException errors = existingExceptions; + + for (Map.Entry entry : indexSettings.entrySet()) { + String setting = entry.getKey(); + if (ALLOWED_SETTING_VALIDATORS.containsKey(setting)) { + errors = ALLOWED_SETTING_VALIDATORS.get(setting).apply(entry.getValue(), errors); + } else { + errors = ValidateActions.addValidationError( + "illegal setting for index [" + + indexName + + "]: [" + + setting + + "], this setting may not be configured. 
Only the following settings may be configured for that index: " + + ALLOWED_SETTING_VALIDATORS.keySet(), + existingExceptions + ); + } } - return existingExceptions; + + return errors; } } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsActionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsActionTests.java index 893f7474c3e6e..50ab034a34ef3 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsActionTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/settings/UpdateSecuritySettingsActionTests.java @@ -7,15 +7,19 @@ package org.elasticsearch.xpack.core.security.action.settings; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.routing.allocation.DataTier; import org.elasticsearch.test.ESTestCase; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import java.util.regex.Pattern; -import static org.elasticsearch.xpack.core.security.action.settings.UpdateSecuritySettingsAction.ALLOWED_SETTING_KEYS; +import static org.elasticsearch.xpack.core.security.action.settings.UpdateSecuritySettingsAction.ALLOWED_SETTING_VALIDATORS; import static org.elasticsearch.xpack.core.security.action.settings.UpdateSecuritySettingsAction.MAIN_INDEX_NAME; import static org.elasticsearch.xpack.core.security.action.settings.UpdateSecuritySettingsAction.PROFILES_INDEX_NAME; import static org.elasticsearch.xpack.core.security.action.settings.UpdateSecuritySettingsAction.TOKENS_INDEX_NAME; @@ -27,6 +31,15 @@ public class UpdateSecuritySettingsActionTests extends ESTestCase { + static final Map> ALLOWED_SETTING_GENERATORS = Map.of( + IndexMetadata.SETTING_NUMBER_OF_REPLICAS, + () -> randomAlphaOfLength(5), // no additional validation + IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, + () -> randomAlphaOfLength(5), // no additional validation + DataTier.TIER_PREFERENCE, + () -> randomFrom(DataTier.DATA_CONTENT, DataTier.DATA_HOT, DataTier.DATA_WARM, DataTier.DATA_COLD) + ); + public void testValidateSettingsEmpty() { var req = new UpdateSecuritySettingsAction.Request( TEST_REQUEST_TIMEOUT, @@ -43,9 +56,10 @@ public void testValidateSettingsEmpty() { public void testAllowedSettingsOk() { Map allAllowedSettingsMap = new HashMap<>(); - for (String allowedSetting : ALLOWED_SETTING_KEYS) { - Map allowedSettingMap = Map.of(allowedSetting, randomAlphaOfLength(5)); - allAllowedSettingsMap.put(allowedSetting, randomAlphaOfLength(5)); + for (String allowedSetting : ALLOWED_SETTING_VALIDATORS.keySet()) { + String settingValue = ALLOWED_SETTING_GENERATORS.get(allowedSetting).get(); + Map allowedSettingMap = Map.of(allowedSetting, settingValue); + allAllowedSettingsMap.put(allowedSetting, settingValue); var req = new UpdateSecuritySettingsAction.Request( TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT, @@ -86,11 +100,12 @@ public void testAllowedSettingsOk() { public void testDisallowedSettingsFailsValidation() { String disallowedSetting = "index." - + randomValueOtherThanMany((value) -> ALLOWED_SETTING_KEYS.contains("index." + value), () -> randomAlphaOfLength(5)); + + randomValueOtherThanMany((value) -> ALLOWED_SETTING_VALIDATORS.containsKey("index." 
+ value), () -> randomAlphaOfLength(5)); Map disallowedSettingMap = Map.of(disallowedSetting, randomAlphaOfLength(5)); + String validSetting = randomFrom(ALLOWED_SETTING_VALIDATORS.keySet()); Map validOrEmptySettingMap = randomFrom( Collections.emptyMap(), - Map.of(randomFrom(ALLOWED_SETTING_KEYS), randomAlphaOfLength(5)) + Map.of(validSetting, ALLOWED_SETTING_GENERATORS.get(validSetting).get()) ); { var req = new UpdateSecuritySettingsAction.Request( @@ -106,11 +121,11 @@ public void testDisallowedSettingsFailsValidation() { assertThat( errorMsg, matchesRegex( - "illegal settings for index \\[" + "illegal setting for index \\[" + Pattern.quote(TOKENS_INDEX_NAME) + "\\]: \\[" + disallowedSetting - + "\\], these settings may not be configured. Only the following settings may be configured for that index.*" + + "\\], this setting may not be configured. Only the following settings may be configured for that index.*" ) ); } @@ -130,13 +145,13 @@ public void testDisallowedSettingsFailsValidation() { assertThat( errorMsg, matchesRegex( - "illegal settings for index \\[(" + "illegal setting for index \\[(" + Pattern.quote(MAIN_INDEX_NAME) + "|" + Pattern.quote(PROFILES_INDEX_NAME) + ")\\]: \\[" + disallowedSetting - + "\\], these settings may not be configured. Only the following settings may be configured for that index.*" + + "\\], this setting may not be configured. Only the following settings may be configured for that index.*" ) ); } @@ -156,7 +171,7 @@ public void testDisallowedSettingsFailsValidation() { assertThat( errorMsg, matchesRegex( - "illegal settings for index \\[(" + "illegal setting for index \\[(" + Pattern.quote(MAIN_INDEX_NAME) + "|" + Pattern.quote(TOKENS_INDEX_NAME) @@ -164,11 +179,64 @@ public void testDisallowedSettingsFailsValidation() { + Pattern.quote(PROFILES_INDEX_NAME) + ")\\]: \\[" + disallowedSetting - + "\\], these settings may not be configured. Only the following settings may be configured for that index.*" + + "\\], this setting may not be configured. 
Only the following settings may be configured for that index.*" ) ); } } } + public void testSettingValuesAreValidated() { + Map forbiddenSettingsMap = Map.of(DataTier.TIER_PREFERENCE, DataTier.DATA_FROZEN); + String badTier = randomAlphaOfLength(5); + Map badSettingsMap = Map.of(DataTier.TIER_PREFERENCE, badTier); + Map allowedSettingMap = Map.of( + DataTier.TIER_PREFERENCE, + randomFrom(DataTier.DATA_HOT, DataTier.DATA_WARM, DataTier.DATA_CONTENT, DataTier.DATA_COLD) + ); + { + var req = new UpdateSecuritySettingsAction.Request( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + allowedSettingMap, + Collections.emptyMap(), + Collections.emptyMap() + ); + assertThat(req.validate(), nullValue()); + } + + { + var req = new UpdateSecuritySettingsAction.Request( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + forbiddenSettingsMap, + Collections.emptyMap(), + Collections.emptyMap() + ); + ActionRequestValidationException exception = req.validate(); + assertThat(exception, notNullValue()); + assertThat(exception.validationErrors(), hasSize(1)); + assertThat( + exception.validationErrors().get(0), + containsString("disallowed data tiers [" + DataTier.DATA_FROZEN + "] found, allowed tiers are ") + ); + } + + { + var req = new UpdateSecuritySettingsAction.Request( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + badSettingsMap, + Collections.emptyMap(), + Collections.emptyMap() + ); + var exception = req.validate(); + assertThat(exception, notNullValue()); + assertThat(exception.validationErrors(), hasSize(1)); + assertThat( + exception.validationErrors().get(0), + containsString("disallowed data tiers [" + badTier + "] found, allowed tiers are ") + ); + } + } } diff --git a/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecuritySettingsIT.java b/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecuritySettingsIT.java index 5651538b2757c..f776c4834cc5f 100644 --- a/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecuritySettingsIT.java +++ b/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecuritySettingsIT.java @@ -19,6 +19,7 @@ import static org.elasticsearch.test.XContentTestUtils.createJsonMapView; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; public class SecuritySettingsIT extends SecurityInBasicRestTestCase { @@ -70,6 +71,54 @@ public void testBasicWorkflow() throws IOException { assertOK(getResp); final XContentTestUtils.JsonMapView mapView = createJsonMapView(getResp.getEntity().getContent()); assertThat(mapView.get("security.index.auto_expand_replicas"), equalTo("0-all")); + assertThat(mapView.get("security-profile.index.auto_expand_replicas"), equalTo("0-all")); + } + + public void testTierPreference() throws IOException { + { + Request req = new Request("PUT", "/_security/settings"); + req.setJsonEntity(""" + { + "security": { + "index.routing.allocation.include._tier_preference": "data_hot" + }, + "security-profile": { + "index.routing.allocation.include._tier_preference": "data_hot" + } + } + """); + Response resp = adminClient().performRequest(req); + assertOK(resp); + Request getRequest = new Request("GET", "/_security/settings"); + Response getResp = adminClient().performRequest(getRequest); + assertOK(getResp); + final XContentTestUtils.JsonMapView mapView = 
createJsonMapView(getResp.getEntity().getContent()); + assertThat(mapView.get("security.index.routing.allocation.include._tier_preference"), equalTo("data_hot")); + assertThat(mapView.get("security-profile.index.routing.allocation.include._tier_preference"), equalTo("data_hot")); + } + + { + Request req = new Request("PUT", "/_security/settings"); + req.setJsonEntity(""" + { + "security": { + "index.routing.allocation.include._tier_preference": null + }, + "security-profile": { + "index.routing.allocation.include._tier_preference": null + } + } + """); + Response resp = adminClient().performRequest(req); + assertOK(resp); + Request getRequest = new Request("GET", "/_security/settings"); + Response getResp = adminClient().performRequest(getRequest); + assertOK(getResp); + final XContentTestUtils.JsonMapView mapView = createJsonMapView(getResp.getEntity().getContent()); + assertThat(mapView.get("security.index.routing.allocation.include._tier_preference"), nullValue()); + assertThat(mapView.get("security-profile.index.routing.allocation.include._tier_preference"), nullValue()); + } + } public void testNoUpdatesThrowsException() throws IOException { @@ -85,7 +134,7 @@ public void testDisallowedSettingThrowsException() throws IOException { ResponseException ex = expectThrows(ResponseException.class, () -> adminClient().performRequest(req)); assertThat( EntityUtils.toString(ex.getResponse().getEntity()), - containsString("illegal settings for index [security]: " + "[index.max_ngram_diff], these settings may not be configured.") + containsString("illegal setting for index [security]: " + "[index.max_ngram_diff], this setting may not be configured.") ); } diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/profile/ProfileIntegTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/profile/ProfileIntegTests.java index d057b7ce0be20..4b8fbfd41acdf 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/profile/ProfileIntegTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/profile/ProfileIntegTests.java @@ -133,7 +133,7 @@ public void testProfileIndexAutoCreation() { final Settings settings = getIndexResponse.getSettings().get(INTERNAL_SECURITY_PROFILE_INDEX_8); assertThat(settings.get("index.number_of_shards"), equalTo("1")); assertThat(settings.get("index.auto_expand_replicas"), equalTo("0-1")); - assertThat(settings.get("index.routing.allocation.include._tier_preference"), equalTo("data_content")); + assertThat(settings.get("index.routing.allocation.include._tier_preference"), equalTo("data_hot,data_content")); final Map mappings = getIndexResponse.getMappings().get(INTERNAL_SECURITY_PROFILE_INDEX_8).getSourceAsMap(); diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportGetSecuritySettingsAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportGetSecuritySettingsAction.java index 25a677517825f..96363d5a099da 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportGetSecuritySettingsAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportGetSecuritySettingsAction.java @@ -85,7 +85,7 @@ private static Settings getFilteredSettingsForIndex(String indexName, ClusterSta .map(IndexMetadata::getSettings) 
.map(settings -> { Settings.Builder builder = Settings.builder(); - for (String settingName : UpdateSecuritySettingsAction.ALLOWED_SETTING_KEYS) { + for (String settingName : UpdateSecuritySettingsAction.ALLOWED_SETTING_VALIDATORS.keySet()) { if (settings.hasValue(settingName)) { builder.put(settingName, settings.get(settingName)); } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/support/SecuritySystemIndices.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/support/SecuritySystemIndices.java index 9541dd9dc470d..36ea14c6e101b 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/support/SecuritySystemIndices.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/support/SecuritySystemIndices.java @@ -13,6 +13,7 @@ import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.allocation.DataTier; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.VersionId; import org.elasticsearch.common.settings.Settings; @@ -154,6 +155,7 @@ private static Settings getMainIndexSettings() { return Settings.builder() .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1") + .put(DataTier.TIER_PREFERENCE, "data_hot,data_content") .put(IndexMetadata.SETTING_PRIORITY, 1000) .put(IndexMetadata.INDEX_FORMAT_SETTING.getKey(), INTERNAL_MAIN_INDEX_FORMAT) .put("analysis.filter.email.type", "pattern_capture") @@ -702,6 +704,7 @@ private static Settings getTokenIndexSettings() { return Settings.builder() .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1") + .put(DataTier.TIER_PREFERENCE, "data_hot,data_content") .put(IndexMetadata.SETTING_PRIORITY, 1000) .put(IndexMetadata.INDEX_FORMAT_SETTING.getKey(), INTERNAL_TOKENS_INDEX_FORMAT) .build(); @@ -902,6 +905,7 @@ private static Settings getProfileIndexSettings(Settings settings) { final Settings.Builder settingsBuilder = Settings.builder() .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1") + .put(DataTier.TIER_PREFERENCE, "data_hot,data_content") .put(IndexMetadata.SETTING_PRIORITY, 1000) .put(IndexMetadata.INDEX_FORMAT_SETTING.getKey(), INTERNAL_PROFILE_INDEX_FORMAT) .put("analysis.filter.email.type", "pattern_capture") From e379c4fcc10041b1cae5a4bb4b231a5c9938ad99 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Fri, 30 Aug 2024 15:21:50 +1000 Subject: [PATCH 066/144] Mute org.elasticsearch.xpack.searchablesnapshots.cache.shared.NodesCachesStatsIntegTests testNodesCachesStats #112384 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 356f48e8eb252..57cc7abb899e7 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -171,6 +171,9 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testMultiIndexDelete issue: https://github.com/elastic/elasticsearch/issues/112381 +- class: org.elasticsearch.xpack.searchablesnapshots.cache.shared.NodesCachesStatsIntegTests + method: testNodesCachesStats + issue: https://github.com/elastic/elasticsearch/issues/112384 # Examples: # From bfc7355ed7bd241e6c828e3de5282413bea60e33 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Fri, 30 Aug 2024 
Subject: [PATCH 067/144] Some fixes for toReleaseVersion change #112242 from
 the backport #112280 (#112284)

---
 server/src/main/java/org/elasticsearch/ReleaseVersions.java    | 2 +-
 .../src/test/java/org/elasticsearch/TransportVersionTests.java | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/ReleaseVersions.java b/server/src/main/java/org/elasticsearch/ReleaseVersions.java
index cacdca1c5b528..bb90bc79a528a 100644
--- a/server/src/main/java/org/elasticsearch/ReleaseVersions.java
+++ b/server/src/main/java/org/elasticsearch/ReleaseVersions.java
@@ -53,7 +53,7 @@ public static IntFunction<String> generateVersionsLookup(Class<?> versionContain
         NavigableMap<Integer, List<Version>> versions = new TreeMap<>();
 
         // add the current version id, which won't be in the csv
-        versions.put(current, List.of(Version.CURRENT));
+        versions.computeIfAbsent(current, k -> new ArrayList<>()).add(Version.CURRENT);
 
         try (BufferedReader reader = new BufferedReader(new InputStreamReader(versionsFile, StandardCharsets.UTF_8))) {
             String line;
diff --git a/server/src/test/java/org/elasticsearch/TransportVersionTests.java b/server/src/test/java/org/elasticsearch/TransportVersionTests.java
index a3728f20a23d4..8cc0875fb9f38 100644
--- a/server/src/test/java/org/elasticsearch/TransportVersionTests.java
+++ b/server/src/test/java/org/elasticsearch/TransportVersionTests.java
@@ -19,6 +19,7 @@
 import java.util.regex.Pattern;
 
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.endsWith;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.is;
@@ -187,7 +188,7 @@ public void testCURRENTIsLatest() {
     }
 
     public void testToReleaseVersion() {
-        assertThat(TransportVersion.current().toReleaseVersion(), equalTo(Version.CURRENT.toString()));
+        assertThat(TransportVersion.current().toReleaseVersion(), endsWith(Version.CURRENT.toString()));
     }
 
     public void testToString() {

From b8359dbc13bcd588af9badcc2f47ca850fbfde1e Mon Sep 17 00:00:00 2001
From: Moritz Mack
Date: Fri, 30 Aug 2024 11:14:08 +0200
Subject: [PATCH 068/144] Cleanup / move some serverless code to ES (#112360)

---
 .../common/collect/Iterators.java       | 53 +++++++++++++++++++
 .../common/collect/IteratorsTests.java  | 23 ++++++++
 2 files changed, 76 insertions(+)

diff --git a/server/src/main/java/org/elasticsearch/common/collect/Iterators.java b/server/src/main/java/org/elasticsearch/common/collect/Iterators.java
index d029f8e3becc0..358b5675cc8e1 100644
--- a/server/src/main/java/org/elasticsearch/common/collect/Iterators.java
+++ b/server/src/main/java/org/elasticsearch/common/collect/Iterators.java
@@ -10,9 +10,11 @@
 
 import org.elasticsearch.core.Nullable;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.function.BiFunction;
@@ -233,6 +235,57 @@ public T next() {
         }
     }
 
+    /**
+     * Returns an iterator that yields at most the first {@code n} elements of the provided {@code input} iterator.
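+     * A non-positive {@code n} yields an empty iterator (negative values are additionally rejected by an assertion).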
+     */
+    public static <T> Iterator<T> limit(Iterator<T> input, int n) {
+        assert n >= 0 : "negative limit";
+        if (n > 0 && input.hasNext()) {
+            return new LimitIterator<>(input, n);
+        } else {
+            return Collections.emptyIterator();
+        }
+    }
+
+    private static final class LimitIterator<T> implements Iterator<T> {
+        private final Iterator<T> input;
+        private final int limit;
+        private int current;
+
+        LimitIterator(Iterator<T> input, int limit) {
+            this.input = input;
+            this.limit = limit;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return current < limit && input.hasNext();
+        }
+
+        @Override
+        public T next() {
+            if (current >= limit) {
+                throw new NoSuchElementException();
+            }
+            ++current;
+            return input.next();
+        }
+    }
+
+    /**
+     * Returns a list containing the elements of the provided {@code iterator}.
+     */
+    public static <T> List<T> toList(Iterator<T> iterator) {
+        if (iterator.hasNext()) {
+            var list = new ArrayList<T>();
+            while (iterator.hasNext()) {
+                list.add(iterator.next());
+            }
+            return Collections.unmodifiableList(list);
+        }
+        return Collections.emptyList();
+    }
+
     public static <T, U> Iterator<U> flatMap(Iterator<? extends T> input, Function<T, Iterator<? extends U>> fn) {
         while (input.hasNext()) {
             final var value = fn.apply(input.next());
diff --git a/server/src/test/java/org/elasticsearch/common/collect/IteratorsTests.java b/server/src/test/java/org/elasticsearch/common/collect/IteratorsTests.java
index a3573d081397a..63cafa7e08d26 100644
--- a/server/src/test/java/org/elasticsearch/common/collect/IteratorsTests.java
+++ b/server/src/test/java/org/elasticsearch/common/collect/IteratorsTests.java
@@ -28,6 +28,10 @@
 import java.util.function.ToIntFunction;
 import java.util.stream.IntStream;
 
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.is;
+
 public class IteratorsTests extends ESTestCase {
     public void testConcatentation() {
         List<Integer> threeTwoOne = Arrays.asList(3, 2, 1);
@@ -242,6 +246,25 @@ public void testFilter() {
     }
 
+    public void testLimit() {
+        var result = Iterators.limit(Collections.<Integer>emptyIterator(), 10);
+        assertThat(result.hasNext(), is(false));
+        assertThat(Iterators.toList(result), is(empty()));
+
+        var values = List.of(1, 2, 3);
+        result = Iterators.limit(values.iterator(), 10);
+        assertThat(result.hasNext(), is(true));
+        assertThat(Iterators.toList(result), contains(1, 2, 3));
+
+        result = Iterators.limit(values.iterator(), 2);
+        assertThat(result.hasNext(), is(true));
+        assertThat(Iterators.toList(result), contains(1, 2));
+
+        result = Iterators.limit(values.iterator(), 0);
+        assertThat(result.hasNext(), is(false));
+        assertThat(Iterators.toList(result), is(empty()));
+    }
+
     public void testFailFast() {
         final var array = randomIntegerArray();
         assertEmptyIterator(Iterators.failFast(Iterators.forArray(array), () -> true));

From f444ce6971f5aff00f5ebd48e9156cc88a038030 Mon Sep 17 00:00:00 2001
From: Pooya Salehi
Date: Fri, 30 Aug 2024 11:14:11 +0200
Subject: [PATCH 069/144] Make write thread pools EWMA configurable (#112283)

Relates https://github.com/elastic/elasticsearch/pull/112206.
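Each completed task updates the pool's moving average as new = alpha * sample + (1 - alpha) * old,
so alpha controls how hard a single sample can pull the average. A minimal standalone
sketch of that arithmetic (illustrative only: EwmaSketch and next are hypothetical names,
not the production ExponentiallyWeightedMovingAverage class):

public class EwmaSketch {
    // One EWMA step: alpha weights the newest sample against the running average.
    static double next(double alpha, double average, double sample) {
        return alpha * sample + (1.0 - alpha) * average;
    }

    public static void main(String[] args) {
        // With the default alpha of 0.02, one 20s task moves a 100ms average to
        // 0.02 * 20_000 + 0.98 * 100 = 498ms, the "~500ms" described in ThreadPool's comment.
        System.out.println(next(0.02, 100.0, 20_000.0)); // prints 498.0
        // A larger alpha, set via the new thread_pool.write.ewma_alpha setting,
        // reacts far more aggressively to the same single task:
        System.out.println(next(0.5, 100.0, 20_000.0)); // prints 10050.0
    }
}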
---
 .../threadpool/SimpleThreadPoolIT.java        | 20 +++++++++++++++++++
 .../ExponentiallyWeightedMovingAverage.java   |  5 +++++
 .../common/settings/ClusterSettings.java      |  1 +
 ...utionTimeTrackingEsThreadPoolExecutor.java |  5 +++++
 .../DefaultBuiltInExecutorBuilders.java       |  3 ++-
 .../elasticsearch/threadpool/ThreadPool.java  | 11 +++++++++-
 6 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java
index 44b6ef1d51ce0..d98b1e7d4e526 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java
@@ -10,6 +10,7 @@
 
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.plugins.PluginsService;
@@ -19,6 +20,7 @@
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
 import org.elasticsearch.test.ESIntegTestCase.Scope;
+import org.hamcrest.CoreMatchers;
 
 import java.lang.management.ManagementFactory;
 import java.lang.management.ThreadInfo;
@@ -36,12 +38,15 @@
 import static java.util.function.Function.identity;
 import static org.elasticsearch.common.util.Maps.toUnmodifiableSortedMap;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
+import static org.elasticsearch.threadpool.ThreadPool.DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA;
+import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING;
 import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
 import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasEntry;
 import static org.hamcrest.Matchers.in;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.matchesRegex;
 
 @ClusterScope(scope = Scope.TEST, numDataNodes = 0, numClientNodes = 0)
@@ -190,4 +195,19 @@ public void testThreadPoolMetrics() throws Exception {
         });
     }
 
+    public void testWriteThreadpoolEwmaAlphaSetting() {
+        Settings settings = Settings.EMPTY;
+        var ewmaAlpha = DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA;
+        if (randomBoolean()) {
+            ewmaAlpha = randomDoubleBetween(0.0, 1.0, true);
+            settings = Settings.builder().put(WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING.getKey(), ewmaAlpha).build();
+        }
+        var nodeName = internalCluster().startNode(settings);
+        var threadPool = internalCluster().getInstance(ThreadPool.class, nodeName);
+        for (var name : List.of(ThreadPool.Names.WRITE, ThreadPool.Names.SYSTEM_WRITE, ThreadPool.Names.SYSTEM_CRITICAL_WRITE)) {
+            assertThat(threadPool.executor(name), instanceOf(TaskExecutionTimeTrackingEsThreadPoolExecutor.class));
+            final var executor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) threadPool.executor(name);
+            assertThat(Double.compare(executor.getEwmaAlpha(), ewmaAlpha), CoreMatchers.equalTo(0));
+        }
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/common/ExponentiallyWeightedMovingAverage.java b/server/src/main/java/org/elasticsearch/common/ExponentiallyWeightedMovingAverage.java
index 8b969cdcf9801..ce4328530ef41 100644
--- a/server/src/main/java/org/elasticsearch/common/ExponentiallyWeightedMovingAverage.java
+++ b/server/src/main/java/org/elasticsearch/common/ExponentiallyWeightedMovingAverage.java
@@ -46,4 +46,9 @@ public void addValue(double newValue) {
             successful = averageBits.compareAndSet(currentBits, newBits);
         } while (successful == false);
     }
+
+    // Used for testing
+    public double getAlpha() {
+        return alpha;
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
index 3c60d63f78991..fb219f9093a96 100644
--- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
+++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
@@ -522,6 +522,7 @@ public void apply(Settings value, Settings current, Settings previous) {
             ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING,
             ThreadPool.LATE_TIME_INTERVAL_WARN_THRESHOLD_SETTING,
             ThreadPool.SLOW_SCHEDULER_TASK_WARN_THRESHOLD_SETTING,
+            ThreadPool.WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING,
             FastVectorHighlighter.SETTING_TV_HIGHLIGHT_MULTI_VALUE,
             Node.BREAKER_TYPE_KEY,
             OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING,
diff --git a/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java b/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java
index 92e1626b0d0bb..141f4571ca02e 100644
--- a/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java
+++ b/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java
@@ -146,4 +146,9 @@ protected void appendThreadPoolExecutorDetails(StringBuilder sb) {
     public Map<Runnable, Long> getOngoingTasks() {
         return trackOngoingTasks ?
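            // When tracking is enabled, Map.copyOf returns an immutable snapshot,
            // so callers never observe the live map mutating: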
            Map.copyOf(ongoingTasks) : Map.of();
    }
+
+    // Used for testing
+    public double getEwmaAlpha() {
+        return executionEWMA.getAlpha();
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java b/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java
index a4046f2f1594c..f8f623ff89643 100644
--- a/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java
+++ b/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java
@@ -18,7 +18,7 @@
 import java.util.Map;
 
 import static java.util.Collections.unmodifiableMap;
-import static org.elasticsearch.threadpool.ThreadPool.indexAutoscalingEWMA;
+import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING;
 import static org.elasticsearch.threadpool.ThreadPool.searchAutoscalingEWMA;
 
 public class DefaultBuiltInExecutorBuilders implements BuiltInExecutorBuilders {
@@ -29,6 +29,7 @@ public Map<String, ExecutorBuilder> getBuilders(Settings settings, int allocated
         final int halfProcMaxAt5 = ThreadPool.halfAllocatedProcessorsMaxFive(allocatedProcessors);
         final int halfProcMaxAt10 = ThreadPool.halfAllocatedProcessorsMaxTen(allocatedProcessors);
         final int genericThreadPoolMax = ThreadPool.boundedBy(4 * allocatedProcessors, 128, 512);
+        final double indexAutoscalingEWMA = WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING.get(settings);
 
         Map<String, ExecutorBuilder> result = new HashMap<>();
         result.put(
diff --git a/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java b/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java
index 10b92f8c6dace..290352132ab00 100644
--- a/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java
+++ b/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java
@@ -183,7 +183,7 @@ public static ThreadPoolType fromType(String type) {
     // EWMA value is at least within 90% of the new increased task duration. This value also determines the impact of a single
     // long-running task on the moving average and limits it roughly to 2% of the (long) task duration, e.g. if the current
     // moving average is 100ms, and we get one task which takes 20s the new EWMA will be ~500ms.
-    public static final double indexAutoscalingEWMA = 0.02;
+    public static final double DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA = 0.02;
 
     private final Map<String, ExecutorHolder> executors;
 
@@ -230,6 +230,15 @@ public Collection<ExecutorBuilder> builders() {
         Setting.Property.NodeScope
     );
 
+    // A setting to change the alpha parameter of the EWMA used in WRITE, SYSTEM_WRITE and SYSTEM_CRITICAL_WRITE thread pools
+    public static final Setting<Double> WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING = Setting.doubleSetting(
+        "thread_pool.write.ewma_alpha",
+        DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA,
+        0.0,
+        1.0,
+        Setting.Property.NodeScope
+    );
+
     /**
      * Defines and builds the many thread pools delineated in {@link Names}.
     *

From adb23531f9396259a607b33d1bf7587347ad95f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Fri, 30 Aug 2024 13:17:59 +0200
Subject: [PATCH 070/144] [DOCS] Adds Google Vertex AI tutorial (#112339)

Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com>
---
 .../semantic-search-inference.asciidoc        | 23 ++++---
 .../infer-api-ingest-pipeline-widget.asciidoc | 17 +++++
 .../infer-api-ingest-pipeline.asciidoc        | 26 ++++++++
 .../infer-api-mapping-widget.asciidoc         | 17 +++++
 .../inference-api/infer-api-mapping.asciidoc  | 33 ++++++++++
 .../infer-api-reindex-widget.asciidoc         | 17 +++++
 .../inference-api/infer-api-reindex.asciidoc  | 22 +++++++
 .../infer-api-requirements-widget.asciidoc    | 19 +++++-
 .../infer-api-requirements.asciidoc           |  9 +++
 .../infer-api-search-widget.asciidoc          | 17 +++++
 .../inference-api/infer-api-search.asciidoc   | 65 +++++++++++++++++++
 .../infer-api-task-widget.asciidoc            | 17 +++++
 .../inference-api/infer-api-task.asciidoc     | 24 +++++++
 13 files changed, 296 insertions(+), 10 deletions(-)

diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc
index 719aeb070fc7c..dee91a6aa4ec4 100644
--- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc
+++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc
@@ -9,16 +9,20 @@ The instructions in this tutorial shows you how to use the {infer} API workflow
 
 IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer to the <> end-to-end tutorial.
 
-The following examples use Cohere's `embed-english-v3.0` model, the `all-mpnet-base-v2` model from HuggingFace, and OpenAI's `text-embedding-ada-002` second generation embedding model.
+The following examples use the:
+
+* `embed-english-v3.0` model for https://docs.cohere.com/docs/cohere-embed[Cohere]
+* `all-mpnet-base-v2` model from https://huggingface.co/sentence-transformers/all-mpnet-base-v2[HuggingFace]
+* `text-embedding-ada-002` second generation embedding model for OpenAI
+* models available through https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio] or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]
+* `text-embedding-004` model for https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api[Google Vertex AI]
+* `mistral-embed` model for https://docs.mistral.ai/getting-started/models/[Mistral]
+* `amazon.titan-embed-text-v1` model for https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[Amazon Bedrock]
+* `ops-text-embedding-zh-001` model for https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[AlibabaCloud AI]
+
 You can use any Cohere and OpenAI models, they are all supported by the {infer} API.
 For a list of recommended models available on HuggingFace, refer to <>.
 
-Azure based examples use models available through https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio]
-or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI].
-Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API].
-Amazon Bedrock examples use the `amazon.titan-embed-text-v1` model from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[the Amazon Bedrock base models].
-AlibabaCloud AI Search examples use the `ops-text-embedding-zh-001` model from https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[the AlibabaCloud AI Search base models].
-
 Click the name of the service you want to use on any of the widgets below to review the corresponding instructions.
 
 [discrete]
@@ -74,8 +78,8 @@ Once the upload is complete, you can see an index named `test-data` with 182469
 [[reindexing-data-infer]]
 ==== Ingest the data through the {infer} ingest pipeline
 
-Create the embeddings from the text by reindexing the data through the {infer}
-pipeline that uses the chosen model as the inference model.
+Create embeddings from the text by reindexing the data through the {infer} pipeline that uses your chosen model.
+This step uses the {ref}/docs-reindex.html[reindex API] to simulate data ingestion through a pipeline.
 
 include::{es-ref-dir}/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc[]
 
@@ -114,5 +118,6 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-search-widget.asciidoc
 
 You can also find tutorials in an interactive Colab notebook format using the {es} Python client:
+
 * https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/inference-cohere.ipynb[Cohere {infer} tutorial notebook]
 * https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/search/07-inference.ipynb[OpenAI {infer} tutorial notebook]
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc
index 3a686e27cf580..d8d1cfaa2a2c7 100644
--- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc
+++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc
@@ -37,6 +37,12 @@
             id="infer-api-ingest-azure-ai-studio">
             Azure AI Studio
+
+
+
+
+
+
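The reindex step above amounts to a single `_reindex` call that reads the uploaded
`test-data` index and writes through the {infer} pipeline. A rough sketch in the style
of the REST tests earlier in this series (illustrative only: the `my-embeddings`
destination index and `my-inference-pipeline` pipeline names are placeholders, and the
tutorial's exact request bodies vary per service):

// Rough sketch of the reindex-through-pipeline step using the low-level REST client;
// assumes a test context where client() returns a configured RestClient.
Request req = new Request("POST", "/_reindex");
req.setJsonEntity("""
    {
        "source": {
            "index": "test-data"
        },
        "dest": {
            "index": "my-embeddings",
            "pipeline": "my-inference-pipeline"
        }
    }
    """);
Response resp = client().performRequest(req);
assertOK(resp);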