From 51833f524b6684b641d5851b7fccbf9422c8c4d2 Mon Sep 17 00:00:00 2001
From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com>
Date: Fri, 8 Nov 2024 11:28:30 +0000
Subject: [PATCH 01/39] Minor refactor for ES|QL qstr integration test
 (#116469)

---
 .../action/AbstractEsqlIntegTestCase.java | 14 ++++++++++++
 .../xpack/esql/plugin/QueryStringIT.java  | 22 +++++--------------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AbstractEsqlIntegTestCase.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AbstractEsqlIntegTestCase.java
index 669723abe70dd..7ae45497f7297 100644
--- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AbstractEsqlIntegTestCase.java
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/AbstractEsqlIntegTestCase.java
@@ -32,6 +32,7 @@ import org.elasticsearch.xpack.esql.plugin.TransportEsqlQueryAction;
 import org.junit.After;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
@@ -39,6 +40,7 @@
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.xpack.esql.EsqlTestUtils.getValuesList;
+import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.equalTo;
 
 @TestLogging(value = "org.elasticsearch.xpack.esql.session:DEBUG", reason = "to better understand planning")
@@ -219,4 +221,16 @@ protected static void assertColumnTypes(List<? extends ColumnInfo> actualColumns
     protected static void assertValues(Iterator<Iterator<Object>> actualValues, Iterable<Iterable<Object>> expectedValues) {
         assertThat(getValuesList(actualValues), equalTo(getValuesList(expectedValues)));
     }
+
+    protected static void assertValuesInAnyOrder(Iterator<Iterator<Object>> actualValues, Iterable<Iterable<Object>> expectedValues) {
+        List<List<Object>> items = new ArrayList<>();
+        for (Iterable<Object> outer : expectedValues) {
+            var item = new ArrayList<>();
+            for (var inner : outer) {
+                item.add(inner);
+            }
+            items.add(item);
+        }
+        assertThat(getValuesList(actualValues), containsInAnyOrder(items.toArray()));
+    }
 }
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java
index e7da83a40fb20..03af16d29e9b4 100644
--- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java
@@ -13,18 +13,12 @@ import org.elasticsearch.index.query.QueryShardException;
 import org.elasticsearch.xpack.esql.VerificationException;
 import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
-import org.elasticsearch.xpack.esql.action.ColumnInfoImpl;
-import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.junit.Before;
 
 import java.util.List;
 
-import static org.elasticsearch.test.ListMatcher.matchesList;
-import static org.elasticsearch.test.MapMatcher.assertMap;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
-import static org.elasticsearch.xpack.esql.EsqlTestUtils.getValuesList;
 import static org.hamcrest.CoreMatchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
 
 public class QueryStringIT extends AbstractEsqlIntegTestCase {
 
@@ -42,11 +36,9 @@ public void testSimpleQueryString() {
             """;
         try 
(var resp = run(query)) {
-            assertThat(resp.columns().stream().map(ColumnInfoImpl::name).toList(), equalTo(List.of("id")));
-            assertThat(resp.columns().stream().map(ColumnInfoImpl::type).map(DataType::toString).toList(), equalTo(List.of("INTEGER")));
-            // values
-            List<List<Object>> values = getValuesList(resp);
-            assertMap(values, matchesList().item(List.of(1)).item(List.of(3)).item(List.of(4)).item(List.of(5)));
+            assertColumnNames(resp.columns(), List.of("id"));
+            assertColumnTypes(resp.columns(), List.of("integer"));
+            assertValues(resp.values(), List.of(List.of(1), List.of(3), List.of(4), List.of(5)));
         }
     }
 
@@ -58,11 +50,9 @@ public void testMultiFieldQueryString() {
             """;
 
         try (var resp = run(query)) {
-            assertThat(resp.columns().stream().map(ColumnInfoImpl::name).toList(), equalTo(List.of("id")));
-            assertThat(resp.columns().stream().map(ColumnInfoImpl::type).map(DataType::toString).toList(), equalTo(List.of("INTEGER")));
-            // values
-            List<List<Object>> values = getValuesList(resp);
-            assertThat(values.size(), equalTo(5));
+            assertColumnNames(resp.columns(), List.of("id"));
+            assertColumnTypes(resp.columns(), List.of("integer"));
+            assertValuesInAnyOrder(resp.values(), List.of(List.of(1), List.of(2), List.of(3), List.of(4), List.of(5)));
         }
     }
 

From b161f2c22af0a9e4a45ac44e57ff0c5a2ad746b1 Mon Sep 17 00:00:00 2001
From: David Kyle
Date: Fri, 8 Nov 2024 12:46:13 +0000
Subject: [PATCH 02/39] Document using xpack.ml.use_auto_machine_memory_percent
 setting in docker getting started (#114009)

If xpack.ml.use_auto_machine_memory_percent is not explicitly set to true
then the default value (false) means ML will only use 30% of the available
memory, making it impractical to run the ELSER model. Enabling the setting
is useful for users wanting to get started with semantic search. The single
node docker instructions have been updated with a command that gives the
container enough memory to run the ELSER model and enables
xpack.ml.use_auto_machine_memory_percent. For the multi-node guide the
docker compose file is updated to enable the ml setting for every node in
the cluster.

---
 docs/reference/setup/install/docker.asciidoc           | 9 +++++++++
 docs/reference/setup/install/docker/docker-compose.yml | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/docs/reference/setup/install/docker.asciidoc b/docs/reference/setup/install/docker.asciidoc
index 370fc5c4ccf7e..58feb55f32e2f 100644
--- a/docs/reference/setup/install/docker.asciidoc
+++ b/docs/reference/setup/install/docker.asciidoc
@@ -86,6 +86,15 @@ docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB {docker-image}
 
 TIP: Use the `-m` flag to set a memory limit for the container. This removes the need to <>.
 +
+
+{ml-cap} features such as <>
+require a larger container with more than 1GB of memory.
+If you intend to use the {ml} capabilities, then start the container with this command:
++
+[source,sh,subs="attributes"]
+----
+docker run --name es01 --net elastic -p 9200:9200 -it -m 6GB -e "xpack.ml.use_auto_machine_memory_percent=true" {docker-image}
+----
 
 The command prints the `elastic` user password and an enrollment token for {kib}.
 
 . Copy the generated `elastic` password and enrollment token. 
These credentials diff --git a/docs/reference/setup/install/docker/docker-compose.yml b/docs/reference/setup/install/docker/docker-compose.yml index 15d8c11e2f12f..db5b6e6c91b49 100644 --- a/docs/reference/setup/install/docker/docker-compose.yml +++ b/docs/reference/setup/install/docker/docker-compose.yml @@ -90,6 +90,7 @@ services: - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt - xpack.security.transport.ssl.verification_mode=certificate - xpack.license.self_generated.type=${LICENSE} + - xpack.ml.use_auto_machine_memory_percent=true mem_limit: ${MEM_LIMIT} ulimits: memlock: @@ -130,6 +131,7 @@ services: - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt - xpack.security.transport.ssl.verification_mode=certificate - xpack.license.self_generated.type=${LICENSE} + - xpack.ml.use_auto_machine_memory_percent=true mem_limit: ${MEM_LIMIT} ulimits: memlock: @@ -170,6 +172,7 @@ services: - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt - xpack.security.transport.ssl.verification_mode=certificate - xpack.license.self_generated.type=${LICENSE} + - xpack.ml.use_auto_machine_memory_percent=true mem_limit: ${MEM_LIMIT} ulimits: memlock: From a3339574eebd727c1e901f1d39ed2b5bf624ed8a Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 8 Nov 2024 12:48:48 +0000 Subject: [PATCH 03/39] Modernize `AutoCreateIndexIT` (#116415) Since this test was written we've added lots of stuff to the test framework to make it easier to write tests like this. This commit adopts the new test utils to shorten the test a bit. --- .../action/support/AutoCreateIndexIT.java | 75 ++++++------------- 1 file changed, 24 insertions(+), 51 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/support/AutoCreateIndexIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/support/AutoCreateIndexIT.java index bcaca0766e536..0aedfed037a6e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/support/AutoCreateIndexIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/support/AutoCreateIndexIT.java @@ -10,15 +10,14 @@ package org.elasticsearch.action.support; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentType; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; -import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasItems; @@ -29,65 +28,39 @@ public void testBatchingWithDeprecationWarnings() throws Exception { final var masterNodeClusterService = internalCluster().getCurrentMasterNodeInstance(ClusterService.class); final var barrier = new CyclicBarrier(2); masterNodeClusterService.createTaskQueue("block", Priority.NORMAL, batchExecutionContext -> { - barrier.await(10, TimeUnit.SECONDS); - barrier.await(10, TimeUnit.SECONDS); + safeAwait(barrier); + safeAwait(barrier); batchExecutionContext.taskContexts().forEach(c -> c.success(() -> {})); return batchExecutionContext.initialState(); - }).submitTask("block", e -> { assert false : e; }, null); + }).submitTask("block", ESTestCase::fail, null); - barrier.await(10, TimeUnit.SECONDS); + safeAwait(barrier); final var countDownLatch = new CountDownLatch(2); 
final var client = client(); - client.prepareIndex("no-dot").setSource("{}", XContentType.JSON).execute(new ActionListener<>() { - @Override - public void onResponse(DocWriteResponse indexResponse) { - try { - final var warningHeaders = client.threadPool().getThreadContext().getResponseHeaders().get("Warning"); - if (warningHeaders != null) { - assertThat( - warningHeaders, - not( - hasItems( - containsString("index names starting with a dot are reserved for hidden indices and system indices") - ) - ) - ); - } - } finally { - countDownLatch.countDown(); - } - } - - @Override - public void onFailure(Exception e) { - countDownLatch.countDown(); - assert false : e; - } - }); - - client.prepareIndex(".has-dot").setSource("{}", XContentType.JSON).execute(new ActionListener<>() { - @Override - public void onResponse(DocWriteResponse indexResponse) { - try { - final var warningHeaders = client.threadPool().getThreadContext().getResponseHeaders().get("Warning"); - assertNotNull(warningHeaders); + client.prepareIndex("no-dot") + .setSource("{}", XContentType.JSON) + .execute(ActionListener.releaseAfter(ActionTestUtils.assertNoFailureListener(indexResponse -> { + final var warningHeaders = client.threadPool().getThreadContext().getResponseHeaders().get("Warning"); + if (warningHeaders != null) { assertThat( warningHeaders, - hasItems(containsString("index names starting with a dot are reserved for hidden indices and system indices")) + not(hasItems(containsString("index names starting with a dot are reserved for hidden indices and system indices"))) ); - } finally { - countDownLatch.countDown(); } - } + }), countDownLatch::countDown)); - @Override - public void onFailure(Exception e) { - countDownLatch.countDown(); - assert false : e; - } - }); + client.prepareIndex(".has-dot") + .setSource("{}", XContentType.JSON) + .execute(ActionListener.releaseAfter(ActionTestUtils.assertNoFailureListener(indexResponse -> { + final var warningHeaders = client.threadPool().getThreadContext().getResponseHeaders().get("Warning"); + assertNotNull(warningHeaders); + assertThat( + warningHeaders, + hasItems(containsString("index names starting with a dot are reserved for hidden indices and system indices")) + ); + }), countDownLatch::countDown)); assertBusy( () -> assertThat( @@ -100,7 +73,7 @@ public void onFailure(Exception e) { ) ); - barrier.await(10, TimeUnit.SECONDS); - assertTrue(countDownLatch.await(10, TimeUnit.SECONDS)); + safeAwait(barrier); + safeAwait(countDownLatch); } } From 3f369d3e90876c596d2d539a6f1b05b52333610b Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Fri, 8 Nov 2024 15:08:32 +0200 Subject: [PATCH 04/39] Wrapping TermQueryBuilders into ConstantScoreBuilders to ensure consistent scoring (#112607) --- .../xpack/rank/rrf/RRFRankMultiShardIT.java | 308 +++++++++--------- 1 file changed, 154 insertions(+), 154 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRankMultiShardIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRankMultiShardIT.java index 29c471296b5d1..b4cf409f5fd72 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRankMultiShardIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/rrf/RRFRankMultiShardIT.java @@ -174,16 +174,16 @@ public void testBM25AndKnn() { .setKnnSearch(List.of(knnSearch)) .setQuery( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", 
"500").boost(11.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "493").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(11.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "493")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) ) .addFetchField("vector_asc") .addFetchField("text0") @@ -268,16 +268,16 @@ public void testBM25AndMultipleKnn() { .setKnnSearch(List.of(knnSearchAsc, knnSearchDesc)) .setQuery( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(20.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "485").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "506").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "505").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "511").boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(20.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "485")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "506")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "505")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "511")).boost(9.0f)) ) .addFetchField("vector_asc") .addFetchField("vector_desc") @@ -339,16 +339,16 @@ public void testBM25AndKnnWithBucketAggregation() { .setKnnSearch(List.of(knnSearch)) .setQuery( 
QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(11.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "493").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(11.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "493")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) ) .addFetchField("vector_asc") .addFetchField("text0") @@ -465,16 +465,16 @@ public void testBM25AndMultipleKnnWithAggregation() { .setKnnSearch(List.of(knnSearchAsc, knnSearchDesc)) .setQuery( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(20.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "485").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "506").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "505").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "511").boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(20.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "485")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "506")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "505")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "511")).boost(9.0f)) ) .addFetchField("vector_asc") .addFetchField("vector_desc") @@ -554,28 +554,28 @@ public void 
testMultiBM25() { List.of( new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "490").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "490")).boost(1.0f)) ), new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text1", "508").boost(9.0f)) - .should(QueryBuilders.termQuery("text1", "304").boost(8.0f)) - .should(QueryBuilders.termQuery("text1", "501").boost(7.0f)) - .should(QueryBuilders.termQuery("text1", "504").boost(6.0f)) - .should(QueryBuilders.termQuery("text1", "502").boost(5.0f)) - .should(QueryBuilders.termQuery("text1", "499").boost(4.0f)) - .should(QueryBuilders.termQuery("text1", "800").boost(3.0f)) - .should(QueryBuilders.termQuery("text1", "201").boost(2.0f)) - .should(QueryBuilders.termQuery("text1", "492").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "508")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "304")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "501")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "504")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "502")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "499")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "800")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "201")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "492")).boost(1.0f)) ) ) ) @@ -625,28 +625,28 @@ public void testMultiBM25WithAggregation() { List.of( new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", 
"497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "490").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "490")).boost(1.0f)) ), new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text1", "508").boost(9.0f)) - .should(QueryBuilders.termQuery("text1", "304").boost(8.0f)) - .should(QueryBuilders.termQuery("text1", "501").boost(7.0f)) - .should(QueryBuilders.termQuery("text1", "504").boost(6.0f)) - .should(QueryBuilders.termQuery("text1", "502").boost(5.0f)) - .should(QueryBuilders.termQuery("text1", "499").boost(4.0f)) - .should(QueryBuilders.termQuery("text1", "801").boost(3.0f)) - .should(QueryBuilders.termQuery("text1", "201").boost(2.0f)) - .should(QueryBuilders.termQuery("text1", "492").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "508")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "304")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "501")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "504")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "502")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "499")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "801")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "201")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "492")).boost(1.0f)) ) ) ) @@ -713,28 +713,28 @@ public void testMultiBM25AndSingleKnn() { List.of( new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", 
"491").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "490").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "490")).boost(1.0f)) ), new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text1", "508").boost(9.0f)) - .should(QueryBuilders.termQuery("text1", "304").boost(8.0f)) - .should(QueryBuilders.termQuery("text1", "501").boost(7.0f)) - .should(QueryBuilders.termQuery("text1", "504").boost(6.0f)) - .should(QueryBuilders.termQuery("text1", "492").boost(5.0f)) - .should(QueryBuilders.termQuery("text1", "502").boost(4.0f)) - .should(QueryBuilders.termQuery("text1", "499").boost(3.0f)) - .should(QueryBuilders.termQuery("text1", "800").boost(2.0f)) - .should(QueryBuilders.termQuery("text1", "201").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "508")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "304")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "501")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "504")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "492")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "502")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "499")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "800")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "201")).boost(1.0f)) ) ) ) @@ -771,28 +771,28 @@ public void testMultiBM25AndSingleKnnWithAggregation() { List.of( new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "490").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(9.0f)) + 
.should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "490")).boost(1.0f)) ), new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text1", "508").boost(9.0f)) - .should(QueryBuilders.termQuery("text1", "304").boost(8.0f)) - .should(QueryBuilders.termQuery("text1", "501").boost(7.0f)) - .should(QueryBuilders.termQuery("text1", "504").boost(6.0f)) - .should(QueryBuilders.termQuery("text1", "492").boost(5.0f)) - .should(QueryBuilders.termQuery("text1", "502").boost(4.0f)) - .should(QueryBuilders.termQuery("text1", "499").boost(3.0f)) - .should(QueryBuilders.termQuery("text1", "800").boost(2.0f)) - .should(QueryBuilders.termQuery("text1", "201").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "508")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "304")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "501")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "504")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "492")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "502")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "499")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "800")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "201")).boost(1.0f)) ) ) ) @@ -847,28 +847,28 @@ public void testMultiBM25AndMultipleKnn() { List.of( new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "490").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + 
.should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "490")).boost(1.0f)) ), new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text1", "508").boost(9.0f)) - .should(QueryBuilders.termQuery("text1", "304").boost(8.0f)) - .should(QueryBuilders.termQuery("text1", "501").boost(7.0f)) - .should(QueryBuilders.termQuery("text1", "504").boost(6.0f)) - .should(QueryBuilders.termQuery("text1", "492").boost(5.0f)) - .should(QueryBuilders.termQuery("text1", "502").boost(4.0f)) - .should(QueryBuilders.termQuery("text1", "499").boost(3.0f)) - .should(QueryBuilders.termQuery("text1", "800").boost(2.0f)) - .should(QueryBuilders.termQuery("text1", "201").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "508")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "304")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "501")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "504")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "492")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "502")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "499")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "800")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "201")).boost(1.0f)) ) ) ) @@ -909,28 +909,28 @@ public void testMultiBM25AndMultipleKnnWithAggregation() { List.of( new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text0", "500").boost(10.0f)) - .should(QueryBuilders.termQuery("text0", "499").boost(9.0f)) - .should(QueryBuilders.termQuery("text0", "498").boost(8.0f)) - .should(QueryBuilders.termQuery("text0", "497").boost(7.0f)) - .should(QueryBuilders.termQuery("text0", "496").boost(6.0f)) - .should(QueryBuilders.termQuery("text0", "495").boost(5.0f)) - .should(QueryBuilders.termQuery("text0", "494").boost(4.0f)) - .should(QueryBuilders.termQuery("text0", "492").boost(3.0f)) - .should(QueryBuilders.termQuery("text0", "491").boost(2.0f)) - .should(QueryBuilders.termQuery("text0", "490").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "500")).boost(10.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "499")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "498")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "497")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "496")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "495")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "494")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "492")).boost(3.0f)) + 
.should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "491")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text0", "490")).boost(1.0f)) ), new SubSearchSourceBuilder( QueryBuilders.boolQuery() - .should(QueryBuilders.termQuery("text1", "508").boost(9.0f)) - .should(QueryBuilders.termQuery("text1", "304").boost(8.0f)) - .should(QueryBuilders.termQuery("text1", "501").boost(7.0f)) - .should(QueryBuilders.termQuery("text1", "504").boost(6.0f)) - .should(QueryBuilders.termQuery("text1", "492").boost(5.0f)) - .should(QueryBuilders.termQuery("text1", "502").boost(4.0f)) - .should(QueryBuilders.termQuery("text1", "499").boost(3.0f)) - .should(QueryBuilders.termQuery("text1", "800").boost(2.0f)) - .should(QueryBuilders.termQuery("text1", "201").boost(1.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "508")).boost(9.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "304")).boost(8.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "501")).boost(7.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "504")).boost(6.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "492")).boost(5.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "502")).boost(4.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "499")).boost(3.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "800")).boost(2.0f)) + .should(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("text1", "201")).boost(1.0f)) ) ) ) From f7ee3fbdee9e0b7a25816a0d0391487641bacb03 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Sat, 9 Nov 2024 00:46:06 +1100 Subject: [PATCH 05/39] Mute org.elasticsearch.xpack.test.rest.XPackRestIT test {p0=ml/evaluate_data_frame/Test outlier_detection with query} #116484 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 97d33b3b14b8f..0d61008cb9c9a 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -288,6 +288,9 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/data_frame_analytics_crud/Test put config with unknown field in outlier detection analysis} issue: https://github.com/elastic/elasticsearch/issues/116458 +- class: org.elasticsearch.xpack.test.rest.XPackRestIT + method: test {p0=ml/evaluate_data_frame/Test outlier_detection with query} + issue: https://github.com/elastic/elasticsearch/issues/116484 # Examples: # From 0f9ac9de64bf9231c593f1dc3665da53f0f0385e Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Fri, 8 Nov 2024 15:10:40 +0100 Subject: [PATCH 06/39] Use SearchStats instead of field.isAggregatable in data node planning (#115744) Since ES|QL makes use of field-caps and only considers `isAggregatable` during Lucene pushdown, turning off doc-values disables Lucene pushdown. This is incorrect. The physical planning decision for Lucene pushdown is made during local planning on the data node, at which point `SearchStats` are known, and both `isIndexed` and `hasDocValues` are separately knowable. The Lucene pushdown should happen for `isIndexed` and not consider `hasDocValues` at all. This PR adds hasDocValues to SearchStats and the uses isIndexed and hasDocValue separately during local physical planning on the data nodes. 
This change immediately cleared up one issue for spatial data, which could not push down a Lucene query when doc-values were disabled.

Summary of what `isAggregatable` means for different implementations of `MappedFieldType`:

* The default implementation of `isAggregatable` in `MappedFieldType` is `hasDocValues`, and does not consider `isIndexed`
* All classes that extend `AbstractScriptFieldType` (e.g. `LongScriptFieldType`) hard-code `isAggregatable` to `true`. This presumably means Lucene is happy to mimic having doc-values
* `TextFieldType`, and classes that extend it, return the value of `fielddata`, so they consider the field aggregatable if there is field-data.
* `AggregateDoubleMetricFieldType` and `ConstantFieldType` are hard-coded to `true`
* `DenseVectorFieldType` is hard-coded to `false`
* `IdFieldType` returns the value of `fieldDataEnabled.getAsBoolean()`

In no case is `isIndexed` used for `isAggregatable`. However, for our Lucene pushdown of filters, `isIndexed` would make a lot more sense. But for pushdown of TopN, `hasDocValues` makes more sense.

Summarising the results for the various field types, where `?` means configurable:

| Class | isAggregatable | isIndexed | isStored | hasDocValues |
| --- | --- | --- | --- | --- |
| AbstractScriptFieldType | true | false | false | false |
| AggregateDoubleMetricFieldType | true | true | false | false |
| DenseVectorFieldType | false | ? | false | !indexed |
| IdFieldType | fieldData | true | true | false |
| TsidExtractingIdField | false | true | true | false |
| TextFieldType | fieldData | ? | ? | false |
| ? (the rest) | hasDocValues | ? | ? | ? |

It has also been observed that we cannot push filters to source without checking `hasDocValues` when we use the `SingleValueQuery`. So this leads to three groups of conditions:

| Category | require `indexed` | require `docValues` |
| --- | --- | --- |
| Filters(single-value) | true | true |
| Filters(multi-value) | true | false |
| TopN | true | true |

And for all cases we will also consider `isAggregatable` as a disjunction to cover the script field types, leading to two possible combinations:

* `fa.isAggregatable() || searchStats.isIndexed(fa.name()) && searchStats.hasDocValues(fa.name())`
* `fa.isAggregatable() || searchStats.isIndexed(fa.name())`
---
 docs/changelog/115744.yaml | 6 +
 .../index/mapper/TextFieldMapper.java | 16 +-
 ...xtFieldFamilySyntheticSourceTestSetup.java | 2 +-
 .../xpack/esql/CsvTestsDataLoader.java | 7 +
 .../xpack/esql/EsqlTestUtils.java | 102 ++++-
 .../mapping-airports_no_doc_values.json | 30 ++
 .../mapping-airports_not_indexed.json | 30 ++
 ...-airports_not_indexed_nor_doc_values.json} | 0
 .../src/main/resources/spatial.csv-spec | 36 ++
 .../SpatialPushDownPointsTestCase.java | 78 ++--
 .../esql/spatial/SpatialPushDownTestCase.java | 91 ++++-
 .../local/EnableSpatialDistancePushdown.java | 23 +-
 .../physical/local/LucenePushDownUtils.java | 37 --
 .../local/LucenePushdownPredicates.java | 111 ++++++
 .../physical/local/PushFiltersToSource.java | 50 ++-
 .../physical/local/PushTopNToSource.java | 15 +-
 .../local/SpatialDocValuesExtraction.java | 19 +-
 .../xpack/esql/planner/PlannerUtils.java | 24 +-
 .../xpack/esql/plugin/ComputeService.java | 6 +-
 .../xpack/esql/stats/SearchContextStats.java | 357 +++++++++++++++++
 .../xpack/esql/stats/SearchStats.java | 371 +++---------------
 .../LocalPhysicalPlanOptimizerTests.java | 3 +-
 .../optimizer/PhysicalPlanOptimizerTests.java | 166 +++++---
 .../physical/local/PushTopNToSourceTests.java | 5 +-
.../xpack/esql/planner/FilterTests.java | 2 +- .../xpack/esql/stats/DisabledSearchStats.java | 27 +- 26 files changed, 1049 insertions(+), 565 deletions(-) create mode 100644 docs/changelog/115744.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_no_doc_values.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_not_indexed.json rename x-pack/plugin/esql/qa/testFixtures/src/main/resources/{mapping-airports-no-doc-values.json => mapping-airports_not_indexed_nor_doc_values.json} (100%) delete mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushDownUtils.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java diff --git a/docs/changelog/115744.yaml b/docs/changelog/115744.yaml new file mode 100644 index 0000000000000..9b8c91e59f451 --- /dev/null +++ b/docs/changelog/115744.yaml @@ -0,0 +1,6 @@ +pr: 115744 +summary: Use `SearchStats` instead of field.isAggregatable in data node planning +area: ES|QL +type: bug +issues: + - 115737 diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 253f70f4fda47..cf75f1ddf3b94 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -968,15 +968,27 @@ public boolean isAggregatable() { return fielddata; } - public boolean canUseSyntheticSourceDelegateForQuerying() { + /** + * Returns true if the delegate sub-field can be used for loading and querying (ie. either isIndexed or isStored is true) + */ + public boolean canUseSyntheticSourceDelegateForLoading() { return syntheticSourceDelegate != null && syntheticSourceDelegate.ignoreAbove() == Integer.MAX_VALUE && (syntheticSourceDelegate.isIndexed() || syntheticSourceDelegate.isStored()); } + /** + * Returns true if the delegate sub-field can be used for querying only (ie. 
isIndexed must be true) + */ + public boolean canUseSyntheticSourceDelegateForQuerying() { + return syntheticSourceDelegate != null + && syntheticSourceDelegate.ignoreAbove() == Integer.MAX_VALUE + && syntheticSourceDelegate.isIndexed(); + } + @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { - if (canUseSyntheticSourceDelegateForQuerying()) { + if (canUseSyntheticSourceDelegateForLoading()) { return new BlockLoader.Delegating(syntheticSourceDelegate.blockLoader(blContext)) { @Override protected String delegatingTo() { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java index 97ded7f9a06f2..475bf9212e1c5 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TextFieldFamilySyntheticSourceTestSetup.java @@ -39,7 +39,7 @@ public static MapperTestCase.BlockReaderSupport getSupportedReaders(MapperServic TextFieldMapper.TextFieldType text = (TextFieldMapper.TextFieldType) ft; boolean supportsColumnAtATimeReader = text.syntheticSourceDelegate() != null && text.syntheticSourceDelegate().hasDocValues() - && text.canUseSyntheticSourceDelegateForQuerying(); + && text.canUseSyntheticSourceDelegateForLoading(); return new MapperTestCase.BlockReaderSupport(supportsColumnAtATimeReader, mapper, loaderFieldName); } MappedFieldType parent = mapper.fieldType(parentName); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index cf9d66727a900..2bd7ecc37b034 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -72,6 +72,10 @@ public class CsvTestsDataLoader { private static final TestsDataset DECADES = new TestsDataset("decades"); private static final TestsDataset AIRPORTS = new TestsDataset("airports"); private static final TestsDataset AIRPORTS_MP = AIRPORTS.withIndex("airports_mp").withData("airports_mp.csv"); + private static final TestsDataset AIRPORTS_NO_DOC_VALUES = new TestsDataset("airports_no_doc_values").withData("airports.csv"); + private static final TestsDataset AIRPORTS_NOT_INDEXED = new TestsDataset("airports_not_indexed").withData("airports.csv"); + private static final TestsDataset AIRPORTS_NOT_INDEXED_NOR_DOC_VALUES = new TestsDataset("airports_not_indexed_nor_doc_values") + .withData("airports.csv"); private static final TestsDataset AIRPORTS_WEB = new TestsDataset("airports_web"); private static final TestsDataset DATE_NANOS = new TestsDataset("date_nanos"); private static final TestsDataset COUNTRIES_BBOX = new TestsDataset("countries_bbox"); @@ -105,6 +109,9 @@ public class CsvTestsDataLoader { Map.entry(DECADES.indexName, DECADES), Map.entry(AIRPORTS.indexName, AIRPORTS), Map.entry(AIRPORTS_MP.indexName, AIRPORTS_MP), + Map.entry(AIRPORTS_NO_DOC_VALUES.indexName, AIRPORTS_NO_DOC_VALUES), + Map.entry(AIRPORTS_NOT_INDEXED.indexName, AIRPORTS_NOT_INDEXED), + Map.entry(AIRPORTS_NOT_INDEXED_NOR_DOC_VALUES.indexName, AIRPORTS_NOT_INDEXED_NOR_DOC_VALUES), Map.entry(AIRPORTS_WEB.indexName, AIRPORTS_WEB), Map.entry(COUNTRIES_BBOX.indexName, 
COUNTRIES_BBOX), Map.entry(COUNTRIES_BBOX_WEB.indexName, COUNTRIES_BBOX_WEB), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index bc465e7e9b64c..2913401d8aab3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -89,6 +89,8 @@ import java.time.Period; import java.util.ArrayList; import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -206,9 +208,30 @@ public static EsRelation relation() { return new EsRelation(EMPTY, new EsIndex(randomAlphaOfLength(8), emptyMap()), IndexMode.STANDARD, randomBoolean()); } - public static class TestSearchStats extends SearchStats { - public TestSearchStats() { - super(emptyList()); + /** + * This version of SearchStats always returns true for all fields for all boolean methods. + * For custom behaviour either use {@link TestConfigurableSearchStats} or override the specific methods. + */ + public static class TestSearchStats implements SearchStats { + + @Override + public boolean exists(String field) { + return true; + } + + @Override + public boolean isIndexed(String field) { + return exists(field); + } + + @Override + public boolean hasDocValues(String field) { + return exists(field); + } + + @Override + public boolean hasExactSubfield(String field) { + return exists(field); } @Override @@ -226,11 +249,6 @@ public long count(String field, BytesRef value) { return exists(field) ? -1 : 0; } - @Override - public boolean exists(String field) { - return true; - } - @Override public byte[] min(String field, DataType dataType) { return null; @@ -245,10 +263,76 @@ public byte[] max(String field, DataType dataType) { public boolean isSingleValue(String field) { return false; } + } + + /** + * This version of SearchStats can be preconfigured to return true/false for various combinations of the four field settings: + *
+     * <ol>
+     *     <li>exists</li>
+     *     <li>isIndexed</li>
+     *     <li>hasDocValues</li>
+     *     <li>hasExactSubfield</li>
+     * </ol>
+     * The default will return true for all fields. The include/exclude methods can be used to configure the settings for specific fields.
+     * If you call 'include' with no fields, it will switch to return false for all fields.
+     */
+    public static class TestConfigurableSearchStats extends TestSearchStats {
+        public enum Config {
+            EXISTS,
+            INDEXED,
+            DOC_VALUES,
+            EXACT_SUBFIELD
+        }
+
+        private final Map<Config, Set<String>> includes = new HashMap<>();
+        private final Map<Config, Set<String>> excludes = new HashMap<>();
+
+        public TestConfigurableSearchStats include(Config key, String... fields) {
+            // If this method is called with no fields, it is interpreted to mean include none, so we include a dummy field
+            for (String field : fields.length == 0 ? new String[] { "-" } : fields) {
+                includes.computeIfAbsent(key, k -> new HashSet<>()).add(field);
+                excludes.computeIfAbsent(key, k -> new HashSet<>()).remove(field);
+            }
+            return this;
+        }
+
+        public TestConfigurableSearchStats exclude(Config key, String... fields) {
+            for (String field : fields) {
+                includes.computeIfAbsent(key, k -> new HashSet<>()).remove(field);
+                excludes.computeIfAbsent(key, k -> new HashSet<>()).add(field);
+            }
+            return this;
+        }
+
+        private boolean isConfigurationSet(Config config, String field) {
+            Set<String> in = includes.getOrDefault(config, Set.of());
+            Set<String> ex = excludes.getOrDefault(config, Set.of());
+            return (in.isEmpty() || in.contains(field)) && ex.contains(field) == false;
+        }
+
+        @Override
+        public boolean exists(String field) {
+            return isConfigurationSet(Config.EXISTS, field);
+        }
 
         @Override
         public boolean isIndexed(String field) {
-            return exists(field);
+            return isConfigurationSet(Config.INDEXED, field);
+        }
+
+        @Override
+        public boolean hasDocValues(String field) {
+            return isConfigurationSet(Config.DOC_VALUES, field);
+        }
+
+        @Override
+        public boolean hasExactSubfield(String field) {
+            return isConfigurationSet(Config.EXACT_SUBFIELD, field);
+        }
+
+        @Override
+        public String toString() {
+            return "TestConfigurableSearchStats{" + "includes=" + includes + ", excludes=" + excludes + '}';
+        }
     }
 }
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_no_doc_values.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_no_doc_values.json
new file mode 100644
index 0000000000000..d7097f89a17df
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_no_doc_values.json
@@ -0,0 +1,30 @@
+{
+  "properties": {
+    "abbrev": {
+      "type": "keyword"
+    },
+    "name": {
+      "type": "text"
+    },
+    "scalerank": {
+      "type": "integer"
+    },
+    "type": {
+      "type": "keyword"
+    },
+    "location": {
+      "type": "geo_point",
+      "index": true,
+      "doc_values": false
+    },
+    "country": {
+      "type": "keyword"
+    },
+    "city": {
+      "type": "keyword"
+    },
+    "city_location": {
+      "type": "geo_point"
+    }
+  }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_not_indexed.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_not_indexed.json
new file mode 100644
index 0000000000000..1c72cf1f3e1a2
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_not_indexed.json
@@ -0,0 +1,30 @@
+{
+  "properties": {
+    "abbrev": {
+      "type": "keyword"
+    },
+    "name": {
+      "type": "text"
+    },
+    "scalerank": {
+      "type": "integer"
+    },
+    "type": {
+      "type": "keyword"
+    },
+    "location": {
+      "type": "geo_point",
+      "index": false,
+      "doc_values": true
+    },
+    "country": {
+      "type": "keyword"
+    },
+    "city": {
+      "type": "keyword"
+    },
+    "city_location": {
+      
"type": "geo_point" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports-no-doc-values.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_not_indexed_nor_doc_values.json similarity index 100% rename from x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports-no-doc-values.json rename to x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-airports_not_indexed_nor_doc_values.json diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index c1c4538c7393d..01e7258e8a6ee 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -484,6 +484,42 @@ centroid:geo_point | count:long POINT (42.97109629958868 14.7552534006536) | 1 ; +centroidFromAirportsAfterIntersectsCompoundPredicateNoDocValues +required_capability: st_intersects + +FROM airports_no_doc_values +| WHERE scalerank == 9 AND ST_INTERSECTS(location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))")) AND country == "Yemen" +| STATS centroid=ST_CENTROID_AGG(location), count=COUNT() +; + +centroid:geo_point | count:long +POINT (42.97109629958868 14.7552534006536) | 1 +; + +centroidFromAirportsAfterIntersectsCompoundPredicateNotIndexedNorDocValues +required_capability: st_intersects + +FROM airports_not_indexed_nor_doc_values +| WHERE scalerank == 9 AND ST_INTERSECTS(location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))")) AND country == "Yemen" +| STATS centroid=ST_CENTROID_AGG(location), count=COUNT() +; + +centroid:geo_point | count:long +POINT (42.97109629958868 14.7552534006536) | 1 +; + +centroidFromAirportsAfterIntersectsCompoundPredicateNotIndexed +required_capability: st_intersects + +FROM airports_not_indexed +| WHERE scalerank == 9 AND ST_INTERSECTS(location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))")) AND country == "Yemen" +| STATS centroid=ST_CENTROID_AGG(location), count=COUNT() +; + +centroid:geo_point | count:long +POINT (42.97109629958868 14.7552534006536) | 1 +; + ############################################### # Tests for ST_INTERSECTS on GEO_POINT type diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownPointsTestCase.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownPointsTestCase.java index 0acbe98022f02..b9b003b8255e1 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownPointsTestCase.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownPointsTestCase.java @@ -12,8 +12,6 @@ import org.elasticsearch.geometry.utils.GeometryValidator; import org.elasticsearch.geometry.utils.WellKnownText; import org.elasticsearch.lucene.spatial.CentroidCalculator; -import org.elasticsearch.xpack.core.esql.action.EsqlQueryRequestBuilder; -import org.elasticsearch.xpack.core.esql.action.EsqlQueryResponse; import org.hamcrest.Description; import org.hamcrest.Matcher; import org.hamcrest.TypeSafeMatcher; @@ -22,6 +20,7 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Locale; import static org.hamcrest.Matchers.closeTo; @@ -62,8 +61,7 @@ public void testSimplePointInPolygon() throws IOException, 
ParseException { CentroidCalculator withinCentroid = new CentroidCalculator(); CentroidCalculator disjointCentroid = new CentroidCalculator(); for (int i = 0; i < data.size(); i++) { - index("indexed", i + "", "{\"location\" : " + data.get(i).data + " }"); - index("not-indexed", i + "", "{\"location\" : " + data.get(i).data + " }"); + addToIndexes(i, data.get(i).data, "indexed", "not-indexed", "not-indexed-nor-doc-values", "no-doc-values"); if (data.get(i).intersects) { expectedIntersects++; data.get(i).centroid.addTo(intersectsCentroid); @@ -76,7 +74,7 @@ public void testSimplePointInPolygon() throws IOException, ParseException { data.get(i).centroid.addTo(withinCentroid); } } - refresh("indexed", "not-indexed"); + refresh("indexed", "not-indexed", "not-indexed-nor-doc-values", "no-doc-values"); for (String polygon : new String[] { "POLYGON ((-10 -10, -10 10, 10 10, 10 -10, -10 -10))", @@ -89,24 +87,28 @@ public void testSimplePointInPolygon() throws IOException, ParseException { protected void assertFunction(String spatialFunction, String wkt, long expected, CentroidCalculator centroid) throws IOException, ParseException { - final String query1 = String.format(Locale.ROOT, """ + List queries = getQueries(String.format(Locale.ROOT, """ FROM indexed | WHERE %s(location, %s("%s")) | STATS COUNT(*), ST_CENTROID_AGG(location) - """, spatialFunction, castingFunction(), wkt); - final String query2 = String.format(Locale.ROOT, """ - FROM not-indexed | WHERE %s(location, %s("%s")) | STATS COUNT(*), ST_CENTROID_AGG(location) - """, spatialFunction, castingFunction(), wkt); - try ( - EsqlQueryResponse response1 = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query1).get(); - EsqlQueryResponse response2 = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query2).get(); - ) { - Object indexedCount = response1.response().column(0).iterator().next(); - Object notIndexedCount = response2.response().column(0).iterator().next(); - assertEquals(spatialFunction + "[expected=" + expected + "]", expected, indexedCount); - assertEquals(spatialFunction + "[expected=" + expected + "]", expected, notIndexedCount); - Object indexedCentroid = response1.response().column(1).iterator().next(); - Object notIndexedCentroid = response2.response().column(1).iterator().next(); - assertThat(spatialFunction + "[expected=" + toString(centroid) + "]", centroid, matchesCentroid(indexedCentroid)); - assertThat(spatialFunction + "[expected=" + toString(centroid) + "]", centroid, matchesCentroid(notIndexedCentroid)); + """, spatialFunction, castingFunction(), wkt)); + try (TestQueryResponseCollection responses = new TestQueryResponseCollection(queries)) { + for (int i = 0; i < ALL_INDEXES.length; i++) { + Object resultCount = responses.getResponse(i, 0); + Object resultCentroid = responses.getResponse(i, 1); + assertEquals(spatialFunction + "[expected=" + expected + "] for " + ALL_INDEXES[i], expected, resultCount); + assertThat( + spatialFunction + "[expected=" + toString(centroid) + "] for " + ALL_INDEXES[i], + centroid, + matchesCentroid(resultCentroid) + ); + } + long allIndexesCount = (long) responses.getResponse(ALL_INDEXES.length, 0); + assertEquals(spatialFunction + "[expected=" + expected + "] for all indexes", expected * 4, allIndexesCount); + Object allIndexesCentroid = responses.getResponse(ALL_INDEXES.length, 1); + assertThat( + spatialFunction + "[expected=" + toString(centroid) + "] for all indexes", + centroid, + matchesCentroid(allIndexesCentroid) + ); } } @@ -126,16 +128,14 @@ private void 
assertPushedDownDistance(boolean multiValue) throws RuntimeExceptio for (int j = 0; j < values.length; j++) { values[j] = "\"" + WellKnownText.toWKT(getIndexGeometry()) + "\""; } - index("indexed", i + "", "{\"location\" : " + Arrays.toString(values) + " }"); - index("not-indexed", i + "", "{\"location\" : " + Arrays.toString(values) + " }"); + addToIndexes(i, Arrays.toString(values), "indexed", "not-indexed", "not-indexed-nor-doc-values", "no-doc-values"); } else { final String value = WellKnownText.toWKT(getIndexGeometry()); - index("indexed", i + "", "{\"location\" : \"" + value + "\" }"); - index("not-indexed", i + "", "{\"location\" : \"" + value + "\" }"); + addToIndexes(i, "\"" + value + "\"", "indexed", "not-indexed", "not-indexed-nor-doc-values", "no-doc-values"); } } - refresh("indexed", "not-indexed"); + refresh("indexed", "not-indexed", "not-indexed-nor-doc-values", "no-doc-values"); for (int i = 0; i < 10; i++) { final Geometry geometry = getIndexGeometry(); @@ -149,19 +149,17 @@ private void assertPushedDownDistance(boolean multiValue) throws RuntimeExceptio protected void assertDistanceFunction(String wkt) { String spatialFunction = "ST_DISTANCE"; String castingFunction = castingFunction().replaceAll("SHAPE", "POINT"); - final String query1 = String.format(Locale.ROOT, """ - FROM indexed | WHERE %s(location, %s("%s")) < %.1f | STATS COUNT(*) - """, spatialFunction, castingFunction, wkt, searchDistance()); - final String query2 = String.format(Locale.ROOT, """ - FROM not-indexed | WHERE %s(location, %s("%s")) < %.1f | STATS COUNT(*) - """, spatialFunction, castingFunction, wkt, searchDistance()); - try ( - EsqlQueryResponse response1 = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query1).get(); - EsqlQueryResponse response2 = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query2).get(); - ) { - Object indexedResult = response1.response().column(0).iterator().next(); - Object notIndexedResult = response2.response().column(0).iterator().next(); - assertEquals(spatialFunction, indexedResult, notIndexedResult); + List queries = getQueries(String.format(Locale.ROOT, """ + FROM index | WHERE %s(location, %s("%s")) < %.1f | STATS COUNT(*) + """, spatialFunction, castingFunction, wkt, searchDistance())); + try (TestQueryResponseCollection responses = new TestQueryResponseCollection(queries)) { + Object indexedResult = responses.getResponse(0, 0); + for (int i = 1; i < ALL_INDEXES.length; i++) { + Object result = responses.getResponse(i, 0); + assertEquals(spatialFunction + " for " + ALL_INDEXES[i], indexedResult, result); + } + long allIndexesResult = (long) responses.getResponse(ALL_INDEXES.length, 0); + assertEquals(spatialFunction + " for all indexes", (long) indexedResult * 4, allIndexesResult); } } diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownTestCase.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownTestCase.java index e7e0c785f50e5..90e8bb713552e 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownTestCase.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialPushDownTestCase.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; import org.elasticsearch.xpack.spatial.SpatialPlugin; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -35,6 
+36,8 @@ */ public abstract class SpatialPushDownTestCase extends ESIntegTestCase { + protected static final String[] ALL_INDEXES = new String[] { "indexed", "not-indexed", "not-indexed-nor-doc-values", "no-doc-values" }; + protected Collection> nodePlugins() { return List.of(EsqlPlugin.class, SpatialPlugin.class); } @@ -78,12 +81,34 @@ protected void initIndexes() { """, fieldType()))); assertAcked(prepareCreate("not-indexed").setMapping(String.format(Locale.ROOT, """ + { + "properties" : { + "location": { "type" : "%s", "index" : false, "doc_values" : true } + } + } + """, fieldType()))); + + assertAcked(prepareCreate("not-indexed-nor-doc-values").setMapping(String.format(Locale.ROOT, """ { "properties" : { "location": { "type" : "%s", "index" : false, "doc_values" : false } } } """, fieldType()))); + + assertAcked(prepareCreate("no-doc-values").setMapping(String.format(Locale.ROOT, """ + { + "properties" : { + "location": { "type" : "%s", "index" : true, "doc_values" : false } + } + } + """, fieldType()))); + } + + protected void addToIndexes(int id, String values, String... indexes) { + for (String index : indexes) { + index(index, id + "", "{\"location\" : " + values + " }"); + } } private void assertPushedDownQueries(boolean multiValue) throws RuntimeException { @@ -94,16 +119,14 @@ private void assertPushedDownQueries(boolean multiValue) throws RuntimeException for (int j = 0; j < values.length; j++) { values[j] = "\"" + WellKnownText.toWKT(getIndexGeometry()) + "\""; } - index("indexed", i + "", "{\"location\" : " + Arrays.toString(values) + " }"); - index("not-indexed", i + "", "{\"location\" : " + Arrays.toString(values) + " }"); + addToIndexes(i, Arrays.toString(values), ALL_INDEXES); } else { final String value = WellKnownText.toWKT(getIndexGeometry()); - index("indexed", i + "", "{\"location\" : \"" + value + "\" }"); - index("not-indexed", i + "", "{\"location\" : \"" + value + "\" }"); + addToIndexes(i, "\"" + value + "\"", ALL_INDEXES); } } - refresh("indexed", "not-indexed"); + refresh(ALL_INDEXES); String smallRectangleCW = "POLYGON ((-10 -10, -10 10, 10 10, 10 -10, -10 -10))"; assertFunction("ST_WITHIN", smallRectangleCW); @@ -115,27 +138,57 @@ private void assertPushedDownQueries(boolean multiValue) throws RuntimeException assertFunction("ST_INTERSECTS", wkt); assertFunction("ST_DISJOINT", wkt); assertFunction("ST_CONTAINS", wkt); - // within and lines are not globally supported so we avoid it here + // within and lines are not globally supported, so we avoid it here if (containsLine(geometry) == false) { assertFunction("ST_WITHIN", wkt); } } } + protected List getQueries(String query) { + ArrayList queries = new ArrayList<>(); + Arrays.stream(ALL_INDEXES).forEach(index -> queries.add(query.replaceAll("FROM (\\w+) \\|", "FROM " + index + " |"))); + queries.add(query.replaceAll("FROM (\\w+) \\|", "FROM " + String.join(",", ALL_INDEXES) + " |")); + return queries; + } + protected void assertFunction(String spatialFunction, String wkt) { - final String query1 = String.format(Locale.ROOT, """ - FROM indexed | WHERE %s(location, %s("%s")) | STATS COUNT(*) - """, spatialFunction, castingFunction(), wkt); - final String query2 = String.format(Locale.ROOT, """ - FROM not-indexed | WHERE %s(location, %s("%s")) | STATS COUNT(*) - """, spatialFunction, castingFunction(), wkt); - try ( - EsqlQueryResponse response1 = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query1).get(); - EsqlQueryResponse response2 = 
EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query2).get(); - ) { - Object indexedResult = response1.response().column(0).iterator().next(); - Object notIndexedResult = response2.response().column(0).iterator().next(); - assertEquals(spatialFunction, indexedResult, notIndexedResult); + List queries = getQueries(String.format(Locale.ROOT, """ + FROM index | WHERE %s(location, %s("%s")) | STATS COUNT(*) + """, spatialFunction, castingFunction(), wkt)); + try (TestQueryResponseCollection responses = new TestQueryResponseCollection(queries)) { + Object indexedResult = responses.getResponse(0, 0); + for (int i = 1; i < ALL_INDEXES.length; i++) { + Object result = responses.getResponse(i, 0); + assertEquals(spatialFunction + " for " + ALL_INDEXES[i], indexedResult, result); + } + long allIndexesResult = (long) responses.getResponse(ALL_INDEXES.length, 0); + assertEquals(spatialFunction + " for all indexes", (long) indexedResult * 4, allIndexesResult); + } + } + + protected static class TestQueryResponseCollection implements AutoCloseable { + private final List responses; + + public TestQueryResponseCollection(List queries) { + this.responses = queries.stream().map(query -> { + try { + return EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query).get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }).toList(); + } + + protected Object getResponse(int index, int column) { + return responses.get(index).response().column(column).iterator().next(); + } + + @Override + public void close() { + for (EsqlQueryResponse response : responses) { + response.close(); + } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/EnableSpatialDistancePushdown.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/EnableSpatialDistancePushdown.java index cde305e52a705..dfb1dbc8bc8f3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/EnableSpatialDistancePushdown.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/EnableSpatialDistancePushdown.java @@ -76,15 +76,15 @@ public class EnableSpatialDistancePushdown extends PhysicalOptimizerRules.Parame protected PhysicalPlan rule(FilterExec filterExec, LocalPhysicalOptimizerContext ctx) { PhysicalPlan plan = filterExec; if (filterExec.child() instanceof EsQueryExec esQueryExec) { - plan = rewrite(filterExec, esQueryExec); + plan = rewrite(filterExec, esQueryExec, LucenePushdownPredicates.from(ctx.searchStats())); } else if (filterExec.child() instanceof EvalExec evalExec && evalExec.child() instanceof EsQueryExec esQueryExec) { - plan = rewriteBySplittingFilter(filterExec, evalExec, esQueryExec); + plan = rewriteBySplittingFilter(filterExec, evalExec, esQueryExec, LucenePushdownPredicates.from(ctx.searchStats())); } return plan; } - private FilterExec rewrite(FilterExec filterExec, EsQueryExec esQueryExec) { + private FilterExec rewrite(FilterExec filterExec, EsQueryExec esQueryExec, LucenePushdownPredicates lucenePushdownPredicates) { // Find and rewrite any binary comparisons that involve a distance function and a literal var rewritten = filterExec.condition().transformDown(EsqlBinaryComparison.class, comparison -> { ComparisonType comparisonType = ComparisonType.from(comparison.getFunctionType()); @@ -95,7 +95,7 @@ private FilterExec rewrite(FilterExec filterExec, EsQueryExec esQueryExec) { } return comparison; }); - if 
(rewritten.equals(filterExec.condition()) == false && canPushToSource(rewritten, x -> false)) { + if (rewritten.equals(filterExec.condition()) == false && canPushToSource(rewritten, lucenePushdownPredicates)) { return new FilterExec(filterExec.source(), esQueryExec, rewritten); } return filterExec; @@ -119,9 +119,14 @@ private FilterExec rewrite(FilterExec filterExec, EsQueryExec esQueryExec) { * | WHERE other > 10 * */ - private PhysicalPlan rewriteBySplittingFilter(FilterExec filterExec, EvalExec evalExec, EsQueryExec esQueryExec) { + private PhysicalPlan rewriteBySplittingFilter( + FilterExec filterExec, + EvalExec evalExec, + EsQueryExec esQueryExec, + LucenePushdownPredicates lucenePushdownPredicates + ) { // Find all pushable distance functions in the EVAL - Map distances = getPushableDistances(evalExec.fields()); + Map distances = getPushableDistances(evalExec.fields(), lucenePushdownPredicates); // Don't do anything if there are no distances to push down if (distances.isEmpty()) { @@ -139,7 +144,7 @@ private PhysicalPlan rewriteBySplittingFilter(FilterExec filterExec, EvalExec ev // Find and rewrite any binary comparisons that involve a distance function and a literal var rewritten = rewriteDistanceFilters(resExp, distances); // If all pushable StDistance functions were found and re-written, we need to re-write the FILTER/EVAL combination - if (rewritten.equals(resExp) == false && canPushToSource(rewritten, x -> false)) { + if (rewritten.equals(resExp) == false && canPushToSource(rewritten, lucenePushdownPredicates)) { pushable.add(rewritten); } else { nonPushable.add(exp); @@ -163,10 +168,10 @@ private PhysicalPlan rewriteBySplittingFilter(FilterExec filterExec, EvalExec ev } } - private Map getPushableDistances(List aliases) { + private Map getPushableDistances(List aliases, LucenePushdownPredicates lucenePushdownPredicates) { Map distances = new LinkedHashMap<>(); aliases.forEach(alias -> { - if (alias.child() instanceof StDistance distance && canPushSpatialFunctionToSource(distance)) { + if (alias.child() instanceof StDistance distance && canPushSpatialFunctionToSource(distance, lucenePushdownPredicates)) { distances.put(alias.id(), distance); } else if (alias.child() instanceof ReferenceAttribute ref && distances.containsKey(ref.id())) { StDistance distance = distances.get(ref.id()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushDownUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushDownUtils.java deleted file mode 100644 index 1242629c1da3c..0000000000000 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushDownUtils.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; - -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; -import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.stats.SearchStats; - -import java.util.function.Predicate; - -class LucenePushDownUtils { - /** - * this method is supposed to be used to define if a field can be used for exact push down (eg. sort or filter). - * "aggregatable" is the most accurate information we can have from field_caps as of now. - * Pushing down operations on fields that are not aggregatable would result in an error. - */ - public static boolean isAggregatable(FieldAttribute f) { - return f.exactAttribute().field().isAggregatable(); - } - - public static boolean hasIdenticalDelegate(FieldAttribute attr, SearchStats stats) { - return stats.hasIdenticalDelegate(attr.name()); - } - - public static boolean isPushableFieldAttribute(Expression exp, Predicate hasIdenticalDelegate) { - if (exp instanceof FieldAttribute fa && fa.getExactInfo().hasExact() && isAggregatable(fa)) { - return fa.dataType() != DataType.TEXT || hasIdenticalDelegate.test(fa); - } - return false; - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java new file mode 100644 index 0000000000000..feb8717f007b7 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java @@ -0,0 +1,111 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.stats.SearchStats; + +/** + * When deciding if a filter or topN can be pushed down to Lucene, we need to check a few things on the field. + * Exactly what is checked depends on the type of field and the query. For example, we have the following possible combinations: + *
+ * <ol>
+ * <li>A normal filter on a normal field will be pushed down using SingleValueQuery to remove multi-valued results,
+ * and this requires knowing if the field is indexed and has doc-values.</li>
+ * <li>A filter using a spatial function will allow multi-valued fields, so we only need to know if the field is indexed;
+ * doc values are not required.</li>
+ * <li>A TopN will be pushed down if the field is indexed and has doc values.</li>
+ * <li>Filters with TEXT fields can only be pushed down if the TEXT field has a nested KEYWORD field,
+ * referred to here as ExactSubfield. Note that this is related to normal ES|QL predicates,
+ * not the full-text search provided by the MATCH and QSTR functions, which are pushed down separately.</li>
+ * </ol>
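+ * A hypothetical usage sketch: the pushdown rules below obtain an instance via
+ * {@code LucenePushdownPredicates.from(ctx.searchStats())} and then ask {@code isPushableFieldAttribute(attr)}
+ * before rewriting a plan.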
+ */ +public interface LucenePushdownPredicates { + /** + * For TEXT fields, we need to check if the field has a subfield of type KEYWORD that can be used instead. + */ + boolean hasExactSubfield(FieldAttribute attr); + + /** + * For pushing down TopN and for pushing down filters with SingleValueQuery, + * we need to check if the field is indexed and has doc values. + */ + boolean isIndexedAndHasDocValues(FieldAttribute attr); + + /** + * For pushing down filters when multi-value results are allowed (spatial functions like ST_INTERSECTS), + * we only need to know if the field is indexed. + */ + boolean isIndexed(FieldAttribute attr); + + /** + * We see fields as pushable if either they are aggregatable or they are indexed. + * This covers non-indexed cases like AbstractScriptFieldType which hard-coded isAggregatable to true, + * as well as normal FieldAttribute's which can only be pushed down if they are indexed. + * The reason we don't just rely entirely on isAggregatable is because this is often false for normal fields, and could + * also differ from node to node, and we can physically plan each node separately, allowing Lucene pushdown on the nodes that + * support it, and relying on the compute engine for the nodes that do not. + */ + default boolean isPushableFieldAttribute(Expression exp) { + if (exp instanceof FieldAttribute fa && fa.getExactInfo().hasExact() && isIndexedAndHasDocValues(fa)) { + return (fa.dataType() != DataType.TEXT && fa.dataType() != DataType.SEMANTIC_TEXT) || hasExactSubfield(fa); + } + return false; + } + + /** + * The default implementation of this has no access to SearchStats, so it can only make decisions based on the FieldAttribute itself. + * In particular, it assumes TEXT fields have no exact subfields (underlying keyword field), + * and that isAggregatable means indexed and has hasDocValues. + */ + LucenePushdownPredicates DEFAULT = new LucenePushdownPredicates() { + @Override + public boolean hasExactSubfield(FieldAttribute attr) { + return false; + } + + @Override + public boolean isIndexedAndHasDocValues(FieldAttribute attr) { + // Is the FieldType.isAggregatable() check correct here? In FieldType isAggregatable usually only means hasDocValues + return attr.field().isAggregatable(); + } + + @Override + public boolean isIndexed(FieldAttribute attr) { + // TODO: This is the original behaviour, but is it correct? In FieldType isAggregatable usually only means hasDocValues + return attr.field().isAggregatable(); + } + }; + + /** + * If we have access to SearchStats over a collection of shards, we can make more fine-grained decisions about what can be pushed down. + * This should open up more opportunities for lucene pushdown. + */ + static LucenePushdownPredicates from(SearchStats stats) { + return new LucenePushdownPredicates() { + @Override + public boolean hasExactSubfield(FieldAttribute attr) { + return stats.hasExactSubfield(attr.name()); + } + + @Override + public boolean isIndexedAndHasDocValues(FieldAttribute attr) { + // We still consider the value of isAggregatable here, because some fields like ScriptFieldTypes are always aggregatable + // But this could hide issues with fields that are not indexed but are aggregatable + // This is the original behaviour for ES|QL, but is it correct? 
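+ // Note that && binds tighter than ||, so this evaluates as isAggregatable || (isIndexed && hasDocValues)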
+ return attr.field().isAggregatable() || stats.isIndexed(attr.name()) && stats.hasDocValues(attr.name()); + } + + @Override + public boolean isIndexed(FieldAttribute attr) { + return stats.isIndexed(attr.name()); + } + }; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java index 626ef5e83bd65..f01e7c4b1f3a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java @@ -55,11 +55,9 @@ import java.util.ArrayList; import java.util.List; -import java.util.function.Predicate; import static java.util.Arrays.asList; import static org.elasticsearch.xpack.esql.core.expression.predicate.Predicates.splitAnd; -import static org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushDownUtils.isAggregatable; public class PushFiltersToSource extends PhysicalOptimizerRules.ParameterizedOptimizerRule { @@ -78,7 +76,7 @@ private static PhysicalPlan planFilterExec(FilterExec filterExec, EsQueryExec qu List pushable = new ArrayList<>(); List nonPushable = new ArrayList<>(); for (Expression exp : splitAnd(filterExec.condition())) { - (canPushToSource(exp, x -> LucenePushDownUtils.hasIdenticalDelegate(x, ctx.searchStats())) ? pushable : nonPushable).add(exp); + (canPushToSource(exp, LucenePushdownPredicates.from(ctx.searchStats())) ? pushable : nonPushable).add(exp); } return rewrite(filterExec, queryExec, pushable, nonPushable, List.of()); } @@ -94,9 +92,7 @@ private static PhysicalPlan planFilterExec( List nonPushable = new ArrayList<>(); for (Expression exp : splitAnd(filterExec.condition())) { Expression resExp = exp.transformUp(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r)); - (canPushToSource(resExp, x -> LucenePushDownUtils.hasIdenticalDelegate(x, ctx.searchStats())) ? pushable : nonPushable).add( - exp - ); + (canPushToSource(resExp, LucenePushdownPredicates.from(ctx.searchStats())) ? pushable : nonPushable).add(exp); } // Replace field references with their actual field attributes pushable.replaceAll(e -> e.transformDown(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r))); @@ -222,17 +218,27 @@ else if ((other instanceof GreaterThan || other instanceof GreaterThanOrEqual) return changed ? CollectionUtils.combine(others, bcs, ranges) : pushable; } - public static boolean canPushToSource(Expression exp, Predicate hasIdenticalDelegate) { + /** + * Check if the given expression can be pushed down to the source. + * This version of the check is called when we do not have SearchStats available. It assumes no exact subfields for TEXT fields, + * and makes the indexed/doc-values check using the isAggregatable flag only, which comes from field-caps, represents the field state + * over the entire cluster (is not node specific), and has risks for indexed=false/doc_values=true fields. 
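+ * For example, the {@code location} field in {@code mapping-airports_not_indexed.json} above is mapped with
+ * {@code "index": false, "doc_values": true}; it is aggregatable, so this variant would treat it as pushable
+ * even though it is not indexed.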
+ */ + public static boolean canPushToSource(Expression exp) { + return canPushToSource(exp, LucenePushdownPredicates.DEFAULT); + } + + static boolean canPushToSource(Expression exp, LucenePushdownPredicates lucenePushdownPredicates) { if (exp instanceof BinaryComparison bc) { - return isAttributePushable(bc.left(), bc, hasIdenticalDelegate) && bc.right().foldable(); + return isAttributePushable(bc.left(), bc, lucenePushdownPredicates) && bc.right().foldable(); } else if (exp instanceof InsensitiveBinaryComparison bc) { - return isAttributePushable(bc.left(), bc, hasIdenticalDelegate) && bc.right().foldable(); + return isAttributePushable(bc.left(), bc, lucenePushdownPredicates) && bc.right().foldable(); } else if (exp instanceof BinaryLogic bl) { - return canPushToSource(bl.left(), hasIdenticalDelegate) && canPushToSource(bl.right(), hasIdenticalDelegate); + return canPushToSource(bl.left(), lucenePushdownPredicates) && canPushToSource(bl.right(), lucenePushdownPredicates); } else if (exp instanceof In in) { - return isAttributePushable(in.value(), null, hasIdenticalDelegate) && Expressions.foldable(in.list()); + return isAttributePushable(in.value(), null, lucenePushdownPredicates) && Expressions.foldable(in.list()); } else if (exp instanceof Not not) { - return canPushToSource(not.field(), hasIdenticalDelegate); + return canPushToSource(not.field(), lucenePushdownPredicates); } else if (exp instanceof UnaryScalarFunction usf) { if (usf instanceof RegexMatch || usf instanceof IsNull || usf instanceof IsNotNull) { if (usf instanceof IsNull || usf instanceof IsNotNull) { @@ -240,12 +246,13 @@ public static boolean canPushToSource(Expression exp, Predicate return true; } } - return isAttributePushable(usf.field(), usf, hasIdenticalDelegate); + return isAttributePushable(usf.field(), usf, lucenePushdownPredicates); } } else if (exp instanceof CIDRMatch cidrMatch) { - return isAttributePushable(cidrMatch.ipField(), cidrMatch, hasIdenticalDelegate) && Expressions.foldable(cidrMatch.matches()); + return isAttributePushable(cidrMatch.ipField(), cidrMatch, lucenePushdownPredicates) + && Expressions.foldable(cidrMatch.matches()); } else if (exp instanceof SpatialRelatesFunction spatial) { - return canPushSpatialFunctionToSource(spatial); + return canPushSpatialFunctionToSource(spatial, lucenePushdownPredicates); } else if (exp instanceof StringQueryPredicate) { return true; } else if (exp instanceof QueryString) { @@ -259,23 +266,24 @@ public static boolean canPushToSource(Expression exp, Predicate /** * Push-down to Lucene is only possible if one field is an indexed spatial field, and the other is a constant spatial or string column. 
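 * For example (illustrative, reusing names from the test fixtures):
 * {@code ST_INTERSECTS(location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))"))} is pushable when
 * {@code location} is indexed, while {@code ST_INTERSECTS(location, city_location)} over two field references is not,
 * because neither side is foldable.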
*/ - public static boolean canPushSpatialFunctionToSource(BinarySpatialFunction s) { + public static boolean canPushSpatialFunctionToSource(BinarySpatialFunction s, LucenePushdownPredicates lucenePushdownPredicates) { // The use of foldable here instead of SpatialEvaluatorFieldKey.isConstant is intentional to match the behavior of the // Lucene pushdown code in EsqlTranslationHandler::SpatialRelatesTranslator // We could enhance both places to support ReferenceAttributes that refer to constants, but that is a larger change - return isPushableSpatialAttribute(s.left()) && s.right().foldable() || isPushableSpatialAttribute(s.right()) && s.left().foldable(); + return isPushableSpatialAttribute(s.left(), lucenePushdownPredicates) && s.right().foldable() + || isPushableSpatialAttribute(s.right(), lucenePushdownPredicates) && s.left().foldable(); } - private static boolean isPushableSpatialAttribute(Expression exp) { - return exp instanceof FieldAttribute fa && fa.getExactInfo().hasExact() && isAggregatable(fa) && DataType.isSpatial(fa.dataType()); + private static boolean isPushableSpatialAttribute(Expression exp, LucenePushdownPredicates p) { + return exp instanceof FieldAttribute fa && DataType.isSpatial(fa.dataType()) && fa.getExactInfo().hasExact() && p.isIndexed(fa); } private static boolean isAttributePushable( Expression expression, Expression operation, - Predicate hasIdenticalDelegate + LucenePushdownPredicates lucenePushdownPredicates ) { - if (LucenePushDownUtils.isPushableFieldAttribute(expression, hasIdenticalDelegate)) { + if (lucenePushdownPredicates.isPushableFieldAttribute(expression)) { return true; } if (expression instanceof MetadataAttribute ma && ma.searchable()) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java index 2ae496b55ac00..925e144b69fcc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java @@ -30,7 +30,6 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; -import java.util.function.Predicate; /** * We handle two main scenarios here: @@ -60,7 +59,7 @@ public class PushTopNToSource extends PhysicalOptimizerRules.ParameterizedOptimizerRule { @Override protected PhysicalPlan rule(TopNExec topNExec, LocalPhysicalOptimizerContext ctx) { - Pushable pushable = evaluatePushable(topNExec, x -> LucenePushDownUtils.hasIdenticalDelegate(x, ctx.searchStats())); + Pushable pushable = evaluatePushable(topNExec, LucenePushdownPredicates.from(ctx.searchStats())); return pushable.rewrite(topNExec); } @@ -121,11 +120,11 @@ public PhysicalPlan rewrite(TopNExec topNExec) { } } - private static Pushable evaluatePushable(TopNExec topNExec, Predicate hasIdenticalDelegate) { + private static Pushable evaluatePushable(TopNExec topNExec, LucenePushdownPredicates lucenePushdownPredicates) { PhysicalPlan child = topNExec.child(); if (child instanceof EsQueryExec queryExec && queryExec.canPushSorts() - && canPushDownOrders(topNExec.order(), hasIdenticalDelegate)) { + && canPushDownOrders(topNExec.order(), lucenePushdownPredicates)) { // With the simplest case of `FROM index | SORT ...` we only allow pushing down if the sort is on a field return new 
PushableQueryExec(queryExec); } @@ -148,7 +147,7 @@ && canPushDownOrders(topNExec.order(), hasIdenticalDelegate)) { List pushableSorts = new ArrayList<>(); for (Order order : orders) { - if (LucenePushDownUtils.isPushableFieldAttribute(order.child(), hasIdenticalDelegate)) { + if (lucenePushdownPredicates.isPushableFieldAttribute(order.child())) { pushableSorts.add( new EsQueryExec.FieldSort( ((FieldAttribute) order.child()).exactAttribute(), @@ -169,7 +168,7 @@ && canPushDownOrders(topNExec.order(), hasIdenticalDelegate)) { break; } } else if (aliasReplacedBy.resolve(referenceAttribute, referenceAttribute) instanceof FieldAttribute fieldAttribute - && LucenePushDownUtils.isPushableFieldAttribute(fieldAttribute, hasIdenticalDelegate)) { + && lucenePushdownPredicates.isPushableFieldAttribute(fieldAttribute)) { // If the SORT refers to a reference to a pushable field, we can push it down pushableSorts.add( new EsQueryExec.FieldSort(fieldAttribute.exactAttribute(), order.direction(), order.nullsPosition()) @@ -191,9 +190,9 @@ && canPushDownOrders(topNExec.order(), hasIdenticalDelegate)) { return NO_OP; } - private static boolean canPushDownOrders(List orders, Predicate hasIdenticalDelegate) { + private static boolean canPushDownOrders(List orders, LucenePushdownPredicates lucenePushdownPredicates) { // allow only exact FieldAttributes (no expressions) for sorting - return orders.stream().allMatch(o -> LucenePushDownUtils.isPushableFieldAttribute(o.child(), hasIdenticalDelegate)); + return orders.stream().allMatch(o -> lucenePushdownPredicates.isPushableFieldAttribute(o.child())); } private static List buildFieldSorts(List orders) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java index d03cd9ef7cb0b..0f1c32e94f867 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/SpatialDocValuesExtraction.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialAggregateFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesFunction; +import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.PhysicalOptimizerRules; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EvalExec; @@ -22,6 +23,7 @@ import org.elasticsearch.xpack.esql.plan.physical.FilterExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; +import org.elasticsearch.xpack.esql.stats.SearchStats; import java.util.ArrayList; import java.util.HashSet; @@ -63,9 +65,11 @@ * is the only place where this information is available. This also means that the knowledge of the usage of doc-values does not need * to be serialized between nodes, and is only used locally. 
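 * For example (illustrative), for {@code FROM airports | STATS centroid=ST_CENTROID_AGG(location)} this rule both
 * marks the {@code location} field to be loaded from doc-values and rewrites the aggregation to consume them.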
*/ -public class SpatialDocValuesExtraction extends PhysicalOptimizerRules.OptimizerRule { +public class SpatialDocValuesExtraction extends PhysicalOptimizerRules.ParameterizedOptimizerRule< + AggregateExec, + LocalPhysicalOptimizerContext> { @Override - protected PhysicalPlan rule(AggregateExec aggregate) { + protected PhysicalPlan rule(AggregateExec aggregate, LocalPhysicalOptimizerContext ctx) { var foundAttributes = new HashSet(); PhysicalPlan plan = aggregate.transformDown(UnaryExec.class, exec -> { @@ -75,7 +79,7 @@ protected PhysicalPlan rule(AggregateExec aggregate) { for (NamedExpression aggExpr : agg.aggregates()) { if (aggExpr instanceof Alias as && as.child() instanceof SpatialAggregateFunction af) { if (af.field() instanceof FieldAttribute fieldAttribute - && allowedForDocValues(fieldAttribute, agg, foundAttributes)) { + && allowedForDocValues(fieldAttribute, ctx.searchStats(), agg, foundAttributes)) { // We need to both mark the field to load differently, and change the spatial function to know to use it foundAttributes.add(fieldAttribute); changedAggregates = true; @@ -153,8 +157,13 @@ private boolean foundField(Expression expression, Set foundAttri * This function disallows the use of more than one field for doc-values extraction in the same spatial relation function. * This is because comparing two doc-values fields is not supported in the current implementation. */ - private boolean allowedForDocValues(FieldAttribute fieldAttribute, AggregateExec agg, Set foundAttributes) { - if (fieldAttribute.field().isAggregatable() == false) { + private boolean allowedForDocValues( + FieldAttribute fieldAttribute, + SearchStats stats, + AggregateExec agg, + Set foundAttributes + ) { + if (stats.hasDocValues(fieldAttribute.fieldName()) == false) { return false; } var candidateDocValuesAttributes = new HashSet<>(foundAttributes); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index 1758edb386e59..c998af2215169 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -20,7 +20,6 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -52,13 +51,13 @@ import org.elasticsearch.xpack.esql.planner.mapper.LocalMapper; import org.elasticsearch.xpack.esql.planner.mapper.Mapper; import org.elasticsearch.xpack.esql.session.Configuration; +import org.elasticsearch.xpack.esql.stats.SearchContextStats; import org.elasticsearch.xpack.esql.stats.SearchStats; import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; -import java.util.function.Predicate; import static java.util.Arrays.asList; import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; @@ -138,7 +137,7 @@ public static String[] planOriginalIndices(PhysicalPlan plan) { } public static PhysicalPlan localPlan(List searchContexts, Configuration configuration, PhysicalPlan plan) { - return 
localPlan(configuration, plan, new SearchStats(searchContexts)); + return localPlan(configuration, plan, SearchContextStats.from(searchContexts)); } public static PhysicalPlan localPlan(Configuration configuration, PhysicalPlan plan, SearchStats searchStats) { @@ -174,17 +173,18 @@ public static PhysicalPlan localPlan( } /** - * Extracts the ES query provided by the filter parameter - * @param plan - * @param hasIdenticalDelegate a lambda that given a field attribute sayis if it has - * a synthetic source delegate with the exact same value - * @return + * Extracts the ES query for the @timestamp field for the passed plan. */ - public static QueryBuilder requestFilter(PhysicalPlan plan, Predicate hasIdenticalDelegate) { - return detectFilter(plan, "@timestamp", hasIdenticalDelegate); + public static QueryBuilder requestTimestampFilter(PhysicalPlan plan) { + return detectFilter(plan, "@timestamp"); } - static QueryBuilder detectFilter(PhysicalPlan plan, String fieldName, Predicate hasIdenticalDelegate) { + /** + * Note that since this filter does not have access to SearchStats, it cannot detect if the field is a text field with a delegate. + * We currently only use this filter for the @timestamp field, which is always a date field. Any tests that wish to use this should + * take care to not use it with TEXT fields. + */ + static QueryBuilder detectFilter(PhysicalPlan plan, String fieldName) { // first position is the REST filter, the second the query filter var requestFilter = new QueryBuilder[] { null, null }; @@ -205,7 +205,7 @@ static QueryBuilder detectFilter(PhysicalPlan plan, String fieldName, Predicate< boolean matchesField = refs.removeIf(e -> fieldName.equals(e.name())); // the expression only contains the target reference // and the expression is pushable (functions can be fully translated) - if (matchesField && refs.isEmpty() && canPushToSource(exp, hasIdenticalDelegate)) { + if (matchesField && refs.isEmpty() && canPushToSource(exp)) { matches.add(exp); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 108e70d7d3a50..ffad379001ed0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -309,11 +309,7 @@ private void startComputeOnDataNodes( return reductionNode == null ? f : f.withReducer(reductionNode); }); - // The lambda is to say if a TEXT field has an identical exact subfield - // We cannot use SearchContext because we don't have it yet. - // Since it's used only for @timestamp, it is relatively safe to assume it's not needed - // but it would be better to have a proper impl. 
- QueryBuilder requestFilter = PlannerUtils.requestFilter(planWithReducer, x -> true); + QueryBuilder requestFilter = PlannerUtils.requestTimestampFilter(planWithReducer); var lookupListener = ActionListener.releaseAfter(computeListener.acquireAvoid(), exchangeSource.addEmptySink()); // SearchShards API can_match is done in lookupDataNodes lookupDataNodes(parentTask, clusterAlias, requestFilter, concreteIndices, originalIndices, ActionListener.wrap(dataNodeResult -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java new file mode 100644 index 0000000000000..1f895c43f5dde --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java @@ -0,0 +1,357 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.stats; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.ConstantFieldType; +import org.elasticsearch.index.mapper.DocCountFieldMapper.DocCountFieldType; +import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType; +import org.elasticsearch.index.mapper.SeqNoFieldMapper; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper.TimestampFieldType; +import static org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; +import static org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType; + +/** + * This class provides SearchStats from a list of SearchExecutionContext's. + * It contains primarily a cache of FieldStats which is dynamically updated as needed. + * Each FieldStats contains FieldConfig information which is populated once at creation time. + * The remaining statistics are lazily computed and cached only on demand. + * This cache is not thread-safe. 
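+ * The cache is an access-ordered LinkedHashMap capped at CACHE_SIZE (32) entries, so the stats of the least
+ * recently used field are evicted first.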
+ */ +public class SearchContextStats implements SearchStats { + + private final List contexts; + + private record FieldConfig(boolean exists, boolean hasExactSubfield, boolean indexed, boolean hasDocValues) {} + + private static class FieldStats { + private Long count; + private Object min, max; + private Boolean singleValue; + private FieldConfig config; + } + + private static final int CACHE_SIZE = 32; + + // simple non-thread-safe cache for avoiding unnecessary IO (which while fast is still I/O) + private final Map cache = new LinkedHashMap<>(CACHE_SIZE, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > CACHE_SIZE; + } + }; + + public static SearchStats from(List contexts) { + if (contexts == null || contexts.isEmpty()) { + return SearchStats.EMPTY; + } + return new SearchContextStats(contexts); + } + + private SearchContextStats(List contexts) { + this.contexts = contexts; + assert contexts != null && contexts.isEmpty() == false; + } + + public boolean exists(String field) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + return stat.config.exists; + } + + private FieldStats makeFieldStats(String field) { + var stat = new FieldStats(); + stat.config = makeFieldConfig(field); + return stat; + } + + private FieldConfig makeFieldConfig(String field) { + boolean exists = false; + boolean hasExactSubfield = true; + boolean indexed = true; + boolean hasDocValues = true; + // even if there are deleted documents, check the existence of a field + // since if it's missing, deleted documents won't change that + for (SearchExecutionContext context : contexts) { + if (context.isFieldMapped(field)) { + exists = exists || true; + MappedFieldType type = context.getFieldType(field); + indexed = indexed && type.isIndexed(); + hasDocValues = hasDocValues && type.hasDocValues(); + if (type instanceof TextFieldMapper.TextFieldType t) { + hasExactSubfield = hasExactSubfield && t.canUseSyntheticSourceDelegateForQuerying(); + } else { + hasExactSubfield = false; + } + } else { + indexed = false; + hasDocValues = false; + hasExactSubfield = false; + } + } + if (exists == false) { + // if it does not exist on any context, no other settings are valid + return new FieldConfig(false, false, false, false); + } else { + return new FieldConfig(exists, hasExactSubfield, indexed, hasDocValues); + } + } + + public boolean isIndexed(String field) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + return stat.config.indexed; + } + + public boolean hasDocValues(String field) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + return stat.config.hasDocValues; + } + + public boolean hasExactSubfield(String field) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + return stat.config.hasExactSubfield; + } + + public long count() { + var count = new long[] { 0 }; + boolean completed = doWithContexts(r -> { + count[0] += r.numDocs(); + return true; + }, false); + return completed ? count[0] : -1; + } + + public long count(String field) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + if (stat.count == null) { + var count = new long[] { 0 }; + boolean completed = doWithContexts(r -> { + count[0] += countEntries(r, field); + return true; + }, false); + stat.count = completed ? 
count[0] : -1; + } + return stat.count; + } + + public long count(String field, BytesRef value) { + var count = new long[] { 0 }; + Term term = new Term(field, value); + boolean completed = doWithContexts(r -> { + count[0] += r.docFreq(term); + return true; + }, false); + return completed ? count[0] : -1; + } + + public byte[] min(String field, DataType dataType) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + if (stat.min == null) { + var min = new byte[][] { null }; + doWithContexts(r -> { + byte[] localMin = PointValues.getMinPackedValue(r, field); + // TODO: how to compare with the previous min + if (localMin != null) { + if (min[0] == null) { + min[0] = localMin; + } else { + throw new EsqlIllegalArgumentException("Don't know how to compare with previous min"); + } + } + return true; + }, true); + stat.min = min[0]; + } + // return stat.min; + return null; + } + + public byte[] max(String field, DataType dataType) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + if (stat.max == null) { + var max = new byte[][] { null }; + doWithContexts(r -> { + byte[] localMax = PointValues.getMaxPackedValue(r, field); + // TODO: how to compare with the previous max + if (localMax != null) { + if (max[0] == null) { + max[0] = localMax; + } else { + throw new EsqlIllegalArgumentException("Don't know how to compare with previous max"); + } + } + return true; + }, true); + stat.max = max[0]; + } + // return stat.max; + return null; + } + + public boolean isSingleValue(String field) { + var stat = cache.computeIfAbsent(field, this::makeFieldStats); + if (stat.singleValue == null) { + // there's no such field so no need to worry about multi-value fields + if (exists(field) == false) { + stat.singleValue = true; + } else { + // fields are MV per default + var sv = new boolean[] { false }; + for (SearchExecutionContext context : contexts) { + MappedFieldType mappedType = context.isFieldMapped(field) ? 
context.getFieldType(field) : null; + if (mappedType != null) { + sv[0] = true; + doWithContexts(r -> { + sv[0] &= detectSingleValue(r, mappedType, field); + return sv[0]; + }, true); + break; + } + } + stat.singleValue = sv[0]; + } + } + return stat.singleValue; + } + + private boolean detectSingleValue(IndexReader r, MappedFieldType fieldType, String name) throws IOException { + // types that are always single value (and are accessible through instanceof) + if (fieldType instanceof ConstantFieldType || fieldType instanceof DocCountFieldType || fieldType instanceof TimestampFieldType) { + return true; + } + + var typeName = fieldType.typeName(); + + // non-visible fields, check their names + boolean found = switch (typeName) { + case IdFieldMapper.NAME, SeqNoFieldMapper.NAME -> true; + default -> false; + }; + + if (found) { + return true; + } + + // check against doc size + DocCountTester tester = null; + if (fieldType instanceof DateFieldType || fieldType instanceof NumberFieldType) { + tester = lr -> { + PointValues values = lr.getPointValues(name); + return values == null || values.size() == values.getDocCount(); + }; + } else if (fieldType instanceof KeywordFieldType) { + tester = lr -> { + Terms terms = lr.terms(name); + return terms == null || terms.size() == terms.getDocCount(); + }; + } + + if (tester != null) { + // check each leaf + for (LeafReaderContext context : r.leaves()) { + if (tester.test(context.reader()) == false) { + return false; + } + } + // field is missing or single value + return true; + } + + // unsupported type - default to MV + return false; + } + + private interface DocCountTester { + Boolean test(LeafReader leafReader) throws IOException; + } + + // + // @see org.elasticsearch.search.query.QueryPhaseCollectorManager#shortcutTotalHitCount(IndexReader, Query) + // + private static long countEntries(IndexReader indexReader, String field) { + long count = 0; + try { + for (LeafReaderContext context : indexReader.leaves()) { + LeafReader reader = context.reader(); + FieldInfos fieldInfos = reader.getFieldInfos(); + FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + + if (fieldInfo != null) { + if (fieldInfo.getDocValuesType() == DocValuesType.NONE) { + // no shortcut possible: it's a text field, empty values are counted as no value. + return -1; + } + if (fieldInfo.getPointIndexDimensionCount() > 0) { + PointValues points = reader.getPointValues(field); + if (points != null) { + count += points.size(); + } + } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { + Terms terms = reader.terms(field); + if (terms != null) { + count += terms.getSumTotalTermFreq(); + } + } else { + return -1; // no shortcut possible for fields that are not indexed + } + } + } + } catch (IOException ex) { + throw new EsqlIllegalArgumentException("Cannot access data storage", ex); + } + return count; + } + + private interface IndexReaderConsumer { + /** + * Returns true if the consumer should keep on going, false otherwise. 
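+ * Implementations may throw IOException; {@code doWithContexts} wraps any such failure in an
+ * EsqlIllegalArgumentException.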
+ */ + boolean consume(IndexReader reader) throws IOException; + } + + private boolean doWithContexts(IndexReaderConsumer consumer, boolean acceptsDeletions) { + try { + for (SearchExecutionContext context : contexts) { + for (LeafReaderContext leafContext : context.searcher().getLeafContexts()) { + var reader = leafContext.reader(); + if (acceptsDeletions == false && reader.hasDeletions()) { + return false; + } + // check if the looping continues or not + if (consumer.consume(reader) == false) { + return false; + } + } + } + return true; + } catch (IOException ex) { + throw new EsqlIllegalArgumentException("Cannot access data storage", ex); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java index 73935cea540b1..ca24bd54ee67c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java @@ -7,363 +7,90 @@ package org.elasticsearch.xpack.esql.stats; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.index.mapper.AbstractScriptFieldType; -import org.elasticsearch.index.mapper.ConstantFieldType; -import org.elasticsearch.index.mapper.DocCountFieldMapper.DocCountFieldType; -import org.elasticsearch.index.mapper.IdFieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType; -import org.elasticsearch.index.mapper.SeqNoFieldMapper; -import org.elasticsearch.index.mapper.TextFieldMapper; -import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.type.DataType; -import java.io.IOException; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +/** + * Interface for determining information about fields in the index. + * This is used by the optimizer to make decisions about how to optimize queries. 
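+ * <p>For example, a local plan rule may consult these stats before rewriting the plan. A minimal
+ * sketch (the surrounding rule is hypothetical; only the stats call is part of this interface):
+ * <pre>{@code
+ * if (stats.isIndexed("location")) {
+ *     // the filter on this field is eligible for pushdown into the Lucene source query
+ * }
+ * }</pre>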
+ */ +public interface SearchStats { + SearchStats EMPTY = new EmptySearchStats(); -import static org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper.TimestampFieldType; -import static org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; -import static org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType; + boolean exists(String field); -public class SearchStats { + boolean isIndexed(String field); - private final List contexts; + boolean hasDocValues(String field); - private static class FieldStat { - private Long count; - private Object min, max; - // TODO: use a multi-bitset instead - private Boolean exists; - private Boolean singleValue; - private Boolean hasIdenticalDelegate; - private Boolean indexed; - private Boolean runtime; - } + boolean hasExactSubfield(String field); - private static final int CACHE_SIZE = 32; + long count(); - // simple non-thread-safe cache for avoiding unnecessary IO (which while fast it still I/O) - private final Map cache = new LinkedHashMap<>(CACHE_SIZE, 0.75f, true) { - @Override - protected boolean removeEldestEntry(Map.Entry eldest) { - return size() > CACHE_SIZE; - } - }; + long count(String field); - public SearchStats(List contexts) { - this.contexts = contexts; - } + long count(String field, BytesRef value); - public long count() { - var count = new long[] { 0 }; - boolean completed = doWithContexts(r -> { - count[0] += r.numDocs(); - return true; - }, false); - return completed ? count[0] : -1; - } + byte[] min(String field, DataType dataType); - public long count(String field) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.count == null) { - var count = new long[] { 0 }; - boolean completed = doWithContexts(r -> { - count[0] += countEntries(r, field); - return true; - }, false); - stat.count = completed ? count[0] : -1; - } - return stat.count; - } + byte[] max(String field, DataType dataType); - public long count(String field, BytesRef value) { - var count = new long[] { 0 }; - Term term = new Term(field, value); - boolean completed = doWithContexts(r -> { - count[0] += r.docFreq(term); - return true; - }, false); - return completed ? count[0] : -1; - } + boolean isSingleValue(String field); - public boolean exists(String field) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.exists == null) { - stat.exists = false; - // even if there are deleted documents, check the existence of a field - // since if it's missing, deleted documents won't change that - for (SearchExecutionContext context : contexts) { - if (context.isFieldMapped(field)) { - stat.exists = true; - break; - } - } + /** + * When there are no search stats available, for example when there are no search contexts, we have static results. 
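+     * Every field is reported as absent (not indexed, no doc values, no exact subfield), all counts are zero,
+     * min/max are {@code null}, and fields are treated as single-valued.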
+ */ + record EmptySearchStats() implements SearchStats { - // populate additional properties to save on the lookups - if (stat.exists == false) { - stat.indexed = false; - stat.singleValue = true; - } - } - return stat.exists; - } - - public boolean hasIdenticalDelegate(String field) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.hasIdenticalDelegate == null) { - stat.hasIdenticalDelegate = true; - for (SearchExecutionContext context : contexts) { - if (context.isFieldMapped(field)) { - MappedFieldType type = context.getFieldType(field); - if (type instanceof TextFieldMapper.TextFieldType t) { - if (t.canUseSyntheticSourceDelegateForQuerying() == false) { - stat.hasIdenticalDelegate = false; - break; - } - } else { - stat.hasIdenticalDelegate = false; - break; - } - } - } + @Override + public boolean exists(String field) { + return false; } - return stat.hasIdenticalDelegate; - } - public byte[] min(String field, DataType dataType) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.min == null) { - var min = new byte[][] { null }; - doWithContexts(r -> { - byte[] localMin = PointValues.getMinPackedValue(r, field); - // TODO: how to compare with the previous min - if (localMin != null) { - if (min[0] == null) { - min[0] = localMin; - } else { - throw new EsqlIllegalArgumentException("Don't know how to compare with previous min"); - } - } - return true; - }, true); - stat.min = min[0]; + @Override + public boolean isIndexed(String field) { + return false; } - // return stat.min; - return null; - } - public byte[] max(String field, DataType dataType) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.max == null) { - var max = new byte[][] { null }; - doWithContexts(r -> { - byte[] localMax = PointValues.getMaxPackedValue(r, field); - // TODO: how to compare with the previous max - if (localMax != null) { - if (max[0] == null) { - max[0] = localMax; - } else { - throw new EsqlIllegalArgumentException("Don't know how to compare with previous max"); - } - } - return true; - }, true); - stat.max = max[0]; + @Override + public boolean hasDocValues(String field) { + return false; } - // return stat.max; - return null; - } - public boolean isSingleValue(String field) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.singleValue == null) { - // there's no such field so no need to worry about multi-value fields - if (exists(field) == false) { - stat.singleValue = true; - } else { - // fields are MV per default - var sv = new boolean[] { false }; - for (SearchExecutionContext context : contexts) { - MappedFieldType mappedType = context.isFieldMapped(field) ? 
context.getFieldType(field) : null; - if (mappedType != null) { - sv[0] = true; - doWithContexts(r -> { - sv[0] &= detectSingleValue(r, mappedType, field); - return sv[0]; - }, true); - break; - } - } - stat.singleValue = sv[0]; - } + @Override + public boolean hasExactSubfield(String field) { + return false; } - return stat.singleValue; - } - public boolean isRuntimeField(String field) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.runtime == null) { - stat.runtime = false; - if (exists(field)) { - for (SearchExecutionContext context : contexts) { - if (context.isFieldMapped(field)) { - if (context.getFieldType(field) instanceof AbstractScriptFieldType) { - stat.runtime = true; - break; - } - } - } - } + @Override + public long count() { + return 0; } - return stat.runtime; - } - public boolean isIndexed(String field) { - var stat = cache.computeIfAbsent(field, s -> new FieldStat()); - if (stat.indexed == null) { - stat.indexed = false; - if (exists(field)) { - boolean indexed = true; - for (SearchExecutionContext context : contexts) { - if (context.isFieldMapped(field)) { - if (context.getFieldType(field).isIndexed() == false) { - indexed = false; - break; - } - } - } - stat.indexed = indexed; - } + @Override + public long count(String field) { + return 0; } - return stat.indexed; - } - private boolean detectSingleValue(IndexReader r, MappedFieldType fieldType, String name) throws IOException { - // types that are always single value (and are accessible through instanceof) - if (fieldType instanceof ConstantFieldType || fieldType instanceof DocCountFieldType || fieldType instanceof TimestampFieldType) { - return true; + @Override + public long count(String field, BytesRef value) { + return 0; } - var typeName = fieldType.typeName(); - - // non-visible fields, check their names - boolean found = switch (typeName) { - case IdFieldMapper.NAME, SeqNoFieldMapper.NAME -> true; - default -> false; - }; - - if (found) { - return true; + @Override + public byte[] min(String field, DataType dataType) { + return null; } - // check against doc size - DocCountTester tester = null; - if (fieldType instanceof DateFieldType || fieldType instanceof NumberFieldType) { - tester = lr -> { - PointValues values = lr.getPointValues(name); - return values == null || values.size() == values.getDocCount(); - }; - } else if (fieldType instanceof KeywordFieldType) { - tester = lr -> { - Terms terms = lr.terms(name); - return terms == null || terms.size() == terms.getDocCount(); - }; + @Override + public byte[] max(String field, DataType dataType) { + return null; } - if (tester != null) { - // check each leaf - for (LeafReaderContext context : r.leaves()) { - if (tester.test(context.reader()) == false) { - return false; - } - } - // field is missing or single value + @Override + public boolean isSingleValue(String field) { return true; } - // unsupported type - default to MV - return false; - } - - private interface DocCountTester { - Boolean test(LeafReader leafReader) throws IOException; - } - - // - // @see org.elasticsearch.search.query.QueryPhaseCollectorManager#shortcutTotalHitCount(IndexReader, Query) - // - private static long countEntries(IndexReader indexReader, String field) { - long count = 0; - try { - for (LeafReaderContext context : indexReader.leaves()) { - LeafReader reader = context.reader(); - FieldInfos fieldInfos = reader.getFieldInfos(); - FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - - if (fieldInfo != null) { - if (fieldInfo.getDocValuesType() == 
DocValuesType.NONE) { - // no shortcut possible: it's a text field, empty values are counted as no value. - return -1; - } - if (fieldInfo.getPointIndexDimensionCount() > 0) { - PointValues points = reader.getPointValues(field); - if (points != null) { - count += points.size(); - } - } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { - Terms terms = reader.terms(field); - if (terms != null) { - count += terms.getSumTotalTermFreq(); - } - } else { - return -1; // no shortcut possible for fields that are not indexed - } - } - } - } catch (IOException ex) { - throw new EsqlIllegalArgumentException("Cannot access data storage", ex); - } - return count; - } - - private interface IndexReaderConsumer { - /** - * Returns true if the consumer should keep on going, false otherwise. - */ - boolean consume(IndexReader reader) throws IOException; - } - - private boolean doWithContexts(IndexReaderConsumer consumer, boolean acceptsDeletions) { - try { - for (SearchExecutionContext context : contexts) { - for (LeafReaderContext leafContext : context.searcher().getLeafContexts()) { - var reader = leafContext.reader(); - if (acceptsDeletions == false && reader.hasDeletions()) { - return false; - } - // check if the looping continues or not - if (consumer.consume(reader) == false) { - return false; - } - } - } - return true; - } catch (IOException ex) { - throw new EsqlIllegalArgumentException("Cannot access data storage", ex); - } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 905ca190ebe79..073a51ee69114 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -60,6 +60,7 @@ import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.stats.Metrics; +import org.elasticsearch.xpack.esql.stats.SearchContextStats; import org.elasticsearch.xpack.esql.stats.SearchStats; import org.junit.Before; @@ -330,7 +331,7 @@ private PhysicalPlan planWithMappingAndDocs(String query, String mapping, List { IndexSearcher searcher = newSearcher(directoryReader); SearchExecutionContext ctx = createSearchExecutionContext(mapperService, searcher); - plan.set(plannerOptimizer.plan(query, new SearchStats(List.of(ctx)))); + plan.set(plannerOptimizer.plan(query, SearchContextStats.from(List.of(ctx)))); }); return plan.get(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 9f5d6440e4a06..eb115ed7b2948 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -33,6 +33,8 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.EsqlTestUtils.TestConfigurableSearchStats; +import org.elasticsearch.xpack.esql.EsqlTestUtils.TestConfigurableSearchStats.Config; import 
org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.analysis.Analyzer; import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; @@ -141,6 +143,7 @@ import static org.elasticsearch.index.query.QueryBuilders.existsQuery; import static org.elasticsearch.test.ListMatcher.matchesList; import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_SEARCH_STATS; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; import static org.elasticsearch.xpack.esql.EsqlTestUtils.configuration; @@ -189,14 +192,16 @@ public class PhysicalPlanOptimizerTests extends ESTestCase { private TestDataSource testData; private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this private TestDataSource airports; - private TestDataSource airportsNoDocValues; - private TestDataSource airportsWeb; - private TestDataSource countriesBbox; - private TestDataSource countriesBboxWeb; + private TestDataSource airportsNoDocValues; // Test when spatial field is indexed but has no doc values + private TestDataSource airportsNotIndexed; // Test when spatial field has doc values but is not indexed + private TestDataSource airportsNotIndexedNorDocValues; // Test when spatial field is neither indexed nor has doc-values + private TestDataSource airportsWeb; // Cartesian point field tests + private TestDataSource countriesBbox; // geo_shape field tests + private TestDataSource countriesBboxWeb; // cartesian_shape field tests private final Configuration config; - private record TestDataSource(Map mapping, EsIndex index, Analyzer analyzer) {} + private record TestDataSource(Map mapping, EsIndex index, Analyzer analyzer, SearchStats stats) {} @ParametersFactory(argumentFormatting = PARAM_FORMATTING) public static List readScriptSpec() { @@ -240,9 +245,24 @@ public void init() { this.airports = makeTestDataSource("airports", "mapping-airports.json", functionRegistry, enrichResolution); this.airportsNoDocValues = makeTestDataSource( "airports-no-doc-values", - "mapping-airports-no-doc-values.json", + "mapping-airports_no_doc_values.json", functionRegistry, - enrichResolution + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.DOC_VALUES, "location") + ); + this.airportsNotIndexed = makeTestDataSource( + "airports-not-indexed", + "mapping-airports_not_indexed.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.INDEXED, "location") + ); + this.airportsNotIndexedNorDocValues = makeTestDataSource( + "airports-not-indexed-nor-doc-values", + "mapping-airports_not_indexed_nor_doc_values.json", + functionRegistry, + enrichResolution, + new TestConfigurableSearchStats().exclude(Config.INDEXED, "location").exclude(Config.DOC_VALUES, "location") ); this.airportsWeb = makeTestDataSource("airports_web", "mapping-airports_web.json", functionRegistry, enrichResolution); this.countriesBbox = makeTestDataSource("countriesBbox", "mapping-countries_bbox.json", functionRegistry, enrichResolution); @@ -258,13 +278,23 @@ TestDataSource makeTestDataSource( String indexName, String mappingFileName, EsqlFunctionRegistry functionRegistry, - EnrichResolution enrichResolution + EnrichResolution enrichResolution, + SearchStats stats ) { Map mapping = loadMapping(mappingFileName); EsIndex index = new EsIndex(indexName, mapping, Map.of("test", IndexMode.STANDARD)); 
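        // the stats argument lets each test data source control per-field capabilities (e.g. indexed, doc values)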
IndexResolution getIndexResult = IndexResolution.valid(index); Analyzer analyzer = new Analyzer(new AnalyzerContext(config, functionRegistry, getIndexResult, enrichResolution), TEST_VERIFIER); - return new TestDataSource(mapping, index, analyzer); + return new TestDataSource(mapping, index, analyzer, stats); + } + + TestDataSource makeTestDataSource( + String indexName, + String mappingFileName, + EsqlFunctionRegistry functionRegistry, + EnrichResolution enrichResolution + ) { + return makeTestDataSource(indexName, mappingFileName, functionRegistry, enrichResolution, TEST_SEARCH_STATS); } private static EnrichResolution setupEnrichResolution() { @@ -2132,7 +2162,7 @@ public void testNoNonIndexedFilterPushDown() { | where long_noidx == 1 """); - var optimized = optimizedPlan(plan); + var optimized = optimizedPlan(plan, statsWithIndexedFields()); var limit = as(optimized, LimitExec.class); var exchange = asRemoteExchange(limit.child()); var project = as(exchange.child(), ProjectExec.class); @@ -2183,7 +2213,7 @@ public void testNoNonIndexedSortPushDown() { | sort long_noidx """); - var optimized = optimizedPlan(plan); + var optimized = optimizedPlan(plan, statsWithIndexedFields()); var topN = as(optimized, TopNExec.class); var exchange = as(topN.child(), ExchangeExec.class); var project = as(exchange.child(), ProjectExec.class); @@ -2656,7 +2686,8 @@ public void testSpatialTypesAndStatsUseDocValues() { "from airports | stats centroid = st_centroid_agg(to_geopoint(location))", "from airports | eval location = to_geopoint(location) | stats centroid = st_centroid_agg(location)" }) { for (boolean withDocValues : new boolean[] { false, true }) { - var plan = withDocValues ? physicalPlan(query, airports) : physicalPlan(query, airportsNoDocValues); + var testData = withDocValues ? airports : airportsNoDocValues; + var plan = physicalPlan(query, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); @@ -2669,7 +2700,7 @@ public void testSpatialTypesAndStatsUseDocValues() { as(fAgg.child(), EsRelation.class); // Now optimize the plan and assert the aggregation uses doc-values - var optimized = optimizedPlan(plan); + var optimized = optimizedPlan(plan, testData.stats); limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); // Above the exchange (in coordinator) the aggregation is not using doc-values @@ -2943,11 +2974,12 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsFiltered() { * Note the FieldExtractExec has 'location' set for stats: FieldExtractExec[location{f}#9][location{f}#9] */ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { - for (boolean useDocValues : new boolean[] { true, false }) { + for (boolean useDocValues : new boolean[] { false }) { + var testData = useDocValues ? airports : airportsNoDocValues; var plan = this.physicalPlan(""" FROM airports | STATS centroid=ST_CENTROID_AGG(location), count=COUNT() BY scalerank - """, useDocValues ? 
airports : airportsNoDocValues); + """, testData); var limit = as(plan, LimitExec.class); var agg = as(limit.child(), AggregateExec.class); @@ -2964,7 +2996,7 @@ public void testSpatialTypesAndStatsUseDocValuesMultiAggregationsGrouped() { as(fAgg.child(), EsRelation.class); // Now optimize the plan and assert the aggregation uses doc-values - var optimized = optimizedPlan(plan); + var optimized = optimizedPlan(plan, testData.stats); limit = as(optimized, LimitExec.class); agg = as(limit.child(), AggregateExec.class); att = as(agg.groupings().get(0), Attribute.class); @@ -3519,44 +3551,63 @@ public void testPushSpatialIntersectsStringToSourceAndUseDocValuesForCentroid() | STATS centroid=ST_CENTROID_AGG(location), count=COUNT() """ }) { - for (boolean useDocValues : new boolean[] { true, false }) { - var plan = this.physicalPlan(query, useDocValues ? airports : airportsNoDocValues); - var limit = as(plan, LimitExec.class); - var agg = as(limit.child(), AggregateExec.class); - assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); - // Before optimization the aggregation does not use doc-values - assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); - - var exchange = as(agg.child(), ExchangeExec.class); - var fragment = as(exchange.child(), FragmentExec.class); - var fAgg = as(fragment.fragment(), Aggregate.class); - var filter = as(fAgg.child(), Filter.class); - assertThat("filter contains ST_INTERSECTS", filter.condition(), instanceOf(SpatialIntersects.class)); + for (boolean isIndexed : new boolean[] { true, false }) { + for (boolean useDocValues : new boolean[] { true, false }) { + var testData = useDocValues + ? (isIndexed ? airports : airportsNotIndexed) + : (isIndexed ? airportsNoDocValues : airportsNotIndexedNorDocValues); + var plan = this.physicalPlan(query, testData); + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat("No groupings in aggregation", agg.groupings().size(), equalTo(0)); + // Before optimization the aggregation does not use doc-values + assertAggregation(agg, "count", Count.class); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); - // Now verify that optimization re-writes the ExchangeExec and pushed down the filter into the Lucene query - var optimized = optimizedPlan(plan); - limit = as(optimized, LimitExec.class); - agg = as(limit.child(), AggregateExec.class); - // Above the exchange (in coordinator) the aggregation is not using doc-values - assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); - exchange = as(agg.child(), ExchangeExec.class); - agg = as(exchange.child(), AggregateExec.class); - assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); - // below the exchange (in data node) the aggregation is using doc-values - assertAggregation(agg, "count", Count.class); - assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, useDocValues); - var source = assertChildIsGeoPointExtract(useDocValues ? 
agg : as(agg.child(), FilterExec.class), useDocValues); - if (useDocValues) { - // Query is only pushed to lucene if indexing/doc-values are enabled - var condition = as(source.query(), SpatialRelatesQuery.ShapeQueryBuilder.class); - assertThat("Geometry field name", condition.fieldName(), equalTo("location")); - assertThat("Spatial relationship", condition.relation(), equalTo(ShapeRelation.INTERSECTS)); - assertThat("Geometry is Polygon", condition.shape().type(), equalTo(ShapeType.POLYGON)); - var polygon = as(condition.shape(), Polygon.class); - assertThat("Polygon shell length", polygon.getPolygon().length(), equalTo(5)); - assertThat("Polygon holes", polygon.getNumberOfHoles(), equalTo(0)); + var exchange = as(agg.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var fAgg = as(fragment.fragment(), Aggregate.class); + var filter = as(fAgg.child(), Filter.class); + assertThat("filter contains ST_INTERSECTS", filter.condition(), instanceOf(SpatialIntersects.class)); + + // Now verify that optimization re-writes the ExchangeExec and pushed down the filter into the Lucene query + var optimized = optimizedPlan(plan, testData.stats); + limit = as(optimized, LimitExec.class); + agg = as(limit.child(), AggregateExec.class); + // Above the exchange (in coordinator) the aggregation is not using doc-values + assertAggregation(agg, "count", Count.class); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, false); + exchange = as(agg.child(), ExchangeExec.class); + agg = as(exchange.child(), AggregateExec.class); + assertThat("Aggregation is PARTIAL", agg.getMode(), equalTo(INITIAL)); + // below the exchange (in data node) the aggregation is using doc-values + assertAggregation(agg, "count", Count.class); + assertAggregation(agg, "centroid", SpatialCentroid.class, GEO_POINT, useDocValues); + if (isIndexed) { + var source = assertChildIsGeoPointExtract(agg, useDocValues); + // Query is pushed to lucene if field is indexed (and does not require doc-values or isAggregatable) + var condition = as(source.query(), SpatialRelatesQuery.ShapeQueryBuilder.class); + assertThat("Geometry field name", condition.fieldName(), equalTo("location")); + assertThat("Spatial relationship", condition.relation(), equalTo(ShapeRelation.INTERSECTS)); + assertThat("Geometry is Polygon", condition.shape().type(), equalTo(ShapeType.POLYGON)); + var polygon = as(condition.shape(), Polygon.class); + assertThat("Polygon shell length", polygon.getPolygon().length(), equalTo(5)); + assertThat("Polygon holes", polygon.getNumberOfHoles(), equalTo(0)); + } else { + // If the field is not indexed, we cannot push the filter down to source, so assert that we need to have an explicit + // filter as well as extract the field needed for that filter. 
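+                        // (i.e. the plan keeps a FilterExec above a FieldExtractExec for "location" and the source query is null)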
+ var filterExec = as(agg.child(), FilterExec.class); + assertThat("filter contains ST_INTERSECTS", filterExec.condition(), instanceOf(SpatialIntersects.class)); + var fieldExtractLocation = as(filterExec.child(), FieldExtractExec.class); + assertThat("location field is extracted", fieldExtractLocation.attributesToExtract().size(), equalTo(1)); + assertThat( + "location field is extracted", + fieldExtractLocation.attributesToExtract().get(0).name(), + equalTo("location") + ); + var source = source(fieldExtractLocation.child()); + assertThat("source query is null", source.query(), equalTo(null)); + } } } } @@ -6651,14 +6702,7 @@ private PhysicalPlan optimizedPlan(PhysicalPlan plan, SearchStats searchStats) { } static SearchStats statsWithIndexedFields(String... names) { - return new EsqlTestUtils.TestSearchStats() { - private final Set indexedFields = Set.of(names); - - @Override - public boolean isIndexed(String field) { - return indexedFields.contains(field); - } - }; + return new TestConfigurableSearchStats().include(Config.INDEXED, names); } static PhysicalPlan localRelationshipAlignment(PhysicalPlan l) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java index 0fe7eb6b3d43b..98f0af8e4b8e6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java @@ -34,7 +34,7 @@ import org.elasticsearch.xpack.esql.plan.physical.EvalExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.TopNExec; -import org.elasticsearch.xpack.esql.stats.DisabledSearchStats; +import org.elasticsearch.xpack.esql.stats.SearchStats; import java.io.IOException; import java.nio.ByteOrder; @@ -256,8 +256,7 @@ private static void assertNoPushdownSort(TestPhysicalPlanBuilder builder, String private static PhysicalPlan pushTopNToSource(TopNExec topNExec) { var configuration = EsqlTestUtils.configuration("from test"); - var searchStats = new DisabledSearchStats(); - var ctx = new LocalPhysicalOptimizerContext(configuration, searchStats); + var ctx = new LocalPhysicalOptimizerContext(configuration, SearchStats.EMPTY); var pushTopNToSource = new PushTopNToSource(); return pushTopNToSource.rule(topNExec, ctx); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java index bb937700ef771..8d819f9dbcd6c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java @@ -318,7 +318,7 @@ private QueryBuilder restFilterQuery(String field) { } private QueryBuilder filterQueryForTransportNodes(PhysicalPlan plan) { - return PlannerUtils.detectFilter(plan, EMP_NO, x -> true); + return PlannerUtils.detectFilter(plan, EMP_NO); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java index 564d34149da0e..fce05b07a6a42 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java @@ -10,12 +10,26 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.xpack.esql.core.type.DataType; -import static java.util.Collections.emptyList; +public class DisabledSearchStats implements SearchStats { -public class DisabledSearchStats extends SearchStats { + @Override + public boolean exists(String field) { + return true; + } + + @Override + public boolean isIndexed(String field) { + return true; + } - public DisabledSearchStats() { - super(emptyList()); + @Override + public boolean hasDocValues(String field) { + return true; + } + + @Override + public boolean hasExactSubfield(String field) { + return true; } @Override @@ -33,11 +47,6 @@ public long count(String field, BytesRef value) { return -1; } - @Override - public boolean exists(String field) { - return true; - } - @Override public byte[] min(String field, DataType dataType) { return null; From 66123cfe31edf69c9b55df3acd64a2441bca3983 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 8 Nov 2024 15:12:29 +0100 Subject: [PATCH 07/39] Remove SearchPhaseContext (#116471) The only production implementation of this thing is AbstractSearchAsyncAction, no need to keep a separate interface around. This makes the logic a lot more obvious in terms of the lifeycle of "context" and how it's essentially just the "main" search phase. Plus it outright saves a lot of code, even though it adds a little on the test side. --- .../search/AbstractSearchAsyncAction.java | 93 ++++++++++--- .../action/search/CountedCollector.java | 6 +- .../action/search/DfsQueryPhase.java | 4 +- .../action/search/ExpandSearchPhase.java | 4 +- .../action/search/FetchLookupFieldsPhase.java | 8 +- .../action/search/FetchSearchPhase.java | 6 +- .../action/search/RankFeaturePhase.java | 35 ++--- .../action/search/SearchPhase.java | 2 +- .../action/search/SearchPhaseContext.java | 130 ------------------ .../SearchQueryThenFetchAsyncAction.java | 2 +- .../action/search/CountedCollectorTests.java | 1 + .../action/search/DfsQueryPhaseTests.java | 10 +- .../action/search/ExpandSearchPhaseTests.java | 6 +- .../search/FetchLookupFieldsPhaseTests.java | 3 +- .../action/search/FetchSearchPhaseTests.java | 2 + .../action/search/MockSearchPhaseContext.java | 76 +++++----- .../action/search/RankFeaturePhaseTests.java | 6 + 17 files changed, 168 insertions(+), 226 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/action/search/SearchPhaseContext.java diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index 13c085c9875d4..c051f0ca7a6f5 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -68,7 +68,7 @@ * The fan out and collect algorithm is traditionally used as the initial phase which can either be a query execution or collection of * distributed frequencies */ -abstract class AbstractSearchAsyncAction extends SearchPhase implements SearchPhaseContext { +abstract class AbstractSearchAsyncAction extends SearchPhase { private static final float DEFAULT_INDEX_BOOST = 1.0f; private final Logger logger; private final NamedWriteableRegistry namedWriteableRegistry; @@ -106,7 +106,8 @@ abstract 
class AbstractSearchAsyncAction exten private final boolean throttleConcurrentRequests; private final AtomicBoolean requestCancelled = new AtomicBoolean(); - private final List releasables = new ArrayList<>(); + // protected for tests + protected final List releasables = new ArrayList<>(); AbstractSearchAsyncAction( String name, @@ -194,7 +195,9 @@ protected void notifyListShards( ); } - @Override + /** + * Registers a {@link Releasable} that will be closed when the search request finishes or fails. + */ public void addReleasable(Releasable releasable) { releasables.add(releasable); } @@ -333,8 +336,12 @@ protected abstract void executePhaseOnShard( SearchActionListener listener ); - @Override - public final void executeNextPhase(SearchPhase currentPhase, Supplier nextPhaseSupplier) { + /** + * Processes the phase transition from on phase to another. This method handles all errors that happen during the initial run execution + * of the next phase. If there are no successful operations in the context when this method is executed the search is aborted and + * a response is returned to the user indicating that all shards have failed. + */ + protected void executeNextPhase(SearchPhase currentPhase, Supplier nextPhaseSupplier) { /* This is the main search phase transition where we move to the next phase. If all shards * failed or if there was a failure and partial results are not allowed, then we immediately * fail. Otherwise we continue to the next phase. @@ -470,8 +477,7 @@ protected void onShardGroupFailure(int shardIndex, SearchShardTarget shardTarget * @param shardTarget the shard target for this failure * @param e the failure reason */ - @Override - public final void onShardFailure(final int shardIndex, SearchShardTarget shardTarget, Exception e) { + void onShardFailure(final int shardIndex, SearchShardTarget shardTarget, Exception e) { if (TransportActions.isShardNotAvailableException(e)) { // Groups shard not available exceptions under a generic exception that returns a SERVICE_UNAVAILABLE(503) // temporary error. @@ -568,32 +574,45 @@ private void successfulShardExecution(SearchShardIterator shardsIt) { } } - @Override + /** + * Returns the total number of shards to the current search across all indices + */ public final int getNumShards() { return results.getNumShards(); } - @Override + /** + * Returns a logger for this context to prevent each individual phase to create their own logger. + */ public final Logger getLogger() { return logger; } - @Override + /** + * Returns the currently executing search task + */ public final SearchTask getTask() { return task; } - @Override + /** + * Returns the currently executing search request + */ public final SearchRequest getRequest() { return request; } - @Override + /** + * Returns the targeted {@link OriginalIndices} for the provided {@code shardIndex}. + */ public OriginalIndices getOriginalIndices(int shardIndex) { return shardIterators[shardIndex].getOriginalIndices(); } - @Override + /** + * Checks if the given context id is part of the point in time of this search (if exists). + * We should not release search contexts that belong to the point in time during or after searches. + */ public boolean isPartOfPointInTime(ShardSearchContextId contextId) { final PointInTimeBuilder pointInTimeBuilder = request.pointInTimeBuilder(); if (pointInTimeBuilder != null) { @@ -630,7 +649,12 @@ boolean buildPointInTimeFromSearchResults() { return false; } - @Override + /** + * Builds and sends the final search response back to the user. 
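+     * Shard failures collected so far are attached to the response when partial results are allowed.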
+ * + * @param internalSearchResponse the internal search response + * @param queryResults the results of the query phase + */ public void sendSearchResponse(SearchResponseSections internalSearchResponse, AtomicArray queryResults) { ShardSearchFailure[] failures = buildShardFailures(); Boolean allowPartialResults = request.allowPartialSearchResults(); @@ -655,8 +679,14 @@ public void sendSearchResponse(SearchResponseSections internalSearchResponse, At } } - @Override - public final void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) { + /** + * This method will communicate a fatal phase failure back to the user. In contrast to a shard failure, + * this method will immediately fail the search request and return the failure to the issuer of the request. + * @param phase the phase that failed + * @param msg an optional message + * @param cause the cause of the phase failure + */ + public void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) { raisePhaseFailure(new SearchPhaseExecutionException(phase.getName(), msg, cause, buildShardFailures())); } @@ -683,6 +713,19 @@ private void raisePhaseFailure(SearchPhaseExecutionException exception) { listener.onFailure(exception); } + /** + * Releases a search context with the given context ID on the node the given connection is connected to. + * @see org.elasticsearch.search.query.QuerySearchResult#getContextId() + * @see org.elasticsearch.search.fetch.FetchSearchResult#getContextId() + * + */ + void sendReleaseSearchContext(ShardSearchContextId contextId, Transport.Connection connection, OriginalIndices originalIndices) { + assert isPartOfPointInTime(contextId) == false : "Must not release point in time context [" + contextId + "]"; + if (connection != null) { + searchTransportService.sendFreeContext(connection, contextId, originalIndices); + } + } + /** * Executed once all shard results have been received and processed * @see #onShardFailure(int, SearchShardTarget, Exception) @@ -692,23 +735,29 @@ final void onPhaseDone() { // as a tribute to @kimchy aka. finishHim() executeNextPhase(this, this::getNextPhase); } - @Override + /** + * Returns a connection to the node if connected; otherwise a {@link org.elasticsearch.transport.ConnectTransportException} will be + * thrown.
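+     * The lookup is delegated to the {@code nodeIdToConnection} function supplied when this action was constructed.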
+ */ public final Transport.Connection getConnection(String clusterAlias, String nodeId) { return nodeIdToConnection.apply(clusterAlias, nodeId); } - @Override - public final SearchTransportService getSearchTransport() { + /** + * Returns the {@link SearchTransportService} to send shard request to other nodes + */ + public SearchTransportService getSearchTransport() { return searchTransportService; } - @Override public final void execute(Runnable command) { executor.execute(command); } - @Override - public final void onFailure(Exception e) { + /** + * Notifies the top-level listener of the provided exception + */ + public void onFailure(Exception e) { listener.onFailure(e); } diff --git a/server/src/main/java/org/elasticsearch/action/search/CountedCollector.java b/server/src/main/java/org/elasticsearch/action/search/CountedCollector.java index 2603e3a5a51b5..3d15e11a19d31 100644 --- a/server/src/main/java/org/elasticsearch/action/search/CountedCollector.java +++ b/server/src/main/java/org/elasticsearch/action/search/CountedCollector.java @@ -22,9 +22,9 @@ final class CountedCollector { private final SearchPhaseResults resultConsumer; private final CountDown counter; private final Runnable onFinish; - private final SearchPhaseContext context; + private final AbstractSearchAsyncAction context; - CountedCollector(SearchPhaseResults resultConsumer, int expectedOps, Runnable onFinish, SearchPhaseContext context) { + CountedCollector(SearchPhaseResults resultConsumer, int expectedOps, Runnable onFinish, AbstractSearchAsyncAction context) { this.resultConsumer = resultConsumer; this.counter = new CountDown(expectedOps); this.onFinish = onFinish; @@ -50,7 +50,7 @@ void onResult(R result) { } /** - * Escalates the failure via {@link SearchPhaseContext#onShardFailure(int, SearchShardTarget, Exception)} + * Escalates the failure via {@link AbstractSearchAsyncAction#onShardFailure(int, SearchShardTarget, Exception)} * and then runs {@link #countDown()} */ void onFailure(final int shardIndex, @Nullable SearchShardTarget shardTarget, Exception e) { diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 36d73c0db166a..285dd0a22fd7e 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -44,7 +44,7 @@ final class DfsQueryPhase extends SearchPhase { private final AggregatedDfs dfs; private final List knnResults; private final Function, SearchPhase> nextPhaseFactory; - private final SearchPhaseContext context; + private final AbstractSearchAsyncAction context; private final SearchTransportService searchTransportService; private final SearchProgressListener progressListener; @@ -54,7 +54,7 @@ final class DfsQueryPhase extends SearchPhase { List knnResults, SearchPhaseResults queryResult, Function, SearchPhase> nextPhaseFactory, - SearchPhaseContext context + AbstractSearchAsyncAction context ) { super("dfs_query"); this.progressListener = context.getTask().getProgressListener(); diff --git a/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java index 968d9dac958fa..8feed2aea00b0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java @@ -31,11 +31,11 @@ * forwards to the next 
phase immediately. */ final class ExpandSearchPhase extends SearchPhase { - private final SearchPhaseContext context; + private final AbstractSearchAsyncAction context; private final SearchHits searchHits; private final Supplier nextPhase; - ExpandSearchPhase(SearchPhaseContext context, SearchHits searchHits, Supplier nextPhase) { + ExpandSearchPhase(AbstractSearchAsyncAction context, SearchHits searchHits, Supplier nextPhase) { super("expand"); this.context = context; this.searchHits = searchHits; diff --git a/server/src/main/java/org/elasticsearch/action/search/FetchLookupFieldsPhase.java b/server/src/main/java/org/elasticsearch/action/search/FetchLookupFieldsPhase.java index e73ec5cb14e38..d8671bcadf86d 100644 --- a/server/src/main/java/org/elasticsearch/action/search/FetchLookupFieldsPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/FetchLookupFieldsPhase.java @@ -33,11 +33,15 @@ * @see org.elasticsearch.index.mapper.LookupRuntimeFieldType */ final class FetchLookupFieldsPhase extends SearchPhase { - private final SearchPhaseContext context; + private final AbstractSearchAsyncAction context; private final SearchResponseSections searchResponse; private final AtomicArray queryResults; - FetchLookupFieldsPhase(SearchPhaseContext context, SearchResponseSections searchResponse, AtomicArray queryResults) { + FetchLookupFieldsPhase( + AbstractSearchAsyncAction context, + SearchResponseSections searchResponse, + AtomicArray queryResults + ) { super("fetch_lookup_fields"); this.context = context; this.searchResponse = searchResponse; diff --git a/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java index d7b847d835b83..0fbface3793a8 100644 --- a/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/FetchSearchPhase.java @@ -36,7 +36,7 @@ final class FetchSearchPhase extends SearchPhase { private final AtomicArray searchPhaseShardResults; private final BiFunction, SearchPhase> nextPhaseFactory; - private final SearchPhaseContext context; + private final AbstractSearchAsyncAction context; private final Logger logger; private final SearchProgressListener progressListener; private final AggregatedDfs aggregatedDfs; @@ -47,7 +47,7 @@ final class FetchSearchPhase extends SearchPhase { FetchSearchPhase( SearchPhaseResults resultConsumer, AggregatedDfs aggregatedDfs, - SearchPhaseContext context, + AbstractSearchAsyncAction context, @Nullable SearchPhaseController.ReducedQueryPhase reducedQueryPhase ) { this( @@ -66,7 +66,7 @@ final class FetchSearchPhase extends SearchPhase { FetchSearchPhase( SearchPhaseResults resultConsumer, AggregatedDfs aggregatedDfs, - SearchPhaseContext context, + AbstractSearchAsyncAction context, @Nullable SearchPhaseController.ReducedQueryPhase reducedQueryPhase, BiFunction, SearchPhase> nextPhaseFactory ) { diff --git a/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java b/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java index 05213eb94b750..199228c9f992c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java @@ -38,7 +38,7 @@ public class RankFeaturePhase extends SearchPhase { private static final Logger logger = LogManager.getLogger(RankFeaturePhase.class); - private final SearchPhaseContext context; + private final 
AbstractSearchAsyncAction context; final SearchPhaseResults queryPhaseResults; final SearchPhaseResults rankPhaseResults; private final AggregatedDfs aggregatedDfs; @@ -48,7 +48,7 @@ public class RankFeaturePhase extends SearchPhase { RankFeaturePhase( SearchPhaseResults queryPhaseResults, AggregatedDfs aggregatedDfs, - SearchPhaseContext context, + AbstractSearchAsyncAction context, RankFeaturePhaseRankCoordinatorContext rankFeaturePhaseRankCoordinatorContext ) { super("rank-feature"); @@ -179,22 +179,25 @@ private void onPhaseDone( RankFeaturePhaseRankCoordinatorContext rankFeaturePhaseRankCoordinatorContext, SearchPhaseController.ReducedQueryPhase reducedQueryPhase ) { - ThreadedActionListener rankResultListener = new ThreadedActionListener<>(context, new ActionListener<>() { - @Override - public void onResponse(RankFeatureDoc[] docsWithUpdatedScores) { - RankFeatureDoc[] topResults = rankFeaturePhaseRankCoordinatorContext.rankAndPaginate(docsWithUpdatedScores); - SearchPhaseController.ReducedQueryPhase reducedRankFeaturePhase = newReducedQueryPhaseResults( - reducedQueryPhase, - topResults - ); - moveToNextPhase(rankPhaseResults, reducedRankFeaturePhase); - } + ThreadedActionListener rankResultListener = new ThreadedActionListener<>( + context::execute, + new ActionListener<>() { + @Override + public void onResponse(RankFeatureDoc[] docsWithUpdatedScores) { + RankFeatureDoc[] topResults = rankFeaturePhaseRankCoordinatorContext.rankAndPaginate(docsWithUpdatedScores); + SearchPhaseController.ReducedQueryPhase reducedRankFeaturePhase = newReducedQueryPhaseResults( + reducedQueryPhase, + topResults + ); + moveToNextPhase(rankPhaseResults, reducedRankFeaturePhase); + } - @Override - public void onFailure(Exception e) { - context.onPhaseFailure(RankFeaturePhase.this, "Computing updated ranks for results failed", e); + @Override + public void onFailure(Exception e) { + context.onPhaseFailure(RankFeaturePhase.this, "Computing updated ranks for results failed", e); + } } - }); + ); rankFeaturePhaseRankCoordinatorContext.computeRankScoresForGlobalResults( rankPhaseResults.getAtomicArray().asList().stream().map(SearchPhaseResult::rankFeatureResult).toList(), rankResultListener diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java index e312b7399e70d..e4fef357cb4e9 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhase.java @@ -74,7 +74,7 @@ protected void doCheckNoMissingShards(String phaseName, SearchRequest request, G /** * Releases shard targets that are not used in the docsIdsToLoad. 
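     * Contexts belonging to a scroll or point in time, or whose hits all made it into the global top docs, are kept open.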
*/ - protected void releaseIrrelevantSearchContext(SearchPhaseResult searchPhaseResult, SearchPhaseContext context) { + protected void releaseIrrelevantSearchContext(SearchPhaseResult searchPhaseResult, AbstractSearchAsyncAction context) { // we only release search context that we did not fetch from, if we are not scrolling // or using a PIT and if it has at least one hit that didn't make it to the global topDocs if (searchPhaseResult == null) { diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseContext.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseContext.java deleted file mode 100644 index d048887b69c97..0000000000000 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseContext.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ -package org.elasticsearch.action.search; - -import org.apache.logging.log4j.Logger; -import org.elasticsearch.action.OriginalIndices; -import org.elasticsearch.common.util.concurrent.AtomicArray; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.Releasable; -import org.elasticsearch.search.SearchPhaseResult; -import org.elasticsearch.search.SearchShardTarget; -import org.elasticsearch.search.internal.ShardSearchContextId; -import org.elasticsearch.transport.Transport; - -import java.util.concurrent.Executor; -import java.util.function.Supplier; - -/** - * This class provide contextual state and access to resources across multiple search phases. - */ -interface SearchPhaseContext extends Executor { - // TODO maybe we can make this concrete later - for now we just implement this in the base class for all initial phases - - /** - * Returns the total number of shards to the current search across all indices - */ - int getNumShards(); - - /** - * Returns a logger for this context to prevent each individual phase to create their own logger. - */ - Logger getLogger(); - - /** - * Returns the currently executing search task - */ - SearchTask getTask(); - - /** - * Returns the currently executing search request - */ - SearchRequest getRequest(); - - /** - * Returns the targeted {@link OriginalIndices} for the provided {@code shardIndex}. - */ - OriginalIndices getOriginalIndices(int shardIndex); - - /** - * Checks if the given context id is part of the point in time of this search (if exists). - * We should not release search contexts that belong to the point in time during or after searches. - */ - boolean isPartOfPointInTime(ShardSearchContextId contextId); - - /** - * Builds and sends the final search response back to the user. - * - * @param internalSearchResponse the internal search response - * @param queryResults the results of the query phase - */ - void sendSearchResponse(SearchResponseSections internalSearchResponse, AtomicArray queryResults); - - /** - * Notifies the top-level listener of the provided exception - */ - void onFailure(Exception e); - - /** - * This method will communicate a fatal phase failure back to the user. 
In contrast to a shard failure - * will this method immediately fail the search request and return the failure to the issuer of the request - * @param phase the phase that failed - * @param msg an optional message - * @param cause the cause of the phase failure - */ - void onPhaseFailure(SearchPhase phase, String msg, Throwable cause); - - /** - * This method will record a shard failure for the given shard index. In contrast to a phase failure - * ({@link #onPhaseFailure(SearchPhase, String, Throwable)}) this method will immediately return to the user but will record - * a shard failure for the given shard index. This should be called if a shard failure happens after we successfully retrieved - * a result from that shard in a previous phase. - */ - void onShardFailure(int shardIndex, @Nullable SearchShardTarget shardTarget, Exception e); - - /** - * Returns a connection to the node if connected otherwise and {@link org.elasticsearch.transport.ConnectTransportException} will be - * thrown. - */ - Transport.Connection getConnection(String clusterAlias, String nodeId); - - /** - * Returns the {@link SearchTransportService} to send shard request to other nodes - */ - SearchTransportService getSearchTransport(); - - /** - * Releases a search context with the given context ID on the node the given connection is connected to. - * @see org.elasticsearch.search.query.QuerySearchResult#getContextId() - * @see org.elasticsearch.search.fetch.FetchSearchResult#getContextId() - * - */ - default void sendReleaseSearchContext( - ShardSearchContextId contextId, - Transport.Connection connection, - OriginalIndices originalIndices - ) { - assert isPartOfPointInTime(contextId) == false : "Must not release point in time context [" + contextId + "]"; - if (connection != null) { - getSearchTransport().sendFreeContext(connection, contextId, originalIndices); - } - } - - /** - * Processes the phase transition from on phase to another. This method handles all errors that happen during the initial run execution - * of the next phase. If there are no successful operations in the context when this method is executed the search is aborted and - * a response is returned to the user indicating that all shards have failed. - */ - void executeNextPhase(SearchPhase currentPhase, Supplier nextPhaseSupplier); - - /** - * Registers a {@link Releasable} that will be closed when the search request finishes or fails. 
- */ - void addReleasable(Releasable releasable); -} diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java index e92b5bbf4b5e5..84e0e2adea612 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java @@ -135,7 +135,7 @@ && getRequest().scroll() == null static SearchPhase nextPhase( Client client, - SearchPhaseContext context, + AbstractSearchAsyncAction context, SearchPhaseResults queryResults, AggregatedDfs aggregatedDfs ) { diff --git a/server/src/test/java/org/elasticsearch/action/search/CountedCollectorTests.java b/server/src/test/java/org/elasticsearch/action/search/CountedCollectorTests.java index 40c6a707a87f9..4ba03f85aa05c 100644 --- a/server/src/test/java/org/elasticsearch/action/search/CountedCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/CountedCollectorTests.java @@ -93,6 +93,7 @@ public void testCollect() throws InterruptedException { for (int i = numResultsExpected; i < results.length(); i++) { assertNull("index: " + i, results.get(i)); } + context.results.close(); } } } diff --git a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java index 99401e8a8d40a..64362daf7f75c 100644 --- a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java @@ -134,7 +134,7 @@ public void sendExecuteQuery( new NoopCircuitBreaker(CircuitBreaker.REQUEST), () -> false, SearchProgressListener.NOOP, - mockSearchPhaseContext.searchRequest, + mockSearchPhaseContext.getRequest(), results.length(), exc -> {} ) @@ -159,6 +159,7 @@ public void run() throws IOException { assertEquals(84, responseRef.get().get(1).queryResult().topDocs().topDocs.scoreDocs[0].doc); assertTrue(mockSearchPhaseContext.releasedSearchContexts.isEmpty()); assertEquals(2, mockSearchPhaseContext.numSuccess.get()); + mockSearchPhaseContext.results.close(); } } @@ -219,7 +220,7 @@ public void sendExecuteQuery( new NoopCircuitBreaker(CircuitBreaker.REQUEST), () -> false, SearchProgressListener.NOOP, - mockSearchPhaseContext.searchRequest, + mockSearchPhaseContext.getRequest(), results.length(), exc -> {} ) @@ -246,6 +247,7 @@ public void run() throws IOException { assertEquals(1, mockSearchPhaseContext.releasedSearchContexts.size()); assertTrue(mockSearchPhaseContext.releasedSearchContexts.contains(new ShardSearchContextId("", 2L))); assertNull(responseRef.get().get(1)); + mockSearchPhaseContext.results.close(); } } @@ -306,7 +308,7 @@ public void sendExecuteQuery( new NoopCircuitBreaker(CircuitBreaker.REQUEST), () -> false, SearchProgressListener.NOOP, - mockSearchPhaseContext.searchRequest, + mockSearchPhaseContext.getRequest(), results.length(), exc -> {} ) @@ -322,6 +324,7 @@ public void run() throws IOException { assertThat(mockSearchPhaseContext.failures, hasSize(1)); assertThat(mockSearchPhaseContext.failures.get(0).getCause(), instanceOf(UncheckedIOException.class)); assertThat(mockSearchPhaseContext.releasedSearchContexts, hasSize(1)); // phase execution will clean up on the contexts + mockSearchPhaseContext.results.close(); } } @@ -371,6 +374,7 @@ public void testRewriteShardSearchRequestWithRank() { 
ssr.source().subSearches().get(2).getQueryBuilder() ) ); + mspc.results.close(); } private SearchPhaseController searchPhaseController() { diff --git a/server/src/test/java/org/elasticsearch/action/search/ExpandSearchPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/ExpandSearchPhaseTests.java index 5240d704dea3b..23184be02f9c3 100644 --- a/server/src/test/java/org/elasticsearch/action/search/ExpandSearchPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/ExpandSearchPhaseTests.java @@ -229,7 +229,7 @@ public void run() { assertNotNull(mockSearchPhaseContext.phaseFailure.get()); assertNull(mockSearchPhaseContext.searchResponse.get()); } finally { - mockSearchPhaseContext.execute(() -> {}); + mockSearchPhaseContext.results.close(); hits.decRef(); collapsedHits.decRef(); } @@ -269,7 +269,7 @@ public void run() { hits.decRef(); } } finally { - mockSearchPhaseContext.execute(() -> {}); + mockSearchPhaseContext.results.close(); var resp = mockSearchPhaseContext.searchResponse.get(); if (resp != null) { resp.decRef(); @@ -356,6 +356,7 @@ public void run() { hits.decRef(); } } finally { + mockSearchPhaseContext.results.close(); var resp = mockSearchPhaseContext.searchResponse.get(); if (resp != null) { resp.decRef(); @@ -407,6 +408,7 @@ public void run() { hits.decRef(); } } finally { + mockSearchPhaseContext.results.close(); var resp = mockSearchPhaseContext.searchResponse.get(); if (resp != null) { resp.decRef(); diff --git a/server/src/test/java/org/elasticsearch/action/search/FetchLookupFieldsPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/FetchLookupFieldsPhaseTests.java index e478ed2d0ccb5..1d2daf0cd660e 100644 --- a/server/src/test/java/org/elasticsearch/action/search/FetchLookupFieldsPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/FetchLookupFieldsPhaseTests.java @@ -57,7 +57,6 @@ void sendExecuteMultiSearch(MultiSearchRequest request, SearchTask task, ActionL } searchPhaseContext.assertNoFailure(); assertNotNull(searchPhaseContext.searchResponse.get()); - searchPhaseContext.execute(() -> {}); } finally { var resp = searchPhaseContext.searchResponse.get(); if (resp != null) { @@ -225,8 +224,8 @@ void sendExecuteMultiSearch( leftHit1.field("lookup_field_3").getValues(), contains(Map.of("field_a", List.of("a2"), "field_b", List.of("b1", "b2"))) ); - searchPhaseContext.execute(() -> {}); } finally { + searchPhaseContext.results.close(); var resp = searchPhaseContext.searchResponse.get(); if (resp != null) { resp.decRef(); diff --git a/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java index 09dd7821cd123..762a7e0f47cab 100644 --- a/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/FetchSearchPhaseTests.java @@ -123,6 +123,7 @@ public void testShortcutQueryAndFetchOptimization() throws Exception { assertProfiles(profiled, 1, searchResponse); assertTrue(mockSearchPhaseContext.releasedSearchContexts.isEmpty()); } finally { + mockSearchPhaseContext.results.close(); var resp = mockSearchPhaseContext.searchResponse.get(); if (resp != null) { resp.decRef(); @@ -252,6 +253,7 @@ public void sendExecuteFetch( assertProfiles(profiled, 2, searchResponse); assertTrue(mockSearchPhaseContext.releasedSearchContexts.isEmpty()); } finally { + mockSearchPhaseContext.results.close(); var resp = 
mockSearchPhaseContext.searchResponse.get(); if (resp != null) { resp.decRef(); diff --git a/server/src/test/java/org/elasticsearch/action/search/MockSearchPhaseContext.java b/server/src/test/java/org/elasticsearch/action/search/MockSearchPhaseContext.java index 5395e4569901a..03c5d0a06f6fb 100644 --- a/server/src/test/java/org/elasticsearch/action/search/MockSearchPhaseContext.java +++ b/server/src/test/java/org/elasticsearch/action/search/MockSearchPhaseContext.java @@ -10,12 +10,15 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.routing.GroupShardsIterator; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; import org.elasticsearch.search.SearchPhaseResult; import org.elasticsearch.search.SearchShardTarget; @@ -32,23 +35,41 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import static org.mockito.Mockito.mock; + /** * SearchPhaseContext for tests */ -public final class MockSearchPhaseContext implements SearchPhaseContext { +public final class MockSearchPhaseContext extends AbstractSearchAsyncAction { private static final Logger logger = LogManager.getLogger(MockSearchPhaseContext.class); - final AtomicReference phaseFailure = new AtomicReference<>(); + public final AtomicReference phaseFailure = new AtomicReference<>(); final int numShards; final AtomicInteger numSuccess; - final List failures = Collections.synchronizedList(new ArrayList<>()); + public final List failures = Collections.synchronizedList(new ArrayList<>()); SearchTransportService searchTransport; final Set releasedSearchContexts = new HashSet<>(); - final SearchRequest searchRequest = new SearchRequest(); - final AtomicReference searchResponse = new AtomicReference<>(); - - private final List releasables = new ArrayList<>(); + public final AtomicReference searchResponse = new AtomicReference<>(); public MockSearchPhaseContext(int numShards) { + super( + "mock", + logger, + new NamedWriteableRegistry(List.of()), + mock(SearchTransportService.class), + (clusterAlias, nodeId) -> null, + null, + null, + Runnable::run, + new SearchRequest(), + ActionListener.noop(), + new GroupShardsIterator(List.of()), + null, + ClusterState.EMPTY_STATE, + new SearchTask(0, "n/a", "n/a", () -> "test", null, Collections.emptyMap()), + new ArraySearchPhaseResults<>(numShards), + 5, + null + ); this.numShards = numShards; numSuccess = new AtomicInteger(numShards); } @@ -59,28 +80,9 @@ public void assertNoFailure() { } } - @Override - public int getNumShards() { - return numShards; - } - - @Override - public Logger getLogger() { - return logger; - } - - @Override - public SearchTask getTask() { - return new SearchTask(0, "n/a", "n/a", () -> "test", null, Collections.emptyMap()); - } - - @Override - public SearchRequest getRequest() { - return searchRequest; - } - @Override public OriginalIndices getOriginalIndices(int shardIndex) { + var searchRequest = getRequest(); return new OriginalIndices(searchRequest.indices(), searchRequest.indicesOptions()); } @@ -122,8 +124,8 @@ public void 
onShardFailure(int shardIndex, @Nullable SearchShardTarget shardTarg } @Override - public Transport.Connection getConnection(String clusterAlias, String nodeId) { - return null; // null is ok here for this test + protected SearchPhase getNextPhase() { + return null; } @Override @@ -143,13 +145,13 @@ public void executeNextPhase(SearchPhase currentPhase, Supplier nex } @Override - public void addReleasable(Releasable releasable) { - releasables.add(releasable); - } - - @Override - public void execute(Runnable command) { - command.run(); + protected void executePhaseOnShard( + SearchShardIterator shardIt, + SearchShardTarget shard, + SearchActionListener listener + ) { + onShardResult(new SearchPhaseResult() { + }, shardIt); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/search/RankFeaturePhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/RankFeaturePhaseTests.java index a4201716d31e2..3d104758b096b 100644 --- a/server/src/test/java/org/elasticsearch/action/search/RankFeaturePhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/RankFeaturePhaseTests.java @@ -155,6 +155,7 @@ public void sendExecuteRankFeature( rankFeaturePhase.rankPhaseResults.close(); } } finally { + mockSearchPhaseContext.results.close(); if (mockSearchPhaseContext.searchResponse.get() != null) { mockSearchPhaseContext.searchResponse.get().decRef(); } @@ -281,6 +282,7 @@ public void sendExecuteRankFeature( rankFeaturePhase.rankPhaseResults.close(); } } finally { + mockSearchPhaseContext.results.close(); if (mockSearchPhaseContext.searchResponse.get() != null) { mockSearchPhaseContext.searchResponse.get().decRef(); } @@ -385,6 +387,7 @@ public void sendExecuteRankFeature( rankFeaturePhase.rankPhaseResults.close(); } } finally { + mockSearchPhaseContext.results.close(); if (mockSearchPhaseContext.searchResponse.get() != null) { mockSearchPhaseContext.searchResponse.get().decRef(); } @@ -480,6 +483,7 @@ public void moveToNextPhase( rankFeaturePhase.rankPhaseResults.close(); } } finally { + mockSearchPhaseContext.results.close(); if (mockSearchPhaseContext.searchResponse.get() != null) { mockSearchPhaseContext.searchResponse.get().decRef(); } @@ -626,6 +630,7 @@ public void sendExecuteRankFeature( rankFeaturePhase.rankPhaseResults.close(); } } finally { + mockSearchPhaseContext.results.close(); if (mockSearchPhaseContext.searchResponse.get() != null) { mockSearchPhaseContext.searchResponse.get().decRef(); } @@ -762,6 +767,7 @@ public void sendExecuteRankFeature( rankFeaturePhase.rankPhaseResults.close(); } } finally { + mockSearchPhaseContext.results.close(); if (mockSearchPhaseContext.searchResponse.get() != null) { mockSearchPhaseContext.searchResponse.get().decRef(); } From 7369c0818df0166ee18d50f5a1d9be0ba0bc005b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Fri, 8 Nov 2024 09:14:19 -0500 Subject: [PATCH 08/39] Add new multi_dense_vector field for brute-force search (#116275) This adds a new `multi_dense_vector` field that focuses on the maxSim use case provided by Col[BERT|Pali]. Indexing these vectors in HNSW as it stands makes no sense, either performance-wise or in terms of cost. However, we should totally support rescoring and brute-force search over vectors with maxSim. This is step one of many. Behind a feature flag, this adds support for indexing any number of vectors of the same dimension. Supports bit/byte/float. Scripting support will be a follow-up. Marking as non-issue as it's behind a flag and currently unusable.
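For context: maxSim is the ColBERT/ColPali-style late-interaction score, where each query vector is matched against its best-scoring document vector and those per-query maxima are summed. The sketch below is ours, not part of this change, and the class and method names are hypothetical; it only illustrates the scoring this field is meant to enable once scripting support lands.

    import java.util.List;

    public final class MaxSimExample {

        // Dot product of two equal-length float vectors.
        static float dotProduct(float[] a, float[] b) {
            float sum = 0f;
            for (int i = 0; i < a.length; i++) {
                sum += a[i] * b[i];
            }
            return sum;
        }

        // maxSim: for each query vector, keep the best-matching document
        // vector's similarity, then sum those maxima across query vectors.
        static float maxSim(List<float[]> queryVectors, List<float[]> docVectors) {
            float score = 0f;
            for (float[] q : queryVectors) {
                float best = Float.NEGATIVE_INFINITY;
                for (float[] d : docVectors) {
                    best = Math.max(best, dotProduct(q, d));
                }
                score += best;
            }
            return score;
        }

        public static void main(String[] args) {
            List<float[]> query = List.of(new float[] { 1f, 0f, 0f }, new float[] { 0f, 1f, 0f });
            List<float[]> doc = List.of(new float[] { 0.9f, 0.1f, 0f }, new float[] { 0f, 0.8f, 0.6f });
            System.out.println("maxSim = " + maxSim(query, doc)); // 0.9 + 0.8 ~= 1.7
        }
    }

Brute-force scoring like this is why the field stores all of a document's vectors (plus their magnitudes) in a single binary doc value rather than indexing them into an HNSW graph.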
--- .../search.vectors/30_multi_dense_vector.yml | 141 +++++ .../vectors/DenseVectorFieldMapper.java | 120 +++-- .../vectors/MultiDenseVectorFieldMapper.java | 431 +++++++++++++++ .../vectors/MultiVectorDVLeafFieldData.java | 54 ++ .../vectors/MultiVectorIndexFieldData.java | 114 ++++ .../elasticsearch/indices/IndicesModule.java | 4 + .../action/search/SearchCapabilities.java | 39 +- .../MultiDenseVectorFieldMapperTests.java | 506 ++++++++++++++++++ .../MultiDenseVectorFieldTypeTests.java | 105 ++++ .../aggregations/AggregatorTestCase.java | 2 + 10 files changed, 1451 insertions(+), 65 deletions(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/30_multi_dense_vector.yml create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapper.java create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorDVLeafFieldData.java create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorIndexFieldData.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapperTests.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldTypeTests.java diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/30_multi_dense_vector.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/30_multi_dense_vector.yml new file mode 100644 index 0000000000000..80d1d25dfcbd8 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/30_multi_dense_vector.yml @@ -0,0 +1,141 @@ +setup: + - requires: + capabilities: + - method: POST + path: /_search + capabilities: [ multi_dense_vector_field_mapper ] + test_runner_features: capabilities + reason: "Support for multi dense vector field mapper capability required" +--- +"Test create multi-vector field": + - do: + indices.create: + index: test + body: + mappings: + properties: + vector1: + type: multi_dense_vector + dims: 3 + - do: + index: + index: test + id: "1" + body: + vector1: [[2, -1, 1]] + - do: + index: + index: test + id: "2" + body: + vector1: [[2, -1, 1], [3, 4, 5]] + - do: + index: + index: test + id: "3" + body: + vector1: [[2, -1, 1], [3, 4, 5], [6, 7, 8]] + - do: + indices.refresh: {} +--- +"Test create dynamic dim multi-vector field": + - do: + indices.create: + index: test + body: + mappings: + properties: + name: + type: keyword + vector1: + type: multi_dense_vector + - do: + index: + index: test + id: "1" + body: + vector1: [[2, -1, 1]] + - do: + index: + index: test + id: "2" + body: + vector1: [[2, -1, 1], [3, 4, 5]] + - do: + index: + index: test + id: "3" + body: + vector1: [[2, -1, 1], [3, 4, 5], [6, 7, 8]] + - do: + cluster.health: + wait_for_events: languid + + # verify some other dimension will fail + - do: + catch: bad_request + index: + index: test + id: "4" + body: + vector1: [[2, -1, 1], [3, 4, 5], [6, 7, 8, 9]] +--- +"Test dynamic dim mismatch fails multi-vector field": + - do: + indices.create: + index: test + body: + mappings: + properties: + vector1: + type: multi_dense_vector + - do: + catch: bad_request + index: + index: test + id: "1" + body: + vector1: [[2, -1, 1], [2]] +--- +"Test static dim mismatch fails multi-vector field": + - do: + indices.create: + index: test + body: + mappings: + properties: + vector1: + type: multi_dense_vector + dims: 3 + - do: + catch: bad_request + index: + index: test + id: "1" 
+ body: + vector1: [[2, -1, 1], [2]] +--- +"Test poorly formatted multi-vector field": + - do: + indices.create: + index: poorly_formatted_vector + body: + mappings: + properties: + vector1: + type: multi_dense_vector + dims: 3 + - do: + catch: bad_request + index: + index: poorly_formatted_vector + id: "1" + body: + vector1: [[[2, -1, 1]]] + - do: + catch: bad_request + index: + index: poorly_formatted_vector + id: "1" + body: + vector1: [[2, -1, 1], [[2, -1, 1]]] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 1c61dcec906a1..dea9368a9377e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -416,13 +416,18 @@ public double computeSquaredMagnitude(VectorData vectorData) { return VectorUtil.dotProduct(vectorData.asByteVector(), vectorData.asByteVector()); } - private VectorData parseVectorArray(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + private VectorData parseVectorArray( + DocumentParserContext context, + int dims, + IntBooleanConsumer dimChecker, + VectorSimilarity similarity + ) throws IOException { int index = 0; - byte[] vector = new byte[fieldMapper.fieldType().dims]; + byte[] vector = new byte[dims]; float squaredMagnitude = 0; for (XContentParser.Token token = context.parser().nextToken(); token != Token.END_ARRAY; token = context.parser() .nextToken()) { - fieldMapper.checkDimensionExceeded(index, context); + dimChecker.accept(index, false); ensureExpectedToken(Token.VALUE_NUMBER, token, context.parser()); final int value; if (context.parser().numberType() != XContentParser.NumberType.INT) { @@ -460,30 +465,31 @@ private VectorData parseVectorArray(DocumentParserContext context, DenseVectorFi vector[index++] = (byte) value; squaredMagnitude += value * value; } - fieldMapper.checkDimensionMatches(index, context); - checkVectorMagnitude(fieldMapper.fieldType().similarity, errorByteElementsAppender(vector), squaredMagnitude); + dimChecker.accept(index, true); + checkVectorMagnitude(similarity, errorByteElementsAppender(vector), squaredMagnitude); return VectorData.fromBytes(vector); } - private VectorData parseHexEncodedVector(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + private VectorData parseHexEncodedVector( + DocumentParserContext context, + IntBooleanConsumer dimChecker, + VectorSimilarity similarity + ) throws IOException { byte[] decodedVector = HexFormat.of().parseHex(context.parser().text()); - fieldMapper.checkDimensionMatches(decodedVector.length, context); + dimChecker.accept(decodedVector.length, true); VectorData vectorData = VectorData.fromBytes(decodedVector); double squaredMagnitude = computeSquaredMagnitude(vectorData); - checkVectorMagnitude( - fieldMapper.fieldType().similarity, - errorByteElementsAppender(decodedVector), - (float) squaredMagnitude - ); + checkVectorMagnitude(similarity, errorByteElementsAppender(decodedVector), (float) squaredMagnitude); return vectorData; } @Override - VectorData parseKnnVector(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanConsumer dimChecker, VectorSimilarity similarity) + throws IOException { XContentParser.Token token = 
context.parser().currentToken(); return switch (token) { - case START_ARRAY -> parseVectorArray(context, fieldMapper); - case VALUE_STRING -> parseHexEncodedVector(context, fieldMapper); + case START_ARRAY -> parseVectorArray(context, dims, dimChecker, similarity); + case VALUE_STRING -> parseHexEncodedVector(context, dimChecker, similarity); default -> throw new ParsingException( context.parser().getTokenLocation(), format("Unsupported type [%s] for provided value [%s]", token, context.parser().text()) @@ -493,7 +499,13 @@ VectorData parseKnnVector(DocumentParserContext context, DenseVectorFieldMapper @Override public void parseKnnVectorAndIndex(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { - VectorData vectorData = parseKnnVector(context, fieldMapper); + VectorData vectorData = parseKnnVector(context, fieldMapper.fieldType().dims, (i, end) -> { + if (end) { + fieldMapper.checkDimensionMatches(i, context); + } else { + fieldMapper.checkDimensionExceeded(i, context); + } + }, fieldMapper.fieldType().similarity); Field field = createKnnVectorField( fieldMapper.fieldType().name(), vectorData.asByteVector(), @@ -677,21 +689,22 @@ && isNotUnitVector(squaredMagnitude)) { } @Override - VectorData parseKnnVector(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanConsumer dimChecker, VectorSimilarity similarity) + throws IOException { int index = 0; float squaredMagnitude = 0; - float[] vector = new float[fieldMapper.fieldType().dims]; + float[] vector = new float[dims]; for (Token token = context.parser().nextToken(); token != Token.END_ARRAY; token = context.parser().nextToken()) { - fieldMapper.checkDimensionExceeded(index, context); + dimChecker.accept(index, false); ensureExpectedToken(Token.VALUE_NUMBER, token, context.parser()); float value = context.parser().floatValue(true); vector[index] = value; squaredMagnitude += value * value; index++; } - fieldMapper.checkDimensionMatches(index, context); + dimChecker.accept(index, true); checkVectorBounds(vector); - checkVectorMagnitude(fieldMapper.fieldType().similarity, errorFloatElementsAppender(vector), squaredMagnitude); + checkVectorMagnitude(similarity, errorFloatElementsAppender(vector), squaredMagnitude); return VectorData.fromFloats(vector); } @@ -816,12 +829,17 @@ public double computeSquaredMagnitude(VectorData vectorData) { return count; } - private VectorData parseVectorArray(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + private VectorData parseVectorArray( + DocumentParserContext context, + int dims, + IntBooleanConsumer dimChecker, + VectorSimilarity similarity + ) throws IOException { int index = 0; - byte[] vector = new byte[fieldMapper.fieldType().dims / Byte.SIZE]; + byte[] vector = new byte[dims / Byte.SIZE]; for (XContentParser.Token token = context.parser().nextToken(); token != Token.END_ARRAY; token = context.parser() .nextToken()) { - fieldMapper.checkDimensionExceeded(index, context); + dimChecker.accept(index * Byte.SIZE, false); ensureExpectedToken(Token.VALUE_NUMBER, token, context.parser()); final int value; if (context.parser().numberType() != XContentParser.NumberType.INT) { @@ -856,35 +874,25 @@ private VectorData parseVectorArray(DocumentParserContext context, DenseVectorFi + "];" ); } - if (index >= vector.length) { - throw new IllegalArgumentException( - "The number of dimensions for field [" - + 
fieldMapper.fieldType().name() - + "] should be [" - + fieldMapper.fieldType().dims - + "] but found [" - + (index + 1) * Byte.SIZE - + "]" - ); - } vector[index++] = (byte) value; } - fieldMapper.checkDimensionMatches(index * Byte.SIZE, context); + dimChecker.accept(index * Byte.SIZE, true); return VectorData.fromBytes(vector); } - private VectorData parseHexEncodedVector(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + private VectorData parseHexEncodedVector(DocumentParserContext context, IntBooleanConsumer dimChecker) throws IOException { byte[] decodedVector = HexFormat.of().parseHex(context.parser().text()); - fieldMapper.checkDimensionMatches(decodedVector.length * Byte.SIZE, context); + dimChecker.accept(decodedVector.length * Byte.SIZE, true); return VectorData.fromBytes(decodedVector); } @Override - VectorData parseKnnVector(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { + VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanConsumer dimChecker, VectorSimilarity similarity) + throws IOException { XContentParser.Token token = context.parser().currentToken(); return switch (token) { - case START_ARRAY -> parseVectorArray(context, fieldMapper); - case VALUE_STRING -> parseHexEncodedVector(context, fieldMapper); + case START_ARRAY -> parseVectorArray(context, dims, dimChecker, similarity); + case VALUE_STRING -> parseHexEncodedVector(context, dimChecker); default -> throw new ParsingException( context.parser().getTokenLocation(), format("Unsupported type [%s] for provided value [%s]", token, context.parser().text()) @@ -894,7 +902,13 @@ VectorData parseKnnVector(DocumentParserContext context, DenseVectorFieldMapper @Override public void parseKnnVectorAndIndex(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException { - VectorData vectorData = parseKnnVector(context, fieldMapper); + VectorData vectorData = parseKnnVector(context, fieldMapper.fieldType().dims, (i, end) -> { + if (end) { + fieldMapper.checkDimensionMatches(i, context); + } else { + fieldMapper.checkDimensionExceeded(i, context); + } + }, fieldMapper.fieldType().similarity); Field field = createKnnVectorField( fieldMapper.fieldType().name(), vectorData.asByteVector(), @@ -958,7 +972,12 @@ public void checkDimensions(Integer dvDims, int qvDims) { abstract void parseKnnVectorAndIndex(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException; - abstract VectorData parseKnnVector(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException; + abstract VectorData parseKnnVector( + DocumentParserContext context, + int dims, + IntBooleanConsumer dimChecker, + VectorSimilarity similarity + ) throws IOException; abstract int getNumBytes(int dimensions); @@ -2180,7 +2199,13 @@ private void parseBinaryDocValuesVectorAndIndex(DocumentParserContext context) t : elementType.getNumBytes(dims); ByteBuffer byteBuffer = elementType.createByteBuffer(indexCreatedVersion, numBytes); - VectorData vectorData = elementType.parseKnnVector(context, this); + VectorData vectorData = elementType.parseKnnVector(context, dims, (i, b) -> { + if (b) { + checkDimensionMatches(i, context); + } else { + checkDimensionExceeded(i, context); + } + }, fieldType().similarity); vectorData.addToBuffer(byteBuffer); if (indexCreatedVersion.onOrAfter(MAGNITUDE_STORED_INDEX_VERSION)) { // encode vector magnitude at the end @@ -2433,4 +2458,11 @@ public String fieldName() { return 
fullPath(); } } + + /** + * A functional interface for a function that takes an int and a boolean + */ + interface IntBooleanConsumer { + void accept(int value, boolean isComplete); + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapper.java new file mode 100644 index 0000000000000..b23a1f1f66792 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapper.java @@ -0,0 +1,431 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.fielddata.FieldDataContext; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.mapper.ArraySourceValueFetcher; +import org.elasticsearch.index.mapper.DocumentParserContext; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.SimpleMappedFieldType; +import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.vectors.VectorData; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT_BIT; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.namesToElementType; + +public class MultiDenseVectorFieldMapper extends FieldMapper { + + public static final String VECTOR_MAGNITUDES_SUFFIX = "._magnitude"; + public static final FeatureFlag FEATURE_FLAG = new FeatureFlag("multi_dense_vector"); + public static final String CONTENT_TYPE = "multi_dense_vector"; + + private static MultiDenseVectorFieldMapper toType(FieldMapper in) { + return (MultiDenseVectorFieldMapper) in; + }
+ + public static class Builder extends FieldMapper.Builder { + + private final Parameter elementType = new Parameter<>( + "element_type", + false, + () -> DenseVectorFieldMapper.ElementType.FLOAT, + (n, c, o) -> { + DenseVectorFieldMapper.ElementType elementType = namesToElementType.get((String) o); + if (elementType == null) { + throw new MapperParsingException( + "invalid element_type [" + o + "]; available types are " + namesToElementType.keySet() + ); + } + return elementType; + }, + m -> toType(m).fieldType().elementType, + XContentBuilder::field, + Objects::toString + ); + + // This is defined as updatable because it can be updated once, from [null] to a valid dim size, + // by a dynamic mapping update. Once it has been set, however, the value cannot be changed. + private final Parameter dims = new Parameter<>("dims", true, () -> null, (n, c, o) -> { + if (o instanceof Integer == false) { + throw new MapperParsingException("Property [dims] on field [" + n + "] must be an integer but got [" + o + "]"); + } + + return XContentMapValues.nodeIntegerValue(o); + }, m -> toType(m).fieldType().dims, XContentBuilder::field, Object::toString).setSerializerCheck((id, ic, v) -> v != null) + .setMergeValidator((previous, current, c) -> previous == null || Objects.equals(previous, current)) + .addValidator(dims -> { + if (dims == null) { + return; + } + int maxDims = elementType.getValue() == DenseVectorFieldMapper.ElementType.BIT ? MAX_DIMS_COUNT_BIT : MAX_DIMS_COUNT; + int minDims = elementType.getValue() == DenseVectorFieldMapper.ElementType.BIT ? Byte.SIZE : 1; + if (dims < minDims || dims > maxDims) { + throw new MapperParsingException( + "The number of dimensions should be in the range [" + minDims + ", " + maxDims + "] but was [" + dims + "]" + ); + } + if (elementType.getValue() == DenseVectorFieldMapper.ElementType.BIT) { + if (dims % Byte.SIZE != 0) { + throw new MapperParsingException("The number of dimensions should be a multiple of 8 but was [" + dims + "]"); + } + } + }); + private final Parameter> meta = Parameter.metaParam(); + + private final IndexVersion indexCreatedVersion; + + public Builder(String name, IndexVersion indexCreatedVersion) { + super(name); + this.indexCreatedVersion = indexCreatedVersion; + } + + @Override + protected Parameter[] getParameters() { + return new Parameter[] { elementType, dims, meta }; + } + + public MultiDenseVectorFieldMapper.Builder dimensions(int dimensions) { + this.dims.setValue(dimensions); + return this; + } + + public MultiDenseVectorFieldMapper.Builder elementType(DenseVectorFieldMapper.ElementType elementType) { + this.elementType.setValue(elementType); + return this; + } + + @Override + public MultiDenseVectorFieldMapper build(MapperBuilderContext context) { + // Validate again here because the dimensions or element type could have been set programmatically, + // which affects index option validity + validate(); + return new MultiDenseVectorFieldMapper( + leafName(), + new MultiDenseVectorFieldType( + context.buildFullName(leafName()), + elementType.getValue(), + dims.getValue(), + indexCreatedVersion, + meta.getValue() + ), + builderParams(this, context), + indexCreatedVersion + ); + } + } + + public static final TypeParser PARSER = new TypeParser( + (n, c) -> new MultiDenseVectorFieldMapper.Builder(n, c.indexVersionCreated()), + notInMultiFields(CONTENT_TYPE) + ); + + public static final class MultiDenseVectorFieldType extends SimpleMappedFieldType { + private final DenseVectorFieldMapper.ElementType elementType; + private final
Integer dims; + private final IndexVersion indexCreatedVersion; + + public MultiDenseVectorFieldType( + String name, + DenseVectorFieldMapper.ElementType elementType, + Integer dims, + IndexVersion indexCreatedVersion, + Map meta + ) { + super(name, false, false, true, TextSearchInfo.NONE, meta); + this.elementType = elementType; + this.dims = dims; + this.indexCreatedVersion = indexCreatedVersion; + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + return new ArraySourceValueFetcher(name(), context) { + @Override + protected Object parseSourceValue(Object value) { + return value; + } + }; + } + + @Override + public DocValueFormat docValueFormat(String format, ZoneId timeZone) { + throw new IllegalArgumentException( + "Field [" + name() + "] of type [" + typeName() + "] doesn't support docvalue_fields or aggregations" + ); + } + + @Override + public boolean isAggregatable() { + return false; + } + + @Override + public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { + return new MultiVectorIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD, indexCreatedVersion, dims, elementType); + } + + @Override + public Query existsQuery(SearchExecutionContext context) { + return new FieldExistsQuery(name()); + } + + @Override + public Query termQuery(Object value, SearchExecutionContext context) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support term queries"); + } + + int getVectorDimensions() { + return dims; + } + + DenseVectorFieldMapper.ElementType getElementType() { + return elementType; + } + } + + private final IndexVersion indexCreatedVersion; + + private MultiDenseVectorFieldMapper( + String simpleName, + MappedFieldType fieldType, + BuilderParams params, + IndexVersion indexCreatedVersion + ) { + super(simpleName, fieldType, params); + this.indexCreatedVersion = indexCreatedVersion; + } + + @Override + public MultiDenseVectorFieldType fieldType() { + return (MultiDenseVectorFieldType) super.fieldType(); + } + + @Override + public boolean parsesArrayValue() { + return true; + } + + @Override + public void parse(DocumentParserContext context) throws IOException { + if (context.doc().getByKey(fieldType().name()) != null) { + throw new IllegalArgumentException( + "Field [" + + fullPath() + + "] of type [" + + typeName() + + "] doesn't support indexing multiple values for the same field in the same document" + ); + } + if (XContentParser.Token.VALUE_NULL == context.parser().currentToken()) { + return; + } + if (XContentParser.Token.START_ARRAY != context.parser().currentToken()) { + throw new IllegalArgumentException( + "Field [" + fullPath() + "] of type [" + typeName() + "] cannot be indexed with a single value" + ); + } + if (fieldType().dims == null) { + int currentDims = -1; + while (XContentParser.Token.END_ARRAY != context.parser().nextToken()) { + int dims = fieldType().elementType.parseDimensionCount(context); + if (currentDims == -1) { + currentDims = dims; + } else if (currentDims != dims) { + throw new IllegalArgumentException( + "Field [" + fullPath() + "] of type [" + typeName() + "] cannot be indexed with vectors of different dimensions" + ); + } + } + MultiDenseVectorFieldType updatedFieldType = new 
MultiDenseVectorFieldType( + fieldType().name(), + fieldType().elementType, + currentDims, + indexCreatedVersion, + fieldType().meta() + ); + Mapper update = new MultiDenseVectorFieldMapper(leafName(), updatedFieldType, builderParams, indexCreatedVersion); + context.addDynamicMapper(update); + return; + } + int dims = fieldType().dims; + DenseVectorFieldMapper.ElementType elementType = fieldType().elementType; + List vectors = new ArrayList<>(); + while (XContentParser.Token.END_ARRAY != context.parser().nextToken()) { + VectorData vector = elementType.parseKnnVector(context, dims, (i, b) -> { + if (b) { + checkDimensionMatches(i, context); + } else { + checkDimensionExceeded(i, context); + } + }, null); + vectors.add(vector); + } + int bufferSize = elementType.getNumBytes(dims) * vectors.size(); + ByteBuffer buffer = ByteBuffer.allocate(bufferSize).order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer magnitudeBuffer = ByteBuffer.allocate(vectors.size() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); + for (VectorData vector : vectors) { + vector.addToBuffer(buffer); + magnitudeBuffer.putFloat((float) Math.sqrt(elementType.computeSquaredMagnitude(vector))); + } + String vectorFieldName = fieldType().name(); + String vectorMagnitudeFieldName = vectorFieldName + VECTOR_MAGNITUDES_SUFFIX; + context.doc().addWithKey(vectorFieldName, new BinaryDocValuesField(vectorFieldName, new BytesRef(buffer.array()))); + context.doc() + .addWithKey( + vectorMagnitudeFieldName, + new BinaryDocValuesField(vectorMagnitudeFieldName, new BytesRef(magnitudeBuffer.array())) + ); + } + + private void checkDimensionExceeded(int index, DocumentParserContext context) { + if (index >= fieldType().dims) { + throw new IllegalArgumentException( + "The [" + + typeName() + + "] field [" + + fullPath() + + "] in doc [" + + context.documentDescription() + + "] has more dimensions " + + "than defined in the mapping [" + + fieldType().dims + + "]" + ); + } + } + + private void checkDimensionMatches(int index, DocumentParserContext context) { + if (index != fieldType().dims) { + throw new IllegalArgumentException( + "The [" + + typeName() + + "] field [" + + fullPath() + + "] in doc [" + + context.documentDescription() + + "] has a different number of dimensions " + + "[" + + index + + "] than defined in the mapping [" + + fieldType().dims + + "]" + ); + } + } + + @Override + protected void parseCreateField(DocumentParserContext context) { + throw new AssertionError("parse is implemented directly"); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + public FieldMapper.Builder getMergeBuilder() { + return new MultiDenseVectorFieldMapper.Builder(leafName(), indexCreatedVersion).init(this); + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport() { + return new SyntheticSourceSupport.Native(new MultiDenseVectorFieldMapper.DocValuesSyntheticFieldLoader()); + } + + private class DocValuesSyntheticFieldLoader extends SourceLoader.DocValuesBasedSyntheticFieldLoader { + private BinaryDocValues values; + private boolean hasValue; + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + values = leafReader.getBinaryDocValues(fullPath()); + if (values == null) { + return null; + } + return docId -> { + hasValue = docId == values.advance(docId); + return hasValue; + }; + } + + @Override + public boolean hasValue() { + return hasValue; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if 
(false == hasValue) { + return; + } + b.startArray(leafName()); + BytesRef ref = values.binaryValue(); + ByteBuffer byteBuffer = ByteBuffer.wrap(ref.bytes, ref.offset, ref.length).order(ByteOrder.LITTLE_ENDIAN); + assert ref.length % fieldType().elementType.getNumBytes(fieldType().dims) == 0; + int numVecs = ref.length / fieldType().elementType.getNumBytes(fieldType().dims); + for (int i = 0; i < numVecs; i++) { + b.startArray(); + int dims = fieldType().elementType == DenseVectorFieldMapper.ElementType.BIT + ? fieldType().dims / Byte.SIZE + : fieldType().dims; + for (int dim = 0; dim < dims; dim++) { + fieldType().elementType.readAndWriteValue(byteBuffer, b); + } + b.endArray(); + } + b.endArray(); + } + + @Override + public String fieldName() { + return fullPath(); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorDVLeafFieldData.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorDVLeafFieldData.java new file mode 100644 index 0000000000000..cc6fb38274451 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorDVLeafFieldData.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.apache.lucene.index.LeafReader; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.fielddata.LeafFieldData; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.script.field.DocValuesScriptFieldFactory; + +final class MultiVectorDVLeafFieldData implements LeafFieldData { + private final LeafReader reader; + private final String field; + private final IndexVersion indexVersion; + private final DenseVectorFieldMapper.ElementType elementType; + private final int dims; + + MultiVectorDVLeafFieldData( + LeafReader reader, + String field, + IndexVersion indexVersion, + DenseVectorFieldMapper.ElementType elementType, + int dims + ) { + this.reader = reader; + this.field = field; + this.indexVersion = indexVersion; + this.elementType = elementType; + this.dims = dims; + } + + @Override + public DocValuesScriptFieldFactory getScriptFieldFactory(String name) { + // TODO + return null; + } + + @Override + public SortedBinaryDocValues getBytesValues() { + throw new UnsupportedOperationException("String representation of doc values for multi-vector fields is not supported"); + } + + @Override + public long ramBytesUsed() { + return 0; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorIndexFieldData.java new file mode 100644 index 0000000000000..65ef492ce052b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/MultiVectorIndexFieldData.java @@ -0,0 +1,114 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.SortField; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.MultiValueMode; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; + +public class MultiVectorIndexFieldData implements IndexFieldData { + protected final String fieldName; + protected final ValuesSourceType valuesSourceType; + private final int dims; + private final IndexVersion indexVersion; + private final DenseVectorFieldMapper.ElementType elementType; + + public MultiVectorIndexFieldData( + String fieldName, + int dims, + ValuesSourceType valuesSourceType, + IndexVersion indexVersion, + DenseVectorFieldMapper.ElementType elementType + ) { + this.fieldName = fieldName; + this.valuesSourceType = valuesSourceType; + this.indexVersion = indexVersion; + this.elementType = elementType; + this.dims = dims; + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public ValuesSourceType getValuesSourceType() { + return valuesSourceType; + } + + @Override + public MultiVectorDVLeafFieldData load(LeafReaderContext context) { + return new MultiVectorDVLeafFieldData(context.reader(), fieldName, indexVersion, elementType, dims); + } + + @Override + public MultiVectorDVLeafFieldData loadDirect(LeafReaderContext context) throws Exception { + return load(context); + } + + @Override + public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) { + throw new IllegalArgumentException( + "Field [" + fieldName + "] of type [" + MultiDenseVectorFieldMapper.CONTENT_TYPE + "] doesn't support sort" + ); + } + + @Override + public BucketedSort newBucketedSort( + BigArrays bigArrays, + Object missingValue, + MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, + SortOrder sortOrder, + DocValueFormat format, + int bucketSize, + BucketedSort.ExtraData extra + ) { + throw new IllegalArgumentException("only supported on numeric fields"); + } + + public static class Builder implements IndexFieldData.Builder { + + private final String name; + private final ValuesSourceType valuesSourceType; + private final IndexVersion indexVersion; + private final int dims; + private final DenseVectorFieldMapper.ElementType elementType; + + public Builder( + String name, + ValuesSourceType valuesSourceType, + IndexVersion indexVersion, + int dims, + DenseVectorFieldMapper.ElementType elementType + ) { + this.name = name; + this.valuesSourceType = valuesSourceType; + this.indexVersion = indexVersion; + this.dims = dims; + this.elementType = elementType; + } + + @Override + public IndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService 
breakerService) { + return new MultiVectorIndexFieldData(name, dims, valuesSourceType, indexVersion, elementType); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 09be98630d5c4..340bff4e1c852 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -67,6 +67,7 @@ import org.elasticsearch.index.mapper.VersionFieldMapper; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.MultiDenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction; import org.elasticsearch.index.seqno.RetentionLeaseSyncAction; @@ -210,6 +211,9 @@ public static Map getMappers(List mappe mappers.put(DenseVectorFieldMapper.CONTENT_TYPE, DenseVectorFieldMapper.PARSER); mappers.put(SparseVectorFieldMapper.CONTENT_TYPE, SparseVectorFieldMapper.PARSER); + if (MultiDenseVectorFieldMapper.FEATURE_FLAG.isEnabled()) { + mappers.put(MultiDenseVectorFieldMapper.CONTENT_TYPE, MultiDenseVectorFieldMapper.PARSER); + } for (MapperPlugin mapperPlugin : mapperPlugins) { for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index d1039a6ddc74e..338dabb23ab4f 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -10,9 +10,9 @@ package org.elasticsearch.rest.action.search; import org.elasticsearch.Build; -import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.index.mapper.vectors.MultiDenseVectorFieldMapper; -import java.util.Collections; +import java.util.HashSet; import java.util.Set; /** @@ -34,26 +34,23 @@ private SearchCapabilities() {} private static final String TRANSFORM_RANK_RRF_TO_RETRIEVER = "transform_rank_rrf_to_retriever"; /** Support kql query. */ private static final String KQL_QUERY_SUPPORTED = "kql_query"; - - public static final Set CAPABILITIES = capabilities(); - - private static Set capabilities() { - Set capabilities = Set.of( - RANGE_REGEX_INTERVAL_QUERY_CAPABILITY, - BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY, - BYTE_FLOAT_BIT_DOT_PRODUCT_CAPABILITY, - DENSE_VECTOR_DOCVALUE_FIELDS, - TRANSFORM_RANK_RRF_TO_RETRIEVER - ); - + /** Support multi-dense-vector field mapper. 
*/ + private static final String MULTI_DENSE_VECTOR_FIELD_MAPPER = "multi_dense_vector_field_mapper"; + + public static final Set CAPABILITIES; + static { + HashSet capabilities = new HashSet<>(); + capabilities.add(RANGE_REGEX_INTERVAL_QUERY_CAPABILITY); + capabilities.add(BIT_DENSE_VECTOR_SYNTHETIC_SOURCE_CAPABILITY); + capabilities.add(BYTE_FLOAT_BIT_DOT_PRODUCT_CAPABILITY); + capabilities.add(DENSE_VECTOR_DOCVALUE_FIELDS); + capabilities.add(TRANSFORM_RANK_RRF_TO_RETRIEVER); + if (MultiDenseVectorFieldMapper.FEATURE_FLAG.isEnabled()) { + capabilities.add(MULTI_DENSE_VECTOR_FIELD_MAPPER); + } if (Build.current().isSnapshot()) { - return Collections.unmodifiableSet(Sets.union(capabilities, snapshotBuildCapabilities())); + capabilities.add(KQL_QUERY_SUPPORTED); } - - return capabilities; - } - - private static Set snapshotBuildCapabilities() { - return Set.of(KQL_QUERY_SUPPORTED); + CAPABILITIES = Set.copyOf(capabilities); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapperTests.java new file mode 100644 index 0000000000000..6a890328732ca --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldMapperTests.java @@ -0,0 +1,506 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper.vectors; + +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.DocumentParsingException; +import org.elasticsearch.index.mapper.LuceneDocument; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.search.lookup.SourceProvider; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.junit.AssumptionViolatedException; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.FloatBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class MultiDenseVectorFieldMapperTests extends MapperTestCase { + + @BeforeClass + public static void setup() { + assumeTrue("Requires multi-dense vector support", MultiDenseVectorFieldMapper.FEATURE_FLAG.isEnabled()); + } + + private final ElementType elementType; + private final int dims; + + public MultiDenseVectorFieldMapperTests() { + this.elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); + this.dims = ElementType.BIT == elementType ? 4 * Byte.SIZE : 4; + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + indexMapping(b, IndexVersion.current()); + } + + @Override + protected void minimalMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { + indexMapping(b, indexVersion); + } + + private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { + b.field("type", "multi_dense_vector").field("dims", dims); + if (elementType != ElementType.FLOAT) { + b.field("element_type", elementType.toString()); + } + } + + @Override + protected Object getSampleValueForDocument() { + int numVectors = randomIntBetween(1, 16); + return Stream.generate( + () -> elementType == ElementType.FLOAT ? 
List.of(0.5, 0.5, 0.5, 0.5) : List.of((byte) 1, (byte) 1, (byte) 1, (byte) 1) + ).limit(numVectors).toList(); + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerConflictCheck( + "dims", + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims)), + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims + 8)) + ); + checker.registerConflictCheck( + "element_type", + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims).field("element_type", "byte")), + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims).field("element_type", "float")) + ); + checker.registerConflictCheck( + "element_type", + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims).field("element_type", "float")), + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims * 8).field("element_type", "bit")) + ); + checker.registerConflictCheck( + "element_type", + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims).field("element_type", "byte")), + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", dims * 8).field("element_type", "bit")) + ); + } + + @Override + protected boolean supportsStoredFields() { + return false; + } + + @Override + protected boolean supportsIgnoreMalformed() { + return false; + } + + @Override + protected void assertSearchable(MappedFieldType fieldType) { + assertThat(fieldType, instanceOf(MultiDenseVectorFieldMapper.MultiDenseVectorFieldType.class)); + assertFalse(fieldType.isIndexed()); + assertFalse(fieldType.isSearchable()); + } + + protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { + assertThat(query, instanceOf(FieldExistsQuery.class)); + FieldExistsQuery existsQuery = (FieldExistsQuery) query; + assertEquals("field", existsQuery.getField()); + assertNoFieldNamesField(fields); + } + + // We override this because dense vectors are the only field type that is not aggregatable but + // that does provide fielddata. TODO: resolve this inconsistency!
+ @Override + public void testAggregatableConsistency() {} + + public void testDims() { + { + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "multi_dense_vector"); + b.field("dims", 0); + }))); + assertThat( + e.getMessage(), + equalTo("Failed to parse mapping: " + "The number of dimensions should be in the range [1, 4096] but was [0]") + ); + } + // test max limit for non-indexed vectors + { + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "multi_dense_vector"); + b.field("dims", 5000); + }))); + assertThat( + e.getMessage(), + equalTo("Failed to parse mapping: " + "The number of dimensions should be in the range [1, 4096] but was [5000]") + ); + } + } + + public void testMergeDims() throws IOException { + XContentBuilder mapping = mapping(b -> { + b.startObject("field"); + b.field("type", "multi_dense_vector"); + b.endObject(); + }); + MapperService mapperService = createMapperService(mapping); + + mapping = mapping(b -> { + b.startObject("field"); + b.field("type", "multi_dense_vector").field("dims", dims); + b.endObject(); + }); + merge(mapperService, mapping); + assertEquals( + XContentHelper.convertToMap(BytesReference.bytes(mapping), false, mapping.contentType()).v2(), + XContentHelper.convertToMap(mapperService.documentMapper().mappingSource().uncompressed(), false, mapping.contentType()).v2() + ); + } + + public void testLargeDimsBit() throws IOException { + createMapperService(fieldMapping(b -> { + b.field("type", "multi_dense_vector"); + b.field("dims", 1024 * Byte.SIZE); + b.field("element_type", ElementType.BIT.toString()); + })); + } + + public void testNonIndexedVector() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", 3))); + + float[][] validVectors = { { -12.1f, 100.7f, -4 }, { 42f, .05f, -1f } }; + double[] dotProduct = new double[2]; + int vecId = 0; + for (float[] vector : validVectors) { + for (float value : vector) { + dotProduct[vecId] += value * value; + } + vecId++; + } + ParsedDocument doc1 = mapper.parse(source(b -> { + b.startArray("field"); + for (float[] vector : validVectors) { + b.startArray(); + for (float value : vector) { + b.value(value); + } + b.endArray(); + } + b.endArray(); + })); + + List fields = doc1.rootDoc().getFields("field"); + assertEquals(1, fields.size()); + assertThat(fields.get(0), instanceOf(BinaryDocValuesField.class)); + // assert that after decoding the indexed value is equal to expected + BytesRef vectorBR = fields.get(0).binaryValue(); + assertEquals(ElementType.FLOAT.getNumBytes(validVectors[0].length) * validVectors.length, vectorBR.length); + float[][] decodedValues = new float[validVectors.length][]; + for (int i = 0; i < validVectors.length; i++) { + decodedValues[i] = new float[validVectors[i].length]; + FloatBuffer fb = ByteBuffer.wrap(vectorBR.bytes, i * Float.BYTES * validVectors[i].length, Float.BYTES * validVectors[i].length) + .order(ByteOrder.LITTLE_ENDIAN) + .asFloatBuffer(); + fb.get(decodedValues[i]); + } + List magFields = doc1.rootDoc().getFields("field" + MultiDenseVectorFieldMapper.VECTOR_MAGNITUDES_SUFFIX); + assertEquals(1, magFields.size()); + assertThat(magFields.get(0), instanceOf(BinaryDocValuesField.class)); + BytesRef magBR = magFields.get(0).binaryValue(); + assertEquals(Float.BYTES * validVectors.length, magBR.length); + FloatBuffer fb = ByteBuffer.wrap(magBR.bytes, 
magBR.offset, magBR.length).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer(); + for (int i = 0; i < validVectors.length; i++) { + assertEquals((float) Math.sqrt(dotProduct[i]), fb.get(), 0.001f); + } + for (int i = 0; i < validVectors.length; i++) { + assertArrayEquals("Decoded dense vector values is not equal to the indexed one.", validVectors[i], decodedValues[i], 0.001f); + } + } + + public void testPoorlyIndexedVector() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", 3))); + + float[][] validVectors = { { -12.1f, 100.7f, -4 }, { 42f, .05f, -1f } }; + double[] dotProduct = new double[2]; + int vecId = 0; + for (float[] vector : validVectors) { + for (float value : vector) { + dotProduct[vecId] += value * value; + } + vecId++; + } + expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> { + b.startArray("field"); + b.startArray(); // double nested array should fail + for (float[] vector : validVectors) { + b.startArray(); + for (float value : vector) { + b.value(value); + } + b.endArray(); + } + b.endArray(); + b.endArray(); + }))); + } + + public void testInvalidParameters() { + + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", 3).field("element_type", "foo")) + ) + ); + assertThat(e.getMessage(), containsString("invalid element_type [foo]; available types are ")); + e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + fieldMapping(b -> b.field("type", "multi_dense_vector").field("dims", 3).startObject("foo").endObject()) + ) + ); + assertThat( + e.getMessage(), + containsString("Failed to parse mapping: unknown parameter [foo] on mapper [field] of type [multi_dense_vector]") + ); + } + + public void testDocumentsWithIncorrectDims() throws Exception { + int dims = 3; + XContentBuilder fieldMapping = fieldMapping(b -> { + b.field("type", "multi_dense_vector"); + b.field("dims", dims); + }); + + DocumentMapper mapper = createDocumentMapper(fieldMapping); + + // test that error is thrown when a document has number of dims more than defined in the mapping + float[][] invalidVector = new float[4][dims + 1]; + DocumentParsingException e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> { + b.startArray("field"); + for (float[] vector : invalidVector) { + b.startArray(); + for (float value : vector) { + b.value(value); + } + b.endArray(); + } + b.endArray(); + }))); + assertThat(e.getCause().getMessage(), containsString("has more dimensions than defined in the mapping [3]")); + + // test that error is thrown when a document has number of dims less than defined in the mapping + float[][] invalidVector2 = new float[4][dims - 1]; + DocumentParsingException e2 = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> { + b.startArray("field"); + for (float[] vector : invalidVector2) { + b.startArray(); + for (float value : vector) { + b.value(value); + } + b.endArray(); + } + b.endArray(); + }))); + assertThat(e2.getCause().getMessage(), containsString("has a different number of dimensions [2] than defined in the mapping [3]")); + // test that error is thrown when some of the vectors have correct number of dims, but others do not + DocumentParsingException e3 = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> { + b.startArray("field"); + for (float[] vector : new 
float[4][dims]) { + b.startArray(); + for (float value : vector) { + b.value(value); + } + b.endArray(); + } + for (float[] vector : invalidVector2) { + b.startArray(); + for (float value : vector) { + b.value(value); + } + b.endArray(); + } + b.endArray(); + }))); + assertThat(e3.getCause().getMessage(), containsString("has a different number of dimensions [2] than defined in the mapping [3]")); + } + + @Override + protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException { + assumeFalse("Dense vectors currently don't support multiple values in the same field", false); + } + + /** + * Dense vectors don't support doc values or string representation (for doc value parser/fetching). + * We may eventually support that, but until then, we only verify that the parsing and fields fetching matches the provided value object + */ + @Override + protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException { + MappedFieldType ft = mapperService.fieldType(field); + MappedFieldType.FielddataOperation fdt = MappedFieldType.FielddataOperation.SEARCH; + SourceToParse source = source(b -> b.field(ft.name(), value)); + SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); + when(searchExecutionContext.isSourceEnabled()).thenReturn(true); + when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field)); + when(searchExecutionContext.getForField(ft, fdt)).thenAnswer(inv -> fieldDataLookup(mapperService).apply(ft, () -> { + throw new UnsupportedOperationException(); + }, fdt)); + ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format); + ParsedDocument doc = mapperService.documentMapper().parse(source); + withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> { + Source s = SourceProvider.fromStoredFields().getSource(ir.leaves().get(0), 0); + nativeFetcher.setNextReader(ir.leaves().get(0)); + List fromNative = nativeFetcher.fetchValues(s, 0, new ArrayList<>()); + MultiDenseVectorFieldMapper.MultiDenseVectorFieldType denseVectorFieldType = + (MultiDenseVectorFieldMapper.MultiDenseVectorFieldType) ft; + switch (denseVectorFieldType.getElementType()) { + case BYTE -> assumeFalse("byte element type testing not currently added", false); + case FLOAT -> { + List fetchedFloatsList = new ArrayList<>(); + for (var f : fromNative) { + float[] fetchedFloats = new float[denseVectorFieldType.getVectorDimensions()]; + assert f instanceof List; + List vector = (List) f; + int i = 0; + for (Object v : vector) { + assert v instanceof Number; + fetchedFloats[i++] = ((Number) v).floatValue(); + } + fetchedFloatsList.add(fetchedFloats); + } + float[][] fetchedFloats = fetchedFloatsList.toArray(new float[0][]); + assertThat("fetching " + value, fetchedFloats, equalTo(value)); + } + } + }); + } + + @Override + protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { + b.field("type", "multi_dense_vector").field("dims", randomIntBetween(2, 4096)).field("element_type", "float"); + } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + MultiDenseVectorFieldMapper.MultiDenseVectorFieldType vectorFieldType = (MultiDenseVectorFieldMapper.MultiDenseVectorFieldType) ft; + int numVectors = randomIntBetween(1, 16); + return switch (vectorFieldType.getElementType()) { + case BYTE -> { + byte[][] vectors = new byte[numVectors][vectorFieldType.getVectorDimensions()]; + for (int i = 0; i 
< numVectors; i++) { + vectors[i] = randomByteArrayOfLength(vectorFieldType.getVectorDimensions()); + } + yield vectors; + } + case FLOAT -> { + float[][] vectors = new float[numVectors][vectorFieldType.getVectorDimensions()]; + for (int i = 0; i < numVectors; i++) { + for (int j = 0; j < vectorFieldType.getVectorDimensions(); j++) { + vectors[i][j] = randomFloat(); + } + } + yield vectors; + } + case BIT -> { + byte[][] vectors = new byte[numVectors][vectorFieldType.getVectorDimensions() / 8]; + for (int i = 0; i < numVectors; i++) { + vectors[i] = randomByteArrayOfLength(vectorFieldType.getVectorDimensions() / 8); + } + yield vectors; + } + }; + } + + public void testCannotBeUsedInMultifields() { + Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { + b.field("type", "keyword"); + b.startObject("fields"); + b.startObject("vectors"); + minimalMapping(b); + b.endObject(); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("Field [vectors] of type [multi_dense_vector] can't be used in multifields")); + } + + @Override + protected IngestScriptSupport ingestScriptSupport() { + throw new AssumptionViolatedException("not supported"); + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { + return new DenseVectorSyntheticSourceSupport(); + } + + @Override + protected boolean supportsEmptyInputArray() { + return false; + } + + private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport { + private final int dims = between(5, 1000); + private final int numVecs = between(1, 16); + private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); + + @Override + public SyntheticSourceExample example(int maxValues) { + Object value = switch (elementType) { + case BYTE, BIT: + yield randomList(numVecs, numVecs, () -> randomList(dims, dims, ESTestCase::randomByte)); + case FLOAT: + yield randomList(numVecs, numVecs, () -> randomList(dims, dims, ESTestCase::randomFloat)); + }; + return new SyntheticSourceExample(value, value, this::mapping); + } + + private void mapping(XContentBuilder b) throws IOException { + b.field("type", "multi_dense_vector"); + if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) { + b.field("element_type", elementType.toString()); + } + b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims); + } + + @Override + public List invalidExample() { + return List.of(); + } + } + + @Override + public void testSyntheticSourceKeepArrays() { + // The mapper expects to parse an array of values by default, it's not compatible with array of arrays. + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldTypeTests.java new file mode 100644 index 0000000000000..14cc63e31fa27 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorFieldTypeTests.java @@ -0,0 +1,105 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.fielddata.FieldDataContext; +import org.elasticsearch.index.mapper.FieldTypeTestCase; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.vectors.MultiDenseVectorFieldMapper.MultiDenseVectorFieldType; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.BBQ_MIN_DIMS; + +public class MultiDenseVectorFieldTypeTests extends FieldTypeTestCase { + + @BeforeClass + public static void setup() { + assumeTrue("Requires multi-dense vector support", MultiDenseVectorFieldMapper.FEATURE_FLAG.isEnabled()); + } + + private MultiDenseVectorFieldType createFloatFieldType() { + return new MultiDenseVectorFieldType( + "f", + DenseVectorFieldMapper.ElementType.FLOAT, + BBQ_MIN_DIMS, + IndexVersion.current(), + Collections.emptyMap() + ); + } + + private MultiDenseVectorFieldType createByteFieldType() { + return new MultiDenseVectorFieldType( + "f", + DenseVectorFieldMapper.ElementType.BYTE, + 5, + IndexVersion.current(), + Collections.emptyMap() + ); + } + + public void testHasDocValues() { + MultiDenseVectorFieldType fft = createFloatFieldType(); + assertTrue(fft.hasDocValues()); + MultiDenseVectorFieldType bft = createByteFieldType(); + assertTrue(bft.hasDocValues()); + } + + public void testIsIndexed() { + MultiDenseVectorFieldType fft = createFloatFieldType(); + assertFalse(fft.isIndexed()); + MultiDenseVectorFieldType bft = createByteFieldType(); + assertFalse(bft.isIndexed()); + } + + public void testIsSearchable() { + MultiDenseVectorFieldType fft = createFloatFieldType(); + assertFalse(fft.isSearchable()); + MultiDenseVectorFieldType bft = createByteFieldType(); + assertFalse(bft.isSearchable()); + } + + public void testIsAggregatable() { + MultiDenseVectorFieldType fft = createFloatFieldType(); + assertFalse(fft.isAggregatable()); + MultiDenseVectorFieldType bft = createByteFieldType(); + assertFalse(bft.isAggregatable()); + } + + public void testFielddataBuilder() { + MultiDenseVectorFieldType fft = createFloatFieldType(); + FieldDataContext fdc = new FieldDataContext("test", null, () -> null, Set::of, MappedFieldType.FielddataOperation.SCRIPT); + assertNotNull(fft.fielddataBuilder(fdc)); + + MultiDenseVectorFieldType bft = createByteFieldType(); + FieldDataContext bdc = new FieldDataContext("test", null, () -> null, Set::of, MappedFieldType.FielddataOperation.SCRIPT); + assertNotNull(bft.fielddataBuilder(bdc)); + } + + public void testDocValueFormat() { + MultiDenseVectorFieldType fft = createFloatFieldType(); + expectThrows(IllegalArgumentException.class, () -> fft.docValueFormat(null, null)); + MultiDenseVectorFieldType bft = createByteFieldType(); + expectThrows(IllegalArgumentException.class, () -> bft.docValueFormat(null, null)); + } + + public void testFetchSourceValue() throws IOException { + MultiDenseVectorFieldType fft = createFloatFieldType(); + List> vector = List.of(List.of(0.0, 1.0, 2.0, 3.0, 4.0, 6.0)); + 
assertEquals(vector, fetchSourceValue(fft, vector));
+        MultiDenseVectorFieldType bft = createByteFieldType();
+        assertEquals(vector, fetchSourceValue(bft, vector));
+    }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
index d6709b00b4dbb..7cd2e6e1cc82e 100644
--- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
@@ -111,6 +111,7 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
 import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
+import org.elasticsearch.index.mapper.vectors.MultiDenseVectorFieldMapper;
 import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.shard.IndexShard;
@@ -202,6 +203,7 @@ public abstract class AggregatorTestCase extends ESTestCase {
     private static final List TYPE_TEST_BLACKLIST = List.of(
         ObjectMapper.CONTENT_TYPE, // Cannot aggregate objects
         DenseVectorFieldMapper.CONTENT_TYPE, // Cannot aggregate dense vectors
+        MultiDenseVectorFieldMapper.CONTENT_TYPE, // Cannot aggregate multi-dense vectors
         SparseVectorFieldMapper.CONTENT_TYPE, // Sparse vectors are no longer supported
         NestedObjectMapper.CONTENT_TYPE, // TODO support for nested

From 9af7af1a0045563eac8d844aea023686720f7c9b Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Sat, 9 Nov 2024 01:26:04 +1100
Subject: [PATCH 09/39] Mute org.elasticsearch.xpack.kql.query.KqlQueryBuilderTests
 org.elasticsearch.xpack.kql.query.KqlQueryBuilderTests #116487

---
 muted-tests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index 0d61008cb9c9a..1321cdc2a3d57 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -291,6 +291,8 @@ tests:
 - class: org.elasticsearch.xpack.test.rest.XPackRestIT
   method: test {p0=ml/evaluate_data_frame/Test outlier_detection with query}
   issue: https://github.com/elastic/elasticsearch/issues/116484
+- class: org.elasticsearch.xpack.kql.query.KqlQueryBuilderTests
+  issue: https://github.com/elastic/elasticsearch/issues/116487

 # Examples:
 #

From cd2433d60c57506a81f9217409ba013ab7ff8d21 Mon Sep 17 00:00:00 2001
From: Andrei Dan
Date: Fri, 8 Nov 2024 14:29:44 +0000
Subject: [PATCH 10/39] Validate missing shards after the coordinator rewrite
 (#116382)

The coordinator rewrite can skip searching shards when the query filters on
`@timestamp`, `event.ingested`, or the `_tier` field. We currently check for
missing shards across all the indices that the query is running against;
however, some shards/indices might not play a role in the query at all after
the coordinator rewrite. This moves the check for missing shards **after**
we've run the coordinator rewrite, so we validate only the shards that will
be searched by the query.
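A minimal sketch of the behavior this enables (index names and tier layout here
are illustrative, not taken from the patch): once the missing-shards check runs
after the coordinator rewrite, a `_tier`-filtered search that excludes an index
whose shards are all unassigned succeeds even with partial results disallowed.

    // Assumes "hot-index" is fully assigned while "warm-index" has only
    // unassigned shards. The coordinator rewrite removes warm-index from the
    // search before the missing-shards check runs, so this request no longer fails.
    TermQueryBuilder tierFilter = QueryBuilders.termQuery("_tier", "data_hot");
    SearchRequest request = new SearchRequest().indices("hot-index", "warm-index")
        .allowPartialSearchResults(false)
        .source(new SearchSourceBuilder().query(tierFilter));
    client().search(request); // only the hot-index shards are searched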
--- docs/changelog/116382.yaml | 5 + .../search/CanMatchPreFilterSearchPhase.java | 10 +- .../CanMatchPreFilterSearchPhaseTests.java | 266 +++++++++++++++--- ...pshotsCanMatchOnCoordinatorIntegTests.java | 6 + 4 files changed, 247 insertions(+), 40 deletions(-) create mode 100644 docs/changelog/116382.yaml diff --git a/docs/changelog/116382.yaml b/docs/changelog/116382.yaml new file mode 100644 index 0000000000000..c941fb6eaa1e4 --- /dev/null +++ b/docs/changelog/116382.yaml @@ -0,0 +1,5 @@ +pr: 116382 +summary: Validate missing shards after the coordinator rewrite +area: Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java index 8dcfbf5f070a1..c4aea73cc6141 100644 --- a/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhase.java @@ -131,7 +131,6 @@ private static boolean assertSearchCoordinationThread() { @Override public void run() { assert assertSearchCoordinationThread(); - checkNoMissingShards(); runCoordinatorRewritePhase(); } @@ -175,7 +174,10 @@ private void runCoordinatorRewritePhase() { if (matchedShardLevelRequests.isEmpty()) { finishPhase(); } else { - new Round(new GroupShardsIterator<>(matchedShardLevelRequests)).run(); + GroupShardsIterator matchingShards = new GroupShardsIterator<>(matchedShardLevelRequests); + // verify missing shards only for the shards that we hit for the query + checkNoMissingShards(matchingShards); + new Round(matchingShards).run(); } } @@ -185,9 +187,9 @@ private void consumeResult(boolean canMatch, ShardSearchRequest request) { results.consumeResult(result, () -> {}); } - private void checkNoMissingShards() { + private void checkNoMissingShards(GroupShardsIterator shards) { assert assertSearchCoordinationThread(); - doCheckNoMissingShards(getName(), request, shardsIts); + doCheckNoMissingShards(getName(), request, shards); } private Map> groupByNode(GroupShardsIterator shards) { diff --git a/server/src/test/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhaseTests.java index c1119ee5973f4..69872b5e4b546 100644 --- a/server/src/test/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhaseTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.mapper.DateFieldMapper; @@ -68,6 +69,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; @@ -77,6 +79,7 @@ import static org.elasticsearch.core.Types.forciblyCast; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.mockito.Mockito.mock; @@ -1087,6 +1090,137 @@ public 
void testCanMatchFilteringOnCoordinatorThatCanBeSkippedTsdb() throws Exce ); } + public void testCanMatchFilteringOnCoordinatorWithMissingShards() throws Exception { + // we'll test that we're executing _tier coordinator rewrite for indices (data stream backing or regular) without any @timestamp + // or event.ingested fields + // for both data stream backing and regular indices we'll have one index in hot and one UNASSIGNED (targeting warm though). + // the warm indices will be skipped as our queries will filter based on _tier: hot and the can match phase will not report error the + // missing index even if allow_partial_search_results is false (because the warm index would've not been part of the search anyway) + + Map indexNameToSettings = new HashMap<>(); + ClusterState state = ClusterState.EMPTY_STATE; + + String dataStreamName = randomAlphaOfLengthBetween(10, 20); + Index warmDataStreamIndex = new Index(DataStream.getDefaultBackingIndexName(dataStreamName, 1), UUIDs.base64UUID()); + indexNameToSettings.put( + warmDataStreamIndex, + settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, warmDataStreamIndex.getUUID()) + .put(DataTier.TIER_PREFERENCE, "data_warm,data_hot") + ); + Index hotDataStreamIndex = new Index(DataStream.getDefaultBackingIndexName(dataStreamName, 2), UUIDs.base64UUID()); + indexNameToSettings.put( + hotDataStreamIndex, + settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, hotDataStreamIndex.getUUID()) + .put(DataTier.TIER_PREFERENCE, "data_hot") + ); + DataStream dataStream = DataStreamTestHelper.newInstance(dataStreamName, List.of(warmDataStreamIndex, hotDataStreamIndex)); + + Index warmRegularIndex = new Index("warm-index", UUIDs.base64UUID()); + indexNameToSettings.put( + warmRegularIndex, + settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, warmRegularIndex.getUUID()) + .put(DataTier.TIER_PREFERENCE, "data_warm,data_hot") + ); + Index hotRegularIndex = new Index("hot-index", UUIDs.base64UUID()); + indexNameToSettings.put( + hotRegularIndex, + settings(IndexVersion.current()).put(IndexMetadata.SETTING_INDEX_UUID, hotRegularIndex.getUUID()) + .put(DataTier.TIER_PREFERENCE, "data_hot") + ); + + List allIndices = new ArrayList<>(4); + allIndices.addAll(dataStream.getIndices()); + allIndices.add(warmRegularIndex); + allIndices.add(hotRegularIndex); + + List hotIndices = List.of(hotRegularIndex, hotDataStreamIndex); + List warmIndices = List.of(warmRegularIndex, warmDataStreamIndex); + + for (Index index : allIndices) { + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index.getName()) + .settings(indexNameToSettings.get(index)) + .numberOfShards(1) + .numberOfReplicas(0); + Metadata.Builder metadataBuilder = Metadata.builder(state.metadata()).put(indexMetadataBuilder); + state = ClusterState.builder(state).metadata(metadataBuilder).build(); + } + + ClusterState finalState = state; + CoordinatorRewriteContextProvider coordinatorRewriteContextProvider = new CoordinatorRewriteContextProvider( + parserConfig(), + mock(Client.class), + System::currentTimeMillis, + () -> finalState, + (index) -> null + ); + + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery(CoordinatorRewriteContext.TIER_FIELD_NAME, "data_hot")); + + { + // test that a search doesn't fail if the query filters out the unassigned shards + // via _tier (coordinator rewrite will eliminate the shards that don't match) + assignShardsAndExecuteCanMatchPhase( + List.of(dataStream), + 
List.of(hotRegularIndex, warmRegularIndex), + coordinatorRewriteContextProvider, + boolQueryBuilder, + List.of(), + null, + warmIndices, + false, + (updatedSearchShardIterators, requests) -> { + var skippedShards = updatedSearchShardIterators.stream().filter(SearchShardIterator::skip).toList(); + var nonSkippedShards = updatedSearchShardIterators.stream() + .filter(searchShardIterator -> searchShardIterator.skip() == false) + .toList(); + + boolean allSkippedShardAreFromWarmIndices = skippedShards.stream() + .allMatch(shardIterator -> warmIndices.contains(shardIterator.shardId().getIndex())); + assertThat(allSkippedShardAreFromWarmIndices, equalTo(true)); + boolean allNonSkippedShardAreHotIndices = nonSkippedShards.stream() + .allMatch(shardIterator -> hotIndices.contains(shardIterator.shardId().getIndex())); + assertThat(allNonSkippedShardAreHotIndices, equalTo(true)); + boolean allRequestMadeToHotIndices = requests.stream() + .allMatch(request -> hotIndices.contains(request.shardId().getIndex())); + assertThat(allRequestMadeToHotIndices, equalTo(true)); + } + ); + } + + { + // test that a search does fail if the query does NOT filter ALL the + // unassigned shards + CountDownLatch latch = new CountDownLatch(1); + Tuple> canMatchPhaseAndRequests = getCanMatchPhaseAndRequests( + List.of(dataStream), + List.of(hotRegularIndex, warmRegularIndex), + coordinatorRewriteContextProvider, + boolQueryBuilder, + List.of(), + null, + List.of(hotRegularIndex, warmRegularIndex, warmDataStreamIndex), + false, + new ActionListener<>() { + @Override + public void onResponse(GroupShardsIterator searchShardIterators) { + fail(null, "unexpected success with result [%s] while expecting to handle failure with [%s]", searchShardIterators); + latch.countDown(); + } + + @Override + public void onFailure(Exception e) { + assertThat(e, instanceOf(SearchPhaseExecutionException.class)); + latch.countDown(); + } + } + ); + + canMatchPhaseAndRequests.v1().start(); + latch.await(10, TimeUnit.SECONDS); + } + } + private void assertAllShardsAreQueried(List updatedSearchShardIterators, List requests) { int skippedShards = (int) updatedSearchShardIterators.stream().filter(SearchShardIterator::skip).count(); @@ -1111,6 +1245,69 @@ private void assignShardsAndExecuteCanMatchPhase( SuggestBuilder suggest, BiConsumer, List> canMatchResultsConsumer ) throws Exception { + assignShardsAndExecuteCanMatchPhase( + dataStreams, + regularIndices, + contextProvider, + query, + aggregations, + suggest, + List.of(), + true, + canMatchResultsConsumer + ); + } + + private void assignShardsAndExecuteCanMatchPhase( + List dataStreams, + List regularIndices, + CoordinatorRewriteContextProvider contextProvider, + QueryBuilder query, + List aggregations, + SuggestBuilder suggest, + List unassignedIndices, + boolean allowPartialResults, + BiConsumer, List> canMatchResultsConsumer + ) throws Exception { + AtomicReference> result = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + Tuple> canMatchAndShardRequests = getCanMatchPhaseAndRequests( + dataStreams, + regularIndices, + contextProvider, + query, + aggregations, + suggest, + unassignedIndices, + allowPartialResults, + ActionTestUtils.assertNoFailureListener(iter -> { + result.set(iter); + latch.countDown(); + }) + ); + + canMatchAndShardRequests.v1().start(); + latch.await(); + + List updatedSearchShardIterators = new ArrayList<>(); + for (SearchShardIterator updatedSearchShardIterator : result.get()) { + updatedSearchShardIterators.add(updatedSearchShardIterator); + 
} + + canMatchResultsConsumer.accept(updatedSearchShardIterators, canMatchAndShardRequests.v2()); + } + + private Tuple> getCanMatchPhaseAndRequests( + List dataStreams, + List regularIndices, + CoordinatorRewriteContextProvider contextProvider, + QueryBuilder query, + List aggregations, + SuggestBuilder suggest, + List unassignedIndices, + boolean allowPartialResults, + ActionListener> canMatchActionListener + ) { Map lookup = new ConcurrentHashMap<>(); DiscoveryNode primaryNode = DiscoveryNodeUtils.create("node_1"); DiscoveryNode replicaNode = DiscoveryNodeUtils.create("node_2"); @@ -1136,23 +1333,31 @@ private void assignShardsAndExecuteCanMatchPhase( // and none is assigned, the phase is considered as failed meaning that the next phase won't be executed boolean withAssignedPrimaries = randomBoolean() || atLeastOnePrimaryAssigned == false; int numShards = randomIntBetween(1, 6); - originalShardIters.addAll( - getShardsIter(dataStreamIndex, originalIndices, numShards, false, withAssignedPrimaries ? primaryNode : null, null) - ); - atLeastOnePrimaryAssigned |= withAssignedPrimaries; + if (unassignedIndices.contains(dataStreamIndex)) { + originalShardIters.addAll(getShardsIter(dataStreamIndex, originalIndices, numShards, false, null, null)); + } else { + originalShardIters.addAll( + getShardsIter(dataStreamIndex, originalIndices, numShards, false, withAssignedPrimaries ? primaryNode : null, null) + ); + atLeastOnePrimaryAssigned |= withAssignedPrimaries; + } } } for (Index regularIndex : regularIndices) { - originalShardIters.addAll( - getShardsIter(regularIndex, originalIndices, randomIntBetween(1, 6), randomBoolean(), primaryNode, replicaNode) - ); + if (unassignedIndices.contains(regularIndex)) { + originalShardIters.addAll(getShardsIter(regularIndex, originalIndices, randomIntBetween(1, 6), false, null, null)); + } else { + originalShardIters.addAll( + getShardsIter(regularIndex, originalIndices, randomIntBetween(1, 6), randomBoolean(), primaryNode, replicaNode) + ); + } } GroupShardsIterator shardsIter = GroupShardsIterator.sortAndCreate(originalShardIters); final SearchRequest searchRequest = new SearchRequest(); searchRequest.indices(indices); - searchRequest.allowPartialSearchResults(true); + searchRequest.allowPartialSearchResults(allowPartialResults); final AliasFilter aliasFilter; if (aggregations.isEmpty() == false || randomBoolean()) { @@ -1212,35 +1417,24 @@ public void sendCanMatch( ); AtomicReference> result = new AtomicReference<>(); - CountDownLatch latch = new CountDownLatch(1); - CanMatchPreFilterSearchPhase canMatchPhase = new CanMatchPreFilterSearchPhase( - logger, - searchTransportService, - (clusterAlias, node) -> lookup.get(node), - aliasFilters, - Collections.emptyMap(), - threadPool.executor(ThreadPool.Names.SEARCH_COORDINATION), - searchRequest, - shardsIter, - timeProvider, - null, - true, - contextProvider, - ActionTestUtils.assertNoFailureListener(iter -> { - result.set(iter); - latch.countDown(); - }) + return new Tuple<>( + new CanMatchPreFilterSearchPhase( + logger, + searchTransportService, + (clusterAlias, node) -> lookup.get(node), + aliasFilters, + Collections.emptyMap(), + threadPool.executor(ThreadPool.Names.SEARCH_COORDINATION), + searchRequest, + shardsIter, + timeProvider, + null, + true, + contextProvider, + canMatchActionListener + ), + requests ); - - canMatchPhase.start(); - latch.await(); - - List updatedSearchShardIterators = new ArrayList<>(); - for (SearchShardIterator updatedSearchShardIterator : result.get()) { - 
updatedSearchShardIterators.add(updatedSearchShardIterator); - } - - canMatchResultsConsumer.accept(updatedSearchShardIterators, requests); } static class StaticCoordinatorRewriteContextProviderBuilder { diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java index 259d38b1fe8ee..26764592d5f72 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java @@ -1029,6 +1029,9 @@ public void testCanMatchSkipsPartiallyMountedIndicesWhenFrozenNodesUnavailable() TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("_tier", "data_content"); List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + // we randomise the partial search results because if shards that do NOT match the query are unavailable + // the search is not partial + .allowPartialSearchResults(randomBoolean()) .source(new SearchSourceBuilder().query(termQueryBuilder)); assertResponse(client().search(request), searchResponse -> { @@ -1045,6 +1048,7 @@ public void testCanMatchSkipsPartiallyMountedIndicesWhenFrozenNodesUnavailable() TermsQueryBuilder termsQueryBuilder = QueryBuilders.termsQuery("_tier", "data_hot", "data_content"); List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .allowPartialSearchResults(randomBoolean()) .source(new SearchSourceBuilder().query(termsQueryBuilder)); assertResponse(client().search(request), searchResponse -> { @@ -1061,6 +1065,7 @@ public void testCanMatchSkipsPartiallyMountedIndicesWhenFrozenNodesUnavailable() BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery("_tier", "data_frozen")); List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .allowPartialSearchResults(randomBoolean()) .source(new SearchSourceBuilder().query(boolQueryBuilder)); assertResponse(client().search(request), searchResponse -> { @@ -1078,6 +1083,7 @@ public void testCanMatchSkipsPartiallyMountedIndicesWhenFrozenNodesUnavailable() .mustNot(randomFrom(QueryBuilders.wildcardQuery("_tier", "dat*ozen"), QueryBuilders.prefixQuery("_tier", "data_fro"))); List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .allowPartialSearchResults(randomBoolean()) .source(new SearchSourceBuilder().query(boolQueryBuilder)); assertResponse(client().search(request), searchResponse -> { From 6eb772960f67e4a1d6f398735253c5a27970effc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Fri, 8 Nov 2024 15:57:48 +0100 Subject: [PATCH 11/39] KQL update tests (#116483) --- .../kql/parser/AbstractKqlParserTestCase.java | 55 +++++- .../kql/src/test/resources/supported-queries | 164 +++++++++--------- 
.../src/test/resources/unsupported-queries | 42 ++--- 3 files changed, 161 insertions(+), 100 deletions(-) diff --git a/x-pack/plugin/kql/src/test/java/org/elasticsearch/xpack/kql/parser/AbstractKqlParserTestCase.java b/x-pack/plugin/kql/src/test/java/org/elasticsearch/xpack/kql/parser/AbstractKqlParserTestCase.java index ac06a96d49eb4..588e60bd4dd75 100644 --- a/x-pack/plugin/kql/src/test/java/org/elasticsearch/xpack/kql/parser/AbstractKqlParserTestCase.java +++ b/x-pack/plugin/kql/src/test/java/org/elasticsearch/xpack/kql/parser/AbstractKqlParserTestCase.java @@ -7,9 +7,12 @@ package org.elasticsearch.xpack.kql.parser; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.core.Predicates; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; @@ -19,6 +22,7 @@ import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.test.AbstractBuilderTestCase; +import org.elasticsearch.xcontent.XContentBuilder; import java.io.BufferedReader; import java.io.IOException; @@ -36,6 +40,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.equalTo; @@ -46,6 +51,43 @@ public abstract class AbstractKqlParserTestCase extends AbstractBuilderTestCase protected static final String UNSUPPORTED_QUERY_FILE_PATH = "/unsupported-queries"; protected static final Predicate BOOLEAN_QUERY_FILTER = (q) -> q.matches("(?i)[^{]*[^\\\\]*(NOT|AND|OR)[^}]*"); + protected static final String NESTED_FIELD_NAME = "mapped_nested"; + + @Override + protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { + XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties"); + + mapping.startObject(TEXT_FIELD_NAME).field("type", "text").endObject(); + mapping.startObject(NESTED_FIELD_NAME); + { + mapping.field("type", "nested"); + mapping.startObject("properties"); + { + mapping.startObject(TEXT_FIELD_NAME).field("type", "text").endObject(); + mapping.startObject(KEYWORD_FIELD_NAME).field("type", "keyword").endObject(); + mapping.startObject(INT_FIELD_NAME).field("type", "integer").endObject(); + mapping.startObject(NESTED_FIELD_NAME); + { + mapping.field("type", "nested"); + mapping.startObject("properties"); + { + mapping.startObject(TEXT_FIELD_NAME).field("type", "text").endObject(); + mapping.startObject(KEYWORD_FIELD_NAME).field("type", "keyword").endObject(); + mapping.startObject(INT_FIELD_NAME).field("type", "integer").endObject(); + } + mapping.endObject(); + } + mapping.endObject(); + } + mapping.endObject(); + } + mapping.endObject(); + + mapping.endObject().endObject().endObject(); + + mapperService.merge("_doc", new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE); + } + protected static String wrapWithRandomWhitespaces(String input) { return String.join("", randomWhitespaces(), input, randomWhitespaces()); } @@ -93,7 +135,18 @@ private static InputStream readFromJarUrl(URL 
source) throws IOException { protected List mappedLeafFields() { return Stream.concat( Arrays.stream(MAPPED_LEAF_FIELD_NAMES), - List.of(DATE_FIELD_NAME, INT_FIELD_NAME).stream().map(subfieldName -> OBJECT_FIELD_NAME + "." + subfieldName) + Stream.of( + // Adding mapped_object subfields + Strings.format("%s.%s", OBJECT_FIELD_NAME, INT_FIELD_NAME), + Strings.format("%s.%s", OBJECT_FIELD_NAME, DATE_FIELD_NAME), + // Adding mapped_nested subfields + Strings.format("%s.%s", NESTED_FIELD_NAME, TEXT_FIELD_NAME), + Strings.format("%s.%s", NESTED_FIELD_NAME, KEYWORD_FIELD_NAME), + Strings.format("%s.%s", NESTED_FIELD_NAME, INT_FIELD_NAME), + Strings.format("%s.%s.%s", NESTED_FIELD_NAME, NESTED_FIELD_NAME, TEXT_FIELD_NAME), + Strings.format("%s.%s.%s", NESTED_FIELD_NAME, NESTED_FIELD_NAME, KEYWORD_FIELD_NAME), + Strings.format("%s.%s.%s", NESTED_FIELD_NAME, NESTED_FIELD_NAME, INT_FIELD_NAME) + ) ).toList(); } diff --git a/x-pack/plugin/kql/src/test/resources/supported-queries b/x-pack/plugin/kql/src/test/resources/supported-queries index 4911c9e3ebecd..b659b1ae5b1db 100644 --- a/x-pack/plugin/kql/src/test/resources/supported-queries +++ b/x-pack/plugin/kql/src/test/resources/supported-queries @@ -23,54 +23,54 @@ f*oo *:"foo bar" // Querying a field -foo_field:200 -foo_field:foo -foo_field:foo bar -foo_field:(foo bar) -foo_field:foo* -foo_field: f*oo -foo_field: *foo -foo_field:"foo bar" -foo_field.subfield:foo -foo_*_field:foo -foo_field:* -foo_*:* +mapped_int:200 +mapped_string_2:foo +mapped_string:foo bar +mapped_string:(foo bar) +mapped_string:foo* +mapped_string_2: f*oo +mapped_string: *foo +mapped_string:"foo bar" +mapped_object.subfield:foo +mapped_str*:foo +mapped_string:* +mapped_str_*:* // Range queries -foo_field<200 -foo_field=200 -foo_field>=foo -foo_field>"foo bar" -foo_field<=foo -foo_field>=foo +mapped_int<200 +mapped_string_2=200 +mapped_string_alias>=foo +mapped_string>"foo bar" +mapped_string<=foo +mapped_string_2>=foo // Boolean queries NOT foo NOT foo bar -NOT foo_field:foo -NOT foo_fieldbar -(foo_field:foo) AND (foo_field:foo bar) -foo_field:foo OR foo_field:foo bar -NOT(foo_field:foo OR foo_field:foo bar) -NOT(foo_field:foo AND foo_field:foo bar) -NOT foo_field:foo AND NOT foo_field:foo bar -(NOT foo_field:foo) AND (NOT foo_field:foo bar) -NOT(foo_field:foo) AND NOT(foo_field:foo bar) -foo_field:foo AND foo_field:foo bar AND foo bar -foo_field:foo AND foo_field:foo bar OR foo bar -foo_field:foo OR foo_field:foo bar OR foo bar -foo_field:foo OR foo_field:foo bar AND foo bar -foo_field:foo AND (foo_field:foo bar OR foo bar) -foo_field:foo AND (foo_field:foo bar OR foo bar) -foo_field:foo OR (foo_field:foo bar OR foo bar) +NOT mapped_string:foo +NOT mapped_string_2bar +(mapped_string:foo) AND (mapped_string:foo bar) +mapped_string:foo OR mapped_string_2:foo bar +NOT(mapped_string:foo OR mapped_string:foo bar) +NOT(mapped_string:foo AND mapped_string:foo bar) +NOT mapped_string:foo AND NOT mapped_string_2:foo bar +(NOT mapped_string_alias:foo) AND (NOT mapped_string:foo bar) +NOT(mapped_string:foo) AND NOT(mapped_string:foo bar) +mapped_string:foo AND mapped_string_2:foo bar AND foo bar +mapped_string:foo AND mapped_string_2:foo bar OR foo bar +mapped_string:foo OR mapped_string_2:foo bar OR foo bar +mapped_string:foo OR mapped_string:foo bar AND foo bar +mapped_string:foo AND (mapped_string_2:foo bar OR foo bar) +mapped_string:foo AND (mapped_string_2:foo bar OR foo bar) +mapped_string:foo OR (mapped_string_2:foo bar OR foo bar) -foo:AND -foo:OR -foo:NOT +mapped_string:AND 
+mapped_string:OR +mapped_string:NOT foo AND foo OR foo NOT @@ -79,43 +79,51 @@ OR foo NOT // Nested queries -nested_field: { NOT foo } -nested_field: { NOT foo bar } -nested_field: { NOT foo_field:foo } -nested_field: { foo_field:foo AND foo_field:foo bar } -nested_field: { foo_fieldbar } -nested_field: { (foo_field:foo) AND (foo_field:foo bar) } -nested_field: { foo_field:foo OR foo_field:foo bar } -nested_field: { NOT(foo_field:foo OR foo_field:foo bar) } -nested_field: { NOT(foo_field:foo AND foo_field:foo bar) } -nested_field: { NOT foo_field:foo AND NOT foo_field:foo bar } -nested_field: { (NOT foo_field:foo) AND (NOT foo_field:foo bar) } -nested_field: { NOT(foo_field:foo) AND NOT(foo_field:foo bar) } -nested_field: { foo_field:foo AND foo_field:foo bar AND foo bar } -nested_field: { foo_field:foo AND foo_field:foo bar OR foo bar } -nested_field: { foo_field:foo OR foo_field:foo bar OR foo bar } -nested_field: { foo_field:foo OR foo_field:foo bar AND foo bar } -nested_field: { foo_field:foo AND (foo_field:foo bar OR foo bar) } -nested_field: { foo_field:foo AND (foo_field:foo bar OR foo bar) } -nested_field: { foo_field:foo OR (foo_field:foo bar OR foo bar) } -nested_field: { sub_nested_field : { foo_field:foo } AND foo_field:foo bar } +mapped_nested: { NOT foo } +mapped_nested: { NOT foo bar } +mapped_nested: { NOT mapped_string:foo } +mapped_nested: { mapped_string:foo AND mapped_string_2:foo bar } +mapped_nested: { mapped_string2 } +mapped_nested: { (mapped_string:foo) AND (mapped_string_2:foo bar) } +mapped_nested: { mapped_string:foo OR mapped_string_2:foo bar } +mapped_nested: { NOT(mapped_string:foo OR mapped_string_2:foo bar) } +mapped_nested: { NOT(mapped_string:foo AND mapped_string_2:foo bar) } +mapped_nested: { NOT mapped_string:foo AND NOT mapped_string_2:foo bar } +mapped_nested: { (NOT mapped_string:foo) AND (NOT mapped_string_2:foo bar) } +mapped_nested: { NOT(mapped_string:foo) AND NOT(mapped_string_2:foo bar) } +mapped_nested: { mapped_string:foo AND mapped_string_2:foo bar AND foo bar } +mapped_nested: { mapped_string:foo AND mapped_string_2:foo bar OR foo bar } +mapped_nested: { mapped_string:foo OR mapped_string_2:foo bar OR foo bar } +mapped_nested: { mapped_string:foo OR mapped_string_2:foo bar AND foo bar } +mapped_nested: { mapped_string:foo AND (mapped_string_2:foo bar OR foo bar) } +mapped_nested: { mapped_string:foo AND (mapped_string_2:foo bar OR foo bar) } +mapped_nested: { mapped_string:foo OR (mapped_string_2:foo bar OR foo bar) } +mapped_nested: { mapped_str*:foo } +mapped_nested: { mapped_nested : { mapped_string:foo AND mapped_int < 3 } AND mapped_string_2:foo bar } +mapped_nested: { mapped_nested.mapped_string:foo AND mapped_string_2:foo bar } + +// Inline nested queries +mapped_nested.mapped_string:foo AND mapped_nested.mapped_int < 2 +mapped_nested.mapped_nested.mapped_string:foo AND mapped_nested.mapped_int < 2 +mapped_nested.mapped_str*: foo + // Queries with escape sequences -foo_field : (foo\(bar\)) -foo_field : foo\:bar -foo_field : (foo \\and bar) -foo_field : (foo \\or bar) -foo_field : foo \\not bar -foo_field : foo \{bar\} -foo_field : foo \(bar\) -foo_field : foo \\ bar -foo_field : foo \"bar\" +mapped_string:(foo\(bar\)) +mapped_string:foo\:bar +mapped_string:(foo \\and bar) +mapped_string:(foo \\or bar) +mapped_string:foo \\not bar +mapped_string:foo \{bar\} +mapped_string:foo \(bar\) +mapped_string:foo \\ bar +mapped_string:foo \"bar\" -foo_field : "foo and bar" -foo_field : "foo not bar" -foo_field : "foo or bar" -foo_field : "foo 
: bar" -foo_field : "foo { bar }" -foo_field : "foo (bar)" -foo_field : "foo \\ bar" -foo_field : "foo \"bar\"" +mapped_string:"foo and bar" +mapped_string:"foo not bar" +mapped_string:"foo or bar" +mapped_string:"foo : bar" +mapped_string:"foo { bar }" +mapped_string:"foo (bar)" +mapped_string:"foo \\ bar" +mapped_string:"foo \"bar\"" diff --git a/x-pack/plugin/kql/src/test/resources/unsupported-queries b/x-pack/plugin/kql/src/test/resources/unsupported-queries index 64901891c6786..149bcf5bd2b5a 100644 --- a/x-pack/plugin/kql/src/test/resources/unsupported-queries +++ b/x-pack/plugin/kql/src/test/resources/unsupported-queries @@ -1,36 +1,36 @@ // Incomplete expressions -foo_field : -foo_field < -foo_field > -foo_field >= -foo_field <= +mapped_string : +mapped_string < +mapped_string > +mapped_string >= +mapped_string <= >= foo : "foo" : foo // Parentheses mismatch -foo_field: (foo bar -foo_field: foo bar) -NOT foo_field:foo OR foo_field:foo bar) -NOT (foo_field:foo AND) foo_field:foo bar +mapped_string: (foo bar +mapped_string: foo bar) +NOT mapped_string:foo OR mapped_string_2:foo bar) +NOT (mapped_string:foo AND) mapped_string_2:foo bar // Quotes mismatch -foo_field: "foo bar -foo_field: foo bar" +mapped_string: "foo bar +mapped_string: foo bar" // Can't nest grouping terms parentheses -foo_field:(foo (bar)) +mapped_string:(foo (bar)) // Bad syntax for nested fields: -nested_field { foo: bar } +mapped_nested { mapped_string: bar } // Missing escape sequences: -foo_field: foo:bar -foo_field: (foo and bar) -foo_field: (foo or bar) -foo_field: foo not bar -foo_field: foo { bar } -foo_field: foo (bar) -foo_field: foo "bar" -foo_field: "foo "bar"" +mapped_string: foo:bar +mapped_string: (foo and bar) +mapped_string: (foo or bar) +mapped_string: foo not bar +mapped_string: foo { bar } +mapped_string: foo (bar) +mapped_string: foo "bar" +mapped_string: "foo "bar"" From c48e5e59807e329811c6c86bbdcd960b0448d2c5 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Fri, 8 Nov 2024 09:59:34 -0500 Subject: [PATCH 12/39] Fixing list for size estimates (#116486) --- docs/reference/how-to/knn-search.asciidoc | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/reference/how-to/knn-search.asciidoc b/docs/reference/how-to/knn-search.asciidoc index 83614b0d99024..e884c01dd3509 100644 --- a/docs/reference/how-to/knn-search.asciidoc +++ b/docs/reference/how-to/knn-search.asciidoc @@ -72,15 +72,13 @@ least enough RAM to hold the vector data and index structures. To check the size of the vector data, you can use the <> API. 
Here are estimates for different element types and quantization levels:
-+
---
-`element_type: float`: `num_vectors * num_dimensions * 4`
-`element_type: float` with `quantization: int8`: `num_vectors * (num_dimensions + 4)`
-`element_type: float` with `quantization: int4`: `num_vectors * (num_dimensions/2 + 4)`
-`element_type: float` with `quantization: bbq`: `num_vectors * (num_dimensions/8 + 12)`
-`element_type: byte`: `num_vectors * num_dimensions`
-`element_type: bit`: `num_vectors * (num_dimensions/8)`
---
+
+* `element_type: float`: `num_vectors * num_dimensions * 4`
+* `element_type: float` with `quantization: int8`: `num_vectors * (num_dimensions + 4)`
+* `element_type: float` with `quantization: int4`: `num_vectors * (num_dimensions/2 + 4)`
+* `element_type: float` with `quantization: bbq`: `num_vectors * (num_dimensions/8 + 12)`
+* `element_type: byte`: `num_vectors * num_dimensions`
+* `element_type: bit`: `num_vectors * (num_dimensions/8)`

 If utilizing HNSW, the graph must also be in memory; to estimate the required
 bytes, use `num_vectors * 4 * HNSW.m`. The default value for `HNSW.m` is 16, so
 by default `num_vectors * 4 * 16`.

From e9f3addc55022b402a8f261093a5540cadb20289 Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Fri, 8 Nov 2024 09:59:50 -0500
Subject: [PATCH 13/39] Adjust analyze limit exception to be a bad_request
 (#116325)

The exception is caused by overly large input from the user and is resolvable
by the user either adjusting their request or changing their cluster settings,
so a user-focused error is preferred. I chose bad_request as it seemed like
the best fit.

closes: https://github.com/elastic/elasticsearch/issues/116323
---
 docs/changelog/116325.yaml | 5 +++++
 .../indices/analyze/TransportAnalyzeAction.java | 7 +++++--
 .../admin/indices/TransportAnalyzeActionTests.java | 13 +++++++------
 3 files changed, 17 insertions(+), 8 deletions(-)
 create mode 100644 docs/changelog/116325.yaml

diff --git a/docs/changelog/116325.yaml b/docs/changelog/116325.yaml
new file mode 100644
index 0000000000000..b8cd16dc85773
--- /dev/null
+++ b/docs/changelog/116325.yaml
@@ -0,0 +1,5 @@
+pr: 116325
+summary: Adjust analyze limit exception to be a `bad_request`
+area: Analysis
+type: bug
+issues: []

diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
index 97a259cc6f030..fb672b49c2f5a 100644
--- a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
+++ b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
@@ -18,6 +18,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
 import org.elasticsearch.cluster.ClusterState;
@@ -44,6 +45,7 @@ import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.injection.guice.Inject;
+import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
@@ -455,11 +457,12 @@ private TokenCounter(int maxTokenCount) {
     private void increment() {
         tokenCount++;
         if (tokenCount > maxTokenCount) {
-
throw new IllegalStateException( + throw new ElasticsearchStatusException( "The number of tokens produced by calling _analyze has exceeded the allowed maximum of [" + maxTokenCount + "]." - + " This limit can be set by changing the [index.analyze.max_token_count] index level setting." + + " This limit can be set by changing the [index.analyze.max_token_count] index level setting.", + RestStatus.BAD_REQUEST ); } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 901f0d7000542..0b9cba837583d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction; import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -460,8 +461,8 @@ public void testExceedDefaultMaxTokenLimit() { AnalyzeAction.Request request = new AnalyzeAction.Request(); request.text(text); request.analyzer("standard"); - IllegalStateException e = expectThrows( - IllegalStateException.class, + ElasticsearchStatusException e = expectThrows( + ElasticsearchStatusException.class, () -> TransportAnalyzeAction.analyze(request, registry, null, maxTokenCount) ); assertEquals( @@ -477,8 +478,8 @@ public void testExceedDefaultMaxTokenLimit() { request2.text(text); request2.analyzer("standard"); request2.explain(true); - IllegalStateException e2 = expectThrows( - IllegalStateException.class, + ElasticsearchStatusException e2 = expectThrows( + ElasticsearchStatusException.class, () -> TransportAnalyzeAction.analyze(request2, registry, null, maxTokenCount) ); assertEquals( @@ -506,8 +507,8 @@ public void testExceedSetMaxTokenLimit() { AnalyzeAction.Request request = new AnalyzeAction.Request(); request.text(text); request.analyzer("standard"); - IllegalStateException e = expectThrows( - IllegalStateException.class, + ElasticsearchStatusException e = expectThrows( + ElasticsearchStatusException.class, () -> TransportAnalyzeAction.analyze(request, registry, null, idxMaxTokenCount) ); assertEquals( From bc270bd39cabae6a0809d2865b609f30b905bb80 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Fri, 8 Nov 2024 15:04:20 +0000 Subject: [PATCH 14/39] Add BuildVersion to DiscoveryNode VersionInformation (#115434) --- .../cluster/ClusterStateDiffIT.java | 2 +- .../cluster/node/DiscoveryNode.java | 50 +++++-------------- .../cluster/node/DiscoveryNodes.java | 7 +++ .../cluster/node/VersionInformation.java | 44 ++++++++++++---- .../rest/action/cat/RestNodesAction.java | 2 +- .../rest/action/cat/RestTasksAction.java | 2 +- .../TransportMasterNodeActionTests.java | 2 +- .../metadata/AutoExpandReplicasTests.java | 9 +--- .../cluster/node/DiscoveryNodeTests.java | 2 +- .../allocation/FailedNodeRoutingTests.java | 3 +- .../action/document/RestIndexActionTests.java | 9 ++-- .../cluster/ESAllocationTestCase.java | 2 +- .../cluster/node/DiscoveryNodeUtils.java | 23 ++++++++- .../xpack/ccr/repository/CcrRepository.java | 3 +- ...actClusterStateLicenseServiceTestCase.java | 1 - 
.../AbstractProfilingPersistenceManager.java | 16 +----- ...pleSecurityNetty4ServerTransportTests.java | 10 ++-- 17 files changed, 95 insertions(+), 92 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java index 5b99300d99f22..58b9af7724aaa 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java @@ -147,7 +147,7 @@ public void testClusterStateDiffSerialization() throws Exception { for (Map.Entry node : clusterStateFromDiffs.nodes().getNodes().entrySet()) { DiscoveryNode node1 = clusterState.nodes().get(node.getKey()); DiscoveryNode node2 = clusterStateFromDiffs.nodes().get(node.getKey()); - assertThat(node1.getVersion(), equalTo(node2.getVersion())); + assertThat(node1.getBuildVersion(), equalTo(node2.getBuildVersion())); assertThat(node1.getAddress(), equalTo(node2.getAddress())); assertThat(node1.getAttributes(), equalTo(node2.getAttributes())); } diff --git a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNode.java index d3410b9139b41..7bf367f99b929 100644 --- a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNode.java @@ -21,8 +21,8 @@ import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.StringLiteralDeduplicator; import org.elasticsearch.core.Nullable; +import org.elasticsearch.env.BuildVersion; import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; import org.elasticsearch.node.Node; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -33,7 +33,6 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; -import java.util.OptionalInt; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -290,18 +289,6 @@ public static Set getRolesFromSettings(final Settings setting return Set.copyOf(NODE_ROLES_SETTING.get(settings)); } - private static VersionInformation inferVersionInformation(Version version) { - if (version.before(Version.V_8_10_0)) { - return new VersionInformation( - version, - IndexVersion.getMinimumCompatibleIndexVersion(version.id), - IndexVersion.fromId(version.id) - ); - } else { - return new VersionInformation(version, IndexVersions.MINIMUM_COMPATIBLE, IndexVersion.current()); - } - } - private static final Writeable.Reader readStringLiteral = s -> nodeStringDeduplicator.deduplicate(s.readString()); /** @@ -338,11 +325,7 @@ public DiscoveryNode(StreamInput in) throws IOException { } } this.roles = Collections.unmodifiableSortedSet(roles); - if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_10_X)) { - versionInfo = new VersionInformation(Version.readVersion(in), IndexVersion.readVersion(in), IndexVersion.readVersion(in)); - } else { - versionInfo = inferVersionInformation(Version.readVersion(in)); - } + versionInfo = new VersionInformation(Version.readVersion(in), IndexVersion.readVersion(in), IndexVersion.readVersion(in)); if (in.getTransportVersion().onOrAfter(EXTERNAL_ID_VERSION)) { this.externalId = readStringLiteral.read(in); } else { @@ -375,13 +358,9 @@ public void writeTo(StreamOutput out) throws IOException { 
o.writeString(role.roleNameAbbreviation()); o.writeBoolean(role.canContainData()); }); - if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_10_X)) { - Version.writeVersion(versionInfo.nodeVersion(), out); - IndexVersion.writeVersion(versionInfo.minIndexVersion(), out); - IndexVersion.writeVersion(versionInfo.maxIndexVersion(), out); - } else { - Version.writeVersion(versionInfo.nodeVersion(), out); - } + Version.writeVersion(versionInfo.nodeVersion(), out); + IndexVersion.writeVersion(versionInfo.minIndexVersion(), out); + IndexVersion.writeVersion(versionInfo.maxIndexVersion(), out); if (out.getTransportVersion().onOrAfter(EXTERNAL_ID_VERSION)) { out.writeString(externalId); } @@ -486,18 +465,13 @@ public VersionInformation getVersionInformation() { return this.versionInfo; } - public Version getVersion() { - return this.versionInfo.nodeVersion(); + public BuildVersion getBuildVersion() { + return versionInfo.buildVersion(); } - public OptionalInt getPre811VersionId() { - // Even if Version is removed from this class completely it will need to read the version ID - // off the wire for old node versions, so the value of this variable can be obtained from that - int versionId = versionInfo.nodeVersion().id; - if (versionId >= Version.V_8_11_0.id) { - return OptionalInt.empty(); - } - return OptionalInt.of(versionId); + @Deprecated + public Version getVersion() { + return this.versionInfo.nodeVersion(); } public IndexVersion getMinIndexVersion() { @@ -564,7 +538,7 @@ public void appendDescriptionWithoutAttributes(StringBuilder stringBuilder) { appendRoleAbbreviations(stringBuilder, ""); stringBuilder.append('}'); } - stringBuilder.append('{').append(versionInfo.nodeVersion()).append('}'); + stringBuilder.append('{').append(versionInfo.buildVersion()).append('}'); stringBuilder.append('{').append(versionInfo.minIndexVersion()).append('-').append(versionInfo.maxIndexVersion()).append('}'); } @@ -601,7 +575,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.value(role.roleName()); } builder.endArray(); - builder.field("version", versionInfo.nodeVersion()); + builder.field("version", versionInfo.buildVersion().toString()); builder.field("min_index_version", versionInfo.minIndexVersion()); builder.field("max_index_version", versionInfo.maxIndexVersion()); builder.endObject(); diff --git a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodes.java b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodes.java index 9477f9c6a5cc1..12c698a6ed958 100644 --- a/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodes.java +++ b/server/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodes.java @@ -339,6 +339,13 @@ public boolean hasByName(String name) { return false; } + /** + * {@code true} if this cluster consists of nodes with several release versions + */ + public boolean isMixedVersionCluster() { + return minNodeVersion.equals(maxNodeVersion) == false; + } + /** * Returns the version of the node with the oldest version in the cluster that is not a client node * diff --git a/server/src/main/java/org/elasticsearch/cluster/node/VersionInformation.java b/server/src/main/java/org/elasticsearch/cluster/node/VersionInformation.java index e9468ef966cec..a4d0ff1eb55e4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/node/VersionInformation.java +++ b/server/src/main/java/org/elasticsearch/cluster/node/VersionInformation.java @@ -10,6 +10,7 @@ package org.elasticsearch.cluster.node; import 
org.elasticsearch.Version; +import org.elasticsearch.env.BuildVersion; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; @@ -17,18 +18,49 @@ /** * Represents the versions of various aspects of an Elasticsearch node. - * @param nodeVersion The node {@link Version} + * @param buildVersion The node {@link BuildVersion} * @param minIndexVersion The minimum {@link IndexVersion} supported by this node * @param maxIndexVersion The maximum {@link IndexVersion} supported by this node */ -public record VersionInformation(Version nodeVersion, IndexVersion minIndexVersion, IndexVersion maxIndexVersion) { +public record VersionInformation( + BuildVersion buildVersion, + Version nodeVersion, + IndexVersion minIndexVersion, + IndexVersion maxIndexVersion +) { public static final VersionInformation CURRENT = new VersionInformation( - Version.CURRENT, + BuildVersion.current(), IndexVersions.MINIMUM_COMPATIBLE, IndexVersion.current() ); + public VersionInformation { + Objects.requireNonNull(buildVersion); + Objects.requireNonNull(nodeVersion); + Objects.requireNonNull(minIndexVersion); + Objects.requireNonNull(maxIndexVersion); + } + + public VersionInformation(BuildVersion version, IndexVersion minIndexVersion, IndexVersion maxIndexVersion) { + this(version, Version.CURRENT, minIndexVersion, maxIndexVersion); + /* + * Whilst DiscoveryNode.getVersion exists, we need to be able to get a Version from VersionInfo + * This needs to be consistent - on serverless, BuildVersion has an id of -1, which translates + * to a nonsensical Version. So all consumers of Version need to be moved to BuildVersion + * before we can remove Version from here. + */ + // for the moment, check this is only called with current() so the implied Version is correct + // TODO: work out what needs to happen for other versions. Maybe we can only remove this once the nodeVersion field is gone + assert version.equals(BuildVersion.current()) : version + " is not " + BuildVersion.current(); + } + + @Deprecated + public VersionInformation(Version version, IndexVersion minIndexVersion, IndexVersion maxIndexVersion) { + this(BuildVersion.fromVersionId(version.id()), version, minIndexVersion, maxIndexVersion); + } + + @Deprecated public static VersionInformation inferVersions(Version nodeVersion) { if (nodeVersion == null) { return null; @@ -44,10 +76,4 @@ public static VersionInformation inferVersions(Version nodeVersion) { throw new IllegalArgumentException("Node versions can only be inferred before release version 8.10.0"); } } - - public VersionInformation { - Objects.requireNonNull(nodeVersion); - Objects.requireNonNull(minIndexVersion); - Objects.requireNonNull(maxIndexVersion); - } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java b/server/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java index 99fde2731644e..39e679f2c0ad0 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/cat/RestNodesAction.java @@ -367,7 +367,7 @@ Table buildTable( table.addCell("-"); } - table.addCell(node.getVersion().toString()); + table.addCell(node.getBuildVersion().toString()); table.addCell(info == null ? null : info.getBuild().type().displayName()); table.addCell(info == null ? null : info.getBuild().hash()); table.addCell(jvmInfo == null ? 
null : jvmInfo.version()); diff --git a/server/src/main/java/org/elasticsearch/rest/action/cat/RestTasksAction.java b/server/src/main/java/org/elasticsearch/rest/action/cat/RestTasksAction.java index fb0814d5746ac..b0f94fd1420e9 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/cat/RestTasksAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/cat/RestTasksAction.java @@ -140,7 +140,7 @@ private static void buildRow(Table table, boolean fullId, boolean detailed, Disc table.addCell(node == null ? "-" : node.getHostAddress()); table.addCell(node.getAddress().address().getPort()); table.addCell(node == null ? "-" : node.getName()); - table.addCell(node == null ? "-" : node.getVersion().toString()); + table.addCell(node == null ? "-" : node.getBuildVersion().toString()); table.addCell(taskInfo.headers().getOrDefault(Task.X_OPAQUE_ID_HTTP_HEADER, "-")); if (detailed) { diff --git a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java index 1167f9f779830..e11b0749dad41 100644 --- a/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/master/TransportMasterNodeActionTests.java @@ -576,7 +576,7 @@ public void testDelegateToFailingMaster() throws ExecutionException, Interrupted // simulate master restart followed by a state recovery - this will reset the cluster state version final DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(clusterService.state().nodes()); nodesBuilder.remove(masterNode); - masterNode = DiscoveryNodeUtils.create(masterNode.getId(), masterNode.getAddress(), masterNode.getVersion()); + masterNode = DiscoveryNodeUtils.create(masterNode.getId(), masterNode.getAddress(), masterNode.getVersionInformation()); nodesBuilder.add(masterNode); nodesBuilder.masterNodeId(masterNode.getId()); final ClusterState.Builder builder = ClusterState.builder(clusterService.state()).nodes(nodesBuilder); diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java index 1c8c2c7aa4c22..efb5df7d7a4fc 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java @@ -9,7 +9,6 @@ package org.elasticsearch.cluster.metadata; import org.elasticsearch.TransportVersion; -import org.elasticsearch.Version; import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.support.ActiveShardCount; @@ -98,15 +97,11 @@ public void testInvalidValues() { private static final AtomicInteger nodeIdGenerator = new AtomicInteger(); - protected DiscoveryNode createNode(Version version, DiscoveryNodeRole... mustHaveRoles) { + protected DiscoveryNode createNode(DiscoveryNodeRole... mustHaveRoles) { Set roles = new HashSet<>(randomSubsetOf(DiscoveryNodeRole.roles())); Collections.addAll(roles, mustHaveRoles); final String id = Strings.format("node_%03d", nodeIdGenerator.incrementAndGet()); - return DiscoveryNodeUtils.builder(id).name(id).roles(roles).version(version).build(); - } - - protected DiscoveryNode createNode(DiscoveryNodeRole... 
mustHaveRoles) { - return createNode(Version.CURRENT, mustHaveRoles); + return DiscoveryNodeUtils.builder(id).name(id).roles(roles).build(); } /** diff --git a/server/src/test/java/org/elasticsearch/cluster/node/DiscoveryNodeTests.java b/server/src/test/java/org/elasticsearch/cluster/node/DiscoveryNodeTests.java index 8924acf892d34..331b5d92ca94e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/node/DiscoveryNodeTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/node/DiscoveryNodeTests.java @@ -247,7 +247,7 @@ public void testDiscoveryNodeToString() { assertThat(toString, containsString("{" + node.getEphemeralId() + "}")); assertThat(toString, containsString("{" + node.getAddress() + "}")); assertThat(toString, containsString("{IScdfhilmrstvw}"));// roles - assertThat(toString, containsString("{" + node.getVersion() + "}")); + assertThat(toString, containsString("{" + node.getBuildVersion() + "}")); assertThat(toString, containsString("{test-attr=val}"));// attributes } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java index 02723f4c7f236..2ecd1da3c6322 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedNodeRoutingTests.java @@ -130,8 +130,7 @@ public void testRandomClusterPromotesNewestReplica() throws InterruptedException // Log the node versions (for debugging if necessary) for (DiscoveryNode discoveryNode : state.nodes().getDataNodes().values()) { - Version nodeVer = discoveryNode.getVersion(); - logger.info("--> node [{}] has version [{}]", discoveryNode.getId(), nodeVer); + logger.info("--> node [{}] has version [{}]", discoveryNode.getId(), discoveryNode.getBuildVersion()); } // randomly create some indices diff --git a/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java index 1aa53382666ef..c97160427e59d 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/document/RestIndexActionTests.java @@ -10,7 +10,6 @@ package org.elasticsearch.rest.action.document; import org.apache.lucene.util.SetOnce; -import org.elasticsearch.Version; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexResponse; @@ -55,10 +54,10 @@ public void testCreateOpTypeValidation() { } public void testAutoIdDefaultsToOptypeCreate() { - checkAutoIdOpType(Version.CURRENT, DocWriteRequest.OpType.CREATE); + checkAutoIdOpType(DocWriteRequest.OpType.CREATE); } - private void checkAutoIdOpType(Version minClusterVersion, DocWriteRequest.OpType expectedOpType) { + private void checkAutoIdOpType(DocWriteRequest.OpType expectedOpType) { SetOnce executeCalled = new SetOnce<>(); verifyingClient.setExecuteVerifier((actionType, request) -> { assertThat(request, instanceOf(IndexRequest.class)); @@ -71,9 +70,7 @@ private void checkAutoIdOpType(Version minClusterVersion, DocWriteRequest.OpType .withContent(new BytesArray("{}"), XContentType.JSON) .build(); clusterStateSupplier.set( - ClusterState.builder(ClusterName.DEFAULT) - 
.nodes(DiscoveryNodes.builder().add(DiscoveryNodeUtils.builder("test").version(minClusterVersion).build()).build()) - .build() + ClusterState.builder(ClusterName.DEFAULT).nodes(DiscoveryNodes.builder().add(DiscoveryNodeUtils.create("test")).build()).build() ); dispatchRequest(autoIdRequest); assertThat(executeCalled.get(), equalTo(true)); diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java index c8d66f389dab1..a1718e956800c 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java @@ -194,7 +194,7 @@ protected void submitReconcileTask(DesiredBalance desiredBalance) { protected static Set MASTER_DATA_ROLES = Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE); protected static DiscoveryNode newNode(String nodeId) { - return newNode(nodeId, (Version) null); + return DiscoveryNodeUtils.builder(nodeId).roles(MASTER_DATA_ROLES).build(); } protected static DiscoveryNode newNode(String nodeName, String nodeId, Map attributes) { diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeUtils.java b/test/framework/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeUtils.java index dba7f28db3f4c..64f8fa88762b8 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/node/DiscoveryNodeUtils.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.env.BuildVersion; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.node.Node; @@ -36,10 +37,15 @@ public static DiscoveryNode create(String id, TransportAddress address) { return builder(id).address(address).build(); } + @Deprecated public static DiscoveryNode create(String id, TransportAddress address, Version version) { return builder(id).address(address).version(version).build(); } + public static DiscoveryNode create(String id, TransportAddress address, VersionInformation version) { + return builder(id).address(address).version(version).build(); + } + public static DiscoveryNode create(String id, TransportAddress address, Map attributes, Set roles) { return builder(id).address(address).attributes(attributes).roles(roles).build(); } @@ -67,6 +73,7 @@ public static class Builder { private TransportAddress address; private Map attributes = Map.of(); private Set roles = DiscoveryNodeRole.roles(); + private BuildVersion buildVersion; private Version version; private IndexVersion minIndexVersion; private IndexVersion maxIndexVersion; @@ -107,19 +114,33 @@ public Builder roles(Set roles) { return this; } + @Deprecated public Builder version(Version version) { this.version = version; return this; } + @Deprecated public Builder version(Version version, IndexVersion minIndexVersion, IndexVersion maxIndexVersion) { + this.buildVersion = BuildVersion.fromVersionId(version.id()); this.version = version; this.minIndexVersion = minIndexVersion; this.maxIndexVersion = maxIndexVersion; return this; } + public Builder version(BuildVersion version, IndexVersion minIndexVersion, IndexVersion maxIndexVersion) { + // see comment in VersionInformation + assert version.equals(BuildVersion.current()); + this.buildVersion = 
version; + this.version = Version.CURRENT; + this.minIndexVersion = minIndexVersion; + this.maxIndexVersion = maxIndexVersion; + return this; + } + public Builder version(VersionInformation versions) { + this.buildVersion = versions.buildVersion(); this.version = versions.nodeVersion(); this.minIndexVersion = versions.minIndexVersion(); this.maxIndexVersion = versions.maxIndexVersion(); @@ -152,7 +173,7 @@ public DiscoveryNode build() { if (minIndexVersion == null || maxIndexVersion == null) { versionInfo = VersionInformation.inferVersions(version); } else { - versionInfo = new VersionInformation(version, minIndexVersion, maxIndexVersion); + versionInfo = new VersionInformation(buildVersion, version, minIndexVersion, maxIndexVersion); } return new DiscoveryNode(name, id, ephemeralId, hostName, hostAddress, address, attributes, roles, versionInfo, externalId); diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java index c8136002bbd52..1baf4282e4e77 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java @@ -216,8 +216,7 @@ public void getSnapshotInfo( if (IndexVersion.current().equals(maxIndexVersion)) { for (var node : response.nodes()) { if (node.canContainData() && node.getMaxIndexVersion().equals(maxIndexVersion)) { - // TODO: Revisit when looking into removing release version from DiscoveryNode - BuildVersion remoteVersion = BuildVersion.fromVersionId(node.getVersion().id); + BuildVersion remoteVersion = node.getBuildVersion(); if (remoteVersion.isFutureVersion()) { throw new SnapshotException( snapshot, diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/AbstractClusterStateLicenseServiceTestCase.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/AbstractClusterStateLicenseServiceTestCase.java index 27e5c1213f1f9..a3a12792df4aa 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/AbstractClusterStateLicenseServiceTestCase.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/AbstractClusterStateLicenseServiceTestCase.java @@ -85,7 +85,6 @@ protected void setInitialState(License license, XPackLicenseState licenseState, when(discoveryNodes.stream()).thenAnswer(i -> Stream.of(mockNode)); when(discoveryNodes.iterator()).thenAnswer(i -> Iterators.single(mockNode)); when(discoveryNodes.isLocalNodeElectedMaster()).thenReturn(false); - when(discoveryNodes.getMinNodeVersion()).thenReturn(mockNode.getVersion()); when(state.nodes()).thenReturn(discoveryNodes); when(state.getNodes()).thenReturn(discoveryNodes); // it is really ridiculous we have nodes() and getNodes()... 
when(clusterService.state()).thenReturn(state); diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/persistence/AbstractProfilingPersistenceManager.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/persistence/AbstractProfilingPersistenceManager.java index 528d6f28a7115..0c005e8472cd2 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/persistence/AbstractProfilingPersistenceManager.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/persistence/AbstractProfilingPersistenceManager.java @@ -9,7 +9,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; @@ -22,7 +21,6 @@ import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateListener; -import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; @@ -73,18 +71,6 @@ public void setTemplatesEnabled(boolean templatesEnabled) { this.templatesEnabled = templatesEnabled; } - private static boolean isMixedVersionCluster(DiscoveryNodes nodes) { - Version version = null; - for (var n : nodes) { - if (version == null) { - version = n.getVersion(); - } else if (version.equals(n.getVersion()) == false) { - return true; - } - } - return false; - } - @Override public final void clusterChanged(ClusterChangedEvent event) { if (templatesEnabled == false) { @@ -100,7 +86,7 @@ public final void clusterChanged(ClusterChangedEvent event) { return; } - if (isMixedVersionCluster(event.state().nodes())) { + if (event.state().nodes().isMixedVersionCluster()) { logger.debug("Skipping up-to-date check as cluster has mixed versions"); return; } diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java index 888e858f2b039..c5c5e14934408 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/netty4/SimpleSecurityNetty4ServerTransportTests.java @@ -454,7 +454,7 @@ public void testSecurityClientAuthenticationConfigs() throws Exception { DiscoveryNode node = DiscoveryNodeUtils.create( service.getLocalNode().getId(), clientAddress, - service.getLocalNode().getVersion() + service.getLocalNode().getVersionInformation() ); try (Transport.Connection connection2 = openConnection(serviceA, node, TestProfiles.LIGHT_PROFILE)) { sslEngine = getEngineFromAcceptedChannel(originalTransport, connection2); @@ -486,7 +486,7 @@ public void testSecurityClientAuthenticationConfigs() throws Exception { DiscoveryNode node = DiscoveryNodeUtils.create( service.getLocalNode().getId(), clientAddress, - service.getLocalNode().getVersion() + service.getLocalNode().getVersionInformation() ); try (Transport.Connection connection2 = openConnection(serviceA, node, TestProfiles.LIGHT_PROFILE)) { sslEngine = getEngineFromAcceptedChannel(originalTransport, 
connection2); @@ -518,7 +518,7 @@ public void testSecurityClientAuthenticationConfigs() throws Exception { DiscoveryNode node = DiscoveryNodeUtils.create( service.getLocalNode().getId(), clientAddress, - service.getLocalNode().getVersion() + service.getLocalNode().getVersionInformation() ); try (Transport.Connection connection2 = openConnection(serviceA, node, TestProfiles.LIGHT_PROFILE)) { sslEngine = getEngineFromAcceptedChannel(originalTransport, connection2); @@ -562,7 +562,7 @@ public void testClientChannelUsesSeparateSslConfigurationForRemoteCluster() thro final DiscoveryNode node = DiscoveryNodeUtils.create( fcService.getLocalNode().getId(), remoteAccessAddress, - fcService.getLocalNode().getVersion() + fcService.getLocalNode().getVersionInformation() ); // 1. Connection will fail because FC server certificate is not trusted by default @@ -679,7 +679,7 @@ public void testRemoteClusterCanWorkWithoutSSL() throws Exception { final DiscoveryNode node = DiscoveryNodeUtils.create( fcService.getLocalNode().getId(), remoteAccessAddress, - fcService.getLocalNode().getVersion() + fcService.getLocalNode().getVersionInformation() ); final Settings qcSettings = Settings.builder().put("xpack.security.remote_cluster_client.ssl.enabled", "false").build(); try ( From f88a004030d57ede9f38acef85229bc59741239c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Fri, 8 Nov 2024 16:08:54 +0100 Subject: [PATCH 15/39] Remove unused EntitlementInternals (#116473) --- libs/entitlement/README.md | 4 ++++ .../api/ElasticsearchEntitlementChecker.java | 14 ----------- .../internals/EntitlementInternals.java | 24 ------------------- 3 files changed, 4 insertions(+), 38 deletions(-) delete mode 100644 libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/internals/EntitlementInternals.java diff --git a/libs/entitlement/README.md b/libs/entitlement/README.md index 76e4db0505d38..2ab76cf1c2221 100644 --- a/libs/entitlement/README.md +++ b/libs/entitlement/README.md @@ -5,3 +5,7 @@ This module implements mechanisms to grant and check permissions under the _enti The entitlements system provides an alternative to the legacy `SecurityManager` system, which is deprecated for removal. The `entitlement-agent` instruments sensitive class library methods with calls to this module, in order to enforce the controls. +This feature is currently under development, and it is completely disabled by default (the agent is not loaded). To enable it, run Elasticsearch with +```shell +./gradlew run --entitlements +``` diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java index 330205997d21c..6d5dbd4098aa9 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java @@ -15,8 +15,6 @@ import java.util.Optional; -import static org.elasticsearch.entitlement.runtime.internals.EntitlementInternals.isActive; - /** * Implementation of the {@link EntitlementChecker} interface, providing additional * API methods for managing the checks. 
@@ -25,13 +23,6 @@ public class ElasticsearchEntitlementChecker implements EntitlementChecker { private static final Logger logger = LogManager.getLogger(ElasticsearchEntitlementChecker.class); - /** - * Causes entitlements to be enforced. - */ - public void activate() { - isActive = true; - } - @Override public void checkSystemExit(Class callerClass, int status) { var requestingModule = requestingModule(callerClass); @@ -66,10 +57,6 @@ private static Module requestingModule(Class callerClass) { } private static boolean isTriviallyAllowed(Module requestingModule) { - if (isActive == false) { - logger.debug("Trivially allowed: entitlements are inactive"); - return true; - } if (requestingModule == null) { logger.debug("Trivially allowed: Entire call stack is in the boot module layer"); return true; @@ -81,5 +68,4 @@ private static boolean isTriviallyAllowed(Module requestingModule) { logger.trace("Not trivially allowed"); return false; } - } diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/internals/EntitlementInternals.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/internals/EntitlementInternals.java deleted file mode 100644 index ea83caf198b0a..0000000000000 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/internals/EntitlementInternals.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.entitlement.runtime.internals; - -/** - * Don't export this from the module. Just don't. - */ -public class EntitlementInternals { - /** - * When false, entitlement rules are not enforced; all operations are allowed. - */ - public static volatile boolean isActive = false; - - public static void reset() { - isActive = false; - } -} From 7b79a5230874d273268d93706d8f953981374952 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 8 Nov 2024 15:53:03 +0000 Subject: [PATCH 16/39] Allow missing semantic text field in bulk updates (#116478) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This update enables bulk update operations to succeed even if the semantic text field is absent in the partial update. For the simple case where the field isn’t referenced by a copy_to operation from another source, the inference can be safely bypassed, allowing the update to proceed without errors. 
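As a rough illustration of the intended semantics (a simplified sketch, not
the production filter code; the method name `isInferenceRequired` is
hypothetical), the per-field decision looks like this:

    import java.util.Map;

    // Sketch: decide whether a semantic_text field still needs inference to be
    // computed during a partial (bulk update) operation.
    static boolean isInferenceRequired(Object originalFieldValue, String[] sourceFields) {
        if (originalFieldValue instanceof Map) {
            // Inference results are already present on the partial document.
            return false;
        }
        if (originalFieldValue == null && sourceFields.length == 1) {
            // The field is absent and no other field feeds it via copy_to,
            // so the update can safely bypass inference.
            return false;
        }
        return true;
    }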
--- docs/changelog/116478.yaml | 5 ++ .../xpack/inference/InferenceFeatures.java | 5 +- .../ShardBulkInferenceActionFilter.java | 3 +- .../mapper/SemanticTextFieldMapper.java | 2 + .../60_semantic_text_inference_update.yml | 56 +++++++++++++++++++ 5 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/116478.yaml diff --git a/docs/changelog/116478.yaml b/docs/changelog/116478.yaml new file mode 100644 index 0000000000000..ec50799eb2019 --- /dev/null +++ b/docs/changelog/116478.yaml @@ -0,0 +1,5 @@ +pr: 116478 +summary: Semantic text simple partial update +area: Search +type: bug +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 216b5c984eca5..10ffedef14e26 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -38,6 +38,9 @@ public Set getFeatures() { @Override public Set getTestFeatures() { - return Set.of(SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX); + return Set.of( + SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX, + SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX + ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index 1a6e4760fe125..b3bbe3a7df9bc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -446,7 +446,8 @@ private Map> createFieldInferenceRequests(Bu String field = entry.getName(); String inferenceId = entry.getInferenceId(); var originalFieldValue = XContentMapValues.extractValue(field, docMap); - if (originalFieldValue instanceof Map) { + if (originalFieldValue instanceof Map || (originalFieldValue == null && entry.getSourceFields().length == 1)) { + // Inference has already been computed, or there is no inference required. 
continue; } int order = 0; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 4c07516051287..f0cb612c9082f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -91,6 +91,8 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2"); public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); + public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); + public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml index 59ce439d954a2..294761608ee81 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/60_semantic_text_inference_update.yml @@ -610,3 +610,59 @@ setup: - exists: _source.dense_field.inference.chunks.0.embeddings - match: { _source.dense_field.inference.chunks.0.text: "another updated inference test" } - match: { _source.non_inference_field: "updated non inference test" } + +--- +"Bypass inference on bulk update operation": + - requires: + cluster_features: semantic_text.single_field_update_fix + reason: Standalone semantic text fields are now optional in a bulk update operation + + # Update as upsert + - do: + bulk: + body: + - '{"update": {"_index": "test-index", "_id": "doc_1"}}' + - '{"doc": { "sparse_field": "inference test", "dense_field": "another inference test", "non_inference_field": "non inference test" }, "doc_as_upsert": true}' + + - match: { errors: false } + - match: { items.0.update.result: "created" } + + - do: + bulk: + body: + - '{"update": {"_index": "test-index", "_id": "doc_1"}}' + - '{"doc": { "non_inference_field": "another value" }, "doc_as_upsert": true}' + + - match: { errors: false } + - match: { items.0.update.result: "updated" } + + - do: + get: + index: test-index + id: doc_1 + + - match: { _source.sparse_field.text: "inference test" } + - exists: _source.sparse_field.inference.chunks.0.embeddings + - match: { _source.sparse_field.inference.chunks.0.text: "inference test" } + - match: { _source.dense_field.text: "another inference test" } + - exists: _source.dense_field.inference.chunks.0.embeddings + - match: { _source.dense_field.inference.chunks.0.text: "another inference test" } + - match: { _source.non_inference_field: "another value" } + + - do: + bulk: + body: + - '{"update": {"_index": "test-index", "_id": "doc_1"}}' + - '{"doc": { "sparse_field": null, "dense_field": null, "non_inference_field": "updated value" }, "doc_as_upsert": true}' + + - match: { errors: false } + - match: { items.0.update.result: "updated" } + + - do: + get: 
+ index: test-index + id: doc_1 + + - match: { _source.sparse_field: null } + - match: { _source.dense_field: null } + - match: { _source.non_inference_field: "updated value" } From 1da6ea4c54d50161d48597cf71ef57faca304193 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 8 Nov 2024 17:28:49 +0100 Subject: [PATCH 17/39] ESQL: Fix EsqlNodeSubclassTests (#116496) - Fix EsqlNodeSubclassTests so that they also test the Node subclasses in esql.core. - Fix the re-enabled tests for Field-/Reference-/MetadataAttribute by using the longest public c'tor in their info() method implementations. --- .../esql/core/expression/FieldAttribute.java | 13 +----------- .../core/expression/MetadataAttribute.java | 21 +------------------ .../core/expression/ReferenceAttribute.java | 19 +---------------- .../esql/tree/EsqlNodeSubclassTests.java | 21 +++++++------------ 4 files changed, 10 insertions(+), 64 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java index 4076acdb7e7b8..d7ae438bc3189 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java @@ -174,18 +174,7 @@ public String getWriteableName() { @Override protected NodeInfo info() { - return NodeInfo.create( - this, - FieldAttribute::new, - parentName, - name(), - dataType(), - field, - (String) null, - nullable(), - id(), - synthetic() - ); + return NodeInfo.create(this, FieldAttribute::new, parentName, name(), field, nullable(), id(), synthetic()); } public String parentName() { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java index 3641812cd6cad..6e4e9292bfc99 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java @@ -147,26 +147,7 @@ protected String label() { @Override protected NodeInfo info() { - return NodeInfo.create( - this, - (source, name, dataType, qualifier, nullability, id, synthetic, searchable1) -> new MetadataAttribute( - source, - name, - dataType, - qualifier, - nullability, - id, - synthetic, - searchable1 - ), - name(), - dataType(), - (String) null, - nullable(), - id(), - synthetic(), - searchable - ); + return NodeInfo.create(this, MetadataAttribute::new, name(), dataType(), nullable(), id(), synthetic(), searchable); } public boolean searchable() { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java index 3626c5d26f235..404cd75edd5e4 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/ReferenceAttribute.java @@ -110,24 +110,7 @@ protected Attribute clone(Source source, String name, DataType dataType, Nullabi @Override protected NodeInfo info() { - return NodeInfo.create( - this, - (source, name, dataType, 
qualifier, nullability, id, synthetic) -> new ReferenceAttribute( - source, - name, - dataType, - qualifier, - nullability, - id, - synthetic - ), - name(), - dataType(), - (String) null, - nullable(), - id(), - synthetic() - ); + return NodeInfo.create(this, ReferenceAttribute::new, name(), dataType(), nullable(), id(), synthetic()); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java index b8a64be5dfd35..82f0ebf316508 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java @@ -21,8 +21,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttributeTests; import org.elasticsearch.xpack.esql.core.expression.UnresolvedNamedExpression; @@ -114,9 +112,13 @@ * */ public class EsqlNodeSubclassTests> extends NodeSubclassTests { + private static final String ESQL_CORE_CLASS_PREFIX = "org.elasticsearch.xpack.esql.core"; + private static final String ESQL_CORE_JAR_LOCATION_SUBSTRING = "x-pack-esql-core"; + private static final String ESQL_CLASS_PREFIX = "org.elasticsearch.xpack.esql"; + private static final Predicate CLASSNAME_FILTER = className -> { - boolean esqlCore = className.startsWith("org.elasticsearch.xpack.esql.core") != false; - boolean esqlProper = className.startsWith("org.elasticsearch.xpack.esql") != false; + boolean esqlCore = className.startsWith(ESQL_CORE_CLASS_PREFIX) != false; + boolean esqlProper = className.startsWith(ESQL_CLASS_PREFIX) != false; return (esqlCore || esqlProper); }; @@ -164,15 +166,6 @@ public void testInfoParameters() throws Exception { */ expectedCount -= 1; - // special exceptions with private constructors - if (MetadataAttribute.class.equals(subclass) || ReferenceAttribute.class.equals(subclass)) { - expectedCount++; - } - - if (FieldAttribute.class.equals(subclass)) { - expectedCount += 2; - } - assertEquals(expectedCount, info(node).properties().size()); } @@ -736,7 +729,7 @@ public static Set> subclassesOf(Class clazz, Predicate // NIO FileSystem API is not used since it trips the SecurityManager // https://bugs.openjdk.java.net/browse/JDK-8160798 // so iterate the jar "by hand" - if (path.endsWith(".jar") && path.contains("x-pack-ql")) { + if (path.endsWith(".jar") && path.contains(ESQL_CORE_JAR_LOCATION_SUBSTRING)) { try (JarInputStream jar = jarStream(root)) { JarEntry je = null; while ((je = jar.getNextJarEntry()) != null) { From b285204fdee1ba831b85a02061dbee90858a170c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 8 Nov 2024 12:40:53 -0500 Subject: [PATCH 18/39] DOCS: Correct explanation of percentiles_bucket (#116499) Corrects the explanation of `percentiles_bucket` so it's clear that it returns the `nth` largest item always, and it rounds `n` towards infinity. That's how it's worked since 2016 but the docs talked about "not greater than" which I don't think is particularly clear. 
---
 .../pipeline/percentiles-bucket-aggregation.asciidoc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc
index 658470c8d5a4e..d5bd868258081 100644
--- a/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc
+++ b/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc
@@ -127,10 +127,11 @@ And the following may be the response:
 
 ==== Percentiles_bucket implementation
 
-The Percentile Bucket returns the nearest input data point that is not greater than the requested percentile; it does not
-interpolate between data points.
-
 The percentiles are calculated exactly and is not an approximation (unlike the Percentiles Metric). This means
 the implementation maintains an in-memory, sorted list of your data to compute the percentiles, before discarding the
 data. You may run into memory pressure issues if you attempt to calculate percentiles over many millions of
 data-points in a single `percentiles_bucket`.
+
+The Percentile Bucket returns the nearest input data point to the requested percentile, rounding indices toward
+positive infinity; it does not interpolate between data points. For example, if there are eight data points and
+you request the `50%th` percentile, it will return the `4th` item because `ROUND_UP(.50 * (8-1))` is `4`.

From 8b3507138634906c293c2a018e306bcddb6d766c Mon Sep 17 00:00:00 2001
From: Mark Vieira
Date: Fri, 8 Nov 2024 09:45:25 -0800
Subject: [PATCH 19/39] Add SLES 15.6 to docker linux exclusions list (#116506)

---
 .ci/dockerOnLinuxExclusions | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.ci/dockerOnLinuxExclusions b/.ci/dockerOnLinuxExclusions
index c150cca590f7d..715ed86188dd5 100644
--- a/.ci/dockerOnLinuxExclusions
+++ b/.ci/dockerOnLinuxExclusions
@@ -15,6 +15,7 @@ sles-15.2
 sles-15.3
 sles-15.4
 sles-15.5
+sles-15.6
 
 # These OSes are deprecated and filtered starting with 8.0.0, but need to be excluded
 # for PR checks

From af99654dac95e55bc44b9bfe71393f5d8b48b0ef Mon Sep 17 00:00:00 2001
From: Jake Landis
Date: Fri, 8 Nov 2024 11:59:32 -0600
Subject: [PATCH 20/39] Add a monitor_stats privilege and allow that privilege for remote cluster privileges (#114964)

This commit does the following:

* Add a new monitor_stats privilege
* Ensure that monitor_stats can be set in the remote_cluster privileges
* Give Kibana the ability to remotely call monitor_stats via RCS 2.0

Since this is the first case where there is more than one remote_cluster
privilege, the following framework concerns have been addressed:

* Ensure that when sending to older RCS 2.0 clusters, which only supported
  all-or-nothing remote_cluster blocks, we don't send the new privilege
* Ensure that when sending API key role descriptors that contain
  remote_cluster, we don't send the new privileges to RCS 1.0/2.0 clusters
  that are not new enough to understand them
* Fix and extend the BWC tests for RCS 1.0 and RCS 2.0

---
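The outbound BWC filtering introduced here (see
RemoteClusterPermissions#removeUnsupportedPrivileges further down in this
patch) can be pictured with the simplified sketch below; the `introducedIn`
lookup is a hypothetical stand-in for the real per-privilege version
metadata:

    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    import org.elasticsearch.TransportVersion;

    // Sketch: keep only the remote_cluster privileges that the receiving
    // cluster's wire version is new enough to understand.
    static Set<String> supportedPrivileges(
        Set<String> requested,
        TransportVersion outbound,
        Map<String, TransportVersion> introducedIn
    ) {
        Set<String> kept = new HashSet<>();
        for (String privilege : requested) {
            // e.g. monitor_stats maps to the new ROLE_MONITOR_STATS version.
            TransportVersion introduced = introducedIn.get(privilege);
            if (introduced != null && outbound.onOrAfter(introduced)) {
                kept.add(privilege);
            }
        }
        return kept;
    }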
.../core/security/authz/permission/Role.java | 2 +- .../security/authz/permission/SimpleRole.java | 4 +- .../privilege/ClusterPrivilegeResolver.java | 12 +- .../KibanaOwnedReservedRoleDescriptors.java | 13 +- ...usterApiKeyRoleDescriptorBuilderTests.java | 45 ++- .../action/role/PutRoleRequestTests.java | 2 +- .../security/authc/AuthenticationTests.java | 86 +++++- .../security/authz/RoleDescriptorTests.java | 28 ++ .../RemoteClusterPermissionGroupTests.java | 15 +- .../RemoteClusterPermissionsTests.java | 141 ++++++++-- .../authz/store/ReservedRolesStoreTests.java | 2 +- .../security/qa/multi-cluster/build.gradle | 8 +- ...stractRemoteClusterSecurityBWCRestIT.java} | 168 +++++------ ...ClusterSecurityBWCToRCS1ClusterRestIT.java | 69 +++++ ...ClusterSecurityBWCToRCS2ClusterRestIT.java | 90 ++++++ .../RemoteClusterSecurityRestStatsIT.java | 266 ++++++++++++++++++ .../xpack/security/apikey/ApiKeyRestIT.java | 4 +- .../authz/store/CompositeRolesStore.java | 2 +- .../authz/store/RoleDescriptorStore.java | 2 +- .../xpack/security/authz/RBACEngineTests.java | 50 ++-- .../authz/store/CompositeRolesStoreTests.java | 10 +- .../authz/store/FileRolesStoreTests.java | 3 +- .../RestGetUserPrivilegesActionTests.java | 4 +- .../test/privileges/11_builtin.yml | 2 +- 34 files changed, 1156 insertions(+), 204 deletions(-) create mode 100644 docs/changelog/114964.yaml rename x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/{RemoteClusterSecurityBwcRestIT.java => AbstractRemoteClusterSecurityBWCRestIT.java} (65%) create mode 100644 x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS1ClusterRestIT.java create mode 100644 x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS2ClusterRestIT.java create mode 100644 x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityRestStatsIT.java diff --git a/docs/changelog/114964.yaml b/docs/changelog/114964.yaml new file mode 100644 index 0000000000000..8274aeb76a937 --- /dev/null +++ b/docs/changelog/114964.yaml @@ -0,0 +1,6 @@ +pr: 114964 +summary: Add a `monitor_stats` privilege and allow that privilege for remote cluster + privileges +area: Authorization +type: enhancement +issues: [] diff --git a/docs/reference/rest-api/security/bulk-create-roles.asciidoc b/docs/reference/rest-api/security/bulk-create-roles.asciidoc index a1fe998c08146..a198f49383907 100644 --- a/docs/reference/rest-api/security/bulk-create-roles.asciidoc +++ b/docs/reference/rest-api/security/bulk-create-roles.asciidoc @@ -327,7 +327,7 @@ The result would then have the `errors` field set to `true` and hold the error f "details": { "my_admin_role": { <4> "type": "action_request_validation_exception", - "reason": "Validation Failed: 1: unknown cluster privilege [bad_cluster_privilege]. 
a privilege must be either one of the predefined cluster privilege names [manage_own_api_key,manage_data_stream_global_retention,monitor_data_stream_global_retention,none,cancel_task,cross_cluster_replication,cross_cluster_search,delegate_pki,grant_api_key,manage_autoscaling,manage_index_templates,manage_logstash_pipelines,manage_oidc,manage_saml,manage_search_application,manage_search_query_rules,manage_search_synonyms,manage_service_account,manage_token,manage_user_profile,monitor_connector,monitor_enrich,monitor_inference,monitor_ml,monitor_rollup,monitor_snapshot,monitor_text_structure,monitor_watcher,post_behavioral_analytics_event,read_ccr,read_connector_secrets,read_fleet_secrets,read_ilm,read_pipeline,read_security,read_slm,transport_client,write_connector_secrets,write_fleet_secrets,create_snapshot,manage_behavioral_analytics,manage_ccr,manage_connector,manage_enrich,manage_ilm,manage_inference,manage_ml,manage_rollup,manage_slm,manage_watcher,monitor_data_frame_transforms,monitor_transform,manage_api_key,manage_ingest_pipelines,manage_pipeline,manage_data_frame_transforms,manage_transform,manage_security,monitor,manage,all] or a pattern over one of the available cluster actions;" + "reason": "Validation Failed: 1: unknown cluster privilege [bad_cluster_privilege]. a privilege must be either one of the predefined cluster privilege names [manage_own_api_key,manage_data_stream_global_retention,monitor_data_stream_global_retention,none,cancel_task,cross_cluster_replication,cross_cluster_search,delegate_pki,grant_api_key,manage_autoscaling,manage_index_templates,manage_logstash_pipelines,manage_oidc,manage_saml,manage_search_application,manage_search_query_rules,manage_search_synonyms,manage_service_account,manage_token,manage_user_profile,monitor_connector,monitor_enrich,monitor_inference,monitor_ml,monitor_rollup,monitor_snapshot,monitor_stats,monitor_text_structure,monitor_watcher,post_behavioral_analytics_event,read_ccr,read_connector_secrets,read_fleet_secrets,read_ilm,read_pipeline,read_security,read_slm,transport_client,write_connector_secrets,write_fleet_secrets,create_snapshot,manage_behavioral_analytics,manage_ccr,manage_connector,manage_enrich,manage_ilm,manage_inference,manage_ml,manage_rollup,manage_slm,manage_watcher,monitor_data_frame_transforms,monitor_transform,manage_api_key,manage_ingest_pipelines,manage_pipeline,manage_data_frame_transforms,manage_transform,manage_security,monitor,manage,all] or a pattern over one of the available cluster actions;" } } } diff --git a/docs/reference/rest-api/security/get-builtin-privileges.asciidoc b/docs/reference/rest-api/security/get-builtin-privileges.asciidoc index 8435f5539ab9d..7f3d75b926780 100644 --- a/docs/reference/rest-api/security/get-builtin-privileges.asciidoc +++ b/docs/reference/rest-api/security/get-builtin-privileges.asciidoc @@ -111,6 +111,7 @@ A successful call returns an object with "cluster", "index", and "remote_cluster "monitor_ml", "monitor_rollup", "monitor_snapshot", + "monitor_stats", "monitor_text_structure", "monitor_transform", "monitor_watcher", @@ -152,7 +153,8 @@ A successful call returns an object with "cluster", "index", and "remote_cluster "write" ], "remote_cluster" : [ - "monitor_enrich" + "monitor_enrich", + "monitor_stats" ] } -------------------------------------------------- diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 4edeacfa754c5..3134eb4966115 100644 --- 
a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -189,6 +189,7 @@ static TransportVersion def(int id) { public static final TransportVersion LOGSDB_TELEMETRY = def(8_784_00_0); public static final TransportVersion LOGSDB_TELEMETRY_STATS = def(8_785_00_0); public static final TransportVersion KQL_QUERY_ADDED = def(8_786_00_0); + public static final TransportVersion ROLE_MONITOR_STATS = def(8_787_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 8b920ac11cee7..193a82436f26a 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -88,5 +88,6 @@ tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version", "The number of functions is constantly increasing") task.skipTest("esql/80_text/reverse text", "The output type changed from TEXT to KEYWORD.") task.skipTest("esql/80_text/values function", "The output type changed from TEXT to KEYWORD.") + task.skipTest("privileges/11_builtin/Test get builtin privileges" ,"unnecessary to test compatibility") }) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/GetUserPrivilegesResponse.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/GetUserPrivilegesResponse.java index de351cd59c690..763ab6ccb9886 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/GetUserPrivilegesResponse.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/GetUserPrivilegesResponse.java @@ -115,7 +115,7 @@ public boolean hasRemoteIndicesPrivileges() { } public boolean hasRemoteClusterPrivileges() { - return remoteClusterPermissions.hasPrivileges(); + return remoteClusterPermissions.hasAnyPrivileges(); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Authentication.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Authentication.java index 04dda75692208..c2f40a3e393b9 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Authentication.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Authentication.java @@ -36,6 +36,7 @@ import org.elasticsearch.xpack.core.security.authc.service.ServiceAccountSettings; import org.elasticsearch.xpack.core.security.authc.support.AuthenticationContextSerializer; import org.elasticsearch.xpack.core.security.authz.RoleDescriptor; +import org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions; import org.elasticsearch.xpack.core.security.user.AnonymousUser; import org.elasticsearch.xpack.core.security.user.InternalUser; import org.elasticsearch.xpack.core.security.user.InternalUsers; @@ -76,6 +77,7 @@ import static org.elasticsearch.xpack.core.security.authc.AuthenticationField.FALLBACK_REALM_NAME; import static org.elasticsearch.xpack.core.security.authc.AuthenticationField.FALLBACK_REALM_TYPE; import static org.elasticsearch.xpack.core.security.authc.RealmDomain.REALM_DOMAIN_PARSER; +import static org.elasticsearch.xpack.core.security.authz.RoleDescriptor.Fields.REMOTE_CLUSTER; import static org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions.ROLE_REMOTE_CLUSTER_PRIVS; /** @@ -233,8 +235,8 @@ public Authentication 
maybeRewriteForOlderVersion(TransportVersion olderVersion) + "]" ); } - final Map newMetadata = maybeRewriteMetadata(olderVersion, this); + final Authentication newAuthentication; if (isRunAs()) { // The lookup user for run-as currently doesn't have authentication metadata associated with them because @@ -272,12 +274,23 @@ public Authentication maybeRewriteForOlderVersion(TransportVersion olderVersion) } private static Map maybeRewriteMetadata(TransportVersion olderVersion, Authentication authentication) { - if (authentication.isAuthenticatedAsApiKey()) { - return maybeRewriteMetadataForApiKeyRoleDescriptors(olderVersion, authentication); - } else if (authentication.isCrossClusterAccess()) { - return maybeRewriteMetadataForCrossClusterAccessAuthentication(olderVersion, authentication); - } else { - return authentication.getAuthenticatingSubject().getMetadata(); + try { + if (authentication.isAuthenticatedAsApiKey()) { + return maybeRewriteMetadataForApiKeyRoleDescriptors(olderVersion, authentication); + } else if (authentication.isCrossClusterAccess()) { + return maybeRewriteMetadataForCrossClusterAccessAuthentication(olderVersion, authentication); + } else { + return authentication.getAuthenticatingSubject().getMetadata(); + } + } catch (Exception e) { + // CCS workflows may swallow the exception message making this difficult to troubleshoot, so we explicitly log and re-throw + // here. It may result in duplicate logs, so we only log the message at warn level. + if (logger.isDebugEnabled()) { + logger.debug("Un-expected exception thrown while rewriting metadata. This is likely a bug.", e); + } else { + logger.warn("Un-expected exception thrown while rewriting metadata. This is likely a bug [" + e.getMessage() + "]"); + } + throw e; } } @@ -1323,6 +1336,7 @@ private static Map maybeRewriteMetadataForApiKeyRoleDescriptors( if (authentication.getEffectiveSubject().getTransportVersion().onOrAfter(ROLE_REMOTE_CLUSTER_PRIVS) && streamVersion.before(ROLE_REMOTE_CLUSTER_PRIVS)) { + // the authentication understands the remote_cluster field but the stream does not metadata = new HashMap<>(metadata); metadata.put( AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY, @@ -1336,7 +1350,26 @@ private static Map maybeRewriteMetadataForApiKeyRoleDescriptors( (BytesReference) metadata.get(AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY) ) ); - } + } else if (authentication.getEffectiveSubject().getTransportVersion().onOrAfter(ROLE_REMOTE_CLUSTER_PRIVS) + && streamVersion.onOrAfter(ROLE_REMOTE_CLUSTER_PRIVS)) { + // both the authentication object and the stream understand the remote_cluster field + // check each individual permission and remove as needed + metadata = new HashMap<>(metadata); + metadata.put( + AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY, + maybeRemoveRemoteClusterPrivilegesFromRoleDescriptors( + (BytesReference) metadata.get(AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY), + streamVersion + ) + ); + metadata.put( + AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY, + maybeRemoveRemoteClusterPrivilegesFromRoleDescriptors( + (BytesReference) metadata.get(AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY), + streamVersion + ) + ); + } if (authentication.getEffectiveSubject().getTransportVersion().onOrAfter(VERSION_API_KEY_ROLES_AS_BYTES) && streamVersion.before(VERSION_API_KEY_ROLES_AS_BYTES)) { @@ -1417,7 +1450,7 @@ private static BytesReference convertRoleDescriptorsMapToBytes(Map roleDescriptorsMap = convertRoleDescriptorsBytesToMap(roleDescriptorsBytes); + final 
Map<String, Object> roleDescriptorsMapMutated = new HashMap<>(roleDescriptorsMap); + final AtomicBoolean modified = new AtomicBoolean(false); + roleDescriptorsMap.forEach((key, value) -> { + if (value instanceof Map) { + Map<String, Object> roleDescriptor = (Map<String, Object>) value; + roleDescriptor.forEach((innerKey, innerValue) -> { + // example: remote_cluster=[{privileges=[monitor_enrich, monitor_stats], clusters=[*]}] + if (REMOTE_CLUSTER.getPreferredName().equals(innerKey)) { + assert innerValue instanceof List; + RemoteClusterPermissions discoveredRemoteClusterPermission = new RemoteClusterPermissions( + (List<Map<String, List<String>>>) innerValue + ); + RemoteClusterPermissions mutated = discoveredRemoteClusterPermission.removeUnsupportedPrivileges(outboundVersion); + if (mutated.equals(discoveredRemoteClusterPermission) == false) { + // swap out the old value with the new value + modified.set(true); + Map<String, Object> remoteClusterMap = new HashMap<>((Map<String, Object>) roleDescriptorsMapMutated.get(key)); + if (mutated.hasAnyPrivileges()) { + // has at least one group with privileges + remoteClusterMap.put(innerKey, mutated.toMap()); + } else { + // has no groups with privileges + remoteClusterMap.remove(innerKey); + } + roleDescriptorsMapMutated.put(key, remoteClusterMap); + } + } + }); + } + }); + if (modified.get()) { + logger.debug( + "mutated role descriptors. Changed from {} to {} for outbound version {}", + roleDescriptorsMap, + roleDescriptorsMapMutated, + outboundVersion + ); + return convertRoleDescriptorsMapToBytes(roleDescriptorsMapMutated); + } else { + // No need to serialize if we did not change anything. + logger.trace("no change to role descriptors {} for outbound version {}", roleDescriptorsMap, outboundVersion); + return roleDescriptorsBytes; + } + } + static boolean equivalentRealms(String name1, String type1, String name2, String type2) { if (false == type1.equals(type2)) { return false; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java index 8d069caf0496f..9f5aaa8562a88 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java @@ -6,6 +6,8 @@ */ package org.elasticsearch.xpack.core.security.authz; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.TransportVersion; @@ -62,6 +64,7 @@ public class RoleDescriptor implements ToXContentObject, Writeable { public static final TransportVersion SECURITY_ROLE_DESCRIPTION = TransportVersions.V_8_15_0; public static final String ROLE_TYPE = "role"; + private static final Logger logger = LogManager.getLogger(RoleDescriptor.class); private final String name; private final String[] clusterPrivileges; @@ -191,7 +194,7 @@ public RoleDescriptor( ? Collections.unmodifiableMap(transientMetadata) : Collections.singletonMap("enabled", true); this.remoteIndicesPrivileges = remoteIndicesPrivileges != null ? remoteIndicesPrivileges : RemoteIndicesPrivileges.NONE; - this.remoteClusterPermissions = remoteClusterPermissions != null && remoteClusterPermissions.hasPrivileges() + this.remoteClusterPermissions = remoteClusterPermissions != null && remoteClusterPermissions.hasAnyPrivileges() ?
remoteClusterPermissions : RemoteClusterPermissions.NONE; this.restriction = restriction != null ? restriction : Restriction.NONE; @@ -263,7 +266,7 @@ public boolean hasRemoteIndicesPrivileges() { } public boolean hasRemoteClusterPermissions() { - return remoteClusterPermissions.hasPrivileges(); + return remoteClusterPermissions.hasAnyPrivileges(); } public RemoteClusterPermissions getRemoteClusterPermissions() { @@ -830,25 +833,32 @@ private static RemoteClusterPermissions parseRemoteCluster(final String roleName currentFieldName = parser.currentName(); } else if (Fields.PRIVILEGES.match(currentFieldName, parser.getDeprecationHandler())) { privileges = readStringArray(roleName, parser, false); - if (privileges.length != 1 - || RemoteClusterPermissions.getSupportedRemoteClusterPermissions() - .contains(privileges[0].trim().toLowerCase(Locale.ROOT)) == false) { - throw new ElasticsearchParseException( - "failed to parse remote_cluster for role [{}]. " - + RemoteClusterPermissions.getSupportedRemoteClusterPermissions() - + " is the only value allowed for [{}] within [remote_cluster]", + if (Arrays.stream(privileges) + .map(s -> s.toLowerCase(Locale.ROOT).trim()) + .allMatch(RemoteClusterPermissions.getSupportedRemoteClusterPermissions()::contains) == false) { + final String message = String.format( + Locale.ROOT, + "failed to parse remote_cluster for role [%s]. " + + "%s are the only values allowed for [%s] within [remote_cluster]. Found %s", roleName, - currentFieldName + RemoteClusterPermissions.getSupportedRemoteClusterPermissions(), + currentFieldName, + Arrays.toString(privileges) ); + logger.info(message); + throw new ElasticsearchParseException(message); } } else if (Fields.CLUSTERS.match(currentFieldName, parser.getDeprecationHandler())) { clusters = readStringArray(roleName, parser, false); } else { - throw new ElasticsearchParseException( - "failed to parse remote_cluster for role [{}]. unexpected field [{}]", + final String message = String.format( + Locale.ROOT, + "failed to parse remote_cluster for role [%s]. unexpected field [%s]", roleName, currentFieldName ); + logger.info(message); + throw new ElasticsearchParseException(message); } } if (privileges != null && clusters == null) { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroup.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroup.java index 1c34a7829fcbb..ec245fae28612 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroup.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroup.java @@ -13,11 +13,15 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xpack.core.security.authz.RoleDescriptor; import org.elasticsearch.xpack.core.security.support.StringMatcher; import java.io.IOException; import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.core.security.authz.RoleDescriptor.Fields.CLUSTERS; +import static org.elasticsearch.xpack.core.security.authz.RoleDescriptor.Fields.PRIVILEGES; /** * Represents a group of permissions for a remote cluster. 
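* A group pairs one set of cluster privileges with the remote cluster aliases those privileges apply to.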
For example: @@ -41,6 +45,14 @@ public RemoteClusterPermissionGroup(StreamInput in) throws IOException { remoteClusterAliasMatcher = StringMatcher.of(remoteClusterAliases); } + public RemoteClusterPermissionGroup(Map<String, List<String>> remoteClusterGroup) { + assert remoteClusterGroup.get(PRIVILEGES.getPreferredName()) != null : "privileges must be non-null"; + assert remoteClusterGroup.get(CLUSTERS.getPreferredName()) != null : "clusters must be non-null"; + clusterPrivileges = remoteClusterGroup.get(PRIVILEGES.getPreferredName()).toArray(new String[0]); + remoteClusterAliases = remoteClusterGroup.get(CLUSTERS.getPreferredName()).toArray(new String[0]); + remoteClusterAliasMatcher = StringMatcher.of(remoteClusterAliases); + } + /** * @param clusterPrivileges The list of cluster privileges that are allowed for the remote cluster. must not be null or empty. * @param remoteClusterAliases The list of remote clusters that the privileges apply to. must not be null or empty. @@ -53,10 +65,14 @@ public RemoteClusterPermissionGroup(String[] clusterPrivileges, String[] remoteC throw new IllegalArgumentException("remote cluster groups must not be null or empty"); } if (Arrays.stream(clusterPrivileges).anyMatch(s -> Strings.hasText(s) == false)) { - throw new IllegalArgumentException("remote_cluster privileges must contain valid non-empty, non-null values"); + throw new IllegalArgumentException( + "remote_cluster privileges must contain valid non-empty, non-null values " + Arrays.toString(clusterPrivileges) + ); } if (Arrays.stream(remoteClusterAliases).anyMatch(s -> Strings.hasText(s) == false)) { - throw new IllegalArgumentException("remote_cluster clusters aliases must contain valid non-empty, non-null values"); + throw new IllegalArgumentException( + "remote_cluster clusters aliases must contain valid non-empty, non-null values " + Arrays.toString(remoteClusterAliases) + ); } this.clusterPrivileges = clusterPrivileges; @@ -86,11 +102,24 @@ public String[] remoteClusterAliases() { return Arrays.copyOf(remoteClusterAliases, remoteClusterAliases.length); } + /** + * Converts the group to a map representation. + * @return A map representation of the group.
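+ * For example, a group of privileges ["monitor_enrich", "monitor_stats"] for clusters ["*"] is returned as + * { "privileges" : ["monitor_enrich", "monitor_stats"], "clusters" : ["*"] }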
+ */ + public Map<String, List<String>> toMap() { + return Map.of( + PRIVILEGES.getPreferredName(), + Arrays.asList(clusterPrivileges), + CLUSTERS.getPreferredName(), + Arrays.asList(remoteClusterAliases) + ); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.array(RoleDescriptor.Fields.PRIVILEGES.getPreferredName(), clusterPrivileges); - builder.array(RoleDescriptor.Fields.CLUSTERS.getPreferredName(), remoteClusterAliases); + builder.array(PRIVILEGES.getPreferredName(), clusterPrivileges); + builder.array(CLUSTERS.getPreferredName(), remoteClusterAliases); builder.endObject(); return builder; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissions.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissions.java index 0d8880c33720b..1928cf117dde3 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissions.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissions.java @@ -29,13 +29,19 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.TreeSet; import java.util.stream.Collectors; +import static org.elasticsearch.TransportVersions.ROLE_MONITOR_STATS; + /** * Represents the set of permissions for remote clusters. This is intended to be the model for both the {@link RoleDescriptor} - * and {@link Role}. This model is not intended to be sent to a remote cluster, but can be (wire) serialized within a single cluster - * as well as the Xcontent serialization for the REST API and persistence of the role in the security index. The privileges modeled here - * will be converted to the appropriate cluster privileges when sent to a remote cluster. + * and {@link Role}. This model is intended to be converted to local cluster permissions + * {@link #collapseAndRemoveUnsupportedPrivileges(String, TransportVersion)} before being sent to the remote cluster. This model can also + * be included in the role descriptors for (normal) API keys sent between nodes/clusters. In both cases the outbound transport version can + * be used to remove permissions that are not available to older nodes or clusters. The methods {@link #removeUnsupportedPrivileges(TransportVersion)} + * and {@link #collapseAndRemoveUnsupportedPrivileges(String, TransportVersion)} are used to aid in ensuring correct privileges per + * transport version. * For example, on the local/querying cluster this model represents the following: * * "remote_cluster" : [ @@ -49,15 +55,18 @@ * } * ] * - * when sent to the remote cluster "clusterA", the privileges will be converted to the appropriate cluster privileges. For example: + * (RCS 2.0) when sent to the remote cluster "clusterA", the privileges will be converted to the appropriate cluster privileges. + * For example: * * "cluster": ["foo"] * - * and when sent to the remote cluster "clusterB", the privileges will be converted to the appropriate cluster privileges. For example: + * and (RCS 2.0) when sent to the remote cluster "clusterB", the privileges will be converted to the appropriate cluster privileges. + * For example: * * "cluster": ["bar"] * - * If the remote cluster does not support the privilege, as determined by the remote cluster version, the privilege will be not be sent.
+ For normal API keys and their role descriptors: if the remote cluster does not support the privilege, the privilege will not be sent. + Upstream code performs the removal, but this class owns the business logic for how to remove per outbound version. */ public class RemoteClusterPermissions implements NamedWriteable, ToXContentObject { @@ -70,19 +79,33 @@ public class RemoteClusterPermissions implements NamedWriteable, ToXContentObjec // package private non-final for testing static Map<TransportVersion, Set<String>> allowedRemoteClusterPermissions = Map.of( ROLE_REMOTE_CLUSTER_PRIVS, - Set.of(ClusterPrivilegeResolver.MONITOR_ENRICH.name()) + Set.of(ClusterPrivilegeResolver.MONITOR_ENRICH.name()), + ROLE_MONITOR_STATS, + Set.of(ClusterPrivilegeResolver.MONITOR_STATS.name()) ); + static final TransportVersion lastTransportVersionPermission = allowedRemoteClusterPermissions.keySet() + .stream() + .max(TransportVersion::compareTo) + .orElseThrow(); public static final RemoteClusterPermissions NONE = new RemoteClusterPermissions(); public static Set<String> getSupportedRemoteClusterPermissions() { - return allowedRemoteClusterPermissions.values().stream().flatMap(Set::stream).collect(Collectors.toSet()); + return allowedRemoteClusterPermissions.values().stream().flatMap(Set::stream).collect(Collectors.toCollection(TreeSet::new)); } public RemoteClusterPermissions(StreamInput in) throws IOException { remoteClusterPermissionGroups = in.readNamedWriteableCollectionAsList(RemoteClusterPermissionGroup.class); } + public RemoteClusterPermissions(List<Map<String, List<String>>> remoteClusters) { + remoteClusterPermissionGroups = new ArrayList<>(); + for (Map<String, List<String>> remoteCluster : remoteClusters) { + RemoteClusterPermissionGroup remoteClusterPermissionGroup = new RemoteClusterPermissionGroup(remoteCluster); + remoteClusterPermissionGroups.add(remoteClusterPermissionGroup); + } + } + public RemoteClusterPermissions() { remoteClusterPermissionGroups = new ArrayList<>(); } @@ -97,10 +120,64 @@ public RemoteClusterPermissions addGroup(RemoteClusterPermissionGroup remoteClus } /** - * Gets the privilege names for the remote cluster. This method will collapse all groups to single String[] all lowercase - * and will only return the appropriate privileges for the provided remote cluster version. + * Will remove any unsupported privileges for the provided outbound version. This method will not modify the current instance. + * This is useful for (normal) API key role descriptors to help ensure that we don't send unsupported privileges. The result + * may contain no groups if all privileges are removed. {@link #hasAnyPrivileges()} can be used to check if there are + * any privileges left.
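+ * For example, a sketch of the intended behaviour using the versions defined above: + * RemoteClusterPermissions perms = new RemoteClusterPermissions() + *     .addGroup(new RemoteClusterPermissionGroup(new String[] { "monitor_enrich", "monitor_stats" }, new String[] { "*" })); + * perms.removeUnsupportedPrivileges(ROLE_REMOTE_CLUSTER_PRIVS); // copy with a single group holding only monitor_enrich + * perms.removeUnsupportedPrivileges(ROLE_MONITOR_STATS); // returns this unchanged, since all privileges are supported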
+ * @param outboundVersion The version by which to remove unsupported privileges; this is typically the version of the remote cluster + * @return a new instance of RemoteClusterPermissions with the unsupported privileges removed */ - public String[] privilegeNames(final String remoteClusterAlias, TransportVersion remoteClusterVersion) { + public RemoteClusterPermissions removeUnsupportedPrivileges(TransportVersion outboundVersion) { + Objects.requireNonNull(outboundVersion, "outboundVersion must not be null"); + if (outboundVersion.onOrAfter(lastTransportVersionPermission)) { + return this; + } + RemoteClusterPermissions copyForOutboundVersion = new RemoteClusterPermissions(); + Set<String> allowedPermissionsPerVersion = getAllowedPermissionsPerVersion(outboundVersion); + for (RemoteClusterPermissionGroup group : remoteClusterPermissionGroups) { + String[] privileges = group.clusterPrivileges(); + List<String> outboundPrivileges = new ArrayList<>(privileges.length); + for (String privilege : privileges) { + if (allowedPermissionsPerVersion.contains(privilege.toLowerCase(Locale.ROOT))) { + outboundPrivileges.add(privilege); + } + } + if (outboundPrivileges.isEmpty() == false) { + RemoteClusterPermissionGroup outboundGroup = new RemoteClusterPermissionGroup( + outboundPrivileges.toArray(new String[0]), + group.remoteClusterAliases() + ); + copyForOutboundVersion.addGroup(outboundGroup); + if (logger.isDebugEnabled()) { + if (group.equals(outboundGroup) == false) { + logger.debug( + "Removed unsupported remote cluster permissions. Remaining {} for remote cluster [{}] for version [{}]. " + + "Due to the remote cluster version, only the following permissions are allowed: {}", + outboundPrivileges, + group.remoteClusterAliases(), + outboundVersion, + allowedPermissionsPerVersion + ); + } + } + } else { + logger.debug( + "Removed all remote cluster permissions for remote cluster [{}]. " + + "Due to the remote cluster version, only the following permissions are allowed: {}", + group.remoteClusterAliases(), + allowedPermissionsPerVersion + ); + } + } + return copyForOutboundVersion; + } + + /** + * Gets all the privilege names for the remote cluster. This method will collapse all groups into a single String[], all lowercase, + * and will only return the appropriate privileges for the provided remote cluster version. This is useful for RCS 2.0 to ensure + * that we properly convert all the remote_cluster -> cluster privileges per remote cluster.
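+ * For example (illustrative), given the groups shown in the class javadoc: + * String[] privileges = permissions.collapseAndRemoveUnsupportedPrivileges("clusterA", TransportVersion.current()); + * // privileges now holds the lowercase names from every group matching "clusterA" that the outbound version supports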
+ */ + public String[] collapseAndRemoveUnsupportedPrivileges(final String remoteClusterAlias, TransportVersion outboundVersion) { // get all privileges for the remote cluster Set<String> groupPrivileges = remoteClusterPermissionGroups.stream() @@ -111,13 +188,7 @@ public String[] privilegeNames(final String remoteClusterAlias, TransportVersion .collect(Collectors.toSet()); // find all the privileges that are allowed for the remote cluster version - Set<String> allowedPermissionsPerVersion = allowedRemoteClusterPermissions.entrySet() - .stream() - .filter((entry) -> entry.getKey().onOrBefore(remoteClusterVersion)) - .map(Map.Entry::getValue) - .flatMap(Set::stream) - .map(s -> s.toLowerCase(Locale.ROOT)) - .collect(Collectors.toSet()); + Set<String> allowedPermissionsPerVersion = getAllowedPermissionsPerVersion(outboundVersion); // intersect the two sets to get the allowed privileges for the remote cluster version Set<String> allowedPrivileges = new HashSet<>(groupPrivileges); @@ -137,13 +208,21 @@ public String[] privilegeNames(final String remoteClusterAlias, TransportVersion return allowedPrivileges.stream().sorted().toArray(String[]::new); } + /** + * Converts this object to its {@link Map} representation. + * @return a list of maps representing the remote cluster permissions + */ + public List<Map<String, List<String>>> toMap() { + return remoteClusterPermissionGroups.stream().map(RemoteClusterPermissionGroup::toMap).toList(); + } + /** * Validates the remote cluster permissions (regardless of remote cluster version). * This method will throw an {@link IllegalArgumentException} if the permissions are invalid. * Generally, this method is just a safety check and validity should be checked before adding the permissions to this class. */ public void validate() { - assert hasPrivileges(); + assert hasAnyPrivileges(); Set<String> invalid = getUnsupportedPrivileges(); if (invalid.isEmpty() == false) { throw new IllegalArgumentException( @@ -173,11 +252,11 @@ private Set<String> getUnsupportedPrivileges() { return invalid; } - public boolean hasPrivileges(final String remoteClusterAlias) { + public boolean hasAnyPrivileges(final String remoteClusterAlias) { return remoteClusterPermissionGroups.stream().anyMatch(remoteIndicesGroup -> remoteIndicesGroup.hasPrivileges(remoteClusterAlias)); } - public boolean hasPrivileges() { + public boolean hasAnyPrivileges() { return remoteClusterPermissionGroups.isEmpty() == false; } @@ -185,6 +264,16 @@ public List<RemoteClusterPermissionGroup> groups() { return Collections.unmodifiableList(remoteClusterPermissionGroups); } + private Set<String> getAllowedPermissionsPerVersion(TransportVersion outboundVersion) { + return allowedRemoteClusterPermissions.entrySet() + .stream() + .filter((entry) -> entry.getKey().onOrBefore(outboundVersion)) + .map(Map.Entry::getValue) + .flatMap(Set::stream) + .map(s -> s.toLowerCase(Locale.ROOT)) + .collect(Collectors.toSet()); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { for (RemoteClusterPermissionGroup remoteClusterPermissionGroup : remoteClusterPermissionGroups) { @@ -220,4 +309,5 @@ public String toString() { public String getWriteableName() { return NAME; } + } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/Role.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/Role.java index d8d56a4fbb247..f52f8f85f006d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/Role.java +++
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/Role.java @@ -283,7 +283,7 @@ public Builder addRemoteIndicesGroup( public Builder addRemoteClusterPermissions(RemoteClusterPermissions remoteClusterPermissions) { Objects.requireNonNull(remoteClusterPermissions, "remoteClusterPermissions must not be null"); assert this.remoteClusterPermissions == null : "addRemoteClusterPermissions should only be called once"; - if (remoteClusterPermissions.hasPrivileges()) { + if (remoteClusterPermissions.hasAnyPrivileges()) { remoteClusterPermissions.validate(); } this.remoteClusterPermissions = remoteClusterPermissions; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/SimpleRole.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/SimpleRole.java index 08c86c5f71f4f..0ec9d2a48316a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/SimpleRole.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/permission/SimpleRole.java @@ -210,7 +210,7 @@ public RoleDescriptorsIntersection getRoleDescriptorsIntersectionForRemoteCluste final RemoteIndicesPermission remoteIndicesPermission = this.remoteIndicesPermission.forCluster(remoteClusterAlias); if (remoteIndicesPermission.remoteIndicesGroups().isEmpty() - && remoteClusterPermissions.hasPrivileges(remoteClusterAlias) == false) { + && remoteClusterPermissions.hasAnyPrivileges(remoteClusterAlias) == false) { return RoleDescriptorsIntersection.EMPTY; } @@ -224,7 +224,7 @@ public RoleDescriptorsIntersection getRoleDescriptorsIntersectionForRemoteCluste return new RoleDescriptorsIntersection( new RoleDescriptor( REMOTE_USER_ROLE_NAME, - remoteClusterPermissions.privilegeNames(remoteClusterAlias, remoteClusterVersion), + remoteClusterPermissions.collapseAndRemoveUnsupportedPrivileges(remoteClusterAlias, remoteClusterVersion), // The role descriptors constructed here may be cached in raw byte form, using a hash of their content as a // cache key; we therefore need deterministic order when constructing them here, to ensure cache hits for // equivalent role descriptors diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege/ClusterPrivilegeResolver.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege/ClusterPrivilegeResolver.java index 3d1b378f4f51e..00d45fb135fb2 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege/ClusterPrivilegeResolver.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege/ClusterPrivilegeResolver.java @@ -110,6 +110,8 @@ public class ClusterPrivilegeResolver { private static final Set MONITOR_WATCHER_PATTERN = Set.of("cluster:monitor/xpack/watcher/*"); private static final Set MONITOR_ROLLUP_PATTERN = Set.of("cluster:monitor/xpack/rollup/*"); private static final Set MONITOR_ENRICH_PATTERN = Set.of("cluster:monitor/xpack/enrich/*", "cluster:admin/xpack/enrich/get"); + // intentionally cluster:monitor/stats* to match cluster:monitor/stats, cluster:monitor/stats[n] and cluster:monitor/stats/remote + private static final Set MONITOR_STATS_PATTERN = Set.of("cluster:monitor/stats*"); private static final Set ALL_CLUSTER_PATTERN = Set.of( "cluster:*", @@ -208,7 +210,11 @@ public class ClusterPrivilegeResolver { // esql enrich 
"cluster:monitor/xpack/enrich/esql/resolve_policy", "cluster:internal:data/read/esql/open_exchange", - "cluster:internal:data/read/esql/exchange" + "cluster:internal:data/read/esql/exchange", + // cluster stats for remote clusters + "cluster:monitor/stats/remote", + "cluster:monitor/stats", + "cluster:monitor/stats[n]" ); private static final Set CROSS_CLUSTER_REPLICATION_PATTERN = Set.of( RemoteClusterService.REMOTE_CLUSTER_HANDSHAKE_ACTION_NAME, @@ -243,6 +249,7 @@ public class ClusterPrivilegeResolver { public static final NamedClusterPrivilege MONITOR_WATCHER = new ActionClusterPrivilege("monitor_watcher", MONITOR_WATCHER_PATTERN); public static final NamedClusterPrivilege MONITOR_ROLLUP = new ActionClusterPrivilege("monitor_rollup", MONITOR_ROLLUP_PATTERN); public static final NamedClusterPrivilege MONITOR_ENRICH = new ActionClusterPrivilege("monitor_enrich", MONITOR_ENRICH_PATTERN); + public static final NamedClusterPrivilege MONITOR_STATS = new ActionClusterPrivilege("monitor_stats", MONITOR_STATS_PATTERN); public static final NamedClusterPrivilege MANAGE = new ActionClusterPrivilege("manage", ALL_CLUSTER_PATTERN, ALL_SECURITY_PATTERN); public static final NamedClusterPrivilege MANAGE_INFERENCE = new ActionClusterPrivilege("manage_inference", MANAGE_INFERENCE_PATTERN); public static final NamedClusterPrivilege MANAGE_ML = new ActionClusterPrivilege("manage_ml", MANAGE_ML_PATTERN); @@ -424,6 +431,7 @@ public class ClusterPrivilegeResolver { MONITOR_WATCHER, MONITOR_ROLLUP, MONITOR_ENRICH, + MONITOR_STATS, MANAGE, MANAGE_CONNECTOR, MANAGE_INFERENCE, @@ -499,7 +507,7 @@ public static NamedClusterPrivilege resolve(String name) { + Strings.collectionToCommaDelimitedString(VALUES.keySet()) + "] or a pattern over one of the available " + "cluster actions"; - logger.debug(errorMessage); + logger.warn(errorMessage); throw new IllegalArgumentException(errorMessage); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/KibanaOwnedReservedRoleDescriptors.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/KibanaOwnedReservedRoleDescriptors.java index 5fb753ab55aab..259e66f633bac 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/KibanaOwnedReservedRoleDescriptors.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/KibanaOwnedReservedRoleDescriptors.java @@ -20,6 +20,9 @@ import org.elasticsearch.xpack.core.security.action.profile.SuggestProfilesAction; import org.elasticsearch.xpack.core.security.action.user.ProfileHasPrivilegesAction; import org.elasticsearch.xpack.core.security.authz.RoleDescriptor; +import org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissionGroup; +import org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions; +import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilegeResolver; import org.elasticsearch.xpack.core.security.authz.privilege.ConfigurableClusterPrivilege; import org.elasticsearch.xpack.core.security.authz.privilege.ConfigurableClusterPrivileges; import org.elasticsearch.xpack.core.security.support.MetadataUtils; @@ -497,7 +500,15 @@ static RoleDescriptor kibanaSystem(String name) { getRemoteIndicesReadPrivileges("metrics-apm.*"), getRemoteIndicesReadPrivileges("traces-apm.*"), getRemoteIndicesReadPrivileges("traces-apm-*") }, - null, + new RemoteClusterPermissions().addGroup( + new RemoteClusterPermissionGroup( + 
RemoteClusterPermissions.getSupportedRemoteClusterPermissions() + .stream() + .filter(s -> s.equals(ClusterPrivilegeResolver.MONITOR_STATS.name())) + .toArray(String[]::new), + new String[] { "*" } + ) + ), null, "Grants access necessary for the Kibana system user to read from and write to the Kibana indices, " + "manage index templates and tokens, and check the availability of the Elasticsearch cluster. " diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/apikey/CrossClusterApiKeyRoleDescriptorBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/apikey/CrossClusterApiKeyRoleDescriptorBuilderTests.java index 22590e155e642..1dfd68ea95485 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/apikey/CrossClusterApiKeyRoleDescriptorBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/apikey/CrossClusterApiKeyRoleDescriptorBuilderTests.java @@ -10,11 +10,16 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.core.Strings; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.xcontent.XContentParseException; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.core.security.authc.AuthenticationTestHelper; import org.elasticsearch.xpack.core.security.authz.RoleDescriptor; +import org.elasticsearch.xpack.core.security.authz.permission.ClusterPermission; import org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions; +import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilege; +import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilegeResolver; import java.io.IOException; import java.util.List; @@ -27,6 +32,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; public class CrossClusterApiKeyRoleDescriptorBuilderTests extends ESTestCase { @@ -356,9 +362,42 @@ public void testEmptyAccessIsNotAllowed() throws IOException { } public void testAPIKeyAllowsAllRemoteClusterPrivilegesForCCS() { - // if users can add remote cluster permissions to a role, then the APIKey should also allow that for that permission - // the inverse however, is not guaranteed. 
cross_cluster_search exists largely for internal use and is not exposed to the users role - assertTrue(Set.of(CCS_CLUSTER_PRIVILEGE_NAMES).containsAll(RemoteClusterPermissions.getSupportedRemoteClusterPermissions())); + // test to help ensure that, for each remote cluster permission, at least one action it allows is supported by CCS + List<String> actionsToTest = List.of("cluster:monitor/xpack/enrich/esql/resolve_policy", "cluster:monitor/stats/remote"); + // if you add new remote cluster permissions, please define an action we can test to help ensure it is supported by RCS 2.0 + assertThat(actionsToTest.size(), equalTo(RemoteClusterPermissions.getSupportedRemoteClusterPermissions().size())); + + for (String privilege : RemoteClusterPermissions.getSupportedRemoteClusterPermissions()) { + boolean actionPassesRemoteClusterPermissionCheck = false; + ClusterPrivilege clusterPrivilege = ClusterPrivilegeResolver.resolve(privilege); + // each remote cluster privilege has an action to test + for (String action : actionsToTest) { + if (clusterPrivilege.buildPermission(ClusterPermission.builder()) + .build() + .check(action, mock(TransportRequest.class), AuthenticationTestHelper.builder().build())) { + actionPassesRemoteClusterPermissionCheck = true; + break; + } + } + assertTrue( + "privilege [" + privilege + "] does not cover any actions among [" + actionsToTest + "]", + actionPassesRemoteClusterPermissionCheck + ); + } + // test that the actions pass the privilege check for CCS + for (String privilege : Set.of(CCS_CLUSTER_PRIVILEGE_NAMES)) { + boolean actionPassesRemoteCCSCheck = false; + ClusterPrivilege clusterPrivilege = ClusterPrivilegeResolver.resolve(privilege); + for (String action : actionsToTest) { + if (clusterPrivilege.buildPermission(ClusterPermission.builder()) + .build() + .check(action, mock(TransportRequest.class), AuthenticationTestHelper.builder().build())) { + actionPassesRemoteCCSCheck = true; + break; + } + } + assertTrue(actionPassesRemoteCCSCheck); + } } private static void assertRoleDescriptor( diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/role/PutRoleRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/role/PutRoleRequestTests.java index 97255502bc7be..239d48ca9c2e1 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/role/PutRoleRequestTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/action/role/PutRoleRequestTests.java @@ -104,7 +104,7 @@ public void testValidationErrorWithUnknownRemoteClusterPrivilegeName() { } request.putRemoteCluster(remoteClusterPermissions); assertValidationError("Invalid remote_cluster permissions found. 
Please remove the following: [", request); - assertValidationError("Only [monitor_enrich] are allowed", request); + assertValidationError("Only [monitor_enrich, monitor_stats] are allowed", request); } public void testValidationErrorWithEmptyClustersInRemoteIndices() { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/AuthenticationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/AuthenticationTests.java index 66e246d1c8a50..c999c970a76da 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/AuthenticationTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/AuthenticationTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.transport.RemoteClusterPortSettings; +import org.elasticsearch.xcontent.ObjectPath; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; @@ -32,6 +33,7 @@ import org.elasticsearch.xpack.core.security.authz.RoleDescriptorsIntersection; import org.elasticsearch.xpack.core.security.user.AnonymousUser; import org.elasticsearch.xpack.core.security.user.User; +import org.hamcrest.Matchers; import java.io.IOException; import java.util.Arrays; @@ -42,6 +44,8 @@ import java.util.stream.Collectors; import static java.util.Map.entry; +import static org.elasticsearch.TransportVersions.ROLE_MONITOR_STATS; +import static org.elasticsearch.xpack.core.security.authc.Authentication.VERSION_API_KEY_ROLES_AS_BYTES; import static org.elasticsearch.xpack.core.security.authc.AuthenticationTestHelper.randomCrossClusterAccessSubjectInfo; import static org.elasticsearch.xpack.core.security.authc.CrossClusterAccessSubjectInfoTests.randomRoleDescriptorsIntersection; import static org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions.ROLE_REMOTE_CLUSTER_PRIVS; @@ -1070,7 +1074,7 @@ public void testMaybeRewriteMetadataForApiKeyRoleDescriptorsWithRemoteIndices() // pick a version before that of the authentication instance to force a rewrite final TransportVersion olderVersion = TransportVersionUtils.randomVersionBetween( random(), - Authentication.VERSION_API_KEY_ROLES_AS_BYTES, + VERSION_API_KEY_ROLES_AS_BYTES, TransportVersionUtils.getPreviousVersion(original.getEffectiveSubject().getTransportVersion()) ); @@ -1115,7 +1119,7 @@ public void testMaybeRewriteMetadataForApiKeyRoleDescriptorsWithRemoteCluster() // pick a version before that of the authentication instance to force a rewrite final TransportVersion olderVersion = TransportVersionUtils.randomVersionBetween( random(), - Authentication.VERSION_API_KEY_ROLES_AS_BYTES, + VERSION_API_KEY_ROLES_AS_BYTES, TransportVersionUtils.getPreviousVersion(original.getEffectiveSubject().getTransportVersion()) ); @@ -1135,6 +1139,84 @@ public void testMaybeRewriteMetadataForApiKeyRoleDescriptorsWithRemoteCluster() ); } + public void testMaybeRewriteMetadataForApiKeyRoleDescriptorsWithRemoteClusterRemovePrivs() throws IOException { + final String apiKeyId = randomAlphaOfLengthBetween(1, 10); + final String apiKeyName = randomAlphaOfLengthBetween(1, 10); + Map metadata = Map.ofEntries( + entry(AuthenticationField.API_KEY_ID_KEY, apiKeyId), + entry(AuthenticationField.API_KEY_NAME_KEY, apiKeyName), + entry(AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY, new BytesArray(""" + 
{"base_role":{"cluster":["all"], + "remote_cluster":[{"privileges":["monitor_enrich", "monitor_stats"],"clusters":["*"]}] + }}""")), + entry(AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY, new BytesArray(""" + {"limited_by_role":{"cluster":["*"], + "remote_cluster":[{"privileges":["monitor_enrich", "monitor_stats"],"clusters":["*"]}] + }}""")) + ); + + final Authentication with2privs = AuthenticationTestHelper.builder() + .apiKey() + .metadata(metadata) + .transportVersion(TransportVersion.current()) + .build(); + + // pick a version that will only remove one of the two privileges + final TransportVersion olderVersion = TransportVersionUtils.randomVersionBetween( + random(), + ROLE_REMOTE_CLUSTER_PRIVS, + TransportVersionUtils.getPreviousVersion(ROLE_MONITOR_STATS) + ); + + Map rewrittenMetadata = with2privs.maybeRewriteForOlderVersion(olderVersion).getEffectiveSubject().getMetadata(); + assertThat(rewrittenMetadata.keySet(), equalTo(with2privs.getAuthenticatingSubject().getMetadata().keySet())); + + // only one of the two privileges are left after the rewrite + BytesReference baseRoleBytes = (BytesReference) rewrittenMetadata.get(AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY); + Map baseRoleAsMap = XContentHelper.convertToMap(baseRoleBytes, false, XContentType.JSON).v2(); + assertThat(ObjectPath.eval("base_role.remote_cluster.0.privileges", baseRoleAsMap), Matchers.contains("monitor_enrich")); + assertThat(ObjectPath.eval("base_role.remote_cluster.0.clusters", baseRoleAsMap), notNullValue()); + BytesReference limitedByRoleBytes = (BytesReference) rewrittenMetadata.get( + AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY + ); + Map limitedByRoleAsMap = XContentHelper.convertToMap(limitedByRoleBytes, false, XContentType.JSON).v2(); + assertThat(ObjectPath.eval("limited_by_role.remote_cluster.0.privileges", limitedByRoleAsMap), Matchers.contains("monitor_enrich")); + assertThat(ObjectPath.eval("limited_by_role.remote_cluster.0.clusters", limitedByRoleAsMap), notNullValue()); + + // same version, but it removes the only defined privilege + metadata = Map.ofEntries( + entry(AuthenticationField.API_KEY_ID_KEY, apiKeyId), + entry(AuthenticationField.API_KEY_NAME_KEY, apiKeyName), + entry(AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY, new BytesArray(""" + {"base_role":{"cluster":["all"], + "remote_cluster":[{"privileges":["monitor_stats"],"clusters":["*"]}] + }}""")), + entry(AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY, new BytesArray(""" + {"limited_by_role":{"cluster":["*"], + "remote_cluster":[{"privileges":["monitor_stats"],"clusters":["*"]}] + }}""")) + ); + + final Authentication with1priv = AuthenticationTestHelper.builder() + .apiKey() + .metadata(metadata) + .transportVersion(TransportVersion.current()) + .build(); + + rewrittenMetadata = with1priv.maybeRewriteForOlderVersion(olderVersion).getEffectiveSubject().getMetadata(); + assertThat(rewrittenMetadata.keySet(), equalTo(with1priv.getAuthenticatingSubject().getMetadata().keySet())); + + // the one privileges is removed after the rewrite, which removes the full "remote_cluster" object + baseRoleBytes = (BytesReference) rewrittenMetadata.get(AuthenticationField.API_KEY_ROLE_DESCRIPTORS_KEY); + baseRoleAsMap = XContentHelper.convertToMap(baseRoleBytes, false, XContentType.JSON).v2(); + assertThat(ObjectPath.eval("base_role.remote_cluster", baseRoleAsMap), nullValue()); + assertThat(ObjectPath.eval("base_role.cluster", baseRoleAsMap), notNullValue()); + limitedByRoleBytes = (BytesReference) 
rewrittenMetadata.get(AuthenticationField.API_KEY_LIMITED_ROLE_DESCRIPTORS_KEY); + limitedByRoleAsMap = XContentHelper.convertToMap(limitedByRoleBytes, false, XContentType.JSON).v2(); + assertThat(ObjectPath.eval("limited_by_role.remote_cluster", limitedByRoleAsMap), nullValue()); + assertThat(ObjectPath.eval("limited_by_role.cluster", limitedByRoleAsMap), notNullValue()); + } + public void testMaybeRemoveRemoteIndicesFromRoleDescriptors() { final boolean includeClusterPrivileges = randomBoolean(); final BytesReference roleWithoutRemoteIndices = new BytesArray(Strings.format(""" diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptorTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptorTests.java index 94430a4ed5bba..218876c7d40e8 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptorTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptorTests.java @@ -542,6 +542,34 @@ public void testParseInvalidRemoteCluster() throws IOException { () -> RoleDescriptor.parserBuilder().build().parse("test", new BytesArray(q4), XContentType.JSON) ); assertThat(illegalArgumentException.getMessage(), containsString("remote cluster groups must not be null or empty")); + + // one invalid privilege + String q5 = """ + { + "remote_cluster": [ + { + "privileges": [ + "monitor_stats", "read_pipeline" + ], + "clusters": [ + "*" + ] + } + ] + }"""; + + ElasticsearchParseException parseException = expectThrows( + ElasticsearchParseException.class, + () -> RoleDescriptor.parserBuilder().build().parse("test", new BytesArray(q5), XContentType.JSON) + ); + assertThat( + parseException.getMessage(), + containsString( + "failed to parse remote_cluster for role [test]. " + + "[monitor_enrich, monitor_stats] are the only values allowed for [privileges] within [remote_cluster]. 
" + + "Found [monitor_stats, read_pipeline]" + ) + ); } public void testParsingFieldPermissionsUsesCache() throws IOException { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroupTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroupTests.java index cd269bd1a97b3..0b99db826d540 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroupTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionGroupTests.java @@ -16,6 +16,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Locale; +import java.util.Map; import static org.hamcrest.Matchers.containsString; @@ -90,7 +91,7 @@ public void testInvalidValues() { ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, invalidClusterAlias); - assertEquals("remote_cluster clusters aliases must contain valid non-empty, non-null values", e.getMessage()); + assertThat(e.getMessage(), containsString("remote_cluster clusters aliases must contain valid non-empty, non-null values")); final ThrowingRunnable invalidPermission = randomFrom( () -> new RemoteClusterPermissionGroup(new String[] { null }, new String[] { "bar" }), @@ -100,7 +101,17 @@ public void testInvalidValues() { ); IllegalArgumentException e2 = expectThrows(IllegalArgumentException.class, invalidPermission); - assertEquals("remote_cluster privileges must contain valid non-empty, non-null values", e2.getMessage()); + assertThat(e2.getMessage(), containsString("remote_cluster privileges must contain valid non-empty, non-null values")); + } + + public void testToMap() { + String[] privileges = generateRandomStringArray(5, 5, false, false); + String[] clusters = generateRandomStringArray(5, 5, false, false); + RemoteClusterPermissionGroup remoteClusterPermissionGroup = new RemoteClusterPermissionGroup(privileges, clusters); + assertEquals( + Map.of("privileges", Arrays.asList(privileges), "clusters", Arrays.asList(clusters)), + remoteClusterPermissionGroup.toMap() + ); } @Override diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionsTests.java index 5b5a895f12ae8..2c31965009273 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/permission/RemoteClusterPermissionsTests.java @@ -15,6 +15,8 @@ import org.elasticsearch.test.AbstractXContentSerializingTestCase; import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.security.authz.RoleDescriptor; +import org.elasticsearch.xpack.core.security.xcontent.XContentUtils; import org.junit.Before; import java.io.IOException; @@ -27,8 +29,11 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import static org.elasticsearch.TransportVersions.ROLE_MONITOR_STATS; import static org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions.ROLE_REMOTE_CLUSTER_PRIVS; +import static 
org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions.lastTransportVersionPermission; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -85,13 +90,13 @@ public void testMatcher() { for (int i = 0; i < generateRandomGroups(true).size(); i++) { String[] clusters = groupClusters.get(i); for (String cluster : clusters) { - assertTrue(remoteClusterPermission.hasPrivileges(cluster)); - assertFalse(remoteClusterPermission.hasPrivileges(randomAlphaOfLength(20))); + assertTrue(remoteClusterPermission.hasAnyPrivileges(cluster)); + assertFalse(remoteClusterPermission.hasAnyPrivileges(randomAlphaOfLength(20))); } } } - public void testPrivilegeNames() { + public void testCollapseAndRemoveUnsupportedPrivileges() { Map> original = RemoteClusterPermissions.allowedRemoteClusterPermissions; try { // create random groups with random privileges for random clusters @@ -108,7 +113,7 @@ public void testPrivilegeNames() { String[] privileges = groupPrivileges.get(i); String[] clusters = groupClusters.get(i); for (String cluster : clusters) { - String[] found = remoteClusterPermission.privilegeNames(cluster, TransportVersion.current()); + String[] found = remoteClusterPermission.collapseAndRemoveUnsupportedPrivileges(cluster, TransportVersion.current()); Arrays.sort(found); // ensure all lowercase since the privilege names are case insensitive and the method will result in lowercase for (int j = 0; j < privileges.length; j++) { @@ -126,13 +131,14 @@ public void testPrivilegeNames() { // create random groups with random privileges for random clusters List randomGroups = generateRandomGroups(true); // replace a random value with one that is allowed - groupPrivileges.get(0)[0] = "monitor_enrich"; + String singleValidPrivilege = randomFrom(RemoteClusterPermissions.allowedRemoteClusterPermissions.get(TransportVersion.current())); + groupPrivileges.get(0)[0] = singleValidPrivilege; for (int i = 0; i < randomGroups.size(); i++) { String[] privileges = groupPrivileges.get(i); String[] clusters = groupClusters.get(i); for (String cluster : clusters) { - String[] found = remoteClusterPermission.privilegeNames(cluster, TransportVersion.current()); + String[] found = remoteClusterPermission.collapseAndRemoveUnsupportedPrivileges(cluster, TransportVersion.current()); Arrays.sort(found); // ensure all lowercase since the privilege names are case insensitive and the method will result in lowercase for (int j = 0; j < privileges.length; j++) { @@ -149,7 +155,7 @@ public void testPrivilegeNames() { assertFalse(Arrays.equals(privileges, found)); if (i == 0) { // ensure that for the current version we only find the valid "monitor_enrich" - assertThat(Set.of(found), equalTo(Set.of("monitor_enrich"))); + assertThat(Set.of(found), equalTo(Set.of(singleValidPrivilege))); } else { // all other groups should be found to not have any privileges assertTrue(found.length == 0); @@ -159,21 +165,26 @@ public void testPrivilegeNames() { } } - public void testMonitorEnrichPerVersion() { - // test monitor_enrich before, after and on monitor enrich version - String[] privileges = randomBoolean() ? 
new String[] { "monitor_enrich" } : new String[] { "monitor_enrich", "foo", "bar" }; + public void testPermissionsPerVersion() { + testPermissionPerVersion("monitor_enrich", ROLE_REMOTE_CLUSTER_PRIVS); + testPermissionPerVersion("monitor_stats", ROLE_MONITOR_STATS); + } + + private void testPermissionPerVersion(String permission, TransportVersion version) { + // test permission before, after and on the version + String[] privileges = randomBoolean() ? new String[] { permission } : new String[] { permission, "foo", "bar" }; String[] before = new RemoteClusterPermissions().addGroup(new RemoteClusterPermissionGroup(privileges, new String[] { "*" })) - .privilegeNames("*", TransportVersionUtils.getPreviousVersion(ROLE_REMOTE_CLUSTER_PRIVS)); - // empty set since monitor_enrich is not allowed in the before version + .collapseAndRemoveUnsupportedPrivileges("*", TransportVersionUtils.getPreviousVersion(version)); + // empty set since permissions is not allowed in the before version assertThat(Set.of(before), equalTo(Collections.emptySet())); String[] on = new RemoteClusterPermissions().addGroup(new RemoteClusterPermissionGroup(privileges, new String[] { "*" })) - .privilegeNames("*", ROLE_REMOTE_CLUSTER_PRIVS); - // only monitor_enrich since the other values are not allowed - assertThat(Set.of(on), equalTo(Set.of("monitor_enrich"))); + .collapseAndRemoveUnsupportedPrivileges("*", version); + // the permission is found on that provided version + assertThat(Set.of(on), equalTo(Set.of(permission))); String[] after = new RemoteClusterPermissions().addGroup(new RemoteClusterPermissionGroup(privileges, new String[] { "*" })) - .privilegeNames("*", TransportVersion.current()); - // only monitor_enrich since the other values are not allowed - assertThat(Set.of(after), equalTo(Set.of("monitor_enrich"))); + .collapseAndRemoveUnsupportedPrivileges("*", TransportVersion.current()); + // current version (after the version) has the permission + assertThat(Set.of(after), equalTo(Set.of(permission))); } public void testValidate() { @@ -181,12 +192,70 @@ public void testValidate() { // random values not allowed IllegalArgumentException error = expectThrows(IllegalArgumentException.class, () -> remoteClusterPermission.validate()); assertTrue(error.getMessage().contains("Invalid remote_cluster permissions found. 
Please remove the following:")); - assertTrue(error.getMessage().contains("Only [monitor_enrich] are allowed")); + assertTrue(error.getMessage().contains("Only [monitor_enrich, monitor_stats] are allowed")); new RemoteClusterPermissions().addGroup(new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "*" })) .validate(); // no error } + public void testToMap() { + RemoteClusterPermissions remoteClusterPermissions = new RemoteClusterPermissions(); + List groups = generateRandomGroups(randomBoolean()); + for (int i = 0; i < groups.size(); i++) { + remoteClusterPermissions.addGroup(groups.get(i)); + } + List>> asAsMap = remoteClusterPermissions.toMap(); + RemoteClusterPermissions remoteClusterPermissionsAsMap = new RemoteClusterPermissions(asAsMap); + assertEquals(remoteClusterPermissions, remoteClusterPermissionsAsMap); + } + + public void testRemoveUnsupportedPrivileges() { + RemoteClusterPermissions remoteClusterPermissions = new RemoteClusterPermissions(); + RemoteClusterPermissionGroup group = new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "*" }); + remoteClusterPermissions.addGroup(group); + // this privilege is allowed by versions, so nothing should be removed + assertEquals(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_REMOTE_CLUSTER_PRIVS)); + assertEquals(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_MONITOR_STATS)); + + remoteClusterPermissions = new RemoteClusterPermissions(); + if (randomBoolean()) { + group = new RemoteClusterPermissionGroup(new String[] { "monitor_stats" }, new String[] { "*" }); + } else { + // if somehow duplicates end up here, they should not influence removal + group = new RemoteClusterPermissionGroup(new String[] { "monitor_stats", "monitor_stats" }, new String[] { "*" }); + } + remoteClusterPermissions.addGroup(group); + // this single newer privilege is not allowed in the older version, so it should result in an object with no groups + assertNotEquals(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_REMOTE_CLUSTER_PRIVS)); + assertFalse(remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_REMOTE_CLUSTER_PRIVS).hasAnyPrivileges()); + assertEquals(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_MONITOR_STATS)); + + int groupCount = randomIntBetween(1, 5); + remoteClusterPermissions = new RemoteClusterPermissions(); + group = new RemoteClusterPermissionGroup(new String[] { "monitor_enrich", "monitor_stats" }, new String[] { "*" }); + for (int i = 0; i < groupCount; i++) { + remoteClusterPermissions.addGroup(group); + } + // one of the newer privilege is not allowed in the older version, so it should result in a group with only the allowed privilege + RemoteClusterPermissions expected = new RemoteClusterPermissions(); + for (int i = 0; i < groupCount; i++) { + expected.addGroup(new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "*" })); + } + assertEquals(expected, remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_REMOTE_CLUSTER_PRIVS)); + // both privileges allowed in the newer version, so it should not change the permission + assertEquals(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(ROLE_MONITOR_STATS)); + } + + public void testShortCircuitRemoveUnsupportedPrivileges() { + RemoteClusterPermissions remoteClusterPermissions = new RemoteClusterPermissions(); + 
assertSame(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(TransportVersion.current())); + assertSame(remoteClusterPermissions, remoteClusterPermissions.removeUnsupportedPrivileges(lastTransportVersionPermission)); + assertNotSame( + remoteClusterPermissions, + remoteClusterPermissions.removeUnsupportedPrivileges(TransportVersionUtils.getPreviousVersion(lastTransportVersionPermission)) + ); + } + private List generateRandomGroups(boolean fuzzyCluster) { clean(); List groups = new ArrayList<>(); @@ -216,22 +285,48 @@ protected Writeable.Reader instanceReader() { @Override protected RemoteClusterPermissions createTestInstance() { + Set all = RemoteClusterPermissions.allowedRemoteClusterPermissions.values() + .stream() + .flatMap(Set::stream) + .collect(Collectors.toSet()); + List randomPermission = randomList(1, all.size(), () -> randomFrom(all)); return new RemoteClusterPermissions().addGroup( - new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "*" }) + new RemoteClusterPermissionGroup(randomPermission.toArray(new String[0]), new String[] { "*" }) ); } @Override protected RemoteClusterPermissions mutateInstance(RemoteClusterPermissions instance) throws IOException { return new RemoteClusterPermissions().addGroup( - new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "*" }) + new RemoteClusterPermissionGroup(new String[] { "monitor_enrich", "monitor_stats" }, new String[] { "*" }) ).addGroup(new RemoteClusterPermissionGroup(new String[] { "foobar" }, new String[] { "*" })); } @Override protected RemoteClusterPermissions doParseInstance(XContentParser parser) throws IOException { - // fromXContent/parsing isn't supported since we still do old school manual parsing of the role descriptor - return createTestInstance(); + // fromXContent/object parsing isn't supported since we still do old school manual parsing of the role descriptor + // so this test is silly because it only tests we know how to manually parse the test instance in this test + // this is needed since we want the other parts from the AbstractXContentSerializingTestCase suite + RemoteClusterPermissions remoteClusterPermissions = new RemoteClusterPermissions(); + String[] privileges = null; + String[] clusters = null; + XContentParser.Token token; + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.START_OBJECT) { + continue; + } + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (RoleDescriptor.Fields.PRIVILEGES.match(currentFieldName, parser.getDeprecationHandler())) { + privileges = XContentUtils.readStringArray(parser, false); + + } else if (RoleDescriptor.Fields.CLUSTERS.match(currentFieldName, parser.getDeprecationHandler())) { + clusters = XContentUtils.readStringArray(parser, false); + } + } + remoteClusterPermissions.addGroup(new RemoteClusterPermissionGroup(privileges, clusters)); + return remoteClusterPermissions; } @Override diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index a71ac6a9b51fd..fb4d822b7655c 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ 
b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -2833,7 +2833,7 @@ public void testSuperuserRole() { is(false) ); assertThat( - superuserRole.remoteCluster().privilegeNames("*", TransportVersion.current()), + superuserRole.remoteCluster().collapseAndRemoveUnsupportedPrivileges("*", TransportVersion.current()), equalTo(RemoteClusterPermissions.getSupportedRemoteClusterPermissions().toArray(new String[0])) ); } diff --git a/x-pack/plugin/security/qa/multi-cluster/build.gradle b/x-pack/plugin/security/qa/multi-cluster/build.gradle index c7b8f81bb7876..b8eccb14819a4 100644 --- a/x-pack/plugin/security/qa/multi-cluster/build.gradle +++ b/x-pack/plugin/security/qa/multi-cluster/build.gradle @@ -31,13 +31,15 @@ dependencies { tasks.named("javaRestTest") { enabled = true // This is tested explicitly in bwc test tasks. - exclude '**/RemoteClusterSecurityBwcRestIT.class' + exclude '**/RemoteClusterSecurityBWCToRCS1ClusterRestIT.class' + exclude '**/RemoteClusterSecurityBWCToRCS2ClusterRestIT.class' } -BuildParams.bwcVersions.withWireCompatible(v -> v.before(BuildParams.isSnapshotBuild() ? '8.8.0' : '8.9.1')) { bwcVersion, baseName -> +BuildParams.bwcVersions.withWireCompatible() { bwcVersion, baseName -> tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { usesBwcDistribution(bwcVersion) systemProperty("tests.old_cluster_version", bwcVersion) - include '**/RemoteClusterSecurityBwcRestIT.class' + include '**/RemoteClusterSecurityBWCToRCS1ClusterRestIT.class' + include '**/RemoteClusterSecurityBWCToRCS2ClusterRestIT.class' } } diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBwcRestIT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/AbstractRemoteClusterSecurityBWCRestIT.java similarity index 65% rename from x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBwcRestIT.java rename to x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/AbstractRemoteClusterSecurityBWCRestIT.java index 17acd258ed34b..20cdbb9f8b0df 100644 --- a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBwcRestIT.java +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/AbstractRemoteClusterSecurityBWCRestIT.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.remotecluster; +import org.apache.http.util.EntityUtils; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Request; import org.elasticsearch.client.RequestOptions; @@ -15,14 +16,9 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchResponseUtils; -import org.elasticsearch.test.cluster.ElasticsearchCluster; -import org.elasticsearch.test.cluster.local.distribution.DistributionType; -import org.elasticsearch.test.cluster.util.Version; -import org.elasticsearch.test.cluster.util.resource.Resource; import org.elasticsearch.test.rest.ObjectPath; -import org.junit.ClassRule; -import org.junit.rules.RuleChain; -import org.junit.rules.TestRule; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.json.JsonXContent; import java.io.IOException; import java.util.Arrays; @@ -32,48 
+28,21 @@ import java.util.stream.Collectors; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; /** - * BWC test which ensures that users and API keys with defined {@code remote_indices} privileges can be used to query legacy remote clusters + * A set of BWC tests that can be executed with either RCS 1 or RCS 2 against an older fulfilling cluster. */ -public class RemoteClusterSecurityBwcRestIT extends AbstractRemoteClusterSecurityTestCase { +public abstract class AbstractRemoteClusterSecurityBWCRestIT extends AbstractRemoteClusterSecurityTestCase { - private static final Version OLD_CLUSTER_VERSION = Version.fromString(System.getProperty("tests.old_cluster_version")); + protected abstract boolean isRCS2(); - static { - fulfillingCluster = ElasticsearchCluster.local() - .version(OLD_CLUSTER_VERSION) - .distribution(DistributionType.DEFAULT) - .name("fulfilling-cluster") - .apply(commonClusterConfig) - .setting("xpack.ml.enabled", "false") - .build(); - - queryCluster = ElasticsearchCluster.local() - .version(Version.CURRENT) - .distribution(DistributionType.INTEG_TEST) - .name("query-cluster") - .apply(commonClusterConfig) - .setting("xpack.security.remote_cluster_client.ssl.enabled", "true") - .setting("xpack.security.remote_cluster_client.ssl.certificate_authorities", "remote-cluster-ca.crt") - .rolesFile(Resource.fromClasspath("roles.yml")) - .build(); - } - - @ClassRule - // Use a RuleChain to ensure that fulfilling cluster is started before query cluster - public static TestRule clusterRule = RuleChain.outerRule(fulfillingCluster).around(queryCluster); - - public void testBwcWithLegacyCrossClusterSearch() throws Exception { - final boolean useProxyMode = randomBoolean(); - // Update remote cluster settings on QC. - setupQueryClusterRemoteClusters(useProxyMode); - // Ensure remote cluster is connected - ensureRemoteFulfillingClusterIsConnected(useProxyMode); + public void testBwcCCSViaRCS1orRCS2() throws Exception { // Fulfilling cluster { @@ -122,19 +91,22 @@ public void testBwcWithLegacyCrossClusterSearch() throws Exception { ] }"""); assertOK(adminClient().performRequest(putRoleRequest)); - // We need to define the same role on QC and FC in order for CCS to work. - final var putRoleRequestFulfilling = new Request("PUT", "/_security/role/" + REMOTE_SEARCH_ROLE); - putRoleRequestFulfilling.setJsonEntity(""" - { - "cluster": ["manage_own_api_key"], - "indices": [ + if (isRCS2() == false) { + // We need to define the same role on QC and FC in order for CCS to work. 
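+ // with RCS 1.0 the fulfilling cluster authorizes the request against its own locally defined roles, hence the duplicate definition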
+ final var putRoleRequestFulfilling = new Request("PUT", "/_security/role/" + REMOTE_SEARCH_ROLE);
+ putRoleRequestFulfilling.setJsonEntity(""" {
- "names": ["remote_index1"],
- "privileges": ["read", "read_cross_cluster"]
- }
- ]
- }""");
- assertOK(performRequestAgainstFulfillingCluster(putRoleRequestFulfilling));
+ "cluster": ["manage_own_api_key"],
+ "indices": [
+ {
+ "names": ["remote_index1"],
+ "privileges": ["read", "read_cross_cluster"]
+ }
+ ]
+ }""");
+ assertOK(performRequestAgainstFulfillingCluster(putRoleRequestFulfilling));
+ }
+ final var putUserRequest = new Request("PUT", "/_security/user/" + REMOTE_SEARCH_USER); putUserRequest.setJsonEntity(""" {
@@ -166,7 +138,7 @@ public void testBwcWithLegacyCrossClusterSearch() throws Exception { ], "remote_cluster": [ {
- "privileges": ["monitor_enrich"],
+ "privileges": ["monitor_enrich", "monitor_stats"],
"clusters": ["*"] } ]
@@ -187,38 +159,35 @@ public void testBwcWithLegacyCrossClusterSearch() throws Exception { // Check that we can search the fulfilling cluster from the querying cluster final boolean alsoSearchLocally = randomBoolean();
+ final String remoteClusterName = randomFrom("my_remote_cluster", "*", "my_remote_*");
+ final String remoteIndexName = randomFrom("remote_index1", "*");
final var searchRequest = new Request( "GET", String.format( Locale.ROOT, "/%s%s:%s/_search?ccs_minimize_roundtrips=%s", alsoSearchLocally ? "local_index," : "",
- randomFrom("my_remote_cluster", "*", "my_remote_*"),
- randomFrom("remote_index1", "*"),
+ remoteClusterName,
+ remoteIndexName,
randomBoolean() ) );
- final String sendRequestWith = randomFrom("user", "apikey");
- final Response response = sendRequestWith.equals("user") ? performRequestWithRemoteAccessUser(searchRequest) : performRequestWithApiKey(searchRequest, apiKeyEncoded);
+ String esqlCommand = String.format(Locale.ROOT, "FROM %s,%s:%s | LIMIT 10", "local_index", remoteClusterName, remoteIndexName);
+ // send request with user
+ Response response = performRequestWithRemoteAccessUser(searchRequest);
assertOK(response);
- final SearchResponse searchResponse;
try (var parser = responseAsParser(response)) {
- searchResponse = SearchResponseUtils.parseSearchResponse(parser);
+ assertSearchResponse(SearchResponseUtils.parseSearchResponse(parser), alsoSearchLocally);
}
- try {
- final List<String> actualIndices = Arrays.stream(searchResponse.getHits().getHits())
- .map(SearchHit::getIndex)
- .collect(Collectors.toList());
- if (alsoSearchLocally) {
- assertThat(actualIndices, containsInAnyOrder("remote_index1", "local_index"));
- } else {
- assertThat(actualIndices, containsInAnyOrder("remote_index1"));
- }
- } finally {
- searchResponse.decRef();
+ assertEsqlResponse(performRequestWithRemoteAccessUser(esqlRequest(esqlCommand)));
+
+ // send request with apikey
+ response = performRequestWithApiKey(searchRequest, apiKeyEncoded);
+ assertOK(response);
+ try (var parser = responseAsParser(response)) {
+ assertSearchResponse(SearchResponseUtils.parseSearchResponse(parser), alsoSearchLocally);
}
+ assertEsqlResponse(performRequestWithApiKey(esqlRequest(esqlCommand), apiKeyEncoded));
} }
@@ -231,6 +200,14 @@ private void ensureRemoteFulfillingClusterIsConnected(boolean useProxyMode) thro final Map<String, Object> remoteInfoMap = responseAsMap(remoteInfoResponse); assertThat(remoteInfoMap, hasKey("my_remote_cluster")); assertThat(org.elasticsearch.xcontent.ObjectPath.eval("my_remote_cluster.connected", remoteInfoMap), is(true));
+ if (isRCS2()) {
+ assertThat(
+ 
org.elasticsearch.xcontent.ObjectPath.eval("my_remote_cluster.cluster_credentials", remoteInfoMap),
+ is("::es_redacted::") // RCS 2.0
+ );
+ } else {
+ assertThat(org.elasticsearch.xcontent.ObjectPath.eval("my_remote_cluster.cluster_credentials", remoteInfoMap), nullValue());
+ }
if (false == useProxyMode) { assertThat( org.elasticsearch.xcontent.ObjectPath.eval("my_remote_cluster.num_nodes_connected", remoteInfoMap),
@@ -240,7 +217,17 @@ private void ensureRemoteFulfillingClusterIsConnected(boolean useProxyMode) thro }); }
- private void setupQueryClusterRemoteClusters(boolean useProxyMode) throws IOException {
+ private Response performRequestWithRemoteAccessUser(final Request request) throws IOException {
+ request.setOptions(RequestOptions.DEFAULT.toBuilder().addHeader("Authorization", basicAuthHeaderValue(REMOTE_SEARCH_USER, PASS)));
+ return client().performRequest(request);
+ }
+
+ private Response performRequestWithApiKey(final Request request, final String encoded) throws IOException {
+ request.setOptions(RequestOptions.DEFAULT.toBuilder().addHeader("Authorization", "ApiKey " + encoded));
+ return client().performRequest(request);
+ }
+
+ private void setupQueryClusterRCS1(boolean useProxyMode) throws IOException {
final Settings.Builder builder = Settings.builder(); if (useProxyMode) { builder.put("cluster.remote.my_remote_cluster.mode", "proxy")
@@ -252,14 +239,37 @@ private void setupQueryClusterRemoteClusters(boolean useProxyMode) throws IOExce updateClusterSettings(builder.build()); }
- private Response performRequestWithRemoteAccessUser(final Request request) throws IOException {
- request.setOptions(RequestOptions.DEFAULT.toBuilder().addHeader("Authorization", basicAuthHeaderValue(REMOTE_SEARCH_USER, PASS)));
- return client().performRequest(request);
+ private Request esqlRequest(String command) throws IOException {
+ XContentBuilder body = JsonXContent.contentBuilder();
+ body.startObject();
+ body.field("query", command);
+ body.field("include_ccs_metadata", true);
+ body.endObject();
+ Request request = new Request("POST", "_query");
+ request.setJsonEntity(org.elasticsearch.common.Strings.toString(body));
+ return request;
}
- private Response performRequestWithApiKey(final Request request, final String encoded) throws IOException {
- request.setOptions(RequestOptions.DEFAULT.toBuilder().addHeader("Authorization", "ApiKey " + encoded));
- return client().performRequest(request);
+ private void assertSearchResponse(SearchResponse searchResponse, boolean alsoSearchLocally) {
+ try {
+ final List<String> actualIndices = Arrays.stream(searchResponse.getHits().getHits())
+ .map(SearchHit::getIndex)
+ .collect(Collectors.toList());
+ if (alsoSearchLocally) {
+ assertThat(actualIndices, containsInAnyOrder("remote_index1", "local_index"));
+ } else {
+ assertThat(actualIndices, containsInAnyOrder("remote_index1"));
+ }
+ } finally {
+ searchResponse.decRef();
+ }
}
+ private void assertEsqlResponse(Response response) throws IOException {
+ assertOK(response);
+ String responseAsString = EntityUtils.toString(response.getEntity());
+ assertThat(responseAsString, containsString("\"my_remote_cluster\":{\"status\":\"successful\""));
+ assertThat(responseAsString, containsString("local_bar"));
+ assertThat(responseAsString, containsString("bar"));
+ }
} diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS1ClusterRestIT.java 
b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS1ClusterRestIT.java new file mode 100644 index 0000000000000..73e0f096039f9 --- /dev/null +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS1ClusterRestIT.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.remotecluster;
+
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.local.distribution.DistributionType;
+import org.elasticsearch.test.cluster.util.Version;
+import org.elasticsearch.test.cluster.util.resource.Resource;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.rules.RuleChain;
+import org.junit.rules.TestRule;
+
+/**
+ * BWC test which ensures that users and API keys with defined {@code remote_indices}/{@code remote_cluster} privileges can be used
+ * to query legacy remote clusters when using RCS 1.0. We send the request to an older fulfilling cluster using RCS 1.0 with a user/role
+ * and API key where the {@code remote_indices}/{@code remote_cluster} are defined in the newer query cluster.
+ * All RCS 2.0 config should be effectively ignored when using RCS 1 for CCS. We send to an older fulfilling cluster to help ensure that
+ * newly introduced RCS 2.0 artifacts are forward compatible from the perspective of the old cluster. For example, a new privilege
+ * sent to an old cluster should be ignored. 
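+ * Concretely, the newly introduced {@code monitor_stats} privilege sent to an old cluster is expected to be
+ * silently dropped rather than cause the request to fail.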
+ */
+public class RemoteClusterSecurityBWCToRCS1ClusterRestIT extends AbstractRemoteClusterSecurityBWCRestIT {
+
+ private static final Version OLD_CLUSTER_VERSION = Version.fromString(System.getProperty("tests.old_cluster_version"));
+
+ static {
+ fulfillingCluster = ElasticsearchCluster.local()
+ .version(OLD_CLUSTER_VERSION)
+ .distribution(DistributionType.DEFAULT)
+ .name("fulfilling-cluster")
+ .apply(commonClusterConfig)
+ .setting("xpack.ml.enabled", "false")
+ // .setting("logger.org.elasticsearch.xpack.core", "trace") //useful for human debugging
+ // .setting("logger.org.elasticsearch.xpack.security", "trace") //useful for human debugging
+ .build();
+
+ queryCluster = ElasticsearchCluster.local()
+ .version(Version.CURRENT)
+ .distribution(DistributionType.DEFAULT)
+ .setting("xpack.ml.enabled", "false")
+ .name("query-cluster")
+ .apply(commonClusterConfig)
+ .setting("xpack.security.remote_cluster_client.ssl.enabled", "true")
+ .setting("xpack.security.remote_cluster_client.ssl.certificate_authorities", "remote-cluster-ca.crt")
+ .rolesFile(Resource.fromClasspath("roles.yml"))
+ .build();
+ }
+
+ @ClassRule
+ // Use a RuleChain to ensure that fulfilling cluster is started before query cluster
+ public static TestRule clusterRule = RuleChain.outerRule(fulfillingCluster).around(queryCluster);
+
+ @Override
+ protected boolean isRCS2() {
+ return false;
+ }
+
+ @Before
+ @Override
+ public void setUp() throws Exception {
+ configureRemoteCluster(REMOTE_CLUSTER_ALIAS, fulfillingCluster, true, randomBoolean(), false);
+ super.setUp();
+ }
+}
diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS2ClusterRestIT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS2ClusterRestIT.java new file mode 100644 index 0000000000000..5e173b72c66de --- /dev/null +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityBWCToRCS2ClusterRestIT.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.remotecluster;
+
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.local.distribution.DistributionType;
+import org.elasticsearch.test.cluster.util.Version;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.rules.RuleChain;
+import org.junit.rules.TestRule;
+
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * BWC test which ensures that users and API keys with defined {@code remote_indices}/{@code remote_cluster} privileges can be used
+ * to query older remote clusters when using RCS 2.0. We send the request to an older fulfilling cluster using RCS 2.0 with a user/role
+ * and API key where the {@code remote_indices}/{@code remote_cluster} are defined in the newer query cluster.
+ * All new RCS 2.0 config should be effectively ignored when sending to an older RCS 2.0 cluster. For example, a new privilege
+ * sent to an old cluster should be ignored. 
+ */
+public class RemoteClusterSecurityBWCToRCS2ClusterRestIT extends AbstractRemoteClusterSecurityBWCRestIT {
+
+ private static final Version OLD_CLUSTER_VERSION = Version.fromString(System.getProperty("tests.old_cluster_version"));
+ private static final AtomicReference<Map<String, Object>> API_KEY_MAP_REF = new AtomicReference<>();
+
+ static {
+
+ fulfillingCluster = ElasticsearchCluster.local()
+ .name("fulfilling-cluster")
+ .version(OLD_CLUSTER_VERSION)
+ .distribution(DistributionType.DEFAULT)
+ .apply(commonClusterConfig)
+ .setting("xpack.ml.enabled", "false")
+ .setting("remote_cluster_server.enabled", "true")
+ .setting("remote_cluster.port", "0")
+ .setting("xpack.security.remote_cluster_server.ssl.enabled", "true")
+ .setting("xpack.security.remote_cluster_server.ssl.key", "remote-cluster.key")
+ .setting("xpack.security.remote_cluster_server.ssl.certificate", "remote-cluster.crt")
+ .keystore("xpack.security.remote_cluster_server.ssl.secure_key_passphrase", "remote-cluster-password")
+ // .setting("logger.org.elasticsearch.xpack.core", "trace") //useful for human debugging
+ // .setting("logger.org.elasticsearch.xpack.security", "trace") //useful for human debugging
+ .build();
+
+ queryCluster = ElasticsearchCluster.local()
+ .name("query-cluster")
+ .distribution(DistributionType.DEFAULT)
+ .setting("xpack.ml.enabled", "false")
+ .apply(commonClusterConfig)
+ .setting("xpack.security.remote_cluster_client.ssl.enabled", "true")
+ .setting("xpack.security.remote_cluster_client.ssl.certificate_authorities", "remote-cluster-ca.crt")
+ .keystore("cluster.remote.my_remote_cluster.credentials", () -> {
+ if (API_KEY_MAP_REF.get() == null) {
+ final Map<String, Object> apiKeyMap = createCrossClusterAccessApiKey("""
+ {
+ "search": [
+ {
+ "names": ["*"]
+ }
+ ]
+ }""");
+ API_KEY_MAP_REF.set(apiKeyMap);
+ }
+ return (String) API_KEY_MAP_REF.get().get("encoded");
+ })
+ .build();
+ }
+
+ @ClassRule
+ // Use a RuleChain to ensure that fulfilling cluster is started before query cluster
+ public static TestRule clusterRule = RuleChain.outerRule(fulfillingCluster).around(queryCluster);
+
+ @Override
+ protected boolean isRCS2() {
+ return true;
+ }
+
+ @Before
+ @Override
+ public void setUp() throws Exception {
+ configureRemoteCluster(REMOTE_CLUSTER_ALIAS, fulfillingCluster, false, randomBoolean(), false);
+ super.setUp();
+ }
+}
diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityRestStatsIT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityRestStatsIT.java new file mode 100644 index 0000000000000..e98fcf6f72881 --- /dev/null +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityRestStatsIT.java
@@ -0,0 +1,266 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0. 
+ */
+
+package org.elasticsearch.xpack.remotecluster;
+
+import org.apache.http.util.EntityUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Request;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.Response;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.Strings;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.SearchResponseUtils;
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.local.distribution.DistributionType;
+import org.elasticsearch.test.cluster.util.resource.Resource;
+import org.elasticsearch.test.junit.RunnableTestRuleAdapter;
+import org.elasticsearch.test.rest.ObjectPath;
+import org.elasticsearch.xcontent.XContentType;
+import org.junit.ClassRule;
+import org.junit.rules.RuleChain;
+import org.junit.rules.TestRule;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.equalTo;
+
+public class RemoteClusterSecurityRestStatsIT extends AbstractRemoteClusterSecurityTestCase {
+
+ private static final AtomicReference<Map<String, Object>> API_KEY_MAP_REF = new AtomicReference<>();
+ private static final AtomicReference<Map<String, Object>> REST_API_KEY_MAP_REF = new AtomicReference<>();
+ private static final AtomicBoolean SSL_ENABLED_REF = new AtomicBoolean();
+ private static final AtomicBoolean NODE1_RCS_SERVER_ENABLED = new AtomicBoolean();
+ private static final AtomicBoolean NODE2_RCS_SERVER_ENABLED = new AtomicBoolean();
+ private static final int FULFILL_NODE_COUNT = 3;
+ private static final Logger logger = LogManager.getLogger(RemoteClusterSecurityRestStatsIT.class);
+
+ static {
+ fulfillingCluster = ElasticsearchCluster.local()
+ .distribution(DistributionType.DEFAULT)
+ .name("fulfilling-cluster")
+ .nodes(FULFILL_NODE_COUNT)
+ .apply(commonClusterConfig)
+ .setting("remote_cluster.port", "0")
+ .setting("xpack.security.remote_cluster_server.ssl.enabled", () -> String.valueOf(SSL_ENABLED_REF.get()))
+ .setting("xpack.security.remote_cluster_server.ssl.key", "remote-cluster.key")
+ .setting("xpack.security.remote_cluster_server.ssl.certificate", "remote-cluster.crt")
+ .setting("xpack.security.authc.token.enabled", "true")
+ .keystore("xpack.security.remote_cluster_server.ssl.secure_key_passphrase", "remote-cluster-password")
+ .node(0, spec -> spec.setting("remote_cluster_server.enabled", "true"))
+ .node(1, spec -> spec.setting("remote_cluster_server.enabled", () -> String.valueOf(NODE1_RCS_SERVER_ENABLED.get())))
+ .node(2, spec -> spec.setting("remote_cluster_server.enabled", () -> String.valueOf(NODE2_RCS_SERVER_ENABLED.get())))
+ .build();
+
+ queryCluster = ElasticsearchCluster.local()
+ .distribution(DistributionType.DEFAULT)
+ .name("query-cluster")
+ .apply(commonClusterConfig)
+ .setting("xpack.security.remote_cluster_client.ssl.enabled", () -> String.valueOf(SSL_ENABLED_REF.get()))
+ .setting("xpack.security.remote_cluster_client.ssl.certificate_authorities", "remote-cluster-ca.crt")
+ 
.setting("xpack.security.authc.token.enabled", "true") + .keystore("cluster.remote.my_remote_cluster.credentials", () -> { + if (API_KEY_MAP_REF.get() == null) { + final Map apiKeyMap = createCrossClusterAccessApiKey(""" + { + "search": [ + { + "names": ["*"] + } + ] + }"""); + API_KEY_MAP_REF.set(apiKeyMap); + } + return (String) API_KEY_MAP_REF.get().get("encoded"); + }) + // Define a bogus API key for another remote cluster + .keystore("cluster.remote.invalid_remote.credentials", randomEncodedApiKey()) + // Define remote with a REST API key to observe expected failure + .keystore("cluster.remote.wrong_api_key_type.credentials", () -> { + if (REST_API_KEY_MAP_REF.get() == null) { + initFulfillingClusterClient(); + final var createApiKeyRequest = new Request("POST", "/_security/api_key"); + createApiKeyRequest.setJsonEntity(""" + { + "name": "rest_api_key" + }"""); + try { + final Response createApiKeyResponse = performRequestWithAdminUser(fulfillingClusterClient, createApiKeyRequest); + assertOK(createApiKeyResponse); + REST_API_KEY_MAP_REF.set(responseAsMap(createApiKeyResponse)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + return (String) REST_API_KEY_MAP_REF.get().get("encoded"); + }) + .rolesFile(Resource.fromClasspath("roles.yml")) + .user(REMOTE_METRIC_USER, PASS.toString(), "read_remote_shared_metrics", false) + .build(); + } + + @ClassRule + // Use a RuleChain to ensure that fulfilling cluster is started before query cluster + // `SSL_ENABLED_REF` is used to control the SSL-enabled setting on the test clusters + // We set it here, since randomization methods are not available in the static initialize context above + public static TestRule clusterRule = RuleChain.outerRule(new RunnableTestRuleAdapter(() -> { + SSL_ENABLED_REF.set(usually()); + NODE1_RCS_SERVER_ENABLED.set(randomBoolean()); + NODE2_RCS_SERVER_ENABLED.set(randomBoolean()); + })).around(fulfillingCluster).around(queryCluster); + + public void testCrossClusterStats() throws Exception { + configureRemoteCluster(); + setupRoleAndUserQueryCluster(); + addDocToIndexFulfillingCluster("index1"); + + // search #1 + searchFulfillingClusterFromQueryCluster("index1"); + Map statsResponseAsMap = getFulfillingClusterStatsFromQueryCluster(); + assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs.clusters.my_remote_cluster.nodes_count"), equalTo(FULFILL_NODE_COUNT)); + assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.clusters.my_remote_cluster.total"), equalTo(1)); + int initialIndexCount = ObjectPath.evaluate(statsResponseAsMap, "ccs.clusters.my_remote_cluster.indices_count"); + + // search #2 + searchFulfillingClusterFromQueryCluster("index1"); + statsResponseAsMap = getFulfillingClusterStatsFromQueryCluster(); + assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.total"), equalTo(2)); + assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.clusters.my_remote_cluster.total"), equalTo(2)); + + // search #3 + expectThrows(Exception.class, () -> searchFulfillingClusterFromQueryCluster("junk")); + statsResponseAsMap = getFulfillingClusterStatsFromQueryCluster(); + assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.total"), equalTo(3)); + assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.clusters.my_remote_cluster.total"), equalTo(2)); + + // search #4 + addDocToIndexFulfillingCluster("index2"); + searchFulfillingClusterFromQueryCluster("index2"); + statsResponseAsMap = getFulfillingClusterStatsFromQueryCluster(); + 
assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.total"), equalTo(4));
+ assertThat(ObjectPath.evaluate(statsResponseAsMap, "ccs._search.clusters.my_remote_cluster.total"), equalTo(3));
+ int updatedIndexCount = ObjectPath.evaluate(statsResponseAsMap, "ccs.clusters.my_remote_cluster.indices_count");
+ assertThat(updatedIndexCount, equalTo(initialIndexCount + 1));
+ }
+
+ private Map<String, Object> getFulfillingClusterStatsFromQueryCluster() throws IOException {
+ return getFulfillingClusterStatsFromQueryCluster(false);
+ }
+
+ private Map<String, Object> getFulfillingClusterStatsFromQueryCluster(boolean humanDebug) throws IOException {
+ Request stats = new Request("GET", "_cluster/stats?include_remotes=true&filter_path=ccs");
+ Response statsResponse = performRequestWithRemoteSearchUser(stats);
+ if (humanDebug) {
+ debugResponse(statsResponse);
+ }
+ return entityAsMap(statsResponse.getEntity());
+ }
+
+ private void searchFulfillingClusterFromQueryCluster(String index, boolean humanDebug) throws IOException {
+ final var searchRequest = new Request(
+ "GET",
+ String.format(
+ Locale.ROOT,
+ "/%s:%s/_search?ccs_minimize_roundtrips=%s",
+ randomFrom("my_remote_cluster", "*", "my_remote_*"),
+ index,
+ randomBoolean()
+ )
+ );
+ Response response = performRequestWithRemoteSearchUser(searchRequest);
+ if (humanDebug) {
+ debugResponse(response);
+ }
+ assertOK(response);
+ final SearchResponse searchResponse = SearchResponseUtils.parseSearchResponse(responseAsParser(response));
+ try {
+ final List<String> actualIndices = Arrays.stream(searchResponse.getHits().getHits())
+ .map(SearchHit::getIndex)
+ .collect(Collectors.toList());
+ assertThat(actualIndices, containsInAnyOrder(index));
+
+ } finally {
+ searchResponse.decRef();
+ }
+ }
+
+ private void searchFulfillingClusterFromQueryCluster(String index) throws IOException {
+ searchFulfillingClusterFromQueryCluster(index, false);
+ }
+
+ private void addDocToIndexFulfillingCluster(String index) throws IOException {
+ // Index some documents, so we can attempt to search them from the querying cluster
+ final Request bulkRequest = new Request("POST", "/_bulk?refresh=true");
+ bulkRequest.setJsonEntity(Strings.format("""
+ { "index": { "_index": "%s" } }
+ { "foo": "bar" }
+ """, index));
+ assertOK(performRequestAgainstFulfillingCluster(bulkRequest));
+ }
+
+ private void setupRoleAndUserQueryCluster() throws IOException {
+ final var putRoleRequest = new Request("PUT", "/_security/role/" + REMOTE_SEARCH_ROLE);
+ putRoleRequest.setJsonEntity("""
+ {
+ "description": "Role with privileges for remote indices and stats.",
+ "cluster": ["monitor_stats"],
+ "remote_indices": [
+ {
+ "names": ["*"],
+ "privileges": ["read", "read_cross_cluster"],
+ "clusters": ["*"]
+ }
+ ],
+ "remote_cluster": [
+ {
+ "privileges": ["monitor_stats"],
+ "clusters": ["*"]
+ }
+ ]
+ }""");
+ assertOK(adminClient().performRequest(putRoleRequest));
+ final var putUserRequest = new Request("PUT", "/_security/user/" + REMOTE_SEARCH_USER);
+ putUserRequest.setJsonEntity("""
+ {
+ "password": "x-pack-test-password",
+ "roles" : ["remote_search"]
+ }""");
+ assertOK(adminClient().performRequest(putUserRequest));
+ }
+
+ private Response performRequestWithRemoteSearchUser(final Request request) throws IOException {
+ request.setOptions(
+ RequestOptions.DEFAULT.toBuilder().addHeader("Authorization", headerFromRandomAuthMethod(REMOTE_SEARCH_USER, PASS))
+ );
+ return client().performRequest(request);
+ }
+
+ // helper method for humans to see the responses for debug purposes; when used it will always 
fail the test
+ private void debugResponse(Response response) throws IOException {
+ String jsonString = XContentHelper.convertToJson(
+ new BytesArray(EntityUtils.toString(response.getEntity())),
+ true,
+ true,
+ XContentType.JSON
+ );
+ logger.error(jsonString);
+ assertFalse(true); // boom
+ }
+}
diff --git a/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/apikey/ApiKeyRestIT.java b/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/apikey/ApiKeyRestIT.java index 667140b849951..8ce7fc77fe4f3 100644 --- a/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/apikey/ApiKeyRestIT.java +++ b/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/apikey/ApiKeyRestIT.java
@@ -828,7 +828,7 @@ public void testRemoteClusterSupportForApiKeys() throws IOException { assertOK(response); assertAPIKeyWithRemoteClusterPermissions(apiKeyId, includeRemoteCluster, false, null, new String[] { "foo", "bar" });
- // create API key as the remote user which does remote_cluster limited_by permissions
+ // create API key as the remote user which has all remote_cluster permissions via limited_by
response = sendRequestAsRemoteUser(createApiKeyRequest); apiKeyId = ObjectPath.createFromResponse(response).evaluate("id"); assertThat(apiKeyId, notNullValue());
@@ -922,7 +922,7 @@ private void assertAPIKeyWithRemoteClusterPermissions( assertNotNull(limitedByRole); List<Map<String, List<String>>> remoteCluster = (List<Map<String, List<String>>>) limitedByRole.get("remote_cluster");
- assertThat(remoteCluster.get(0).get("privileges"), containsInAnyOrder("monitor_enrich"));
+ assertThat(remoteCluster.get(0).get("privileges"), containsInAnyOrder("monitor_stats", "monitor_enrich"));
assertThat(remoteCluster.get(0).get("clusters"), containsInAnyOrder("remote")); } else { // no limited by permissions
diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStore.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStore.java index d79a3e31c1bc9..2e1a643bf4f4f 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStore.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStore.java
@@ -572,7 +572,7 @@ public static void buildRoleFromDescriptors( ); });
- if (remoteClusterPermissions.hasPrivileges()) {
+ if (remoteClusterPermissions.hasAnyPrivileges()) {
builder.addRemoteClusterPermissions(remoteClusterPermissions); } else { builder.addRemoteClusterPermissions(RemoteClusterPermissions.NONE);
diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/RoleDescriptorStore.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/RoleDescriptorStore.java index ac8d84d95fd1d..a64cef366926f 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/RoleDescriptorStore.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/RoleDescriptorStore.java
@@ -150,7 +150,7 @@ public void resolveCrossClusterAccessRoleReference( + "but other privileges found for subject [" + crossClusterAccessRoleReference.getUserPrincipal() + "]";
- logger.debug("{}. 
Invalid role descriptor: [{}]", message, roleDescriptor); listener.onFailure(new IllegalArgumentException(message)); return; } diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/RBACEngineTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/RBACEngineTests.java index d71c2b0d19074..a41c54ada781a 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/RBACEngineTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/RBACEngineTests.java @@ -92,6 +92,7 @@ import org.elasticsearch.xpack.core.security.authz.privilege.ApplicationPrivilege; import org.elasticsearch.xpack.core.security.authz.privilege.ApplicationPrivilegeDescriptor; import org.elasticsearch.xpack.core.security.authz.privilege.ApplicationPrivilegeTests; +import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilegeResolver; import org.elasticsearch.xpack.core.security.authz.privilege.ConfigurableClusterPrivileges.ManageApplicationPrivileges; import org.elasticsearch.xpack.core.security.authz.privilege.IndexPrivilege; import org.elasticsearch.xpack.core.security.authz.privilege.Privilege; @@ -1312,10 +1313,7 @@ public void testBuildUserPrivilegeResponse() { ) .addRemoteClusterPermissions( new RemoteClusterPermissions().addGroup( - new RemoteClusterPermissionGroup( - RemoteClusterPermissions.getSupportedRemoteClusterPermissions().toArray(new String[0]), - new String[] { "remote-1" } - ) + new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "remote-1" }) ) .addGroup( new RemoteClusterPermissionGroup( @@ -1383,26 +1381,33 @@ public void testBuildUserPrivilegeResponse() { RemoteClusterPermissions remoteClusterPermissions = response.getRemoteClusterPermissions(); String[] allRemoteClusterPermissions = RemoteClusterPermissions.getSupportedRemoteClusterPermissions().toArray(new String[0]); - assert allRemoteClusterPermissions.length == 1 - : "if more remote cluster permissions are added this test needs to be updated to ensure the correct remotes receive the " - + "correct permissions. 
"; - // 2 groups with 3 aliases + assertThat(response.getRemoteClusterPermissions().groups(), iterableWithSize(2)); - assertEquals( - 3, - response.getRemoteClusterPermissions() - .groups() - .stream() - .map(RemoteClusterPermissionGroup::remoteClusterAliases) - .flatMap(Arrays::stream) - .distinct() - .count() + // remote-1 has monitor_enrich permission + // remote-2 and remote-3 have all permissions + assertThat( + response.getRemoteClusterPermissions().groups(), + containsInAnyOrder( + new RemoteClusterPermissionGroup(new String[] { "monitor_enrich" }, new String[] { "remote-1" }), + new RemoteClusterPermissionGroup(allRemoteClusterPermissions, new String[] { "remote-2", "remote-3" }) + ) + ); + + // ensure that all permissions are valid for the current transport version + assertThat( + Arrays.asList(remoteClusterPermissions.collapseAndRemoveUnsupportedPrivileges("remote-1", TransportVersion.current())), + hasItem("monitor_enrich") ); for (String permission : RemoteClusterPermissions.getSupportedRemoteClusterPermissions()) { - assertThat(Arrays.asList(remoteClusterPermissions.privilegeNames("remote-1", TransportVersion.current())), hasItem(permission)); - assertThat(Arrays.asList(remoteClusterPermissions.privilegeNames("remote-2", TransportVersion.current())), hasItem(permission)); - assertThat(Arrays.asList(remoteClusterPermissions.privilegeNames("remote-3", TransportVersion.current())), hasItem(permission)); + assertThat( + Arrays.asList(remoteClusterPermissions.collapseAndRemoveUnsupportedPrivileges("remote-2", TransportVersion.current())), + hasItem(permission) + ); + assertThat( + Arrays.asList(remoteClusterPermissions.collapseAndRemoveUnsupportedPrivileges("remote-3", TransportVersion.current())), + hasItem(permission) + ); } } @@ -1782,7 +1787,10 @@ public void testGetRoleDescriptorsForRemoteClusterForReservedRoles() { new RoleDescriptorsIntersection( new RoleDescriptor( Role.REMOTE_USER_ROLE_NAME, - null, + RemoteClusterPermissions.getSupportedRemoteClusterPermissions() + .stream() + .filter(s -> s.equals(ClusterPrivilegeResolver.MONITOR_STATS.name())) + .toArray(String[]::new), new IndicesPrivileges[] { IndicesPrivileges.builder().indices(".monitoring-*").privileges("read", "read_cross_cluster").build(), IndicesPrivileges.builder().indices("apm-*").privileges("read", "read_cross_cluster").build(), diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStoreTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStoreTests.java index da903ff7f7177..cef3572ee3ac4 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStoreTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/CompositeRolesStoreTests.java @@ -1158,7 +1158,7 @@ public ClusterPermission.Builder buildPermission(ClusterPermission.Builder build assertHasRemoteIndexGroupsForClusters(forRemote, Set.of("*"), indexGroup("remote-idx-2-*")); assertValidRemoteClusterPermissions(role.remoteCluster(), new String[] { "remote-*" }); assertThat( - role.remoteCluster().privilegeNames("remote-foobar", TransportVersion.current()), + role.remoteCluster().collapseAndRemoveUnsupportedPrivileges("remote-foobar", TransportVersion.current()), equalTo(RemoteClusterPermissions.getSupportedRemoteClusterPermissions().toArray(new String[0])) ); } @@ -3322,12 +3322,12 @@ private void assertValidRemoteClusterPermissions(RemoteClusterPermissions 
permis } private void assertValidRemoteClusterPermissionsParent(RemoteClusterPermissions permissions, String[] aliases) { - assertTrue(permissions.hasPrivileges()); + assertTrue(permissions.hasAnyPrivileges()); for (String alias : aliases) { - assertTrue(permissions.hasPrivileges(alias)); - assertFalse(permissions.hasPrivileges(randomValueOtherThan(alias, () -> randomAlphaOfLength(5)))); + assertTrue(permissions.hasAnyPrivileges(alias)); + assertFalse(permissions.hasAnyPrivileges(randomValueOtherThan(alias, () -> randomAlphaOfLength(5)))); assertThat( - permissions.privilegeNames(alias, TransportVersion.current()), + permissions.collapseAndRemoveUnsupportedPrivileges(alias, TransportVersion.current()), arrayContaining(RemoteClusterPermissions.getSupportedRemoteClusterPermissions().toArray(new String[0])) ); } diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/FileRolesStoreTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/FileRolesStoreTests.java index a4d9dacd1a63d..af5f44b5989fb 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/FileRolesStoreTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/FileRolesStoreTests.java @@ -388,7 +388,8 @@ public void testParseFileWithRemoteIndicesAndCluster() throws IllegalAccessExcep events.get(4), startsWith( "failed to parse remote_cluster for role [invalid_role_bad_priv_remote_clusters]. " - + "[monitor_enrich] is the only value allowed for [privileges] within [remote_cluster]. skipping role..." + + "[monitor_enrich, monitor_stats] are the only values allowed for [privileges] within [remote_cluster]. " + + "Found [junk]. skipping role..." 
)
); }
diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/rest/action/user/RestGetUserPrivilegesActionTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/rest/action/user/RestGetUserPrivilegesActionTests.java index e17d651a19748..5b91b774cc435 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/rest/action/user/RestGetUserPrivilegesActionTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/rest/action/user/RestGetUserPrivilegesActionTests.java
@@ -213,7 +213,7 @@ public void testBuildResponse() throws Exception { ,"remote_cluster":[ { "privileges":[
- "monitor_enrich"
+ "monitor_enrich", "monitor_stats"
], "clusters":[ "remote-1"
@@ -221,7 +221,7 @@ }, { "privileges":[
- "monitor_enrich"
+ "monitor_enrich", "monitor_stats"
], "clusters":[ "remote-2",
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/privileges/11_builtin.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/privileges/11_builtin.yml index ef8fab9ca7b6d..d03e6925cab1f 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/privileges/11_builtin.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/privileges/11_builtin.yml
@@ -15,5 +15,5 @@ setup: # This is fragile - it needs to be updated every time we add a new cluster/index privilege # I would much prefer we could just check that specific entries are in the array, but we don't have # an assertion for that
- - length: { "cluster" : 61 }
+ - length: { "cluster" : 62 }
- length: { "index" : 22 }

From 6ab260c3a69ce3bc197873d35683bfb2fd52ccc8 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Fri, 8 Nov 2024 19:45:34 +0100 Subject: [PATCH 21/39] ES|QL: Fix function Telemetry tests (#116470)

--- muted-tests.yml | 6 ------ .../resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/muted-tests.yml b/muted-tests.yml index 1321cdc2a3d57..718d160994103 100644 --- a/muted-tests.yml +++ b/muted-tests.yml
@@ -112,9 +112,6 @@ tests: - class: org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityWithApmTracingRestIT method: testTracingCrossCluster issue: https://github.com/elastic/elasticsearch/issues/112731
-- class: org.elasticsearch.xpack.test.rest.XPackRestIT
- method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry)}
- issue: https://github.com/elastic/elasticsearch/issues/115231
- class: org.elasticsearch.xpack.inference.DefaultEndPointsIT method: testInferDeploysDefaultE5 issue: https://github.com/elastic/elasticsearch/issues/115361
@@ -279,9 +276,6 @@ tests: - class: org.elasticsearch.smoketest.MlWithSecurityIT method: test {yaml=ml/inference_crud/Test force delete given model with alias referenced by pipeline} issue: https://github.com/elastic/elasticsearch/issues/116443
-- class: org.elasticsearch.xpack.test.rest.XPackRestIT
- method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version}
- issue: https://github.com/elastic/elasticsearch/issues/116448
- class: org.elasticsearch.xpack.downsample.ILMDownsampleDisruptionIT method: testILMDownsampleRollingRestart issue: https://github.com/elastic/elasticsearch/issues/114233
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index bb3345f4118b9..6e7098da33805 100644 
--- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -162,4 +162,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 118} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 116} # check the "sister" test above for a likely update to the same esql.functions length check From 2acd164c1a2156fcc946406664583b029bcc0072 Mon Sep 17 00:00:00 2001 From: Patrick Doyle <810052+prdoyle@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:46:44 -0500 Subject: [PATCH 22/39] Use different file-based settings error message for invalid JSON and NotMasterException (#116359) * Fixup: remove unused pattern variable from before * Try1 handle XContentParseException * Mocks wrap XContentParseException in ExecutionException like the real code does * onProcessFileChangesException case for XContentParseException * Handle NotMasterException while we're at it. * Cleanup * Use Nikolaj's addFileChangedListener approach to test * Add REPLACE_EXISTING * Remove ATOMIC_MOVE Co-authored-by: Nikolaj Volgushev * Delete stray generated files * Remove unused method --------- Co-authored-by: Nikolaj Volgushev --- .../file/AbstractFileWatchingService.java | 9 ++- .../service/FileSettingsService.java | 19 ++++- .../service/FileSettingsServiceTests.java | 74 ++++++++++++++++++- 3 files changed, 92 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/file/AbstractFileWatchingService.java b/server/src/main/java/org/elasticsearch/common/file/AbstractFileWatchingService.java index 41998bf974bf9..b7ecd671c7d62 100644 --- a/server/src/main/java/org/elasticsearch/common/file/AbstractFileWatchingService.java +++ b/server/src/main/java/org/elasticsearch/common/file/AbstractFileWatchingService.java @@ -302,11 +302,12 @@ final WatchKey enableDirectoryWatcher(WatchKey previousKey, Path settingsDir) th void processSettingsOnServiceStartAndNotifyListeners() throws InterruptedException { try { processFileOnServiceStart(); - for (var listener : eventListeners) { - listener.watchedFileChanged(); - } } catch (IOException | ExecutionException e) { - logger.error(() -> "Error processing watched file: " + watchedFile(), e); + onProcessFileChangesException(e); + return; + } + for (var listener : eventListeners) { + listener.watchedFileChanged(); } } diff --git a/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java b/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java index 601fc3c86d98f..ae9ae6f8b5bf9 100644 --- a/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java +++ b/server/src/main/java/org/elasticsearch/reservedstate/service/FileSettingsService.java @@ -15,12 +15,14 @@ import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.ReservedStateMetadata; import org.elasticsearch.cluster.service.ClusterService; import 
org.elasticsearch.common.file.MasterNodeFileWatchingService; import org.elasticsearch.env.Environment;
+import org.elasticsearch.xcontent.XContentParseException;
import org.elasticsearch.xcontent.XContentParserConfiguration; import java.io.BufferedInputStream;
@@ -146,11 +148,20 @@ private void processFileChanges(ReservedStateVersionCheck versionCheck) throws I @Override protected void onProcessFileChangesException(Exception e) {
- if (e instanceof ExecutionException && e.getCause() instanceof FailedToCommitClusterStateException f) {
- logger.error("Unable to commit cluster state", e);
- } else {
- super.onProcessFileChangesException(e);
+ if (e instanceof ExecutionException) {
+ var cause = e.getCause();
+ if (cause instanceof FailedToCommitClusterStateException) {
+ logger.error("Unable to commit cluster state", e);
+ return;
+ } else if (cause instanceof XContentParseException) {
+ logger.error("Unable to parse settings", e);
+ return;
+ } else if (cause instanceof NotMasterException) {
+ logger.error("Node is no longer master", e);
+ return;
+ }
}
+ super.onProcessFileChangesException(e);
}
@Override diff --git a/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java b/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java index aa76245c20679..0db29588c4298 100644 --- a/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/reservedstate/service/FileSettingsServiceTests.java
@@ -38,6 +38,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.xcontent.XContentParseException;
import org.elasticsearch.xcontent.XContentParser; import org.junit.After; import org.junit.Before;
@@ -55,16 +56,22 @@ import java.util.List; import java.util.Map; import java.util.Set;
+import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer;
import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
+import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.hasEntry; import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock;
@@ -262,6 +269,68 @@ public void testProcessFileChanges() throws Exception { verify(controller, times(1)).process(any(), any(XContentParser.class), eq(ReservedStateVersionCheck.HIGHER_VERSION_ONLY), any()); }
+ @SuppressWarnings("unchecked")
+ public void testInvalidJSON() throws Exception {
+ doAnswer((Answer<Void>) invocation -> {
+ invocation.getArgument(1, XContentParser.class).map(); // Throw if JSON is invalid
+ ((Consumer<Exception>) invocation.getArgument(3)).accept(null);
+ return null;
+ }).when(controller).process(any(), any(XContentParser.class), any(), any());
+
+ CyclicBarrier fileChangeBarrier = new CyclicBarrier(2);
+ fileSettingsService.addFileChangedListener(() -> awaitOrBust(fileChangeBarrier));
+ 
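// make sure the watched directory exists before the initial (valid) settings file is written below
+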
Files.createDirectories(fileSettingsService.watchedFileDir());
+ // contents of the JSON don't matter, we just need a file to exist
+ writeTestFile(fileSettingsService.watchedFile(), "{}");
+
+ doAnswer((Answer<?>) invocation -> {
+ boolean returnedNormally = false;
+ try {
+ var result = invocation.callRealMethod();
+ returnedNormally = true;
+ return result;
+ } catch (XContentParseException e) {
+ // We're expecting a parse error. processFileChanges specifies that this is supposed to throw ExecutionException.
+ throw new ExecutionException(e);
+ } catch (Throwable e) {
+ throw new AssertionError("Unexpected exception", e);
+ } finally {
+ if (returnedNormally == false) {
+ // Because of the exception, listeners aren't notified, so we need to activate the barrier ourselves
+ awaitOrBust(fileChangeBarrier);
+ }
+ }
+ }).when(fileSettingsService).processFileChanges();
+
+ // Establish the initial valid JSON
+ fileSettingsService.start();
+ fileSettingsService.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), ClusterState.EMPTY_STATE));
+ awaitOrBust(fileChangeBarrier);
+
+ // Now break the JSON
+ writeTestFile(fileSettingsService.watchedFile(), "test_invalid_JSON");
+ awaitOrBust(fileChangeBarrier);
+
+ verify(fileSettingsService, times(1)).processFileOnServiceStart(); // The initial state
+ verify(fileSettingsService, times(1)).processFileChanges(); // The changed state
+ verify(fileSettingsService, times(1)).onProcessFileChangesException(
+ argThat(e -> e instanceof ExecutionException && e.getCause() instanceof XContentParseException)
+ );
+
+ // Note: the name "processFileOnServiceStart" is a bit misleading because it is not
+ // referring to fileSettingsService.start(). Rather, it is referring to the initialization
+ // of the watcher thread itself, which occurs asynchronously when clusterChanged is first called.
+ }
+
+ private static void awaitOrBust(CyclicBarrier barrier) {
+ try {
+ barrier.await(20, TimeUnit.SECONDS);
+ } catch (InterruptedException | BrokenBarrierException | TimeoutException e) {
+ throw new AssertionError("Unexpected exception waiting for barrier", e);
+ }
+ }
+
@SuppressWarnings("unchecked") public void testStopWorksInMiddleOfProcessing() throws Exception { CountDownLatch processFileLatch = new CountDownLatch(1);
@@ -356,10 +425,10 @@ private static void writeTestFile(Path path, String contents) throws IOException Path tempFilePath = createTempFile(); Files.writeString(tempFilePath, contents); try {
- Files.move(tempFilePath, path, ATOMIC_MOVE);
+ Files.move(tempFilePath, path, REPLACE_EXISTING, ATOMIC_MOVE);
} catch (AtomicMoveNotSupportedException e) { logger.info("Atomic move not available. 
Falling back on non-atomic move to write [{}]", path.toAbsolutePath()); - Files.move(tempFilePath, path); + Files.move(tempFilePath, path, REPLACE_EXISTING); } } @@ -374,4 +443,5 @@ private static void longAwait(CountDownLatch latch) { fail(e, "longAwait: interrupted waiting for CountDownLatch to reach zero"); } } + } From 04dbd8cfe17e04b84799009bef133c8775211421 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Sat, 9 Nov 2024 06:20:18 +1100 Subject: [PATCH 23/39] Mute org.elasticsearch.reservedstate.service.FileSettingsServiceTests testInvalidJSON #116521 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 718d160994103..b0e532bc56210 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -287,6 +287,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/116484 - class: org.elasticsearch.xpack.kql.query.KqlQueryBuilderTests issue: https://github.com/elastic/elasticsearch/issues/116487 +- class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests + method: testInvalidJSON + issue: https://github.com/elastic/elasticsearch/issues/116521 # Examples: # From 5a3e2135d061d884ce4ba227a8afad54f8939d8a Mon Sep 17 00:00:00 2001 From: Pete Gillin Date: Fri, 8 Nov 2024 19:53:12 +0000 Subject: [PATCH 24/39] Update deprecation message in `EnrichPolicy` (#116504) The `elasticsearch_version` property of enrich policies is deprecated with a message saying that it will be removed in ES 9.0. It's still deprecated, but it won't be removed in 9.0. So this change makes the deprecation message less specific. --- x-pack/plugin/build.gradle | 1 + .../org/elasticsearch/xpack/core/enrich/EnrichPolicy.java | 2 +- x-pack/plugin/enrich/qa/rest/build.gradle | 5 +++++ .../resources/rest-api-spec/test/enrich/10_basic.yml | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 193a82436f26a..1750ccbb8c0ce 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -89,5 +89,6 @@ tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("esql/80_text/reverse text", "The output type changed from TEXT to KEYWORD.") task.skipTest("esql/80_text/values function", "The output type changed from TEXT to KEYWORD.") task.skipTest("privileges/11_builtin/Test get builtin privileges" ,"unnecessary to test compatibility") + task.skipTest("enrich/10_basic/Test using the deprecated elasticsearch_version field results in a warning", "The deprecation message was changed") }) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/enrich/EnrichPolicy.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/enrich/EnrichPolicy.java index 916bd3c62a598..9bbe41b4797fe 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/enrich/EnrichPolicy.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/enrich/EnrichPolicy.java @@ -36,7 +36,7 @@ public final class EnrichPolicy implements Writeable, ToXContentFragment { private static final String ELASTICEARCH_VERSION_DEPRECATION_MESSAGE = - "the [elasticsearch_version] field of an enrich policy has no effect and will be removed in Elasticsearch 9.0"; + "the [elasticsearch_version] field of an enrich policy has no effect and will be removed in a future version of Elasticsearch"; private static final DeprecationLogger deprecationLogger = 
DeprecationLogger.getLogger(EnrichPolicy.class); diff --git a/x-pack/plugin/enrich/qa/rest/build.gradle b/x-pack/plugin/enrich/qa/rest/build.gradle index fdaddbc1f9290..064e362c77e6c 100644 --- a/x-pack/plugin/enrich/qa/rest/build.gradle +++ b/x-pack/plugin/enrich/qa/rest/build.gradle @@ -32,3 +32,8 @@ testClusters.configureEach { setting 'xpack.security.enabled', 'false' requiresFeature 'es.index_mode_feature_flag_registered', Version.fromString("8.4.0") } + +tasks.named("yamlRestCompatTestTransform").configure({ task -> + task.skipTest("enrich/10_basic/Test using the deprecated elasticsearch_version field results in a warning", "The deprecation message was changed") +}) + diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/10_basic.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/10_basic.yml index c9b05c4e13a85..17e5e0cfb0759 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/10_basic.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/enrich/10_basic.yml @@ -69,7 +69,7 @@ setup: - do: warnings: - - "the [elasticsearch_version] field of an enrich policy has no effect and will be removed in Elasticsearch 9.0" + - "the [elasticsearch_version] field of an enrich policy has no effect and will be removed in a future version of Elasticsearch" enrich.put_policy: name: policy-crud-warning body: From eac2a01eec49cba455228d10df524efbe4033139 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Sat, 9 Nov 2024 06:56:46 +1100 Subject: [PATCH 25/39] Mute org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsCanMatchOnCoordinatorIntegTests testSearchableSnapshotShardsAreSkippedBySearchRequestWithoutQueryingAnyNodeWhenTheyAreOutsideOfTheQueryRange #116523 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index b0e532bc56210..1b768222f8bae 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -290,6 +290,9 @@ tests: - class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests method: testInvalidJSON issue: https://github.com/elastic/elasticsearch/issues/116521 +- class: org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsCanMatchOnCoordinatorIntegTests + method: testSearchableSnapshotShardsAreSkippedBySearchRequestWithoutQueryingAnyNodeWhenTheyAreOutsideOfTheQueryRange + issue: https://github.com/elastic/elasticsearch/issues/116523 # Examples: # From 51a9863e91801d4afa5f63b3e920c231c685fa71 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Fri, 8 Nov 2024 15:15:47 -0600 Subject: [PATCH 26/39] muting RemoteClusterPermissionsTests.testCollapseAndRemoveUnsupportedPrivileges (#116524) --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 1b768222f8bae..ae40f339818a9 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -293,6 +293,9 @@ tests: - class: org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsCanMatchOnCoordinatorIntegTests method: testSearchableSnapshotShardsAreSkippedBySearchRequestWithoutQueryingAnyNodeWhenTheyAreOutsideOfTheQueryRange issue: https://github.com/elastic/elasticsearch/issues/116523 +- class: org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissionsTests + method: testCollapseAndRemoveUnsupportedPrivileges + issue: https://github.com/elastic/elasticsearch/issues/116520 # Examples: # From e96c75568de0a8412f6e49fcd422f455cf67a91d Mon Sep 17 
00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Sat, 9 Nov 2024 17:36:10 +1100
Subject: [PATCH 27/39] Mute
 org.elasticsearch.xpack.logsdb.qa.StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT
 testMatchAllQuery #116536

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index ae40f339818a9..110d0d9bd5b5c 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -296,6 +296,9 @@ tests:
 - class: org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissionsTests
   method: testCollapseAndRemoveUnsupportedPrivileges
   issue: https://github.com/elastic/elasticsearch/issues/116520
+- class: org.elasticsearch.xpack.logsdb.qa.StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT
+  method: testMatchAllQuery
+  issue: https://github.com/elastic/elasticsearch/issues/116536
 # Examples:
 #

From fae80f85a9f5e4e5ba358bad2953d33d1367e253 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Mon, 11 Nov 2024 09:12:57 +1100
Subject: [PATCH 28/39] Mute org.elasticsearch.xpack.test.rest.XPackRestIT
 test {p0=ml/inference_crud/Test force delete given model referenced by
 pipeline} #116555

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index 110d0d9bd5b5c..1da1e370bfd12 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -299,6 +299,9 @@ tests:
 - class: org.elasticsearch.xpack.logsdb.qa.StandardVersusLogsIndexModeRandomDataDynamicMappingChallengeRestIT
   method: testMatchAllQuery
   issue: https://github.com/elastic/elasticsearch/issues/116536
+- class: org.elasticsearch.xpack.test.rest.XPackRestIT
+  method: test {p0=ml/inference_crud/Test force delete given model referenced by pipeline}
+  issue: https://github.com/elastic/elasticsearch/issues/116555
 # Examples:
 #

From 46b17bd550a17d6989136e96d122d182adb96644 Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Mon, 11 Nov 2024 09:29:14 +0100
Subject: [PATCH 29/39] Two small improvements to IndexNameExpressionResolver
 (#116552)

Not using an iterator loop for the mostly single item list saves
measurable runtime in the benchmarks for the resolver. Also, cleaned up
a redundant method argument.

---
 .../metadata/IndexNameExpressionResolver.java | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java
index 39499253c8790..bf80c38d64a4e 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java
@@ -402,8 +402,10 @@ Index[] concreteIndices(Context context, String... indexExpressions) {
                 resolveIndicesForDataStream(context, dataStream, concreteIndicesResult);
             }
         } else {
-            for (Index index : indexAbstraction.getIndices()) {
-                if (shouldTrackConcreteIndex(context, context.getOptions(), index)) {
+            List<Index> indices = indexAbstraction.getIndices();
+            for (int i = 0, n = indices.size(); i < n; i++) {
+                Index index = indices.get(i);
+                if (shouldTrackConcreteIndex(context, index)) {
                     concreteIndicesResult.add(index);
                 }
             }
@@ -421,7 +423,7 @@ Index[] concreteIndices(Context context, String...
indexExpressions) { private static void resolveIndicesForDataStream(Context context, DataStream dataStream, Set concreteIndicesResult) { if (shouldIncludeRegularIndices(context.getOptions())) { for (Index index : dataStream.getIndices()) { - if (shouldTrackConcreteIndex(context, context.getOptions(), index)) { + if (shouldTrackConcreteIndex(context, index)) { concreteIndicesResult.add(index); } } @@ -430,7 +432,7 @@ private static void resolveIndicesForDataStream(Context context, DataStream data // We short-circuit here, if failure indices are not allowed and they can be skipped if (context.getOptions().allowFailureIndices() || context.getOptions().ignoreUnavailable() == false) { for (Index index : dataStream.getFailureIndices().getIndices()) { - if (shouldTrackConcreteIndex(context, context.getOptions(), index)) { + if (shouldTrackConcreteIndex(context, index)) { concreteIndicesResult.add(index); } } @@ -565,7 +567,7 @@ private static IndexNotFoundException notFoundException(String... indexExpressio return infe; } - private static boolean shouldTrackConcreteIndex(Context context, IndicesOptions options, Index index) { + private static boolean shouldTrackConcreteIndex(Context context, Index index) { if (context.systemIndexAccessLevel == SystemIndexAccessLevel.BACKWARDS_COMPATIBLE_ONLY && context.netNewSystemIndexPredicate.test(index.getName())) { // Exclude this one as it's a net-new system index, and we explicitly don't want those. @@ -575,7 +577,7 @@ private static boolean shouldTrackConcreteIndex(Context context, IndicesOptions DataStream parentDataStream = context.getState().metadata().getIndicesLookup().get(index.getName()).getParentDataStream(); if (parentDataStream != null && parentDataStream.isFailureStoreEnabled()) { if (parentDataStream.isFailureStoreIndex(index.getName())) { - if (options.ignoreUnavailable()) { + if (context.options.ignoreUnavailable()) { return false; } else { throw new FailureIndexNotSupportedException(index); @@ -585,6 +587,7 @@ private static boolean shouldTrackConcreteIndex(Context context, IndicesOptions } final IndexMetadata imd = context.state.metadata().index(index); if (imd.getState() == IndexMetadata.State.CLOSE) { + IndicesOptions options = context.options; if (options.forbidClosedIndices() && options.ignoreUnavailable() == false) { throw new IndexClosedException(index); } else { From 14944f2d3bc9868ce175424a3dc89b5fb136a6ad Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:14:10 +1100 Subject: [PATCH 30/39] Mute org.elasticsearch.search.basic.SearchWithRandomIOExceptionsIT testRandomDirectoryIOExceptions #114824 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 1da1e370bfd12..a00fecc253a7b 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -302,6 +302,9 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/inference_crud/Test force delete given model referenced by pipeline} issue: https://github.com/elastic/elasticsearch/issues/116555 +- class: org.elasticsearch.search.basic.SearchWithRandomIOExceptionsIT + method: testRandomDirectoryIOExceptions + issue: https://github.com/elastic/elasticsearch/issues/114824 # Examples: # From ddd606d360c03ef6741b235c452f1d6952ee2b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Slobodan=20Adamovi=C4=87?= Date: Mon, 11 Nov 2024 10:42:49 +0100 Subject: [PATCH 31/39] Unmute SecurityWithBasicLicenseIT (#116300) Test failure issue got closed 
without actually unmuting this test. Locally, this test passes. Relates https://github.com/elastic/elasticsearch/issues/99169 --- .../elasticsearch/xpack/security/SecurityWithBasicLicenseIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecurityWithBasicLicenseIT.java b/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecurityWithBasicLicenseIT.java index 3dd678046ea5f..324850f158268 100644 --- a/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecurityWithBasicLicenseIT.java +++ b/x-pack/plugin/security/qa/security-basic/src/javaRestTest/java/org/elasticsearch/xpack/security/SecurityWithBasicLicenseIT.java @@ -29,7 +29,6 @@ public class SecurityWithBasicLicenseIT extends SecurityInBasicRestTestCase { - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/99169") public void testWithBasicLicense() throws Exception { checkLicenseType("basic"); checkSecurityEnabled(false); From 09146792256211b47549e162af5177309b4c3107 Mon Sep 17 00:00:00 2001 From: Jack Pan <35284546+jackpan123@users.noreply.github.com> Date: Mon, 11 Nov 2024 18:10:05 +0800 Subject: [PATCH 32/39] Remove trailing semicolon in REPEAT function example (#116218) Remove trailing semicolon in REPEAT function example (Closes #116156 ) --- docs/reference/esql/functions/kibana/definition/repeat.json | 2 +- docs/reference/esql/functions/kibana/docs/repeat.md | 2 +- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/reference/esql/functions/kibana/definition/repeat.json b/docs/reference/esql/functions/kibana/definition/repeat.json index b8660b6362e30..201484cf7aa6f 100644 --- a/docs/reference/esql/functions/kibana/definition/repeat.json +++ b/docs/reference/esql/functions/kibana/definition/repeat.json @@ -42,7 +42,7 @@ } ], "examples" : [ - "ROW a = \"Hello!\"\n| EVAL triple_a = REPEAT(a, 3);" + "ROW a = \"Hello!\"\n| EVAL triple_a = REPEAT(a, 3)" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/esql/functions/kibana/docs/repeat.md b/docs/reference/esql/functions/kibana/docs/repeat.md index cc46e8282d9fe..4949d86a28f46 100644 --- a/docs/reference/esql/functions/kibana/docs/repeat.md +++ b/docs/reference/esql/functions/kibana/docs/repeat.md @@ -7,5 +7,5 @@ Returns a string constructed by concatenating `string` with itself the specified ``` ROW a = "Hello!" -| EVAL triple_a = REPEAT(a, 3); +| EVAL triple_a = REPEAT(a, 3) ``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index de5981df999c7..963245f9f0ea6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -1655,8 +1655,9 @@ repeat required_capability: repeat // tag::repeat[] ROW a = "Hello!" -| EVAL triple_a = REPEAT(a, 3); +| EVAL triple_a = REPEAT(a, 3) // end::repeat[] +; // tag::repeat-result[] a:keyword | triple_a:keyword From 3f6fda631b0ae032a61bd91e2c7587c55848ada4 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 11 Nov 2024 11:55:19 +0100 Subject: [PATCH 33/39] Fix/cleanup two spots in open-PIT request handling (#116553) Two things fixed here: 1. Don't fork just to send the response, it's unnecessary. 
Serializing the ID might take a little time but if it's really an issue we should optimize it rather than forking just to send a single response. 2. Handle finding a connection cleanly, don't allow the exception to bubble up and fail the phase, this may cause leaks. --- .../TransportOpenPointInTimeAction.java | 38 +++++-------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java index 010f96f212116..eee65134eae33 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java @@ -28,7 +28,6 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.shard.ShardId; @@ -257,16 +256,17 @@ protected void executePhaseOnShard( SearchShardTarget shard, SearchActionListener phaseListener ) { - final ShardOpenReaderRequest shardRequest = new ShardOpenReaderRequest( - shardIt.shardId(), - shardIt.getOriginalIndices(), - pitRequest.keepAlive() - ); - Transport.Connection connection = connectionLookup.apply(shardIt.getClusterAlias(), shard.getNodeId()); + final Transport.Connection connection; + try { + connection = connectionLookup.apply(shardIt.getClusterAlias(), shard.getNodeId()); + } catch (Exception e) { + phaseListener.onFailure(e); + return; + } transportService.sendChildRequest( connection, OPEN_SHARD_READER_CONTEXT_NAME, - shardRequest, + new ShardOpenReaderRequest(shardIt.shardId(), shardIt.getOriginalIndices(), pitRequest.keepAlive()), task, new ActionListenerResponseHandler<>( phaseListener, @@ -279,29 +279,9 @@ protected void executePhaseOnShard( @Override protected SearchPhase getNextPhase() { return new SearchPhase(getName()) { - - private void onExecuteFailure(Exception e) { - onPhaseFailure(this, "sending response failed", e); - } - @Override public void run() { - execute(new AbstractRunnable() { - @Override - public void onFailure(Exception e) { - onExecuteFailure(e); - } - - @Override - protected void doRun() { - sendSearchResponse(SearchResponseSections.EMPTY_WITH_TOTAL_HITS, results.getAtomicArray()); - } - - @Override - public boolean isForceExecution() { - return true; // we already created the PIT, no sense in rejecting the task that sends the response. - } - }); + sendSearchResponse(SearchResponseSections.EMPTY_WITH_TOTAL_HITS, results.getAtomicArray()); } }; } From 60de8ed05c6ff3a46fa7f0be5b32f2b0706adafe Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 11 Nov 2024 11:55:52 +0100 Subject: [PATCH 34/39] Cleanup IndexActionIT (#116554) We can use the hit count assertion here, no need to be tricky. Also, this can be a single loop nowadays, the two loops are a leftover from when this was testing with types. 
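As a reviewer aid, here is the essence of the first cleanup as a minimal
before/after sketch. Both forms are lifted from the hunks below and assume the
surrounding `ESIntegTestCase` context, with `numOfDocs` documents already
indexed into the "test" index:

```
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;

// Before: unwrap the search response by hand and compare the total hit count.
assertResponse(prepareSearch("test"), response -> {
    if (response.getHits().getTotalHits().value() != numOfDocs) {
        fail("Count is " + response.getHits().getTotalHits().value()
            + " but " + numOfDocs + " was expected.");
    }
});

// After: one framework assertion that fails with an equivalent message.
assertHitCount(prepareSearch("test"), numOfDocs);
```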
--- .../elasticsearch/indexing/IndexActionIT.java | 65 +++---------------- 1 file changed, 9 insertions(+), 56 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indexing/IndexActionIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indexing/IndexActionIT.java index 37fbc95d56506..84abb57b7821e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/indexing/IndexActionIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/indexing/IndexActionIT.java @@ -17,7 +17,6 @@ import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.indices.InvalidIndexNameException; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; import java.util.ArrayList; import java.util.List; @@ -28,7 +27,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicIntegerArray; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThanOrEqualTo; @@ -42,7 +41,6 @@ public class IndexActionIT extends ESIntegTestCase { public void testAutoGenerateIdNoDuplicates() throws Exception { int numberOfIterations = scaledRandomIntBetween(10, 50); for (int i = 0; i < numberOfIterations; i++) { - Exception firstError = null; createIndex("test"); int numOfDocs = randomIntBetween(10, 100); logger.info("indexing [{}] docs", numOfDocs); @@ -52,51 +50,9 @@ public void testAutoGenerateIdNoDuplicates() throws Exception { } indexRandom(true, builders); logger.info("verifying indexed content"); - int numOfChecks = randomIntBetween(8, 12); + int numOfChecks = randomIntBetween(16, 24); for (int j = 0; j < numOfChecks; j++) { - try { - logger.debug("running search with all types"); - assertResponse(prepareSearch("test"), response -> { - if (response.getHits().getTotalHits().value() != numOfDocs) { - final String message = "Count is " - + response.getHits().getTotalHits().value() - + " but " - + numOfDocs - + " was expected. " - + ElasticsearchAssertions.formatShardStatus(response); - logger.error("{}. search response: \n{}", message, response); - fail(message); - } - }); - } catch (Exception e) { - logger.error("search for all docs types failed", e); - if (firstError == null) { - firstError = e; - } - } - try { - logger.debug("running search with a specific type"); - assertResponse(prepareSearch("test"), response -> { - if (response.getHits().getTotalHits().value() != numOfDocs) { - final String message = "Count is " - + response.getHits().getTotalHits().value() - + " but " - + numOfDocs - + " was expected. " - + ElasticsearchAssertions.formatShardStatus(response); - logger.error("{}. 
search response: \n{}", message, response); - fail(message); - } - }); - } catch (Exception e) { - logger.error("search for all docs of a specific type failed", e); - if (firstError == null) { - firstError = e; - } - } - } - if (firstError != null) { - fail(firstError.getMessage()); + assertHitCount(prepareSearch("test"), numOfDocs); } internalCluster().wipeIndices("test"); } @@ -147,16 +103,13 @@ public void testCreatedFlagParallelExecution() throws Exception { List> tasks = new ArrayList<>(taskCount); final Random random = random(); for (int i = 0; i < taskCount; i++) { - tasks.add(new Callable() { - @Override - public Void call() throws Exception { - int docId = random.nextInt(docCount); - DocWriteResponse indexResponse = indexDoc("test", Integer.toString(docId), "field1", "value"); - if (indexResponse.getResult() == DocWriteResponse.Result.CREATED) { - createdCounts.incrementAndGet(docId); - } - return null; + tasks.add(() -> { + int docId = random.nextInt(docCount); + DocWriteResponse indexResponse = indexDoc("test", Integer.toString(docId), "field1", "value"); + if (indexResponse.getResult() == DocWriteResponse.Result.CREATED) { + createdCounts.incrementAndGet(docId); } + return null; }); } From 64c362b154534371a8f90dd8cd42cbba71a7f825 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Mon, 11 Nov 2024 13:19:20 +0200 Subject: [PATCH 35/39] Adding more retriever examples to documentation (#116196) --- .../retrievers-examples.asciidoc | 428 ++++++++++++++++++ .../retrievers-overview.asciidoc | 121 +++-- .../search-your-data/search-api.asciidoc | 1 - .../search-your-data.asciidoc | 1 + 4 files changed, 504 insertions(+), 47 deletions(-) create mode 100644 docs/reference/search/search-your-data/retrievers-examples.asciidoc diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc new file mode 100644 index 0000000000000..8cd1a4bf5ce98 --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -0,0 +1,428 @@ +[[retrievers-examples]] +=== Retrievers examples + +Learn how to combine different retrievers in these hands-on examples. +To demonstrate the full functionality of retrievers, these examples require access to a <> set up using the <>. + +[discrete] +[[retrievers-examples-setup]] +==== Add example data + +To begin with, we'll set up the necessary services and have them in place for later use. + +[source,js] +---- +// Setup rerank task stored as `my-rerank-model` +PUT _inference/rerank/my-rerank-model +{ + "service": "cohere", + "service_settings": { + "model_id": "rerank-english-v3.0", + "api_key": "{{COHERE_API_KEY}}" + } +} +---- +//NOTCONSOLE + +Now that we have our reranking service in place, lets create the `retrievers_example` index, and add some documents to it. 
+[source,js] +---- +PUT retrievers_example +{ + "mappings": { + "properties": { + "vector": { + "type": "dense_vector", + "dims": 3, + "similarity": "l2_norm", + "index": true + }, + "text": { + "type": "text" + }, + "year": { + "type": "integer" + }, + "topic": { + "type": "keyword" + } + } + } +} +---- +//NOTCONSOLE + +[source,js] +---- +POST /retrievers_example/_doc/1 +{ + "vector": [0.23, 0.67, 0.89], + "text": "Large language models are revolutionizing information retrieval by boosting search precision, deepening contextual understanding, and reshaping user experiences in data-rich environments.", + "year": 2024, + "topic": ["llm", "ai", "information_retrieval"] +} + +POST /retrievers_example/_doc/2 +{ + "vector": [0.12, 0.56, 0.78], + "text": "Artificial intelligence is transforming medicine, from advancing diagnostics and tailoring treatment plans to empowering predictive patient care for improved health outcomes.", + "year": 2023, + "topic": ["ai", "medicine"] +} + +POST /retrievers_example/_doc/3 +{ + "vector": [0.45, 0.32, 0.91], + "text": "AI is redefining security by enabling advanced threat detection, proactive risk analysis, and dynamic defenses against increasingly sophisticated cyber threats.", + "year": 2024, + "topic": ["ai", "security"] +} + +POST /retrievers_example/_doc/4 +{ + "vector": [0.34, 0.21, 0.98], + "text": "Elastic introduces Elastic AI Assistant, the open, generative AI sidekick powered by ESRE to democratize cybersecurity and enable users of every skill level.", + "year": 2023, + "topic": ["ai", "elastic", "assistant"] +} + +POST /retrievers_example/_doc/5 +{ + "vector": [0.11, 0.65, 0.47], + "text": "Learn how to spin up a deployment of our hosted Elasticsearch Service and use Elastic Observability to gain deeper insight into the behavior of your applications and systems.", + "year": 2024, + "topic": ["documentation", "observability", "elastic"] +} + +---- +//NOTCONSOLE + +Now that we also have our documents in place, let's try to run some queries using retrievers. + +[discrete] +[[retrievers-examples-combining-standard-knn-retrievers-with-rrf]] +==== Example: Combining query and kNN with RRF + +First, let's examine how to combine two different types of queries: a `kNN` query and a +`query_string` query. While these queries may produce scores in different ranges, we can use +Reciprocal Rank Fusion (`rrf`) to combine the results and generate a merged final result +list. + +To implement this in the retriever framework, we start with the top-level element: our `rrf` +retriever. This retriever operates on top of two other retrievers: a `knn` retriever and a +`standard` retriever. Our query structure would look like this: + +[source,js] +---- +GET /retrievers_example/_search +{ + "retriever":{ + "rrf": { + "retrievers":[ + { + "standard":{ + "query":{ + "query_string":{ + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": ["text", "topic"] +} +---- +//NOTCONSOLE + +[discrete] +[[retrievers-examples-collapsing-retriever-results]] +==== Example: Grouping results by year with `collapse` + +In our result set, we have many documents with the same `year` value. We can clean this +up using the `collapse` parameter with our retriever. 
This enables grouping results by +any field and returns only the highest-scoring document from each group. In this example +we'll collapse our results based on the `year` field. + +[source,js] +---- +GET /retrievers_example/_search +{ + "retriever":{ + "rrf": { + "retrievers":[ + { + "standard":{ + "query":{ + "query_string":{ + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "collapse": { + "field": "year", + "inner_hits": { + "name": "topic related documents", + "_source": ["text", "year"] + } + }, + "_source": ["text", "topic"] +} +---- +//NOTCONSOLE + +[discrete] +[[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] +==== Example: Rerank results of an RRF retriever + +Previously, we used a `text_similarity_reranker` retriever within an `rrf` retriever. +Because retrievers support full composability, we can also rerank the results of an +`rrf` retriever. Let's apply this to our first example. + +[source,js] +---- +GET retrievers_example/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "rrf": { + "retrievers": [ + { + "standard":{ + "query":{ + "query_string":{ + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "_source": ["text", "topic"] +} + +---- +//NOTCONSOLE + +[discrete] +[[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] +==== Example: RRF with semantic reranker + +For this example, we'll replace our semantic query with the `my-rerank-model` +reranker we previously configured. Since this is a reranker, it needs an initial pool of +documents to work with. In this case, we'll filter for documents about `ai` topics. + +[source,js] +---- +GET /retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "term": { + "topic": "ai" + } + } + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "Can I use generative AI to identify user intent and improve search relevance?" + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": [ + "text", + "topic" + ] +} +---- +//NOTCONSOLE + +[discrete] +[[retrievers-examples-chaining-text-similarity-reranker-retrievers]] +==== Example: Chaining multiple semantic rerankers + +Full composability means we can chain together multiple retrievers of the same type. For instance, imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. 
+ +[source,js] +---- +GET retrievers_example/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "text_similarity_reranker": { + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "rank_window_size": 100, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "rank_window_size": 10, + "field": "text", + "inference_id": "my-other-more-expensive-rerank-model", + "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" + } + }, + "_source": [ + "text", + "topic" + ] +} +---- +//NOTCONSOLE + + +Note that our example applies two reranking steps. First, we rerank the top 100 +documents from the `knn` search using the `my-rerank-model` reranker. Then we +pick the top 10 results and rerank them using the more fine-grained +`my-other-more-expensive-rerank-model`. + +[discrete] +[[retrievers-examples-rrf-and-aggregations]] +==== Example: Combine RRF with aggregations + +Retrievers support both composability and most of the standard `_search` functionality. For instance, +we can compute aggregations with the `rrf` retriever. When using a compound retriever, +the aggregations are computed based on its nested retrievers. In the following example, +the `terms` aggregation for the `topic` field will include all results, not just the top `rank_window_size`, +from the 2 nested retrievers, i.e. all documents whose `year` field is greater than 2023, and whose `topic` field +matches the term `elastic`. + +[source,js] +---- +GET retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "range": { + "year": { + "gt": 2023 + } + } + } + } + }, + { + "standard": { + "query": { + "term": { + "topic": "elastic" + } + } + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": [ + "text", + "topic" + ], + "aggs": { + "topics": { + "terms": { + "field": "topic" + } + } + } +} +---- +//NOTCONSOLE diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index 8e5955fc41782..1771b5bb0d849 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -1,5 +1,5 @@ [[retrievers-overview]] -=== Retrievers +== Retrievers A retriever is an abstraction that was added to the Search API in *8.14.0* and was made generally available in *8.16.0*. This abstraction enables the configuration of multi-stage retrieval pipelines within a single `_search` call. @@ -11,7 +11,7 @@ For implementation details, including notable restrictions, check out the [discrete] [[retrievers-overview-types]] -==== Retriever types +=== Retriever types Retrievers come in various types, each tailored for different search operations. The following retrievers are currently available: @@ -34,7 +34,8 @@ Used for <>. Requires first creating a `rerank` task using the <>. [discrete] -==== What makes retrievers useful? +[[retrievers-overview-why-are-they-useful]] +=== What makes retrievers useful? Here's an overview of what makes retrievers useful and how they differ from regular queries. 
@@ -66,65 +67,90 @@ When using compound retrievers, only the query element is allowed, which enforce
 
 [discrete]
 [[retrievers-overview-example]]
-==== Example
+=== Example
 
-The following example demonstrates the powerful queries that we can now compose, and how retrievers simplify this process.
-We can use any combination of retrievers we want, propagating the results of a nested retriever to its parent.
-In this scenario, we'll make use of 4 of our currently available retrievers, i.e. `standard`, `knn`, `text_similarity_reranker` and `rrf`.
-See <> for the complete list of available retrievers.
-
-We'll first combine the results of a `semantic` query using the `standard` retriever, and that of a `knn` search on a dense vector field, using `rrf` to get the top 100 results.
-Finally, we'll then rerank the top-50 results of `rrf` using the `text_similarity_reranker`
+The following example demonstrates how using retrievers simplifies the composability of queries for RRF ranking.
 
 [source,js]
 ----
 GET example-index/_search
 {
   "retriever": {
-    "text_similarity_reranker": {
-      "retriever": {
-        "rrf": {
-          "retrievers": [
-            {
-              "standard": {
-                "query": {
-                  "semantic": {
-                    "field": "inference_field",
-                    "query": "state of the art vector database"
-                  }
-                }
-              }
-            },
-            {
-              "knn": {
-                "query_vector": [
-                  0.54,
-                  ...,
-                  0.245
-                ],
-                "field": "embedding",
-                "k": 10,
-                "num_candidates": 15
+    "rrf": {
+      "retrievers": [
+        {
+          "standard": {
+            "query": {
+              "sparse_vector": {
+                "field": "vector.tokens",
+                "inference_id": "my-elser-endpoint",
+                "query": "What blue shoes are on sale?"
+              }
+            }
+          }
+        },
+        {
+          "standard": {
+            "query": {
+              "match": {
+                "text": "blue shoes sale"
               }
             }
-          ],
-          "rank_window_size": 100,
-          "rank_constant": 10
+          }
         }
-      },
-      "rank_window_size": 50,
-      "field": "description",
-      "inference_text": "what's the best way to create complex pipelines and retrieve documents?",
-      "inference_id": "my-awesome-rerank-model"
+      ]
     }
   }
 }
 ----
 //NOTCONSOLE
 
+This example demonstrates how you can combine different retrieval strategies into a single `retriever` pipeline.
+
+Compare to `RRF` with `sub_searches` approach (which is deprecated as of 8.16.0):
+
+.*Expand* for example
+[%collapsible]
+==============
+
+[source,js]
+----
+GET example-index/_search
+{
+  "sub_searches":[
+    {
+      "query":{
+        "match":{
+          "text":"blue shoes sale"
+        }
+      }
+    },
+    {
+      "query":{
+        "sparse_vector": {
+          "field": "vector.tokens",
+          "inference_id": "my-elser-endpoint",
+          "query": "What blue shoes are on sale?"
+        }
+      }
+    }
+  ],
+  "rank":{
+    "rrf":{
+      "rank_window_size":50,
+      "rank_constant":20
+    }
+  }
+}
+----
+//NOTCONSOLE
+==============
+
+For more examples on how to use retrievers, please refer to <>.
+
 [discrete]
 [[retrievers-overview-glossary]]
-==== Glossary
+=== Glossary
 
 Here are some important terms:
 
@@ -143,7 +169,7 @@ Special compound retrievers that reorder hits and may adjust the number of hits,
 
 [discrete]
 [[retrievers-overview-play-in-search]]
-==== Retrievers in action
+=== Retrievers in action
 
 The Search Playground builds Elasticsearch queries using the retriever abstraction.
 It automatically detects the fields and types in your index and builds a retriever tree based on your selections.
 
@@ -154,6 +180,9 @@ Refer to the {kibana-ref}/playground.html[Playground documentation] for more inf
 
 [discrete]
 [[retrievers-overview-api-reference]]
-==== API reference
+=== API reference
 For implementation details, including notable restrictions, check out the <> in the Search API docs.
+ + +include::retrievers-examples.asciidoc[] diff --git a/docs/reference/search/search-your-data/search-api.asciidoc b/docs/reference/search/search-your-data/search-api.asciidoc index 13cea537ea4fb..a9e74d54dd9d9 100644 --- a/docs/reference/search/search-your-data/search-api.asciidoc +++ b/docs/reference/search/search-your-data/search-api.asciidoc @@ -530,5 +530,4 @@ include::retrieve-inner-hits.asciidoc[] include::search-shard-routing.asciidoc[] include::search-using-query-rules.asciidoc[] include::search-template.asciidoc[] -include::retrievers-overview.asciidoc[] diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc index cd2b418a7e79b..82541412db4bd 100644 --- a/docs/reference/search/search-your-data/search-your-data.asciidoc +++ b/docs/reference/search/search-your-data/search-your-data.asciidoc @@ -43,6 +43,7 @@ DSL, with a simplified user experience. Create search applications based on your results directly in the Kibana Search UI. include::search-api.asciidoc[] +include::retrievers-overview.asciidoc[] include::knn-search.asciidoc[] include::semantic-search.asciidoc[] include::search-across-clusters.asciidoc[] From 9e087921e5c2c4dfb87504eb95698ea25cfd796e Mon Sep 17 00:00:00 2001 From: Pete Gillin Date: Mon, 11 Nov 2024 12:24:45 +0000 Subject: [PATCH 36/39] Reenable compat tests of enrich policy deprecation (#116581) In https://github.com/elastic/elasticsearch/pull/116504, we changed a deprecation message, and although this didn't break BWC because deprecation messages aren't part of the API contract, it did break the compat tests which asserted on the message. We therefore suppressed the compat tests in question. In https://github.com/elastic/elasticsearch/pull/116522, we backported that change to the `8.x` branch. So the compat tests on `main` are now asserting the correct message, and so pass, and can be reenabled. 
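For context, the suppression that this change removes was registered through
the REST compat test transform in the Gradle build. A minimal sketch of that
mechanism, using the exact test path and reason string from the hunks below:

```
tasks.named("yamlRestCompatTestTransform").configure({ task ->
  task.skipTest(
    "enrich/10_basic/Test using the deprecated elasticsearch_version field results in a warning",
    "The deprecation message was changed"
  )
})
```

Removing the entry lets the compat run execute that YAML test again, which now
passes because `main` and `8.x` assert the same deprecation warning.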
---
 x-pack/plugin/build.gradle                | 1 -
 x-pack/plugin/enrich/qa/rest/build.gradle | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle
index 1750ccbb8c0ce..193a82436f26a 100644
--- a/x-pack/plugin/build.gradle
+++ b/x-pack/plugin/build.gradle
@@ -89,6 +89,5 @@ tasks.named("yamlRestCompatTestTransform").configure({ task ->
   task.skipTest("esql/80_text/reverse text", "The output type changed from TEXT to KEYWORD.")
   task.skipTest("esql/80_text/values function", "The output type changed from TEXT to KEYWORD.")
   task.skipTest("privileges/11_builtin/Test get builtin privileges" ,"unnecessary to test compatibility")
-  task.skipTest("enrich/10_basic/Test using the deprecated elasticsearch_version field results in a warning", "The deprecation message was changed")
 })

diff --git a/x-pack/plugin/enrich/qa/rest/build.gradle b/x-pack/plugin/enrich/qa/rest/build.gradle
index 064e362c77e6c..f96eff5f933c4 100644
--- a/x-pack/plugin/enrich/qa/rest/build.gradle
+++ b/x-pack/plugin/enrich/qa/rest/build.gradle
@@ -33,7 +33,3 @@ testClusters.configureEach {
   requiresFeature 'es.index_mode_feature_flag_registered', Version.fromString("8.4.0")
 }
 
-tasks.named("yamlRestCompatTestTransform").configure({ task ->
-  task.skipTest("enrich/10_basic/Test using the deprecated elasticsearch_version field results in a warning", "The deprecation message was changed")
-})
-

From 91559da015abc4fd5851eb768d3af8884efa9c7c Mon Sep 17 00:00:00 2001
From: Nikolaj Volgushev
Date: Mon, 11 Nov 2024 13:37:23 +0100
Subject: [PATCH 37/39] Use retry logic and real file system in file settings
 ITs (#116392)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Several file-settings ITs fail (rarely) with exceptions like:

```
java.nio.file.AccessDeniedException: C:\Users\jenkins\workspace\platform-support\14\server\build\testrun\internalClusterTest\temp\org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT_5733F2A737542BE-001\tempFile-001.tmp -> C:\Users\jenkins\workspace\platform-support\14\server\build\testrun\internalClusterTest\temp\org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT_5733F2A737542BE-001\tempDir-002\config\operator\settings.json
    at sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:89)
    at sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103)
    at sun.nio.fs.WindowsFileCopy.move(WindowsFileCopy.java:317)
    at sun.nio.fs.WindowsFileSystemProvider.move(WindowsFileSystemProvider.java:293)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at org.apache.lucene.tests.mockfile.FilterFileSystemProvider.move(FilterFileSystemProvider.java:144)
    at java.nio.file.Files.move(Files.java:1430)
    at org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT.writeJSONFile(SnaphotsAndFileSettingsIT.java:86)
    at org.elasticsearch.reservedstate.service.SnaphotsAndFileSettingsIT.testRestoreWithPersistedFileSettings(SnaphotsAndFileSettingsIT.java:321)
```

This happens in Windows file systems, due to a race condition where the
file settings service is reading the settings file concurrently with the
test trying to
modify it (a no-go in Windows). It turns out we have already addressed this with a retry for one test suite (https://github.com/elastic/elasticsearch/pull/91863), plus addressed a related issue around mock windows file-systems misbehaving (https://github.com/elastic/elasticsearch/pull/92653). This PR extends the above fixes to all file-settings related ITs. --- .../ComponentTemplatesFileSettingsIT.java | 16 +---- .../service/FileSettingsServiceIT.java | 67 ++++++++++-------- .../service/RepositoriesFileSettingsIT.java | 16 +---- .../service/SnapshotsAndFileSettingsIT.java | 34 +-------- .../RoleMappingFileSettingsIT.java | 69 +++++++++++-------- .../FileSettingsRoleMappingsRestartIT.java | 13 ++-- ...eanupRoleMappingDuplicatesMigrationIT.java | 14 ++-- 7 files changed, 101 insertions(+), 128 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java index 45e370a2e2252..8e0dee2396411 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.reservedstate.service; +import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.action.admin.indices.template.get.GetComponentTemplateAction; @@ -26,16 +27,12 @@ import org.elasticsearch.cluster.metadata.ReservedStateMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.Strings; import org.elasticsearch.core.Tuple; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.XContentParserConfiguration; import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; @@ -54,6 +51,7 @@ import static org.hamcrest.Matchers.notNullValue; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) +@LuceneTestCase.SuppressFileSystems("*") public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { private static AtomicLong versionCounter = new AtomicLong(1); @@ -365,15 +363,7 @@ private void assertMasterNode(Client client, String node) throws ExecutionExcept } private void writeJSONFile(String node, String json) throws Exception { - long version = versionCounter.incrementAndGet(); - - FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node); - - Files.createDirectories(fileSettingsService.watchedFileDir()); - Path tempFilePath = createTempFile(); - - Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8)); - Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); + FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet()); } private Tuple setupClusterStateListener(String node) { diff --git 
a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java index f9122ccfb4a3e..90326abb381d0 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java @@ -10,6 +10,7 @@ package org.elasticsearch.reservedstate.service; import org.apache.logging.log4j.Logger; +import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; @@ -20,6 +21,7 @@ import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata; import org.elasticsearch.cluster.metadata.ReservedStateMetadata; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.core.Tuple; @@ -27,7 +29,7 @@ import org.elasticsearch.test.ESIntegTestCase; import org.junit.Before; -import java.nio.charset.StandardCharsets; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; @@ -50,6 +52,7 @@ import static org.hamcrest.Matchers.nullValue; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) +@LuceneTestCase.SuppressFileSystems("*") public class FileSettingsServiceIT extends ESIntegTestCase { private final AtomicLong versionCounter = new AtomicLong(1); @@ -129,29 +132,37 @@ private void assertMasterNode(Client client, String node) { ); } - public static void writeJSONFile(String node, String json, AtomicLong versionCounter, Logger logger, boolean incrementVersion) - throws Exception { - long version = incrementVersion ? 
versionCounter.incrementAndGet() : versionCounter.get(); - + public static void writeJSONFile(String node, String json, Logger logger, Long version) throws Exception { FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node); Files.createDirectories(fileSettingsService.watchedFileDir()); Path tempFilePath = createTempFile(); - String settingsFileContent = Strings.format(json, version); - Files.write(tempFilePath, settingsFileContent.getBytes(StandardCharsets.UTF_8)); - logger.info("--> Before writing new settings file with version [{}]", version); - Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); - logger.info("--> After writing new settings file: [{}]", settingsFileContent); - } - - public static void writeJSONFile(String node, String json, AtomicLong versionCounter, Logger logger) throws Exception { - writeJSONFile(node, json, versionCounter, logger, true); + String jsonWithVersion = Strings.format(json, version); + logger.info("--> before writing JSON config to node {} with path {}", node, tempFilePath); + logger.info(jsonWithVersion); + + Files.writeString(tempFilePath, jsonWithVersion); + int retryCount = 0; + do { + try { + // this can fail on Windows because of timing + Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); + logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath); + return; + } catch (IOException e) { + logger.info("--> retrying writing a settings file [{}]", retryCount); + if (retryCount == 4) { // retry 5 times + throw e; + } + Thread.sleep(retryDelay(retryCount)); + retryCount++; + } + } while (true); } - public static void writeJSONFileWithoutVersionIncrement(String node, String json, AtomicLong versionCounter, Logger logger) - throws Exception { - writeJSONFile(node, json, versionCounter, logger, false); + private static long retryDelay(int retryCount) { + return 100 * (1 << retryCount) + Randomness.get().nextInt(10); } private Tuple setupCleanupClusterStateListener(String node) { @@ -245,7 +256,7 @@ public void testSettingsApplied() throws Exception { assertTrue(masterFileSettingsService.watching()); assertFalse(dataFileSettingsService.watching()); - writeJSONFile(masterNode, testJSON, versionCounter, logger); + writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet()); assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb"); } @@ -260,7 +271,7 @@ public void testSettingsAppliedOnStart() throws Exception { // In internal cluster tests, the nodes share the config directory, so when we write with the data node path // the master will pick it up on start - writeJSONFile(dataNode, testJSON, versionCounter, logger); + writeJSONFile(dataNode, testJSON, logger, versionCounter.incrementAndGet()); logger.info("--> start master node"); final String masterNode = internalCluster().startMasterOnlyNode(); @@ -288,7 +299,7 @@ public void testReservedStatePersistsOnRestart() throws Exception { assertBusy(() -> assertTrue(masterFileSettingsService.watching())); logger.info("--> write some settings"); - writeJSONFile(masterNode, testJSON, versionCounter, logger); + writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet()); assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb"); logger.info("--> restart master"); @@ -366,7 +377,7 @@ public void testErrorSaved() throws Exception { 
assertTrue(masterFileSettingsService.watching()); assertFalse(dataFileSettingsService.watching()); - writeJSONFile(masterNode, testErrorJSON, versionCounter, logger); + writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet()); assertClusterStateNotSaved(savedClusterState.v1(), savedClusterState.v2()); } @@ -390,14 +401,14 @@ public void testErrorCanRecoverOnRestart() throws Exception { assertTrue(masterFileSettingsService.watching()); assertFalse(dataFileSettingsService.watching()); - writeJSONFile(masterNode, testErrorJSON, versionCounter, logger); + writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet()); AtomicLong metadataVersion = savedClusterState.v2(); assertClusterStateNotSaved(savedClusterState.v1(), metadataVersion); assertHasErrors(metadataVersion, "not_cluster_settings"); // write valid json without version increment to simulate ES being able to process settings after a restart (usually, this would be // due to a code change) - writeJSONFileWithoutVersionIncrement(masterNode, testJSON, versionCounter, logger); + writeJSONFile(masterNode, testJSON, logger, versionCounter.get()); internalCluster().restartNode(masterNode); ensureGreen(); @@ -426,14 +437,14 @@ public void testNewErrorOnRestartReprocessing() throws Exception { assertTrue(masterFileSettingsService.watching()); assertFalse(dataFileSettingsService.watching()); - writeJSONFile(masterNode, testErrorJSON, versionCounter, logger); + writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet()); AtomicLong metadataVersion = savedClusterState.v2(); assertClusterStateNotSaved(savedClusterState.v1(), metadataVersion); assertHasErrors(metadataVersion, "not_cluster_settings"); // write json with new error without version increment to simulate ES failing to process settings after a restart for a new reason // (usually, this would be due to a code change) - writeJSONFileWithoutVersionIncrement(masterNode, testOtherErrorJSON, versionCounter, logger); + writeJSONFile(masterNode, testOtherErrorJSON, logger, versionCounter.get()); assertHasErrors(metadataVersion, "not_cluster_settings"); internalCluster().restartNode(masterNode); ensureGreen(); @@ -461,7 +472,7 @@ public void testSettingsAppliedOnMasterReElection() throws Exception { assertTrue(masterFileSettingsService.watching()); - writeJSONFile(masterNode, testJSON, versionCounter, logger); + writeJSONFile(masterNode, testJSON, logger, versionCounter.incrementAndGet()); assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "50mb"); internalCluster().stopCurrentMasterNode(); @@ -476,13 +487,13 @@ public void testSettingsAppliedOnMasterReElection() throws Exception { ensureStableCluster(3); savedClusterState = setupCleanupClusterStateListener(internalCluster().getMasterName()); - writeJSONFile(internalCluster().getMasterName(), testCleanupJSON, versionCounter, logger); + writeJSONFile(internalCluster().getMasterName(), testCleanupJSON, logger, versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); savedClusterState = setupClusterStateListener(internalCluster().getMasterName()); - writeJSONFile(internalCluster().getMasterName(), testJSON43mb, versionCounter, logger); + writeJSONFile(internalCluster().getMasterName(), testJSON43mb, logger, versionCounter.incrementAndGet()); assertClusterStateSaveOK(savedClusterState.v1(), savedClusterState.v2(), "43mb"); } diff --git 
a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java index 54ba74a62890d..7b284979611e2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/RepositoriesFileSettingsIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.reservedstate.service; +import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesAction; import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesRequest; import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest; @@ -22,7 +23,6 @@ import org.elasticsearch.cluster.metadata.ReservedStateMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.Strings; import org.elasticsearch.core.Tuple; import org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.test.ESIntegTestCase; @@ -30,9 +30,6 @@ import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -49,6 +46,7 @@ import static org.hamcrest.Matchers.notNullValue; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) +@LuceneTestCase.SuppressFileSystems("*") public class RepositoriesFileSettingsIT extends ESIntegTestCase { private static AtomicLong versionCounter = new AtomicLong(1); @@ -102,15 +100,7 @@ private void assertMasterNode(Client client, String node) throws ExecutionExcept } private void writeJSONFile(String node, String json) throws Exception { - long version = versionCounter.incrementAndGet(); - - FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node); - - Files.createDirectories(fileSettingsService.watchedFileDir()); - Path tempFilePath = createTempFile(); - - Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8)); - Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); + FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet()); } private Tuple setupClusterStateListener(String node) { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java index 7e13402b7e66a..7d47ed391199c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/SnapshotsAndFileSettingsIT.java @@ -19,9 +19,7 @@ import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata; import org.elasticsearch.cluster.metadata.ReservedStateMetadata; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.Randomness; import org.elasticsearch.common.settings.Settings; -import 
org.elasticsearch.core.Strings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction; @@ -29,11 +27,7 @@ import org.elasticsearch.snapshots.SnapshotState; import org.junit.After; -import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -78,34 +72,8 @@ public void cleanUp() throws Exception { awaitNoMoreRunningOperations(); } - private long retryDelay(int retryCount) { - return 100 * (1 << retryCount) + Randomness.get().nextInt(10); - } - private void writeJSONFile(String node, String json) throws Exception { - long version = versionCounter.incrementAndGet(); - - FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node); - - Files.createDirectories(fileSettingsService.watchedFileDir()); - Path tempFilePath = createTempFile(); - - Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8)); - int retryCount = 0; - do { - try { - // this can fail on Windows because of timing - Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); - return; - } catch (IOException e) { - logger.info("--> retrying writing a settings file [" + retryCount + "]"); - if (retryCount == 4) { // retry 5 times - throw e; - } - Thread.sleep(retryDelay(retryCount)); - retryCount++; - } - } while (true); + FileSettingsServiceIT.writeJSONFile(node, json, logger, versionCounter.incrementAndGet()); } private Tuple setupClusterStateListener(String node) { diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java index 9e36055e917a6..5be00ae3bfa0c 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/RoleMappingFileSettingsIT.java @@ -8,6 +8,7 @@ package org.elasticsearch.integration; import org.apache.logging.log4j.Logger; +import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; @@ -20,6 +21,7 @@ import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata; import org.elasticsearch.cluster.metadata.ReservedStateMetadata; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.core.Tuple; @@ -42,6 +44,7 @@ import org.junit.After; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; @@ -73,6 +76,7 @@ /** * Tests that file settings service can properly add role mappings. 
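* It also hosts the shared {@code writeJSONFile} helper reused by the security file-settings integration tests.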
*/ +@LuceneTestCase.SuppressFileSystems("*") public class RoleMappingFileSettingsIT extends NativeRealmIntegTestCase { private static AtomicLong versionCounter = new AtomicLong(1); @@ -154,32 +158,37 @@ public void cleanUp() { updateClusterSettings(Settings.builder().putNull("indices.recovery.max_bytes_per_sec")); } - public static void writeJSONFile(String node, String json, Logger logger, AtomicLong versionCounter) throws Exception { - writeJSONFile(node, json, logger, versionCounter, true); - } - - public static void writeJSONFileWithoutVersionIncrement(String node, String json, Logger logger, AtomicLong versionCounter) - throws Exception { - writeJSONFile(node, json, logger, versionCounter, false); - } - - private static void writeJSONFile(String node, String json, Logger logger, AtomicLong versionCounter, boolean incrementVersion) - throws Exception { - long version = incrementVersion ? versionCounter.incrementAndGet() : versionCounter.get(); - + public static void writeJSONFile(String node, String json, Logger logger, Long version) throws Exception { FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node); - assertTrue(fileSettingsService.watching()); - - Files.deleteIfExists(fileSettingsService.watchedFile()); Files.createDirectories(fileSettingsService.watchedFileDir()); Path tempFilePath = createTempFile(); + String jsonWithVersion = Strings.format(json, version); logger.info("--> before writing JSON config to node {} with path {}", node, tempFilePath); - logger.info(Strings.format(json, version)); - Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8)); - Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); - logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath); + logger.info(jsonWithVersion); + + Files.writeString(tempFilePath, jsonWithVersion); + int retryCount = 0; + do { + try { + // this can fail on Windows because of timing + Files.move(tempFilePath, fileSettingsService.watchedFile(), StandardCopyOption.ATOMIC_MOVE); + logger.info("--> after writing JSON config to node {} with path {}", node, tempFilePath); + return; + } catch (IOException e) { + logger.info("--> retrying writing a settings file [{}]", retryCount); + if (retryCount == 4) { // retry 5 times + throw e; + } + Thread.sleep(retryDelay(retryCount)); + retryCount++; + } + } while (true); + } + + private static long retryDelay(int retryCount) { + return 100 * (1 << retryCount) + Randomness.get().nextInt(10); } public static Tuple setupClusterStateListener(String node, String expectedKey) { @@ -320,7 +329,7 @@ public void testClusterStateRoleMappingsAddedThenDeleted() throws Exception { ensureGreen(); var savedClusterState = setupClusterStateListener(internalCluster().getMasterName(), "everyone_kibana"); - writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter); + writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter.incrementAndGet()); assertRoleMappingsSaveOK(savedClusterState.v1(), savedClusterState.v2()); logger.info("---> cleanup cluster settings..."); @@ -333,7 +342,7 @@ public void testClusterStateRoleMappingsAddedThenDeleted() throws Exception { savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName()); - writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter); + writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, 
versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); @@ -373,7 +382,7 @@ public void testGetRoleMappings() throws Exception { } var savedClusterState = setupClusterStateListener(internalCluster().getMasterName(), "everyone_kibana"); - writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter); + writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); @@ -415,7 +424,8 @@ public void testGetRoleMappings() throws Exception { ); savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName()); - writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter); + String node = internalCluster().getMasterName(); + writeJSONFile(node, emptyJSON, logger, versionCounter.incrementAndGet()); awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); @@ -465,7 +475,7 @@ public void testErrorSaved() throws Exception { // save an empty file to clear any prior state, this ensures we don't get a stale file left over by another test var savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName()); - writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter); + writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); @@ -490,7 +500,8 @@ public void testErrorSaved() throws Exception { } ); - writeJSONFile(internalCluster().getMasterName(), testErrorJSON, logger, versionCounter); + String node = internalCluster().getMasterName(); + writeJSONFile(node, testErrorJSON, logger, versionCounter.incrementAndGet()); awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); @@ -515,7 +526,8 @@ public void testRoleMappingApplyWithSecurityIndexClosed() throws Exception { var closeIndexResponse = indicesAdmin().close(new CloseIndexRequest(INTERNAL_SECURITY_MAIN_INDEX_7)).get(); assertTrue(closeIndexResponse.isAcknowledged()); - writeJSONFile(internalCluster().getMasterName(), testJSON, logger, versionCounter); + String node = internalCluster().getMasterName(); + writeJSONFile(node, testJSON, logger, versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); @@ -550,7 +562,8 @@ public void testRoleMappingApplyWithSecurityIndexClosed() throws Exception { } } finally { savedClusterState = setupClusterStateListenerForCleanup(internalCluster().getMasterName()); - writeJSONFile(internalCluster().getMasterName(), emptyJSON, logger, versionCounter); + String node = internalCluster().getMasterName(); + writeJSONFile(node, emptyJSON, logger, versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS); assertTrue(awaitSuccessful); diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java index 15892c8d021f0..ef8f2cfc0d411 100644 --- 
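The consolidated helper above retries the ATOMIC_MOVE because the rename can transiently fail on Windows, backing off exponentially (100ms, 200ms, 400ms, ...) with a little jitter from retryDelay. A self-contained sketch of the same write-then-atomic-rename pattern, with illustrative paths and a configurable retry budget rather than the hard-coded 5 attempts:

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.ThreadLocalRandom;

final class AtomicSettingsWrite {

    // Exponential backoff with jitter: 100ms, 200ms, 400ms, ... plus up to 9ms of noise.
    private static long retryDelay(int retryCount) {
        return 100L * (1L << retryCount) + ThreadLocalRandom.current().nextInt(10);
    }

    /**
     * Write content to a temp file first, then atomically rename it over the target, so a
     * file watcher can never observe a half-written settings file.
     */
    static void writeAtomically(Path target, String content, int maxRetries) throws IOException, InterruptedException {
        Files.createDirectories(target.getParent());
        Path tempFile = Files.createTempFile(target.getParent(), "settings-", ".tmp");
        Files.writeString(tempFile, content);
        int retryCount = 0;
        while (true) {
            try {
                // The atomic move can fail transiently on Windows because of timing.
                Files.move(tempFile, target, StandardCopyOption.ATOMIC_MOVE);
                return;
            } catch (IOException e) {
                if (retryCount == maxRetries) {
                    throw e;
                }
                Thread.sleep(retryDelay(retryCount));
                retryCount++;
            }
        }
    }
}
```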
a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/FileSettingsRoleMappingsRestartIT.java @@ -7,9 +7,11 @@ package org.elasticsearch.xpack.security; +import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.core.Tuple; +import org.elasticsearch.integration.RoleMappingFileSettingsIT; import org.elasticsearch.reservedstate.service.FileSettingsService; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.SecurityIntegTestCase; @@ -29,12 +31,11 @@ import static org.elasticsearch.integration.RoleMappingFileSettingsIT.setupClusterStateListener; import static org.elasticsearch.integration.RoleMappingFileSettingsIT.setupClusterStateListenerForCleanup; -import static org.elasticsearch.integration.RoleMappingFileSettingsIT.writeJSONFile; -import static org.elasticsearch.integration.RoleMappingFileSettingsIT.writeJSONFileWithoutVersionIncrement; import static org.elasticsearch.xpack.core.security.authz.RoleMappingMetadata.METADATA_NAME_FIELD; import static org.hamcrest.Matchers.containsInAnyOrder; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) +@LuceneTestCase.SuppressFileSystems("*") public class FileSettingsRoleMappingsRestartIT extends SecurityIntegTestCase { private static final int MAX_WAIT_TIME_SECONDS = 20; @@ -116,7 +117,7 @@ public void testReservedStatePersistsOnRestart() throws Exception { awaitFileSettingsWatcher(); logger.info("--> write some role mappings, no other file settings"); - writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter.incrementAndGet()); assertRoleMappingsInClusterStateWithAwait( savedClusterState, @@ -196,7 +197,7 @@ public void testFileSettingsReprocessedOnRestartWithoutVersionChange() throws Ex Tuple savedClusterState = setupClusterStateListener(masterNode, "everyone_kibana_alone"); awaitFileSettingsWatcher(); logger.info("--> write some role mappings, no other file settings"); - writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, testJSONOnlyRoleMappings, logger, versionCounter.incrementAndGet()); assertRoleMappingsInClusterStateWithAwait( savedClusterState, @@ -226,7 +227,7 @@ public void testFileSettingsReprocessedOnRestartWithoutVersionChange() throws Ex ); // write without version increment and assert that change gets applied on restart - writeJSONFileWithoutVersionIncrement(masterNode, testJSONOnlyUpdatedRoleMappings, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, testJSONOnlyUpdatedRoleMappings, logger, versionCounter.get()); logger.info("--> restart master"); internalCluster().restartNode(masterNode); ensureGreen(); @@ -288,7 +289,7 @@ private void cleanupClusterStateAndAssertNoMappings(String masterNode) throws Ex var savedClusterState = setupClusterStateListenerForCleanup(masterNode); awaitFileSettingsWatcher(); logger.info("--> remove the role mappings with an empty settings file"); - writeJSONFile(masterNode, emptyJSON, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, emptyJSON, logger, 
versionCounter.incrementAndGet()); boolean awaitSuccessful = savedClusterState.v1().await(MAX_WAIT_TIME_SECONDS, TimeUnit.SECONDS); assertTrue(awaitSuccessful); // ensure cluster-state update got propagated to expected version diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/support/CleanupRoleMappingDuplicatesMigrationIT.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/support/CleanupRoleMappingDuplicatesMigrationIT.java index 63c510062bdad..e7f544399bdf0 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/support/CleanupRoleMappingDuplicatesMigrationIT.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/support/CleanupRoleMappingDuplicatesMigrationIT.java @@ -14,6 +14,7 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.integration.RoleMappingFileSettingsIT; import org.elasticsearch.reservedstate.service.FileSettingsService; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.SecurityIntegTestCase; @@ -40,7 +41,6 @@ import java.util.concurrent.atomic.AtomicLong; import static org.elasticsearch.integration.RoleMappingFileSettingsIT.setupClusterStateListener; -import static org.elasticsearch.integration.RoleMappingFileSettingsIT.writeJSONFile; import static org.elasticsearch.xpack.core.security.action.UpdateIndexMigrationVersionAction.MIGRATION_VERSION_CUSTOM_DATA_KEY; import static org.elasticsearch.xpack.core.security.action.UpdateIndexMigrationVersionAction.MIGRATION_VERSION_CUSTOM_KEY; import static org.elasticsearch.xpack.core.security.test.TestRestrictedIndices.INTERNAL_SECURITY_MAIN_INDEX_7; @@ -138,7 +138,7 @@ public void testMigrationSuccessful() throws Exception { // Setup listener to wait for role mapping var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode, "everyone_kibana_alone"); // Write role mappings - writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet()); assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS)); waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION); @@ -170,7 +170,7 @@ public void testMigrationSuccessfulNoOverlap() throws Exception { // Setup listener to wait for role mapping var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode, "everyone_kibana_alone"); // Write role mappings with fallback name, this should block any security migration - writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet()); assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS)); waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION); @@ -202,7 +202,7 @@ public void testMigrationSuccessfulNoNative() throws Exception { // Setup listener to wait for role mapping var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode, "everyone_kibana_alone"); // Write role mappings with fallback name, this should block any security migration - 
writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet()); assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS)); waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION); @@ -228,7 +228,7 @@ public void testMigrationFallbackNamePreCondition() throws Exception { // Setup listener to wait for role mapping var nameNotAvailableListener = setupClusterStateListener(masterNode, "name_not_available_after_deserialization"); // Write role mappings with fallback name, this should block any security migration - writeJSONFile(masterNode, TEST_JSON_WITH_FALLBACK_NAME, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_FALLBACK_NAME, logger, versionCounter.incrementAndGet()); assertTrue(nameNotAvailableListener.v1().await(20, TimeUnit.SECONDS)); // Create a native role mapping to create security index and trigger migration @@ -249,7 +249,7 @@ public void testMigrationFallbackNamePreCondition() throws Exception { assertThat(status, equalTo(SecurityIndexManager.RoleMappingsCleanupMigrationStatus.NOT_READY)); // Write file without fallback name in it to unblock migration - writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet()); waitForMigrationCompletion(SecurityMigrations.CLEANUP_ROLE_MAPPING_DUPLICATES_MIGRATION_VERSION); } @@ -282,7 +282,7 @@ public void testSkipMigrationEmptyFileBasedMappings() throws Exception { // Setup listener to wait for any role mapping var fileBasedRoleMappingsWrittenListener = setupClusterStateListener(masterNode); // Write role mappings - writeJSONFile(masterNode, TEST_JSON_WITH_EMPTY_ROLE_MAPPINGS, logger, versionCounter); + RoleMappingFileSettingsIT.writeJSONFile(masterNode, TEST_JSON_WITH_EMPTY_ROLE_MAPPINGS, logger, versionCounter.incrementAndGet()); assertTrue(fileBasedRoleMappingsWrittenListener.v1().await(20, TimeUnit.SECONDS)); // Create a native role mapping to create security index and trigger migration (skipped initially) From 53f6080b605414e7962e5d9fd0aa8eaa80fc29b8 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Mon, 11 Nov 2024 07:40:36 -0600 Subject: [PATCH 38/39] Adding a deprecation info API warning for data streams with old indices (#116447) --- docs/changelog/116447.yaml | 5 + .../org/elasticsearch/TransportVersions.java | 1 + .../DataStreamDeprecationChecks.java | 74 +++++++++++ .../xpack/deprecation/DeprecationChecks.java | 6 + .../deprecation/DeprecationInfoAction.java | 39 +++++- .../TransportDeprecationInfoAction.java | 2 + .../DataStreamDeprecationChecksTests.java | 124 ++++++++++++++++++ .../DeprecationInfoActionResponseTests.java | 47 ++++++- 8 files changed, 294 insertions(+), 4 deletions(-) create mode 100644 docs/changelog/116447.yaml create mode 100644 x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecks.java create mode 100644 x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecksTests.java diff --git a/docs/changelog/116447.yaml b/docs/changelog/116447.yaml new file mode 100644 index 0000000000000..8c0cea4b54578 --- /dev/null +++ b/docs/changelog/116447.yaml @@ -0,0 +1,5 @@ +pr: 116447 +summary: Adding a deprecation info API warning for 
data streams with old indices +area: Data streams +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 3134eb4966115..1c7e1eee5df65 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -190,6 +190,7 @@ static TransportVersion def(int id) { public static final TransportVersion LOGSDB_TELEMETRY_STATS = def(8_785_00_0); public static final TransportVersion KQL_QUERY_ADDED = def(8_786_00_0); public static final TransportVersion ROLE_MONITOR_STATS = def(8_787_00_0); + public static final TransportVersion DATA_STREAM_INDEX_VERSION_DEPRECATION_CHECK = def(8_788_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecks.java new file mode 100644 index 0000000000000..ee029d01427aa --- /dev/null +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecks.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.deprecation; + +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.DataStream; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; + +import java.util.List; + +import static java.util.Map.entry; +import static java.util.Map.ofEntries; + +public class DataStreamDeprecationChecks { + static DeprecationIssue oldIndicesCheck(DataStream dataStream, ClusterState clusterState) { + List backingIndices = dataStream.getIndices(); + boolean hasOldIndices = backingIndices.stream() + .anyMatch(index -> clusterState.metadata().index(index).getCompatibilityVersion().before(IndexVersions.V_8_0_0)); + if (hasOldIndices) { + long totalIndices = backingIndices.size(); + List oldIndices = backingIndices.stream() + .filter(index -> clusterState.metadata().index(index).getCompatibilityVersion().before(IndexVersions.V_8_0_0)) + .toList(); + long totalOldIndices = oldIndices.size(); + long totalOldSearchableSnapshots = oldIndices.stream() + .filter(index -> clusterState.metadata().index(index).isSearchableSnapshot()) + .count(); + long totalOldPartiallyMountedSearchableSnapshots = oldIndices.stream() + .filter(index -> clusterState.metadata().index(index).isPartialSearchableSnapshot()) + .count(); + long totalOldFullyMountedSearchableSnapshots = totalOldSearchableSnapshots - totalOldPartiallyMountedSearchableSnapshots; + return new DeprecationIssue( + DeprecationIssue.Level.CRITICAL, + "Old data stream with a compatibility version < 8.0", + "https://www.elastic.co/guide/en/elasticsearch/reference/master/breaking-changes-9.0.html", + "This data stream has backing indices that were created before Elasticsearch 8.0.0", + false, + ofEntries( + entry( + "backing_indices", + ofEntries( + entry("count", totalIndices), + entry( + "need_upgrading", + ofEntries( + entry("count", totalOldIndices), + entry( + "searchable_snapshots", + ofEntries( + entry("count", 
totalOldSearchableSnapshots), + entry("fully_mounted", ofEntries(entry("count", totalOldFullyMountedSearchableSnapshots))), + entry( + "partially_mounted", + ofEntries(entry("count", totalOldPartiallyMountedSearchableSnapshots)) + ) + ) + ) + ) + ) + ) + ) + ) + ); + } + return null; + } +} diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java index 2f875cc1a3fa9..c80f26cda7b36 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java @@ -8,6 +8,7 @@ import org.elasticsearch.action.admin.cluster.node.info.PluginsAndModules; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; @@ -16,6 +17,7 @@ import java.util.List; import java.util.Objects; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -97,6 +99,10 @@ private DeprecationChecks() {} IndexDeprecationChecks::deprecatedCamelCasePattern ); + static List> DATA_STREAM_CHECKS = List.of( + DataStreamDeprecationChecks::oldIndicesCheck + ); + /** * helper utility function to reduce repeat of running a specific {@link List} of checks. * diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java index cb9efd526fb29..cd26e23394e81 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java @@ -16,6 +16,7 @@ import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.action.support.master.MasterNodeReadRequest; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; @@ -42,6 +43,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -144,10 +146,11 @@ private static Map> getMergedIssuesToNodesMap( } public static class Response extends ActionResponse implements ToXContentObject { - static final Set RESERVED_NAMES = Set.of("cluster_settings", "node_settings", "index_settings"); + static final Set RESERVED_NAMES = Set.of("cluster_settings", "node_settings", "index_settings", "data_streams"); private final List clusterSettingsIssues; private final List nodeSettingsIssues; private final Map> indexSettingsIssues; + private final Map> dataStreamIssues; private final Map> pluginSettingsIssues; public Response(StreamInput in) throws IOException { @@ -155,6 +158,11 @@ public Response(StreamInput in) throws IOException { clusterSettingsIssues = in.readCollectionAsList(DeprecationIssue::new); nodeSettingsIssues = in.readCollectionAsList(DeprecationIssue::new); indexSettingsIssues = 
in.readMapOfLists(DeprecationIssue::new); + if (in.getTransportVersion().onOrAfter(TransportVersions.DATA_STREAM_INDEX_VERSION_DEPRECATION_CHECK)) { + dataStreamIssues = in.readMapOfLists(DeprecationIssue::new); + } else { + dataStreamIssues = Map.of(); + } if (in.getTransportVersion().before(TransportVersions.V_7_11_0)) { List mlIssues = in.readCollectionAsList(DeprecationIssue::new); pluginSettingsIssues = new HashMap<>(); @@ -168,11 +176,13 @@ public Response( List clusterSettingsIssues, List nodeSettingsIssues, Map> indexSettingsIssues, + Map> dataStreamIssues, Map> pluginSettingsIssues ) { this.clusterSettingsIssues = clusterSettingsIssues; this.nodeSettingsIssues = nodeSettingsIssues; this.indexSettingsIssues = indexSettingsIssues; + this.dataStreamIssues = dataStreamIssues; Set intersection = Sets.intersection(RESERVED_NAMES, pluginSettingsIssues.keySet()); if (intersection.isEmpty() == false) { throw new ElasticsearchStatusException( @@ -205,6 +215,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(clusterSettingsIssues); out.writeCollection(nodeSettingsIssues); out.writeMap(indexSettingsIssues, StreamOutput::writeCollection); + if (out.getTransportVersion().onOrAfter(TransportVersions.DATA_STREAM_INDEX_VERSION_DEPRECATION_CHECK)) { + out.writeMap(dataStreamIssues, StreamOutput::writeCollection); + } if (out.getTransportVersion().before(TransportVersions.V_7_11_0)) { out.writeCollection(pluginSettingsIssues.getOrDefault("ml_settings", Collections.emptyList())); } else { @@ -219,6 +232,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws .array("node_settings", nodeSettingsIssues.toArray()) .field("index_settings") .map(indexSettingsIssues) + .field("data_streams") + .map(dataStreamIssues) .mapContents(pluginSettingsIssues) .endObject(); } @@ -260,6 +275,7 @@ public static DeprecationInfoAction.Response from( Request request, NodesDeprecationCheckResponse nodeDeprecationResponse, List> indexSettingsChecks, + List> dataStreamChecks, List> clusterSettingsChecks, Map> pluginSettingIssues, List skipTheseDeprecatedSettings @@ -283,6 +299,19 @@ public static DeprecationInfoAction.Response from( } } + List dataStreamNames = indexNameExpressionResolver.dataStreamNames( + state, + IndicesOptions.LENIENT_EXPAND_OPEN_CLOSED_HIDDEN + ); + Map> dataStreamIssues = new HashMap<>(); + for (String dataStreamName : dataStreamNames) { + DataStream dataStream = stateWithSkippedSettingsRemoved.metadata().dataStreams().get(dataStreamName); + List issuesForSingleDataStream = filterChecks(dataStreamChecks, c -> c.apply(dataStream, state)); + if (issuesForSingleDataStream.isEmpty() == false) { + dataStreamIssues.put(dataStreamName, issuesForSingleDataStream); + } + } + // WORKAROUND: move transform deprecation issues into cluster_settings List transformDeprecations = pluginSettingIssues.remove( TransformDeprecationChecker.TRANSFORM_DEPRECATION_KEY @@ -291,7 +320,13 @@ public static DeprecationInfoAction.Response from( clusterSettingsIssues.addAll(transformDeprecations); } - return new DeprecationInfoAction.Response(clusterSettingsIssues, nodeSettingsIssues, indexSettingsIssues, pluginSettingIssues); + return new DeprecationInfoAction.Response( + clusterSettingsIssues, + nodeSettingsIssues, + indexSettingsIssues, + dataStreamIssues, + pluginSettingIssues + ); } } diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportDeprecationInfoAction.java 
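The Response (de)serialization above gates the new data-stream map on DATA_STREAM_INDEX_VERSION_DEPRECATION_CHECK so mixed-version clusters stay wire-compatible: a node only writes the field when the peer's transport version understands it, and a reader on an older payload falls back to an empty map. A schematic, framework-free sketch of that pattern (the integer version handshake and the plain data streams are illustrative; the real code uses StreamInput/StreamOutput as shown in the diff):

```java
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

final class VersionGatedWire {
    // Mirrors the idea of a TransportVersion id that gates the new field.
    static final int DATA_STREAM_ISSUES_VERSION = 8_788_00_0;

    static void write(DataOutputStream out, int peerVersion, List<String> dataStreamIssues) throws IOException {
        if (peerVersion >= DATA_STREAM_ISSUES_VERSION) {
            out.writeInt(dataStreamIssues.size());
            for (String issue : dataStreamIssues) {
                out.writeUTF(issue);
            }
        }
        // Older peers get nothing extra: they would not know how to consume the bytes.
    }

    static List<String> read(DataInputStream in, int peerVersion) throws IOException {
        if (peerVersion < DATA_STREAM_ISSUES_VERSION) {
            return List.of(); // the field is absent on the wire, so default it
        }
        int size = in.readInt();
        List<String> issues = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            issues.add(in.readUTF());
        }
        return issues;
    }
}
```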
b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportDeprecationInfoAction.java index 91e77762870bf..683c29815399b 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportDeprecationInfoAction.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportDeprecationInfoAction.java @@ -36,6 +36,7 @@ import java.util.stream.Collectors; import static org.elasticsearch.xpack.deprecation.DeprecationChecks.CLUSTER_SETTINGS_CHECKS; +import static org.elasticsearch.xpack.deprecation.DeprecationChecks.DATA_STREAM_CHECKS; import static org.elasticsearch.xpack.deprecation.DeprecationChecks.INDEX_SETTINGS_CHECKS; public class TransportDeprecationInfoAction extends TransportMasterNodeReadAction< @@ -134,6 +135,7 @@ protected final void masterOperation( request, response, INDEX_SETTINGS_CHECKS, + DATA_STREAM_CHECKS, CLUSTER_SETTINGS_CHECKS, deprecationIssues, skipTheseDeprecations diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecksTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecksTests.java new file mode 100644 index 0000000000000..d5325fb0ff3a4 --- /dev/null +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DataStreamDeprecationChecksTests.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.deprecation; + +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.DataStream; +import org.elasticsearch.cluster.metadata.DataStreamOptions; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static java.util.Collections.singletonList; +import static org.elasticsearch.xpack.deprecation.DeprecationChecks.DATA_STREAM_CHECKS; +import static org.hamcrest.Matchers.equalTo; + +public class DataStreamDeprecationChecksTests extends ESTestCase { + + public void testOldIndicesCheck() { + long oldIndexCount = randomIntBetween(1, 100); + long newIndexCount = randomIntBetween(1, 100); + long oldSearchableSnapshotCount = 0; + long oldFullyManagedSearchableSnapshotCount = 0; + long oldPartiallyManagedSearchableSnapshotCount = 0; + List allIndices = new ArrayList<>(); + Map nameToIndexMetadata = new HashMap<>(); + for (int i = 0; i < oldIndexCount; i++) { + Settings.Builder settingsBuilder = settings(IndexVersion.fromId(7170099)); + if (randomBoolean()) { + settingsBuilder.put("index.store.type", "snapshot"); + if (randomBoolean()) { + oldFullyManagedSearchableSnapshotCount++; + } else { + settingsBuilder.put("index.store.snapshot.partial", true); + oldPartiallyManagedSearchableSnapshotCount++; + } + oldSearchableSnapshotCount++; + } + IndexMetadata 
oldIndexMetadata = IndexMetadata.builder("old-data-stream-index-" + i) + .settings(settingsBuilder) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + allIndices.add(oldIndexMetadata.getIndex()); + nameToIndexMetadata.put(oldIndexMetadata.getIndex().getName(), oldIndexMetadata); + } + for (int i = 0; i < newIndexCount; i++) { + Settings.Builder settingsBuilder = settings(IndexVersion.current()); + if (randomBoolean()) { + settingsBuilder.put("index.store.type", "snapshot"); + } + IndexMetadata newIndexMetadata = IndexMetadata.builder("new-data-stream-index-" + i) + .settings(settingsBuilder) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + allIndices.add(newIndexMetadata.getIndex()); + nameToIndexMetadata.put(newIndexMetadata.getIndex().getName(), newIndexMetadata); + } + DataStream dataStream = new DataStream( + randomAlphaOfLength(10), + allIndices, + randomNegativeLong(), + Map.of(), + randomBoolean(), + false, + false, + randomBoolean(), + randomFrom(IndexMode.values()), + null, + randomFrom(DataStreamOptions.EMPTY, DataStreamOptions.FAILURE_STORE_DISABLED, DataStreamOptions.FAILURE_STORE_ENABLED, null), + List.of(), + randomBoolean(), + null + ); + Metadata metadata = Metadata.builder().indices(nameToIndexMetadata).build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).build(); + DeprecationIssue expected = new DeprecationIssue( + DeprecationIssue.Level.CRITICAL, + "Old data stream with a compatibility version < 8.0", + "https://www.elastic.co/guide/en/elasticsearch/reference/master/breaking-changes-9.0.html", + "This data stream has backing indices that were created before Elasticsearch 8.0.0", + false, + Map.of( + "backing_indices", + Map.of( + "count", + oldIndexCount + newIndexCount, + "need_upgrading", + Map.of( + "count", + oldIndexCount, + "searchable_snapshots", + Map.of( + "count", + oldSearchableSnapshotCount, + "fully_mounted", + Map.of("count", oldFullyManagedSearchableSnapshotCount), + "partially_mounted", + Map.of("count", oldPartiallyManagedSearchableSnapshotCount) + ) + ) + ) + ) + ); + List issues = DeprecationChecks.filterChecks(DATA_STREAM_CHECKS, c -> c.apply(dataStream, clusterState)); + assertThat(issues, equalTo(singletonList(expected))); + } +} diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java index 480ac2103fbfa..5750daa8e3673 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; @@ -36,7 +37,9 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -63,6 +66,13 @@ 
protected DeprecationInfoAction.Response createTestInstance() { .collect(Collectors.toList()); indexIssues.put(randomAlphaOfLength(10), perIndexIssues); } + Map> dataStreamIssues = new HashMap<>(); + for (int i = 0; i < randomIntBetween(0, 10); i++) { + List perDataStreamIssues = Stream.generate(DeprecationInfoActionResponseTests::createTestDeprecationIssue) + .limit(randomIntBetween(0, 10)) + .collect(Collectors.toList()); + dataStreamIssues.put(randomAlphaOfLength(10), perDataStreamIssues); + } Map> pluginIssues = new HashMap<>(); for (int i = 0; i < randomIntBetween(0, 10); i++) { List perPluginIssues = Stream.generate(DeprecationInfoActionResponseTests::createTestDeprecationIssue) @@ -70,7 +80,7 @@ protected DeprecationInfoAction.Response createTestInstance() { .collect(Collectors.toList()); pluginIssues.put(randomAlphaOfLength(10), perPluginIssues); } - return new DeprecationInfoAction.Response(clusterIssues, nodeIssues, indexIssues, pluginIssues); + return new DeprecationInfoAction.Response(clusterIssues, nodeIssues, indexIssues, dataStreamIssues, pluginIssues); } @Override @@ -104,9 +114,13 @@ public void testFrom() throws IOException { boolean clusterIssueFound = randomBoolean(); boolean nodeIssueFound = randomBoolean(); boolean indexIssueFound = randomBoolean(); + boolean dataStreamIssueFound = randomBoolean(); DeprecationIssue foundIssue = createTestDeprecationIssue(); List> clusterSettingsChecks = List.of((s) -> clusterIssueFound ? foundIssue : null); List> indexSettingsChecks = List.of((idx) -> indexIssueFound ? foundIssue : null); + List> dataStreamChecks = List.of( + (ds, cs) -> dataStreamIssueFound ? foundIssue : null + ); NodesDeprecationCheckResponse nodeDeprecationIssues = new NodesDeprecationCheckResponse( new ClusterName(randomAlphaOfLength(5)), @@ -125,6 +139,7 @@ public void testFrom() throws IOException { request, nodeDeprecationIssues, indexSettingsChecks, + dataStreamChecks, clusterSettingsChecks, Collections.emptyMap(), Collections.emptyList() @@ -197,6 +212,7 @@ public void testFromWithMergeableNodeIssues() throws IOException { DeprecationIssue foundIssue2 = createTestDeprecationIssue(foundIssue1, metaMap2); List> clusterSettingsChecks = Collections.emptyList(); List> indexSettingsChecks = List.of((idx) -> null); + List> dataStreamChecks = List.of((ds, cs) -> null); NodesDeprecationCheckResponse nodeDeprecationIssues = new NodesDeprecationCheckResponse( new ClusterName(randomAlphaOfLength(5)), @@ -214,6 +230,7 @@ public void testFromWithMergeableNodeIssues() throws IOException { request, nodeDeprecationIssues, indexSettingsChecks, + dataStreamChecks, clusterSettingsChecks, Collections.emptyMap(), Collections.emptyList() @@ -239,8 +256,15 @@ public void testRemoveSkippedSettings() throws IOException { settingsBuilder.put("some.undeprecated.property", "someValue3"); settingsBuilder.putList("some.undeprecated.list.property", List.of("someValue4", "someValue5")); Settings inputSettings = settingsBuilder.build(); + IndexMetadata dataStreamIndexMetadata = IndexMetadata.builder("ds-test-index-1") + .settings(inputSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); Metadata metadata = Metadata.builder() .put(IndexMetadata.builder("test").settings(inputSettings).numberOfShards(1).numberOfReplicas(0)) + .put(dataStreamIndexMetadata, true) + .put(DataStream.builder("ds-test", List.of(dataStreamIndexMetadata.getIndex())).build()) .persistentSettings(inputSettings) .build(); @@ -256,6 +280,13 @@ public void testRemoveSkippedSettings() throws IOException { 
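Both production code (Response.from) and the tests above funnel checks through DeprecationChecks.filterChecks, which applies every registered check to a resource and keeps only the non-null issues; the tests additionally record what a check observed through atomics such as AtomicReference and AtomicInteger, since lambdas can only close over effectively final variables. A minimal sketch of that registry-and-filter shape (filterChecks is reconstructed from its call sites, so treat the exact signature and the toy record types as assumptions):

```java
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;

final class CheckRegistrySketch {

    record Issue(String summary) {}
    record Resource(String name) {}
    record State(boolean deprecated) {}

    // A registry entry returns an Issue when the check fires, or null when it does not.
    static final List<BiFunction<Resource, State, Issue>> CHECKS = List.of(
        (resource, state) -> state.deprecated() ? new Issue("resource " + resource.name() + " is deprecated") : null
    );

    // Shape inferred from call sites like filterChecks(DATA_STREAM_CHECKS, c -> c.apply(dataStream, clusterState)).
    static <T> List<Issue> filterChecks(List<T> checks, Function<T, Issue> mapper) {
        return checks.stream().map(mapper).filter(Objects::nonNull).toList();
    }

    public static void main(String[] args) {
        Resource ds = new Resource("my-data-stream");
        State state = new State(true);
        // Apply every registered check to one resource; only the checks that fired survive.
        List<Issue> issues = filterChecks(CHECKS, c -> c.apply(ds, state));
        issues.forEach(i -> System.out.println(i.summary()));
    }
}
```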
visibleIndexSettings.set(idx.getSettings()); return null; })); + AtomicInteger backingIndicesCount = new AtomicInteger(0); + List> dataStreamChecks = Collections.unmodifiableList( + Arrays.asList((ds, cs) -> { + backingIndicesCount.set(ds.getIndices().size()); + return null; + }) + ); NodesDeprecationCheckResponse nodeDeprecationIssues = new NodesDeprecationCheckResponse( new ClusterName(randomAlphaOfLength(5)), @@ -270,6 +301,7 @@ public void testRemoveSkippedSettings() throws IOException { request, nodeDeprecationIssues, indexSettingsChecks, + dataStreamChecks, clusterSettingsChecks, Collections.emptyMap(), List.of("some.deprecated.property", "some.other.*.deprecated.property") @@ -288,19 +320,30 @@ public void testRemoveSkippedSettings() throws IOException { Assert.assertTrue(resultIndexSettings.getAsList("some.undeprecated.list.property").equals(List.of("someValue4", "someValue5"))); Assert.assertFalse(resultIndexSettings.hasValue("some.deprecated.property")); Assert.assertFalse(resultIndexSettings.hasValue("some.other.bad.deprecated.property")); + + assertThat(backingIndicesCount.get(), equalTo(1)); } public void testCtorFailure() { Map> indexNames = Stream.generate(() -> randomAlphaOfLength(10)) .limit(10) .collect(Collectors.toMap(Function.identity(), (_k) -> Collections.emptyList())); + Map> dataStreamNames = Stream.generate(() -> randomAlphaOfLength(10)) + .limit(10) + .collect(Collectors.toMap(Function.identity(), (_k) -> Collections.emptyList())); Set shouldCauseFailure = new HashSet<>(RESERVED_NAMES); for (int i = 0; i < NUMBER_OF_TEST_RUNS; i++) { Map> pluginSettingsIssues = randomSubsetOf(3, shouldCauseFailure).stream() .collect(Collectors.toMap(Function.identity(), (_k) -> Collections.emptyList())); expectThrows( ElasticsearchStatusException.class, - () -> new DeprecationInfoAction.Response(Collections.emptyList(), Collections.emptyList(), indexNames, pluginSettingsIssues) + () -> new DeprecationInfoAction.Response( + Collections.emptyList(), + Collections.emptyList(), + indexNames, + dataStreamNames, + pluginSettingsIssues + ) ); } } From a21d3753ccd7ce1d2a62f8511b091d482d360a2b Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 11 Nov 2024 14:51:19 +0100 Subject: [PATCH 39/39] Unmute a lot of fixed tests from search race condition (#116587) These are all fixed by #116264 closes #115664 #113430 #115717 #115705 #115970 #115988 #115810 #116027 #115754 #116097 #115818 #116377 #114824 --- muted-tests.yml | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index a00fecc253a7b..38310b6650419 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -127,18 +127,9 @@ tests: - class: org.elasticsearch.xpack.shutdown.NodeShutdownIT method: testStalledShardMigrationProperlyDetected issue: https://github.com/elastic/elasticsearch/issues/115697 -- class: org.elasticsearch.xpack.spatial.search.GeoGridAggAndQueryConsistencyIT - method: testGeoShapeGeoHash - issue: https://github.com/elastic/elasticsearch/issues/115664 - class: org.elasticsearch.xpack.inference.InferenceCrudIT method: testSupportedStream issue: https://github.com/elastic/elasticsearch/issues/113430 -- class: org.elasticsearch.xpack.spatial.search.GeoGridAggAndQueryConsistencyIT - method: testGeoShapeGeoTile - issue: https://github.com/elastic/elasticsearch/issues/115717 -- class: org.elasticsearch.xpack.spatial.search.GeoGridAggAndQueryConsistencyIT - method: testGeoShapeGeoHex - issue: https://github.com/elastic/elasticsearch/issues/115705 - class: 
org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=transform/transforms_start_stop/Verify start transform reuses destination index} issue: https://github.com/elastic/elasticsearch/issues/115808 @@ -157,32 +148,14 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/inference_crud/Test delete given model referenced by pipeline} issue: https://github.com/elastic/elasticsearch/issues/115970 -- class: org.elasticsearch.search.slice.SearchSliceIT - method: testPointInTime - issue: https://github.com/elastic/elasticsearch/issues/115988 -- class: org.elasticsearch.action.search.PointInTimeIT - method: testPITTiebreak - issue: https://github.com/elastic/elasticsearch/issues/115810 - class: org.elasticsearch.index.reindex.ReindexNodeShutdownIT method: testReindexWithShutdown issue: https://github.com/elastic/elasticsearch/issues/115996 - class: org.elasticsearch.search.query.SearchQueryIT method: testAllDocsQueryString issue: https://github.com/elastic/elasticsearch/issues/115728 -- class: org.elasticsearch.search.basic.SearchWithRandomExceptionsIT - method: testRandomExceptions - issue: https://github.com/elastic/elasticsearch/issues/116027 -- class: org.elasticsearch.action.admin.HotThreadsIT - method: testHotThreadsDontFail - issue: https://github.com/elastic/elasticsearch/issues/115754 -- class: org.elasticsearch.search.functionscore.QueryRescorerIT - method: testScoring - issue: https://github.com/elastic/elasticsearch/issues/116050 - class: org.elasticsearch.xpack.application.connector.ConnectorIndexServiceTests issue: https://github.com/elastic/elasticsearch/issues/116087 -- class: org.elasticsearch.xpack.searchbusinessrules.PinnedQueryBuilderIT - method: testPinnedPromotions - issue: https://github.com/elastic/elasticsearch/issues/116097 - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=cat.shards/10_basic/Help} issue: https://github.com/elastic/elasticsearch/issues/116110 @@ -195,9 +168,6 @@ tests: - class: org.elasticsearch.upgrades.FullClusterRestartIT method: testSnapshotRestore {cluster=OLD} issue: https://github.com/elastic/elasticsearch/issues/111777 -- class: org.elasticsearch.xpack.spatial.search.GeoGridAggAndQueryConsistencyIT - method: testGeoPointGeoTile - issue: https://github.com/elastic/elasticsearch/issues/115818 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT method: testLookbackWithIndicesOptions issue: https://github.com/elastic/elasticsearch/issues/116127 @@ -261,9 +231,6 @@ tests: - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {categorize.Categorize ASYNC} issue: https://github.com/elastic/elasticsearch/issues/116373 -- class: org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsIntegTests - method: testCreateAndRestoreSearchableSnapshot - issue: https://github.com/elastic/elasticsearch/issues/116377 - class: org.elasticsearch.threadpool.SimpleThreadPoolIT method: testThreadPoolMetrics issue: https://github.com/elastic/elasticsearch/issues/108320 @@ -302,9 +269,6 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/inference_crud/Test force delete given model referenced by pipeline} issue: https://github.com/elastic/elasticsearch/issues/116555 -- class: org.elasticsearch.search.basic.SearchWithRandomIOExceptionsIT - method: testRandomDirectoryIOExceptions - issue: https://github.com/elastic/elasticsearch/issues/114824 # Examples: #
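For reference, each mute entry in muted-tests.yml follows the class/method/issue shape used throughout the list above; the values below are placeholders, not a real mute:

```yaml
- class: org.example.SomeIT          # fully qualified test class
  method: testSomething              # optional; omit to mute the whole class
  issue: https://github.com/elastic/elasticsearch/issues/00000
```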