From 4a992150016dd07adc432030dc2b1579ccfd4581 Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Wed, 18 Dec 2024 15:12:32 +0100 Subject: [PATCH] Semantic search with query builder rewrite (#118676) (#118945) * Semantic search with query builder rewrite * Address review feedback * Add feature behind snapshot * Use after/before instead of afterClass/beforeClass * Call onFailure instead of throwing exception * Fix KqlFunctionIT by requiring KqlPlugin * Update scoring tests now that they are enabled * Drop the score column for now --- .../elasticsearch/action/ResolvedIndices.java | 20 +- .../esql/qa/multi_node/SemanticMatchIT.java | 26 +++ .../esql/qa/single_node/SemanticMatchIT.java | 26 +++ .../esql/qa/rest/SemanticMatchTestCase.java | 121 ++++++++++++ .../xpack/esql/EsqlTestUtils.java | 3 + .../xpack/esql/MockQueryBuilderResolver.java | 30 +++ .../main/resources/match-function.csv-spec | 71 +++++++ .../main/resources/match-operator.csv-spec | 70 +++++++ .../src/main/resources/scoring.csv-spec | 40 ++++ .../xpack/esql/plugin/KqlFunctionIT.java | 9 + .../xpack/esql/execution/PlanExecutor.java | 5 +- .../expression/function/fulltext/Match.java | 2 + .../esql/plugin/TransportEsqlQueryAction.java | 8 +- .../xpack/esql/session/EsqlSession.java | 16 +- .../esql/session/QueryBuilderResolver.java | 178 ++++++++++++++++++ .../elasticsearch/xpack/esql/CsvTests.java | 3 +- .../LocalPhysicalPlanOptimizerTests.java | 6 + .../esql/stats/PlanExecutorMetricsTests.java | 2 + 18 files changed, 629 insertions(+), 7 deletions(-) create mode 100644 x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/SemanticMatchIT.java create mode 100644 x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/SemanticMatchIT.java create mode 100644 x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/SemanticMatchTestCase.java create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/MockQueryBuilderResolver.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/QueryBuilderResolver.java diff --git a/server/src/main/java/org/elasticsearch/action/ResolvedIndices.java b/server/src/main/java/org/elasticsearch/action/ResolvedIndices.java index f149603f12d8b..16f37c9573a8e 100644 --- a/server/src/main/java/org/elasticsearch/action/ResolvedIndices.java +++ b/server/src/main/java/org/elasticsearch/action/ResolvedIndices.java @@ -150,10 +150,26 @@ public static ResolvedIndices resolveWithIndicesRequest( RemoteClusterService remoteClusterService, long startTimeInMillis ) { - final Map remoteClusterIndices = remoteClusterService.groupIndices( + return resolveWithIndexNamesAndOptions( + request.indices(), request.indicesOptions(), - request.indices() + clusterState, + indexNameExpressionResolver, + remoteClusterService, + startTimeInMillis ); + } + + public static ResolvedIndices resolveWithIndexNamesAndOptions( + String[] indexNames, + IndicesOptions indicesOptions, + ClusterState clusterState, + IndexNameExpressionResolver indexNameExpressionResolver, + RemoteClusterService remoteClusterService, + long startTimeInMillis + ) { + final Map remoteClusterIndices = remoteClusterService.groupIndices(indicesOptions, indexNames); + final OriginalIndices localIndices = remoteClusterIndices.remove(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); Index[] concreteLocalIndices = localIndices == null diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/SemanticMatchIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/SemanticMatchIT.java new file mode 100644 index 0000000000000..0ce84330b0b01 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/SemanticMatchIT.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.multi_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.SemanticMatchTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class SemanticMatchIT extends SemanticMatchTestCase { + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test")); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/SemanticMatchIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/SemanticMatchIT.java new file mode 100644 index 0000000000000..8edc2dbcf35a2 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/SemanticMatchIT.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.single_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.SemanticMatchTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class SemanticMatchIT extends SemanticMatchTestCase { + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test")); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/SemanticMatchTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/SemanticMatchTestCase.java new file mode 100644 index 0000000000000..aafa57e764ae7 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/SemanticMatchTestCase.java @@ -0,0 +1,121 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.rest; + +import org.elasticsearch.client.Request; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.Map; + +import static org.hamcrest.core.StringContains.containsString; + +public abstract class SemanticMatchTestCase extends ESRestTestCase { + public void testWithMultipleInferenceIds() throws IOException { + String query = """ + from test-semantic1,test-semantic2 + | where match(semantic_text_field, "something") + """; + ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(query)); + + assertThat(re.getMessage(), containsString("Field [semantic_text_field] has multiple inference IDs associated with it")); + + assertEquals(400, re.getResponse().getStatusLine().getStatusCode()); + } + + public void testWithInferenceNotConfigured() { + String query = """ + from test-semantic3 + | where match(semantic_text_field, "something") + """; + ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(query)); + + assertThat(re.getMessage(), containsString("Inference endpoint not found")); + assertEquals(404, re.getResponse().getStatusLine().getStatusCode()); + } + + @Before + public void setUpIndices() throws IOException { + assumeTrue("semantic text capability not available", EsqlCapabilities.Cap.SEMANTIC_TEXT_TYPE.isEnabled()); + + var settings = Settings.builder().build(); + + String mapping1 = """ + "properties": { + "semantic_text_field": { + "type": "semantic_text", + "inference_id": "test_sparse_inference" + } + } + """; + createIndex(adminClient(), "test-semantic1", settings, mapping1); + + String mapping2 = """ + "properties": { + "semantic_text_field": { + "type": "semantic_text", + "inference_id": "test_dense_inference" + } + } + """; + createIndex(adminClient(), "test-semantic2", settings, mapping2); + + String mapping3 = """ + "properties": { + "semantic_text_field": { + "type": "semantic_text", + "inference_id": "inexistent" + } + } + """; + createIndex(adminClient(), "test-semantic3", settings, mapping3); + } + + @Before + public void setUpTextEmbeddingInferenceEndpoint() throws IOException { + assumeTrue("semantic text capability not available", EsqlCapabilities.Cap.SEMANTIC_TEXT_TYPE.isEnabled()); + Request request = new Request("PUT", "_inference/text_embedding/test_dense_inference"); + request.setJsonEntity(""" + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + """); + adminClient().performRequest(request); + } + + @After + public void wipeData() throws IOException { + assumeTrue("semantic text capability not available", EsqlCapabilities.Cap.SEMANTIC_TEXT_TYPE.isEnabled()); + adminClient().performRequest(new Request("DELETE", "*")); + + try { + adminClient().performRequest(new Request("DELETE", "_inference/test_dense_inference")); + } catch (ResponseException e) { + // 404 here means the endpoint was not created + if (e.getResponse().getStatusLine().getStatusCode() != 404) { + throw e; + } + } + } + + private Map runEsqlQuery(String query) throws IOException { + RestEsqlTestCase.RequestObjectBuilder builder = RestEsqlTestCase.requestObjectBuilder().query(query); + return RestEsqlTestCase.runEsqlSync(builder); + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 18ce9d7e3e057..66fd7d3ee5eb5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -70,6 +70,7 @@ import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.elasticsearch.xpack.esql.session.Configuration; +import org.elasticsearch.xpack.esql.session.QueryBuilderResolver; import org.elasticsearch.xpack.esql.stats.Metrics; import org.elasticsearch.xpack.esql.stats.SearchStats; import org.elasticsearch.xpack.versionfield.Version; @@ -351,6 +352,8 @@ public String toString() { public static final Verifier TEST_VERIFIER = new Verifier(new Metrics(new EsqlFunctionRegistry()), new XPackLicenseState(() -> 0L)); + public static final QueryBuilderResolver MOCK_QUERY_BUILDER_RESOLVER = new MockQueryBuilderResolver(); + private EsqlTestUtils() {} public static Configuration configuration(QueryPragmas pragmas, String query) { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/MockQueryBuilderResolver.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/MockQueryBuilderResolver.java new file mode 100644 index 0000000000000..7af3a89108fc0 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/MockQueryBuilderResolver.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.session.QueryBuilderResolver; +import org.elasticsearch.xpack.esql.session.Result; + +import java.util.function.BiConsumer; + +public class MockQueryBuilderResolver extends QueryBuilderResolver { + public MockQueryBuilderResolver() { + super(null, null, null, null); + } + + @Override + public void resolveQueryBuilders( + LogicalPlan plan, + ActionListener listener, + BiConsumer> callback + ) { + callback.accept(plan, listener); + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-function.csv-spec index 5ea169e1b110d..6c9a6fed3853c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-function.csv-spec @@ -597,3 +597,74 @@ from employees,employees_incompatible emp_no_bool:boolean ; + +testMatchWithSemanticText +required_capability: match_function +required_capability: semantic_text_type + +from semantic_text +| where match(semantic_text_field, "something") +| keep semantic_text_field +| sort semantic_text_field asc +; + +semantic_text_field:semantic_text +all we have to decide is what to do with the time that is given to us +be excellent to each other +live long and prosper +; + +testMatchWithSemanticTextAndKeyword +required_capability: match_function +required_capability: semantic_text_type + +from semantic_text +| where match(semantic_text_field, "something") AND match(host, "host1") +| keep semantic_text_field, host +; + +semantic_text_field:semantic_text | host:keyword +live long and prosper | host1 +; + +testMatchWithSemanticTextMultiValueField +required_capability: match_function +required_capability: semantic_text_type + +from semantic_text metadata _id +| where match(st_multi_value, "something") AND match(host, "host1") +| keep _id, st_multi_value +; + +_id: keyword | st_multi_value:semantic_text +1 | ["Hello there!", "This is a random value", "for testing purposes"] +; + +testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats +required_capability: match_function +required_capability: semantic_text_type + +from semantic_text +| where qstr("description:some*") +| eval size = mv_count(st_multi_value) +| where match(semantic_text_field, "something") AND size > 1 AND match(host, "host1") +| STATS result = count(*) +; + +result:long +1 +; + +testMatchWithSemanticTextAndKql +required_capability: match_function +required_capability: semantic_text_type +required_capability: kql_function + +from semantic_text +| where kql("host:host1") AND match(semantic_text_field, "something") +| KEEP host, semantic_text_field +; + +host:keyword | semantic_text_field:semantic_text +"host1" | live long and prosper +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec index 7906f8b69162b..721443a70fe20 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec @@ -608,3 +608,73 @@ from employees,employees_incompatible emp_no_bool:boolean ; +testMatchWithSemanticText +required_capability: match_operator_colon +required_capability: semantic_text_type + +from semantic_text +| where semantic_text_field:"something" +| keep semantic_text_field +| sort semantic_text_field asc +; + +semantic_text_field:semantic_text +all we have to decide is what to do with the time that is given to us +be excellent to each other +live long and prosper +; + +testMatchWithSemanticTextAndKeyword +required_capability: match_operator_colon +required_capability: semantic_text_type + +from semantic_text +| where semantic_text_field:"something" AND host:"host1" +| keep semantic_text_field, host +; + +semantic_text_field:semantic_text | host:keyword +live long and prosper | host1 +; + +testMatchWithSemanticTextMultiValueField +required_capability: match_operator_colon +required_capability: semantic_text_type + +from semantic_text metadata _id +| where st_multi_value:"something" AND match(host, "host1") +| keep _id, st_multi_value +; + +_id: keyword | st_multi_value:semantic_text +1 | ["Hello there!", "This is a random value", "for testing purposes"] +; + +testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats +required_capability: match_operator_colon +required_capability: semantic_text_type + +from semantic_text +| where qstr("description:some*") +| eval size = mv_count(st_multi_value) +| where semantic_text_field:"something" AND size > 1 AND match(host, "host1") +| STATS result = count(*) +; + +result:long +1 +; + +testMatchWithSemanticTextAndKql +required_capability: match_operator_colon +required_capability: semantic_text_type +required_capability: kql_function + +from semantic_text +| where kql("host:host1") AND semantic_text_field:"something" +| KEEP host, semantic_text_field +; + +host:keyword | semantic_text_field:semantic_text +"host1" | live long and prosper +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec index 72632c62603aa..9d3526982f9ef 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec @@ -317,3 +317,43 @@ book_no:keyword | title:text 2924 | A Gentle Creature and Other Stories: White Nights, A Gentle Creature, and The Dream of a Ridiculous Man (The World's Classics) | foobar 5948 | That We Are Gentle Creatures | foobar ; + + +semanticTextMatch +required_capability: metadata_score +required_capability: semantic_text_type +required_capability: match_function + +from semantic_text metadata _id, _score +| where match(semantic_text_field, "something") +| sort _score desc +| keep _id +; + +_id:keyword +2 +3 +1 +; + +semanticTextMatchWithAllTheTextFunctions + +required_capability: metadata_score +required_capability: semantic_text_type +required_capability: match_function +required_capability: kql_function +required_capability: qstr_function + +from semantic_text metadata _id, _score +| where match(semantic_text_field, "something") + AND match(description, "some") + AND kql("description:some*") + AND NOT qstr("host:host1") +| sort _score desc +| keep _id +; + +_id:keyword +2 +3 +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KqlFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KqlFunctionIT.java index d58637ab52c86..0e84ac7588ad6 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KqlFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KqlFunctionIT.java @@ -10,13 +10,17 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.kql.KqlPlugin; import org.junit.Before; import org.junit.BeforeClass; +import java.util.Collection; import java.util.List; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -141,4 +145,9 @@ private void createAndPopulateIndex() { .get(); ensureYellow(indexName); } + + @Override + protected Collection> nodePlugins() { + return CollectionUtils.appendToCopy(super.nodePlugins(), KqlPlugin.class); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java index c1269009c6a41..dad63d25046d9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java @@ -23,6 +23,7 @@ import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.session.EsqlSession; import org.elasticsearch.xpack.esql.session.IndexResolver; +import org.elasticsearch.xpack.esql.session.QueryBuilderResolver; import org.elasticsearch.xpack.esql.session.Result; import org.elasticsearch.xpack.esql.stats.Metrics; import org.elasticsearch.xpack.esql.stats.PlanningMetrics; @@ -59,6 +60,7 @@ public void esql( EsqlExecutionInfo executionInfo, IndicesExpressionGrouper indicesExpressionGrouper, EsqlSession.PlanRunner planRunner, + QueryBuilderResolver queryBuilderResolver, ActionListener listener ) { final PlanningMetrics planningMetrics = new PlanningMetrics(); @@ -73,7 +75,8 @@ public void esql( mapper, verifier, planningMetrics, - indicesExpressionGrouper + indicesExpressionGrouper, + queryBuilderResolver ); QueryMetric clientId = QueryMetric.fromString("rest"); metrics.total(clientId); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java index 0b2268fe1b022..e695a94198dab 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java @@ -51,6 +51,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.IP; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.SEMANTIC_TEXT; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; @@ -70,6 +71,7 @@ public class Match extends FullTextFunction implements Validatable { public static final Set FIELD_DATA_TYPES = Set.of( KEYWORD, TEXT, + SEMANTIC_TEXT, BOOLEAN, DATETIME, DATE_NANOS, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java index 76bfb95d07926..50d5819688e46 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java @@ -12,6 +12,7 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.HandledTransportAction; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; @@ -42,6 +43,7 @@ import org.elasticsearch.xpack.esql.execution.PlanExecutor; import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.session.EsqlSession.PlanRunner; +import org.elasticsearch.xpack.esql.session.QueryBuilderResolver; import org.elasticsearch.xpack.esql.session.Result; import java.io.IOException; @@ -68,6 +70,7 @@ public class TransportEsqlQueryAction extends HandledTransportAction asyncTaskManagementService; private final RemoteClusterService remoteClusterService; + private final QueryBuilderResolver queryBuilderResolver; @Inject @SuppressWarnings("this-escape") @@ -82,7 +85,8 @@ public TransportEsqlQueryAction( BigArrays bigArrays, BlockFactory blockFactory, Client client, - NamedWriteableRegistry registry + NamedWriteableRegistry registry, + IndexNameExpressionResolver indexNameExpressionResolver ) { // TODO replace SAME when removing workaround for https://github.com/elastic/elasticsearch/issues/97916 @@ -121,6 +125,7 @@ public TransportEsqlQueryAction( bigArrays ); this.remoteClusterService = transportService.getRemoteClusterService(); + this.queryBuilderResolver = new QueryBuilderResolver(searchService, clusterService, transportService, indexNameExpressionResolver); } @Override @@ -191,6 +196,7 @@ private void innerExecute(Task task, EsqlQueryRequest request, ActionListener toResponse(task, request, configuration, result)) ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index c0290fa2b1d73..bd3b3bdb3483c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -113,6 +113,7 @@ public interface PlanRunner { private final PhysicalPlanOptimizer physicalPlanOptimizer; private final PlanningMetrics planningMetrics; private final IndicesExpressionGrouper indicesExpressionGrouper; + private final QueryBuilderResolver queryBuilderResolver; public EsqlSession( String sessionId, @@ -125,7 +126,8 @@ public EsqlSession( Mapper mapper, Verifier verifier, PlanningMetrics planningMetrics, - IndicesExpressionGrouper indicesExpressionGrouper + IndicesExpressionGrouper indicesExpressionGrouper, + QueryBuilderResolver queryBuilderResolver ) { this.sessionId = sessionId; this.configuration = configuration; @@ -139,6 +141,7 @@ public EsqlSession( this.physicalPlanOptimizer = new PhysicalPlanOptimizer(new PhysicalOptimizerContext(configuration)); this.planningMetrics = planningMetrics; this.indicesExpressionGrouper = indicesExpressionGrouper; + this.queryBuilderResolver = queryBuilderResolver; } public String sessionId() { @@ -158,7 +161,16 @@ public void execute(EsqlQueryRequest request, EsqlExecutionInfo executionInfo, P new EsqlSessionCCSUtils.CssPartialErrorsActionListener(executionInfo, listener) { @Override public void onResponse(LogicalPlan analyzedPlan) { - executeOptimizedPlan(request, executionInfo, planRunner, optimizedPlan(analyzedPlan), listener); + try { + var optimizedPlan = optimizedPlan(analyzedPlan); + queryBuilderResolver.resolveQueryBuilders( + optimizedPlan, + listener, + (newPlan, next) -> executeOptimizedPlan(request, executionInfo, planRunner, newPlan, next) + ); + } catch (Exception e) { + listener.onFailure(e); + } } } ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/QueryBuilderResolver.java new file mode 100644 index 0000000000000..b6424c5f7fa56 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/QueryBuilderResolver.java @@ -0,0 +1,178 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.session; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ResolvedIndices; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryRewriteContext; +import org.elasticsearch.index.query.Rewriteable; +import org.elasticsearch.search.SearchService; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.util.Holder; +import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.function.BiConsumer; + +/** + * Some {@link FullTextFunction} implementations such as {@link org.elasticsearch.xpack.esql.expression.function.fulltext.Match} + * will be translated to a {@link QueryBuilder} that require a rewrite phase on the coordinator. + * {@link QueryBuilderResolver#resolveQueryBuilders(LogicalPlan, ActionListener, BiConsumer)} will rewrite the plan by replacing + * {@link FullTextFunction} expression with new ones that hold rewritten {@link QueryBuilder}s. + */ +public class QueryBuilderResolver { + private final SearchService searchService; + private final ClusterService clusterService; + private final TransportService transportService; + private final IndexNameExpressionResolver indexNameExpressionResolver; + + public QueryBuilderResolver( + SearchService searchService, + ClusterService clusterService, + TransportService transportService, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + this.searchService = searchService; + this.clusterService = clusterService; + this.transportService = transportService; + this.indexNameExpressionResolver = indexNameExpressionResolver; + } + + public void resolveQueryBuilders( + LogicalPlan plan, + ActionListener listener, + BiConsumer> callback + ) { + // TODO: remove once SEMANTIC_TEXT_TYPE is enabled outside of snapshots + if (false == EsqlCapabilities.Cap.SEMANTIC_TEXT_TYPE.isEnabled()) { + callback.accept(plan, listener); + return; + } + + if (plan.optimized() == false) { + listener.onFailure(new IllegalStateException("Expected optimized plan before query builder rewrite.")); + return; + } + + Set unresolved = fullTextFunctions(plan); + Set indexNames = indexNames(plan); + + if (indexNames == null || indexNames.isEmpty() || unresolved.isEmpty()) { + callback.accept(plan, listener); + return; + } + QueryRewriteContext ctx = queryRewriteContext(indexNames); + FullTextFunctionsRewritable rewritable = new FullTextFunctionsRewritable(unresolved); + Rewriteable.rewriteAndFetch(rewritable, ctx, new ActionListener() { + @Override + public void onResponse(FullTextFunctionsRewritable fullTextFunctionsRewritable) { + try { + LogicalPlan newPlan = planWithResolvedQueryBuilders(plan, fullTextFunctionsRewritable.results()); + callback.accept(newPlan, listener); + } catch (Exception e) { + onFailure(e); + } + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + }); + } + + private Set fullTextFunctions(LogicalPlan plan) { + Set functions = new HashSet<>(); + plan.forEachExpressionDown(FullTextFunction.class, func -> functions.add(func)); + return functions; + } + + public Set indexNames(LogicalPlan plan) { + Holder> indexNames = new Holder<>(); + + plan.forEachDown(EsRelation.class, esRelation -> { indexNames.set(esRelation.index().concreteIndices()); }); + + return indexNames.get(); + } + + public LogicalPlan planWithResolvedQueryBuilders(LogicalPlan plan, Map newQueryBuilders) { + LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, m -> { + if (newQueryBuilders.keySet().contains(m)) { + return m.replaceQueryBuilder(newQueryBuilders.get(m)); + } + return m; + }); + // The given plan was already analyzed and optimized, so we set the resulted plan to optimized as well. + newPlan.setOptimized(); + return newPlan; + } + + private QueryRewriteContext queryRewriteContext(Set indexNames) { + ResolvedIndices resolvedIndices = ResolvedIndices.resolveWithIndexNamesAndOptions( + indexNames.toArray(String[]::new), + IndexResolver.FIELD_CAPS_INDICES_OPTIONS, + clusterService.state(), + indexNameExpressionResolver, + transportService.getRemoteClusterService(), + System.currentTimeMillis() + ); + + return searchService.getRewriteContext(() -> System.currentTimeMillis(), resolvedIndices, null); + } + + private class FullTextFunctionsRewritable implements Rewriteable { + + private final Map queryBuilderMap; + + FullTextFunctionsRewritable(Map queryBuilderMap) { + this.queryBuilderMap = queryBuilderMap; + } + + FullTextFunctionsRewritable(Set functions) { + this.queryBuilderMap = new HashMap<>(); + + for (FullTextFunction func : functions) { + queryBuilderMap.put(func, func.asQuery(PlannerUtils.TRANSLATOR_HANDLER).asBuilder()); + } + } + + @Override + public FullTextFunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { + Map results = new HashMap<>(); + + boolean hasChanged = false; + for (FullTextFunction func : queryBuilderMap.keySet()) { + var initial = queryBuilderMap.get(func); + var rewritten = Rewriteable.rewrite(initial, ctx, false); + + if (rewritten.equals(initial) == false) { + hasChanged = true; + } + + results.put(func, rewritten); + } + + return hasChanged ? new FullTextFunctionsRewritable(results) : this; + } + + public Map results() { + return queryBuilderMap; + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 7c9dad0ea403f..f04796243305f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -442,7 +442,8 @@ private ActualResults executePlan(BigArrays bigArrays) throws Exception { mapper, TEST_VERIFIER, new PlanningMetrics(), - null + null, + EsqlTestUtils.MOCK_QUERY_BUILDER_RESOLVER ); TestPhysicalOperationProviders physicalOperationProviders = testOperationProviders(testDataset); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 8624559b0d304..2226b8a646823 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1401,6 +1401,11 @@ private void checkMatchFunctionPushDown( var analyzer = makeAnalyzer("mapping-all-types.json", new EnrichResolution()); // Check for every possible query data type for (DataType fieldDataType : fieldDataTypes) { + // TODO: semantic_text is not present in mapping-all-types.json so we skip it for now + if (fieldDataType == DataType.SEMANTIC_TEXT) { + continue; + } + var queryValue = randomQueryValue(fieldDataType); String fieldName = fieldDataType == DataType.DATETIME ? "date" : fieldDataType.name().toLowerCase(Locale.ROOT); @@ -1434,6 +1439,7 @@ private static Object randomQueryValue(DataType dataType) { case KEYWORD -> randomAlphaOfLength(5); case IP -> NetworkAddress.format(randomIp(randomBoolean())); case TEXT -> randomAlphaOfLength(50); + case SEMANTIC_TEXT -> randomAlphaOfLength(5); case VERSION -> VersionUtils.randomVersion(random()).toString(); default -> throw new IllegalArgumentException("Unexpected type: " + dataType); }; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java index b323efad2b4c3..539cd0314a4d1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java @@ -123,6 +123,7 @@ public void testFailedMetric() { new EsqlExecutionInfo(randomBoolean()), groupIndicesByCluster, runPhase, + EsqlTestUtils.MOCK_QUERY_BUILDER_RESOLVER, new ActionListener<>() { @Override public void onResponse(Result result) { @@ -152,6 +153,7 @@ public void onFailure(Exception e) { new EsqlExecutionInfo(randomBoolean()), groupIndicesByCluster, runPhase, + EsqlTestUtils.MOCK_QUERY_BUILDER_RESOLVER, new ActionListener<>() { @Override public void onResponse(Result result) {}