From 91605860ee9a112815cb20915d07551cefa4d63f Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Fri, 6 Dec 2024 08:42:48 +0100 Subject: [PATCH] Term query for ES|QL (#117359) This commit adds a `term` function for ES|QL to run `TermQueries`. For example: FROM test | WHERE term(content, "dog") --- docs/changelog/117359.yaml | 5 + .../esql/functions/description/term.asciidoc | 5 + .../esql/functions/examples/term.asciidoc | 13 ++ .../functions/kibana/definition/term.json | 85 ++++++++ .../esql/functions/kibana/docs/term.md | 13 ++ .../esql/functions/layout/term.asciidoc | 17 ++ .../esql/functions/parameters/term.asciidoc | 9 + .../esql/functions/signature/term.svg | 1 + .../esql/functions/types/term.asciidoc | 12 + .../src/main/resources/term-function.csv-spec | 206 ++++++++++++++++++ .../xpack/esql/plugin/TermIT.java | 139 ++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../xpack/esql/analysis/Verifier.java | 9 + .../function/EsqlFunctionRegistry.java | 4 +- .../function/fulltext/FullTextWritables.java | 3 + .../expression/function/fulltext/Term.java | 124 +++++++++++ .../physical/local/PushFiltersToSource.java | 3 + .../planner/EsqlExpressionTranslators.java | 10 + .../elasticsearch/xpack/esql/CsvTests.java | 4 + .../xpack/esql/analysis/VerifierTests.java | 54 +++++ .../function/fulltext/TermTests.java | 132 +++++++++++ .../LocalPhysicalPlanOptimizerTests.java | 29 +++ .../rest-api-spec/test/esql/60_usage.yml | 2 +- 23 files changed, 883 insertions(+), 3 deletions(-) create mode 100644 docs/changelog/117359.yaml create mode 100644 docs/reference/esql/functions/description/term.asciidoc create mode 100644 docs/reference/esql/functions/examples/term.asciidoc create mode 100644 docs/reference/esql/functions/kibana/definition/term.json create mode 100644 docs/reference/esql/functions/kibana/docs/term.md create mode 100644 docs/reference/esql/functions/layout/term.asciidoc create mode 100644 docs/reference/esql/functions/parameters/term.asciidoc create mode 100644 docs/reference/esql/functions/signature/term.svg create mode 100644 docs/reference/esql/functions/types/term.asciidoc create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/term-function.csv-spec create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/TermIT.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Term.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/TermTests.java diff --git a/docs/changelog/117359.yaml b/docs/changelog/117359.yaml new file mode 100644 index 0000000000000..87d2d828ace54 --- /dev/null +++ b/docs/changelog/117359.yaml @@ -0,0 +1,5 @@ +pr: 117359 +summary: Term query for ES|QL +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/description/term.asciidoc b/docs/reference/esql/functions/description/term.asciidoc new file mode 100644 index 0000000000000..c43aeb25a0ef7 --- /dev/null +++ b/docs/reference/esql/functions/description/term.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Performs a Term query on the specified field. Returns true if the provided term matches the row. diff --git a/docs/reference/esql/functions/examples/term.asciidoc b/docs/reference/esql/functions/examples/term.asciidoc new file mode 100644 index 0000000000000..b9d57f366294b --- /dev/null +++ b/docs/reference/esql/functions/examples/term.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/term-function.csv-spec[tag=term-with-field] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/term-function.csv-spec[tag=term-with-field-result] +|=== + diff --git a/docs/reference/esql/functions/kibana/definition/term.json b/docs/reference/esql/functions/kibana/definition/term.json new file mode 100644 index 0000000000000..d8bb61fd596a1 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/term.json @@ -0,0 +1,85 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", + "name" : "term", + "description" : "Performs a Term query on the specified field. Returns true if the provided term matches the row.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Field that the query will target." + }, + { + "name" : "query", + "type" : "keyword", + "optional" : false, + "description" : "Term you wish to find in the provided field." + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Field that the query will target." + }, + { + "name" : "query", + "type" : "text", + "optional" : false, + "description" : "Term you wish to find in the provided field." + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Field that the query will target." + }, + { + "name" : "query", + "type" : "keyword", + "optional" : false, + "description" : "Term you wish to find in the provided field." + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Field that the query will target." + }, + { + "name" : "query", + "type" : "text", + "optional" : false, + "description" : "Term you wish to find in the provided field." + } + ], + "variadic" : false, + "returnType" : "boolean" + } + ], + "examples" : [ + "from books \n| where term(author, \"gabriel\") \n| keep book_no, title\n| limit 3;" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/esql/functions/kibana/docs/term.md b/docs/reference/esql/functions/kibana/docs/term.md new file mode 100644 index 0000000000000..83e61a949208d --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/term.md @@ -0,0 +1,13 @@ + + +### TERM +Performs a Term query on the specified field. Returns true if the provided term matches the row. + +``` +from books +| where term(author, "gabriel") +| keep book_no, title +| limit 3; +``` diff --git a/docs/reference/esql/functions/layout/term.asciidoc b/docs/reference/esql/functions/layout/term.asciidoc new file mode 100644 index 0000000000000..1fe94491bed04 --- /dev/null +++ b/docs/reference/esql/functions/layout/term.asciidoc @@ -0,0 +1,17 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-term]] +=== `TERM` + +preview::["Do not use on production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + +*Syntax* + +[.text-center] +image::esql/functions/signature/term.svg[Embedded,opts=inline] + +include::../parameters/term.asciidoc[] +include::../description/term.asciidoc[] +include::../types/term.asciidoc[] +include::../examples/term.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/term.asciidoc b/docs/reference/esql/functions/parameters/term.asciidoc new file mode 100644 index 0000000000000..edba8625d04c5 --- /dev/null +++ b/docs/reference/esql/functions/parameters/term.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: +Field that the query will target. + +`query`:: +Term you wish to find in the provided field. diff --git a/docs/reference/esql/functions/signature/term.svg b/docs/reference/esql/functions/signature/term.svg new file mode 100644 index 0000000000000..955dd7fa215ab --- /dev/null +++ b/docs/reference/esql/functions/signature/term.svg @@ -0,0 +1 @@ +TERM(field,query) \ No newline at end of file diff --git a/docs/reference/esql/functions/types/term.asciidoc b/docs/reference/esql/functions/types/term.asciidoc new file mode 100644 index 0000000000000..7523b29c62b1d --- /dev/null +++ b/docs/reference/esql/functions/types/term.asciidoc @@ -0,0 +1,12 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | query | result +keyword | keyword | boolean +keyword | text | boolean +text | keyword | boolean +text | text | boolean +|=== diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/term-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/term-function.csv-spec new file mode 100644 index 0000000000000..0c72cad02eed1 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/term-function.csv-spec @@ -0,0 +1,206 @@ +############################################### +# Tests for Term function +# + +termWithTextField +required_capability: term_function + +// tag::term-with-field[] +FROM books +| WHERE TERM(author, "gabriel") +| KEEP book_no, title +| LIMIT 3; +// end::term-with-field[] +ignoreOrder:true + +book_no:keyword | title:text +4814 | El Coronel No Tiene Quien Le Escriba / No One Writes to the Colonel (Spanish Edition) +4917 | Autumn of the Patriarch +6380 | La hojarasca (Spanish Edition) +; + +termWithKeywordField +required_capability: term_function + +from employees +| where term(first_name, "Guoxiang") +| keep emp_no, first_name; + +// tag::term-with-keyword-field-result[] +emp_no:integer | first_name:keyword +10015 | Guoxiang +; +// end::term-with-keyword-field-result[] + +termWithQueryExpressions +required_capability: term_function + +from books +| where term(author, CONCAT("gab", "riel")) +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +4814 | El Coronel No Tiene Quien Le Escriba / No One Writes to the Colonel (Spanish Edition) +4917 | Autumn of the Patriarch +6380 | La hojarasca (Spanish Edition) +; + +termAfterKeep +required_capability: term_function + +from books +| keep book_no, author +| where term(author, "faulkner") +| sort book_no +| limit 5; + +book_no:keyword | author:text +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] +2713 | William Faulkner +2847 | Colleen Faulkner +2883 | William Faulkner +3293 | Danny Faulkner +; + +termAfterDrop +required_capability: term_function + +from books +| drop ratings, description, year, publisher, title, author.keyword +| where term(author, "william") +| keep book_no, author +| sort book_no +| limit 2; + +book_no:keyword | author:text +2713 | William Faulkner +2883 | William Faulkner +; + +termAfterEval +required_capability: term_function + +from books +| eval stars = to_long(ratings / 2.0) +| where term(author, "colleen") +| sort book_no +| keep book_no, author, stars +| limit 2; + +book_no:keyword | author:text | stars:long +2847 | Colleen Faulkner | 3 +4502 | Colleen Faulkner | 3 +; + +termWithConjunction +required_capability: term_function + +from books +| where term(author, "tolkien") and ratings > 4.95 +| eval author = mv_sort(author) +| keep book_no, ratings, author; +ignoreOrder:true + +book_no:keyword | ratings:double | author:keyword +2301 | 5.0 | John Ronald Reuel Tolkien +3254 | 5.0 | [Christopher Tolkien, John Ronald Reuel Tolkien] +7350 | 5.0 | [Christopher Tolkien, John Ronald Reuel Tolkien] +; + +termWithConjunctionAndSort +required_capability: term_function + +from books +| where term(author, "tolkien") and ratings > 4.95 +| eval author = mv_sort(author) +| keep book_no, ratings, author +| sort book_no; + +book_no:keyword | ratings:double | author:keyword +2301 | 5.0 | John Ronald Reuel Tolkien +3254 | 5.0 | [Christopher Tolkien, John Ronald Reuel Tolkien] +7350 | 5.0 | [Christopher Tolkien, John Ronald Reuel Tolkien] +; + +termWithFunctionPushedToLucene +required_capability: term_function + +from hosts +| where term(host, "beta") and cidr_match(ip1, "127.0.0.2/32", "127.0.0.3/32") +| keep card, host, ip0, ip1; +ignoreOrder:true + +card:keyword |host:keyword |ip0:ip |ip1:ip +eth1 |beta |127.0.0.1 |127.0.0.2 +; + +termWithNonPushableConjunction +required_capability: term_function + +from books +| where term(title, "rings") and length(title) > 75 +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +4023 | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings +; + +termWithMultipleWhereClauses +required_capability: term_function + +from books +| where term(title, "rings") +| where term(title, "lord") +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +2675 | The Lord of the Rings - Boxed Set +2714 | Return of the King Being the Third Part of The Lord of the Rings +4023 | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings +7140 | The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) +; + +termWithMultivaluedField +required_capability: term_function + +from employees +| where term(job_positions, "Data Scientist") +| keep emp_no, first_name, last_name +| sort emp_no asc +| limit 2; +ignoreOrder:true + +emp_no:integer | first_name:keyword | last_name:keyword +10014 | Berni | Genin +10017 | Cristinel | Bouloucos +; + +testWithMultiValuedFieldWithConjunction +required_capability: term_function + +from employees +| where term(job_positions, "Data Scientist") and term(first_name, "Cristinel") +| keep emp_no, first_name, last_name +| limit 1; + +emp_no:integer | first_name:keyword | last_name:keyword +10017 | Cristinel | Bouloucos +; + +termWithConjQueryStringFunctions +required_capability: term_function +required_capability: qstr_function + +from employees +| where term(job_positions, "Data Scientist") and qstr("first_name: Cristinel and gender: F") +| keep emp_no, first_name, last_name +| sort emp_no ASC +| limit 1; +ignoreOrder:true + +emp_no:integer | first_name:keyword | last_name:keyword +10017 | Cristinel | Bouloucos +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/TermIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/TermIT.java new file mode 100644 index 0000000000000..4bb4897c9db5f --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/TermIT.java @@ -0,0 +1,139 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; +import org.elasticsearch.xpack.esql.action.EsqlQueryResponse; +import org.junit.Before; + +import java.util.List; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.CoreMatchers.containsString; + +public class TermIT extends AbstractEsqlIntegTestCase { + + @Before + public void setupIndex() { + createAndPopulateIndex(); + } + + @Override + protected EsqlQueryResponse run(EsqlQueryRequest request) { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + return super.run(request); + } + + public void testSimpleTermQuery() throws Exception { + var query = """ + FROM test + | WHERE term(content,"dog") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(3), List.of(4), List.of(5))); + } + } + + public void testTermWithinEval() { + var query = """ + FROM test + | EVAL term_query = term(title,"fox") + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[Term] function is only supported in WHERE commands")); + } + + public void testMultipleTerm() { + var query = """ + FROM test + | WHERE term(content,"fox") AND term(content,"brown") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(2), List.of(4), List.of(5))); + } + } + + public void testNotWhereTerm() { + var query = """ + FROM test + | WHERE NOT term(content,"brown") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(3))); + } + } + + private void createAndPopulateIndex() { + var indexName = "test"; + var client = client().admin().indices(); + var CreateRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(CreateRequest); + client().prepareBulk() + .add( + new IndexRequest(indexName).id("1") + .source("id", 1, "content", "The quick brown animal swiftly jumps over a lazy dog", "title", "A Swift Fox's Journey") + ) + .add( + new IndexRequest(indexName).id("2") + .source("id", 2, "content", "A speedy brown fox hops effortlessly over a sluggish canine", "title", "The Fox's Leap") + ) + .add( + new IndexRequest(indexName).id("3") + .source("id", 3, "content", "Quick and nimble, the fox vaults over the lazy dog", "title", "Brown Fox in Action") + ) + .add( + new IndexRequest(indexName).id("4") + .source( + "id", + 4, + "content", + "A fox that is quick and brown jumps over a dog that is quite lazy", + "title", + "Speedy Animals" + ) + ) + .add( + new IndexRequest(indexName).id("5") + .source( + "id", + 5, + "content", + "With agility, a quick brown fox bounds over a slow-moving dog", + "title", + "Foxes and Canines" + ) + ) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + ensureYellow(indexName); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 19ba6a5151eaf..ee3f5be185b4f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -550,7 +550,12 @@ public enum Cap { /** * Support the "METADATA _score" directive to enable _score column. */ - METADATA_SCORE(Build.current().isSnapshot()); + METADATA_SCORE(Build.current().isSnapshot()), + + /** + * Term function + */ + TERM_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index f5fd82d742bc7..d6f0ff766eb40 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -36,6 +36,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.Kql; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; @@ -837,6 +838,14 @@ private static void checkFullTextQueryFunctions(LogicalPlan plan, Set f m -> "[" + m.functionName() + "] " + m.functionType(), failures ); + checkCommandsBeforeExpression( + plan, + condition, + Term.class, + lp -> (lp instanceof Limit == false) && (lp instanceof Aggregate == false), + m -> "[" + m.functionName() + "] " + m.functionType(), + failures + ); checkNotPresentInDisjunctions(condition, ftf -> "[" + ftf.functionName() + "] " + ftf.functionType(), failures); checkFullTextFunctionsParents(condition, failures); } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index c66a5293eb14a..3749b46879354 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -36,6 +36,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.Kql; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; @@ -424,7 +425,8 @@ private static FunctionDefinition[][] snapshotFunctions() { // This is an experimental function and can be removed without notice. def(Delay.class, Delay::new, "delay"), def(Kql.class, Kql::new, "kql"), - def(Rate.class, Rate::withUnresolvedTimestamp, "rate") } }; + def(Rate.class, Rate::withUnresolvedTimestamp, "rate"), + def(Term.class, Term::new, "term") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java index 8804a031de78c..d6b79d16b74f6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java @@ -29,6 +29,9 @@ public static List getNamedWriteables() { if (EsqlCapabilities.Cap.KQL_FUNCTION.isEnabled()) { entries.add(Kql.ENTRY); } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + entries.add(Term.ENTRY); + } return Collections.unmodifiableList(entries); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Term.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Term.java new file mode 100644 index 0000000000000..125a5b02b6e1c --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Term.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.fulltext; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xpack.esql.capabilities.Validatable; +import org.elasticsearch.xpack.esql.common.Failure; +import org.elasticsearch.xpack.esql.common.Failures; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.querydsl.query.TermQuery; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +/** + * Full text function that performs a {@link TermQuery} . + */ +public class Term extends FullTextFunction implements Validatable { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Term", Term::readFrom); + + private final Expression field; + + @FunctionInfo( + returnType = "boolean", + preview = true, + description = "Performs a Term query on the specified field. Returns true if the provided term matches the row.", + examples = { @Example(file = "term-function", tag = "term-with-field") } + ) + public Term( + Source source, + @Param(name = "field", type = { "keyword", "text" }, description = "Field that the query will target.") Expression field, + @Param( + name = "query", + type = { "keyword", "text" }, + description = "Term you wish to find in the provided field." + ) Expression termQuery + ) { + super(source, termQuery, List.of(field, termQuery)); + this.field = field; + } + + private static Term readFrom(StreamInput in) throws IOException { + Source source = Source.readFrom((PlanStreamInput) in); + Expression field = in.readNamedWriteable(Expression.class); + Expression query = in.readNamedWriteable(Expression.class); + return new Term(source, field, query); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field()); + out.writeNamedWriteable(query()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + protected TypeResolution resolveNonQueryParamTypes() { + return isNotNull(field, sourceText(), FIRST).and(isString(field, sourceText(), FIRST)).and(super.resolveNonQueryParamTypes()); + } + + @Override + public void validate(Failures failures) { + if (field instanceof FieldAttribute == false) { + failures.add( + Failure.fail( + field, + "[{}] {} cannot operate on [{}], which is not a field from an index mapping", + functionName(), + functionType(), + field.sourceText() + ) + ); + } + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Term(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Term::new, field, query()); + } + + protected TypeResolutions.ParamOrdinal queryParamOrdinal() { + return SECOND; + } + + public Expression field() { + return field; + } + + @Override + public String functionName() { + return ENTRY.name; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java index 3d6c35e914294..9d02af0efbab0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java @@ -32,6 +32,7 @@ import org.elasticsearch.xpack.esql.core.util.Queries; import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesFunction; @@ -254,6 +255,8 @@ static boolean canPushToSource(Expression exp, LucenePushdownPredicates lucenePu return canPushSpatialFunctionToSource(spatial, lucenePushdownPredicates); } else if (exp instanceof Match mf) { return mf.field() instanceof FieldAttribute && DataType.isString(mf.field().dataType()); + } else if (exp instanceof Term term) { + return term.field() instanceof FieldAttribute && DataType.isString(term.field().dataType()); } else if (exp instanceof FullTextFunction) { return true; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java index 1580b77931240..1aee8f029e474 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java @@ -37,6 +37,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.Kql; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialRelatesUtils; @@ -92,6 +93,7 @@ public final class EsqlExpressionTranslators { new MatchFunctionTranslator(), new QueryStringFunctionTranslator(), new KqlFunctionTranslator(), + new TermFunctionTranslator(), new Scalars() ); @@ -548,4 +550,12 @@ protected Query asQuery(Kql kqlFunction, TranslatorHandler handler) { return new KqlQuery(kqlFunction.source(), kqlFunction.queryAsText()); } } + + public static class TermFunctionTranslator extends ExpressionTranslator { + @Override + protected Query asQuery(Term term, TranslatorHandler handler) { + return new TermQuery(term.source(), ((FieldAttribute) term.field()).name(), term.queryAsText()); + } + } + } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 2e8b856cf82a6..2834e5f3f8358 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -265,6 +265,10 @@ public final void test() throws Throwable { "lookup join disabled for csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V4.capabilityName()) ); + assumeFalse( + "can't use TERM function in csv tests", + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.TERM_FUNCTION.capabilityName()) + ); if (Build.current().isSnapshot()) { assertThat( "Capability is not included in the enabled list capabilities on a snapshot build. Spelling mistake?", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 882b8b7dbfd7c..7e3ef4f1f5f87 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1337,6 +1337,11 @@ public void testMatchFunctionOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere("MATCH", "match(first_name, \"Anna\")", "function"); } + public void testTermFunctionOnlyAllowedInWhere() throws Exception { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(first_name, \"Anna\")", "function"); + } + public void testMatchOperatornOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere(":", "first_name:\"Anna\"", "operator"); } @@ -1401,6 +1406,11 @@ public void testMatchFunctionWithDisjunctions() { checkWithDisjunctions("MATCH", "match(first_name, \"Anna\")", "function"); } + public void testTermFunctionWithDisjunctions() { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + checkWithDisjunctions("Term", "term(first_name, \"Anna\")", "function"); + } + public void testMatchOperatorWithDisjunctions() { checkWithDisjunctions(":", "first_name : \"Anna\"", "operator"); } @@ -1463,6 +1473,11 @@ public void testMatchFunctionWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions("MATCH", "match(first_name, \"Anna\")", "function"); } + public void testTermFunctionWithNonBooleanFunctions() { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(first_name, \"Anna\")", "function"); + } + public void testMatchOperatorWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions(":", "first_name:\"Anna\"", "operator"); } @@ -1563,6 +1578,45 @@ public void testMatchTargetsExistingField() throws Exception { assertEquals("1:33: Unknown column [first_name]", error("from test | keep emp_no | where first_name : \"Anna\"")); } + public void testTermFunctionArgNotConstant() throws Exception { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + assertEquals( + "1:19: second argument of [term(first_name, first_name)] must be a constant, received [first_name]", + error("from test | where term(first_name, first_name)") + ); + assertEquals( + "1:59: second argument of [term(first_name, query)] must be a constant, received [query]", + error("from test | eval query = concat(\"first\", \" name\") | where term(first_name, query)") + ); + // Other value types are tested in QueryStringFunctionTests + } + + // These should pass eventually once we lift some restrictions on match function + public void testTermFunctionCurrentlyUnsupportedBehaviour() throws Exception { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + assertEquals( + "1:67: Unknown column [first_name]", + error("from test | stats max_salary = max(salary) by emp_no | where term(first_name, \"Anna\")") + ); + } + + public void testTermFunctionNullArgs() throws Exception { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + assertEquals( + "1:19: first argument of [term(null, \"query\")] cannot be null, received [null]", + error("from test | where term(null, \"query\")") + ); + assertEquals( + "1:19: second argument of [term(first_name, null)] cannot be null, received [null]", + error("from test | where term(first_name, null)") + ); + } + + public void testTermTargetsExistingField() throws Exception { + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + assertEquals("1:38: Unknown column [first_name]", error("from test | keep emp_no | where term(first_name, \"Anna\")")); + } + public void testCoalesceWithMixedNumericTypes() { assertEquals( "1:22: second argument of [coalesce(languages, height)] must be [integer], found value [height] type [double]", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/TermTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/TermTests.java new file mode 100644 index 0000000000000..c1c0dc26880ab --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/TermTests.java @@ -0,0 +1,132 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.fulltext; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.core.security.authc.support.mapper.expressiondsl.FieldExpression; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +@FunctionName("term") +public class TermTests extends AbstractFunctionTestCase { + + public TermTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List> supportedPerPosition = supportedParams(); + List suppliers = new LinkedList<>(); + for (DataType fieldType : DataType.stringTypes()) { + for (DataType queryType : DataType.stringTypes()) { + addPositiveTestCase(List.of(fieldType, queryType), suppliers); + addNonFieldTestCase(List.of(fieldType, queryType), supportedPerPosition, suppliers); + } + } + + List suppliersWithErrors = errorsForCasesWithoutExamples(suppliers, (v, p) -> "string"); + + // Don't test null, as it is not allowed but the expected message is not a type error - so we check it separately in VerifierTests + return parameterSuppliersFromTypedData( + suppliersWithErrors.stream().filter(s -> s.types().contains(DataType.NULL) == false).toList() + ); + } + + protected static List> supportedParams() { + Set supportedTextParams = Set.of(DataType.KEYWORD, DataType.TEXT); + Set supportedNumericParams = Set.of(DataType.DOUBLE, DataType.INTEGER); + Set supportedFuzzinessParams = Set.of(DataType.INTEGER, DataType.KEYWORD, DataType.TEXT); + List> supportedPerPosition = List.of( + supportedTextParams, + supportedTextParams, + supportedNumericParams, + supportedFuzzinessParams + ); + return supportedPerPosition; + } + + protected static void addPositiveTestCase(List paramDataTypes, List suppliers) { + + // Positive case - creates an ES field from the field parameter type + suppliers.add( + new TestCaseSupplier( + getTestCaseName(paramDataTypes, "-ES field"), + paramDataTypes, + () -> new TestCaseSupplier.TestCase( + getTestParams(paramDataTypes), + "EndsWithEvaluator[str=Attribute[channel=0], suffix=Attribute[channel=1]]", + DataType.BOOLEAN, + equalTo(true) + ) + ) + ); + } + + private static void addNonFieldTestCase( + List paramDataTypes, + List> supportedPerPosition, + List suppliers + ) { + // Negative case - use directly the field parameter type + suppliers.add( + new TestCaseSupplier( + getTestCaseName(paramDataTypes, "-non ES field"), + paramDataTypes, + typeErrorSupplier(true, supportedPerPosition, paramDataTypes, TermTests::matchTypeErrorSupplier) + ) + ); + } + + private static List getTestParams(List paramDataTypes) { + String fieldName = randomIdentifier(); + List params = new ArrayList<>(); + params.add( + new TestCaseSupplier.TypedData( + new FieldExpression(fieldName, List.of(new FieldExpression.FieldValue(fieldName))), + paramDataTypes.get(0), + "field" + ) + ); + params.add(new TestCaseSupplier.TypedData(new BytesRef(randomAlphaOfLength(10)), paramDataTypes.get(1), "query")); + return params; + } + + private static String getTestCaseName(List paramDataTypes, String fieldType) { + StringBuilder sb = new StringBuilder(); + sb.append("<"); + sb.append(paramDataTypes.get(0)).append(fieldType).append(", "); + sb.append(paramDataTypes.get(1)); + sb.append(">"); + return sb.toString(); + } + + private static String matchTypeErrorSupplier(boolean includeOrdinal, List> validPerPosition, List types) { + return "[] cannot operate on [" + types.getFirst().typeName() + "], which is not a field from an index mapping"; + } + + @Override + protected Expression build(Source source, List args) { + return new Match(source, args.get(0), args.get(1)); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 86f5c812737b1..d32124c1aaf32 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1391,6 +1391,35 @@ public void testMultipleMatchFilterPushdown() { assertThat(actualLuceneQuery.toString(), is(expectedLuceneQuery.toString())); } + /** + * Expecting + * LimitExec[1000[INTEGER]] + * \_ExchangeExec[[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gender{f}#4, job{f}#9, job.raw{f}#10, languages{f}#5, last_na + * me{f}#6, long_noidx{f}#11, salary{f}#7],false] + * \_ProjectExec[[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gender{f}#4, job{f}#9, job.raw{f}#10, languages{f}#5, last_na + * me{f}#6, long_noidx{f}#11, salary{f}#7]] + * \_FieldExtractExec[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gen] + * \_EsQueryExec[test], indexMode[standard], query[{"term":{"last_name":{"query":"Smith"}}}] + */ + public void testTermFunction() { + // Skip test if the term function is not enabled. + assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); + + var plan = plannerOptimizer.plan(""" + from test + | where term(last_name, "Smith") + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + var field = as(project.child(), FieldExtractExec.class); + var query = as(field.child(), EsQueryExec.class); + assertThat(query.limit().fold(), is(1000)); + var expected = QueryBuilders.termQuery("last_name", "Smith"); + assertThat(query.query().toString(), is(expected.toString())); + } + private QueryBuilder wrapWithSingleQuery(String query, QueryBuilder inner, String fieldName, Source source) { return FilterTests.singleValueQuery(query, inner, fieldName, source); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index e6c061f44a9e4..81f65668722fc 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -92,7 +92,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} # Testing for the entire function set isn't feasbile, so we just check that we return the correct count as an approximation. - - length: {esql.functions: 127} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 128} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version":