diff --git a/docs/reference/esql/functions/description/categorize.asciidoc b/docs/reference/esql/functions/description/categorize.asciidoc index b6574c1855505..a5e8e2d507574 100644 --- a/docs/reference/esql/functions/description/categorize.asciidoc +++ b/docs/reference/esql/functions/description/categorize.asciidoc @@ -2,4 +2,4 @@ *Description* -Categorizes text messages. +Groups text messages into categories of similarly formatted text values. diff --git a/docs/reference/esql/functions/examples/categorize.asciidoc b/docs/reference/esql/functions/examples/categorize.asciidoc new file mode 100644 index 0000000000000..4167be6910c89 --- /dev/null +++ b/docs/reference/esql/functions/examples/categorize.asciidoc @@ -0,0 +1,14 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +This example categorizes server logs messages into categories and aggregates their counts. +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=docsCategorize] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=docsCategorize-result] +|=== + diff --git a/docs/reference/esql/functions/grouping-functions.asciidoc b/docs/reference/esql/functions/grouping-functions.asciidoc index ed0caf5ec2a4c..839320ce23392 100644 --- a/docs/reference/esql/functions/grouping-functions.asciidoc +++ b/docs/reference/esql/functions/grouping-functions.asciidoc @@ -9,6 +9,8 @@ The <<esql-stats-by>> command supports these grouping functions: // tag::group_list[] * <<esql-bucket>> +* experimental:[] <<esql-categorize>> // end::group_list[] include::layout/bucket.asciidoc[] +include::layout/categorize.asciidoc[] diff --git a/docs/reference/esql/functions/kibana/definition/categorize.json b/docs/reference/esql/functions/kibana/definition/categorize.json index ca3971a6e05a3..ed5fa15232b85 100644 --- a/docs/reference/esql/functions/kibana/definition/categorize.json +++ 
b/docs/reference/esql/functions/kibana/definition/categorize.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", "type" : "eval", "name" : "categorize", - "description" : "Categorizes text messages.", + "description" : "Groups text messages into categories of similarly formatted text values.", "signatures" : [ { "params" : [ @@ -29,6 +29,9 @@ "returnType" : "keyword" } ], - "preview" : false, - "snapshot_only" : true + "examples" : [ + "FROM sample_data\n| STATS count=COUNT() BY category=CATEGORIZE(message)" + ], + "preview" : true, + "snapshot_only" : false } diff --git a/docs/reference/esql/functions/kibana/docs/categorize.md b/docs/reference/esql/functions/kibana/docs/categorize.md index f59151b5bee65..80c04b79084e9 100644 --- a/docs/reference/esql/functions/kibana/docs/categorize.md +++ b/docs/reference/esql/functions/kibana/docs/categorize.md @@ -3,5 +3,9 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### CATEGORIZE -Categorizes text messages. +Groups text messages into categories of similarly formatted text values. +``` +FROM sample_data +| STATS count=COUNT() BY category=CATEGORIZE(message) +``` diff --git a/docs/reference/esql/functions/layout/categorize.asciidoc b/docs/reference/esql/functions/layout/categorize.asciidoc index c547362b71ab0..4075949ab4d12 100644 --- a/docs/reference/esql/functions/layout/categorize.asciidoc +++ b/docs/reference/esql/functions/layout/categorize.asciidoc @@ -4,6 +4,8 @@ [[esql-categorize]] === `CATEGORIZE` +preview::["Do not use on production environments. This functionality is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + *Syntax* [.text-center] @@ -12,3 +14,4 @@ image::esql/functions/signature/categorize.svg[Embedded,opts=inline] include::../parameters/categorize.asciidoc[] include::../description/categorize.asciidoc[] include::../types/categorize.asciidoc[] +include::../examples/categorize.asciidoc[] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index a6e1a771374ca..24baf1263d06a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -676,3 +676,20 @@ Ahmedabad | 9 | 72 Bangalore | 9 | 72 // end::bitLength-result[] ; + +docsCategorize +required_capability: categorize_v4 +// tag::docsCategorize[] +FROM sample_data +| STATS count=COUNT() BY category=CATEGORIZE(message) +// end::docsCategorize[] +| SORT category +; + +// tag::docsCategorize-result[] +count:long | category:keyword + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? 
+// end::docsCategorize-result[] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 3d26bc170b723..37b159922906c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -265,7 +265,9 @@ public Collection listFunctions(String pattern) { private static FunctionDefinition[][] functions() { return new FunctionDefinition[][] { // grouping functions - new FunctionDefinition[] { def(Bucket.class, Bucket::new, "bucket", "bin"), }, + new FunctionDefinition[] { + def(Bucket.class, Bucket::new, "bucket", "bin"), + def(Categorize.class, Categorize::new, "categorize") }, // aggregate functions // since they declare two public constructors - one with filter (for nested where) and one without // use casting to disambiguate between the two @@ -411,7 +413,6 @@ private static FunctionDefinition[][] snapshotFunctions() { // The delay() function is for debug/snapshot environments only and should never be enabled in a non-snapshot build. // This is an experimental function and can be removed without notice. 
def(Delay.class, Delay::new, "delay"), - def(Categorize.class, Categorize::new, "categorize"), def(Kql.class, Kql::new, "kql"), def(Rate.class, Rate::withUnresolvedTimestamp, "rate") } }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java index 63b5073c2217a..ca0447ce11ec4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; @@ -44,10 +45,21 @@ public class Categorize extends GroupingFunction implements Validatable { private final Expression field; - @FunctionInfo(returnType = "keyword", description = "Categorizes text messages.") + @FunctionInfo( + returnType = "keyword", + description = "Groups text messages into categories of similarly formatted text values.", + examples = { + @Example( + file = "docs", + tag = "docsCategorize", + description = "This example categorizes server logs messages into categories and aggregates their counts. " + ) }, + preview = true + ) public Categorize( Source source, @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field + ) { super(source, List.of(field)); this.field = field;