diff --git a/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc b/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc index 87748fee4f202..44b24865dc4ba 100644 --- a/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc +++ b/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc @@ -40,7 +40,7 @@ delimiter-based pattern, and extracts the specified keys as columns. For example, the following pattern: [source,txt] ---- -%{clientip} [%{@timestamp}] %{status} +%{clientip} [%{@timestamp}] %{status} ---- matches a log line of this format: @@ -76,8 +76,8 @@ ignore certain fields, append fields, skip over padding, etc. ===== Terminology dissect pattern:: -the set of fields and delimiters describing the textual -format. Also known as a dissection. +the set of fields and delimiters describing the textual +format. Also known as a dissection. The dissection is described using a set of `%{}` sections: `%{a} - %{b} - %{c}` @@ -91,14 +91,14 @@ Any set of characters other than `%{`, `'not }'`, or `}` is a delimiter. key:: + -- -the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes -and the ordinal suffix. +the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes +and the ordinal suffix. Examples: -* `%{?aaa}` - the key is `aaa` -* `%{+bbb/3}` - the key is `bbb` -* `%{&ccc}` - the key is `ccc` +* `%{?aaa}` - the key is `aaa` +* `%{+bbb/3}` - the key is `bbb` +* `%{&ccc}` - the key is `ccc` -- [[esql-dissect-examples]] @@ -239,7 +239,7 @@ with a `\`. For example, in the earlier pattern: %{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status} ---- -In {esql} queries, the backslash character itself is a special character that +In {esql} queries, when using single quotes for strings, the backslash character itself is a special character that needs to be escaped with another `\`. 
For this example, the corresponding {esql} query becomes: [source.merge.styled,esql] @@ -248,6 +248,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithEscape] ---- ==== +For this reason, in general it is more convenient to use triple quotes `"""` for GROK patterns, +that do not require escaping for backslash. + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes] +---- +==== + + [[esql-grok-patterns]] ===== Grok patterns @@ -318,4 +328,4 @@ as the `GROK` command. The `GROK` command does not support configuring <>, or <>. The `GROK` command is not subject to <>. -// end::grok-limitations[] \ No newline at end of file +// end::grok-limitations[] diff --git a/docs/reference/esql/functions/kibana/definition/like.json b/docs/reference/esql/functions/kibana/definition/like.json index 97e84e0361fd2..db48018bf65e5 100644 --- a/docs/reference/esql/functions/kibana/definition/like.json +++ b/docs/reference/esql/functions/kibana/definition/like.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", "type" : "operator", "name" : "like", - "description" : "Use `LIKE` to filter data based on string patterns using wildcards. `LIKE`\nusually acts on a field placed on the left-hand side of the operator, but it can\nalso act on a constant (literal) expression. The right-hand side of the operator\nrepresents the pattern.\n\nThe following wildcard characters are supported:\n\n* `*` matches zero or more characters.\n* `?` matches one character.", + "description" : "Use `LIKE` to filter data based on string patterns using wildcards. `LIKE`\nusually acts on a field placed on the left-hand side of the operator, but it can\nalso act on a constant (literal) expression. 
The right-hand side of the operator\nrepresents the pattern, as a <>.\n\nThe following wildcard characters are supported:\n\n* `*` matches zero or more characters.\n* `?` matches one character.", "signatures" : [ { "params" : [ @@ -42,7 +42,7 @@ } ], "examples" : [ - "FROM employees\n| WHERE first_name LIKE \"?b*\"\n| KEEP first_name, last_name" + "FROM employees\n| WHERE first_name LIKE \"\"\"?b*\"\"\"\n| KEEP first_name, last_name" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/esql/functions/kibana/definition/rlike.json b/docs/reference/esql/functions/kibana/definition/rlike.json index e442bb2c55050..34ff45808fa4d 100644 --- a/docs/reference/esql/functions/kibana/definition/rlike.json +++ b/docs/reference/esql/functions/kibana/definition/rlike.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", "type" : "operator", "name" : "rlike", - "description" : "Use `RLIKE` to filter data based on string patterns using using\n<>. `RLIKE` usually acts on a field placed on\nthe left-hand side of the operator, but it can also act on a constant (literal)\nexpression. The right-hand side of the operator represents the pattern.", + "description" : "Use `RLIKE` to filter data based on string patterns using using\n<>. `RLIKE` usually acts on a field placed on\nthe left-hand side of the operator, but it can also act on a constant (literal)\nexpression. 
The right-hand side of the operator represents the pattern, as a <>.", "signatures" : [ { "params" : [ @@ -42,7 +42,7 @@ } ], "examples" : [ - "FROM employees\n| WHERE first_name RLIKE \".leja.*\"\n| KEEP first_name, last_name" + "FROM employees\n| WHERE first_name RLIKE \"\"\".leja.*\"\"\"\n| KEEP first_name, last_name" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/esql/functions/kibana/docs/like.md b/docs/reference/esql/functions/kibana/docs/like.md index 4c400bdc65479..2c697179628c1 100644 --- a/docs/reference/esql/functions/kibana/docs/like.md +++ b/docs/reference/esql/functions/kibana/docs/like.md @@ -6,7 +6,7 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ Use `LIKE` to filter data based on string patterns using wildcards. `LIKE` usually acts on a field placed on the left-hand side of the operator, but it can also act on a constant (literal) expression. The right-hand side of the operator -represents the pattern. +represents the pattern, as a <>. The following wildcard characters are supported: @@ -15,6 +15,6 @@ The following wildcard characters are supported: ``` FROM employees -| WHERE first_name LIKE "?b*" +| WHERE first_name LIKE """?b*""" | KEEP first_name, last_name ``` diff --git a/docs/reference/esql/functions/kibana/docs/rlike.md b/docs/reference/esql/functions/kibana/docs/rlike.md index ed94553e7e44f..b81386d2f1263 100644 --- a/docs/reference/esql/functions/kibana/docs/rlike.md +++ b/docs/reference/esql/functions/kibana/docs/rlike.md @@ -6,10 +6,10 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ Use `RLIKE` to filter data based on string patterns using using <>. `RLIKE` usually acts on a field placed on the left-hand side of the operator, but it can also act on a constant (literal) -expression. The right-hand side of the operator represents the pattern. +expression. The right-hand side of the operator represents the pattern, as a <>. 
``` FROM employees -| WHERE first_name RLIKE ".leja.*" +| WHERE first_name RLIKE """.leja.*""" | KEEP first_name, last_name ``` diff --git a/docs/reference/esql/functions/like.asciidoc b/docs/reference/esql/functions/like.asciidoc index 2298617be5699..4a9a363986b6c 100644 --- a/docs/reference/esql/functions/like.asciidoc +++ b/docs/reference/esql/functions/like.asciidoc @@ -23,4 +23,22 @@ include::{esql-specs}/docs.csv-spec[tag=like] |=== include::{esql-specs}/docs.csv-spec[tag=like-result] |=== + +Matching the exact characters `*` and `?` will require escaping. +The escape character is backslash `\`. Since also backslash is a special character in string literals, +it will require further escaping. + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes] +---- +==== + +To reduce the overhead of escaping, we suggest using triple quotes strings `"""` + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes] +---- +==== // end::body[] diff --git a/docs/reference/esql/functions/rlike.asciidoc b/docs/reference/esql/functions/rlike.asciidoc index 031594ae403da..76f4f76d99650 100644 --- a/docs/reference/esql/functions/rlike.asciidoc +++ b/docs/reference/esql/functions/rlike.asciidoc @@ -18,4 +18,22 @@ include::{esql-specs}/docs.csv-spec[tag=rlike] |=== include::{esql-specs}/docs.csv-spec[tag=rlike-result] |=== + +Matching special characters (eg. `.`, `*`, `(`...) will require escaping. +The escape character is backslash `\`. Since also backslash is a special character in string literals, +it will require further escaping. 
+ +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes] +---- +==== + +To reduce the overhead of escaping, we suggest using triple quotes strings `"""` + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes] +---- +==== // end::body[] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index 15fe6853ae491..a9c5a5214f159 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -382,7 +382,7 @@ count:long | languages:integer basicGrok // tag::basicGrok[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}""" | KEEP date, ip, email, num // end::basicGrok[] ; @@ -396,7 +396,7 @@ date:keyword | ip:keyword | email:keyword | num:keyword grokWithConversionSuffix // tag::grokWithConversionSuffix[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num // end::grokWithConversionSuffix[] ; @@ -410,7 +410,7 @@ date:keyword | ip:keyword | email:keyword | num:integer grokWithToDatetime // tag::grokWithToDatetime[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num | EVAL date = TO_DATETIME(date) // end::grokWithToDatetime[] @@ -436,11 +436,27 @@ ROW a = "1.2.3.4 
[2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; + +grokWithEscapeTripleQuotes +// tag::grokWithEscapeTripleQuotes[] +ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" +| GROK a """%{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status}""" +// end::grokWithEscapeTripleQuotes[] +| KEEP @timestamp +; + +// tag::grokWithEscapeTripleQuotes-result[] +@timestamp:keyword +2023-01-23T12:15:00.000Z +// end::grokWithEscapeTripleQuotes-result[] +; + + grokWithDuplicateFieldNames // tag::grokWithDuplicateFieldNames[] FROM addresses | KEEP city.name, zip_code -| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| GROK zip_code """%{WORD:zip_parts} %{WORD:zip_parts}""" // end::grokWithDuplicateFieldNames[] | SORT city.name ; @@ -456,7 +472,7 @@ Tokyo | 100-7014 | null basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip // end::basicDissect[] ; @@ -470,7 +486,7 @@ date:keyword | msg:keyword | ip:keyword dissectWithToDatetime // tag::dissectWithToDatetime[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip | EVAL date = TO_DATETIME(date) // end::dissectWithToDatetime[] @@ -485,7 +501,7 @@ some text | 127.0.0.1 | 2023-01-23T12:15:00.000Z dissectRightPaddingModifier // tag::dissectRightPaddingModifier[] ROW message="1998-08-10T17:15:42 WARN" -| DISSECT message "%{ts->} %{level}" +| DISSECT message """%{ts->} %{level}""" // end::dissectRightPaddingModifier[] ; @@ -498,7 +514,7 @@ message:keyword | ts:keyword | level:keyword dissectEmptyRightPaddingModifier#[skip:-8.11.2, reason:Support for empty right padding modifiers introduced in 8.11.2] // tag::dissectEmptyRightPaddingModifier[] ROW message="[1998-08-10T17:15:42] [WARN]" -| DISSECT message "[%{ts}]%{->}[%{level}]" +| DISSECT message 
"""[%{ts}]%{->}[%{level}]""" // end::dissectEmptyRightPaddingModifier[] ; @@ -511,7 +527,7 @@ ROW message="[1998-08-10T17:15:42] [WARN]" dissectAppendModifier // tag::dissectAppendModifier[] ROW message="john jacob jingleheimer schmidt" -| DISSECT message "%{+name} %{+name} %{+name} %{+name}" APPEND_SEPARATOR=" " +| DISSECT message """%{+name} %{+name} %{+name} %{+name}""" APPEND_SEPARATOR=" " // end::dissectAppendModifier[] ; @@ -524,7 +540,7 @@ john jacob jingleheimer schmidt|john jacob jingleheimer schmidt dissectAppendWithOrderModifier // tag::dissectAppendWithOrderModifier[] ROW message="john jacob jingleheimer schmidt" -| DISSECT message "%{+name/2} %{+name/4} %{+name/3} %{+name/1}" APPEND_SEPARATOR="," +| DISSECT message """%{+name/2} %{+name/4} %{+name/3} %{+name/1}""" APPEND_SEPARATOR="," // end::dissectAppendWithOrderModifier[] ; @@ -537,7 +553,7 @@ john jacob jingleheimer schmidt|schmidt,john,jingleheimer,jacob dissectNamedSkipKey // tag::dissectNamedSkipKey[] ROW message="1.2.3.4 - - 30/Apr/1998:22:00:52 +0000" -| DISSECT message "%{clientip} %{?ident} %{?auth} %{@timestamp}" +| DISSECT message """%{clientip} %{?ident} %{?auth} %{@timestamp}""" // end::dissectNamedSkipKey[] ; @@ -550,7 +566,7 @@ message:keyword | clientip:keyword | @timestamp:keyword docsLike // tag::like[] FROM employees -| WHERE first_name LIKE "?b*" +| WHERE first_name LIKE """?b*""" | KEEP first_name, last_name // end::like[] | SORT first_name @@ -566,7 +582,7 @@ Eberhardt |Terkki docsRlike // tag::rlike[] FROM employees -| WHERE first_name RLIKE ".leja.*" +| WHERE first_name RLIKE """.leja.*""" | KEEP first_name, last_name // end::rlike[] ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 5313e6630c75d..8790b68e35061 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -1799,3 +1799,59 @@ warning:Line 1:29: java.lang.IllegalArgumentException: single-value function enc x:keyword null ; + + +likeEscapingSingleQuotes +// tag::likeEscapingSingleQuotes[] +ROW message = "foo * bar" +| WHERE message LIKE "foo \\* bar" +// end::likeEscapingSingleQuotes[] +; + +// tag::likeEscapingSingleQuotes-result[] +message:keyword +foo * bar +// end::likeEscapingSingleQuotes-result[] +; + + +likeEscapingTripleQuotes +// tag::likeEscapingTripleQuotes[] +ROW message = "foo * bar" +| WHERE message LIKE """foo \* bar""" +// end::likeEscapingTripleQuotes[] +; + +// tag::likeEscapingTripleQuotes-result[] +message:keyword +foo * bar +// end::likeEscapingTripleQuotes-result[] +; + + +rlikeEscapingSingleQuotes +// tag::rlikeEscapingSingleQuotes[] +ROW message = "foo ( bar" +| WHERE message RLIKE "foo \\( bar" +// end::rlikeEscapingSingleQuotes[] +; + +// tag::rlikeEscapingSingleQuotes-result[] +message:keyword +foo ( bar +// end::rlikeEscapingSingleQuotes-result[] +; + + +rlikeEscapingTripleQuotes +// tag::rlikeEscapingTripleQuotes[] +ROW message = "foo ( bar" +| WHERE message RLIKE """foo \( bar""" +// end::rlikeEscapingTripleQuotes[] +; + +// tag::rlikeEscapingTripleQuotes-result[] +message:keyword +foo ( bar +// end::rlikeEscapingTripleQuotes-result[] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java index b46c46c89deba..63390353f37f0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java @@ -29,11 +29,34 @@ public class RLike extends org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLike implements EvaluatorMapper { 
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "RLike", RLike::new); - @FunctionInfo(returnType = "boolean", description = """ - Use `RLIKE` to filter data based on string patterns using using - <>. `RLIKE` usually acts on a field placed on - the left-hand side of the operator, but it can also act on a constant (literal) - expression. The right-hand side of the operator represents the pattern.""", examples = @Example(file = "docs", tag = "rlike")) + @FunctionInfo( + returnType = "boolean", + description = """ + Use `RLIKE` to filter data based on string patterns using using + <>. `RLIKE` usually acts on a field placed on + the left-hand side of the operator, but it can also act on a constant (literal) + expression. The right-hand side of the operator represents the pattern, as a <>.""", + detailedDescription = """ + Matching special characters (eg. `.`, `*`, `(`...) will require escaping. + The escape character is backslash `\\`. Since also backslash is a special character in string literals, + it will require further escaping. 
+ + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes] + ---- + ==== + + To reduce the overhead of escaping, we suggest using triple quotes strings `\"\"\"` + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes] + ---- + ==== + """, + examples = @Example(file = "docs", tag = "rlike") + ) public RLike( Source source, @Param(name = "str", type = { "keyword", "text" }, description = "A literal value.") Expression value, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java index 714c4ca04a862..d675ab8930183 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java @@ -38,12 +38,30 @@ public class WildcardLike extends org.elasticsearch.xpack.esql.core.expression.p Use `LIKE` to filter data based on string patterns using wildcards. `LIKE` usually acts on a field placed on the left-hand side of the operator, but it can also act on a constant (literal) expression. The right-hand side of the operator - represents the pattern. + represents the pattern, as a <>. The following wildcard characters are supported: * `*` matches zero or more characters. - * `?` matches one character.""", examples = @Example(file = "docs", tag = "like")) + * `?` matches one character.""", detailedDescription = """ + Matching the exact characters `*` and `?` will require escaping. + The escape character is backslash `\\`. Since also backslash is a special character in string literals, + it will require further escaping. 
+ + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes] + ---- + ==== + + To reduce the overhead of escaping, we suggest using triple quotes strings `\"\"\"` + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes] + ---- + ==== + """, examples = @Example(file = "docs", tag = "like")) public WildcardLike( Source source, @Param(name = "str", type = { "keyword", "text" }, description = "A literal expression.") Expression left,