From 6a7d3693e2db18ceabb4356868564c89c07c5060 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Thu, 24 Oct 2024 09:19:46 +0200 Subject: [PATCH] ES|QL: improve docs about escaping for GROK, DISSECT, LIKE, RLIKE (#115320) --- ...ql-process-data-with-dissect-grok.asciidoc | 31 ++++++---- .../functions/kibana/definition/like.json | 2 +- .../functions/kibana/definition/rlike.json | 2 +- .../esql/functions/kibana/docs/like.md | 2 +- .../esql/functions/kibana/docs/rlike.md | 2 +- docs/reference/esql/functions/like.asciidoc | 16 ++++++ docs/reference/esql/functions/rlike.asciidoc | 16 ++++++ .../src/main/resources/docs.csv-spec | 42 +++++++++----- .../src/main/resources/string.csv-spec | 56 +++++++++++++++++++ .../function/scalar/string/RLike.java | 18 +++++- .../function/scalar/string/WildcardLike.java | 18 +++++- 11 files changed, 175 insertions(+), 30 deletions(-) diff --git a/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc b/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc index 87748fee4f202..e626e058a4e56 100644 --- a/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc +++ b/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc @@ -40,7 +40,7 @@ delimiter-based pattern, and extracts the specified keys as columns. For example, the following pattern: [source,txt] ---- -%{clientip} [%{@timestamp}] %{status} +%{clientip} [%{@timestamp}] %{status} ---- matches a log line of this format: @@ -76,8 +76,8 @@ ignore certain fields, append fields, skip over padding, etc. ===== Terminology dissect pattern:: -the set of fields and delimiters describing the textual -format. Also known as a dissection. +the set of fields and delimiters describing the textual +format. Also known as a dissection. The dissection is described using a set of `%{}` sections: `%{a} - %{b} - %{c}` @@ -91,14 +91,14 @@ Any set of characters other than `%{`, `'not }'`, or `}` is a delimiter. key:: + -- -the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes -and the ordinal suffix. +the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes +and the ordinal suffix. Examples: -* `%{?aaa}` - the key is `aaa` -* `%{+bbb/3}` - the key is `bbb` -* `%{&ccc}` - the key is `ccc` +* `%{?aaa}` - the key is `aaa` +* `%{+bbb/3}` - the key is `bbb` +* `%{&ccc}` - the key is `ccc` -- [[esql-dissect-examples]] @@ -218,7 +218,7 @@ Putting it together as an {esql} query: [source.merge.styled,esql] ---- -include::{esql-specs}/docs.csv-spec[tag=grokWithEscape] +include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes] ---- `GROK` adds the following columns to the input table: @@ -239,15 +239,24 @@ with a `\`. For example, in the earlier pattern: %{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status} ---- -In {esql} queries, the backslash character itself is a special character that +In {esql} queries, when using single quotes for strings, the backslash character itself is a special character that needs to be escaped with another `\`. For this example, the corresponding {esql} query becomes: [source.merge.styled,esql] ---- include::{esql-specs}/docs.csv-spec[tag=grokWithEscape] ---- + +For this reason, in general it is more convenient to use triple quotes `"""` for GROK patterns, +that do not require escaping for backslash. + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes] +---- ==== + [[esql-grok-patterns]] ===== Grok patterns @@ -318,4 +327,4 @@ as the `GROK` command. The `GROK` command does not support configuring <>, or <>. The `GROK` command is not subject to <>. -// end::grok-limitations[] \ No newline at end of file +// end::grok-limitations[] diff --git a/docs/reference/esql/functions/kibana/definition/like.json b/docs/reference/esql/functions/kibana/definition/like.json index 97e84e0361fd2..f375c697bd60d 100644 --- a/docs/reference/esql/functions/kibana/definition/like.json +++ b/docs/reference/esql/functions/kibana/definition/like.json @@ -42,7 +42,7 @@ } ], "examples" : [ - "FROM employees\n| WHERE first_name LIKE \"?b*\"\n| KEEP first_name, last_name" + "FROM employees\n| WHERE first_name LIKE \"\"\"?b*\"\"\"\n| KEEP first_name, last_name" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/esql/functions/kibana/definition/rlike.json b/docs/reference/esql/functions/kibana/definition/rlike.json index e442bb2c55050..7a328293383bb 100644 --- a/docs/reference/esql/functions/kibana/definition/rlike.json +++ b/docs/reference/esql/functions/kibana/definition/rlike.json @@ -42,7 +42,7 @@ } ], "examples" : [ - "FROM employees\n| WHERE first_name RLIKE \".leja.*\"\n| KEEP first_name, last_name" + "FROM employees\n| WHERE first_name RLIKE \"\"\".leja.*\"\"\"\n| KEEP first_name, last_name" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/esql/functions/kibana/docs/like.md b/docs/reference/esql/functions/kibana/docs/like.md index 4c400bdc65479..ea2ac11b6f4b9 100644 --- a/docs/reference/esql/functions/kibana/docs/like.md +++ b/docs/reference/esql/functions/kibana/docs/like.md @@ -15,6 +15,6 @@ The following wildcard characters are supported: ``` FROM employees -| WHERE first_name LIKE "?b*" +| WHERE first_name LIKE """?b*""" | KEEP first_name, last_name ``` diff --git a/docs/reference/esql/functions/kibana/docs/rlike.md b/docs/reference/esql/functions/kibana/docs/rlike.md index ed94553e7e44f..95b57799ffe29 100644 --- a/docs/reference/esql/functions/kibana/docs/rlike.md +++ b/docs/reference/esql/functions/kibana/docs/rlike.md @@ -10,6 +10,6 @@ expression. The right-hand side of the operator represents the pattern. ``` FROM employees -| WHERE first_name RLIKE ".leja.*" +| WHERE first_name RLIKE """.leja.*""" | KEEP first_name, last_name ``` diff --git a/docs/reference/esql/functions/like.asciidoc b/docs/reference/esql/functions/like.asciidoc index 2298617be5699..a569896bc3c1e 100644 --- a/docs/reference/esql/functions/like.asciidoc +++ b/docs/reference/esql/functions/like.asciidoc @@ -23,4 +23,20 @@ include::{esql-specs}/docs.csv-spec[tag=like] |=== include::{esql-specs}/docs.csv-spec[tag=like-result] |=== + +Matching the exact characters `*` and `.` will require escaping. +The escape character is backslash `\`. Since also backslash is a special character in string literals, +it will require further escaping. + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes] +---- + +To reduce the overhead of escaping, we suggest using triple quotes strings `"""` + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes] +---- // end::body[] diff --git a/docs/reference/esql/functions/rlike.asciidoc b/docs/reference/esql/functions/rlike.asciidoc index 031594ae403da..f6009b2c49528 100644 --- a/docs/reference/esql/functions/rlike.asciidoc +++ b/docs/reference/esql/functions/rlike.asciidoc @@ -18,4 +18,20 @@ include::{esql-specs}/docs.csv-spec[tag=rlike] |=== include::{esql-specs}/docs.csv-spec[tag=rlike-result] |=== + +Matching special characters (eg. `.`, `*`, `(`...) will require escaping. +The escape character is backslash `\`. Since also backslash is a special character in string literals, +it will require further escaping. + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes] +---- + +To reduce the overhead of escaping, we suggest using triple quotes strings `"""` + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes] +---- // end::body[] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index 15fe6853ae491..a9c5a5214f159 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -382,7 +382,7 @@ count:long | languages:integer basicGrok // tag::basicGrok[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}""" | KEEP date, ip, email, num // end::basicGrok[] ; @@ -396,7 +396,7 @@ date:keyword | ip:keyword | email:keyword | num:keyword grokWithConversionSuffix // tag::grokWithConversionSuffix[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num // end::grokWithConversionSuffix[] ; @@ -410,7 +410,7 @@ date:keyword | ip:keyword | email:keyword | num:integer grokWithToDatetime // tag::grokWithToDatetime[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num | EVAL date = TO_DATETIME(date) // end::grokWithToDatetime[] @@ -436,11 +436,27 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; + +grokWithEscapeTripleQuotes +// tag::grokWithEscapeTripleQuotes[] +ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" +| GROK a """%{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status}""" +// end::grokWithEscapeTripleQuotes[] +| KEEP @timestamp +; + +// tag::grokWithEscapeTripleQuotes-result[] +@timestamp:keyword +2023-01-23T12:15:00.000Z +// end::grokWithEscapeTripleQuotes-result[] +; + + grokWithDuplicateFieldNames // tag::grokWithDuplicateFieldNames[] FROM addresses | KEEP city.name, zip_code -| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| GROK zip_code """%{WORD:zip_parts} %{WORD:zip_parts}""" // end::grokWithDuplicateFieldNames[] | SORT city.name ; @@ -456,7 +472,7 @@ Tokyo | 100-7014 | null basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip // end::basicDissect[] ; @@ -470,7 +486,7 @@ date:keyword | msg:keyword | ip:keyword dissectWithToDatetime // tag::dissectWithToDatetime[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip | EVAL date = TO_DATETIME(date) // end::dissectWithToDatetime[] @@ -485,7 +501,7 @@ some text | 127.0.0.1 | 2023-01-23T12:15:00.000Z dissectRightPaddingModifier // tag::dissectRightPaddingModifier[] ROW message="1998-08-10T17:15:42 WARN" -| DISSECT message "%{ts->} %{level}" +| DISSECT message """%{ts->} %{level}""" // end::dissectRightPaddingModifier[] ; @@ -498,7 +514,7 @@ message:keyword | ts:keyword | level:keyword dissectEmptyRightPaddingModifier#[skip:-8.11.2, reason:Support for empty right padding modifiers introduced in 8.11.2] // tag::dissectEmptyRightPaddingModifier[] ROW message="[1998-08-10T17:15:42] [WARN]" -| DISSECT message "[%{ts}]%{->}[%{level}]" +| DISSECT message """[%{ts}]%{->}[%{level}]""" // end::dissectEmptyRightPaddingModifier[] ; @@ -511,7 +527,7 @@ ROW message="[1998-08-10T17:15:42] [WARN]" dissectAppendModifier // tag::dissectAppendModifier[] ROW message="john jacob jingleheimer schmidt" -| DISSECT message "%{+name} %{+name} %{+name} %{+name}" APPEND_SEPARATOR=" " +| DISSECT message """%{+name} %{+name} %{+name} %{+name}""" APPEND_SEPARATOR=" " // end::dissectAppendModifier[] ; @@ -524,7 +540,7 @@ john jacob jingleheimer schmidt|john jacob jingleheimer schmidt dissectAppendWithOrderModifier // tag::dissectAppendWithOrderModifier[] ROW message="john jacob jingleheimer schmidt" -| DISSECT message "%{+name/2} %{+name/4} %{+name/3} %{+name/1}" APPEND_SEPARATOR="," +| DISSECT message """%{+name/2} %{+name/4} %{+name/3} %{+name/1}""" APPEND_SEPARATOR="," // end::dissectAppendWithOrderModifier[] ; @@ -537,7 +553,7 @@ john jacob jingleheimer schmidt|schmidt,john,jingleheimer,jacob dissectNamedSkipKey // tag::dissectNamedSkipKey[] ROW message="1.2.3.4 - - 30/Apr/1998:22:00:52 +0000" -| DISSECT message "%{clientip} %{?ident} %{?auth} %{@timestamp}" +| DISSECT message """%{clientip} %{?ident} %{?auth} %{@timestamp}""" // end::dissectNamedSkipKey[] ; @@ -550,7 +566,7 @@ message:keyword | clientip:keyword | @timestamp:keyword docsLike // tag::like[] FROM employees -| WHERE first_name LIKE "?b*" +| WHERE first_name LIKE """?b*""" | KEEP first_name, last_name // end::like[] | SORT first_name @@ -566,7 +582,7 @@ Eberhardt |Terkki docsRlike // tag::rlike[] FROM employees -| WHERE first_name RLIKE ".leja.*" +| WHERE first_name RLIKE """.leja.*""" | KEEP first_name, last_name // end::rlike[] ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index dd9d519649c01..00fa2fddb2106 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -1800,3 +1800,59 @@ warning:Line 1:29: java.lang.IllegalArgumentException: single-value function enc x:keyword null ; + + +likeEscapingSingleQuotes +// tag::likeEscapingSingleQuotes[] +ROW message = "foo * bar" +| WHERE message LIKE "foo \\* bar" +// end::likeEscapingSingleQuotes[] +; + +// tag::likeEscapingSingleQuotes-result[] +message:keyword +foo * bar +// end::likeEscapingSingleQuotes-result[] +; + + +likeEscapingTripleQuotes +// tag::likeEscapingTripleQuotes[] +ROW message = "foo * bar" +| WHERE message LIKE """foo \* bar""" +// end::likeEscapingTripleQuotes[] +; + +// tag::likeEscapingTripleQuotes-result[] +message:keyword +foo * bar +// end::likeEscapingTripleQuotes-result[] +; + + +rlikeEscapingSingleQuotes +// tag::rlikeEscapingSingleQuotes[] +ROW message = "foo ( bar" +| WHERE message RLIKE "foo \\( bar" +// end::rlikeEscapingSingleQuotes[] +; + +// tag::rlikeEscapingSingleQuotes-result[] +message:keyword +foo ( bar +// end::rlikeEscapingSingleQuotes-result[] +; + + +rlikeEscapingTripleQuotes +// tag::rlikeEscapingTripleQuotes[] +ROW message = "foo ( bar" +| WHERE message RLIKE """foo \( bar""" +// end::rlikeEscapingTripleQuotes[] +; + +// tag::rlikeEscapingTripleQuotes-result[] +message:keyword +foo ( bar +// end::rlikeEscapingTripleQuotes-result[] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java index b46c46c89deba..cd42711177510 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java @@ -33,7 +33,23 @@ public class RLike extends org.elasticsearch.xpack.esql.core.expression.predicat Use `RLIKE` to filter data based on string patterns using using <>. `RLIKE` usually acts on a field placed on the left-hand side of the operator, but it can also act on a constant (literal) - expression. The right-hand side of the operator represents the pattern.""", examples = @Example(file = "docs", tag = "rlike")) + expression. The right-hand side of the operator represents the pattern.""", detailedDescription = """ + Matching special characters (eg. `.`, `*`, `(`...) will require escaping. + The escape character is backslash `\\`. Since also backslash is a special character in string literals, + it will require further escaping. + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes] + ---- + + To reduce the overhead of escaping, we suggest using triple quotes strings `\"\"\"` + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes] + ---- + """, examples = @Example(file = "docs", tag = "rlike")) public RLike( Source source, @Param(name = "str", type = { "keyword", "text" }, description = "A literal value.") Expression value, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java index 714c4ca04a862..c1b4f20f41795 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java @@ -43,7 +43,23 @@ also act on a constant (literal) expression. The right-hand side of the operator The following wildcard characters are supported: * `*` matches zero or more characters. - * `?` matches one character.""", examples = @Example(file = "docs", tag = "like")) + * `?` matches one character.""", detailedDescription = """ + Matching the exact characters `*` and `.` will require escaping. + The escape character is backslash `\\`. Since also backslash is a special character in string literals, + it will require further escaping. + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes] + ---- + + To reduce the overhead of escaping, we suggest using triple quotes strings `\"\"\"` + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes] + ---- + """, examples = @Example(file = "docs", tag = "like")) public WildcardLike( Source source, @Param(name = "str", type = { "keyword", "text" }, description = "A literal expression.") Expression left,