From d6e6469c6c369e01bcd552bd0b070e29a17b910c Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 15:37:17 -0700 Subject: [PATCH 1/8] update documentation with examples comment specifications markdown pages Signed-off-by: YANGDB --- README.md | 9 +- docs/ppl-lang/PPL-Example-Commands.md | 346 +++++++++++++ docs/{ => ppl-lang}/PPL-on-Spark.md | 2 +- docs/ppl-lang/README.md | 98 ++++ .../ppl-correlation-command.md} | 0 docs/ppl-lang/ppl-dedup-command.md | 126 +++++ docs/ppl-lang/ppl-eval-command.md | 115 +++++ docs/ppl-lang/ppl-fields-command.md | 71 +++ docs/ppl-lang/ppl-grok-command.md | 75 +++ docs/ppl-lang/ppl-head-command.md | 60 +++ .../ppl-join-command.md} | 2 +- .../ppl-lookup-command.md} | 2 +- docs/ppl-lang/ppl-parse-command.md | 103 ++++ docs/ppl-lang/ppl-patterns-command.md | 69 +++ docs/ppl-lang/ppl-rare-command.md | 46 ++ docs/ppl-lang/ppl-rename-command.md | 52 ++ docs/ppl-lang/ppl-search-command.md | 42 ++ docs/ppl-lang/ppl-sort-command.md | 98 ++++ docs/ppl-lang/ppl-stats-command.md | 477 ++++++++++++++++++ docs/ppl-lang/ppl-top-command.md | 58 +++ docs/ppl-lang/ppl-where-command.md | 62 +++ ppl-spark-integration/README.md | 325 +----------- 22 files changed, 1912 insertions(+), 326 deletions(-) create mode 100644 docs/ppl-lang/PPL-Example-Commands.md rename docs/{ => ppl-lang}/PPL-on-Spark.md (97%) create mode 100644 docs/ppl-lang/README.md rename docs/{PPL-Correlation-command.md => ppl-lang/ppl-correlation-command.md} (100%) create mode 100644 docs/ppl-lang/ppl-dedup-command.md create mode 100644 docs/ppl-lang/ppl-eval-command.md create mode 100644 docs/ppl-lang/ppl-fields-command.md create mode 100644 docs/ppl-lang/ppl-grok-command.md create mode 100644 docs/ppl-lang/ppl-head-command.md rename docs/{PPL-Join-command.md => ppl-lang/ppl-join-command.md} (99%) rename docs/{PPL-Lookup-command.md => ppl-lang/ppl-lookup-command.md} (95%) create mode 100644 docs/ppl-lang/ppl-parse-command.md create mode 100644 
docs/ppl-lang/ppl-patterns-command.md create mode 100644 docs/ppl-lang/ppl-rare-command.md create mode 100644 docs/ppl-lang/ppl-rename-command.md create mode 100644 docs/ppl-lang/ppl-search-command.md create mode 100644 docs/ppl-lang/ppl-sort-command.md create mode 100644 docs/ppl-lang/ppl-stats-command.md create mode 100644 docs/ppl-lang/ppl-top-command.md create mode 100644 docs/ppl-lang/ppl-where-command.md diff --git a/README.md b/README.md index e6f03ebdd..4c470e98b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,14 @@ OpenSearch Flint is ... It consists of four modules: ## Documentation Please refer to the [Flint Index Reference Manual](./docs/index.md) for more information. -For PPL language see [PPL Reference Manual](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/index.rst) for more information. + +### PPL-Language + +* For additional details on PPL commands, see [PPL Commands Docs](docs/ppl-lang/README.md) + +* For additional details on Spark PPL Architecture, see [PPL Architecture](docs/ppl-lang/PPL-on-Spark.md) + +* For additional details on Spark PPL commands project, see [PPL Project](https://github.com/orgs/opensearch-project/projects/214/views/2) ## Prerequisites diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md new file mode 100644 index 000000000..bcdd36293 --- /dev/null +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -0,0 +1,346 @@ +## Example PPL Queries + +#### **Describe** +- `describe table` This command is equal to the `DESCRIBE EXTENDED table` SQL command + +#### **Explain** +- `explain simple | source = table | where a = 1 | fields a,b,c` +- `explain extended | source = table` +- `explain codegen | source = table | dedup a | fields a,b,c` +- `explain cost | source = table | sort a | fields a,b,c` +- `explain formatted | source = table | fields - a` +- `explain simple | describe table` + +#### **Fields** +[See additional command details](ppl-fields-command) +- `source = table` +- 
`source = table | fields a,b,c` +- `source = table | fields + a,b,c` +- `source = table | fields - b,c` +- `source = table | eval b1 = b | fields - b1,c` + +_- **Limitation: new field added by eval command with a function cannot be dropped in current version:**_ +- `source = table | eval b1 = b + 1 | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) +- `source = table | eval b1 = lower(b) | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) + +**Nested-Fields** +- `source = catalog.schema.table1, catalog.schema.table2 | fields A.nested1, B.nested1` +- `source = catalog.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield` +- `source = catalog.schema.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield` + +#### **Filters** +- `source = table | where a = 1 | fields a,b,c` +- `source = table | where a >= 1 | fields a,b,c` +- `source = table | where a < 1 | fields a,b,c` +- `source = table | where b != 'test' | fields a,b,c` +- `source = table | where c = 'test' | fields a,b,c | head 3` +- `source = table | where ispresent(b)` +- `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3` +- `source = table | where isempty(a)` +- `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` + +``` + source = table | eval status_category = + case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code') + | where case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code' + ) = 'Incorrect HTTP status code' +``` +- + ``` + source = table + | eval factor = case(a > 15, a - 14, isnull(b), a - 7, a < 3, a + 
1 else 1) + | where case(factor = 2, 'even', factor = 4, 'even', factor = 6, 'even', factor = 8, 'even' else 'odd') = 'even' + | stats count() by factor + ``` + +#### **Filters With Logical Conditions** +- `source = table | where c = 'test' AND a = 1 | fields a,b,c` +- `source = table | where c != 'test' OR a > 1 | fields a,b,c | head 1` +- `source = table | where c = 'test' NOT a > 1 | fields a,b,c` + + +#### **Eval**: +[See additional command details](ppl-eval-command) + +Assumptions: `a`, `b`, `c` are existing fields in `table` +- `source = table | eval f = 1 | fields a,b,c,f` +- `source = table | eval f = 1` (output a,b,c,f fields) +- `source = table | eval n = now() | eval t = unix_timestamp(a) | fields n,t` +- `source = table | eval f = a | where f > 1 | sort f | fields a,b,c | head 5` +- `source = table | eval f = a * 2 | eval h = f * 2 | fields a,f,h` +- `source = table | eval f = a * 2, h = f * 2 | fields a,f,h` +- `source = table | eval f = a * 2, h = b | stats avg(f) by h` +- `source = table | eval f = ispresent(a)` +- `source = table | eval r = coalesce(a, b, c) | fields r` +- `source = table | eval e = isempty(a) | fields e` +- `source = table | eval f = case(a = 0, 'zero', a = 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` +- `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else 'unknown')` +- `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` +- +``` +source = table | eval e = eval status_category = +case(a >= 200 AND a < 300, 'Success', +a >= 300 AND a < 400, 'Redirection', +a >= 400 AND a < 500, 'Client Error', +a >= 500, 'Server Error' +else 'Unknown' +) +``` +- +``` +source = table | where ispresent(a) | +eval status_category = + case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP 
status code' + ) + | stats count() by status_category +``` + +Limitation: Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" +- `source = table | eval a = 10 | fields a,b,c` +- `source = table | eval a = a * 2 | stats avg(a)` +- `source = table | eval a = abs(a) | where a > 0` +- `source = table | eval a = signum(a) | where a < 0` + +#### **Aggregations** +[See additional command details](ppl-stats-command) + +- `source = table | stats avg(a) ` +- `source = table | where a < 50 | stats avg(c) ` +- `source = table | stats max(c) by b` +- `source = table | stats count(c) by b | head 5` +- `source = table | stats distinct_count(c)` +- `source = table | stats stddev_samp(c)` +- `source = table | stats stddev_pop(c)` +- `source = table | stats percentile(c, 90)` +- `source = table | stats percentile_approx(c, 99)` + +**Aggregations With Span** +- `source = table | stats count(a) by span(a, 10) as a_span` +- `source = table | stats sum(age) by span(age, 5) as age_span | head 2` +- `source = table | stats avg(age) by span(age, 20) as age_span, country | sort - age_span | head 2` + +**Aggregations With TimeWindow Span (tumble windowing function)** +- `source = table | stats sum(productsAmount) by span(transactionDate, 1d) as age_date | sort age_date` +- `source = table | stats sum(productsAmount) by span(transactionDate, 1w) as age_date, productId` + +**Aggregations Group by Multiple Levels** +- `source = table | stats avg(age) as avg_state_age by country, state | stats avg(avg_state_age) as avg_country_age by country` +- `source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country` + +#### **Dedup** +[See additional command details](ppl-dedup-command) + +- `source = table | dedup a | fields a,b,c` +- 
`source = table | dedup a,b | fields a,b,c` +- `source = table | dedup a keepempty=true | fields a,b,c` +- `source = table | dedup a,b keepempty=true | fields a,b,c` +- `source = table | dedup 1 a | fields a,b,c` +- `source = table | dedup 1 a,b | fields a,b,c` +- `source = table | dedup 1 a keepempty=true | fields a,b,c` +- `source = table | dedup 1 a,b keepempty=true | fields a,b,c` +- `source = table | dedup 2 a | fields a,b,c` +- `source = table | dedup 2 a,b | fields a,b,c` +- `source = table | dedup 2 a keepempty=true | fields a,b,c` +- `source = table | dedup 2 a,b keepempty=true | fields a,b,c` +- `source = table | dedup 1 a consecutive=true| fields a,b,c` (Consecutive deduplication is unsupported) + +#### **Rare** +[See additional command details](ppl-rare-command) + +- `source=accounts | rare gender` +- `source=accounts | rare age by gender` + +#### **Top** +[See additional command details](ppl-top-command) + +- `source=accounts | top gender` +- `source=accounts | top 1 gender` +- `source=accounts | top 1 age by gender` + +#### **Parse** +[See additional command details](ppl-parse-command) + +- `source=accounts | parse email '.+@(?.+)' | fields email, host ` +- `source=accounts | parse email '.+@(?.+)' | top 1 host ` +- `source=accounts | parse email '.+@(?.+)' | stats count() by host` +- `source=accounts | parse email '.+@(?.+)' | eval eval_result=1 | fields host, eval_result` +- `source=accounts | parse email '.+@(?.+)' | where age > 45 | sort - age | fields age, email, host` +- `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` +- **Limitation: [see limitations](ppl-parse-command.md#limitations) + +#### **Grok** +[See additional command details](ppl-grok-command) + +- `source=accounts | grok email '.+@%{HOSTNAME:host}' | top 1 host` +- `source=accounts | grok email '.+@%{HOSTNAME:host}' | stats count() by host` +- `source=accounts | grok email '.+@%{HOSTNAME:host}' | eval 
eval_result=1 | fields host, eval_result` +- `source=accounts | grok email '.+@%{HOSTNAME:host}' | eval eval_result=1 | fields host, eval_result` +- `source=accounts | grok street_address '%{NUMBER} %{GREEDYDATA:address}' | fields address ` +- `source=logs | grok message '%{COMMONAPACHELOG}' | fields COMMONAPACHELOG, timestamp, response, bytes` + +- **Limitation: Overriding existing field is unsupported:**_ +- `source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address` +- **[see limitations](ppl-parse-command.md#limitations) + +#### **Patterns** +[See additional command details](ppl-patterns-command) + +- `source=accounts | patterns email | fields email, patterns_field ` +- `source=accounts | patterns email | where age > 45 | sort - age | fields email, patterns_field` +- `source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | fields message, no_numbers` +- `source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | stats count() by no_numbers` +- **Limitation: [see limitations](ppl-parse-command.md#limitations) + +#### **Rename** +[See additional command details](ppl-rename-command) + +- `source=accounts | rename email as user_email | fields id, user_email` +- `source=accounts | rename id as user_id, email as user_email | fields user_id, user_email` + + +#### **Join** +[See additional command details](ppl-join-command) + +- `source = table1 | inner join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` +- `source = table1 | left join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` +- `source = table1 | right join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` +- `source = table1 | full left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` +- `source = table1 | cross join left = l right = r table2` +- `source = table1 | left semi join left = l right = r on l.a = r.a table2` +- `source = table1 | left anti join left = l right = r on l.a = r.a table2` + 
+_- **Limitation: sub-searches is unsupported in join right side now**_ + + +#### **Lookup** +[See additional command details](ppl-lookup-command) + +- `source = table1 | lookup table2 id` +- `source = table1 | lookup table2 id, name` +- `source = table1 | lookup table2 id as cid, name` +- `source = table1 | lookup table2 id as cid, name replace dept as department` +- `source = table1 | lookup table2 id as cid, name replace dept as department, city as location` +- `source = table1 | lookup table2 id as cid, name append dept as department` +- `source = table1 | lookup table2 id as cid, name append dept as department, city as location` +- `source = table1 | lookup table2 id as cid, name replace dept` (dept without "as" is unsupported) + +_- **Limitation: "REPLACE" or "APPEND" clause must contain "AS"**_ + + +#### **InSubquery** +[See additional command details](ppl-inSubquery-command) + +- `source = outer | where a in [ source = inner | fields b ]` +- `source = outer | where (a) in [ source = inner | fields b ]` +- `source = outer | where (a,b,c) in [ source = inner | fields d,e,f ]` +- `source = outer | where a not in [ source = inner | fields b ]` +- `source = outer | where (a) not in [ source = inner | fields b ]` +- `source = outer | where (a,b,c) not in [ source = inner | fields d,e,f ]` +- `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested) +- `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter) + +SQL Migration examples with IN-Subquery PPL: +1. 
tpch q4 (in-subquery with aggregation) +```sql +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and o_orderkey in ( + select + l_orderkey + from + lineitem + where l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority +``` +Rewritten by PPL InSubquery query: +```sql +source = orders +| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" and o_orderkey IN + [ source = lineitem + | where l_commitdate < l_receiptdate + | fields l_orderkey + ] +| stats count(1) as order_count by o_orderpriority +| sort o_orderpriority +| fields o_orderpriority, order_count +``` +2.tpch q20 (nested in-subquery) +```sql +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'forest%' + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name +``` +Rewritten by PPL InSubquery query: +```sql +source = supplier +| where s_suppkey IN [ + source = partsupp + | where ps_partkey IN [ + source = part + | where like(p_name, "forest%") + | fields p_partkey + ] + | fields ps_suppkey + ] +| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA' + nation +| sort s_name +``` + +--- +#### Experimental Commands: +[See additional command details](ppl-correlation-command.md) + +```sql +- `source alb_logs, traces, metrics | where ip="10.0.0.1" AND cloud.provider="aws"| correlate exact on (ip, port) scope(@timestamp, 2018-07-02T22:23:00, 1 D)` +- `source alb_logs, traces | where alb_logs.ip="10.0.0.1" AND alb_logs.cloud.provider="aws"| + correlate exact fields(traceId, ip) scope(@timestamp, 1D) mapping(alb_logs.ip = traces.attributes.http.server.address, alb_logs.traceId = traces.traceId ) ` +``` + +> ppl-correlation-command is an experimental 
command - it may be removed in future versions + diff --git a/docs/PPL-on-Spark.md b/docs/ppl-lang/PPL-on-Spark.md similarity index 97% rename from docs/PPL-on-Spark.md rename to docs/ppl-lang/PPL-on-Spark.md index 7e7dbde5d..3b260bd37 100644 --- a/docs/PPL-on-Spark.md +++ b/docs/ppl-lang/PPL-on-Spark.md @@ -55,5 +55,5 @@ spark-sql --conf "spark.sql.extensions='org.opensearch.flint.spark.FlintPPLSpark ``` Once this is done, spark will allow both extensions to parse the query (SQL / PPL) and allow the correct execution of the query. -In addition, PPL queries will enjoy the acceleration capabilities supported by the Flint plugins as described [here](index.md) +In addition, PPL queries will enjoy the acceleration capabilities supported by the Flint plugins as described [here](../index.md) diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md new file mode 100644 index 000000000..30f1e0998 --- /dev/null +++ b/docs/ppl-lang/README.md @@ -0,0 +1,98 @@ + +### PPL Language + +Overview +--------- +Piped Processing Language (PPL), powered by OpenSearch, enables OpenSearch users with exploration and discovery of, and finding search patterns in data stored in OpenSearch, using a set of commands delimited by pipes (|). These are essentially read-only requests to process data and return results. + +Currently, OpenSearch users can query data using either Query DSL or SQL. Query DSL is powerful and fast. However, it has a steep learning curve, and was not designed as a human interface to easily create ad hoc queries and explore user data. SQL allows users to extract and analyze data in OpenSearch in a declarative manner. OpenSearch now makes its search and query engine robust by introducing Piped Processing Language (PPL). It enables users to extract insights from OpenSearch with a sequence of commands delimited by pipes (|). 
It supports a comprehensive set of commands including search, where, fields, rename, dedup, sort, eval, head, top and rare, and functions, operators and expressions. Even new users who have recently adopted OpenSearch can be productive from day one if they are familiar with the pipe (|) syntax. It enables developers, DevOps engineers, support engineers, site reliability engineers (SREs), and IT managers to effectively discover and explore log, monitoring and observability data stored in OpenSearch. + +We expand the capabilities of our Workbench, a comprehensive and integrated visual query tool currently supporting only SQL, to run on-demand PPL commands, and view and save results as text and JSON. We also add a new interactive standalone command line tool, the PPL CLI, to run on-demand PPL commands, and view and save results as text and JSON. + +A query starts with a search command, followed by a set of commands delimited by pipes (|). +For example, the following query retrieves firstname and lastname from accounts where age is greater than 18.
+ +```sql +source=accounts +| where age > 18 +| fields firstname, lastname +``` + +--- +### Specifications + + +* **Commands** + + - [`explain command `](PPL-Example-Commands.md/#explain) + + - [`dedup command `](ppl-dedup-command.md) + + - [`describe command`](PPL-Example-Commands.md/#describe) + + - [`eval command`](ppl-eval-command.md) + + - [`fields command`](ppl-fields-command.md) + + - [`grok command`](ppl-grok-command.md) + + - [`parse command`](ppl-parse-command.md) + + - [`patterns command`](ppl-patterns-command.md) + + - [`rename command`](ppl-rename-command.md) + + - [`search command`](ppl-search-command.md) + + - [`sort command`](ppl-sort-command.md) + + - [`stats command`](ppl-stats-command.md) + + - [`where command`](ppl-where-command.md) + + - [`head command`](ppl-head-command.md) + + - [`rare command`](ppl-rare-command.md) + + - [`top command`](ppl-top-command.md) + + - [`join commands`](ppl-join-command.md) + + - [`lookup commands`](ppl-lookup-command.md) + + - [`correlation commands`](ppl-correlation-command.md) + + +* **Functions** + + - `Expressions `_ + + - `Math Functions `_ + + - `Date and Time Functions `_ + + - `String Functions `_ + + - `Condition Functions `_ + + - `Relevance Functions `_ + + - `Type Conversion Functions `_ + + - `System Functions `_ + + +--- +### PPL On Spark + +[Running PPL On Spark](PPL-on-Spark.md) gives a basic overview of the PPL language an how it functions on top of Spark + + +--- +### Example PPL Queries +See samples of [PPL queries](PPL-Example-Commands.md) + +--- + +### PPL Project Roadmap +[PPL Github Project Roadmap](https://github.com/orgs/opensearch-project/projects/214) \ No newline at end of file diff --git a/docs/PPL-Correlation-command.md b/docs/ppl-lang/ppl-correlation-command.md similarity index 100% rename from docs/PPL-Correlation-command.md rename to docs/ppl-lang/ppl-correlation-command.md diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md new file mode 100644 
index 000000000..f2f6dd086 --- /dev/null +++ b/docs/ppl-lang/ppl-dedup-command.md @@ -0,0 +1,126 @@ +# PPL dedup command + +## Table of contents + +- [Description](#description) +- [Syntax](#syntax) +- [Examples](#examples) + - [Example 1: Dedup by one field](#example-1-dedup-by-one-field) + - [Example 2: Keep 2 duplicates documents](#example-2-keep-2-duplicates-documents) + - [Example 3: Keep or Ignore the empty field by default](#example-3-keep-or-ignore-the-empty-field-by-default) + - [Example 4: Dedup in consecutive document](#example-4-dedup-in-consecutive-document) +- [Additional Examples](#additional-examples) + +## Description + +Use the `dedup` command to remove identical documents, as defined by the given fields, from the search result. + +## Syntax + +```sql +dedup [int] <field-list> [keepempty=<bool>] [consecutive=<bool>] +``` + +* int: optional. The ``dedup`` command retains multiple events for each combination when you specify `<int>`. The number for `<int>` must be greater than 0. If you do not specify a number, only the first occurring event is kept. All other duplicates are removed from the results. **Default:** 1 +* keepempty: optional. If true, keep the document if any field in the field-list has a NULL value or is MISSING. **Default:** false. +* consecutive: optional. If set to true, removes only events with duplicate combinations of values that are consecutive. **Default:** false. +* field-list: mandatory. The comma-delimited field list. At least one field is required. + + +### Example 1: Dedup by one field + +The example shows how to dedup documents by the gender field. + +PPL query: + + os> source=accounts | dedup gender | fields account_number, gender; + fetched rows / total rows = 2/2 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 13 | F | + +------------------+----------+ + +### Example 2: Keep 2 duplicates documents + +The example shows how to dedup documents by the gender field while keeping 2 duplicates.
+ +PPL query: + + os> source=accounts | dedup 2 gender | fields account_number, gender; + fetched rows / total rows = 3/3 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 6 | M | + | 13 | F | + +------------------+----------+ + +### Example 3: Keep or Ignore the empty field by default + +The example show dedup the document by keep null value field. + +PPL query: + + os> source=accounts | dedup email keepempty=true | fields account_number, email; + fetched rows / total rows = 4/4 + +------------------+-----------------------+ + | account_number | email | + |------------------+-----------------------| + | 1 | amberduke@pyrami.com | + | 6 | hattiebond@netagy.com | + | 13 | null | + | 18 | daleadams@boink.com | + +------------------+-----------------------+ + + +The example show dedup the document by ignore the empty value field. + +PPL query: + + os> source=accounts | dedup email | fields account_number, email; + fetched rows / total rows = 3/3 + +------------------+-----------------------+ + | account_number | email | + |------------------+-----------------------| + | 1 | amberduke@pyrami.com | + | 6 | hattiebond@netagy.com | + | 18 | daleadams@boink.com | + +------------------+-----------------------+ + + +### Example 4: Dedup in consecutive document + +The example show dedup the consecutive document. 
+ +PPL query: + + os> source=accounts | dedup gender consecutive=true | fields account_number, gender; + fetched rows / total rows = 3/3 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 13 | F | + | 18 | M | + +------------------+----------+ + + +### Additional Examples + +- `source = table | dedup a | fields a,b,c` +- `source = table | dedup a,b | fields a,b,c` +- `source = table | dedup a keepempty=true | fields a,b,c` +- `source = table | dedup a,b keepempty=true | fields a,b,c` +- `source = table | dedup 1 a | fields a,b,c` +- `source = table | dedup 1 a,b | fields a,b,c` +- `source = table | dedup 1 a keepempty=true | fields a,b,c` +- `source = table | dedup 1 a,b keepempty=true | fields a,b,c` +- `source = table | dedup 2 a | fields a,b,c` +- `source = table | dedup 2 a,b | fields a,b,c` +- `source = table | dedup 2 a keepempty=true | fields a,b,c` +- `source = table | dedup 2 a,b keepempty=true | fields a,b,c` +- `source = table | dedup 1 a consecutive=true| fields a,b,c` (Consecutive deduplication is unsupported) + diff --git a/docs/ppl-lang/ppl-eval-command.md b/docs/ppl-lang/ppl-eval-command.md new file mode 100644 index 000000000..42cba1e2f --- /dev/null +++ b/docs/ppl-lang/ppl-eval-command.md @@ -0,0 +1,115 @@ +# PPL `eval` command + +## Description + The ``eval`` command evaluates the expression and appends the result to the search result. + + +## Syntax +```sql +eval <field>=<expression> ["," <field>=<expression> ]... +``` +* field: mandatory. If the field name does not exist, a new field is added. If the field name already exists, it will be overridden. +* expression: mandatory. Any expression supported by the system. + +### Example 1: Create the new field + +The example shows how to create a new field doubleAge for each document. The new doubleAge is the evaluation result of age multiplied by 2.
+ +PPL query: + + os> source=accounts | eval doubleAge = age * 2 | fields age, doubleAge ; + fetched rows / total rows = 4/4 + +-------+-------------+ + | age | doubleAge | + |-------+-------------| + | 32 | 64 | + | 36 | 72 | + | 28 | 56 | + | 33 | 66 | + +-------+-------------+ + + +### Example 2: Override the existing field + +The example show to override the exist age field with age plus 1. + +PPL query: +```sql + os> source=accounts | eval age = age + 1 | fields age ; + fetched rows / total rows = 4/4 + +-------+ + | age | + |-------| + | 33 | + | 37 | + | 29 | + | 34 | + +-------+ +``` + +### Example 3: Create the new field with field defined in eval + +The example show to create a new field ddAge with field defined in eval command. The new field ddAge is the evaluation result of doubleAge multiply by 2, the doubleAge is defined in the eval command. + +PPL query: + + os> source=accounts | eval doubleAge = age * 2, ddAge = doubleAge * 2 | fields age, doubleAge, ddAge ; + fetched rows / total rows = 4/4 + +-------+-------------+---------+ + | age | doubleAge | ddAge | + |-------+-------------+---------| + | 32 | 64 | 128 | + | 36 | 72 | 144 | + | 28 | 56 | 112 | + | 33 | 66 | 132 | + +-------+-------------+---------+ + +### Additional Examples: +Assumptions: `a`, `b`, `c` are existing fields in `table` +- `source = table | eval f = 1 | fields a,b,c,f` +- `source = table | eval f = 1` (output a,b,c,f fields) +- `source = table | eval n = now() | eval t = unix_timestamp(a) | fields n,t` +- `source = table | eval f = a | where f > 1 | sort f | fields a,b,c | head 5` +- `source = table | eval f = a * 2 | eval h = f * 2 | fields a,f,h` +- `source = table | eval f = a * 2, h = f * 2 | fields a,f,h` +- `source = table | eval f = a * 2, h = b | stats avg(f) by h` +- `source = table | eval f = ispresent(a)` +- `source = table | eval r = coalesce(a, b, c) | fields r` +- `source = table | eval e = isempty(a) | fields e` +- `source = table | eval f = case(a = 0, 'zero', a 
= 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` +- `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else 'unknown')` +- `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` + +Eval with `case` example: +```sql +source = table | eval e = eval status_category = +case(a >= 200 AND a < 300, 'Success', +a >= 300 AND a < 400, 'Redirection', +a >= 400 AND a < 500, 'Client Error', +a >= 500, 'Server Error' +else 'Unknown') +``` + +Eval with another `case` example: + +```sql +source = table | where ispresent(a) | +eval status_category = + case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code' + ) + | stats count() by status_category +``` + +### Limitation: +Overriding an existing field is unsupported; the following queries throw exceptions with "Reference 'a' is ambiguous" + +```sql +- `source = table | eval a = 10 | fields a,b,c` +- `source = table | eval a = a * 2 | stats avg(a)` +- `source = table | eval a = abs(a) | where a > 0` +- `source = table | eval a = signum(a) | where a < 0` +``` \ No newline at end of file diff --git a/docs/ppl-lang/ppl-fields-command.md b/docs/ppl-lang/ppl-fields-command.md new file mode 100644 index 000000000..87c32b64d --- /dev/null +++ b/docs/ppl-lang/ppl-fields-command.md @@ -0,0 +1,71 @@ +# PPL `fields` command + +Description +============ +Use the ``fields`` command to keep or remove fields from the search result. + + +Syntax +============ +`fields [+|-] <field-list>` + +* index: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed. **Default:** + + +* field list: mandatory. Comma-delimited list of fields to keep or remove.
+ + +### Example 1: Select specified fields from result + +The example show fetch account_number, firstname and lastname fields from search results. + +PPL query: + + os> source=accounts | fields account_number, firstname, lastname; + fetched rows / total rows = 4/4 + +------------------+-------------+------------+ + | account_number | firstname | lastname | + |------------------+-------------+------------| + | 1 | Amber | Duke | + | 6 | Hattie | Bond | + | 13 | Nanette | Bates | + | 18 | Dale | Adams | + +------------------+-------------+------------+ + +### Example 2: Remove specified fields from result + +The example show fetch remove account_number field from search results. + +PPL query: + + os> source=accounts | fields account_number, firstname, lastname | fields - account_number ; + fetched rows / total rows = 4/4 + +-------------+------------+ + | firstname | lastname | + |-------------+------------| + | Amber | Duke | + | Hattie | Bond | + | Nanette | Bates | + | Dale | Adams | + +-------------+------------+ + +### Additional Examples + +- `source = table` +- `source = table | fields a,b,c` +- `source = table | fields + a,b,c` +- `source = table | fields - b,c` +- `source = table | eval b1 = b | fields - b1,c` + +### Limitation: +new field added by eval command with a function cannot be dropped in current version:**_ +```sql + `source = table | eval b1 = b + 1 | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) + `source = table | eval b1 = lower(b) | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) +``` + +**Nested-Fields** +```sql +`source = catalog.schema.table1, catalog.schema.table2 | fields A.nested1, B.nested1` +`source = catalog.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield` +`source = catalog.schema.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, 
struct_col2.field1.subfield` +``` +--- \ No newline at end of file diff --git a/docs/ppl-lang/ppl-grok-command.md b/docs/ppl-lang/ppl-grok-command.md new file mode 100644 index 000000000..06028109b --- /dev/null +++ b/docs/ppl-lang/ppl-grok-command.md @@ -0,0 +1,75 @@ +## PPL `grok` Command + + +### Description +The ``grok`` command parses a text field with a grok pattern and appends the results to the search result. + + +### Syntax +```sql +grok <field> <pattern> +``` + +* field: mandatory. The field must be a text field. +* pattern: mandatory string. The grok pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. + +## Grok Pattern + + +The grok pattern is used to match the text field of each document to extract new fields. + +### Example 1: Create the new field + +The example shows how to create new field ``host`` for each document. ``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. + +PPL query: + + os> source=accounts | grok email '.+@%{HOSTNAME:host}' | fields email, host ; + fetched rows / total rows = 4/4 + +-----------------------+------------+ + | email | host | + |-----------------------+------------| + | amberduke@pyrami.com | pyrami.com | + | hattiebond@netagy.com | netagy.com | + | null | | + | daleadams@boink.com | boink.com | + +-----------------------+------------+ + + +### Example 2: Override the existing field + +The example shows how to override the existing ``address`` field with street number removed. + +PPL query: + + os> source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address ; + fetched rows / total rows = 4/4 + +------------------+ + | address | + |------------------| + | Holmes Lane | + | Bristol Street | + | Madison Street | + | Hutchinson Court | + +------------------+ + +### Example 3: Using grok to parse logs + +The example shows how to use grok to parse raw logs. 
+ +PPL query: + + os> source=apache | grok message '%{COMMONAPACHELOG}' | fields COMMONAPACHELOG, timestamp, response, bytes ; + fetched rows / total rows = 4/4 + +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+------------+---------+ + | COMMONAPACHELOG | timestamp | response | bytes | + |-----------------------------------------------------------------------------------------------------------------------------+----------------------------+------------+---------| + | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | 28/Sep/2022:10:15:57 -0700 | 404 | 19927 | + | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | 28/Sep/2022:10:15:57 -0700 | 100 | 28722 | + | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | 28/Sep/2022:10:15:57 -0700 | 401 | 27439 | + | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | 28/Sep/2022:10:15:57 -0700 | 301 | 9481 | + +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+------------+---------+ + +### Limitations + +The grok command has the same limitations as the parse command, see `parse limitations` for details. diff --git a/docs/ppl-lang/ppl-head-command.md b/docs/ppl-lang/ppl-head-command.md new file mode 100644 index 000000000..e4172b1c6 --- /dev/null +++ b/docs/ppl-lang/ppl-head-command.md @@ -0,0 +1,60 @@ +## PPL `head` Command + +**Description** +The ``head`` command returns the first N number of specified results after an optional offset in search order. + + +### Syntax +`head [<size>] [from <offset>]` + +* <size>: optional integer. number of results to return. **Default:** 10 +* <offset>: integer after optional ``from``. 
number of results to skip. **Default:** 0 + +### Example 1: Get first 10 results + +The example shows a maximum of 10 results from the accounts index. + +PPL query: + + os> source=accounts | fields firstname, age | head; + fetched rows / total rows = 4/4 + +-------------+-------+ + | firstname | age | + |-------------+-------| + | Amber | 32 | + | Hattie | 36 | + | Nanette | 28 | + | Dale | 33 | + +-------------+-------+ + +### Example 2: Get first N results + +The example shows the first N results from the accounts index. + +PPL query: + + os> source=accounts | fields firstname, age | head 3; + fetched rows / total rows = 3/3 + +-------------+-------+ + | firstname | age | + |-------------+-------| + | Amber | 32 | + | Hattie | 36 | + | Nanette | 28 | + +-------------+-------+ + +### Example 3: Get first N results after offset M + +The example shows the first N results after offset M from the accounts index. + +PPL query: + + os> source=accounts | fields firstname, age | head 3 from 1; + fetched rows / total rows = 3/3 + +-------------+-------+ + | firstname | age | + |-------------+-------| + | Hattie | 36 | + | Nanette | 28 | + | Dale | 33 | + +-------------+-------+ diff --git a/docs/PPL-Join-command.md b/docs/ppl-lang/ppl-join-command.md similarity index 99% rename from docs/PPL-Join-command.md rename to docs/ppl-lang/ppl-join-command.md index 2a1c7daf9..525373f7c 100644 --- a/docs/PPL-Join-command.md +++ b/docs/ppl-lang/ppl-join-command.md @@ -162,7 +162,7 @@ SEARCH source=customer | SORT - custdist, - c_count ``` -### Comparison with [Correlation](../docs/PPL-Correlation-command.md) +### Comparison with [Correlation](ppl-correlation-command.md) A primary difference between `correlate` and `join` is that both sides of `correlate` are tables, but both sides of `join` are subqueries. 
For example: diff --git a/docs/PPL-Lookup-command.md b/docs/ppl-lang/ppl-lookup-command.md similarity index 95% rename from docs/PPL-Lookup-command.md rename to docs/ppl-lang/ppl-lookup-command.md index 03fee5b4d..1b8350533 100644 --- a/docs/PPL-Lookup-command.md +++ b/docs/ppl-lang/ppl-lookup-command.md @@ -3,7 +3,7 @@ ## Overview Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table). You can extend fields of an index with values from a dimension table, append or replace values when lookup condition is matched. -As an alternative of [Join command](../docs/PPL-Join-command.md), lookup command is more suitable for enriching the source data with a static dataset. +As an alternative of [Join command](ppl-join-command.md), lookup command is more suitable for enriching the source data with a static dataset. ### Syntax of Lookup Command diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md new file mode 100644 index 000000000..a5cc59f04 --- /dev/null +++ b/docs/ppl-lang/ppl-parse-command.md @@ -0,0 +1,103 @@ +## PPL Parse Command + + +### Description +The ``parse`` command parses a text field with a regular expression and appends the result to the search result. + + +### Syntax +```sql +parse <field> <pattern> +``` + +* field: mandatory. The field must be a text field. +* pattern: mandatory string. The regular expression pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. + +## Regular Expression + +The regular expression pattern is used to match the whole text field of each document with Java regex engine. Each named capture group in the expression will become a new ``STRING`` field. + +### Example 1: Create a new field + +The example shows how to create a new field ``host`` for each document. ``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. 
+ +PPL query: + + os> source=accounts | parse email '.+@(?<host>.+)' | fields email, host ; + fetched rows / total rows = 4/4 + +-----------------------+------------+ + | email | host | + |-----------------------+------------| + | amberduke@pyrami.com | pyrami.com | + | hattiebond@netagy.com | netagy.com | + | null | | + | daleadams@boink.com | boink.com | + +-----------------------+------------+ + + +### Example 2: Override an existing field + +The example shows how to override the existing ``address`` field with street number removed. + +PPL query: + + os> source=accounts | parse address '\d+ (?<address>.+)' | fields address ; + fetched rows / total rows = 4/4 + +------------------+ + | address | + |------------------| + | Holmes Lane | + | Bristol Street | + | Madison Street | + | Hutchinson Court | + +------------------+ + +### Example 3: Filter and sort by casted parsed field + +The example shows how to sort street numbers that are higher than 500 in ``address`` field. + +PPL query: + + os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as int) > 500 | sort num(streetNumber) | fields streetNumber, street ; + fetched rows / total rows = 3/3 + +----------------+----------------+ + | streetNumber | street | + |----------------+----------------| + | 671 | Bristol Street | + | 789 | Madison Street | + | 880 | Holmes Lane | + +----------------+----------------+ + +### Limitations + +There are a few limitations with parse command: + +- Fields defined by parse cannot be parsed again. + + The following command will not work: + + source=accounts | parse address '\d+ (?<street>.+)' | parse street '\w+ (?<road>\w+)' ; + +- Fields defined by parse cannot be overridden with other commands. + + ``where`` will not match any documents since ``street`` cannot be overridden: + +```sql +source=accounts | parse address '\d+ (?<street>.+)' | eval street='1' | where street='1' ; +``` + +- The text field used by parse cannot be overridden. + + ``street`` will not be successfully parsed since ``address`` is overridden: + +```sql + source=accounts | parse address '\d+ (?<street>.+)' | eval address='1' ; +``` + +- Fields defined by parse cannot be filtered/sorted after using them in ``stats`` command. 
+ + ``where`` in the following command will not work: + +```sql + source=accounts | parse email '.+@(?<host>.+)' | stats avg(age) by host | where host=pyrami.com ; +``` \ No newline at end of file diff --git a/docs/ppl-lang/ppl-patterns-command.md b/docs/ppl-lang/ppl-patterns-command.md new file mode 100644 index 000000000..77d330dbe --- /dev/null +++ b/docs/ppl-lang/ppl-patterns-command.md @@ -0,0 +1,69 @@ +## PPL `patterns` command + +### Description + The ``patterns`` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. + + +### Syntax + +`patterns [new_field=<new-field-name>] [pattern=<pattern>] <field>` + +* new-field-name: optional string. The name of the new field for extracted patterns, default is ``patterns_field``. If the name already exists, it will replace the original field. +* pattern: optional string. The regex pattern of characters that should be filtered out from the text field. If absent, the default pattern is alphanumeric characters (``[a-zA-Z\d]``). +* field: mandatory. The field must be a text field. + +### Example 1: Create the new field + +The example shows how to extract punctuations in ``email`` for each document. Parsing a null field will return an empty string. + +PPL query: + + os> source=accounts | patterns email | fields email, patterns_field ; + fetched rows / total rows = 4/4 + +-----------------------+------------------+ + | email | patterns_field | + |-----------------------+------------------| + | amberduke@pyrami.com | @. | + | hattiebond@netagy.com | @. | + | null | | + | daleadams@boink.com | @. | + +-----------------------+------------------+ + +### Example 2: Extract log patterns + +The example shows how to extract punctuations from a raw log field using the default patterns. 
+ +PPL query: + + os> source=apache | patterns message | fields message, patterns_field ; + fetched rows / total rows = 4/4 + +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------+ + | message | patterns_field | + |-----------------------------------------------------------------------------------------------------------------------------+---------------------------------| + | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | ... - [//::: -] " /-/ /." | + | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | ... - [//::: -] " //// /." | + | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | ... - - [//::: -] " //--- /." | + | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | ... - - [//::: -] " / /." | + +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------+ + +### Example 3: Extract log patterns with custom regex pattern + +The example shows how to extract punctuations from a raw log field using user defined patterns. 
+ +PPL query: + + os> source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | fields message, no_numbers ; + fetched rows / total rows = 4/4 + +-----------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ + | message | no_numbers | + |-----------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------| + | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | ... - upton [/Sep/::: -] "HEAD /e-business/mindshare HTTP/." | + | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | ... - pouros [/Sep/::: -] "GET /architectures/convergence/niches/mindshare HTTP/." | + | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | ... - - [/Sep/::: -] "PATCH /strategize/out-of-the-box HTTP/." | + | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | ... - - [/Sep/::: -] "POST /users HTTP/." | + +-----------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ + +Limitation +========== + +The patterns command has the same limitations as the parse command, see ``parse limitations` for details. 
diff --git a/docs/ppl-lang/ppl-rare-command.md b/docs/ppl-lang/ppl-rare-command.md new file mode 100644 index 000000000..5645382f8 --- /dev/null +++ b/docs/ppl-lang/ppl-rare-command.md @@ -0,0 +1,46 @@ +## PPL rare Command + +**Description** +Using ``rare`` command to find the least common tuple of values of all fields in the field list. + +**Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. + +**Syntax** +`rare <field-list> [by-clause]` + +* field-list: mandatory. comma-delimited list of field names. +* by-clause: optional. one or more fields to group the results by. + + +### Example 1: Find the least common values in a field + +The example finds least common gender of all the accounts. + +PPL query: + + os> source=accounts | rare gender; + fetched rows / total rows = 2/2 + +----------+ + | gender | + |----------| + | F | + | M | + +----------+ + + +### Example 2: Find the least common values organized by gender + +The example finds least common age of all the accounts group by gender. + +PPL query: + + os> source=accounts | rare age by gender; + fetched rows / total rows = 4/4 + +----------+-------+ + | gender | age | + |----------+-------| + | F | 28 | + | M | 32 | + | M | 33 | + | M | 36 | + +----------+-------+ diff --git a/docs/ppl-lang/ppl-rename-command.md b/docs/ppl-lang/ppl-rename-command.md new file mode 100644 index 000000000..d7fd6921c --- /dev/null +++ b/docs/ppl-lang/ppl-rename-command.md @@ -0,0 +1,52 @@ +## PPL `rename` command + +### Description +Using ``rename`` command to rename one or more fields in the search result. + + +### Syntax +`rename <source-field> AS <target-field> ["," <source-field> AS <target-field>]...` + +* source-field: mandatory. The name of the field you want to rename. +* target-field: mandatory. The name you want to rename to. + + +### Example 1: Rename one field + +The example shows how to rename one field. 
+ +PPL query: + + os> source=accounts | rename account_number as an | fields an; + fetched rows / total rows = 4/4 + +------+ + | an | + |------| + | 1 | + | 6 | + | 13 | + | 18 | + +------+ + + +### Example 2: Rename multiple fields + +The example shows how to rename multiple fields. + +PPL query: + + os> source=accounts | rename account_number as an, employer as emp | fields an, emp; + fetched rows / total rows = 4/4 + +------+---------+ + | an | emp | + |------+---------| + | 1 | Pyrami | + | 6 | Netagy | + | 13 | Quility | + | 18 | null | + +------+---------+ + +### Limitation: +Overriding existing field is unsupported: + +`source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address` diff --git a/docs/ppl-lang/ppl-search-command.md b/docs/ppl-lang/ppl-search-command.md new file mode 100644 index 000000000..f81d9d907 --- /dev/null +++ b/docs/ppl-lang/ppl-search-command.md @@ -0,0 +1,42 @@ +## PPL `search` command + +### Description +Using ``search`` command to retrieve documents from the index. The ``search`` command can only be used as the first command in the PPL query. + + +### Syntax +`search source=[<remote-cluster>:]<index> [boolean-expression]` + +* search: search keyword, which can be omitted. +* index: mandatory. search command must specify which index to query from. The index name can be prefixed by "<cluster name>:" for cross-cluster search. +* bool-expression: optional. any expression which could be evaluated to boolean value. + + +### Example 1: Fetch all the data +The example shows how to fetch all the documents from the accounts index. 
+ +PPL query: + + os> source=accounts; + +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + +### Example 2: Fetch data with condition +The example show fetch all the document from accounts index with . 
+ +PPL query: + + os> source=accounts account_number=1 or gender="F"; + +------------------+-------------+--------------------+-----------+----------+--------+------------+---------+-------+----------------------+------------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |------------------+-------------+--------------------+-----------+----------+--------+------------+---------+-------+----------------------+------------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + +------------------+-------------+--------------------+-----------+----------+--------+------------+---------+-------+----------------------+------------+ + diff --git a/docs/ppl-lang/ppl-sort-command.md b/docs/ppl-lang/ppl-sort-command.md new file mode 100644 index 000000000..c3bf304d7 --- /dev/null +++ b/docs/ppl-lang/ppl-sort-command.md @@ -0,0 +1,98 @@ +## PPL `sort`command + +### Description +Using ``sort`` command to sorts all the search result by the specified fields. + + +### Syntax +`sort <[+|-] sort-field>...` + + +* [+|-]: optional. The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. +* sort-field: mandatory. The field used to sort. + + +### Example 1: Sort by one field +The example show sort all the document with age field in ascending order. 
+ +PPL query: + + os> source=accounts | sort age | fields account_number, age; + fetched rows / total rows = 4/4 + +------------------+-------+ + | account_number | age | + |------------------+-------| + | 13 | 28 | + | 1 | 32 | + | 18 | 33 | + | 6 | 36 | + +------------------+-------+ + + +### Example 2: Sort by one field return all the result + +The example shows how to sort all the documents by the age field in ascending order. + +PPL query: + + os> source=accounts | sort age | fields account_number, age; + fetched rows / total rows = 4/4 + +------------------+-------+ + | account_number | age | + |------------------+-------| + | 13 | 28 | + | 1 | 32 | + | 18 | 33 | + | 6 | 36 | + +------------------+-------+ + + +### Example 3: Sort by one field in descending order +The example shows how to sort all the documents by the age field in descending order. + +PPL query: + + os> source=accounts | sort - age | fields account_number, age; + fetched rows / total rows = 4/4 + +------------------+-------+ + | account_number | age | + |------------------+-------| + | 6 | 36 | + | 18 | 33 | + | 1 | 32 | + | 13 | 28 | + +------------------+-------+ + +### Example 4: Sort by multiple fields + +The example shows how to sort all the documents by the gender field in ascending order and the age field in descending order. + +PPL query: + + os> source=accounts | sort + gender, - age | fields account_number, gender, age; + fetched rows / total rows = 4/4 + +------------------+----------+-------+ + | account_number | gender | age | + |------------------+----------+-------| + | 13 | F | 28 | + | 6 | M | 36 | + | 18 | M | 33 | + | 1 | M | 32 | + +------------------+----------+-------+ + +### Example 5: Sort by field include null value + +The example shows how to sort the employer field with the default option (ascending order and null first); the result shows that the null value is in the first row. 
+ +PPL query: + + os> source=accounts | sort employer | fields employer; + fetched rows / total rows = 4/4 + +------------+ + | employer | + |------------| + | null | + | Netagy | + | Pyrami | + | Quility | + +------------+ diff --git a/docs/ppl-lang/ppl-stats-command.md b/docs/ppl-lang/ppl-stats-command.md new file mode 100644 index 000000000..552f83e46 --- /dev/null +++ b/docs/ppl-lang/ppl-stats-command.md @@ -0,0 +1,477 @@ +## PPL `stats` command + +### Description +Using ``stats`` command to calculate the aggregation from search result. + +### NULL/MISSING values handling: + +``` ++----------+-------------+-------------+ +| Function | NULL | MISSING | ++----------+-------------+-------------+ +| COUNT | Not counted | Not counted | ++----------+-------------+-------------+ +| SUM | Ignore | Ignore | ++----------+-------------+-------------+ +| AVG | Ignore | Ignore | ++----------+-------------+-------------+ +| MAX | Ignore | Ignore | ++----------+-------------+-------------+ +| MIN | Ignore | Ignore | ++----------+-------------+-------------+ +``` + + +### Syntax +`stats <aggregation>... [by-clause]` + + +### **aggregation:** + mandatory. An aggregation function. The argument of aggregation must be field. + +**by-clause**: optional. + +#### Syntax: +`by [span-expression,] [field,]...` + +**Description:** + +The by clause could be the fields and expressions like scalar functions and aggregation functions. +Besides, the span clause can be used to split specific field into buckets in the same interval, the stats then does the aggregation by these span buckets. + +**Default**: + +If no `<by-clause>` is specified, the stats command returns only one row, which is the aggregation over the entire result set. + +### **`span-expression`**: +optional, at most one. + +#### Syntax: +`span(field_expr, interval_expr)` + +**Description:** + +The unit of the interval expression is the natural unit by default. 
+If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. + +For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``. + +* Available time unit: +``` ++----------------------------+ +| Span Interval Units | ++============================+ +| millisecond (ms) | ++----------------------------+ +| second (s) | ++----------------------------+ +| minute (m, case sensitive) | ++----------------------------+ +| hour (h) | ++----------------------------+ +| day (d) | ++----------------------------+ +| week (w) | ++----------------------------+ +| month (M, case sensitive) | ++----------------------------+ +| quarter (q) | ++----------------------------+ +| year (y) | ++----------------------------+ +``` + +### Aggregation Functions + +#### _COUNT_ + +**Description** + +Returns a count of the number of expr in the rows retrieved by a SELECT statement. + +Example: + + os> source=accounts | stats count(); + fetched rows / total rows = 1/1 + +-----------+ + | count() | + |-----------| + | 4 | + +-----------+ + +#### _SUM_ + +**Description** + +`SUM(expr)`. Returns the sum of expr. + +Example: + + os> source=accounts | stats sum(age) by gender; + fetched rows / total rows = 2/2 + +------------+----------+ + | sum(age) | gender | + |------------+----------| + | 28 | F | + | 101 | M | + +------------+----------+ + +#### _AVG_ + +**Description** + +`AVG(expr)`. Returns the average value of expr. 
+ +Example: + + os> source=accounts | stats avg(age) by gender; + fetched rows / total rows = 2/2 + +--------------------+----------+ + | avg(age) | gender | + |--------------------+----------| + | 28.0 | F | + | 33.666666666666664 | M | + +--------------------+----------+ + +#### MAX + +**Description** + +`MAX(expr)` Returns the maximum value of expr. + +Example: + + os> source=accounts | stats max(age); + fetched rows / total rows = 1/1 + +------------+ + | max(age) | + |------------| + | 36 | + +------------+ + +#### MIN + +**Description** + +`MIN(expr)` Returns the minimum value of expr. + +Example: + + os> source=accounts | stats min(age); + fetched rows / total rows = 1/1 + +------------+ + | min(age) | + |------------| + | 28 | + +------------+ + +#### STDDEV_SAMP + +**Description** + +`STDDEV_SAMP(expr)` Return the sample standard deviation of expr. + +Example: + + os> source=accounts | stats stddev_samp(age); + fetched rows / total rows = 1/1 + +--------------------+ + | stddev_samp(age) | + |--------------------| + | 3.304037933599835 | + +--------------------+ + +#### STDDEV_POP + +**Description** + +`STDDEV_POP(expr)` Return the population standard deviation of expr. + +Example: + + os> source=accounts | stats stddev_pop(age); + fetched rows / total rows = 1/1 + +--------------------+ + | stddev_pop(age) | + |--------------------| + | 2.8613807855648994 | + +--------------------+ + +#### TAKE + +**Description** + +`TAKE(field [, size])` Return original values of a field. It does not guarantee on the order of values. + +* field: mandatory. The field must be a text field. +* size: optional integer. The number of values should be returned. Default is 10. 
+ +Example: + + os> source=accounts | stats take(firstname); + fetched rows / total rows = 1/1 + +-----------------------------+ + | take(firstname) | + |-----------------------------| + | [Amber,Hattie,Nanette,Dale] | + +-----------------------------+ + +#### PERCENTILE or PERCENTILE_APPROX + +**Description** + +`PERCENTILE(expr, percent)` or `PERCENTILE_APPROX(expr, percent)` Return the approximate percentile value of expr at the specified percentage. + +* percent: The number must be a constant between 0 and 100. +--- + +### Examples: + + os> source=accounts | stats percentile(age, 90) by gender; + fetched rows / total rows = 2/2 + +-----------------------+----------+ + | percentile(age, 90) | gender | + |-----------------------+----------| + | 28 | F | + | 36 | M | + +-----------------------+----------+ + +### Example 1: Calculate the count of events + +The example show calculate the count of events in the accounts. + +PPL query: + + os> source=accounts | stats count(); + fetched rows / total rows = 1/1 + +-----------+ + | count() | + |-----------| + | 4 | + +-----------+ + + +### Example 2: Calculate the average of a field + +The example show calculate the average age of all the accounts. + +PPL query: + + os> source=accounts | stats avg(age); + fetched rows / total rows = 1/1 + +------------+ + | avg(age) | + |------------| + | 32.25 | + +------------+ + + +### Example 3: Calculate the average of a field by group + +The example show calculate the average age of all the accounts group by gender. + +PPL query: + + os> source=accounts | stats avg(age) by gender; + fetched rows / total rows = 2/2 + +--------------------+----------+ + | avg(age) | gender | + |--------------------+----------| + | 28.0 | F | + | 33.666666666666664 | M | + +--------------------+----------+ + + +### Example 4: Calculate the average, sum and count of a field by group + +The example show calculate the average age, sum age and count of events of all the accounts group by gender. 
+ +PPL query: + + os> source=accounts | stats avg(age), sum(age), count() by gender; + fetched rows / total rows = 2/2 + +--------------------+------------+-----------+----------+ + | avg(age) | sum(age) | count() | gender | + |--------------------+------------+-----------+----------| + | 28.0 | 28 | 1 | F | + | 33.666666666666664 | 101 | 3 | M | + +--------------------+------------+-----------+----------+ + +### Example 5: Calculate the maximum of a field + +The example calculates the max age of all the accounts. + +PPL query: + + os> source=accounts | stats max(age); + fetched rows / total rows = 1/1 + +------------+ + | max(age) | + |------------| + | 36 | + +------------+ + +### Example 6: Calculate the maximum and minimum of a field by group + +The example calculates the max and min age values of all the accounts group by gender. + +PPL query: + + os> source=accounts | stats max(age), min(age) by gender; + fetched rows / total rows = 2/2 + +------------+------------+----------+ + | max(age) | min(age) | gender | + |------------+------------+----------| + | 28 | 28 | F | + | 36 | 32 | M | + +------------+------------+----------+ + +### Example 7: Calculate the distinct count of a field + +To get the count of distinct values of a field, you can use ``DISTINCT_COUNT`` (or ``DC``) function instead of ``COUNT``. The example calculates both the count and the distinct count of gender field of all the accounts. + +PPL query: + + os> source=accounts | stats count(gender), distinct_count(gender); + fetched rows / total rows = 1/1 + +-----------------+--------------------------+ + | count(gender) | distinct_count(gender) | + |-----------------+--------------------------| + | 4 | 2 | + +-----------------+--------------------------+ + +### Example 8: Calculate the count by a span + +The example gets the count of age by the interval of 10 years. 
+ +PPL query: + + os> source=accounts | stats count(age) by span(age, 10) as age_span + fetched rows / total rows = 2/2 + +--------------+------------+ + | count(age) | age_span | + |--------------+------------| + | 1 | 20 | + | 3 | 30 | + +--------------+------------+ + +### Example 9: Calculate the count by a gender and span + +The example gets the count of age by the interval of 5 years and group by gender. + +PPL query: + + os> source=accounts | stats count() as cnt by span(age, 5) as age_span, gender + fetched rows / total rows = 3/3 + +-------+------------+----------+ + | cnt | age_span | gender | + |-------+------------+----------| + | 1 | 25 | F | + | 2 | 30 | M | + | 1 | 35 | M | + +-------+------------+----------+ + +Span will always be the first grouping key whatever order you specify. + +PPL query: + + os> source=accounts | stats count() as cnt by gender, span(age, 5) as age_span + fetched rows / total rows = 3/3 + +-------+------------+----------+ + | cnt | age_span | gender | + |-------+------------+----------| + | 1 | 25 | F | + | 2 | 30 | M | + | 1 | 35 | M | + +-------+------------+----------+ + +### Example 10: Calculate the count and get email list by a gender and span + +The example gets the count of age by the interval of 5 years and group by gender, additionally for each row get a list of at most 5 emails. 
+ +PPL query: + + os> source=accounts | stats count() as cnt, take(email, 5) by span(age, 5) as age_span, gender + fetched rows / total rows = 3/3 + +-------+--------------------------------------------+------------+----------+ + | cnt | take(email, 5) | age_span | gender | + |-------+--------------------------------------------+------------+----------| + | 1 | [] | 25 | F | + | 2 | [amberduke@pyrami.com,daleadams@boink.com] | 30 | M | + | 1 | [hattiebond@netagy.com] | 35 | M | + +-------+--------------------------------------------+------------+----------+ + +### Example 11: Calculate the percentile of a field + +The example shows how to calculate the 90th percentile of age for all the accounts. + +PPL query: + + os> source=accounts | stats percentile(age, 90); + fetched rows / total rows = 1/1 + +-----------------------+ + | percentile(age, 90) | + |-----------------------| + | 36 | + +-----------------------+ + + +### Example 12: Calculate the percentile of a field by group + +The example shows how to calculate the 90th percentile of age for all the accounts, grouped by gender. + +PPL query: + + os> source=accounts | stats percentile(age, 90) by gender; + fetched rows / total rows = 2/2 + +-----------------------+----------+ + | percentile(age, 90) | gender | + |-----------------------+----------| + | 28 | F | + | 36 | M | + +-----------------------+----------+ + +### Example 13: Calculate the percentile by a gender and span + +The example gets the 90th percentile of age by the interval of 10 years, grouped by gender.
+ +PPL query: + + os> source=accounts | stats percentile(age, 90) as p90 by span(age, 10) as age_span, gender + fetched rows / total rows = 2/2 + +-------+------------+----------+ + | p90 | age_span | gender | + |-------+------------+----------| + | 28 | 20 | F | + | 36 | 30 | M | + +-------+------------+----------+ + +### Additional Examples +```sql +- `source = table | stats avg(a) ` +- `source = table | where a < 50 | stats avg(c) ` +- `source = table | stats max(c) by b` +- `source = table | stats count(c) by b | head 5` +- `source = table | stats distinct_count(c)` +- `source = table | stats stddev_samp(c)` +- `source = table | stats stddev_pop(c)` +- `source = table | stats percentile(c, 90)` +- `source = table | stats percentile_approx(c, 99)` +``` + +**Aggregations With Span** +```sql +- `source = table | stats count(a) by span(a, 10) as a_span` +- `source = table | stats sum(age) by span(age, 5) as age_span | head 2` +- `source = table | stats avg(age) by span(age, 20) as age_span, country | sort - age_span | head 2` +``` +**Aggregations With TimeWindow Span (tumble windowing function)** +```sql +- `source = table | stats sum(productsAmount) by span(transactionDate, 1d) as age_date | sort age_date` +- `source = table | stats sum(productsAmount) by span(transactionDate, 1w) as age_date, productId` +``` +**Aggregations Group by Multiple Levels** +```sql +- `source = table | stats avg(age) as avg_state_age by country, state | stats avg(avg_state_age) as avg_country_age by country` +- `source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country` +``` diff --git a/docs/ppl-lang/ppl-top-command.md b/docs/ppl-lang/ppl-top-command.md new file mode 100644 index 000000000..4ba56f692 --- /dev/null +++ b/docs/ppl-lang/ppl-top-command.md @@ -0,0 +1,58 @@ +## PPL 
top Command + +**Description** +Use the ``top`` command to find the most common tuple of values of all fields in the field list. + + +### Syntax +`top [N] <field-list> [by-clause]` + +* N: number of results to return. **Default**: 10 +* field-list: mandatory. comma-delimited list of field names. +* by-clause: optional. one or more fields to group the results by. + + +### Example 1: Find the most common values in a field + +The example finds the most common gender of all the accounts. + +PPL query: + + os> source=accounts | top gender; + fetched rows / total rows = 2/2 + +----------+ + | gender | + |----------| + | M | + | F | + +----------+ + +### Example 2: Find the most common values in a field + +The example finds the single most common gender of all the accounts. + +PPL query: + + os> source=accounts | top 1 gender; + fetched rows / total rows = 1/1 + +----------+ + | gender | + |----------| + | M | + +----------+ + +### Example 3: Find the most common values organized by gender + +The example finds the most common age of all the accounts, grouped by gender. + +PPL query: + + os> source=accounts | top 1 age by gender; + fetched rows / total rows = 2/2 + +----------+-------+ + | gender | age | + |----------+-------| + | F | 28 | + | M | 32 | + +----------+-------+ + diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md new file mode 100644 index 000000000..73f3fbd94 --- /dev/null +++ b/docs/ppl-lang/ppl-where-command.md @@ -0,0 +1,62 @@ +## PPL where Command + +### Description +The ``where`` command uses a bool-expression to filter the search result. The ``where`` command only returns the result when the bool-expression evaluates to true. + + +### Syntax +`where <bool-expression>` + +* bool-expression: optional. any expression which could be evaluated to boolean value. + +### Example 1: Filter result set with condition + + +The example shows how to fetch all the documents from the accounts index that satisfy a condition.
+ +PPL query: + + os> source=accounts | where account_number=1 or gender="F" | fields account_number, gender; + fetched rows / total rows = 2/2 + +------------------+----------+ + | account_number | gender | + |------------------+----------| + | 1 | M | + | 13 | F | + +------------------+----------+ + +### Additional Examples + +#### **Filters With Logical Conditions** +``` +- `source = table | where c = 'test' AND a = 1 | fields a,b,c` +- `source = table | where c != 'test' OR a > 1 | fields a,b,c | head 1` +- `source = table | where c = 'test' NOT a > 1 | fields a,b,c` +- `source = table | where a = 1 | fields a,b,c` +- `source = table | where a >= 1 | fields a,b,c` +- `source = table | where a < 1 | fields a,b,c` +- `source = table | where b != 'test' | fields a,b,c` +- `source = table | where c = 'test' | fields a,b,c | head 3` +- `source = table | where ispresent(b)` +- `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3` +- `source = table | where isempty(a)` +- `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` + +- `source = table | eval status_category = + case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code') + | where case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code' + ) = 'Incorrect HTTP status code' + +- `source = table + | eval factor = case(a > 15, a - 14, isnull(b), a - 7, a < 3, a + 1 else 1) + | where case(factor = 2, 'even', factor = 4, 'even', factor = 6, 'even', factor = 8, 'even' else 'odd') = 'even' + | stats count() by factor` +``` \ No newline at end of file diff --git a/ppl-spark-integration/README.md b/ppl-spark-integration/README.md index c8b82e337..73c526868 100644 --- a/ppl-spark-integration/README.md +++ b/ppl-spark-integration/README.md @@ 
-215,334 +215,15 @@ Next tasks ahead will resolve this: - Separate the PPL / SQL drivers inside the OpenSearch PPL client to better distinguish - Create a thin PPL client capable of interaction with the PPL Driver regardless of which driver (Spark , OpenSearch , Prometheus ) ---- - -### Roadmap - -This section describes the next steps planned for enabling additional commands and gamer translation. - -#### Example PPL Queries -See the next samples of PPL queries : - -**Describe** - - `describe table` This command is equal to the `DESCRIBE EXTENDED table` SQL command - -**Explain** - - `explain simple | source = table | where a = 1 | fields a,b,c` - - `explain extended | source = table` - - `explain codegen | source = table | dedup a | fields a,b,c` - - `explain cost | source = table | sort a | fields a,b,c` - - `explain formatted | source = table | fields - a` - - `explain simple | describe table` - -**Fields** - - `source = table` - - `source = table | fields a,b,c` - - `source = table | fields + a,b,c` - - `source = table | fields - b,c` - - `source = table | eval b1 = b | fields - b1,c` - -_- **Limitation: new field added by eval command with a function cannot be dropped in current version:**_ - - `source = table | eval b1 = b + 1 | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) - - `source = table | eval b1 = lower(b) | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) - -**Nested-Fields** - - `source = catalog.schema.table1, catalog.schema.table2 | fields A.nested1, B.nested1` - - `source = catalog.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield` - - `source = catalog.schema.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield` - -**Filters** - - `source = table | where a = 1 | fields a,b,c` - - `source = table | where a >= 1 | fields a,b,c` - 
- `source = table | where a < 1 | fields a,b,c` - - `source = table | where b != 'test' | fields a,b,c` - - `source = table | where c = 'test' | fields a,b,c | head 3` - - `source = table | where ispresent(b)` - - `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3` - - `source = table | where isempty(a)` - - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'`; - - - ``` - source = table | eval status_category = - case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Incorrect HTTP status code') - | where case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Incorrect HTTP status code' - ) = 'Incorrect HTTP status code' - ``` -- - ``` - source = table - | eval factor = case(a > 15, a - 14, isnull(b), a - 7, a < 3, a + 1 else 1) - | where case(factor = 2, 'even', factor = 4, 'even', factor = 6, 'even', factor = 8, 'even' else 'odd') = 'even' - | stats count() by factor - ``` - -**Filters With Logical Conditions** - - `source = table | where c = 'test' AND a = 1 | fields a,b,c` - - `source = table | where c != 'test' OR a > 1 | fields a,b,c | head 1` - - `source = table | where c = 'test' NOT a > 1 | fields a,b,c` - - -**Eval** - -Assumptions: `a`, `b`, `c` are existing fields in `table` - - `source = table | eval f = 1 | fields a,b,c,f` - - `source = table | eval f = 1` (output a,b,c,f fields) - - `source = table | eval n = now() | eval t = unix_timestamp(a) | fields n,t` - - `source = table | eval f = a | where f > 1 | sort f | fields a,b,c | head 5` - - `source = table | eval f = a * 2 | eval h = f * 2 | fields a,f,h` - - `source = table | eval f = a * 2, h = f * 2 | fields a,f,h` - - `source = table | eval f = a * 2, h = b | stats avg(f) by h` - - `source = table | eval f = ispresent(a)` - - `source = table | eval r = coalesce(a, 
b, c) | fields r` - - `source = table | eval e = isempty(a) | fields e` - - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` - - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else 'unknown')` - - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` - - - ``` - source = table | eval e = eval status_category = - case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Unknown' - ) - ``` -- - ``` - source = table | where ispresent(a) | - eval status_category = - case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Incorrect HTTP status code' - ) - | stats count() by status_category - ``` - -Limitation: Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" - - `source = table | eval a = 10 | fields a,b,c` - - `source = table | eval a = a * 2 | stats avg(a)` - - `source = table | eval a = abs(a) | where a > 0` - - `source = table | eval a = signum(a) | where a < 0` - -**Aggregations** - - `source = table | stats avg(a) ` - - `source = table | where a < 50 | stats avg(c) ` - - `source = table | stats max(c) by b` - - `source = table | stats count(c) by b | head 5` - - `source = table | stats distinct_count(c)` - - `source = table | stats stddev_samp(c)` - - `source = table | stats stddev_pop(c)` - - `source = table | stats percentile(c, 90)` - - `source = table | stats percentile_approx(c, 99)` - -**Aggregations With Span** -- `source = table | stats count(a) by span(a, 10) as a_span` -- `source = table | stats sum(age) by span(age, 5) as age_span | head 2` -- `source = table | stats avg(age) by span(age, 20) as age_span, 
country | sort - age_span | head 2` - -**Aggregations With TimeWindow Span (tumble windowing function)** -- `source = table | stats sum(productsAmount) by span(transactionDate, 1d) as age_date | sort age_date` -- `source = table | stats sum(productsAmount) by span(transactionDate, 1w) as age_date, productId` - -**Aggregations Group by Multiple Levels** -- `source = table | stats avg(age) as avg_state_age by country, state | stats avg(avg_state_age) as avg_country_age by country` -- `source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country` - -**Dedup** -- `source = table | dedup a | fields a,b,c` -- `source = table | dedup a,b | fields a,b,c` -- `source = table | dedup a keepempty=true | fields a,b,c` -- `source = table | dedup a,b keepempty=true | fields a,b,c` -- `source = table | dedup 1 a | fields a,b,c` -- `source = table | dedup 1 a,b | fields a,b,c` -- `source = table | dedup 1 a keepempty=true | fields a,b,c` -- `source = table | dedup 1 a,b keepempty=true | fields a,b,c` -- `source = table | dedup 2 a | fields a,b,c` -- `source = table | dedup 2 a,b | fields a,b,c` -- `source = table | dedup 2 a keepempty=true | fields a,b,c` -- `source = table | dedup 2 a,b keepempty=true | fields a,b,c` -- `source = table | dedup 1 a consecutive=true| fields a,b,c` (Consecutive deduplication is unsupported) - -**Rare** -- `source=accounts | rare gender` -- `source=accounts | rare age by gender` - -**Top** -- `source=accounts | top gender` -- `source=accounts | top 1 gender` -- `source=accounts | top 1 age by gender` - -**Parse** -- `source=accounts | parse email '.+@(?.+)' | fields email, host ` -- `source=accounts | parse email '.+@(?.+)' | top 1 host ` -- `source=accounts | parse email '.+@(?.+)' | stats count() by host` -- `source=accounts | parse email 
'.+@(?.+)' | eval eval_result=1 | fields host, eval_result` -- `source=accounts | parse email '.+@(?.+)' | where age > 45 | sort - age | fields age, email, host` -- `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` - -**Grok** -- `source=accounts | grok email '.+@%{HOSTNAME:host}' | top 1 host` -- `source=accounts | grok email '.+@%{HOSTNAME:host}' | stats count() by host` -- `source=accounts | grok email '.+@%{HOSTNAME:host}' | eval eval_result=1 | fields host, eval_result` -- `source=accounts | grok email '.+@%{HOSTNAME:host}' | eval eval_result=1 | fields host, eval_result` -- `source=accounts | grok street_address '%{NUMBER} %{GREEDYDATA:address}' | fields address ` -- `source=logs | grok message '%{COMMONAPACHELOG}' | fields COMMONAPACHELOG, timestamp, response, bytes` - -**Patterns** -- `source=accounts | patterns email | fields email, patterns_field ` -- `source=accounts | patterns email | where age > 45 | sort - age | fields email, patterns_field` -- `source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | fields message, no_numbers` -- `source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | stats count() by no_numbers` - -**Rename** -- `source=accounts | rename email as user_email | fields id, user_email` -- `source=accounts | rename id as user_id, email as user_email | fields user_id, user_email` - -_- **Limitation: Overriding existing field is unsupported:**_ - - `source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address` - -**Join** -- `source = table1 | inner join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` -- `source = table1 | left join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` -- `source = table1 | right join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` -- `source = table1 | full left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c` -- 
`source = table1 | cross join left = l right = r table2` -- `source = table1 | left semi join left = l right = r on l.a = r.a table2` -- `source = table1 | left anti join left = l right = r on l.a = r.a table2` - -_- **Limitation: sub-searches is unsupported in join right side now**_ - -Details of Join command, see [PPL-Join-Command](../docs/PPL-Join-command.md) - -**Lookup** -- `source = table1 | lookup table2 id` -- `source = table1 | lookup table2 id, name` -- `source = table1 | lookup table2 id as cid, name` -- `source = table1 | lookup table2 id as cid, name replace dept as department` -- `source = table1 | lookup table2 id as cid, name replace dept as department, city as location` -- `source = table1 | lookup table2 id as cid, name append dept as department` -- `source = table1 | lookup table2 id as cid, name append dept as department, city as location` -- `source = table1 | lookup table2 id as cid, name replace dept` (dept without "as" is unsupported) - -_- **Limitation: "REPLACE" or "APPEND" clause must contain "AS"**_ - -Details of Lookup command syntax, see [PPL-Lookup-Command](../docs/PPL-Lookup-command.md) - -**InSubquery** -- `source = outer | where a in [ source = inner | fields b ]` -- `source = outer | where (a) in [ source = inner | fields b ]` -- `source = outer | where (a,b,c) in [ source = inner | fields d,e,f ]` -- `source = outer | where a not in [ source = inner | fields b ]` -- `source = outer | where (a) not in [ source = inner | fields b ]` -- `source = outer | where (a,b,c) not in [ source = inner | fields d,e,f ]` -- `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested) -- `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter) - -SQL Migration examples with IN-Subquery PPL: -1. 
tpch q4 (in-subquery with aggregation) -```sql -select - o_orderpriority, - count(*) as order_count -from - orders -where - o_orderdate >= date '1993-07-01' - and o_orderdate < date '1993-07-01' + interval '3' month - and o_orderkey in ( - select - l_orderkey - from - lineitem - where l_commitdate < l_receiptdate - ) -group by - o_orderpriority -order by - o_orderpriority -``` -Rewritten by PPL InSubquery query: -```sql -source = orders -| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" and o_orderkey IN - [ source = lineitem - | where l_commitdate < l_receiptdate - | fields l_orderkey - ] -| stats count(1) as order_count by o_orderpriority -| sort o_orderpriority -| fields o_orderpriority, order_count -``` -2.tpch q20 (nested in-subquery) -```sql -select - s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'forest%' - ) - ) - and s_nationkey = n_nationkey - and n_name = 'CANADA' -order by - s_name -``` -Rewritten by PPL InSubquery query: -```sql -source = supplier -| where s_suppkey IN [ - source = partsupp - | where ps_partkey IN [ - source = part - | where like(p_name, "forest%") - | fields p_partkey - ] - | fields ps_suppkey - ] -| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA' - nation -| sort s_name -``` - ---- -#### Experimental Commands: -- `correlation` - [See details](../docs/PPL-Correlation-command.md) -> This is an experimental command - it may be removed in future versions - --- ### Documentations -For additional details on PPL commands, see [PPL Commands Docs](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/index.rst) +For additional details on PPL commands, see [PPL Commands Docs](../docs/ppl-lang/README.md) + +For additional details on Spark PPL Architecture, see [PPL Architecture](../docs/ppl-lang/PPL-on-Spark.md) For additional details on 
Spark PPL commands project, see [PPL Project](https://github.com/orgs/opensearch-project/projects/214/views/2) -For additional details on Spark PPL commands support campaign, see [PPL Commands Campaign](https://github.com/opensearch-project/opensearch-spark/issues/408) \ No newline at end of file From b1791fff1e076f7af4682ce46aae2a7d213b1ef1 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 17:23:04 -0700 Subject: [PATCH 2/8] update documentation with specifications markdown pages including ppl expressions Signed-off-by: YANGDB --- docs/ppl-lang/README.md | 16 +- docs/ppl-lang/functions/ppl-condition.md | 171 ++ docs/ppl-lang/functions/ppl-conversion.md | 65 + docs/ppl-lang/functions/ppl-datetime.md | 2033 ++++++++++++++++++++ docs/ppl-lang/functions/ppl-expressions.md | 138 ++ docs/ppl-lang/functions/ppl-math.md | 717 +++++++ docs/ppl-lang/functions/ppl-string.md | 253 +++ 7 files changed, 3383 insertions(+), 10 deletions(-) create mode 100644 docs/ppl-lang/functions/ppl-condition.md create mode 100644 docs/ppl-lang/functions/ppl-conversion.md create mode 100644 docs/ppl-lang/functions/ppl-datetime.md create mode 100644 docs/ppl-lang/functions/ppl-expressions.md create mode 100644 docs/ppl-lang/functions/ppl-math.md create mode 100644 docs/ppl-lang/functions/ppl-string.md diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index 30f1e0998..f561e11c2 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -65,21 +65,17 @@ source=accounts * **Functions** - - `Expressions `_ + - [`Expressions`](functions/ppl-expressions.md) - - `Math Functions `_ + - [`Math Functions`](functions/ppl-math.md) - - `Date and Time Functions `_ + - [`Date and Time Functions`](functions/ppl-datetime.md) - - `String Functions `_ + - [`String Functions`](functions/ppl-string.md) - - `Condition Functions `_ + - [`Condition Functions`](functions/ppl-condition.md) - - `Relevance Functions `_ - - - `Type Conversion Functions `_ - - - `System Functions `_ + - 
[`Type Conversion Functions`](functions/ppl-conversion.md) --- diff --git a/docs/ppl-lang/functions/ppl-condition.md b/docs/ppl-lang/functions/ppl-condition.md new file mode 100644 index 000000000..bcb91e961 --- /dev/null +++ b/docs/ppl-lang/functions/ppl-condition.md @@ -0,0 +1,171 @@ +## PPL Condition Functions + +### `ISNULL` + +**Description** + +`isnull(field)` returns true if field is null. + +**Argument type:** + - all the supported data types. + - Return type: **BOOLEAN** + +Example: + + os> source=accounts | eval result = isnull(employer) | fields result, employer, firstname + fetched rows / total rows = 4/4 + +----------+------------+-------------+ + | result | employer | firstname | + |----------+------------+-------------| + | False | Pyrami | Amber | + | False | Netagy | Hattie | + | False | Quility | Nanette | + | True | null | Dale | + +----------+------------+-------------+ + +### `ISNOTNULL` + +**Description** + +`isnotnull(field)` returns true if field is not null. + +**Argument type:** + - all the supported data types. + - Return type: **BOOLEAN** + +Example: + + os> source=accounts | where not isnotnull(employer) | fields account_number, employer + fetched rows / total rows = 1/1 + +------------------+------------+ + | account_number | employer | + |------------------+------------| + | 18 | null | + +------------------+------------+ + +### `EXISTS` + + os> source=accounts | where isnull(email) | fields account_number, email + fetched rows / total rows = 1/1 + +------------------+---------+ + | account_number | email | + |------------------+---------| + | 13 | null | + +------------------+---------+ + +### `IFNULL` + +**Description** + +`ifnull(field1, field2)` returns field2 if field1 is null; otherwise it returns field1. + +**Argument type:** + - all the supported data types (NOTE: if the two parameters have different types, the semantic check fails).
+ - Return type: **any** + +Example: + + os> source=accounts | eval result = ifnull(employer, 'default') | fields result, employer, firstname + fetched rows / total rows = 4/4 + +----------+------------+-------------+ + | result | employer | firstname | + |----------+------------+-------------| + | Pyrami | Pyrami | Amber | + | Netagy | Netagy | Hattie | + | Quility | Quility | Nanette | + | default | null | Dale | + +----------+------------+-------------+ + +### `NULLIF` + +**Description** + +`nullif(field1, field2)` returns null if the two parameters are the same, otherwise it returns field1. + +**Argument type:** + + - all the supported data types (NOTE: if the two parameters have different types, the semantic check fails) + - Return type: **any** + +Example: + + os> source=accounts | eval result = nullif(employer, 'Pyrami') | fields result, employer, firstname + fetched rows / total rows = 4/4 + +----------+------------+-------------+ + | result | employer | firstname | + |----------+------------+-------------| + | null | Pyrami | Amber | + | Netagy | Netagy | Hattie | + | Quility | Quility | Nanette | + | null | null | Dale | + +----------+------------+-------------+ + + +### `ISNULL` + +**Description** + +`isnull(field)` returns true if field is null, otherwise it returns false. + +**Argument type:** + - all the supported data types + - Return type: **BOOLEAN** + +Example: + + os> source=accounts | eval result = isnull(employer) | fields result, employer, firstname + fetched rows / total rows = 4/4 + +----------+------------+-------------+ + | result | employer | firstname | + |----------+------------+-------------| + | False | Pyrami | Amber | + | False | Netagy | Hattie | + | False | Quility | Nanette | + | True | null | Dale | + +----------+------------+-------------+ + +### `IF` + +**Description** + +`if(condition, expr1, expr2)` returns expr1 if condition is true, otherwise it returns expr2.
+ +**Argument type:** + + - all the supported data types (NOTE: if expr1 and expr2 have different types, the semantic check fails) + - Return type: **any** + +Example: + + os> source=accounts | eval result = if(true, firstname, lastname) | fields result, firstname, lastname + fetched rows / total rows = 4/4 + +----------+-------------+------------+ + | result | firstname | lastname | + |----------+-------------+------------| + | Amber | Amber | Duke | + | Hattie | Hattie | Bond | + | Nanette | Nanette | Bates | + | Dale | Dale | Adams | + +----------+-------------+------------+ + + os> source=accounts | eval result = if(false, firstname, lastname) | fields result, firstname, lastname + fetched rows / total rows = 4/4 + +----------+-------------+------------+ + | result | firstname | lastname | + |----------+-------------+------------| + | Duke | Amber | Duke | + | Bond | Hattie | Bond | + | Bates | Nanette | Bates | + | Adams | Dale | Adams | + +----------+-------------+------------+ + + os> source=accounts | eval is_vip = if(age > 30 AND isnotnull(employer), true, false) | fields is_vip, firstname, lastname + fetched rows / total rows = 4/4 + +----------+-------------+------------+ + | is_vip | firstname | lastname | + |----------+-------------+------------| + | True | Amber | Duke | + | True | Hattie | Bond | + | False | Nanette | Bates | + | False | Dale | Adams | + +----------+-------------+------------+ diff --git a/docs/ppl-lang/functions/ppl-conversion.md b/docs/ppl-lang/functions/ppl-conversion.md new file mode 100644 index 000000000..48e4106ca --- /dev/null +++ b/docs/ppl-lang/functions/ppl-conversion.md @@ -0,0 +1,65 @@ +## PPL Type Conversion Functions + +### `CAST` + +**Description** + +`cast(expr as dataType)` casts the expr to dataType and returns the value of dataType.
The following conversion rules are used: + +``` ++------------+--------+--------+---------+-------------+--------+--------+ +| Src/Target | STRING | NUMBER | BOOLEAN | TIMESTAMP | DATE | TIME | ++------------+--------+--------+---------+-------------+--------+--------+ +| STRING | | Note1 | Note1 | TIMESTAMP() | DATE() | TIME() | ++------------+--------+--------+---------+-------------+--------+--------+ +| NUMBER | Note1 | | v!=0 | N/A | N/A | N/A | ++------------+--------+--------+---------+-------------+--------+--------+ +| BOOLEAN | Note1 | v?1:0 | | N/A | N/A | N/A | ++------------+--------+--------+---------+-------------+--------+--------+ +| TIMESTAMP | Note1 | N/A | N/A | | DATE() | TIME() | ++------------+--------+--------+---------+-------------+--------+--------+ +| DATE | Note1 | N/A | N/A | N/A | | N/A | ++------------+--------+--------+---------+-------------+--------+--------+ +| TIME | Note1 | N/A | N/A | N/A | N/A | | ++------------+--------+--------+---------+-------------+--------+--------+ +``` + +Cast to **string** example: + + os> source=people | eval `cbool` = CAST(true as string), `cint` = CAST(1 as string), `cdate` = CAST(CAST('2012-08-07' as date) as string) | fields `cbool`, `cint`, `cdate` + fetched rows / total rows = 1/1 + +---------+--------+------------+ + | cbool | cint | cdate | + |---------+--------+------------| + | true | 1 | 2012-08-07 | + +---------+--------+------------+ + +Cast to **number** example: + + os> source=people | eval `cbool` = CAST(true as int), `cstring` = CAST('1' as int) | fields `cbool`, `cstring` + fetched rows / total rows = 1/1 + +---------+-----------+ + | cbool | cstring | + |---------+-----------| + | 1 | 1 | + +---------+-----------+ + +Cast to **date** example: + + os> source=people | eval `cdate` = CAST('2012-08-07' as date), `ctime` = CAST('01:01:01' as time), `ctimestamp` = CAST('2012-08-07 01:01:01' as timestamp) | fields `cdate`, `ctime`, `ctimestamp` + fetched rows / total rows = 1/1 + 
+------------+----------+---------------------+ + | cdate | ctime | ctimestamp | + |------------+----------+---------------------| + | 2012-08-07 | 01:01:01 | 2012-08-07 01:01:01 | + +------------+----------+---------------------+ + +Cast function can be **chained**: + + os> source=people | eval `cbool` = CAST(CAST(true as string) as boolean) | fields `cbool` + fetched rows / total rows = 1/1 + +---------+ + | cbool | + |---------| + | True | + +---------+ diff --git a/docs/ppl-lang/functions/ppl-datetime.md b/docs/ppl-lang/functions/ppl-datetime.md new file mode 100644 index 000000000..d3ca272e3 --- /dev/null +++ b/docs/ppl-lang/functions/ppl-datetime.md @@ -0,0 +1,2033 @@ +## PPL Date and Time Functions + +### `ADDDATE` + +**Description:** + + +**Usage:** adddate(date, INTERVAL expr unit) adds the interval given by the second argument to date; adddate(date, days) adds the second argument as an integer number of days to date. +If the first argument is TIME, today's date is used; if the first argument is DATE, time at midnight is used. + +Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG + +**Return type map:** + +(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP + +(DATE, LONG) -> DATE + +(TIMESTAMP/TIME, LONG) -> TIMESTAMP + +Synonyms: `DATE_ADD` when invoked with the INTERVAL form of the second argument.
+ +Antonyms: `SUBDATE`_ + +Example: + + os> source=people | eval `'2020-08-26' + 1h` = ADDDATE(DATE('2020-08-26'), INTERVAL 1 HOUR), `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' + 1` = ADDDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2020-08-26' + 1h`, `'2020-08-26' + 1`, `ts '2020-08-26 01:01:01' + 1` + fetched rows / total rows = 1/1 + +---------------------+--------------------+--------------------------------+ + | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | + |---------------------+--------------------+--------------------------------| + | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | + +---------------------+--------------------+--------------------------------+ + + + +### `ADDTIME` + +**Description:** + + +**Usage:** addtime(expr1, expr2) adds expr2 to expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. + +Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME + +**Return type map:** + +(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP + +(TIME, DATE/TIMESTAMP/TIME) -> TIME + +Antonyms: `SUBTIME`_ + +Example: + + os> source=people | eval `'2008-12-12' + 0` = ADDTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' + 0` + fetched rows / total rows = 1/1 + +---------------------+ + | '2008-12-12' + 0 | + |---------------------| + | 2008-12-12 00:00:00 | + +---------------------+ + + os> source=people | eval `'23:59:59' + 0` = ADDTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' + 0` + fetched rows / total rows = 1/1 + +------------------+ + | '23:59:59' + 0 | + |------------------| + | 23:59:59 | + +------------------+ + + os> source=people | eval `'2004-01-01' + '23:59:59'` = ADDTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' + '23:59:59'` + fetched rows / total rows = 1/1 + +-----------------------------+ + | '2004-01-01' + '23:59:59' | + 
|-----------------------------| + | 2004-01-01 23:59:59 | + +-----------------------------+ + + os> source=people | eval `'10:20:30' + '00:05:42'` = ADDTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' + '00:05:42'` + fetched rows / total rows = 1/1 + +---------------------------+ + | '10:20:30' + '00:05:42' | + |---------------------------| + | 10:26:12 | + +---------------------------+ + + os> source=people | eval `'2007-02-28 10:20:30' + '20:40:50'` = ADDTIME(TIMESTAMP('2007-02-28 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-02-28 10:20:30' + '20:40:50'` + fetched rows / total rows = 1/1 + +--------------------------------------+ + | '2007-02-28 10:20:30' + '20:40:50' | + |--------------------------------------| + | 2007-03-01 07:01:20 | + +--------------------------------------+ + + +### `CONVERT_TZ` + + +**Description:** + + +**Usage:** convert_tz(timestamp, from_timezone, to_timezone) constructs a local timestamp converted from the from_timezone to the to_timezone. CONVERT_TZ returns null when any of the three function arguments is invalid, i.e. the timestamp is not in the format yyyy-MM-dd HH:mm:ss or a timezone is not in the format (+/-)HH:mm. It also returns null for invalid dates, such as February 30th, and for invalid timezones, i.e. those outside the range -13:59 to +14:00. + +Argument type: TIMESTAMP, STRING, STRING + +Return type: TIMESTAMP + +Conversion from +00:00 timezone to +10:00 timezone.
Returns the timestamp argument converted from +00:00 to +10:00 + +Example: + + os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | fields `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | + |-------------------------------------------------------| + | 2008-05-15 22:00:00 | + +-------------------------------------------------------+ + +The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +15:00 in this example will return null. + +Example: + + os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+15:00')| fields `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-05-15 12:00:00','+00:00','+15:00') | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ + +Conversion from a positive timezone to a negative timezone that goes over date line. + +Example: + + os> source=people | eval `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` = convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | + |-------------------------------------------------------| + | 2008-05-14 22:30:00 | + +-------------------------------------------------------+ + +Valid dates are required in convert_tz, invalid dates such as April 31st (not a date in the Gregorian calendar) will result in null. 
+ +Example: + + os> source=people | eval `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` = convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ + +Valid dates are required in convert_tz, invalid dates such as February 30th (not a date in the Gregorian calendar) will result in null. + +Example: + + os> source=people | eval `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ + +February 29th 2008 is a valid date because it is a leap year. + +Example: + + os> source=people | eval `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | + |-------------------------------------------------------| + | 2008-02-28 22:30:00 | + +-------------------------------------------------------+ + +Valid dates are required in convert_tz, invalid dates such as February 29th 2007 (2007 is not a leap year) will result in null. 
+ +Example: + + os> source=people | eval `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ + +The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:01 in this example, will return null. + +Example: + + os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ + +The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones on the boundary of the range, such as +14:00 in this example, are valid and will return a correctly converted timestamp. + +Example: + + os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | + |-------------------------------------------------------| + | 2008-01-31 22:00:00 | + +-------------------------------------------------------+ + +The valid timezone range for convert_tz is (-13:59, +14:00) inclusive.
Timezones outside of the range, such as -14:00, will result in null. + +Example: + + os> source=people | eval `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ + +The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. This timezone is within range so it is valid and will convert the time. + +Example: + + os> source=people | eval `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` = convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` + fetched rows / total rows = 1/1 + +-------------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | + |-------------------------------------------------------| + | 2008-02-02 01:59:00 | + +-------------------------------------------------------+ + + +### `CURDATE` + +**Description:** + + +Returns the current date as a value in 'YYYY-MM-DD' format. +`CURDATE()` returns the date at which it executes, as [`SYSDATE()`](#sysdate) does. + +Return type: DATE + +Specification: CURDATE() -> DATE + +Example: + + > source=people | eval `CURDATE()` = CURDATE() | fields `CURDATE()` + fetched rows / total rows = 1/1 + +-------------+ + | CURDATE() | + |-------------| + | 2022-08-02 | + +-------------+ + + +### `CURRENT_DATE` + + +**Description:** + + +`CURRENT_DATE()` is a synonym for [`CURDATE()`](#curdate).
+ +Example: + + > source=people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()` + fetched rows / total rows = 1/1 + +------------------+ + | CURRENT_DATE() | + |------------------| + | 2022-08-02 | + +------------------+ + + +### `CURRENT_TIME` + +**Description:** + + +`CURRENT_TIME()` is a synonym for [`CURTIME()`](#curtime). + +Example: + + > source=people | eval `CURRENT_TIME()` = CURRENT_TIME() | fields `CURRENT_TIME()` + fetched rows / total rows = 1/1 + +------------------+ + | CURRENT_TIME() | + |------------------| + | 15:39:05 | + +------------------+ + + +### `CURRENT_TIMESTAMP` + +**Description:** + + +`CURRENT_TIMESTAMP()` is a synonym for [`NOW()`](#now). + +Example: + + > source=people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()` + fetched rows / total rows = 1/1 + +-----------------------+ + | CURRENT_TIMESTAMP() | + |-----------------------| + | 2022-08-02 15:54:19 | + +-----------------------+ + + +### `CURTIME` + +**Description:** + + +Returns the current time as a value in 'hh:mm:ss' format. +`CURTIME()` returns the time at which the statement began to execute, as [`NOW()`](#now) does. + +Return type: TIME + +Specification: CURTIME() -> TIME + +Example: + + > source=people | eval `value_1` = CURTIME(), `value_2` = CURTIME() | fields `value_1`, `value_2` + fetched rows / total rows = 1/1 + +-----------+-----------+ + | value_1 | value_2 | + |-----------+-----------| + | 15:39:05 | 15:39:05 | + +-----------+-----------+ + + +### `DATE` + +**Description:** + + +**Usage:** date(expr) constructs a date type with the input string expr as a date. If the argument is of type DATE/TIMESTAMP, it extracts the date value part from the expression.
+ +Argument type: STRING/DATE/TIMESTAMP + +Return type: DATE + +Example: + + os> source=people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')` + fetched rows / total rows = 1/1 + +----------------------+ + | DATE('2020-08-26') | + |----------------------| + | 2020-08-26 | + +----------------------+ + + os> source=people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))` + fetched rows / total rows = 1/1 + +------------------------------------------+ + | DATE(TIMESTAMP('2020-08-26 13:49:00')) | + |------------------------------------------| + | 2020-08-26 | + +------------------------------------------+ + + os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` + fetched rows / total rows = 1/1 + +----------------------------+ + | DATE('2020-08-26 13:49') | + |----------------------------| + | 2020-08-26 | + +----------------------------+ + + os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` + fetched rows / total rows = 1/1 + +----------------------------+ + | DATE('2020-08-26 13:49') | + |----------------------------| + | 2020-08-26 | + +----------------------------+ + + +### `DATE_ADD` + +**Description:** + + +**Usage:** date_add(date, INTERVAL expr unit) adds the interval expr to date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. 
+ +Argument type: DATE/TIMESTAMP/TIME, INTERVAL + +Return type: TIMESTAMP + +Synonyms: [`ADDDATE`](#adddate) + +Antonyms: [`DATE_SUB`](#date_sub) + +Example: + + os> source=people | eval `'2020-08-26' + 1h` = DATE_ADD(DATE('2020-08-26'), INTERVAL 1 HOUR), `ts '2020-08-26 01:01:01' + 1d` = DATE_ADD(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 DAY) | fields `'2020-08-26' + 1h`, `ts '2020-08-26 01:01:01' + 1d` + fetched rows / total rows = 1/1 + +---------------------+---------------------------------+ + | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | + |---------------------+---------------------------------| + | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | + +---------------------+---------------------------------+ + + +### `DATE_FORMAT` + + +**Description:** + + +**Usage:** date_format(date, format) formats the date argument using the specifiers in the format argument. +If an argument of type TIME is provided, the local date is used. + +| Specifier | Description | +|-----------|------------------| +| %a | Abbreviated weekday name (Sun..Sat) | +| %b | Abbreviated month name (Jan..Dec) | +| %c | Month, numeric (0..12) | +| %D | Day of the month with English suffix (0th, 1st, 2nd, 3rd, ...)
| +| %d | Day of the month, numeric (00..31) | +| %e | Day of the month, numeric (0..31) | +| %f | Microseconds (000000..999999) | +| %H | Hour (00..23) | +| %h | Hour (01..12) | +| %I | Hour (01..12) | +| %i | Minutes, numeric (00..59) | +| %j | Day of year (001..366) | +| %k | Hour (0..23) | +| %l | Hour (1..12) | +| %M | Month name (January..December) | +| %m | Month, numeric (00..12) | +| %p | AM or PM | +| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) | +| %S | Seconds (00..59) | +| %s | Seconds (00..59) | +| %T | Time, 24-hour (hh:mm:ss) | +| %U | Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 | +| %u | Week (00..53), where Monday is the first day of the week; WEEK() mode 1 | +| %V | Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X | +| %v | Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x | +| %W | Weekday name (Sunday..Saturday) | +| %w | Day of the week (0=Sunday..6=Saturday) | +| %X | Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V | +| %x | Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v | +| %Y | Year, numeric, four digits | +| %y | Year, numeric (two digits) | +| %% | A literal % character | +| %x | x, for any “x” not listed above | +| x | x, for any smallcase/uppercase alphabet except [aydmshiHIMYDSEL] | + +Argument type: STRING/DATE/TIME/TIMESTAMP, STRING + +Return type: STRING + +Example: + + os> source=people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')` = DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` + fetched rows / total rows = 1/1 + 
+------------------------------------------------------+-----------------------------------------------------------------------+ + | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | + |------------------------------------------------------+-----------------------------------------------------------------------| + | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | + +------------------------------------------------------+-----------------------------------------------------------------------+ + + +### `DATETIME` + + +**Description:** + +**Usage:** `DATETIME(timestamp)/ DATETIME(date, to_timezone)` Converts the datetime to a new timezone + +Argument type: timestamp/STRING + +**Return type map:** + +(TIMESTAMP, STRING) -> TIMESTAMP + +(TIMESTAMP) -> TIMESTAMP + + +Converting timestamp with timezone to the second argument timezone. + +Example: + + os> source=people | eval `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` = DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | fields `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` + fetched rows / total rows = 1/1 + +---------------------------------------------------+ + | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | + |---------------------------------------------------| + | 2004-02-29 19:00:00 | + +---------------------------------------------------+ + + +The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. 
+ +Example: + + os> source=people | eval `DATETIME('2008-01-01 02:00:00', '-14:00')` = DATETIME('2008-01-01 02:00:00', '-14:00') | fields `DATETIME('2008-01-01 02:00:00', '-14:00')` + fetched rows / total rows = 1/1 + +---------------------------------------------+ + | DATETIME('2008-01-01 02:00:00', '-14:00') | + |---------------------------------------------| + | null | + +---------------------------------------------+ + + +### `DATE_SUB` + + +**Description:** + + +**Usage:** date_sub(date, INTERVAL expr unit) subtracts the interval expr from date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. + +Argument type: DATE/TIMESTAMP/TIME, INTERVAL + +Return type: TIMESTAMP + +Synonyms: [`SUBDATE`](#subdate) + +Antonyms: [`DATE_ADD`](#date_add) + +Example: + + os> source=people | eval `'2008-01-02' - 31d` = DATE_SUB(DATE('2008-01-02'), INTERVAL 31 DAY), `ts '2020-08-26 01:01:01' - 1h` = DATE_SUB(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 HOUR) | fields `'2008-01-02' - 31d`, `ts '2020-08-26 01:01:01' - 1h` + fetched rows / total rows = 1/1 + +----------------------+---------------------------------+ + | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' - 1h | + |----------------------+---------------------------------| + | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | + +----------------------+---------------------------------+ + + +### `DATEDIFF` + +**Usage:** Calculates the difference between the date parts of the given values. If the first argument is of type TIME, today's date is used.
+ +Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME + +Return type: LONG + +Example: + + os> source=people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')), `today - today` = DATEDIFF(TIME('23:59:59'), TIME('00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`, `today - today` + fetched rows / total rows = 1/1 + +-------------------------------+-------------------------------+-----------------+ + | '2000-01-02' - '2000-01-01' | '2001-02-01' - '2004-01-01' | today - today | + |-------------------------------+-------------------------------+-----------------| + | 1 | -1064 | 0 | + +-------------------------------+-------------------------------+-----------------+ + + +### `DAY` + +**Description:** + + +**Usage:** day(date) extracts the day of the month for date, in the range 1 to 31. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAYOFMONTH`_, `DAY_OF_MONTH`_ + +Example: + + os> source=people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +---------------------------+ + | DAY(DATE('2020-08-26')) | + |---------------------------| + | 26 | + +---------------------------+ + + +### `DAYNAME` + +**Description:** + + +**Usage:** + +`dayname(date)` returns the name of the weekday for date, including Monday, Tuesday, Wednesday, Thursday, Friday, Saturday and Sunday. 
+ +Argument type: STRING/DATE/TIMESTAMP + +Return type: STRING + +Example: + + os> source=people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +-------------------------------+ + | DAYNAME(DATE('2020-08-26')) | + |-------------------------------| + | Wednesday | + +-------------------------------+ + + +### `DAYOFMONTH` + +**Description:** + + +**Usage:** + +`dayofmonth(date)` extracts the day of the month for date, in the range 1 to 31. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAY`_, `DAY_OF_MONTH`_ + +Example: + + os> source=people | eval `DAYOFMONTH(DATE('2020-08-26'))` = DAYOFMONTH(DATE('2020-08-26')) | fields `DAYOFMONTH(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +----------------------------------+ + | DAYOFMONTH(DATE('2020-08-26')) | + |----------------------------------| + | 26 | + +----------------------------------+ + + +### `DAY_OF_MONTH` + +**Description:** + + +**Usage:** + +`day_of_month(date)` extracts the day of the month for date, in the range 1 to 31. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAY`_, `DAYOFMONTH`_ + +Example: + + os> source=people | eval `DAY_OF_MONTH(DATE('2020-08-26'))` = DAY_OF_MONTH(DATE('2020-08-26')) | fields `DAY_OF_MONTH(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +------------------------------------+ + | DAY_OF_MONTH(DATE('2020-08-26')) | + |------------------------------------| + | 26 | + +------------------------------------+ + + +### `DAYOFWEEK` + +**Description:** + + +**Usage:** + +`dayofweek(date)` returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). 
+ +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAY_OF_WEEK`_ + +Example: + + os> source=people | eval `DAYOFWEEK(DATE('2020-08-26'))` = DAYOFWEEK(DATE('2020-08-26')) | fields `DAYOFWEEK(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +---------------------------------+ + | DAYOFWEEK(DATE('2020-08-26')) | + |---------------------------------| + | 4 | + +---------------------------------+ + + +### `DAY_OF_WEEK` + + +**Description:** + + +**Usage:** day_of_week(date) returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAYOFWEEK`_ + +Example: + + os> source=people | eval `DAY_OF_WEEK(DATE('2020-08-26'))` = DAY_OF_WEEK(DATE('2020-08-26')) | fields `DAY_OF_WEEK(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +-----------------------------------+ + | DAY_OF_WEEK(DATE('2020-08-26')) | + |-----------------------------------| + | 4 | + +-----------------------------------+ + + +### `DAYOFYEAR` + +**Description:** + + +**Usage:** + +`dayofyear(date)` returns the day of the year for date, in the range 1 to 366. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAY_OF_YEAR`_ + +Example: + + os> source=people | eval `DAYOFYEAR(DATE('2020-08-26'))` = DAYOFYEAR(DATE('2020-08-26')) | fields `DAYOFYEAR(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +---------------------------------+ + | DAYOFYEAR(DATE('2020-08-26')) | + |---------------------------------| + | 239 | + +---------------------------------+ + + +### `DAY_OF_YEAR` + +**Description:** + + +**Usage:** day_of_year(date) returns the day of the year for date, in the range 1 to 366. 
+ +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `DAYOFYEAR`_ + +Example: + + os> source=people | eval `DAY_OF_YEAR(DATE('2020-08-26'))` = DAY_OF_YEAR(DATE('2020-08-26')) | fields `DAY_OF_YEAR(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +-----------------------------------+ + | DAY_OF_YEAR(DATE('2020-08-26')) | + |-----------------------------------| + | 239 | + +-----------------------------------+ + + +### `EXTRACT` + +**Description:** + + +**Usage:** + +extract(part FROM date) returns a LONG with digits in order according to the given 'part' arguments. +The specific format of the returned long is determined by the table below. + +Argument type: PART, where PART is one of the following tokens in the table below. + +The format specifiers found in this table are the same as those found in the `DATE_FORMAT`_ function. + +| Part | Format | +|----------------------|---------------| +| MICROSECOND | %f | +| SECOND | %s | +| MINUTE | %i | +| HOUR | %H | +| DAY | %d | +| WEEK | %X | +| MONTH | %m | +| YEAR | %V | +| SECOND_MICROSECOND | %s%f | +| MINUTE_MICROSECOND | %i%s%f | +| MINUTE_SECOND | %i%s | +| HOUR_MICROSECOND | %H%i%s%f | +| HOUR_SECOND | %H%i%s | +| HOUR_MINUTE | %H%i | +| DAY_MICROSECOND | %d%H%i%s%f | +| DAY_SECOND | %d%H%i%s | +| DAY_MINUTE | %d%H%i | +| DAY_HOUR | %d%H% | +| YEAR_MONTH | %V%m | + + +Return type: LONG + +Example: + + os> source=people | eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` + fetched rows / total rows = 1/1 + +--------------------------------------------------+ + | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | + |--------------------------------------------------| + | 202302 | + +--------------------------------------------------+ + + +### `FROM_DAYS` + +**Description:** + + +**Usage:** from_days(N) returns the date value given the day number N. 
+ +Argument type: INTEGER/LONG + +Return type: DATE + +Example: + + os> source=people | eval `FROM_DAYS(733687)` = FROM_DAYS(733687) | fields `FROM_DAYS(733687)` + fetched rows / total rows = 1/1 + +---------------------+ + | FROM_DAYS(733687) | + |---------------------| + | 2008-10-07 | + +---------------------+ + + +### `FROM_UNIXTIME` + +**Description:** + + +**Usage:** + +Returns a representation of the argument given as a timestamp or character string value. Perform reverse conversion for `UNIX_TIMESTAMP`_ function. +If second argument is provided, it is used to format the result in the same way as the format string used for the `DATE_FORMAT`_ function. +If timestamp is outside of range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), function returns NULL. +Argument type: DOUBLE, STRING + +**Return type map:** + +DOUBLE -> TIMESTAMP + +DOUBLE, STRING -> STRING + +Examples: + + os> source=people | eval `FROM_UNIXTIME(1220249547)` = FROM_UNIXTIME(1220249547) | fields `FROM_UNIXTIME(1220249547)` + fetched rows / total rows = 1/1 + +-----------------------------+ + | FROM_UNIXTIME(1220249547) | + |-----------------------------| + | 2008-09-01 06:12:27 | + +-----------------------------+ + + os> source=people | eval `FROM_UNIXTIME(1220249547, '%T')` = FROM_UNIXTIME(1220249547, '%T') | fields `FROM_UNIXTIME(1220249547, '%T')` + fetched rows / total rows = 1/1 + +-----------------------------------+ + | FROM_UNIXTIME(1220249547, '%T') | + |-----------------------------------| + | 06:12:27 | + +-----------------------------------+ + + +### `GET_FORMAT` + + +**Description:** + + +**Usage:** + +Returns a string value containing string format specifiers based on the input arguments. + +Argument type: TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, TIMESTAMP], and +STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by '). 
+ +Examples: + + os> source=people | eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA') | fields `GET_FORMAT(DATE, 'USA')` + fetched rows / total rows = 1/1 + +---------------------------+ + | GET_FORMAT(DATE, 'USA') | + |---------------------------| + | %m.%d.%Y | + +---------------------------+ + + +### `HOUR` + +**Description:** + + +**Usage:** + +hour(time) extracts the hour value for time. Different from the time of day value, the time value has a large range and can be greater than 23, so the return value of hour(time) can be also greater than 23. + +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Synonyms: `HOUR_OF_DAY`_ + +Example: + + os> source=people | eval `HOUR(TIME('01:02:03'))` = HOUR(TIME('01:02:03')) | fields `HOUR(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +--------------------------+ + | HOUR(TIME('01:02:03')) | + |--------------------------| + | 1 | + +--------------------------+ + + +### `HOUR_OF_DAY` + +**Description:** + + +**Usage:** + +hour_of_day(time) extracts the hour value for time. Different from the time of day value, the time value has a large range and can be greater than 23, so the return value of hour_of_day(time) can be also greater than 23. + +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Synonyms: `HOUR`_ + +Example: + + os> source=people | eval `HOUR_OF_DAY(TIME('01:02:03'))` = HOUR_OF_DAY(TIME('01:02:03')) | fields `HOUR_OF_DAY(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +---------------------------------+ + | HOUR_OF_DAY(TIME('01:02:03')) | + |---------------------------------| + | 1 | + +---------------------------------+ + + +### `LAST_DAY` + +**Usage:** + +Returns the last day of the month as a DATE for a valid argument. 
+ +Argument type: DATE/STRING/TIMESTAMP/TIME + +Return type: DATE + +Example: + + os> source=people | eval `last_day('2023-02-06')` = last_day('2023-02-06') | fields `last_day('2023-02-06')` + fetched rows / total rows = 1/1 + +--------------------------+ + | last_day('2023-02-06') | + |--------------------------| + | 2023-02-28 | + +--------------------------+ + + +### `LOCALTIMESTAMP` + +**Description:** + +`LOCALTIMESTAMP()` is a synonym for [`NOW()`](#now). + +Example: + + > source=people | eval `LOCALTIMESTAMP()` = LOCALTIMESTAMP() | fields `LOCALTIMESTAMP()` + fetched rows / total rows = 1/1 + +---------------------+ + | LOCALTIMESTAMP() | + |---------------------| + | 2022-08-02 15:54:19 | + +---------------------+ + + +### `LOCALTIME` + +**Description:** + + +`LOCALTIME()` is a synonym for [`NOW()`](#now). + +Example: + + > source=people | eval `LOCALTIME()` = LOCALTIME() | fields `LOCALTIME()` + fetched rows / total rows = 1/1 + +---------------------+ + | LOCALTIME() | + |---------------------| + | 2022-08-02 15:54:19 | + +---------------------+ + + +### `MAKEDATE` + + +**Description:** + + +Returns a date, given `year` and `day-of-year` values. `day-of-year` must be greater than 0 or the result is `NULL`. The result is also `NULL` if either argument is `NULL`. +Arguments are rounded to an integer. + +**Limitations**: +- Zero `year` is interpreted as 2000; +- Negative `year` is not accepted; +- `day-of-year` should be greater than zero; +- `day-of-year` could be greater than 365/366, in which case the calculation switches to the next year(s) (see example). + +**Specifications**: + +1.
MAKEDATE(DOUBLE, DOUBLE) -> DATE + +Argument type: DOUBLE + +Return type: DATE + +Example: + + os> source=people | eval `MAKEDATE(1945, 5.9)` = MAKEDATE(1945, 5.9), `MAKEDATE(1984, 1984)` = MAKEDATE(1984, 1984) | fields `MAKEDATE(1945, 5.9)`, `MAKEDATE(1984, 1984)` + fetched rows / total rows = 1/1 + +-----------------------+------------------------+ + | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | + |-----------------------+------------------------| + | 1945-01-06 | 1989-06-06 | + +-----------------------+------------------------+ + + +### `MAKETIME` + + +**Description:** + + +Returns a time value calculated from the hour, minute, and second arguments. Returns `NULL` if any of its arguments are `NULL`. +The second argument can have a fractional part, rest arguments are rounded to an integer. + +**Limitations**: +- 24-hour clock is used, available time range is [00:00:00.0 - 23:59:59.(9)]; +- Up to 9 digits of second fraction part is taken (nanosecond precision). + +**Specifications**: + +1. `MAKETIME(DOUBLE, DOUBLE, DOUBLE)` -> TIME + +Argument type: DOUBLE + +Return type: TIME + +Example: + + os> source=people | eval `MAKETIME(20, 30, 40)` = MAKETIME(20, 30, 40), `MAKETIME(20.2, 49.5, 42.100502)` = MAKETIME(20.2, 49.5, 42.100502) | fields `MAKETIME(20, 30, 40)`, `MAKETIME(20.2, 49.5, 42.100502)` + fetched rows / total rows = 1/1 + +------------------------+-----------------------------------+ + | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | + |------------------------+-----------------------------------| + | 20:30:40 | 20:50:42.100502 | + +------------------------+-----------------------------------+ + + +### `MICROSECOND` + +**Description:** + + +**Usage:** microsecond(expr) returns the microseconds from the time or timestamp expression expr as a number in the range from 0 to 999999. 
+ +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Example: + + os> source=people | eval `MICROSECOND(TIME('01:02:03.123456'))` = MICROSECOND(TIME('01:02:03.123456')) | fields `MICROSECOND(TIME('01:02:03.123456'))` + fetched rows / total rows = 1/1 + +----------------------------------------+ + | MICROSECOND(TIME('01:02:03.123456')) | + |----------------------------------------| + | 123456 | + +----------------------------------------+ + + +### `MINUTE` + +**Description:** + + +**Usage:** minute(time) returns the minute for time, in the range 0 to 59. + +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Synonyms: `MINUTE_OF_HOUR`_ + +Example: + + os> source=people | eval `MINUTE(TIME('01:02:03'))` = MINUTE(TIME('01:02:03')) | fields `MINUTE(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +----------------------------+ + | MINUTE(TIME('01:02:03')) | + |----------------------------| + | 2 | + +----------------------------+ + + +### `MINUTE_OF_DAY` + +**Description:** + + +**Usage:** minute_of_day(time) returns the number of minutes elapsed in the day, in the range 0 to 1439. + +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Example: + + os> source=people | eval `MINUTE_OF_DAY(TIME('01:02:03'))` = MINUTE_OF_DAY(TIME('01:02:03')) | fields `MINUTE_OF_DAY(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +-----------------------------------+ + | MINUTE_OF_DAY(TIME('01:02:03')) | + |-----------------------------------| + | 62 | + +-----------------------------------+ + + +### `MINUTE_OF_HOUR` + +**Description:** + + +**Usage:** minute_of_hour(time) returns the minute for time, in the range 0 to 59.
+ +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Synonyms: `MINUTE`_ + +Example: + + os> source=people | eval `MINUTE_OF_HOUR(TIME('01:02:03'))` = MINUTE_OF_HOUR(TIME('01:02:03')) | fields `MINUTE_OF_HOUR(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +------------------------------------+ + | MINUTE_OF_HOUR(TIME('01:02:03')) | + |------------------------------------| + | 2 | + +------------------------------------+ + + +### `MONTH` + +**Description:** + +**Usage:** month(date) returns the month for date, in the range 1 to 12 for January to December. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `MONTH_OF_YEAR`_ + +Example: + + os> source=people | eval `MONTH(DATE('2020-08-26'))` = MONTH(DATE('2020-08-26')) | fields `MONTH(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +-----------------------------+ + | MONTH(DATE('2020-08-26')) | + |-----------------------------| + | 8 | + +-----------------------------+ + + +### `MONTH_OF_YEAR` + +**Description:** + + +**Usage:** month_of_year(date) returns the month for date, in the range 1 to 12 for January to December. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Synonyms: `MONTH`_ + +Example: + + os> source=people | eval `MONTH_OF_YEAR(DATE('2020-08-26'))` = MONTH_OF_YEAR(DATE('2020-08-26')) | fields `MONTH_OF_YEAR(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +-------------------------------------+ + | MONTH_OF_YEAR(DATE('2020-08-26')) | + |-------------------------------------| + | 8 | + +-------------------------------------+ + + +### `MONTHNAME` + +**Description:** + + +**Usage:** monthname(date) returns the full name of the month for date. 
+ +Argument type: STRING/DATE/TIMESTAMP + +Return type: STRING + +Example: + + os> source=people | eval `MONTHNAME(DATE('2020-08-26'))` = MONTHNAME(DATE('2020-08-26')) | fields `MONTHNAME(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +---------------------------------+ + | MONTHNAME(DATE('2020-08-26')) | + |---------------------------------| + | August | + +---------------------------------+ + + +### `NOW` + +**Description:** + + +Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss' format. The value is expressed in the cluster time zone. +`NOW()` returns a constant time that indicates the time at which the statement began to execute. This differs from the behavior for `SYSDATE() <#sysdate>`_, which returns the exact time at which it executes. + +Return type: TIMESTAMP + +Specification: NOW() -> TIMESTAMP + +Example: + + > source=people | eval `value_1` = NOW(), `value_2` = NOW() | fields `value_1`, `value_2` + fetched rows / total rows = 1/1 + +---------------------+---------------------+ + | value_1 | value_2 | + |---------------------+---------------------| + | 2022-08-02 15:39:05 | 2022-08-02 15:39:05 | + +---------------------+---------------------+ + + +### `PERIOD_ADD` + + +**Description:** + + +**Usage:** period_add(P, N) add N months to period P (in the format YYMM or YYYYMM). Returns a value in the format YYYYMM. 
+ +Argument type: INTEGER, INTEGER + +Return type: INTEGER + +Example: + + os> source=people | eval `PERIOD_ADD(200801, 2)` = PERIOD_ADD(200801, 2), `PERIOD_ADD(200801, -12)` = PERIOD_ADD(200801, -12) | fields `PERIOD_ADD(200801, 2)`, `PERIOD_ADD(200801, -12)` + fetched rows / total rows = 1/1 + +-------------------------+---------------------------+ + | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | + |-------------------------+---------------------------| + | 200803 | 200701 | + +-------------------------+---------------------------+ + + +### `PERIOD_DIFF` + +**Description:** + + +**Usage:** period_diff(P1, P2) returns the number of months between periods P1 and P2 given in the format YYMM or YYYYMM. + +Argument type: INTEGER, INTEGER + +Return type: INTEGER + +Example: + + os> source=people | eval `PERIOD_DIFF(200802, 200703)` = PERIOD_DIFF(200802, 200703), `PERIOD_DIFF(200802, 201003)` = PERIOD_DIFF(200802, 201003) | fields `PERIOD_DIFF(200802, 200703)`, `PERIOD_DIFF(200802, 201003)` + fetched rows / total rows = 1/1 + +-------------------------------+-------------------------------+ + | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | + |-------------------------------+-------------------------------| + | 11 | -25 | + +-------------------------------+-------------------------------+ + + +### `QUARTER` + +**Description:** + + +**Usage:** quarter(date) returns the quarter of the year for date, in the range 1 to 4. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Example: + + os> source=people | eval `QUARTER(DATE('2020-08-26'))` = QUARTER(DATE('2020-08-26')) | fields `QUARTER(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +-------------------------------+ + | QUARTER(DATE('2020-08-26')) | + |-------------------------------| + | 3 | + +-------------------------------+ + + +### `SEC_TO_TIME` + +**Description:** + + +**Usage:** + +sec_to_time(number) returns the time in HH:mm:ss[.nnnnnn] format.
+Note that the function returns a time between 00:00:00 and 23:59:59. +If an input value is too large (greater than 86399), the function will wrap around and begin returning outputs starting from 00:00:00. +If an input value is too small (less than 0), the function will wrap around and begin returning outputs counting down from 23:59:59. + +Argument type: INTEGER, LONG, DOUBLE, FLOAT + +Return type: TIME + +Example: + + os> source=people | eval `SEC_TO_TIME(3601)` = SEC_TO_TIME(3601) | eval `SEC_TO_TIME(1234.123)` = SEC_TO_TIME(1234.123) | fields `SEC_TO_TIME(3601)`, `SEC_TO_TIME(1234.123)` + fetched rows / total rows = 1/1 + +---------------------+-------------------------+ + | SEC_TO_TIME(3601) | SEC_TO_TIME(1234.123) | + |---------------------+-------------------------| + | 01:00:01 | 00:20:34.123 | + +---------------------+-------------------------+ + + +### `SECOND` + +**Description:** + + +**Usage:** second(time) returns the second for time, in the range 0 to 59. + +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Synonyms: `SECOND_OF_MINUTE`_ + +Example: + + os> source=people | eval `SECOND(TIME('01:02:03'))` = SECOND(TIME('01:02:03')) | fields `SECOND(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +----------------------------+ + | SECOND(TIME('01:02:03')) | + |----------------------------| + | 3 | + +----------------------------+ + + +### `SECOND_OF_MINUTE` + +**Description:** + + +**Usage:** second_of_minute(time) returns the second for time, in the range 0 to 59. 
+ +Argument type: STRING/TIME/TIMESTAMP + +Return type: INTEGER + +Synonyms: `SECOND`_ + +Example: + + os> source=people | eval `SECOND_OF_MINUTE(TIME('01:02:03'))` = SECOND_OF_MINUTE(TIME('01:02:03')) | fields `SECOND_OF_MINUTE(TIME('01:02:03'))` + fetched rows / total rows = 1/1 + +--------------------------------------+ + | SECOND_OF_MINUTE(TIME('01:02:03')) | + |--------------------------------------| + | 3 | + +--------------------------------------+ + + +### `STR_TO_DATE` + +**Description:** + + +**Usage:** str_to_date(string, string) is used to extract a TIMESTAMP from the first argument string using the formats specified in the second argument string. +The input argument must have enough information to be parsed as a DATE, TIMESTAMP, or TIME. +Acceptable string format specifiers are the same as those used in the `DATE_FORMAT`_ function. +It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. Otherwise, it will return a TIMESTAMP with the parsed values (as well as default values for any field that was not parsed). + +Argument type: STRING, STRING + +Return type: TIMESTAMP + +Example: + + os> source=people | eval `str_to_date("01,5,2013", "%d,%m,%Y")` = str_to_date("01,5,2013", "%d,%m,%Y") | fields `str_to_date("01,5,2013", "%d,%m,%Y")` + fetched rows / total rows = 1/1 + +----------------------------------------+ + | str_to_date("01,5,2013", "%d,%m,%Y") | + |----------------------------------------| + | 2013-05-01 00:00:00 | + +----------------------------------------+ + + +### `SUBDATE` + +**Description:** + + +**Usage:** subdate(date, INTERVAL expr unit) / subdate(date, days) subtracts the interval expr from date; subdate(date, days) subtracts the second argument as integer number of days from date. +If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used.
+ +Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG + +**Return type map:** + +(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP + +(DATE, LONG) -> DATE + +(TIMESTAMP/TIME, LONG) -> TIMESTAMP + +Synonyms: `DATE_SUB`_ when invoked with the INTERVAL form of the second argument. + +Antonyms: `ADDDATE`_ + +Example: + + os> source=people | eval `'2008-01-02' - 31d` = SUBDATE(DATE('2008-01-02'), INTERVAL 31 DAY), `'2020-08-26' - 1` = SUBDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' - 1` = SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2008-01-02' - 31d`, `'2020-08-26' - 1`, `ts '2020-08-26 01:01:01' - 1` + fetched rows / total rows = 1/1 + +----------------------+--------------------+--------------------------------+ + | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | + |----------------------+--------------------+--------------------------------| + | 2007-12-02 00:00:00 | 2020-08-25 | 2020-08-25 01:01:01 | + +----------------------+--------------------+--------------------------------+ + + +### `SUBTIME` + +**Description:** + + +**Usage:** subtime(expr1, expr2) subtracts expr2 from expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. 
+ +Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME + +**Return type map:** + +(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP + +(TIME, DATE/TIMESTAMP/TIME) -> TIME + +Antonyms: `ADDTIME`_ + +Example: + + os> source=people | eval `'2008-12-12' - 0` = SUBTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' - 0` + fetched rows / total rows = 1/1 + +---------------------+ + | '2008-12-12' - 0 | + |---------------------| + | 2008-12-12 00:00:00 | + +---------------------+ + + os> source=people | eval `'23:59:59' - 0` = SUBTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' - 0` + fetched rows / total rows = 1/1 + +------------------+ + | '23:59:59' - 0 | + |------------------| + | 23:59:59 | + +------------------+ + + os> source=people | eval `'2004-01-01' - '23:59:59'` = SUBTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' - '23:59:59'` + fetched rows / total rows = 1/1 + +-----------------------------+ + | '2004-01-01' - '23:59:59' | + |-----------------------------| + | 2003-12-31 00:00:01 | + +-----------------------------+ + + os> source=people | eval `'10:20:30' - '00:05:42'` = SUBTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' - '00:05:42'` + fetched rows / total rows = 1/1 + +---------------------------+ + | '10:20:30' - '00:05:42' | + |---------------------------| + | 10:14:48 | + +---------------------------+ + + os> source=people | eval `'2007-03-01 10:20:30' - '20:40:50'` = SUBTIME(TIMESTAMP('2007-03-01 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-03-01 10:20:30' - '20:40:50'` + fetched rows / total rows = 1/1 + +--------------------------------------+ + | '2007-03-01 10:20:30' - '20:40:50' | + |--------------------------------------| + | 2007-02-28 13:39:40 | + +--------------------------------------+ + + +### `SYSDATE` + +**Description:** + + +Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss[.nnnnnn]'. +SYSDATE() returns the time at which it executes. 
This differs from the behavior for `NOW() <#now>`_, which returns a constant time that indicates the time at which the statement began to execute. +If the argument is given, it specifies a fractional seconds precision from 0 to 6, the return value includes a fractional seconds part of that many digits. + +Optional argument type: INTEGER + +Return type: TIMESTAMP + +Specification: SYSDATE([INTEGER]) -> TIMESTAMP + +Example: + + > source=people | eval `value_1` = SYSDATE(), `value_2` = SYSDATE(6) | fields `value_1`, `value_2` + fetched rows / total rows = 1/1 + +---------------------+----------------------------+ + | value_1 | value_2 | + |---------------------+----------------------------| + | 2022-08-02 15:39:05 | 2022-08-02 15:39:05.123456 | + +---------------------+----------------------------+ + + +### `TIME` + +**Description:** + + +**Usage:** time(expr) constructs a time type with the input string expr as a time. If the argument is of date/time/timestamp, it extracts the time value part from the expression. 
+ +Argument type: STRING/DATE/TIME/TIMESTAMP + +Return type: TIME + +Example: + + os> source=people | eval `TIME('13:49:00')` = TIME('13:49:00') | fields `TIME('13:49:00')` + fetched rows / total rows = 1/1 + +--------------------+ + | TIME('13:49:00') | + |--------------------| + | 13:49:00 | + +--------------------+ + + os> source=people | eval `TIME('13:49')` = TIME('13:49') | fields `TIME('13:49')` + fetched rows / total rows = 1/1 + +-----------------+ + | TIME('13:49') | + |-----------------| + | 13:49:00 | + +-----------------+ + + os> source=people | eval `TIME('2020-08-26 13:49:00')` = TIME('2020-08-26 13:49:00') | fields `TIME('2020-08-26 13:49:00')` + fetched rows / total rows = 1/1 + +-------------------------------+ + | TIME('2020-08-26 13:49:00') | + |-------------------------------| + | 13:49:00 | + +-------------------------------+ + + os> source=people | eval `TIME('2020-08-26 13:49')` = TIME('2020-08-26 13:49') | fields `TIME('2020-08-26 13:49')` + fetched rows / total rows = 1/1 + +----------------------------+ + | TIME('2020-08-26 13:49') | + |----------------------------| + | 13:49:00 | + +----------------------------+ + + +### `TIME_FORMAT` + + +**Description:** + + +**Usage:** + +time_format(time, format) formats the time argument using the specifiers in the format argument. +This supports a subset of the time format specifiers available for the `date_format`_ function. +Using date format specifiers supported by `date_format`_ will return 0 or null. +Acceptable format specifiers are listed in the table below. +If an argument of type DATE is passed in, it is treated as a TIMESTAMP at midnight (i.e., 00:00:00). 
+ +| Specifier | **Description** | +|-----------|-----------------| +| %f | Microseconds (000000..999999) | +| %H | Hour (00..23) | +| %h | Hour (01..12) | +| %I | Hour (01..12) | +| %i | Minutes, numeric (00..59) | +| %p | AM or PM | +| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) | +| %S | Seconds (00..59) | +| %s | Seconds (00..59) | +| %T | Time, 24-hour (hh:mm:ss) | + + +Argument type: STRING/DATE/TIME/TIMESTAMP, STRING + +Return type: STRING + +Example: + + os> source=people | eval `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` = TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | fields `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` + fetched rows / total rows = 1/1 + +------------------------------------------------------------------------------+ + | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | + |------------------------------------------------------------------------------| + | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | + +------------------------------------------------------------------------------+ + + +### `TIME_TO_SEC` + +**Description:** + + +**Usage:** time_to_sec(time) returns the time argument, converted to seconds. + +Argument type: STRING/TIME/TIMESTAMP + +Return type: LONG + +Example: + + os> source=people | eval `TIME_TO_SEC(TIME('22:23:00'))` = TIME_TO_SEC(TIME('22:23:00')) | fields `TIME_TO_SEC(TIME('22:23:00'))` + fetched rows / total rows = 1/1 + +---------------------------------+ + | TIME_TO_SEC(TIME('22:23:00')) | + |---------------------------------| + | 80580 | + +---------------------------------+ + + +### `TIMEDIFF` + +**Description:** + + +**Usage:** returns the difference between two time expressions as a time. 
+ +Argument type: TIME, TIME + +Return type: TIME + +Example: + + os> source=people | eval `TIMEDIFF('23:59:59', '13:00:00')` = TIMEDIFF('23:59:59', '13:00:00') | fields `TIMEDIFF('23:59:59', '13:00:00')` + fetched rows / total rows = 1/1 + +------------------------------------+ + | TIMEDIFF('23:59:59', '13:00:00') | + |------------------------------------| + | 10:59:59 | + +------------------------------------+ + + +### `TIMESTAMP` + +**Description:** + + +**Usage:** timestamp(expr) constructs a timestamp type with the input string `expr` as a timestamp. If the argument is not a string, it casts `expr` to timestamp type with default timezone UTC. If argument is a time, it applies today's date before cast. +With two arguments `timestamp(expr1, expr2)` adds the time expression `expr2` to the date or timestamp expression `expr1` and returns the result as a timestamp value. + +Argument type: `STRING/DATE/TIME/TIMESTAMP` + +**Return type map:** + +(STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP + +(STRING/DATE/TIME/TIMESTAMP, STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP + +Example: + + os> source=people | eval `TIMESTAMP('2020-08-26 13:49:00')` = TIMESTAMP('2020-08-26 13:49:00'), `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` = TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | fields `TIMESTAMP('2020-08-26 13:49:00')`, `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` + fetched rows / total rows = 1/1 + +------------------------------------+------------------------------------------------------+ + | TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | + |------------------------------------+------------------------------------------------------| + | 2020-08-26 13:49:00 | 2020-08-27 02:04:42 | + +------------------------------------+------------------------------------------------------+ + + +### `TIMESTAMPADD` + +**Description:** + + +**Usage:** Returns a TIMESTAMP value based on a passed in DATE/TIME/TIMESTAMP/STRING argument and
an INTERVAL and INTEGER argument which determine the amount of time to be added. +If the third argument is a STRING, it must be formatted as a valid TIMESTAMP. If only a TIME is provided, a TIMESTAMP is still returned with the DATE portion filled in using the current date. +If the third argument is a DATE, it will be automatically converted to a TIMESTAMP. + +Argument type: INTERVAL, INTEGER, DATE/TIME/TIMESTAMP/STRING + +INTERVAL must be one of the following tokens: `[MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR]` + +Examples: + + os> source=people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` + fetched rows / total rows = 1/1 + +------------------------------------------------+----------------------------------------------------+ + | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | + |------------------------------------------------+----------------------------------------------------| + | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | + +------------------------------------------------+----------------------------------------------------+ + + +### `TIMESTAMPDIFF` + +**Description:** + + +**Usage:** + +`TIMESTAMPDIFF(interval, start, end)` returns the difference between the start and end date/times in interval units. +If a TIME is provided as an argument, it will be converted to a TIMESTAMP with the DATE portion filled in using the current date. +Arguments will be automatically converted to a TIME/TIMESTAMP when appropriate. +Any argument that is a STRING must be formatted as a valid TIMESTAMP. 
+ +Argument type: INTERVAL, DATE/TIME/TIMESTAMP/STRING, DATE/TIME/TIMESTAMP/STRING + +INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] + +Examples: + + os> source=people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` + fetched rows / total rows = 1/1 + +---------------------------------------------------------------------+-------------------------------------------------------------+ + | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | + |---------------------------------------------------------------------+-------------------------------------------------------------| + | 4 | -23 | + +---------------------------------------------------------------------+-------------------------------------------------------------+ + + +### `TO_DAYS` + +**Description:** + + +**Usage:** to_days(date) returns the day number (the number of days since year 0) of the given date. Returns NULL if date is invalid. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: LONG + +Example: + + os> source=people | eval `TO_DAYS(DATE('2008-10-07'))` = TO_DAYS(DATE('2008-10-07')) | fields `TO_DAYS(DATE('2008-10-07'))` + fetched rows / total rows = 1/1 + +-------------------------------+ + | TO_DAYS(DATE('2008-10-07')) | + |-------------------------------| + | 733687 | + +-------------------------------+ + + +### `TO_SECONDS` + + +**Description:** + + +**Usage:** to_seconds(date) returns the number of seconds since the year 0 of the given value. Returns NULL if value is invalid. 
+An argument of a LONG type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD or YYYYMMDD. Note that a LONG type argument cannot have leading 0s as it will be parsed using an octal numbering system. + +Argument type: STRING/LONG/DATE/TIME/TIMESTAMP + +Return type: LONG + +Example: + + os> source=people | eval `TO_SECONDS(DATE('2008-10-07'))` = TO_SECONDS(DATE('2008-10-07')) | eval `TO_SECONDS(950228)` = TO_SECONDS(950228) | fields `TO_SECONDS(DATE('2008-10-07'))`, `TO_SECONDS(950228)` + fetched rows / total rows = 1/1 + +----------------------------------+----------------------+ + | TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) | + |----------------------------------+----------------------| + | 63390556800 | 62961148800 | + +----------------------------------+----------------------+ + + +### `UNIX_TIMESTAMP` + + +**Description:** + + +**Usage**: + +Converts given argument to Unix time (seconds since Epoch - very beginning of year 1970). If no argument given, it returns the current Unix time. +The date argument may be a DATE, or TIMESTAMP string, or a number in YYMMDD, YYMMDDhhmmss, YYYYMMDD, or YYYYMMDDhhmmss format. If the argument includes a time part, it may optionally include a fractional seconds part. +If argument is in invalid format or outside of range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), function returns NULL. +You can use `FROM_UNIXTIME`_ to do reverse conversion. 
+ +Argument type: `<NONE>`/DOUBLE/DATE/TIMESTAMP + +Return type: DOUBLE + +Example: + + os> source=people | eval `UNIX_TIMESTAMP(double)` = UNIX_TIMESTAMP(20771122143845), `UNIX_TIMESTAMP(timestamp)` = UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42')) | fields `UNIX_TIMESTAMP(double)`, `UNIX_TIMESTAMP(timestamp)` + fetched rows / total rows = 1/1 + +--------------------------+-----------------------------+ + | UNIX_TIMESTAMP(double) | UNIX_TIMESTAMP(timestamp) | + |--------------------------+-----------------------------| + | 3404817525.0 | 848077542.0 | + +--------------------------+-----------------------------+ + + +### `UTC_DATE` + +**Description:** + + +Returns the current UTC date as a value in 'YYYY-MM-DD'. + +Return type: DATE + +Specification: UTC_DATE() -> DATE + +Example: + + > source=people | eval `UTC_DATE()` = UTC_DATE() | fields `UTC_DATE()` + fetched rows / total rows = 1/1 + +--------------+ + | UTC_DATE() | + |--------------| + | 2022-10-03 | + +--------------+ + + +### `UTC_TIME` + + +**Description:** + + +Returns the current UTC time as a value in 'hh:mm:ss'. + +Return type: TIME + +Specification: UTC_TIME() -> TIME + +Example: + + > source=people | eval `UTC_TIME()` = UTC_TIME() | fields `UTC_TIME()` + fetched rows / total rows = 1/1 + +--------------+ + | UTC_TIME() | + |--------------| + | 17:54:27 | + +--------------+ + + +### `UTC_TIMESTAMP` + +**Description:** + + +Returns the current UTC timestamp as a value in 'YYYY-MM-DD hh:mm:ss'. + +Return type: TIMESTAMP + +Specification: UTC_TIMESTAMP() -> TIMESTAMP + +Example: + + > source=people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()` + fetched rows / total rows = 1/1 + +---------------------+ + | UTC_TIMESTAMP() | + |---------------------| + | 2022-10-03 17:54:28 | + +---------------------+ + + +### `WEEK` + +**Description:** + +**Usage:** week(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used.
+ +| Mode | First day of week | Range | Week 1 is the first week... | +|------|-------------------|-------|-----------------------------| +| 0 | Sunday | 0-53 | with a Sunday in this year | +| 1 | Monday | 0-53 | with 4 or more days this year | +| 2 | Sunday | 1-53 | with a Sunday in this year | +| 3 | Monday | 1-53 | with 4 or more days this year | +| 4 | Sunday | 0-53 | with 4 or more days this year | +| 5 | Monday | 0-53 | with a Monday in this year | +| 6 | Sunday | 1-53 | with 4 or more days this year | +| 7 | Monday | 1-53 | with a Monday in this year | + + +Argument type: DATE/TIMESTAMP/STRING + +Return type: INTEGER + +Synonyms: `WEEK_OF_YEAR`_ + +Example: + + os> source=people | eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK(DATE('2008-02-20'), 1)` = WEEK(DATE('2008-02-20'), 1) | fields `WEEK(DATE('2008-02-20'))`, `WEEK(DATE('2008-02-20'), 1)` + fetched rows / total rows = 1/1 + +----------------------------+-------------------------------+ + | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | + |----------------------------+-------------------------------| + | 7 | 8 | + +----------------------------+-------------------------------+ + + +### `WEEKDAY` + +**Description:** + + +**Usage:** weekday(date) returns the weekday index for date (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). + +It is similar to the `dayofweek`_ function, but returns different indexes for each day. 
+ +Argument type: STRING/DATE/TIME/TIMESTAMP + +Return type: INTEGER + +Example: + + os> source=people | eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26')) | eval `weekday(DATE('2020-08-27'))` = weekday(DATE('2020-08-27')) | fields `weekday(DATE('2020-08-26'))`, `weekday(DATE('2020-08-27'))` + fetched rows / total rows = 1/1 + +-------------------------------+-------------------------------+ + | weekday(DATE('2020-08-26')) | weekday(DATE('2020-08-27')) | + |-------------------------------+-------------------------------| + | 2 | 3 | + +-------------------------------+-------------------------------+ + + +### `WEEK_OF_YEAR` + +**Description:** + + +**Usage:** week_of_year(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used. + +| Mode | First day of week | Range | Week 1 is the first week ... | +|------|-------------------|-------|------------------------------| +| 0 | Sunday | 0-53 | with a Sunday in this year | +| 1 | Monday | 0-53 | with 4 or more days this year| +| 2 | Sunday | 1-53 | with a Sunday in this year | +| 3 | Monday | 1-53 | with 4 or more days this year| +| 4 | Sunday | 0-53 | with 4 or more days this year| +| 5 | Monday | 0-53 | with a Monday in this year | +| 6 | Sunday | 1-53 | with 4 or more days this year| +| 7 | Monday | 1-53 | with a Monday in this year | + + +Argument type: DATE/TIMESTAMP/STRING + +Return type: INTEGER + +Synonyms: `WEEK`_ + +Example: + + os> source=people | eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK_OF_YEAR(DATE('2008-02-20')), `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` = WEEK_OF_YEAR(DATE('2008-02-20'), 1) | fields `WEEK_OF_YEAR(DATE('2008-02-20'))`, `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` + fetched rows / total rows = 1/1 + +------------------------------------+---------------------------------------+ + | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | + |------------------------------------+---------------------------------------| + | 7 | 8
| + +------------------------------------+---------------------------------------+ + + +### `YEAR` + +**Description:** + + +**Usage:** year(date) returns the year for date, in the range 1000 to 9999, or 0 for the “zero” date. + +Argument type: STRING/DATE/TIMESTAMP + +Return type: INTEGER + +Example: + + os> source=people | eval `YEAR(DATE('2020-08-26'))` = YEAR(DATE('2020-08-26')) | fields `YEAR(DATE('2020-08-26'))` + fetched rows / total rows = 1/1 + +----------------------------+ + | YEAR(DATE('2020-08-26')) | + |----------------------------| + | 2020 | + +----------------------------+ + + +### `YEARWEEK` + + +**Description:** + + +**Usage:** yearweek(date) returns the year and week for date as an integer. It accepts an optional mode argument aligned with those available for the `WEEK` function. + +Argument type: STRING/DATE/TIME/TIMESTAMP + +Return type: INTEGER + +Example: + + os> source=people | eval `YEARWEEK('2020-08-26')` = YEARWEEK('2020-08-26') | eval `YEARWEEK('2019-01-05', 1)` = YEARWEEK('2019-01-05', 1) | fields `YEARWEEK('2020-08-26')`, `YEARWEEK('2019-01-05', 1)` + fetched rows / total rows = 1/1 + +--------------------------+-----------------------------+ + | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) | + |--------------------------+-----------------------------| + | 202034 | 201901 | + +--------------------------+-----------------------------+ + + diff --git a/docs/ppl-lang/functions/ppl-expressions.md b/docs/ppl-lang/functions/ppl-expressions.md new file mode 100644 index 000000000..6315663c2 --- /dev/null +++ b/docs/ppl-lang/functions/ppl-expressions.md @@ -0,0 +1,138 @@ +## PPL Expressions + +### Introduction + +Expressions, particularly value expressions, are those which return a scalar value. Expressions have different types and forms. For example, there are literal values as atomic expressions, and arithmetic, predicate and function expressions built on top of them. 
Expressions can also be used in different clauses, such as an arithmetic expression in the ``Filter`` or ``Stats`` command. + +### Arithmetic Operators + +**Description:** + +**Operators** +Arithmetic expression is an expression formed by numeric literals and binary arithmetic operators as follows: +````````` +1. ``+``: Add. +2. ``-``: Subtract. +3. ``*``: Multiply. +4. ``/``: Divide. For integers, the result is an integer with fractional part discarded. +5. ``%``: Modulo. This can be used with integers only, with the remainder of the division as the result. +`````````` +**Precedence** + +Parentheses can be used to control the precedence of arithmetic operators. Otherwise, operators of higher precedence are performed first. + +**Type Conversion** + +Implicit type conversion is performed when looking up operator signature. For example, an integer ``+`` a real number matches signature ``+(double,double)`` which results in a real number. This rule also applies to function calls discussed below. + +Examples +-------- + +Here is an example of different types of arithmetic expressions: + + os> source=accounts | where age > (25 + 5) | fields age ; + fetched rows / total rows = 3/3 + +-------+ + | age | + |-------| + | 32 | + | 36 | + | 33 | + +-------+ + +### Predicate Operators + +**Description:** + +A predicate operator is an expression that evaluates to true or false. MISSING and NULL value comparisons follow these rules: a MISSING value is only equal to a MISSING value and is less than all other values. A NULL value is equal to a NULL value, larger than a MISSING value, but less than all other values. 
+ +**Operators:** +``` ++----------------+----------------------------------------+ +| name | description | ++----------------+----------------------------------------+ +| > | Greater than operator | ++----------------+----------------------------------------+ +| >= | Greater than or equal operator | ++----------------+----------------------------------------+ +| < | Less than operator | ++----------------+----------------------------------------+ +| != | Not equal operator | ++----------------+----------------------------------------+ +| <= | Less than or equal operator | ++----------------+----------------------------------------+ +| = | Equal operator | ++----------------+----------------------------------------+ +| LIKE | Simple Pattern matching | ++----------------+----------------------------------------+ +| IN | Value list membership test | ++----------------+----------------------------------------+ +| AND | AND operator | ++----------------+----------------------------------------+ +| OR | OR operator | ++----------------+----------------------------------------+ +| XOR | XOR operator | ++----------------+----------------------------------------+ +| NOT | NOT operator (logical negation) | ++----------------+----------------------------------------+ +``` + +It is possible to compare datetimes. When comparing different datetime types, for example `DATE` and `TIME`, both are converted to `DATETIME`. +The following rule is applied on conversion: a `TIME` is applied to today's date; a `DATE` is interpreted at midnight. 
+ +**Examples** + +_Basic Predicate Operator:_ + +Here is an example for comparison operators: + + os> source=accounts | where age > 33 | fields age ; + fetched rows / total rows = 1/1 + +-------+ + | age | + |-------| + | 36 | + +-------+ + + +### `IN` + +IN operator test field in value lists: + + os> source=accounts | where age in (32, 33) | fields age ; + fetched rows / total rows = 2/2 + +-------+ + | age | + |-------| + | 32 | + | 33 | + +-------+ + + +### `OR` + +OR operator : + + os> source=accounts | where age = 32 OR age = 33 | fields age ; + fetched rows / total rows = 2/2 + +-------+ + | age | + |-------| + | 32 | + | 33 | + +-------+ + + +### `NOT` + +NOT operator : + + os> source=accounts | where not age in (32, 33) | fields age ; + fetched rows / total rows = 2/2 + +-------+ + | age | + |-------| + | 36 | + | 28 | + +-------+ + diff --git a/docs/ppl-lang/functions/ppl-math.md b/docs/ppl-lang/functions/ppl-math.md new file mode 100644 index 000000000..2fc97d1a9 --- /dev/null +++ b/docs/ppl-lang/functions/ppl-math.md @@ -0,0 +1,717 @@ +## PPL Mathematical Functions + +### `ABS` + +**Description** + + +Usage: abs(x) calculate the abs x. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: INTEGER/LONG/FLOAT/DOUBLE + +Example: + + os> source=people | eval `ABS(-1)` = ABS(-1) | fields `ABS(-1)` + fetched rows / total rows = 1/1 + +-----------+ + | ABS(-1) | + |-----------| + | 1 | + +-----------+ + + +### `ACOS` +---- + +**Description** + + +Usage: acos(x) calculate the arc cosine of x. Returns NULL if x is not in the range -1 to 1. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `ACOS(0)` = ACOS(0) | fields `ACOS(0)` + fetched rows / total rows = 1/1 + +--------------------+ + | ACOS(0) | + |--------------------| + | 1.5707963267948966 | + +--------------------+ + + +### `ASIN` +---- + +**Description** + + +Usage: asin(x) calculate the arc sine of x. 
Returns NULL if x is not in the range -1 to 1. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `ASIN(0)` = ASIN(0) | fields `ASIN(0)` + fetched rows / total rows = 1/1 + +-----------+ + | ASIN(0) | + |-----------| + | 0.0 | + +-----------+ + + +### `ATAN` +---- + +**Description** + + +Usage: atan(x) calculates the arc tangent of x. atan(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `ATAN(2)` = ATAN(2), `ATAN(2, 3)` = ATAN(2, 3) | fields `ATAN(2)`, `ATAN(2, 3)` + fetched rows / total rows = 1/1 + +--------------------+--------------------+ + | ATAN(2) | ATAN(2, 3) | + |--------------------+--------------------| + | 1.1071487177940904 | 0.5880026035475675 | + +--------------------+--------------------+ + + +### `ATAN2` +----- + +**Description** + + +Usage: atan2(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `ATAN2(2, 3)` = ATAN2(2, 3) | fields `ATAN2(2, 3)` + fetched rows / total rows = 1/1 + +--------------------+ + | ATAN2(2, 3) | + |--------------------| + | 0.5880026035475675 | + +--------------------+ + + +### `CEIL` +---- + +An alias for the `CEILING` function. + + +### `CEILING` +------- + +**Description** + + +Usage: CEILING(T) takes the ceiling of value T. + +Note: the `CEIL` and `CEILING` functions have the same implementation and functionality. + +Limitation: CEILING only works as expected when IEEE 754 double type displays decimal when stored. 
+ +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **LONG** + +Example: + + os> source=people | eval `CEILING(0)` = CEILING(0), `CEILING(50.00005)` = CEILING(50.00005), `CEILING(-50.00005)` = CEILING(-50.00005) | fields `CEILING(0)`, `CEILING(50.00005)`, `CEILING(-50.00005)` + fetched rows / total rows = 1/1 + +--------------+---------------------+----------------------+ + | CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) | + |--------------+---------------------+----------------------| + | 0 | 51 | -50 | + +--------------+---------------------+----------------------+ + + os> source=people | eval `CEILING(3147483647.12345)` = CEILING(3147483647.12345), `CEILING(113147483647.12345)` = CEILING(113147483647.12345), `CEILING(3147483647.00001)` = CEILING(3147483647.00001) | fields `CEILING(3147483647.12345)`, `CEILING(113147483647.12345)`, `CEILING(3147483647.00001)` + fetched rows / total rows = 1/1 + +-----------------------------+-------------------------------+-----------------------------+ + | CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) | + |-----------------------------+-------------------------------+-----------------------------| + | 3147483648 | 113147483648 | 3147483648 | + +-----------------------------+-------------------------------+-----------------------------+ + + +### `CONV` +---- + +**Description** + + +Usage: CONV(x, a, b) converts the number x from a base to b base. 
+ +Argument type: x: STRING, a: INTEGER, b: INTEGER + +Return type: **STRING** + +Example: + + os> source=people | eval `CONV('12', 10, 16)` = CONV('12', 10, 16), `CONV('2C', 16, 10)` = CONV('2C', 16, 10), `CONV(12, 10, 2)` = CONV(12, 10, 2), `CONV(1111, 2, 10)` = CONV(1111, 2, 10) | fields `CONV('12', 10, 16)`, `CONV('2C', 16, 10)`, `CONV(12, 10, 2)`, `CONV(1111, 2, 10)` + fetched rows / total rows = 1/1 + +----------------------+----------------------+-------------------+---------------------+ + | CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) | + |----------------------+----------------------+-------------------+---------------------| + | c | 44 | 1100 | 15 | + +----------------------+----------------------+-------------------+---------------------+ + + +### `COS` +--- + +**Description** + + +Usage: cos(x) calculate the cosine of x, where x is given in radians. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `COS(0)` = COS(0) | fields `COS(0)` + fetched rows / total rows = 1/1 + +----------+ + | COS(0) | + |----------| + | 1.0 | + +----------+ + + +### `COT` +--- + +**Description** + + +Usage: cot(x) calculate the cotangent of x. Returns out-of-range error if x equals to 0. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `COT(1)` = COT(1) | fields `COT(1)` + fetched rows / total rows = 1/1 + +--------------------+ + | COT(1) | + |--------------------| + | 0.6420926159343306 | + +--------------------+ + + +### `CRC32` +----- + +**Description** + + +Usage: Calculates a cyclic redundancy check value and returns a 32-bit unsigned value. 
+ +Argument type: STRING + +Return type: **LONG** + +Example: + + os> source=people | eval `CRC32('MySQL')` = CRC32('MySQL') | fields `CRC32('MySQL')` + fetched rows / total rows = 1/1 + +------------------+ + | CRC32('MySQL') | + |------------------| + | 3259397556 | + +------------------+ + + +### `DEGREES` +------- + +**Description** + + +Usage: degrees(x) converts x from radians to degrees. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `DEGREES(1.57)` = DEGREES(1.57) | fields `DEGREES(1.57)` + fetched rows / total rows = 1/1 + +-------------------+ + | DEGREES(1.57) | + |-------------------| + | 89.95437383553924 | + +-------------------+ + + +### `E` +- + +**Description** + + +Usage: E() returns the Euler's number + +Return type: **DOUBLE** + +Example: + + os> source=people | eval `E()` = E() | fields `E()` + fetched rows / total rows = 1/1 + +-------------------+ + | E() | + |-------------------| + | 2.718281828459045 | + +-------------------+ + + +### `EXP` +--- + +**Description** + + +Usage: exp(x) return e raised to the power of x. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `EXP(2)` = EXP(2) | fields `EXP(2)` + fetched rows / total rows = 1/1 + +------------------+ + | EXP(2) | + |------------------| + | 7.38905609893065 | + +------------------+ + + +### `FLOOR` +----- + +**Description** + + +Usage: FLOOR(T) takes the floor of value T. + +Limitation: FLOOR only works as expected when IEEE 754 double type displays decimal when stored. 
+ +Argument type: a: INTEGER/LONG/FLOAT/DOUBLE + +Return type: LONG + +Example: + + os> source=people | eval `FLOOR(0)` = FLOOR(0), `FLOOR(50.00005)` = FLOOR(50.00005), `FLOOR(-50.00005)` = FLOOR(-50.00005) | fields `FLOOR(0)`, `FLOOR(50.00005)`, `FLOOR(-50.00005)` + fetched rows / total rows = 1/1 + +------------+-------------------+--------------------+ + | FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) | + |------------+-------------------+--------------------| + | 0 | 50 | -51 | + +------------+-------------------+--------------------+ + + os> source=people | eval `FLOOR(3147483647.12345)` = FLOOR(3147483647.12345), `FLOOR(113147483647.12345)` = FLOOR(113147483647.12345), `FLOOR(3147483647.00001)` = FLOOR(3147483647.00001) | fields `FLOOR(3147483647.12345)`, `FLOOR(113147483647.12345)`, `FLOOR(3147483647.00001)` + fetched rows / total rows = 1/1 + +---------------------------+-----------------------------+---------------------------+ + | FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) | + |---------------------------+-----------------------------+---------------------------| + | 3147483647 | 113147483647 | 3147483647 | + +---------------------------+-----------------------------+---------------------------+ + + os> source=people | eval `FLOOR(282474973688888.022)` = FLOOR(282474973688888.022), `FLOOR(9223372036854775807.022)` = FLOOR(9223372036854775807.022), `FLOOR(9223372036854775807.0000001)` = FLOOR(9223372036854775807.0000001) | fields `FLOOR(282474973688888.022)`, `FLOOR(9223372036854775807.022)`, `FLOOR(9223372036854775807.0000001)` + fetched rows / total rows = 1/1 + +------------------------------+----------------------------------+--------------------------------------+ + | FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) | + |------------------------------+----------------------------------+--------------------------------------| + | 282474973688888 | 9223372036854775807 | 
9223372036854775807 | + +------------------------------+----------------------------------+--------------------------------------+ + + +### `LN` +-- + +**Description** + + +Usage: ln(x) returns the natural logarithm of x. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `LN(2)` = LN(2) | fields `LN(2)` + fetched rows / total rows = 1/1 + +--------------------+ + | LN(2) | + |--------------------| + | 0.6931471805599453 | + +--------------------+ + + +### `LOG` +--- + +**Description** + + +**Specifications:** + +Usage: log(x) returns the natural logarithm of x, that is, the base e logarithm of x. log(B, x) is equivalent to log(x)/log(B). + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `LOG(2)` = LOG(2), `LOG(2, 8)` = LOG(2, 8) | fields `LOG(2)`, `LOG(2, 8)` + fetched rows / total rows = 1/1 + +--------------------+-------------+ + | LOG(2) | LOG(2, 8) | + |--------------------+-------------| + | 0.6931471805599453 | 3.0 | + +--------------------+-------------+ + + +### `LOG2` +---- + +**Description** + + +**Specifications:** + +Usage: log2(x) is equivalent to log(x)/log(2). + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `LOG2(8)` = LOG2(8) | fields `LOG2(8)` + fetched rows / total rows = 1/1 + +-----------+ + | LOG2(8) | + |-----------| + | 3.0 | + +-----------+ + + +### `LOG10` +----- + +**Description** + + +**Specifications:** + +Usage: log10(x) is equivalent to log(x)/log(10). + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `LOG10(100)` = LOG10(100) | fields `LOG10(100)` + fetched rows / total rows = 1/1 + +--------------+ + | LOG10(100) | + |--------------| + | 2.0 | + +--------------+ + + +### `MOD` +--- + +**Description** + + +Usage: MOD(n, m) calculates the remainder of the number n divided by m. 
+ +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: Wider type between types of n and m if m is nonzero value. If m equals to 0, then returns NULL. + +Example: + + os> source=people | eval `MOD(3, 2)` = MOD(3, 2), `MOD(3.1, 2)` = MOD(3.1, 2) | fields `MOD(3, 2)`, `MOD(3.1, 2)` + fetched rows / total rows = 1/1 + +-------------+---------------+ + | MOD(3, 2) | MOD(3.1, 2) | + |-------------+---------------| + | 1 | 1.1 | + +-------------+---------------+ + + +### `PI` +-- + +**Description** + + +Usage: PI() returns the constant pi + +Return type: DOUBLE + +Example: + + os> source=people | eval `PI()` = PI() | fields `PI()` + fetched rows / total rows = 1/1 + +-------------------+ + | PI() | + |-------------------| + | 3.141592653589793 | + +-------------------+ + + +### `POW` +--- + +**Description** + + +Usage: POW(x, y) calculates the value of x raised to the power of y. Bad inputs return NULL result. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Synonyms: `POWER(_ , _)` + +Example: + + os> source=people | eval `POW(3, 2)` = POW(3, 2), `POW(-3, 2)` = POW(-3, 2), `POW(3, -2)` = POW(3, -2) | fields `POW(3, 2)`, `POW(-3, 2)`, `POW(3, -2)` + fetched rows / total rows = 1/1 + +-------------+--------------+--------------------+ + | POW(3, 2) | POW(-3, 2) | POW(3, -2) | + |-------------+--------------+--------------------| + | 9.0 | 9.0 | 0.1111111111111111 | + +-------------+--------------+--------------------+ + + +### `POWER` +----- + +**Description** + + +Usage: POWER(x, y) calculates the value of x raised to the power of y. Bad inputs return NULL result. 
+ +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Synonyms: `POW(_ , _)`_ + +Example: + + os> source=people | eval `POWER(3, 2)` = POWER(3, 2), `POWER(-3, 2)` = POWER(-3, 2), `POWER(3, -2)` = POWER(3, -2) | fields `POWER(3, 2)`, `POWER(-3, 2)`, `POWER(3, -2)` + fetched rows / total rows = 1/1 + +---------------+----------------+--------------------+ + | POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) | + |---------------+----------------+--------------------| + | 9.0 | 9.0 | 0.1111111111111111 | + +---------------+----------------+--------------------+ + + +### `RADIANS` +------- + +**Description** + + +Usage: radians(x) converts x from degrees to radians. + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `RADIANS(90)` = RADIANS(90) | fields `RADIANS(90)` + fetched rows / total rows = 1/1 + +--------------------+ + | RADIANS(90) | + |--------------------| + | 1.5707963267948966 | + +--------------------+ + + +### `RAND` +---- + +**Description** + + +Usage: RAND()/RAND(N) returns a random floating-point value in the range 0 <= value < 1.0. If integer N is specified, the seed is initialized prior to execution. One implication of this behavior is with identical argument N, rand(N) returns the same value each time, and thus produces a repeatable sequence of column values. 
+ +Argument type: INTEGER + +Return type: FLOAT + +Example: + + os> source=people | eval `RAND(3)` = RAND(3) | fields `RAND(3)` + fetched rows / total rows = 1/1 + +------------+ + | RAND(3) | + |------------| + | 0.73105735 | + +------------+ + + +### `ROUND` +----- + +**Description** + + +Usage: ROUND(x, d) rounds the argument x to d decimal places, d defaults to 0 if not specified + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type map: + + - (INTEGER/LONG [,INTEGER]) -> LONG + - (FLOAT/DOUBLE [,INTEGER]) -> LONG + +Example: + + os> source=people | eval `ROUND(12.34)` = ROUND(12.34), `ROUND(12.34, 1)` = ROUND(12.34, 1), `ROUND(12.34, -1)` = ROUND(12.34, -1), `ROUND(12, 1)` = ROUND(12, 1) | fields `ROUND(12.34)`, `ROUND(12.34, 1)`, `ROUND(12.34, -1)`, `ROUND(12, 1)` + fetched rows / total rows = 1/1 + +----------------+-------------------+--------------------+----------------+ + | ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) | + |----------------+-------------------+--------------------+----------------| + | 12.0 | 12.3 | 10.0 | 12 | + +----------------+-------------------+--------------------+----------------+ + + +### `SIGN` +---- + +**Description** + + +Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: INTEGER + +Example: + + os> source=people | eval `SIGN(1)` = SIGN(1), `SIGN(0)` = SIGN(0), `SIGN(-1.1)` = SIGN(-1.1) | fields `SIGN(1)`, `SIGN(0)`, `SIGN(-1.1)` + fetched rows / total rows = 1/1 + +-----------+-----------+--------------+ + | SIGN(1) | SIGN(0) | SIGN(-1.1) | + |-----------+-----------+--------------| + | 1 | 0 | -1 | + +-----------+-----------+--------------+ + + +### `SIN` +--- + +**Description** + + +Usage: sin(x) calculate the sine of x, where x is given in radians. 
+ +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type: DOUBLE + +Example: + + os> source=people | eval `SIN(0)` = SIN(0) | fields `SIN(0)` + fetched rows / total rows = 1/1 + +----------+ + | SIN(0) | + |----------| + | 0.0 | + +----------+ + + +### `SQRT` +---- + +**Description** + + +Usage: Calculates the square root of a non-negative number + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type map: + + - (Non-negative) INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE + - (Negative) INTEGER/LONG/FLOAT/DOUBLE -> NULL + +Example: + + os> source=people | eval `SQRT(4)` = SQRT(4), `SQRT(4.41)` = SQRT(4.41) | fields `SQRT(4)`, `SQRT(4.41)` + fetched rows / total rows = 1/1 + +-----------+--------------+ + | SQRT(4) | SQRT(4.41) | + |-----------+--------------| + | 2.0 | 2.1 | + +-----------+--------------+ + + +### `CBRT` +---- + +**Description** + + +Usage: Calculates the cube root of a number + +Argument type: INTEGER/LONG/FLOAT/DOUBLE + +Return type DOUBLE: + +INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE + +Example: + + opensearchsql> source=location | eval `CBRT(8)` = CBRT(8), `CBRT(9.261)` = CBRT(9.261), `CBRT(-27)` = CBRT(-27) | fields `CBRT(8)`, `CBRT(9.261)`, `CBRT(-27)`; + fetched rows / total rows = 2/2 + +-----------+---------------+-------------+ + | CBRT(8) | CBRT(9.261) | CBRT(-27) | + |-----------+---------------+-------------| + | 2.0 | 2.1 | -3.0 | + | 2.0 | 2.1 | -3.0 | + +-----------+---------------+-------------+ diff --git a/docs/ppl-lang/functions/ppl-string.md b/docs/ppl-lang/functions/ppl-string.md new file mode 100644 index 000000000..b198f5ce2 --- /dev/null +++ b/docs/ppl-lang/functions/ppl-string.md @@ -0,0 +1,253 @@ +## PPL String Functions + +### `CONCAT` + +**Description** + +`CONCAT(str1, str2, ...., str_9)` adds up to 9 strings together. 
+ +**Argument type:** + - STRING, STRING, ...., STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `CONCAT('hello', 'world')` = CONCAT('hello', 'world'), `CONCAT('hello ', 'whole ', 'world', '!')` = CONCAT('hello ', 'whole ', 'world', '!') | fields `CONCAT('hello', 'world')`, `CONCAT('hello ', 'whole ', 'world', '!')` + fetched rows / total rows = 1/1 + +----------------------------+--------------------------------------------+ + | CONCAT('hello', 'world') | CONCAT('hello ', 'whole ', 'world', '!') | + |----------------------------+--------------------------------------------| + | helloworld | hello whole world! | + +----------------------------+--------------------------------------------+ + + +### `CONCAT_WS` + +**Description** + +`CONCAT_WS(sep, str1, str2)` returns str1 concatenated with str2 using sep as a separator between them. + +**Argument type:** +- STRING, STRING, ...., STRING +- Return type: **STRING** + +Example: + + os> source=people | eval `CONCAT_WS(',', 'hello', 'world')` = CONCAT_WS(',', 'hello', 'world') | fields `CONCAT_WS(',', 'hello', 'world')` + fetched rows / total rows = 1/1 + +------------------------------------+ + | CONCAT_WS(',', 'hello', 'world') | + |------------------------------------| + | hello,world | + +------------------------------------+ + + +### `LENGTH` +------ + +**Description** + +Specifications: + +`length(str)` returns length of string measured in bytes. + +**Argument type:** + - STRING + - Return type: **INTEGER** + +Example: + + os> source=people | eval `LENGTH('helloworld')` = LENGTH('helloworld') | fields `LENGTH('helloworld')` + fetched rows / total rows = 1/1 + +------------------------+ + | LENGTH('helloworld') | + |------------------------| + | 10 | + +------------------------+ + +### `LOWER` + +**Description** + +`lower(string)` converts the string to lowercase. 
+ +**Argument type:** + - STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `LOWER('helloworld')` = LOWER('helloworld'), `LOWER('HELLOWORLD')` = LOWER('HELLOWORLD') | fields `LOWER('helloworld')`, `LOWER('HELLOWORLD')` + fetched rows / total rows = 1/1 + +-----------------------+-----------------------+ + | LOWER('helloworld') | LOWER('HELLOWORLD') | + |-----------------------+-----------------------| + | helloworld | helloworld | + +-----------------------+-----------------------+ + + +### `LTRIM` + +**Description** + +`ltrim(str)` trims leading space characters from the string. + +**Argument type:** + - STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `LTRIM(' hello')` = LTRIM(' hello'), `LTRIM('hello ')` = LTRIM('hello ') | fields `LTRIM(' hello')`, `LTRIM('hello ')` + fetched rows / total rows = 1/1 + +---------------------+---------------------+ + | LTRIM(' hello') | LTRIM('hello ') | + |---------------------+---------------------| + | hello | hello | + +---------------------+---------------------+ + + +### `POSITION` + +**Description** + +The syntax `POSITION(substr IN str)` returns the position of the first occurrence of substring substr in string str. Returns 0 if substr is not in str. Returns NULL if any argument is NULL. 
+ +**Argument type:** + - STRING, STRING + - Return type **INTEGER** + + +Example: + + os> source=people | eval `POSITION('world' IN 'helloworld')` = POSITION('world' IN 'helloworld'), `POSITION('invalid' IN 'helloworld')`= POSITION('invalid' IN 'helloworld') | fields `POSITION('world' IN 'helloworld')`, `POSITION('invalid' IN 'helloworld')` + fetched rows / total rows = 1/1 + +-------------------------------------+---------------------------------------+ + | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | + |-------------------------------------+---------------------------------------| + | 6 | 0 | + +-------------------------------------+---------------------------------------+ + + +### `REVERSE` + +**Description** + +`REVERSE(str)` returns reversed string of the string supplied as an argument. + +**Argument type:** + - STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `REVERSE('abcde')` = REVERSE('abcde') | fields `REVERSE('abcde')` + fetched rows / total rows = 1/1 + +--------------------+ + | REVERSE('abcde') | + |--------------------| + | edcba | + +--------------------+ + + +### `RIGHT` + +**Description** + +`right(str, len)` returns the rightmost len characters from the string str, or NULL if any argument is NULL. + +**Argument type:** + - STRING, INTEGER + - Return type: **STRING** + +Example: + + os> source=people | eval `RIGHT('helloworld', 5)` = RIGHT('helloworld', 5), `RIGHT('HELLOWORLD', 0)` = RIGHT('HELLOWORLD', 0) | fields `RIGHT('helloworld', 5)`, `RIGHT('HELLOWORLD', 0)` + fetched rows / total rows = 1/1 + +--------------------------+--------------------------+ + | RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | + |--------------------------+--------------------------| + | world | | + +--------------------------+--------------------------+ + + +### `RTRIM` + +**Description** + +`rtrim(str)` trims trailing space characters from the string. 
+ +**Argument type:** + - STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `RTRIM(' hello')` = RTRIM(' hello'), `RTRIM('hello ')` = RTRIM('hello ') | fields `RTRIM(' hello')`, `RTRIM('hello ')` + fetched rows / total rows = 1/1 + +---------------------+---------------------+ + | RTRIM(' hello') | RTRIM('hello ') | + |---------------------+---------------------| + | hello | hello | + +---------------------+---------------------+ + + +### `SUBSTRING` + +**Description** + +`substring(str, start)` or `substring(str, start, length)` returns substring using start and length. With no length, entire string from start is returned. + +**Argument type:** + - STRING, INTEGER, INTEGER + - Return type: **STRING** + +Example: + + os> source=people | eval `SUBSTRING('helloworld', 5)` = SUBSTRING('helloworld', 5), `SUBSTRING('helloworld', 5, 3)` = SUBSTRING('helloworld', 5, 3) | fields `SUBSTRING('helloworld', 5)`, `SUBSTRING('helloworld', 5, 3)` + fetched rows / total rows = 1/1 + +------------------------------+---------------------------------+ + | SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) | + |------------------------------+---------------------------------| + | oworld | owo | + +------------------------------+---------------------------------+ + + +### `TRIM` + +**Description** + +`trim(string)` trims leading and trailing space characters from the string. + +**Argument type:** + - STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `TRIM(' hello')` = TRIM(' hello'), `TRIM('hello ')` = TRIM('hello ') | fields `TRIM(' hello')`, `TRIM('hello ')` + fetched rows / total rows = 1/1 + +--------------------+--------------------+ + | TRIM(' hello') | TRIM('hello ') | + |--------------------+--------------------| + | hello | hello | + +--------------------+--------------------+ + + +### `UPPER` + +**Description** + +`upper(string)` converts the string to uppercase. 
+ +**Argument type:** + - STRING + - Return type: **STRING** + +Example: + + os> source=people | eval `UPPER('helloworld')` = UPPER('helloworld'), `UPPER('HELLOWORLD')` = UPPER('HELLOWORLD') | fields `UPPER('helloworld')`, `UPPER('HELLOWORLD')` + fetched rows / total rows = 1/1 + +-----------------------+-----------------------+ + | UPPER('helloworld') | UPPER('HELLOWORLD') | + |-----------------------+-----------------------| + | HELLOWORLD | HELLOWORLD | + +-----------------------+-----------------------+ From d7ee664315f38634ecdfb7a72ed9a3d2f4d9e1c2 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 17:36:21 -0700 Subject: [PATCH 3/8] update documentation with specifications markdown pages including ppl expressions Signed-off-by: YANGDB --- docs/ppl-lang/ppl-dedup-command.md | 31 +++++++++++++++++++++++++++++ docs/ppl-lang/ppl-eval-command.md | 4 +++- docs/ppl-lang/ppl-fields-command.md | 5 ++++- docs/ppl-lang/ppl-rename-command.md | 4 +++- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md index f2f6dd086..de8814c60 100644 --- a/docs/ppl-lang/ppl-dedup-command.md +++ b/docs/ppl-lang/ppl-dedup-command.md @@ -124,3 +124,34 @@ PPL query: - `source = table | dedup 2 a,b keepempty=true | fields a,b,c` - `source = table | dedup 1 a consecutive=true| fields a,b,c` (Consecutive deduplication is unsupported) +### Limitation: + +**Spark Support** (3.4) + +To translate `dedup` command with `allowedDuplication > 1`, such as `| dedup 2 a,b` to Spark plan, the solution is translating to a plan with Window function (e.g row_number) and a new column `row_number_col` as Filter. 
+ +- For `| dedup 2 a, b keepempty=false` + +``` +DataFrameDropColumns('_row_number_) ++- Filter ('_row_number_ <= 2) // allowed duplication = 2 + +- Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _row_number_], ['a, 'b], ['a ASC NULLS FIRST, 'b ASC NULLS FIRST] + +- Filter (isnotnull('a) AND isnotnull('b)) // keepempty=false + +- Project + +- UnresolvedRelation +``` +- For `| dedup 2 a, b keepempty=true` +``` +Union +:- DataFrameDropColumns('_row_number_) +: +- Filter ('_row_number_ <= 2) +: +- Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _row_number_], ['a, 'b], ['a ASC NULLS FIRST, 'b ASC NULLS FIRST] +: +- Filter (isnotnull('a) AND isnotnull('b)) +: +- Project +: +- UnresolvedRelation ++- Filter (isnull('a) OR isnull('b)) + +- Project + +- UnresolvedRelation +``` + + - this `dedup` command with `allowedDuplication > 1` feature needs spark version >= 3.4 \ No newline at end of file diff --git a/docs/ppl-lang/ppl-eval-command.md b/docs/ppl-lang/ppl-eval-command.md index 42cba1e2f..aa86220db 100644 --- a/docs/ppl-lang/ppl-eval-command.md +++ b/docs/ppl-lang/ppl-eval-command.md @@ -105,7 +105,9 @@ eval status_category = ``` ### Limitation: -Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" + - `eval` with comma separated expression needs spark version >= 3.4 + + - Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" ```sql - `source = table | eval a = 10 | fields a,b,c` diff --git a/docs/ppl-lang/ppl-fields-command.md b/docs/ppl-lang/ppl-fields-command.md index 87c32b64d..cb67865dc 100644 --- a/docs/ppl-lang/ppl-fields-command.md +++ b/docs/ppl-lang/ppl-fields-command.md @@ -56,13 +56,16 @@ PPL query: - `source = table 
| eval b1 = b | fields - b1,c` ### Limitation: -new field added by eval command with a function cannot be dropped in current version:**_ + - `fields - list` shows incorrect results for spark version 3.3 - see [PR #732](https://github.com/opensearch-project/opensearch-spark/pull/732) + - new field added by eval command with a function cannot be dropped in current version: + ```sql `source = table | eval b1 = b + 1 | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) `source = table | eval b1 = lower(b) | fields - b1,c` (Field `b1` cannot be dropped caused by SPARK-49782) ``` **Nested-Fields** + - nested field shows incorrect results for spark version 3.3 - see [issue](https://github.com/opensearch-project/opensearch-spark/issues/739) ```sql `source = catalog.schema.table1, catalog.schema.table2 | fields A.nested1, B.nested1` `source = catalog.table | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield` diff --git a/docs/ppl-lang/ppl-rename-command.md b/docs/ppl-lang/ppl-rename-command.md index d7fd6921c..8a3e4e3b5 100644 --- a/docs/ppl-lang/ppl-rename-command.md +++ b/docs/ppl-lang/ppl-rename-command.md @@ -47,6 +47,8 @@ PPL query: +------+---------+ ### Limitation: -Overriding existing field is unsupported: +- `rename` command needs spark version >= 3.4 + +- Overriding existing field is unsupported: `source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address` From 4db82f040857587d378c669f08a859653a3f141a Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 17:43:06 -0700 Subject: [PATCH 4/8] update main ppl-lang README.md page Signed-off-by: YANGDB --- docs/ppl-lang/README.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index f561e11c2..81b7736b7 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -1,16 +1,12 @@ -### PPL Language
+## PPL Language -Overview ---------- -Piped Processing Language (PPL), powered by OpenSearch, enables OpenSearch users with exploration and discovery of, and finding search patterns in data stored in OpenSearch, using a set of commands delimited by pipes (|). These are essentially read-only requests to process data and return results. +## Overview -Currently, OpenSearch users can query data using either Query DSL or SQL. Query DSL is powerful and fast. However, it has a steep learning curve, and was not designed as a human interface to easily create ad hoc queries and explore user data. SQL allows users to extract and analyze data in OpenSearch in a declarative manner. OpenSearch now makes its search and query engine robust by introducing Piped Processing Language (PPL). It enables users to extract insights from OpenSearch with a sequence of commands delimited by pipes (|). It supports a comprehensive set of commands including search, where, fields, rename, dedup, sort, eval, head, top and rare, and functions, operators and expressions. Even new users who have recently adopted OpenSearch, can be productive day one, if they are familiar with the pipe (|) syntax. It enables developers, DevOps engineers, support engineers, site reliability engineers (SREs), and IT managers to effectively discover and explore log, monitoring and observability data stored in OpenSearch. +Piped Processing Language (PPL), powered by OpenSearch, enables OpenSearch users to explore, discover, and find search patterns in data stored in OpenSearch or S3. -We expand the capabilities of our Workbench, a comprehensive and integrated visual query tool currently supporting only SQL, to run on-demand PPL commands, and view and save results as text and JSON. We also add a new interactive standalone command line tool, the PPL CLI, to run on-demand PPL commands, and view and save results as text and JSON.
- -The query start with search command and then flowing a set of command delimited by pipe (|). -| for example, the following query retrieve firstname and lastname from accounts if age large than 18. +The PPL query starts with a search command, followed by a set of commands delimited by pipes (|). +For example, the following query retrieves firstname and lastname from accounts if age is larger than 18. ```sql source=accounts @@ -18,8 +14,10 @@ source=accounts | fields firstname, lastname ``` +For additional examples, see the [PPL example commands documentation](PPL-Example-Commands.md). + --- -### Specifications +### Commands Specifications * **Commands** From c563a86aad46cec5c34c6b831b2dafa5f6779f72 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 17:47:20 -0700 Subject: [PATCH 5/8] update PPL-ex Signed-off-by: YANGDB --- docs/ppl-lang/PPL-Example-Commands.md | 83 +++++++++++++++------------ 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/docs/ppl-lang/PPL-Example-Commands.md b/docs/ppl-lang/PPL-Example-Commands.md index bcdd36293..fbe5f6ace 100644 --- a/docs/ppl-lang/PPL-Example-Commands.md +++ b/docs/ppl-lang/PPL-Example-Commands.md @@ -39,26 +39,27 @@ _- **Limitation: new field added by eval command with a function cannot be dropp - `source = table | where isempty(a)` - `source = table | where case(length(a) > 6, 'True' else 'False') = 'True'` -``` +```sql source = table | eval status_category = case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Incorrect HTTP status code') + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code') | where case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Incorrect HTTP status code' + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 
500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP status code' ) = 'Incorrect HTTP status code' ``` -- - ``` + + +```sql source = table - | eval factor = case(a > 15, a - 14, isnull(b), a - 7, a < 3, a + 1 else 1) - | where case(factor = 2, 'even', factor = 4, 'even', factor = 6, 'even', factor = 8, 'even' else 'odd') = 'even' - | stats count() by factor + | eval factor = case(a > 15, a - 14, isnull(b), a - 7, a < 3, a + 1 else 1) + | where case(factor = 2, 'even', factor = 4, 'even', factor = 6, 'even', factor = 8, 'even' else 'odd') = 'even' + | stats count() by factor ``` #### **Filters With Logical Conditions** @@ -84,30 +85,33 @@ Assumptions: `a`, `b`, `c` are existing fields in `table` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one', a = 2, 'two', a = 3, 'three', a = 4, 'four', a = 5, 'five', a = 6, 'six', a = 7, 'se7en', a = 8, 'eight', a = 9, 'nine')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else 'unknown')` - `source = table | eval f = case(a = 0, 'zero', a = 1, 'one' else concat(a, ' is an incorrect binary digit'))` -- -``` + + +```sql source = table | eval e = eval status_category = -case(a >= 200 AND a < 300, 'Success', -a >= 300 AND a < 400, 'Redirection', -a >= 400 AND a < 500, 'Client Error', -a >= 500, 'Server Error' -else 'Unknown' -) -``` -- + case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Unknown' + ) ``` + +```sql source = table | where ispresent(a) | -eval status_category = - case(a >= 200 AND a < 300, 'Success', - a >= 300 AND a < 400, 'Redirection', - a >= 400 AND a < 500, 'Client Error', - a >= 500, 'Server Error' - else 'Incorrect HTTP status code' - ) - | stats count() by status_category + eval status_category = + case(a >= 200 AND a < 300, 'Success', + a >= 300 AND a < 400, 'Redirection', + a >= 400 AND a < 500, 'Client Error', + a >= 500, 'Server Error' + else 'Incorrect HTTP 
status code' + ) + | stats count() by status_category ``` -Limitation: Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" +**Limitation**: + Overriding existing field is unsupported, following queries throw exceptions with "Reference 'a' is ambiguous" + - `source = table | eval a = 10 | fields a,b,c` - `source = table | eval a = a * 2 | stats avg(a)` - `source = table | eval a = abs(a) | where a > 0` @@ -140,6 +144,7 @@ Limitation: Overriding existing field is unsupported, following queries throw ex - `source = table | stats avg(age) as avg_city_age by country, state, city | eval new_avg_city_age = avg_city_age - 1 | stats avg(new_avg_city_age) as avg_state_age by country, state | where avg_state_age > 18 | stats avg(avg_state_age) as avg_adult_country_age by country` #### **Dedup** + [See additional command details](ppl-dedup-command) - `source = table | dedup a | fields a,b,c` @@ -178,7 +183,7 @@ Limitation: Overriding existing field is unsupported, following queries throw ex - `source=accounts | parse email '.+@(?.+)' | eval eval_result=1 | fields host, eval_result` - `source=accounts | parse email '.+@(?.+)' | where age > 45 | sort - age | fields age, email, host` - `source=accounts | parse address '(?\d+) (?.+)' | where streetNumber > 500 | sort num(streetNumber) | fields streetNumber, street` -- **Limitation: [see limitations](ppl-parse-command.md#limitations) +- Limitation: [see limitations](ppl-parse-command.md#limitations) #### **Grok** [See additional command details](ppl-grok-command) @@ -192,7 +197,7 @@ Limitation: Overriding existing field is unsupported, following queries throw ex - **Limitation: Overriding existing field is unsupported:**_ - `source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address` -- **[see limitations](ppl-parse-command.md#limitations) +- [see limitations](ppl-parse-command.md#limitations) #### **Patterns** [See additional command 
details](ppl-patterns-command) @@ -201,7 +206,7 @@ Limitation: Overriding existing field is unsupported, following queries throw ex - `source=accounts | patterns email | where age > 45 | sort - age | fields email, patterns_field` - `source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | fields message, no_numbers` - `source=apache | patterns new_field='no_numbers' pattern='[0-9]' message | stats count() by no_numbers` -- **Limitation: [see limitations](ppl-parse-command.md#limitations) +- Limitation: [see limitations](ppl-parse-command.md#limitations) #### **Rename** [See additional command details](ppl-rename-command) @@ -251,7 +256,8 @@ _- **Limitation: "REPLACE" or "APPEND" clause must contain "AS"**_ - `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested) - `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter) -SQL Migration examples with IN-Subquery PPL: +**SQL Migration examples with IN-Subquery PPL:** + 1. 
tpch q4 (in-subquery with aggregation) ```sql select @@ -274,6 +280,7 @@ group by order by o_orderpriority ``` + Rewritten by PPL InSubquery query: ```sql source = orders @@ -286,6 +293,7 @@ source = orders | sort o_orderpriority | fields o_orderpriority, order_count ``` + 2.tpch q20 (nested in-subquery) ```sql select @@ -315,6 +323,7 @@ where order by s_name ``` + Rewritten by PPL InSubquery query: ```sql source = supplier From e7b5f0ea7dc715897693605bda8476571500ef37 Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 17:50:51 -0700 Subject: [PATCH 6/8] update fields & where docs Signed-off-by: YANGDB --- docs/ppl-lang/ppl-fields-command.md | 9 ++++----- docs/ppl-lang/ppl-where-command.md | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/ppl-lang/ppl-fields-command.md b/docs/ppl-lang/ppl-fields-command.md index cb67865dc..e37fc644f 100644 --- a/docs/ppl-lang/ppl-fields-command.md +++ b/docs/ppl-lang/ppl-fields-command.md @@ -1,12 +1,11 @@ -# PPL `fields` command +## PPL `fields` command -Description -============ +**Description** Using ``field`` command to keep or remove fields from the search result. -Syntax -============ +**Syntax** + field [+|-] * index: optional. if the plus (+) is used, only the fields specified in the field list will be keep. if the minus (-) is used, all the fields specified in the field list will be removed. **Default** + diff --git a/docs/ppl-lang/ppl-where-command.md b/docs/ppl-lang/ppl-where-command.md index 73f3fbd94..f6f069f11 100644 --- a/docs/ppl-lang/ppl-where-command.md +++ b/docs/ppl-lang/ppl-where-command.md @@ -10,7 +10,6 @@ The ``where`` command bool-expression to filter the search result. The ``where`` * bool-expression: optional. any expression which could be evaluated to boolean value. ### Example 1: Filter result set with condition -=========================================== The example show fetch all the document from accounts index with . 
From e72379cea56649ddc9f2ecec8f5f4dff71eca53c Mon Sep 17 00:00:00 2001 From: YANGDB Date: Thu, 3 Oct 2024 18:05:10 -0700 Subject: [PATCH 7/8] add ppl subquery command documentation Signed-off-by: YANGDB --- docs/ppl-lang/README.md | 2 + docs/ppl-lang/ppl-parse-command.md | 4 +- docs/ppl-lang/ppl-patterns-command.md | 3 +- docs/ppl-lang/ppl-subquery-command.md | 174 ++++++++++++++++++++++++++ 4 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 docs/ppl-lang/ppl-subquery-command.md diff --git a/docs/ppl-lang/README.md b/docs/ppl-lang/README.md index 81b7736b7..efbeafe91 100644 --- a/docs/ppl-lang/README.md +++ b/docs/ppl-lang/README.md @@ -58,6 +58,8 @@ For additional examples see the next [documentation](PPL-Example-Commands.md). - [`lookup commands`](ppl-lookup-command.md) + - [`subquery commands`](ppl-subquery-command.md) + - [`correlation commands`](ppl-correlation-command.md) diff --git a/docs/ppl-lang/ppl-parse-command.md b/docs/ppl-lang/ppl-parse-command.md index a5cc59f04..10be21cc0 100644 --- a/docs/ppl-lang/ppl-parse-command.md +++ b/docs/ppl-lang/ppl-parse-command.md @@ -76,14 +76,14 @@ There are a few limitations with parse command: The following command will not work: - source=accounts | parse address '\d+ (?<street>.+)' | parse street '\w+ (?<road>\w+)' ; + `source=accounts | parse address '\d+ (?<street>.+)' | parse street '\w+ (?<road>\w+)'` - Fields defined by parse cannot be overridden with other commands. ``where`` will not match any documents since ``street`` cannot be overridden: ```sql -source=accounts | parse address '\d+ (?<street>.+)' | eval street='1' | where street='1' ; + source=accounts | parse address '\d+ (?<street>.+)' | eval street='1' | where street='1' ; ``` - The text field used by parse cannot be overridden. 
diff --git a/docs/ppl-lang/ppl-patterns-command.md b/docs/ppl-lang/ppl-patterns-command.md index 77d330dbe..c20e01944 100644 --- a/docs/ppl-lang/ppl-patterns-command.md +++ b/docs/ppl-lang/ppl-patterns-command.md @@ -63,7 +63,6 @@ PPL query: | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | ... - - [/Sep/::: -] "POST /users HTTP/." | +-----------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+ -Limitation -========== +**Limitation** The patterns command has the same limitations as the parse command, see ``parse limitations` for details. diff --git a/docs/ppl-lang/ppl-subquery-command.md b/docs/ppl-lang/ppl-subquery-command.md new file mode 100644 index 000000000..85cbe1dca --- /dev/null +++ b/docs/ppl-lang/ppl-subquery-command.md @@ -0,0 +1,174 @@ +## PPL SubQuery Commands: + +**Syntax** +The subquery command should be implemented using a clean, logical syntax that integrates with existing PPL structure. + +```sql +source=logs | where field in (subquery source=events | where condition | return field) +``` + +In this example, the primary search (`source=logs`) is filtered by results from the subquery (`source=events`). + +The subquery command should allow nested queries to be as complex as necessary, supporting multiple levels of nesting. 
+ +Example: + +```sql + source=logs | where field in (subquery source=users | where user in (subquery source=actions | where action="login")) +``` + +For additional info See [Issue](https://github.com/opensearch-project/opensearch-spark/issues/661) + +--- + +**InSubquery usage** +- `source = outer | where a in [ source = inner | fields b ]` +- `source = outer | where (a) in [ source = inner | fields b ]` +- `source = outer | where (a,b,c) in [ source = inner | fields d,e,f ]` +- `source = outer | where a not in [ source = inner | fields b ]` +- `source = outer | where (a) not in [ source = inner | fields b ]` +- `source = outer | where (a,b,c) not in [ source = inner | fields d,e,f ]` +- `source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ]` (nested) +- `source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c` (as join filter) + +**_SQL Migration examples with IN-Subquery PPL:_** +1. 
tpch q4 (in-subquery with aggregation) +```sql +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and o_orderkey in ( + select + l_orderkey + from + lineitem + where l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority +``` +Rewritten by PPL InSubquery query: +```sql +source = orders +| where o_orderdate >= "1993-07-01" and o_orderdate < "1993-10-01" and o_orderkey IN + [ source = lineitem + | where l_commitdate < l_receiptdate + | fields l_orderkey + ] +| stats count(1) as order_count by o_orderpriority +| sort o_orderpriority +| fields o_orderpriority, order_count +``` +2. tpch q20 (nested in-subquery) +```sql +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'forest%' + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name +``` +Rewritten by PPL InSubquery query: +```sql +source = supplier +| where s_suppkey IN [ + source = partsupp + | where ps_partkey IN [ + source = part + | where like(p_name, "forest%") + | fields p_partkey + ] + | fields ps_suppkey + ] +| inner join left=l right=r on s_nationkey = n_nationkey and n_name = 'CANADA' + nation +| sort s_name +``` + +### **Additional Context** + +Most cases in the description request an `InSubquery` expression. + +The `where` command syntax is: + +``` +| where <boolean expression> +``` +So the subquery in the description is part of a boolean expression, such as + +```sql +| where orders.order_id in (subquery source=returns | where return_reason="damaged" | return order_id) +``` + +The `orders.order_id in (subquery source=...)` is a `<boolean expression>`. + +In general, we name this kind of subquery clause the `InSubquery` expression, it is a `<boolean expression>`, one kind of `subquery expressions`. 
+ +PS: there are many kinds of `subquery expressions`, another commonly used one is `ScalarSubquery` expression: + +**Subquery with Different Join Types** + +The example in the issue description is a `ScalarSubquery`: + +```sql +source=employees +| join source=sales on employees.employee_id = sales.employee_id +| where sales.sale_amount > (subquery source=targets | where target_met="true" | return target_value) +``` + +Recall the join command doc: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/ppl-lang/ppl-join-command.md#more-examples, the example is a subquery/subsearch **plan**, rather than an **expression**. + +```sql +SEARCH source=customer +| FIELDS c_custkey +| LEFT OUTER JOIN left = c, right = o ON c.c_custkey = o.o_custkey + [ + SEARCH source=orders + | WHERE o_comment NOT LIKE '%unusual%packages%' + | FIELDS o_orderkey, o_custkey + ] +| STATS ... +``` +simplified into +```sql +SEARCH <leftPlan> +| LEFT OUTER JOIN ON <condition> + [ + <rightPlan> + ] +| STATS ... +``` +Apply the syntax here and it is simplified into + +```sql +search <leftPlan> | left join on <condition> (subquery search ...) +``` + +The `(subquery search ...)` is not an `expression`, it's a `plan`, similar to the `relation` plan \ No newline at end of file From 5377af42f05a373cc986535ce5116ee4780aa84e Mon Sep 17 00:00:00 2001 From: YANGDB Date: Fri, 4 Oct 2024 11:49:48 -0700 Subject: [PATCH 8/8] add ppl subquery command documentation Signed-off-by: YANGDB --- docs/ppl-lang/ppl-dedup-command.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ppl-lang/ppl-dedup-command.md b/docs/ppl-lang/ppl-dedup-command.md index de8814c60..28fe7f4a4 100644 --- a/docs/ppl-lang/ppl-dedup-command.md +++ b/docs/ppl-lang/ppl-dedup-command.md @@ -126,7 +126,7 @@ PPL query: ### Limitation: -**Spark Support** ( >= 3.4) +**Spark Support** ( >= 3.4) To translate `dedup` command with `allowedDuplication > 1`, such as `| dedup 2 a,b` to Spark plan, the solution is translating to a plan with Window function (e.g row_number) and a new column `row_number_col` as Filter.