From 8ab81ae355592e42ab73bab8f5f829766745ab0e Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Sat, 19 Oct 2024 10:07:03 +0800 Subject: [PATCH] Refactor datetime functions docs and IT (#787) Signed-off-by: Lantao Jin --- docs/ppl-lang/functions/ppl-datetime.md | 1243 ++--------------- .../flint/spark/ppl/FlintPPLSuite.scala | 12 + ...arkPPLBuiltInDateTimeFunctionITSuite.scala | 517 +++++++ .../FlintSparkPPLBuiltinFunctionITSuite.scala | 25 - .../src/main/antlr4/OpenSearchPPLLexer.g4 | 1 + .../src/main/antlr4/OpenSearchPPLParser.g4 | 1 + .../function/BuiltinFunctionName.java | 61 +- .../ppl/utils/BuiltinFunctionTranslator.java | 4 +- 8 files changed, 662 insertions(+), 1202 deletions(-) create mode 100644 integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltInDateTimeFunctionITSuite.scala diff --git a/docs/ppl-lang/functions/ppl-datetime.md b/docs/ppl-lang/functions/ppl-datetime.md index d3ca272e3..e7b423d41 100644 --- a/docs/ppl-lang/functions/ppl-datetime.md +++ b/docs/ppl-lang/functions/ppl-datetime.md @@ -5,244 +5,32 @@ **Description:** -**Usage:** adddate(date, INTERVAL expr unit) / adddate(date, days) adds the interval of second argument to date; adddate(date, days) adds the second argument as integer number of days to date. -If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. +**Usage:** adddate(date, days) adds the second argument as integer number of days to date. +If days is negative abs(days) are subtracted from date. -Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG +Argument type: DATE, LONG **Return type map:** -(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP - (DATE, LONG) -> DATE -(TIMESTAMP/TIME, LONG) -> TIMESTAMP - -Synonyms: `DATE_ADD`_ when invoked with the INTERVAL form of the second argument. 
- Antonyms: `SUBDATE`_ Example: - os> source=people | eval `'2020-08-26' + 1h` = ADDDATE(DATE('2020-08-26'), INTERVAL 1 HOUR), `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' + 1` = ADDDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2020-08-26' + 1h`, `'2020-08-26' + 1`, `ts '2020-08-26 01:01:01' + 1` - fetched rows / total rows = 1/1 - +---------------------+--------------------+--------------------------------+ - | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | - |---------------------+--------------------+--------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | - +---------------------+--------------------+--------------------------------+ - - - -### `ADDTIME` - -**Description:** - - -**Usage:** addtime(expr1, expr2) adds expr2 to expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME - -**Return type map:** - -(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP - -(TIME, DATE/TIMESTAMP/TIME) -> TIME - -Antonyms: `SUBTIME`_ - -Example: - - os> source=people | eval `'2008-12-12' + 0` = ADDTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' + 0` - fetched rows / total rows = 1/1 - +---------------------+ - | '2008-12-12' + 0 | - |---------------------| - | 2008-12-12 00:00:00 | - +---------------------+ - - os> source=people | eval `'23:59:59' + 0` = ADDTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' + 0` - fetched rows / total rows = 1/1 - +------------------+ - | '23:59:59' + 0 | - |------------------| - | 23:59:59 | - +------------------+ - - os> source=people | eval `'2004-01-01' + '23:59:59'` = ADDTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' + '23:59:59'` - fetched rows / total rows = 1/1 - +-----------------------------+ - | '2004-01-01' + '23:59:59' | - |-----------------------------| - | 
2004-01-01 23:59:59 | - +-----------------------------+ - - os> source=people | eval `'10:20:30' + '00:05:42'` = ADDTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' + '00:05:42'` - fetched rows / total rows = 1/1 - +---------------------------+ - | '10:20:30' + '00:05:42' | - |---------------------------| - | 10:26:12 | - +---------------------------+ - - os> source=people | eval `'2007-02-28 10:20:30' + '20:40:50'` = ADDTIME(TIMESTAMP('2007-02-28 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-02-28 10:20:30' + '20:40:50'` - fetched rows / total rows = 1/1 - +--------------------------------------+ - | '2007-02-28 10:20:30' + '20:40:50' | - |--------------------------------------| - | 2007-03-01 07:01:20 | - +--------------------------------------+ - - -### `CONVERT_TZ` - - -**Description:** - - -**Usage:** convert_tz(timestamp, from_timezone, to_timezone) constructs a local timestamp converted from the from_timezone to the to_timezone. CONVERT_TZ returns null when any of the three function arguments are invalid, i.e. timestamp is not in the format yyyy-MM-dd HH:mm:ss or the timeszone is not in (+/-)HH:mm. It also is invalid for invalid dates, such as February 30th and invalid timezones, which are ones outside of -13:59 and +14:00. - -Argument type: TIMESTAMP, STRING, STRING - -Return type: TIMESTAMP - -Conversion from +00:00 timezone to +10:00 timezone. 
Returns the timestamp argument converted from +00:00 to +10:00 - -Example: - - os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | fields `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | - |-------------------------------------------------------| - | 2008-05-15 22:00:00 | - +-------------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +15:00 in this example will return null. - -Example: - - os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+15:00')| fields `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+00:00','+15:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ - -Conversion from a positive timezone to a negative timezone that goes over date line. - -Example: - - os> source=people | eval `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` = convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | 2008-05-14 22:30:00 | - +-------------------------------------------------------+ - -Valid dates are required in convert_tz, invalid dates such as April 31st (not a date in the Gregorian calendar) will result in null. 
- -Example: - - os> source=people | eval `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` = convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ - -Valid dates are required in convert_tz, invalid dates such as February 30th (not a date in the Gregorian calendar) will result in null. - -Example: - - os> source=people | eval `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ - -February 29th 2008 is a valid date because it is a leap year. - -Example: - - os> source=people | eval `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | 2008-02-28 22:30:00 | - +-------------------------------------------------------+ - -Valid dates are required in convert_tz, invalid dates such as February 29th 2007 (2007 is not a leap year) will result in null. 
- -Example: - - os> source=people | eval `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:01 in this example will return null. - -Example: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:00 in this example will return a correctly converted date time object. 
- -Example: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` + os> source=people | eval `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1) | fields `'2020-08-26' + 1` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | - |-------------------------------------------------------| - | 2008-01-31 22:00:00 | - +-------------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as -14:00 will result in null - -Example: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. This timezone is within range so it is valid and will convert the time. 
- -Example: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` = convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | - |-------------------------------------------------------| - | 2008-02-02 01:59:00 | - +-------------------------------------------------------+ - + +--------------------+ + | '2020-08-26' + 1 | + +--------------------+ + | 2020-08-27 | + +--------------------+ ### `CURDATE` **Description:** +This function requires Spark 3.4.0+. If you use an older Spark version, use `CURRENT_DATE` instead. Returns the current time as a value in 'YYYY-MM-DD'. `CURDATE()` returns the time at which it executes as `SYSDATE() <#sysdate>`_ does. @@ -280,25 +68,6 @@ Example: | 2022-08-02 | +------------------+ - -### `CURRENT_TIME` - -**Description:** - - -`CURRENT_TIME()` are synonyms for `CURTIME() <#curtime>`_. - -Example: - - > source=people | eval `CURRENT_TIME()` = CURRENT_TIME() | fields `CURRENT_TIME()` - fetched rows / total rows = 1/1 - +------------------+ - | CURRENT_TIME() | - |------------------+ - | 15:39:05 | - +------------------+ - - ### `CURRENT_TIMESTAMP` **Description:** @@ -317,29 +86,6 @@ Example: +-----------------------+ -### `CURTIME` - -**Description:** - - -Returns the current time as a value in 'hh:mm:ss'. -`CURTIME()` returns the time at which the statement began to execute as `NOW() <#now>`_ does. 
- -Return type: TIME - -Specification: CURTIME() -> TIME - -Example: - - > source=people | eval `value_1` = CURTIME(), `value_2` = CURTIME() | fields `value_1`, `value_2` - fetched rows / total rows = 1/1 - +-----------+-----------+ - | value_1 | value_2 | - |-----------+-----------| - | 15:39:05 | 15:39:05 | - +-----------+-----------+ - - ### `DATE` **Description:** @@ -377,40 +123,6 @@ Example: | 2020-08-26 | +----------------------------+ - os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` - fetched rows / total rows = 1/1 - +----------------------------+ - | DATE('2020-08-26 13:49') | - |----------------------------| - | 2020-08-26 | - +----------------------------+ - - -### `DATE_ADD` - -**Description:** - - -**Usage:** date_add(date, INTERVAL expr unit) adds the interval expr to date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, INTERVAL - -Return type: TIMESTAMP - -Synonyms: `ADDDATE`_ - -Antonyms: `DATE_SUB`_ - -Example: - - os> source=people | eval `'2020-08-26' + 1h` = DATE_ADD(DATE('2020-08-26'), INTERVAL 1 HOUR), `ts '2020-08-26 01:01:01' + 1d` = DATE_ADD(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 DAY) | fields `'2020-08-26' + 1h`, `ts '2020-08-26 01:01:01' + 1d` - fetched rows / total rows = 1/1 - +---------------------+---------------------------------+ - | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | - |---------------------+---------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | - +---------------------+---------------------------------+ - ### `DATE_FORMAT` @@ -421,42 +133,33 @@ Example: **Usage:** date_format(date, format) formats the date argument using the specifiers in the format argument. If an argument of type TIME is provided, the local date is used. 
-| Specifier | **Description:** | -|-----------|------------------| -| %a | Abbreviated weekday name (Sun..Sat) | -| %b | Abbreviated month name (Jan..Dec) | -| %c | Month, numeric (0..12) | -| %D | Day of the month with English suffix (0th, 1st, 2nd, 3rd, ...) | -| %d | Day of the month, numeric (00..31) | -| %e | Day of the month, numeric (0..31) | -| %f | Microseconds (000000..999999) | -| %H | Hour (00..23) | -| %h | Hour (01..12) | -| %I | Hour (01..12) | -| %i | Minutes, numeric (00..59) | -| %j | Day of year (001..366) | -| %k | Hour (0..23) | -| %l | Hour (1..12) | -| %M | Month name (January..December) | -| %m | Month, numeric (00..12) | -| %p | AM or PM | -| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) | -| %S | Seconds (00..59) | -| %s | Seconds (00..59) | -| %T | Time, 24-hour (hh:mm:ss) | -| %U | Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 | -| %u | Week (00..53), where Monday is the first day of the week; WEEK() mode 1 | -| %V | Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X | -| %v | Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x | -| %W | Weekday name (Sunday..Saturday) | -| %w | Day of the week (0=Sunday..6=Saturday) | -| %X | Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V | -| %x | Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v | -| %Y | Year, numeric, four digits | -| %y | Year, numeric (two digits) | -| %% | A literal % character | -| %x | x, for any “x” not listed above | -| x | x, for any smallcase/uppercase alphabet except [aydmshiHIMYDSEL] | +| Symbol | Meaning | Presentation | Examples | +|--------|-------------------------------|----------------|---------------------------------------------| +| G | era | text | AD; Anno Domini | +| y | year | year | 2020; 20 | +| D | day-of-year | number(3) | 189 | +| M/L | month-of-year | month | 
7; 07; Jul; July | +| d | day-of-month | number(3) | 28 | +| Q/q | quarter-of-year | number/text | 3; 03; Q3; 3rd quarter | +| E | day-of-week | text | Tue; Tuesday | +| F | aligned day of week in month | number(1) | 3 | +| a | am-pm-of-day | am-pm | PM | +| h | clock-hour-of-am-pm (1-12) | number(2) | 12 | +| K | hour-of-am-pm (0-11) | number(2) | 0 | +| k | clock-hour-of-day (1-24) | number(2) | 0 | +| H | hour-of-day (0-23) | number(2) | 0 | +| m | minute-of-hour | number(2) | 30 | +| s | second-of-minute | number(2) | 55 | +| S | fraction-of-second | fraction | 978 | +| V | time-zone ID | zone-id | America/Los_Angeles; Z; -08:30 | +| z | time-zone name | zone-name | Pacific Standard Time; PST | +| O | localized zone-offset | offset-O | GMT+8; GMT+08:00; UTC-08:00 | +| X | zone-offset 'Z' for zero | offset-X | Z; -08; -0830; -08:30; -083015; -08:30:15 | +| x | zone-offset | offset-x | +0000; -08; -0830; -08:30; -083015; -08:30:15 | +| Z | zone-offset | offset-Z | +0000; -0800; -08:00 | +| [ | optional section start | | | +| ] | optional section end | | | + Argument type: STRING/DATE/TIME/TIMESTAMP, STRING @@ -464,89 +167,20 @@ Return type: STRING Example: - os> source=people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')` = DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` - fetched rows / total rows = 1/1 - +------------------------------------------------------+-----------------------------------------------------------------------+ - | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | - 
|------------------------------------------------------+-----------------------------------------------------------------------| - | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | - +------------------------------------------------------+-----------------------------------------------------------------------+ - - -### `DATETIME` - - -**Description:** - -**Usage:** `DATETIME(timestamp)/ DATETIME(date, to_timezone)` Converts the datetime to a new timezone - -Argument type: timestamp/STRING - -**Return type map:** - -(TIMESTAMP, STRING) -> TIMESTAMP - -(TIMESTAMP) -> TIMESTAMP - - -Converting timestamp with timezone to the second argument timezone. - -Example: - - os> source=people | eval `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` = DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | fields `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` - fetched rows / total rows = 1/1 - +---------------------------------------------------+ - | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | - |---------------------------------------------------| - | 2004-02-29 19:00:00 | - +---------------------------------------------------+ - - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. - -Example: - - os> source=people | eval `DATETIME('2008-01-01 02:00:00', '-14:00')` = DATETIME('2008-01-01 02:00:00', '-14:00') | fields `DATETIME('2008-01-01 02:00:00', '-14:00')` - fetched rows / total rows = 1/1 - +---------------------------------------------+ - | DATETIME('2008-01-01 02:00:00', '-14:00') | - |---------------------------------------------| - | null | - +---------------------------------------------+ - - -### `DATE_SUB` - - -**Description:** - - -**Usage:** date_sub(date, INTERVAL expr unit) subtracts the interval expr from date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. 
- -Argument type: DATE/TIMESTAMP/TIME, INTERVAL - -Return type: TIMESTAMP - -Synonyms: `SUBDATE`_ - -Antonyms: `DATE_ADD`_ - -Example: - - os> source=people | eval `'2008-01-02' - 31d` = DATE_SUB(DATE('2008-01-02'), INTERVAL 31 DAY), `ts '2020-08-26 01:01:01' + 1h` = DATE_SUB(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 HOUR) | fields `'2008-01-02' - 31d`, `ts '2020-08-26 01:01:01' + 1h` + os> source=people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` fetched rows / total rows = 1/1 - +----------------------+---------------------------------+ - | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' + 1h | - |----------------------+---------------------------------| - | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | - +----------------------+---------------------------------+ + +------------------------------------------------------------------+------------------------------------------------------------------------------------+ + | `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` | `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` | + |------------------------------------------------------------------+------------------------------------------------------------------------------------| + | 13:14:15.012345 | 1998-Jan-31 01:14:15 PM | + +------------------------------------------------------------------+------------------------------------------------------------------------------------+ ### `DATEDIFF` **Usage:** Calculates the difference of date parts of given values. If the first argument is time, today's date is used. 
-Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME +Argument type: DATE/TIMESTAMP, DATE/TIMESTAMP Return type: LONG @@ -585,30 +219,6 @@ Example: +---------------------------+ -### `DAYNAME` - -**Description:** - - -**Usage:** - -`dayname(date)` returns the name of the weekday for date, including Monday, Tuesday, Wednesday, Thursday, Friday, Saturday and Sunday. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: STRING - -Example: - - os> source=people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | DAYNAME(DATE('2020-08-26')) | - |-------------------------------| - | Wednesday | - +-------------------------------+ - - ### `DAYOFMONTH` **Description:** @@ -762,76 +372,29 @@ Example: +-----------------------------------+ -### `EXTRACT` +### `DAYNAME` **Description:** +This function requires Spark 4.0.0+. -**Usage:** - -extract(part FROM date) returns a LONG with digits in order according to the given 'part' arguments. -The specific format of the returned long is determined by the table below. - -Argument type: PART, where PART is one of the following tokens in the table below. - -The format specifiers found in this table are the same as those found in the `DATE_FORMAT`_ function. 
- -| Part | Format | -|----------------------|---------------| -| MICROSECOND | %f | -| SECOND | %s | -| MINUTE | %i | -| HOUR | %H | -| DAY | %d | -| WEEK | %X | -| MONTH | %m | -| YEAR | %V | -| SECOND_MICROSECOND | %s%f | -| MINUTE_MICROSECOND | %i%s%f | -| MINUTE_SECOND | %i%s | -| HOUR_MICROSECOND | %H%i%s%f | -| HOUR_SECOND | %H%i%s | -| HOUR_MINUTE | %H%i | -| DAY_MICROSECOND | %d%H%i%s%f | -| DAY_SECOND | %d%H%i%s | -| DAY_MINUTE | %d%H%i | -| DAY_HOUR | %d%H% | -| YEAR_MONTH | %V%m | - - -Return type: LONG - -Example: - - os> source=people | eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` - fetched rows / total rows = 1/1 - +--------------------------------------------------+ - | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | - |--------------------------------------------------| - | 202302 | - +--------------------------------------------------+ - - -### `FROM_DAYS` - -**Description:** - +**Usage:** -**Usage:** from_days(N) returns the date value given the day number N. +`dayname(date)` returns the name of the weekday for date, including Monday, Tuesday, Wednesday, Thursday, Friday, Saturday and Sunday. 
-Argument type: INTEGER/LONG +Argument type: STRING/DATE/TIMESTAMP -Return type: DATE +Return type: STRING Example: - os> source=people | eval `FROM_DAYS(733687)` = FROM_DAYS(733687) | fields `FROM_DAYS(733687)` + os> source=people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +---------------------+ - | FROM_DAYS(733687) | - |---------------------| - | 2008-10-07 | - +---------------------+ + +-------------------------------+ + | DAYNAME(DATE('2020-08-26')) | + |-------------------------------| + | Wednesday | + +-------------------------------+ ### `FROM_UNIXTIME` @@ -862,40 +425,16 @@ Examples: | 2008-09-01 06:12:27 | +-----------------------------+ - os> source=people | eval `FROM_UNIXTIME(1220249547, '%T')` = FROM_UNIXTIME(1220249547, '%T') | fields `FROM_UNIXTIME(1220249547, '%T')` + os> source=people | eval `FROM_UNIXTIME(1220249547, 'HH:mm:ss')` = FROM_UNIXTIME(1220249547, 'HH:mm:ss') | fields `FROM_UNIXTIME(1220249547, 'HH:mm:ss')` fetched rows / total rows = 1/1 - +-----------------------------------+ - | FROM_UNIXTIME(1220249547, '%T') | - |-----------------------------------| - | 06:12:27 | - +-----------------------------------+ + +-----------------------------------------+ + | FROM_UNIXTIME(1220249547, 'HH:mm:ss') | + |-----------------------------------------| + | 06:12:27 | + +-----------------------------------------+ -### `GET_FORMAT` - - -**Description:** - - -**Usage:** - -Returns a string value containing string format specifiers based on the input arguments. - -Argument type: TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, TIMESTAMP], and -STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by '). 
- -Examples: - - os> source=people | eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA') | fields `GET_FORMAT(DATE, 'USA')` - fetched rows / total rows = 1/1 - +---------------------------+ - | GET_FORMAT(DATE, 'USA') | - |---------------------------| - | %m.%d.%Y | - +---------------------------+ - - -### `HOUR` +### `HOUR` **Description:** @@ -1003,92 +542,32 @@ Example: +---------------------+ -### `MAKEDATE` +### `MAKE_DATE` **Description:** -Returns a date, given `year` and `day-of-year` values. `dayofyear` must be greater than 0 or the result is `NULL`. The result is also `NULL` if either argument is `NULL`. +Returns a date, given `year`, `month` and `day` values. Arguments are rounded to an integer. -**Limitations**: -- Zero `year` interpreted as 2000; -- Negative `year` is not accepted; -- `day-of-year` should be greater than zero; -- `day-of-year` could be greater than 365/366, calculation switches to the next year(s) (see example). - **Specifications**: -1. MAKEDATE(DOUBLE, DOUBLE) -> DATE +1. MAKE_DATE(INTEGER, INTEGER, INTEGER) -> DATE -Argument type: DOUBLE +Argument type: INTEGER, INTEGER, INTEGER Return type: DATE Example: - os> source=people | eval `MAKEDATE(1945, 5.9)` = MAKEDATE(1945, 5.9), `MAKEDATE(1984, 1984)` = MAKEDATE(1984, 1984) | fields `MAKEDATE(1945, 5.9)`, `MAKEDATE(1984, 1984)` - fetched rows / total rows = 1/1 - +-----------------------+------------------------+ - | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | - |-----------------------+------------------------| - | 1945-01-06 | 1989-06-06 | - +-----------------------+------------------------+ - - -### `MAKETIME` - - -**Description:** - - -Returns a time value calculated from the hour, minute, and second arguments. Returns `NULL` if any of its arguments are `NULL`. -The second argument can have a fractional part, rest arguments are rounded to an integer. 
- -**Limitations**: -- 24-hour clock is used, available time range is [00:00:00.0 - 23:59:59.(9)]; -- Up to 9 digits of second fraction part is taken (nanosecond precision). - -**Specifications**: - -1. `MAKETIME(DOUBLE, DOUBLE, DOUBLE)` -> TIME - -Argument type: DOUBLE - -Return type: TIME - -Example: - - os> source=people | eval `MAKETIME(20, 30, 40)` = MAKETIME(20, 30, 40), `MAKETIME(20.2, 49.5, 42.100502)` = MAKETIME(20.2, 49.5, 42.100502) | fields `MAKETIME(20, 30, 40)`, `MAKETIME(20.2, 49.5, 42.100502)` + os> source=people | eval `MAKE_DATE(1945, 5, 9)` = MAKE_DATE(1945, 5, 9) | fields `MAKE_DATE(1945, 5, 9)` fetched rows / total rows = 1/1 - +------------------------+-----------------------------------+ - | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | - |------------------------+-----------------------------------| - | 20:30:40 | 20:50:42.100502 | - +------------------------+-----------------------------------+ - - -### `MICROSECOND` - -**Description:** - - -**Usage:** microsecond(expr) returns the microseconds from the time or timestamp expression expr as a number in the range from 0 to 999999. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Example: - - os> source=people | eval `MICROSECOND(TIME('01:02:03.123456'))` = MICROSECOND(TIME('01:02:03.123456')) | fields `MICROSECOND(TIME('01:02:03.123456'))` - fetched rows / total rows = 1/1 - +----------------------------------------+ - | MICROSECOND(TIME('01:02:03.123456')) | - |----------------------------------------| - | 123456 | - +----------------------------------------+ + +------------------------+ + | MAKE_DATE(1945, 5, 9) | + |------------------------| + | 1945-05-09 | + +------------------------+ ### `MINUTE` @@ -1115,28 +594,6 @@ Example: +----------------------------+ -### `MINUTE_OF_DAY` - -**Description:** - - -**Usage:** minute(time) returns the amount of minutes in the day, in the range of 0 to 1439. 
- -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Example: - - os> source=people | eval `MINUTE_OF_DAY(TIME('01:02:03'))` = MINUTE_OF_DAY(TIME('01:02:03')) | fields `MINUTE_OF_DAY(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +-----------------------------------+ - | MINUTE_OF_DAY(TIME('01:02:03')) | - |-----------------------------------| - | 62 | - +-----------------------------------+ - - ### `MINUTE_OF_HOUR` **Description:** @@ -1210,6 +667,8 @@ Example: ### `MONTHNAME` +This function requires Spark 4.0.0+. + **Description:** @@ -1253,51 +712,6 @@ Example: +---------------------+---------------------+ -### `PERIOD_ADD` - - -**Description:** - - -**Usage:** period_add(P, N) add N months to period P (in the format YYMM or YYYYMM). Returns a value in the format YYYYMM. - -Argument type: INTEGER, INTEGER - -Return type: INTEGER - -Example: - - os> source=people | eval `PERIOD_ADD(200801, 2)` = PERIOD_ADD(200801, 2), `PERIOD_ADD(200801, -12)` = PERIOD_ADD(200801, -12) | fields `PERIOD_ADD(200801, 2)`, `PERIOD_ADD(200801, -12)` - fetched rows / total rows = 1/1 - +-------------------------+---------------------------+ - | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | - |-------------------------+---------------------------| - | 200803 | 200701 | - +-------------------------+---------------------------+ - - -### `PERIOD_DIFF` - -**Description:** - - -**Usage:** period_diff(P1, P2) returns the number of months between periods P1 and P2 given in the format YYMM or YYYYMM. 
- -Argument type: INTEGER, INTEGER - -Return type: INTEGER - -Example: - - os> source=people | eval `PERIOD_DIFF(200802, 200703)` = PERIOD_DIFF(200802, 200703), `PERIOD_DIFF(200802, 201003)` = PERIOD_DIFF(200802, 201003) | fields `PERIOD_DIFF(200802, 200703)`, `PERIOD_DIFF(200802, 201003)` - fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------+ - | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | - |-------------------------------+-------------------------------| - | 11 | -25 | - +-------------------------------+-------------------------------+ - - ### `QUARTER` **Description:** @@ -1320,33 +734,6 @@ Example: +-------------------------------+ -### `SEC_TO_TIME` - -**Description:** - - -**Usage:** - -sec_to_time(number) returns the time in HH:mm:ssss[.nnnnnn] format. -Note that the function returns a time between 00:00:00 and 23:59:59. -If an input value is too large (greater than 86399), the function will wrap around and begin returning outputs starting from 00:00:00. -If an input value is too small (less than 0), the function will wrap around and begin returning outputs counting down from 23:59:59. 
- -Argument type: INTEGER, LONG, DOUBLE, FLOAT - -Return type: TIME - -Example: - - os> source=people | eval `SEC_TO_TIME(3601)` = SEC_TO_TIME(3601) | eval `SEC_TO_TIME(1234.123)` = SEC_TO_TIME(1234.123) | fields `SEC_TO_TIME(3601)`, `SEC_TO_TIME(1234.123)` - fetched rows / total rows = 1/1 - +---------------------+-------------------------+ - | SEC_TO_TIME(3601) | SEC_TO_TIME(1234.123) | - |---------------------+-------------------------| - | 01:00:01 | 00:20:34.123 | - +---------------------+-------------------------+ - - ### `SECOND` **Description:** @@ -1395,56 +782,24 @@ Example: +--------------------------------------+ -### `STR_TO_DATE` - -**Description:** - - -**Usage:** str_to_date(string, string) is used to extract a TIMESTAMP from the first argument string using the formats specified in the second argument string. -The input argument must have enough information to be parsed as a DATE, TIMESTAMP, or TIME. -Acceptable string format specifiers are the same as those used in the `DATE_FORMAT`_ function. -It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. Otherwise, it will return a TIMESTAMP with the parsed values (as well as default values for any field that was not parsed). - -Argument type: STRING, STRING - -Return type: TIMESTAMP - -Example: - - OS> source=people | eval `str_to_date("01,5,2013", "%d,%m,%Y")` = str_to_date("01,5,2013", "%d,%m,%Y") | fields = `str_to_date("01,5,2013", "%d,%m,%Y")` - fetched rows / total rows = 1/1 - +----------------------------------------+ - | str_to_date("01,5,2013", "%d,%m,%Y") | - |----------------------------------------| - | 2013-05-01 00:00:00 | - +----------------------------------------+ - - ### `SUBDATE` **Description:** -**Usage:** subdate(date, INTERVAL expr unit) / subdate(date, days) subtracts the interval expr from date; subdate(date, days) subtracts the second argument as integer number of days from date. 
-If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. +**Usage:** subdate(date, days) subtracts the second argument as integer number of days from date. -Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG +Argument type: DATE/TIMESTAMP, LONG **Return type map:** -(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP - (DATE, LONG) -> DATE -(TIMESTAMP/TIME, LONG) -> TIMESTAMP - -Synonyms: `DATE_SUB`_ when invoked with the INTERVAL form of the second argument. - Antonyms: `ADDDATE`_ Example: - os> source=people | eval `'2008-01-02' - 31d` = SUBDATE(DATE('2008-01-02'), INTERVAL 31 DAY), `'2020-08-26' - 1` = SUBDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' - 1` = SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2008-01-02' - 31d`, `'2020-08-26' - 1`, `ts '2020-08-26 01:01:01' - 1` + os> source=people | eval `'2008-01-02' - 31d` = SUBDATE(DATE('2008-01-02'), 31), `'2020-08-26' - 1` = SUBDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' - 1` = SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2008-01-02' - 31d`, `'2020-08-26' - 1`, `ts '2020-08-26 01:01:01' - 1` fetched rows / total rows = 1/1 +----------------------+--------------------+--------------------------------+ | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | @@ -1453,72 +808,12 @@ Example: +----------------------+--------------------+--------------------------------+ -### `SUBTIME` - -**Description:** - - -**Usage:** subtime(expr1, expr2) subtracts expr2 from expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. 
- -Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME - -**Return type map:** - -(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP - -(TIME, DATE/TIMESTAMP/TIME) -> TIME - -Antonyms: `ADDTIME`_ - -Example: - - os> source=people | eval `'2008-12-12' - 0` = SUBTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' - 0` - fetched rows / total rows = 1/1 - +---------------------+ - | '2008-12-12' - 0 | - |---------------------| - | 2008-12-12 00:00:00 | - +---------------------+ - - os> source=people | eval `'23:59:59' - 0` = SUBTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' - 0` - fetched rows / total rows = 1/1 - +------------------+ - | '23:59:59' - 0 | - |------------------| - | 23:59:59 | - +------------------+ - - os> source=people | eval `'2004-01-01' - '23:59:59'` = SUBTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' - '23:59:59'` - fetched rows / total rows = 1/1 - +-----------------------------+ - | '2004-01-01' - '23:59:59' | - |-----------------------------| - | 2003-12-31 00:00:01 | - +-----------------------------+ - - os> source=people | eval `'10:20:30' - '00:05:42'` = SUBTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' - '00:05:42'` - fetched rows / total rows = 1/1 - +---------------------------+ - | '10:20:30' - '00:05:42' | - |---------------------------| - | 10:14:48 | - +---------------------------+ - - os> source=people | eval `'2007-03-01 10:20:30' - '20:40:50'` = SUBTIME(TIMESTAMP('2007-03-01 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-03-01 10:20:30' - '20:40:50'` - fetched rows / total rows = 1/1 - +--------------------------------------+ - | '2007-03-01 10:20:30' - '20:40:50' | - |--------------------------------------| - | 2007-02-28 13:39:40 | - +--------------------------------------+ - - ### `SYSDATE` **Description:** -Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss[.nnnnnn]'. 
+Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss.nnnnnn'. SYSDATE() returns the time at which it executes. This differs from the behavior for `NOW() <#now>`_, which returns a constant time that indicates the time at which the statement began to execute. If the argument is given, it specifies a fractional seconds precision from 0 to 6, the return value includes a fractional seconds part of that many digits. @@ -1526,152 +821,17 @@ Optional argument type: INTEGER Return type: TIMESTAMP -Specification: SYSDATE([INTEGER]) -> TIMESTAMP - -Example: - - > source=people | eval `value_1` = SYSDATE(), `value_2` = SYSDATE(6) | fields `value_1`, `value_2` - fetched rows / total rows = 1/1 - +---------------------+----------------------------+ - | value_1 | value_2 | - |---------------------+----------------------------| - | 2022-08-02 15:39:05 | 2022-08-02 15:39:05.123456 | - +---------------------+----------------------------+ - - -### `TIME` - -**Description:** - - -**Usage:** time(expr) constructs a time type with the input string expr as a time. If the argument is of date/time/timestamp, it extracts the time value part from the expression. 
- -Argument type: STRING/DATE/TIME/TIMESTAMP - -Return type: TIME - Example: - os> source=people | eval `TIME('13:49:00')` = TIME('13:49:00') | fields `TIME('13:49:00')` - fetched rows / total rows = 1/1 - +--------------------+ - | TIME('13:49:00') | - |--------------------| - | 13:49:00 | - +--------------------+ - - os> source=people | eval `TIME('13:49')` = TIME('13:49') | fields `TIME('13:49')` - fetched rows / total rows = 1/1 - +-----------------+ - | TIME('13:49') | - |-----------------| - | 13:49:00 | - +-----------------+ - - os> source=people | eval `TIME('2020-08-26 13:49:00')` = TIME('2020-08-26 13:49:00') | fields `TIME('2020-08-26 13:49:00')` - fetched rows / total rows = 1/1 - +-------------------------------+ - | TIME('2020-08-26 13:49:00') | - |-------------------------------| - | 13:49:00 | - +-------------------------------+ - - os> source=people | eval `TIME('2020-08-26 13:49')` = TIME('2020-08-26 13:49') | fields `TIME('2020-08-26 13:49')` + > source=people | eval `SYSDATE()` = SYSDATE() | fields `SYSDATE()` fetched rows / total rows = 1/1 +----------------------------+ - | TIME('2020-08-26 13:49') | - |----------------------------| - | 13:49:00 | + | SYSDATE() | + |----------------------------+ + | 2022-08-02 15:39:05.123456 | +----------------------------+ -### `TIME_FORMAT` - - -**Description:** - - -**Usage:** - -time_format(time, format) formats the time argument using the specifiers in the format argument. -This supports a subset of the time format specifiers available for the `date_format`_ function. -Using date format specifiers supported by `date_format`_ will return 0 or null. -Acceptable format specifiers are listed in the table below. -If an argument of type DATE is passed in, it is treated as a TIMESTAMP at midnight (i.e., 00:00:00). 
- -| Specifier | **Description** | -|-----------|-----------------| -| %f | Microseconds (000000..999999) | -| %H | Hour (00..23) | -| %h | Hour (01..12) | -| %I | Hour (01..12) | -| %i | Minutes, numeric (00..59) | -| %p | AM or PM | -| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) | -| %S | Seconds (00..59) | -| %s | Seconds (00..59) | -| %T | Time, 24-hour (hh:mm:ss) | - - -Argument type: STRING/DATE/TIME/TIMESTAMP, STRING - -Return type: STRING - -Example: - - os> source=people | eval `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` = TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | fields `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` - fetched rows / total rows = 1/1 - +------------------------------------------------------------------------------+ - | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | - |------------------------------------------------------------------------------| - | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | - +------------------------------------------------------------------------------+ - - -### `TIME_TO_SEC` - -**Description:** - - -**Usage:** time_to_sec(time) returns the time argument, converted to seconds. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: LONG - -Example: - - os> source=people | eval `TIME_TO_SEC(TIME('22:23:00'))` = TIME_TO_SEC(TIME('22:23:00')) | fields `TIME_TO_SEC(TIME('22:23:00'))` - fetched rows / total rows = 1/1 - +---------------------------------+ - | TIME_TO_SEC(TIME('22:23:00')) | - |---------------------------------| - | 80580 | - +---------------------------------+ - - -### `TIMEDIFF` - -**Description:** - - -**Usage:** returns the difference between two time expressions as a time. 
- -Argument type: TIME, TIME - -Return type: TIME - -Example: - - os> source=people | eval `TIMEDIFF('23:59:59', '13:00:00')` = TIMEDIFF('23:59:59', '13:00:00') | fields `TIMEDIFF('23:59:59', '13:00:00')` - fetched rows / total rows = 1/1 - +------------------------------------+ - | TIMEDIFF('23:59:59', '13:00:00') | - |------------------------------------| - | 10:59:59 | - +------------------------------------+ - - ### `TIMESTAMP` **Description:** @@ -1699,103 +859,6 @@ Example: +------------------------------------+------------------------------------------------------+ -### `TIMESTAMPADD` - -**Description:** - - -**Usage:** Returns a TIMESTAMP value based on a passed in DATE/TIME/TIMESTAMP/STRING argument and an INTERVAL and INTEGER argument which determine the amount of time to be added. -If the third argument is a STRING, it must be formatted as a valid TIMESTAMP. If only a TIME is provided, a TIMESTAMP is still returned with the DATE portion filled in using the current date. -If the third argument is a DATE, it will be automatically converted to a TIMESTAMP. 
- -Argument type: INTERVAL, INTEGER, DATE/TIME/TIMESTAMP/STRING - -INTERVAL must be one of the following tokens: `[MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR]` - -Examples: - - os> source=people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` - fetched rows / total rows = 1/1 - +------------------------------------------------+----------------------------------------------------+ - | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | - |------------------------------------------------+----------------------------------------------------| - | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | - +------------------------------------------------+----------------------------------------------------+ - - -### `TIMESTAMPDIFF` - -**Description:** - - -**Usage:** - -`TIMESTAMPDIFF(interval, start, end)` returns the difference between the start and end date/times in interval units. -If a TIME is provided as an argument, it will be converted to a TIMESTAMP with the DATE portion filled in using the current date. -Arguments will be automatically converted to a TIME/TIMESTAMP when appropriate. -Any argument that is a STRING must be formatted as a valid TIMESTAMP. 
- -Argument type: INTERVAL, DATE/TIME/TIMESTAMP/STRING, DATE/TIME/TIMESTAMP/STRING - -INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] - -Examples: - - os> source=people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` - fetched rows / total rows = 1/1 - +---------------------------------------------------------------------+-------------------------------------------------------------+ - | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | - |---------------------------------------------------------------------+-------------------------------------------------------------| - | 4 | -23 | - +---------------------------------------------------------------------+-------------------------------------------------------------+ - - -### `TO_DAYS` - -**Description:** - - -**Usage:** to_days(date) returns the day number (the number of days since year 0) of the given date. Returns NULL if date is invalid. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: LONG - -Example: - - os> source=people | eval `TO_DAYS(DATE('2008-10-07'))` = TO_DAYS(DATE('2008-10-07')) | fields `TO_DAYS(DATE('2008-10-07'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | TO_DAYS(DATE('2008-10-07')) | - |-------------------------------| - | 733687 | - +-------------------------------+ - - -### `TO_SECONDS` - - -**Description:** - - -**Usage:** to_seconds(date) returns the number of seconds since the year 0 of the given value. Returns NULL if value is invalid. 
-An argument of a LONG type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD or YYYYMMDD. Note that a LONG type argument cannot have leading 0s as it will be parsed using an octal numbering system. - -Argument type: STRING/LONG/DATE/TIME/TIMESTAMP - -Return type: LONG - -Example: - - os> source=people | eval `TO_SECONDS(DATE('2008-10-07'))` = TO_SECONDS(DATE('2008-10-07')) | eval `TO_SECONDS(950228)` = TO_SECONDS(950228) | fields `TO_SECONDS(DATE('2008-10-07'))`, `TO_SECONDS(950228)` - fetched rows / total rows = 1/1 - +----------------------------------+----------------------+ - | TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) | - |----------------------------------+----------------------| - | 63390556800 | 62961148800 | - +----------------------------------+----------------------+ - - ### `UNIX_TIMESTAMP` @@ -1824,89 +887,11 @@ Example: +--------------------------+-----------------------------+ -### `UTC_DATE` - -**Description:** - - -Returns the current UTC date as a value in 'YYYY-MM-DD'. - -Return type: DATE - -Specification: UTC_DATE() -> DATE - -Example: - - > source=people | eval `UTC_DATE()` = UTC_DATE() | fields `UTC_DATE()` - fetched rows / total rows = 1/1 - +--------------+ - | UTC_DATE() | - |--------------| - | 2022-10-03 | - +--------------+ - - -### `UTC_TIME` - - -**Description:** - - -Returns the current UTC time as a value in 'hh:mm:ss'. - -Return type: TIME - -Specification: UTC_TIME() -> TIME - -Example: - - > source=people | eval `UTC_TIME()` = UTC_TIME() | fields `UTC_TIME()` - fetched rows / total rows = 1/1 - +--------------+ - | UTC_TIME() | - |--------------| - | 17:54:27 | - +--------------+ - - -### `UTC_TIMESTAMP` - -**Description:** - - -Returns the current UTC timestamp as a value in 'YYYY-MM-DD hh:mm:ss'. 
- -Return type: TIMESTAMP - -Specification: UTC_TIMESTAMP() -> TIMESTAMP - -Example: - - > source=people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()` - fetched rows / total rows = 1/1 - +---------------------+ - | UTC_TIMESTAMP() | - |---------------------| - | 2022-10-03 17:54:28 | - +---------------------+ - - ### `WEEK` **Description:** -**Usage:** week(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used. - -| Mode | First day of week | Range | Week 1 is the first week... | -|------|-------------------|-------|-----------------------------| -| 0 | Sunday | 0-53 | with a Sunday in this year | -| 1 | Monday | 0-53 | with 4 or more days this year | -| 2 | Sunday | 1-53 | with a Sunday in this year | -| 3 | Monday | 1-53 | with 4 or more days this year | -| 4 | Sunday | 0-53 | with 4 or more days this year | -| 5 | Monday | 0-53 | with a Monday in this year | -| 6 | Sunday | 1-53 | with 4 or more days this year | -| 7 | Monday | 1-53 | with a Monday in this year | +**Usage:** week(date) returns the week number for date. 
Argument type: DATE/TIMESTAMP/STRING @@ -1917,13 +902,13 @@ Synonyms: `WEEK_OF_YEAR`_ Example: - os> source=people | eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK(DATE('2008-02-20'), 1)` = WEEK(DATE('2008-02-20'), 1) | fields `WEEK(DATE('2008-02-20'))`, `WEEK(DATE('2008-02-20'), 1)` + os> source=people | eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')) | fields `WEEK(DATE('2008-02-20'))` fetched rows / total rows = 1/1 - +----------------------------+-------------------------------+ - | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | - |----------------------------+-------------------------------| - | 7 | 8 | - +----------------------------+-------------------------------+ + +----------------------------+ + | WEEK(DATE('2008-02-20')) | + |----------------------------+ + | 8 | + +----------------------------+ ### `WEEKDAY` @@ -1955,18 +940,7 @@ Example: **Description:** -**Usage:** week_of_year(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used. - -| Mode | First day of week | Range | Week 1 is the first week ... | -|------|-------------------|-------|------------------------------| -| 0 | Sunday | 0-53 | with a Sunday in this year | -| 1 | Monday | 0-53 | with 4 or more days this year| -| 2 | Sunday | 1-53 | with a Sunday in this year | -| 3 | Monday | 1-53 | with 4 or more days this year| -| 4 | Sunday | 0-53 | with 4 or more days this year| -| 5 | Monday | 0-53 | with a Monday in this year | -| 6 | Sunday | 1-53 | with 4 or more days this year| -| 7 | Monday | 1-53 | with a Monday in this year | +**Usage:** week_of_year(date) returns the week number for date. 
Argument type: DATE/TIMESTAMP/STRING @@ -1977,13 +951,13 @@ Synonyms: `WEEK`_ Example: - os> source=people | eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` = WEEK_OF_YEAR(DATE('2008-02-20'), 1) | fields `WEEK_OF_YEAR(DATE('2008-02-20'))`, `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` + os> source=people | eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK_OF_YEAR(DATE('2008-02-20')) | fields `WEEK_OF_YEAR(DATE('2008-02-20'))` fetched rows / total rows = 1/1 - +------------------------------------+---------------------------------------+ - | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | - |------------------------------------+---------------------------------------| - | 7 | 8 | - +------------------------------------+---------------------------------------+ + +------------------------------------+ + | WEEK_OF_YEAR(DATE('2008-02-20')) | + |------------------------------------+ + | 8 | + +------------------------------------+ ### `YEAR` @@ -2008,26 +982,3 @@ Example: +----------------------------+ -### `YEARWEEK` - - -**Description:** - - -**Usage:** yearweek(date) returns the year and week for date as an integer. It accepts and optional mode arguments aligned with those available for the `WEEK`_ function. 
- -Argument type: STRING/DATE/TIME/TIMESTAMP - -Return type: INTEGER - -Example: - - os> source=people | eval `YEARWEEK('2020-08-26')` = YEARWEEK('2020-08-26') | eval `YEARWEEK('2019-01-05', 1)` = YEARWEEK('2019-01-05', 1) | fields `YEARWEEK('2020-08-26')`, `YEARWEEK('2019-01-05', 1)` - fetched rows / total rows = 1/1 - +--------------------------+-----------------------------+ - | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) | - |--------------------------+-----------------------------| - | 202034 | 201901 | - +--------------------------+-----------------------------+ - - diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintPPLSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintPPLSuite.scala index 1ece33ce1..26940020f 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintPPLSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintPPLSuite.scala @@ -8,6 +8,7 @@ package org.opensearch.flint.spark.ppl import org.opensearch.flint.spark.{FlintPPLSparkExtensions, FlintSparkExtensions, FlintSparkSuite} import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.flint.config.FlintSparkConf.OPTIMIZER_RULE_ENABLED @@ -24,4 +25,15 @@ trait FlintPPLSuite extends FlintSparkSuite { .set(OPTIMIZER_RULE_ENABLED.key, "false") conf } + + def assertSameRows(expected: Seq[Row], df: DataFrame): Unit = { + QueryTest.sameRows(expected, df.collect().toSeq).foreach { results => + fail(s""" + |Results do not match for query: + |${df.queryExecution} + |== Results == + |$results + """.stripMargin) + } + } } diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltInDateTimeFunctionITSuite.scala 
b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltInDateTimeFunctionITSuite.scala new file mode 100644 index 000000000..71ed72814 --- /dev/null +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltInDateTimeFunctionITSuite.scala @@ -0,0 +1,517 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import java.sql.{Date, Timestamp} + +import org.opensearch.sql.ppl.utils.DataTypeTransformer.seq + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation} +import org.apache.spark.sql.catalyst.expressions.{GreaterThan, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} +import org.apache.spark.sql.streaming.StreamTest + +class FlintSparkPPLBuiltInDateTimeFunctionITSuite + extends QueryTest + with LogicalPlanTestUtils + with FlintPPLSuite + with StreamTest { + + /** Test table and index name */ + private val testTable = "spark_catalog.default.flint_ppl_test" + + override def beforeAll(): Unit = { + super.beforeAll() + + // Create test table + createPartitionedStateCountryTable(testTable) + } + + protected override def afterEach(): Unit = { + super.afterEach() + // Stop all streaming jobs if any + spark.streams.active.foreach { job => + job.stop() + job.awaitTermination() + } + } + + test("test adddate(date, numDays)") { + val frame = sql(s""" + | source = $testTable + | | eval `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1), `'2020-08-26' + (-1)` = ADDDATE(DATE('2020-08-26'), -1) + | | fields `'2020-08-26' + 1`, `'2020-08-26' + (-1)` | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(Date.valueOf("2020-08-27"), Date.valueOf("2020-08-25"))), frame) + } + + test("test subdate(date, numDays)") { + val frame = sql(s""" + | source = $testTable + | | eval `'2020-08-26' 
- 1` = SUBDATE(DATE('2020-08-26'), 1), `'2020-08-26' - (-1)` = SUBDATE(DATE('2020-08-26'), -1) + | | fields `'2020-08-26' - 1`, `'2020-08-26' - (-1)` | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(Date.valueOf("2020-08-25"), Date.valueOf("2020-08-27"))), frame) + } + + test("test CURRENT_DATE, CURDATE are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `CURRENT_DATE` = CURRENT_DATE(), `CURDATE` = CURDATE() + | | where CURRENT_DATE = CURDATE + | | fields CURRENT_DATE, CURDATE | head 1 + | """.stripMargin) + val results: Array[Row] = frame.collect() + assert(results.length == 1) + } + + test("test LOCALTIME, LOCALTIMESTAMP, NOW are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `LOCALTIME` = LOCALTIME(), `LOCALTIMESTAMP` = LOCALTIMESTAMP(), `NOW` = NOW() + | | where LOCALTIME = LOCALTIMESTAMP and LOCALTIME = NOW + | | fields LOCALTIME, LOCALTIMESTAMP, NOW | head 1 + | """.stripMargin) + val results: Array[Row] = frame.collect() + assert(results.length == 1) + } + + test("test DATE, TIMESTAMP") { + val frame = sql(s""" + | source = $testTable + | | eval `DATE('2020-08-26')` = DATE('2020-08-26') + | | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) + | | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') + | | fields `DATE('2020-08-26')`, `DATE(TIMESTAMP('2020-08-26 13:49:00'))`, `DATE('2020-08-26 13:49')` + | | head 1 + | """.stripMargin) + assertSameRows( + Seq( + Row(Date.valueOf("2020-08-26"), Date.valueOf("2020-08-26"), Date.valueOf("2020-08-26"))), + frame) + } + + test("test DATE_FORMAT") { + val frame = sql(s""" + | source = $testTable + | | eval format1 = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') + | | eval format2 = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS') + | | fields format1, format2 + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row("1998-Jan-31 01:14:15 PM", "13:14:15.012345")), frame) + } + 
+ test("test DATEDIFF") { + val frame = sql(s""" + | source = $testTable + | | eval diff1 = DATEDIFF(DATE('2020-08-27'), DATE('2020-08-26')) + | | eval diff2 = DATEDIFF(DATE('2020-08-26'), DATE('2020-08-27')) + | | eval diff3 = DATEDIFF(DATE('2020-08-27'), DATE('2020-08-27')) + | | eval diff4 = DATEDIFF(DATE('2020-08-26'), '2020-08-27') + | | eval diff5 = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')) + | | eval diff6 = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')) + | | fields diff1, diff2, diff3, diff4, diff5, diff6 + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(1, -1, 0, -1, 1, -1064)), frame) + } + + test("test DAY, DAYOFMONTH, DAY_OF_MONTH are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) + | | eval `DAYOFMONTH(DATE('2020-08-26'))` = DAYOFMONTH(DATE('2020-08-26')) + | | eval `DAY_OF_MONTH(DATE('2020-08-26'))` = DAY_OF_MONTH(DATE('2020-08-26')) + | | fields `DAY(DATE('2020-08-26'))`, `DAYOFMONTH(DATE('2020-08-26'))`, `DAY_OF_MONTH(DATE('2020-08-26'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(26, 26, 26)), frame) + } + + test("test DAYOFWEEK, DAY_OF_WEEK are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `DAYOFWEEK(DATE('2020-08-26'))` = DAYOFWEEK(DATE('2020-08-26')) + | | eval `DAY_OF_WEEK(DATE('2020-08-26'))` = DAY_OF_WEEK(DATE('2020-08-26')) + | | fields `DAYOFWEEK(DATE('2020-08-26'))`, `DAY_OF_WEEK(DATE('2020-08-26'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(4, 4)), frame) + } + + test("test DAYOFYEAR, DAY_OF_YEAR are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `DAY_OF_YEAR(DATE('2020-08-26'))` = DAY_OF_YEAR(DATE('2020-08-26')) + | | eval `DAYOFYEAR(DATE('2020-08-26'))` = DAYOFYEAR(DATE('2020-08-26')) + | | fields `DAY_OF_YEAR(DATE('2020-08-26'))`, `DAYOFYEAR(DATE('2020-08-26'))` + | | head 1 + | """.stripMargin) + 
assertSameRows(Seq(Row(239, 239)), frame) + } + + test("test WEEK, WEEK_OF_YEAR are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')) + | | eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK_OF_YEAR(DATE('2008-02-20')) + | | fields `WEEK(DATE('2008-02-20'))`, `WEEK_OF_YEAR(DATE('2008-02-20'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(8, 8)), frame) + } + + test("test MONTH, MONTH_OF_YEAR are synonyms") { + val frame = sql(s""" + | source = $testTable + | | eval `MONTH(DATE('2020-08-26'))` = MONTH(DATE('2020-08-26')) + | | eval `MONTH_OF_YEAR(DATE('2020-08-26'))` = MONTH_OF_YEAR(DATE('2020-08-26')) + | | fields `MONTH(DATE('2020-08-26'))`, `MONTH_OF_YEAR(DATE('2020-08-26'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(8, 8)), frame) + } + test("test WEEKDAY") { + val frame = sql(s""" + | source = $testTable + | | eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26')) + | | eval `weekday(DATE('2020-08-27'))` = weekday(DATE('2020-08-27')) + | | fields `weekday(DATE('2020-08-26'))`, `weekday(DATE('2020-08-27'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(2, 3)), frame) + } + + test("test YEAR") { + val frame = sql(s""" + | source = $testTable + | | eval `YEAR(DATE('2020-08-26'))` = YEAR(DATE('2020-08-26')) | fields `YEAR(DATE('2020-08-26'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(2020)), frame) + } + + test("test from_unixtime and unix_timestamp") { + val frame = sql(s""" + | source = $testTable |where unix_timestamp(from_unixtime(1700000001)) > 1700000000 | fields name, age + | """.stripMargin) + assertSameRows( + Seq(Row("Jake", 70), Row("Hello", 30), Row("John", 25), Row("Jane", 20)), + frame) + + val logicalPlan: LogicalPlan = frame.queryExecution.logical + val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) + val filterExpr = GreaterThan( + UnresolvedFunction( + "unix_timestamp", + 
seq(UnresolvedFunction("from_unixtime", seq(Literal(1700000001)), isDistinct = false)), + isDistinct = false), + Literal(1700000000)) + val filterPlan = Filter(filterExpr, table) + val projectList = Seq(UnresolvedAttribute("name"), UnresolvedAttribute("age")) + val expectedPlan = Project(projectList, filterPlan) + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test hour, minute, second, HOUR_OF_DAY, MINUTE_OF_HOUR") { + val frame = sql(s""" + | source = $testTable + | | eval h = hour(timestamp('01:02:03')), m = minute(timestamp('01:02:03')), s = second(timestamp('01:02:03')) + | | eval hs = hour('2024-07-30 01:02:03'), ms = minute('2024-07-30 01:02:03'), ss = second('01:02:03') + | | eval h_d = HOUR_OF_DAY(timestamp('01:02:03')), m_h = MINUTE_OF_HOUR(timestamp('01:02:03')), s_m = SECOND_OF_MINUTE(timestamp('01:02:03')) + | | fields h, m, s, hs, ms, ss, h_d, m_h, s_m | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(1, 2, 3, 1, 2, 3, 1, 2, 3)), frame) + } + + test("test LAST_DAY") { + val frame = sql(s""" + | source = $testTable + | | eval `last_day('2023-02-06')` = last_day('2023-02-06') + | | fields `last_day('2023-02-06')` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(Date.valueOf("2023-02-28"))), frame) + } + + test("test MAKE_DATE") { + val frame = sql(s""" + | source = $testTable + | | eval `MAKE_DATE(1945, 5, 9)` = MAKE_DATE(1945, 5, 9) | fields `MAKE_DATE(1945, 5, 9)` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(Date.valueOf("1945-05-09"))), frame) + } + + test("test QUARTER") { + val frame = sql(s""" + | source = $testTable + | | eval `QUARTER(DATE('2020-08-26'))` = QUARTER(DATE('2020-08-26')) | fields `QUARTER(DATE('2020-08-26'))` + | | head 1 + | """.stripMargin) + assertSameRows(Seq(Row(3)), frame) + } + + test("test CURRENT_TIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `CURRENT_TIME` = CURRENT_TIME() + | | fields 
CURRENT_TIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("CURRENT_TIME is not a builtin function of PPL")) + } + + test("test CONVERT_TZ is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `CONVERT_TZ` = CONVERT_TZ() + | | fields CONVERT_TZ | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("CONVERT_TZ is not a builtin function of PPL")) + } + + test("test ADDTIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `ADDTIME` = ADDTIME() + | | fields ADDTIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("ADDTIME is not a builtin function of PPL")) + } + + test("test DATE_ADD is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `DATE_ADD` = DATE_ADD() + | | fields DATE_ADD | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("DATE_ADD is not a builtin function of PPL")) + } + + test("test DATE_SUB is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `DATE_SUB` = DATE_SUB() + | | fields DATE_SUB | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("DATE_SUB is not a builtin function of PPL")) + } + + test("test DATETIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `DATETIME` = DATETIME() + | | fields DATETIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("DATETIME is not a builtin function of PPL")) + } + + test("test DAYNAME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `DAYNAME` = DAYNAME() + | | fields DAYNAME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("DAYNAME is not a builtin function of PPL")) + } + + test("test FROM_DAYS is not supported") { + val ex = 
intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `FROM_DAYS` = FROM_DAYS() + | | fields FROM_DAYS | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("FROM_DAYS is not a builtin function of PPL")) + } + + test("test GET_FORMAT is not supported") { + intercept[Exception](sql(s""" + | source = $testTable + | | eval `GET_FORMAT` = GET_FORMAT(DATE, 'USA') + | | fields GET_FORMAT | head 1 + | """.stripMargin)) + } + + test("test MAKETIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `MAKETIME` = MAKETIME() + | | fields MAKETIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("MAKETIME is not a builtin function of PPL")) + } + + test("test MICROSECOND is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `MICROSECOND` = MICROSECOND() + | | fields MICROSECOND | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("MICROSECOND is not a builtin function of PPL")) + } + + test("test MINUTE_OF_DAY is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `MINUTE_OF_DAY` = MINUTE_OF_DAY() + | | fields MINUTE_OF_DAY | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("MINUTE_OF_DAY is not a builtin function of PPL")) + } + + test("test PERIOD_ADD is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `PERIOD_ADD` = PERIOD_ADD() + | | fields PERIOD_ADD | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("PERIOD_ADD is not a builtin function of PPL")) + } + + test("test PERIOD_DIFF is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `PERIOD_DIFF` = PERIOD_DIFF() + | | fields PERIOD_DIFF | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("PERIOD_DIFF is not a 
builtin function of PPL")) + } + + test("test SEC_TO_TIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `SEC_TO_TIME` = SEC_TO_TIME() + | | fields SEC_TO_TIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("SEC_TO_TIME is not a builtin function of PPL")) + } + + test("test STR_TO_DATE is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `STR_TO_DATE` = STR_TO_DATE() + | | fields STR_TO_DATE | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("STR_TO_DATE is not a builtin function of PPL")) + } + + test("test SUBTIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `SUBTIME` = SUBTIME() + | | fields SUBTIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("SUBTIME is not a builtin function of PPL")) + } + + test("test TIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `TIME` = TIME() + | | fields TIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("TIME is not a builtin function of PPL")) + } + + test("test TIME_FORMAT is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `TIME_FORMAT` = TIME_FORMAT() + | | fields TIME_FORMAT | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("TIME_FORMAT is not a builtin function of PPL")) + } + + test("test TIME_TO_SEC is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `TIME_TO_SEC` = TIME_TO_SEC() + | | fields TIME_TO_SEC | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("TIME_TO_SEC is not a builtin function of PPL")) + } + + test("test TIMEDIFF is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | 
| eval `TIMEDIFF` = TIMEDIFF() + | | fields TIMEDIFF | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("TIMEDIFF is not a builtin function of PPL")) + } + + test("test TIMESTAMPADD is not supported") { + intercept[Exception](sql(s""" + | source = $testTable + | | eval `TIMESTAMPADD` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') + | | fields TIMESTAMPADD | head 1 + | """.stripMargin)) + } + + test("test TIMESTAMPDIFF is not supported") { + intercept[Exception](sql(s""" + | source = $testTable + | | eval `TIMESTAMPDIFF_1` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') + | | fields TIMESTAMPDIFF_1 | head 1 + | """.stripMargin)) + } + + test("test TO_DAYS is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `TO_DAYS` = TO_DAYS() + | | fields TO_DAYS | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("TO_DAYS is not a builtin function of PPL")) + } + + test("test TO_SECONDS is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `TO_SECONDS` = TO_SECONDS() + | | fields TO_SECONDS | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("TO_SECONDS is not a builtin function of PPL")) + } + + test("test UTC_DATE is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `UTC_DATE` = UTC_DATE() + | | fields UTC_DATE | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("UTC_DATE is not a builtin function of PPL")) + } + + test("test UTC_TIME is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `UTC_TIME` = UTC_TIME() + | | fields UTC_TIME | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("UTC_TIME is not a builtin function of PPL")) + } + + test("test UTC_TIMESTAMP is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source 
= $testTable + | | eval `UTC_TIMESTAMP` = UTC_TIMESTAMP() + | | fields UTC_TIMESTAMP | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("UTC_TIMESTAMP is not a builtin function of PPL")) + } + + test("test YEARWEEK is not supported") { + val ex = intercept[UnsupportedOperationException](sql(s""" + | source = $testTable + | | eval `YEARWEEK` = YEARWEEK() + | | fields YEARWEEK | head 1 + | """.stripMargin)) + assert(ex.getMessage.contains("YEARWEEK is not a builtin function of PPL")) + } +} diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala index 4c35549df..67e799c00 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLBuiltinFunctionITSuite.scala @@ -605,31 +605,6 @@ class FlintSparkPPLBuiltinFunctionITSuite comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) } - test("test time functions - from_unixtime and unix_timestamp") { - val frame = sql(s""" - | source = $testTable |where unix_timestamp(from_unixtime(1700000001)) > 1700000000 | fields name, age - | """.stripMargin) - - val results: Array[Row] = frame.collect() - val expectedResults: Array[Row] = - Array(Row("Jake", 70), Row("Hello", 30), Row("John", 25), Row("Jane", 20)) - implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) - assert(results.sorted.sameElements(expectedResults.sorted)) - - val logicalPlan: LogicalPlan = frame.queryExecution.logical - val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test")) - val filterExpr = GreaterThan( - UnresolvedFunction( - "unix_timestamp", - seq(UnresolvedFunction("from_unixtime", seq(Literal(1700000001)), isDistinct = false)), - isDistinct = false), - 
Literal(1700000000)) - val filterPlan = Filter(filterExpr, table) - val projectList = Seq(UnresolvedAttribute("name"), UnresolvedAttribute("age")) - val expectedPlan = Project(projectList, filterPlan) - comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) - } - test("test arithmetic operators (+ - * / %)") { val frame = sql(s""" | source = $testTable | where (sqrt(pow(age, 2)) + sqrt(pow(age, 2)) / 1 - sqrt(pow(age, 2)) * 1) % 25.0 = 0 | fields name, age diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index 2b916a245..2b41530f0 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -308,6 +308,7 @@ LAST_DAY: 'LAST_DAY'; LOCALTIME: 'LOCALTIME'; LOCALTIMESTAMP: 'LOCALTIMESTAMP'; MAKEDATE: 'MAKEDATE'; +MAKE_DATE: 'MAKE_DATE'; MAKETIME: 'MAKETIME'; MONTHNAME: 'MONTHNAME'; NOW: 'NOW'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index c205fc236..e0672690d 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -654,6 +654,7 @@ dateTimeFunctionName | LOCALTIME | LOCALTIMESTAMP | MAKEDATE + | MAKE_DATE | MAKETIME | MICROSECOND | MINUTE diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 6b549663a..5d92f4b58 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -54,16 +54,16 @@ public enum BuiltinFunctionName { /** Date and Time Functions. 
*/ ADDDATE(FunctionName.of("adddate")), - ADDTIME(FunctionName.of("addtime")), - CONVERT_TZ(FunctionName.of("convert_tz")), +// ADDTIME(FunctionName.of("addtime")), +// CONVERT_TZ(FunctionName.of("convert_tz")), DATE(FunctionName.of("date")), DATEDIFF(FunctionName.of("datediff")), - DATETIME(FunctionName.of("datetime")), - DATE_ADD(FunctionName.of("date_add")), +// DATETIME(FunctionName.of("datetime")), +// DATE_ADD(FunctionName.of("date_add")), DATE_FORMAT(FunctionName.of("date_format")), - DATE_SUB(FunctionName.of("date_sub")), +// DATE_SUB(FunctionName.of("date_sub")), DAY(FunctionName.of("day")), - DAYNAME(FunctionName.of("dayname")), +// DAYNAME(FunctionName.of("dayname")), DAYOFMONTH(FunctionName.of("dayofmonth")), DAY_OF_MONTH(FunctionName.of("day_of_month")), DAYOFWEEK(FunctionName.of("dayofweek")), @@ -71,56 +71,57 @@ public enum BuiltinFunctionName { DAY_OF_WEEK(FunctionName.of("day_of_week")), DAY_OF_YEAR(FunctionName.of("day_of_year")), EXTRACT(FunctionName.of("extract")), - FROM_DAYS(FunctionName.of("from_days")), +// FROM_DAYS(FunctionName.of("from_days")), FROM_UNIXTIME(FunctionName.of("from_unixtime")), - GET_FORMAT(FunctionName.of("get_format")), +// GET_FORMAT(FunctionName.of("get_format")), HOUR(FunctionName.of("hour")), HOUR_OF_DAY(FunctionName.of("hour_of_day")), LAST_DAY(FunctionName.of("last_day")), MAKEDATE(FunctionName.of("makedate")), - MAKETIME(FunctionName.of("maketime")), - MICROSECOND(FunctionName.of("microsecond")), + MAKE_DATE(FunctionName.of("make_date")), +// MAKETIME(FunctionName.of("maketime")), +// MICROSECOND(FunctionName.of("microsecond")), MINUTE(FunctionName.of("minute")), - MINUTE_OF_DAY(FunctionName.of("minute_of_day")), +// MINUTE_OF_DAY(FunctionName.of("minute_of_day")), MINUTE_OF_HOUR(FunctionName.of("minute_of_hour")), MONTH(FunctionName.of("month")), MONTH_OF_YEAR(FunctionName.of("month_of_year")), MONTHNAME(FunctionName.of("monthname")), - PERIOD_ADD(FunctionName.of("period_add")), - 
PERIOD_DIFF(FunctionName.of("period_diff")), +// PERIOD_ADD(FunctionName.of("period_add")), +// PERIOD_DIFF(FunctionName.of("period_diff")), QUARTER(FunctionName.of("quarter")), - SEC_TO_TIME(FunctionName.of("sec_to_time")), +// SEC_TO_TIME(FunctionName.of("sec_to_time")), SECOND(FunctionName.of("second")), SECOND_OF_MINUTE(FunctionName.of("second_of_minute")), - STR_TO_DATE(FunctionName.of("str_to_date")), +// STR_TO_DATE(FunctionName.of("str_to_date")), SUBDATE(FunctionName.of("subdate")), - SUBTIME(FunctionName.of("subtime")), - TIME(FunctionName.of("time")), - TIMEDIFF(FunctionName.of("timediff")), - TIME_TO_SEC(FunctionName.of("time_to_sec")), +// SUBTIME(FunctionName.of("subtime")), +// TIME(FunctionName.of("time")), +// TIMEDIFF(FunctionName.of("timediff")), +// TIME_TO_SEC(FunctionName.of("time_to_sec")), TIMESTAMP(FunctionName.of("timestamp")), - TIMESTAMPADD(FunctionName.of("timestampadd")), - TIMESTAMPDIFF(FunctionName.of("timestampdiff")), - TIME_FORMAT(FunctionName.of("time_format")), - TO_DAYS(FunctionName.of("to_days")), - TO_SECONDS(FunctionName.of("to_seconds")), - UTC_DATE(FunctionName.of("utc_date")), - UTC_TIME(FunctionName.of("utc_time")), - UTC_TIMESTAMP(FunctionName.of("utc_timestamp")), +// TIMESTAMPADD(FunctionName.of("timestampadd")), +// TIMESTAMPDIFF(FunctionName.of("timestampdiff")), +// TIME_FORMAT(FunctionName.of("time_format")), +// TO_DAYS(FunctionName.of("to_days")), +// TO_SECONDS(FunctionName.of("to_seconds")), +// UTC_DATE(FunctionName.of("utc_date")), +// UTC_TIME(FunctionName.of("utc_time")), +// UTC_TIMESTAMP(FunctionName.of("utc_timestamp")), UNIX_TIMESTAMP(FunctionName.of("unix_timestamp")), WEEK(FunctionName.of("week")), WEEKDAY(FunctionName.of("weekday")), WEEKOFYEAR(FunctionName.of("weekofyear")), WEEK_OF_YEAR(FunctionName.of("week_of_year")), YEAR(FunctionName.of("year")), - YEARWEEK(FunctionName.of("yearweek")), +// YEARWEEK(FunctionName.of("yearweek")), // `now`-like functions NOW(FunctionName.of("now")), 
CURDATE(FunctionName.of("curdate")), CURRENT_DATE(FunctionName.of("current_date")), - CURTIME(FunctionName.of("curtime")), - CURRENT_TIME(FunctionName.of("current_time")), +// CURTIME(FunctionName.of("curtime")), +// CURRENT_TIME(FunctionName.of("current_time")), LOCALTIME(FunctionName.of("localtime")), CURRENT_TIMESTAMP(FunctionName.of("current_timestamp")), LOCALTIMESTAMP(FunctionName.of("localtimestamp")), diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java index d817305a9..7c5b0fad1 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/BuiltinFunctionTranslator.java @@ -33,6 +33,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SECOND_OF_MINUTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBDATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SYSDATE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRIM; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; @@ -66,7 +67,8 @@ public interface BuiltinFunctionTranslator { .put(ADDDATE, "date_add") // only maps adddate(date, days) .put(DATEDIFF, "datediff") .put(LOCALTIME, "localtimestamp") - //condition functions + .put(SYSDATE, "now") + // condition functions .put(IS_NULL, "isnull") .put(IS_NOT_NULL, "isnotnull") .put(BuiltinFunctionName.ISPRESENT, "isnotnull")