From 9968a32594d48fea433da7e7dad815518b811cc6 Mon Sep 17 00:00:00 2001 From: Marko Nikacevic Date: Tue, 29 Oct 2024 14:30:29 +0100 Subject: [PATCH] [SPARK-50071][SQL][PYTHON] Add try_make_timestamp(_ltz and _ntz) and related tests ### What changes were proposed in this pull request? This PR adds try_make_timestamp, try_make_timestamp_ntz and try_make_timestamp_ltz expressions that set failOnError to false by default (ANSI compliant versions of related functions). ### Why are the changes needed? We need these functions in order to provide an alternative for the standard functions, that do not throw an error when ANSI is enabled. ### Does this PR introduce _any_ user-facing change? Yes, new expressions added. ### How was this patch tested? Unit tests were added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48624 from markonik-db/SPARK-50071-tryMakeTimestamp. Authored-by: Marko Nikacevic Signed-off-by: Max Gekk --- .../spark/sql/PlanGenerationTestSuite.scala | 52 ++++ docs/sql-ref-ansi-compliance.md | 4 + .../reference/pyspark.sql/functions.rst | 3 + .../pyspark/sql/connect/functions/builtin.py | 60 ++++ python/pyspark/sql/functions/builtin.py | 282 ++++++++++++++++++ python/pyspark/sql/tests/test_functions.py | 55 ++++ .../org/apache/spark/sql/functions.scala | 84 ++++++ .../catalyst/analysis/FunctionRegistry.scala | 5 + .../expressions/datetimeExpressions.scala | 181 +++++++++++ ...y_make_timestamp_ltz_with_timezone.explain | 2 + ...ake_timestamp_ltz_without_timezone.explain | 2 + .../function_try_make_timestamp_ntz.explain | 2 + ...n_try_make_timestamp_with_timezone.explain | 2 + ...ry_make_timestamp_without_timezone.explain | 2 + ..._try_make_timestamp_ltz_with_timezone.json | 49 +++ ...make_timestamp_ltz_with_timezone.proto.bin | Bin 0 -> 233 bytes ...y_make_timestamp_ltz_without_timezone.json | 45 +++ ...e_timestamp_ltz_without_timezone.proto.bin | Bin 0 -> 226 bytes .../function_try_make_timestamp_ntz.json | 45 +++ 
.../function_try_make_timestamp_ntz.proto.bin | Bin 0 -> 226 bytes ...tion_try_make_timestamp_with_timezone.json | 49 +++ ...try_make_timestamp_with_timezone.proto.bin | Bin 0 -> 229 bytes ...n_try_make_timestamp_without_timezone.json | 45 +++ ..._make_timestamp_without_timezone.proto.bin | Bin 0 -> 222 bytes .../sql-functions/sql-expression-schema.md | 3 + .../apache/spark/sql/DateFunctionsSuite.scala | 78 +++++ 26 files changed, 1050 insertions(+) create mode 100644 sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_with_timezone.explain create mode 100644 sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_without_timezone.explain create mode 100644 sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ntz.explain create mode 100644 sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_with_timezone.explain create mode 100644 sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_without_timezone.explain create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin create mode 100644 
sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json create mode 100644 sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 72f56f35bf935..a11de64ed61fe 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -1977,6 +1977,58 @@ class PlanGenerationTestSuite fn.col("b")) } + functionTest("try_make_timestamp with timezone") { + fn.try_make_timestamp( + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("b"), + fn.col("g")) + } + + functionTest("try_make_timestamp without timezone") { + fn.try_make_timestamp( + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("b")) + } + + functionTest("try_make_timestamp_ltz with timezone") { + fn.try_make_timestamp_ltz( + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("b"), + fn.col("g")) + } + + functionTest("try_make_timestamp_ltz without timezone") { + fn.try_make_timestamp_ltz( + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("b")) + } + + functionTest("try_make_timestamp_ntz") { + fn.try_make_timestamp_ntz( + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("a"), + fn.col("b")) + } + functionTest("make_ym_interval years months") { 
fn.make_ym_interval(fn.col("a"), fn.col("a")) } diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 200ddc9a20f3d..500b41f7569a3 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -380,6 +380,10 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t - `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound. - `try_to_timestamp`: identical to the function `to_timestamp`, except that it returns `NULL` result instead of throwing an exception on string parsing error. - `try_parse_url`: identical to the function `parse_url`, except that it returns `NULL` result instead of throwing an exception on url parsing error. + - `try_make_timestamp`: identical to the function `make_timestamp`, except that it returns `NULL` result instead of throwing an exception on error. + - `try_make_timestamp_ltz`: identical to the function `make_timestamp_ltz`, except that it returns `NULL` result instead of throwing an exception on error. + - `try_make_timestamp_ntz`: identical to the function `make_timestamp_ntz`, except that it returns `NULL` result instead of throwing an exception on error. 
+ ### SQL Keywords (optional, disabled by default) diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index bf73fec58280d..b9df5691b82a9 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -301,6 +301,9 @@ Date and Timestamp Functions to_unix_timestamp to_utc_timestamp trunc + try_make_timestamp + try_make_timestamp_ltz + try_make_timestamp_ntz try_to_timestamp unix_date unix_micros diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 850e863e90794..b8bd0e9bf7fdc 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -3759,6 +3759,28 @@ def make_timestamp( make_timestamp.__doc__ = pysparkfuncs.make_timestamp.__doc__ +def try_make_timestamp( + years: "ColumnOrName", + months: "ColumnOrName", + days: "ColumnOrName", + hours: "ColumnOrName", + mins: "ColumnOrName", + secs: "ColumnOrName", + timezone: Optional["ColumnOrName"] = None, +) -> Column: + if timezone is not None: + return _invoke_function_over_columns( + "try_make_timestamp", years, months, days, hours, mins, secs, timezone + ) + else: + return _invoke_function_over_columns( + "try_make_timestamp", years, months, days, hours, mins, secs + ) + + +try_make_timestamp.__doc__ = pysparkfuncs.try_make_timestamp.__doc__ + + def make_timestamp_ltz( years: "ColumnOrName", months: "ColumnOrName", @@ -3781,6 +3803,28 @@ def make_timestamp_ltz( make_timestamp_ltz.__doc__ = pysparkfuncs.make_timestamp_ltz.__doc__ +def try_make_timestamp_ltz( + years: "ColumnOrName", + months: "ColumnOrName", + days: "ColumnOrName", + hours: "ColumnOrName", + mins: "ColumnOrName", + secs: "ColumnOrName", + timezone: Optional["ColumnOrName"] = None, +) -> Column: + if timezone is not None: + return _invoke_function_over_columns( + "try_make_timestamp_ltz", years, 
months, days, hours, mins, secs, timezone + ) + else: + return _invoke_function_over_columns( + "try_make_timestamp_ltz", years, months, days, hours, mins, secs + ) + + +try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__ + + def make_timestamp_ntz( years: "ColumnOrName", months: "ColumnOrName", @@ -3797,6 +3841,22 @@ def make_timestamp_ntz( make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__ +def try_make_timestamp_ntz( + years: "ColumnOrName", + months: "ColumnOrName", + days: "ColumnOrName", + hours: "ColumnOrName", + mins: "ColumnOrName", + secs: "ColumnOrName", +) -> Column: + return _invoke_function_over_columns( + "try_make_timestamp_ntz", years, months, days, hours, mins, secs + ) + + +try_make_timestamp_ntz.__doc__ = pysparkfuncs.try_make_timestamp_ntz.__doc__ + + def make_ym_interval( years: Optional["ColumnOrName"] = None, months: Optional["ColumnOrName"] = None, diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index cf5862ada02f4..810c6731de9a7 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -21748,6 +21748,108 @@ def make_timestamp( ) +@_try_remote_functions +def try_make_timestamp( + years: "ColumnOrName", + months: "ColumnOrName", + days: "ColumnOrName", + hours: "ColumnOrName", + mins: "ColumnOrName", + secs: "ColumnOrName", + timezone: Optional["ColumnOrName"] = None, +) -> Column: + """ + Try to create timestamp from years, months, days, hours, mins, secs and timezone fields. + The result data type is consistent with the value of configuration `spark.sql.timestampType`. + The function returns NULL on invalid inputs. + + .. 
versionadded:: 4.0.0 + + Parameters + ---------- + years : :class:`~pyspark.sql.Column` or column name + The year to represent, from 1 to 9999 + months : :class:`~pyspark.sql.Column` or column name + The month-of-year to represent, from 1 (January) to 12 (December) + days : :class:`~pyspark.sql.Column` or column name + The day-of-month to represent, from 1 to 31 + hours : :class:`~pyspark.sql.Column` or column name + The hour-of-day to represent, from 0 to 23 + mins : :class:`~pyspark.sql.Column` or column name + The minute-of-hour to represent, from 0 to 59 + secs : :class:`~pyspark.sql.Column` or column name + The second-of-minute and its micro-fraction to represent, from 0 to 60. + The value can be either an integer like 13, or a fraction like 13.123. + If the sec argument equals 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. + timezone : :class:`~pyspark.sql.Column` or column name, optional + The time zone identifier. For example, CET, UTC, etc. + + Returns + ------- + :class:`~pyspark.sql.Column` + A new column that contains a timestamp or NULL in case of an error. + + Examples + -------- + + Example 1: Make timestamp from years, months, days, hours, mins, secs and timezone. + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], + ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) + >>> df.select(sf.try_make_timestamp( + ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone) + ... ).show(truncate=False) + +----------------------------------------------------+ + |try_make_timestamp(year, month, day, hour, min, sec)| + +----------------------------------------------------+ + |2014-12-27 21:30:45.887 | + +----------------------------------------------------+ + + Example 2: Make timestamp without timezone. 
+ + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], + ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) + >>> df.select(sf.try_make_timestamp( + ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ).show(truncate=False) + +----------------------------------------------------+ + |try_make_timestamp(year, month, day, hour, min, sec)| + +----------------------------------------------------+ + |2014-12-28 06:30:45.887 | + +----------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") + + Example 3: Make timestamp with invalid input. + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887, 'CET']], + ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) + >>> df.select(sf.try_make_timestamp( + ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... 
).show(truncate=False) + +----------------------------------------------------+ + |try_make_timestamp(year, month, day, hour, min, sec)| + +----------------------------------------------------+ + |NULL | + +----------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") + """ + if timezone is not None: + return _invoke_function_over_columns( + "try_make_timestamp", years, months, days, hours, mins, secs, timezone + ) + else: + return _invoke_function_over_columns( + "try_make_timestamp", years, months, days, hours, mins, secs + ) + + @_try_remote_functions def make_timestamp_ltz( years: "ColumnOrName", @@ -21834,6 +21936,108 @@ def make_timestamp_ltz( ) +@_try_remote_functions +def try_make_timestamp_ltz( + years: "ColumnOrName", + months: "ColumnOrName", + days: "ColumnOrName", + hours: "ColumnOrName", + mins: "ColumnOrName", + secs: "ColumnOrName", + timezone: Optional["ColumnOrName"] = None, +) -> Column: + """ + Try to create the current timestamp with local time zone from years, months, days, hours, mins, + secs and timezone fields. + The function returns NULL on invalid inputs. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + years : :class:`~pyspark.sql.Column` or column name + The year to represent, from 1 to 9999 + months : :class:`~pyspark.sql.Column` or column name + The month-of-year to represent, from 1 (January) to 12 (December) + days : :class:`~pyspark.sql.Column` or column name + The day-of-month to represent, from 1 to 31 + hours : :class:`~pyspark.sql.Column` or column name + The hour-of-day to represent, from 0 to 23 + mins : :class:`~pyspark.sql.Column` or column name + The minute-of-hour to represent, from 0 to 59 + secs : :class:`~pyspark.sql.Column` or column name + The second-of-minute and its micro-fraction to represent, from 0 to 60. + The value can be either an integer like 13 , or a fraction like 13.123. 
+ If the sec argument equals 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. + timezone : :class:`~pyspark.sql.Column` or column name, optional + The time zone identifier. For example, CET, UTC, etc. + + Returns + ------- + :class:`~pyspark.sql.Column` + A new column that contains a current timestamp, or NULL in case of an error. + + Examples + -------- + + Example 1: Make the current timestamp from years, months, days, hours, mins, secs and timezone. + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], + ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) + >>> df.select(sf.try_make_timestamp_ltz( + ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone) + ... ).show(truncate=False) + +------------------------------------------------------------------+ + |try_make_timestamp_ltz(year, month, day, hour, min, sec, timezone)| + +------------------------------------------------------------------+ + |2014-12-27 21:30:45.887 | + +------------------------------------------------------------------+ + + Example 2: Make the current timestamp without timezone. + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], + ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) + >>> df.select(sf.try_make_timestamp_ltz( + ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... 
).show(truncate=False) + +--------------------------------------------------------+ + |try_make_timestamp_ltz(year, month, day, hour, min, sec)| + +--------------------------------------------------------+ + |2014-12-28 06:30:45.887 | + +--------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") + + Example 3: Make the current timestamp with invalid input. + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887, 'CET']], + ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) + >>> df.select(sf.try_make_timestamp_ltz( + ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ).show(truncate=False) + +--------------------------------------------------------+ + |try_make_timestamp_ltz(year, month, day, hour, min, sec)| + +--------------------------------------------------------+ + |NULL | + +--------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") + """ + if timezone is not None: + return _invoke_function_over_columns( + "try_make_timestamp_ltz", years, months, days, hours, mins, secs, timezone + ) + else: + return _invoke_function_over_columns( + "try_make_timestamp_ltz", years, months, days, hours, mins, secs + ) + + @_try_remote_functions def make_timestamp_ntz( years: "ColumnOrName", @@ -21897,6 +22101,84 @@ def make_timestamp_ntz( ) +@_try_remote_functions +def try_make_timestamp_ntz( + years: "ColumnOrName", + months: "ColumnOrName", + days: "ColumnOrName", + hours: "ColumnOrName", + mins: "ColumnOrName", + secs: "ColumnOrName", +) -> Column: + """ + Try to create local date-time from years, months, days, hours, mins, secs fields. + The function returns NULL on invalid inputs. + + .. 
versionadded:: 4.0.0 + + Parameters + ---------- + years : :class:`~pyspark.sql.Column` or column name + The year to represent, from 1 to 9999 + months : :class:`~pyspark.sql.Column` or column name + The month-of-year to represent, from 1 (January) to 12 (December) + days : :class:`~pyspark.sql.Column` or column name + The day-of-month to represent, from 1 to 31 + hours : :class:`~pyspark.sql.Column` or column name + The hour-of-day to represent, from 0 to 23 + mins : :class:`~pyspark.sql.Column` or column name + The minute-of-hour to represent, from 0 to 59 + secs : :class:`~pyspark.sql.Column` or column name + The second-of-minute and its micro-fraction to represent, from 0 to 60. + The value can be either an integer like 13, or a fraction like 13.123. + If the sec argument equals 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. + + Returns + ------- + :class:`~pyspark.sql.Column` + A new column that contains a local date-time, or NULL in case of an error. + + Examples + -------- + + Example 1: Make local date-time from years, months, days, hours, mins, secs. + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]], + ... ["year", "month", "day", "hour", "min", "sec"]) + >>> df.select(sf.try_make_timestamp_ntz( + ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... 
).show(truncate=False) + +--------------------------------------------------------+ + |try_make_timestamp_ntz(year, month, day, hour, min, sec)| + +--------------------------------------------------------+ + |2014-12-28 06:30:45.887 | + +--------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") + + Example 2: Make local date-time with invalid input + + >>> import pyspark.sql.functions as sf + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887]], + ... ["year", "month", "day", "hour", "min", "sec"]) + >>> df.select(sf.try_make_timestamp_ntz( + ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ).show(truncate=False) + +--------------------------------------------------------+ + |try_make_timestamp_ntz(year, month, day, hour, min, sec)| + +--------------------------------------------------------+ + |NULL | + +--------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") + """ + return _invoke_function_over_columns( + "try_make_timestamp_ntz", years, months, days, hours, mins, secs + ) + + @_try_remote_functions def make_ym_interval( years: Optional["ColumnOrName"] = None, diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index c00a0e7febf67..74e043ca1e6e8 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -347,6 +347,61 @@ def test_try_parse_url(self): actual = df.select(F.try_parse_url(df.url, df.part, df.key)).collect() self.assertEqual(actual, [Row(None)]) + def test_try_make_timestamp(self): + data = [(2024, 5, 22, 10, 30, 0)] + df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) + actual = df.select( + F.try_make_timestamp(df.year, df.month, df.day, df.hour, df.minute, df.second) + ).collect() + self.assertEqual(actual, 
[Row(datetime.datetime(2024, 5, 22, 10, 30))]) + + data = [(2024, 13, 22, 10, 30, 0)] + df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) + actual = df.select( + F.try_make_timestamp(df.year, df.month, df.day, df.hour, df.minute, df.second) + ).collect() + self.assertEqual(actual, [Row(None)]) + + def test_try_make_timestamp_ltz(self): + # use local timezone here to avoid flakiness + data = [(2024, 5, 22, 10, 30, 0, datetime.datetime.now().astimezone().tzinfo.__str__())] + df = self.spark.createDataFrame( + data, ["year", "month", "day", "hour", "minute", "second", "timezone"] + ) + actual = df.select( + F.try_make_timestamp_ltz( + df.year, df.month, df.day, df.hour, df.minute, df.second, df.timezone + ) + ).collect() + self.assertEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 0))]) + + # use local timezone here to avoid flakiness + data = [(2024, 13, 22, 10, 30, 0, datetime.datetime.now().astimezone().tzinfo.__str__())] + df = self.spark.createDataFrame( + data, ["year", "month", "day", "hour", "minute", "second", "timezone"] + ) + actual = df.select( + F.try_make_timestamp_ltz( + df.year, df.month, df.day, df.hour, df.minute, df.second, df.timezone + ) + ).collect() + self.assertEqual(actual, [Row(None)]) + + def test_try_make_timestamp_ntz(self): + data = [(2024, 5, 22, 10, 30, 0)] + df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) + actual = df.select( + F.try_make_timestamp_ntz(df.year, df.month, df.day, df.hour, df.minute, df.second) + ).collect() + self.assertEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30))]) + + data = [(2024, 13, 22, 10, 30, 0)] + df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) + actual = df.select( + F.try_make_timestamp_ntz(df.year, df.month, df.day, df.hour, df.minute, df.second) + ).collect() + self.assertEqual(actual, [Row(None)]) + def test_string_functions(self): string_functions = [ 
"upper", diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index 0b295b8539410..d7b61468b43d7 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -8119,6 +8119,41 @@ object functions { secs: Column): Column = Column.fn("make_timestamp", years, months, days, hours, mins, secs) + /** + * Try to create a timestamp from years, months, days, hours, mins, secs and timezone fields. + * The result data type is consistent with the value of configuration `spark.sql.timestampType`. + * The function returns NULL on invalid inputs. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def try_make_timestamp( + years: Column, + months: Column, + days: Column, + hours: Column, + mins: Column, + secs: Column, + timezone: Column): Column = + Column.fn("try_make_timestamp", years, months, days, hours, mins, secs, timezone) + + /** + * Try to create a timestamp from years, months, days, hours, mins, and secs fields. The result + * data type is consistent with the value of configuration `spark.sql.timestampType`. The + * function returns NULL on invalid inputs. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def try_make_timestamp( + years: Column, + months: Column, + days: Column, + hours: Column, + mins: Column, + secs: Column): Column = + Column.fn("try_make_timestamp", years, months, days, hours, mins, secs) + /** * Create the current timestamp with local time zone from years, months, days, hours, mins, secs * and timezone fields. If the configuration `spark.sql.ansi.enabled` is false, the function @@ -8154,6 +8189,39 @@ object functions { secs: Column): Column = Column.fn("make_timestamp_ltz", years, months, days, hours, mins, secs) + /** + * Try to create the current timestamp with local time zone from years, months, days, hours, + * mins, secs and timezone fields. 
The function returns NULL on invalid inputs. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def try_make_timestamp_ltz( + years: Column, + months: Column, + days: Column, + hours: Column, + mins: Column, + secs: Column, + timezone: Column): Column = + Column.fn("try_make_timestamp_ltz", years, months, days, hours, mins, secs, timezone) + + /** + * Try to create the current timestamp with local time zone from years, months, days, hours, + * mins and secs fields. The function returns NULL on invalid inputs. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def try_make_timestamp_ltz( + years: Column, + months: Column, + days: Column, + hours: Column, + mins: Column, + secs: Column): Column = + Column.fn("try_make_timestamp_ltz", years, months, days, hours, mins, secs) + /** * Create local date-time from years, months, days, hours, mins, secs fields. If the * configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. @@ -8171,6 +8239,22 @@ object functions { secs: Column): Column = Column.fn("make_timestamp_ntz", years, months, days, hours, mins, secs) + /** + * Try to create a local date-time from years, months, days, hours, mins, secs fields. The + * function returns NULL on invalid inputs. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def try_make_timestamp_ntz( + years: Column, + months: Column, + days: Column, + hours: Column, + mins: Column, + secs: Column): Column = + Column.fn("try_make_timestamp_ntz", years, months, days, hours, mins, secs) + /** * Make year-month interval from years, months. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3836eabe6bec6..4ad0b81b8f269 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -665,10 +665,15 @@ object FunctionRegistry { expression[WindowTime]("window_time"), expression[MakeDate]("make_date"), expression[MakeTimestamp]("make_timestamp"), + expression[TryMakeTimestamp]("try_make_timestamp"), expression[MonthName]("monthname"), // We keep the 2 expression builders below to have different function docs. expressionBuilder("make_timestamp_ntz", MakeTimestampNTZExpressionBuilder, setAlias = true), expressionBuilder("make_timestamp_ltz", MakeTimestampLTZExpressionBuilder, setAlias = true), + expressionBuilder( + "try_make_timestamp_ntz", TryMakeTimestampNTZExpressionBuilder, setAlias = true), + expressionBuilder( + "try_make_timestamp_ltz", TryMakeTimestampLTZExpressionBuilder, setAlias = true), expression[MakeInterval]("make_interval"), expression[MakeDTInterval]("make_dt_interval"), expression[MakeYMInterval]("make_ym_interval"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index d0c4a53e491d8..7f615dbc245b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2561,6 +2561,53 @@ object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(year, month, day, hour, min, sec) - Try to create local date-time from 
year, month, day, hour, min, sec fields. The function returns NULL on invalid inputs.", + arguments = """ + Arguments: + * year - the year to represent, from 1 to 9999 + * month - the month-of-year to represent, from 1 (January) to 12 (December) + * day - the day-of-month to represent, from 1 to 31 + * hour - the hour-of-day to represent, from 0 to 23 + * min - the minute-of-hour to represent, from 0 to 59 + * sec - the second-of-minute and its micro-fraction to represent, from + 0 to 60. If the sec argument equals to 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. + """, + examples = """ + Examples: + > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887); + 2014-12-28 06:30:45.887 + > SELECT _FUNC_(2019, 6, 30, 23, 59, 60); + 2019-07-01 00:00:00 + > SELECT _FUNC_(null, 7, 22, 15, 30, 0); + NULL + > SELECT _FUNC_(2024, 13, 22, 15, 30, 0); + NULL + """, + group = "datetime_funcs", + since = "4.0.0") +// scalastyle:on line.size.limit +object TryMakeTimestampNTZExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 6) { + MakeTimestamp( + expressions(0), + expressions(1), + expressions(2), + expressions(3), + expressions(4), + expressions(5), + dataType = TimestampNTZType, + failOnError = false) + } else { + throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(6), numArgs) + } + } +} + // scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create the current timestamp with local time zone from year, month, day, hour, min, sec and timezone fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. 
Otherwise, it will throw an error instead.", @@ -2609,6 +2656,57 @@ object MakeTimestampLTZExpressionBuilder extends ExpressionBuilder { } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Try to create a timestamp with local time zone from year, month, day, hour, min, sec and timezone fields. The function returns NULL on invalid inputs.", + arguments = """ + Arguments: + * year - the year to represent, from 1 to 9999 + * month - the month-of-year to represent, from 1 (January) to 12 (December) + * day - the day-of-month to represent, from 1 to 31 + * hour - the hour-of-day to represent, from 0 to 23 + * min - the minute-of-hour to represent, from 0 to 59 + * sec - the second-of-minute and its micro-fraction to represent, from + 0 to 60. If the sec argument equals to 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. + * timezone - the time zone identifier. For example, CET, UTC and etc. 
+ """, + examples = """ + Examples: + > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887); + 2014-12-28 06:30:45.887 + > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET'); + 2014-12-27 21:30:45.887 + > SELECT _FUNC_(2019, 6, 30, 23, 59, 60); + 2019-07-01 00:00:00 + > SELECT _FUNC_(null, 7, 22, 15, 30, 0); + NULL + > SELECT _FUNC_(2024, 13, 22, 15, 30, 0); + NULL + """, + group = "datetime_funcs", + since = "4.0.0") +// scalastyle:on line.size.limit +object TryMakeTimestampLTZExpressionBuilder extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 6 || numArgs == 7) { // the 7th argument (timezone) is optional + MakeTimestamp( + expressions(0), + expressions(1), + expressions(2), + expressions(3), + expressions(4), + expressions(5), + expressions.drop(6).lastOption, + dataType = TimestampType, + failOnError = false) + } else { + throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(6, 7), numArgs) + } + } +} + // scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Create timestamp from year, month, day, hour, min, sec and timezone fields. The result data type is consistent with the value of configuration `spark.sql.timestampType`. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead.", @@ -2812,6 +2910,89 @@ case class MakeTimestamp( } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Try to create a timestamp from year, month, day, hour, min, sec and timezone fields. The result data type is consistent with the value of configuration `spark.sql.timestampType`. 
The function returns NULL on invalid inputs.", + arguments = """ + Arguments: + * year - the year to represent, from 1 to 9999 + * month - the month-of-year to represent, from 1 (January) to 12 (December) + * day - the day-of-month to represent, from 1 to 31 + * hour - the hour-of-day to represent, from 0 to 23 + * min - the minute-of-hour to represent, from 0 to 59 + * sec - the second-of-minute and its micro-fraction to represent, from 0 to 60. + The value can be either an integer like 13 , or a fraction like 13.123. + If the sec argument equals to 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. + * timezone - the time zone identifier. For example, CET, UTC and etc. + """, + examples = """ + Examples: + > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887); + 2014-12-28 06:30:45.887 + > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET'); + 2014-12-27 21:30:45.887 + > SELECT _FUNC_(2019, 6, 30, 23, 59, 60); + 2019-07-01 00:00:00 + > SELECT _FUNC_(2019, 6, 30, 23, 59, 1); + 2019-06-30 23:59:01 + > SELECT _FUNC_(null, 7, 22, 15, 30, 0); + NULL + > SELECT _FUNC_(2024, 13, 22, 15, 30, 0); + NULL + """, + group = "datetime_funcs", + since = "4.0.0") +// scalastyle:on line.size.limit +case class TryMakeTimestamp( + year: Expression, + month: Expression, + day: Expression, + hour: Expression, + min: Expression, + sec: Expression, + timezone: Option[Expression], + timeZoneId: Option[String], + replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { + + private def this( + year: Expression, + month: Expression, + day: Expression, + hour: Expression, + min: Expression, + sec: Expression, + timezone: Option[Expression]) = this(year, month, day, hour, min, sec, timezone, None, + MakeTimestamp(year, month, day, hour, min, sec, timezone, None, failOnError = false)) + + def this( + year: Expression, + month: Expression, + day: Expression, + hour: Expression, + min: Expression, + sec: Expression, + timezone: Expression) = 
this(year, month, day, hour, min, sec, Some(timezone)) + + def this( + year: Expression, + month: Expression, + day: Expression, + hour: Expression, + min: Expression, + sec: Expression) = this(year, month, day, hour, min, sec, None) + + override def prettyName: String = "try_make_timestamp" + + override def parameters: Seq[Expression] = Seq( + year, month, day, hour, min, sec) + + override protected def withNewChildInternal(newChild: Expression): TryMakeTimestamp = { + copy(replacement = newChild) + } +} + object DatePart { def parseExtractField( diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_with_timezone.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_with_timezone.explain new file mode 100644 index 0000000000000..ec8a7336a9b71 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_with_timezone.explain @@ -0,0 +1,2 @@ +Project [try_make_timestamp_ltz(a#0, a#0, a#0, a#0, a#0, cast(b#0 as decimal(16,6)), Some(g#0), Some(America/Los_Angeles), false, TimestampType) AS try_make_timestamp_ltz(a, a, a, a, a, b, g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_without_timezone.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_without_timezone.explain new file mode 100644 index 0000000000000..39f8095a1e095 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ltz_without_timezone.explain @@ -0,0 +1,2 @@ +Project [try_make_timestamp_ltz(a#0, a#0, a#0, a#0, a#0, cast(b#0 as decimal(16,6)), None, Some(America/Los_Angeles), false, TimestampType) AS try_make_timestamp_ltz(a, a, a, a, a, b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] 
diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ntz.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ntz.explain new file mode 100644 index 0000000000000..aa6613263622e --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_ntz.explain @@ -0,0 +1,2 @@ +Project [try_make_timestamp_ntz(a#0, a#0, a#0, a#0, a#0, cast(b#0 as decimal(16,6)), None, Some(America/Los_Angeles), false, TimestampNTZType) AS try_make_timestamp_ntz(a, a, a, a, a, b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_with_timezone.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_with_timezone.explain new file mode 100644 index 0000000000000..91d8e638750e6 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_with_timezone.explain @@ -0,0 +1,2 @@ +Project [make_timestamp(a#0, a#0, a#0, a#0, a#0, cast(b#0 as decimal(16,6)), Some(g#0), Some(America/Los_Angeles), false, TimestampType) AS try_make_timestamp(a, a, a, a, a, b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_without_timezone.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_without_timezone.explain new file mode 100644 index 0000000000000..5bca1302ead5e --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_make_timestamp_without_timezone.explain @@ -0,0 +1,2 @@ +Project [make_timestamp(a#0, a#0, a#0, a#0, a#0, cast(b#0 as decimal(16,6)), None, Some(America/Los_Angeles), false, TimestampType) AS try_make_timestamp(a, a, a, a, 
a, b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json new file mode 100644 index 0000000000000..179f6e06988fc --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json @@ -0,0 +1,49 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_make_timestamp_ltz", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0c60ba1c7bf8433f58e3c7372cd923b2ce801d6 GIT binary patch literal 233 zcmd;L5@3{i#K<*?k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1K7|PcE^NqRRN( 
i#O&1glFZ!H;*!MNg7}=0Dj`-OW-i7=!c-DONjd;XtV6&6 literal 0 HcmV?d00001 diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json new file mode 100644 index 0000000000000..29aa2096c2273 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json @@ -0,0 +1,45 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_make_timestamp_ltz", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..9caf6f6ba5285aea06bda2ed3462350672e4fe93 GIT binary patch literal 226 zcmd;L5@3|N&B!&0k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1G|_CoZv)qRRN( f#O&1glFZ!H;*!MNg7}=0Dj`-OW-i7=!c-CfXH-KC literal 0 HcmV?d00001 diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json new file mode 100644 index 0000000000000..6b8d31d0c58e5 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json @@ -0,0 +1,45 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_make_timestamp_ntz", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d7e2a8029def537f48cb5a8b7abec88acebdcb4 GIT binary patch literal 226 zcmd;L5@3|N&B!&0k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1G|_CoZv)qRRN( f#O&1glFZ!H;*!MNg800WDj`-OW-i7=!c-CfXRbpJ literal 0 HcmV?d00001 diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json new file mode 100644 index 0000000000000..79e11efc20d41 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json @@ -0,0 +1,49 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_make_timestamp", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..53b9839cf8c1fb0d68158c9f6f046287c9f66272 GIT binary patch literal 229 zcmd;L5@3|N$H+B_k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1KW5S1zHFqRRN( e#O&1glFZ!H;*!MN0wGo*W-i7=!c-DONjd=W+d=mL literal 0 HcmV?d00001 diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json new file mode 100644 index 0000000000000..39ce728a38862 --- /dev/null +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json @@ -0,0 +1,45 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_make_timestamp", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..74918d42f89c66313832a6e42c4ea00261fd071d GIT binary patch literal 222 zcmd;L5@3|N&d4>1k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1GY#J1(J;qRRN( b#O&1glFZ!H;*!MN0wGo*W-i7=!c-Cf9c4j8 literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 9006a20d13f08..27d9367c49e9f 100644 --- 
a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -354,6 +354,9 @@ | org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct | | org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct | | org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct | +| org.apache.spark.sql.catalyst.expressions.TryMakeTimestamp | try_make_timestamp | SELECT try_make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.TryMakeTimestampLTZExpressionBuilder | try_make_timestamp_ltz | SELECT try_make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.TryMakeTimestampNTZExpressionBuilder | try_make_timestamp_ntz | SELECT try_make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct | | org.apache.spark.sql.catalyst.expressions.TryMod | try_mod | SELECT try_mod(3, 2) | struct | | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.TryParseUrl | try_parse_url | SELECT try_parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 4cab05dfd2b9b..b65636dfcde07 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -1366,6 +1366,84 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(result1, result2) } + test("try_make_timestamp") { + val df 
= Seq((100, 11, 1, 12, 30, 01.001001, "UTC")). + toDF("year", "month", "day", "hour", "min", "sec", "timezone") + + val result1 = df.selectExpr("try_make_timestamp(year, month, day, hour, min, sec, timezone)") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val result2 = df.select(make_timestamp( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"), col("timezone"))) + checkAnswer(result1, result2) + } + + val result3 = df.selectExpr("try_make_timestamp(year, month, day, hour, min, sec)") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val result4 = df.select(make_timestamp( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"))) + checkAnswer(result3, result4) + } + + val result5 = df.selectExpr("try_make_timestamp(year, month, day, hour, min, sec)") + val result6 = df.select(try_make_timestamp( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"))) + checkAnswer(result5, result6) + } + + test("try_make_timestamp_ntz") { + val df = Seq((100, 11, 1, 12, 30, 01.001001)). + toDF("year", "month", "day", "hour", "min", "sec") + + val result1 = df.selectExpr( + "try_make_timestamp_ntz(year, month, day, hour, min, sec)") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val result2 = df.select(make_timestamp_ntz( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"))) + checkAnswer(result1, result2) + } + + val result3 = df.selectExpr( + "try_make_timestamp_ntz(year, month, day, hour, min, sec)") + val result4 = df.select(try_make_timestamp_ntz( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"))) + checkAnswer(result3, result4) + } + + test("try_make_timestamp_ltz") { + val df = Seq((100, 11, 1, 12, 30, 01.001001, "UTC")). 
+ toDF("year", "month", "day", "hour", "min", "sec", "timezone") + + val result1 = df.selectExpr( + "try_make_timestamp_ltz(year, month, day, hour, min, sec, timezone)") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val result2 = df.select(make_timestamp_ltz( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"), col("timezone"))) + checkAnswer(result1, result2) + } + + val result3 = df.selectExpr( + "try_make_timestamp_ltz(year, month, day, hour, min, sec)") + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val result4 = df.select(make_timestamp_ltz( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"))) + checkAnswer(result3, result4) + } + + val result5 = df.selectExpr( + "try_make_timestamp_ltz(year, month, day, hour, min, sec)") + val result6 = df.select(try_make_timestamp_ltz( + col("year"), col("month"), col("day"), col("hour"), + col("min"), col("sec"))) + checkAnswer(result5, result6) + } + test("make_ym_interval") { val df = Seq((100, 11)).toDF("year", "month")