From 30d0b011ef877969d070f7ec9f60a633cc2ed319 Mon Sep 17 00:00:00 2001
From: Karim Ramadan
Date: Wed, 20 Nov 2024 17:00:57 +0100
Subject: [PATCH] [SPARK-50364][SQL] Implement serialization for LocalDateTime
 type in Row.jsonValue

### What changes were proposed in this pull request?

This pull request adds a `LocalDateTime` serializer to the `Row.jsonValue` method:

```scala
case (d: LocalDateTime, _) => JString(timestampFormatter.format(d))
```

This enables JSON serialization of _TimestampNTZType_ columns.

### Why are the changes needed?

Currently, serializing a `Row` that contains a _TimestampNTZType_ column fails with an error:

```java
[FAILED_ROW_TO_JSON] Failed to convert the row value '2018-05-14T12:13' of the class class java.time.LocalDateTime to the target SQL type "TIMESTAMPNTZTYPE" in the JSON format. SQLSTATE: 2203G
org.apache.spark.SparkIllegalArgumentException: [FAILED_ROW_TO_JSON] Failed to convert the row value '2018-05-14T12:13' of the class class java.time.LocalDateTime to the target SQL type "TIMESTAMPNTZTYPE" in the JSON format. SQLSTATE: 2203G
    at org.apache.spark.sql.Row.toJson$1(Row.scala:663)
    at org.apache.spark.sql.Row.toJson$1(Row.scala:651)
    at org.apache.spark.sql.Row.jsonValue(Row.scala:665)
    at org.apache.spark.sql.Row.jsonValue$(Row.scala:598)
    at org.apache.spark.sql.catalyst.expressions.GenericRow.jsonValue(rows.scala:28)
    at org.apache.spark.sql.RowJsonSuite.$anonfun$testJson$1(RowJsonSuite.scala:41)
```

How to reproduce the issue:

```scala
import org.apache.spark.sql.Row
import java.time.LocalDateTime

val r = Row.fromSeq(LocalDateTime.of(2024, 8, 10, 12, 33) :: Nil)
r.json
r.prettyJson
```

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Tests were added to the existing `RowJsonSuite.scala` class.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #48905 from karim-ramadan/bugfix/LocalDateTime-support-in-Row-json.
Authored-by: Karim Ramadan
Signed-off-by: Max Gekk
---
 sql/api/src/main/scala/org/apache/spark/sql/Row.scala          | 3 ++-
 .../src/test/scala/org/apache/spark/sql/RowJsonSuite.scala     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala
index aa14115453aea..764bdb17b37e2 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/Row.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql

 import java.sql.{Date, Timestamp}
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, LocalDateTime}
 import java.util.Base64

 import scala.collection.mutable
@@ -627,6 +627,7 @@ trait Row extends Serializable {
       case (d: Date, _) => JString(dateFormatter.format(d))
       case (i: Instant, _) => JString(timestampFormatter.format(i))
       case (t: Timestamp, _) => JString(timestampFormatter.format(t))
+      case (d: LocalDateTime, _) => JString(timestampFormatter.format(d))
       case (i: CalendarInterval, _) => JString(i.toString)
       case (a: Array[_], ArrayType(elementType, _)) =>
         iteratorToJsonArray(a.iterator, elementType)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala
index cf50063baa13c..49138532616e7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql

 import java.sql.{Date, Timestamp}
-import java.time.LocalDate
+import java.time.{LocalDate, LocalDateTime}

 import org.json4s.JsonAST.{JArray, JBool, JDecimal, JDouble, JLong, JNull, JObject, JString, JValue}

@@ -84,6 +84,7 @@ class RowJsonSuite extends SparkFunSuite {
     Timestamp.valueOf("2017-05-30 10:22:03.00").toInstant,
     TimestampType,
     JString("2017-05-30 10:22:03"))
+  testJson(LocalDateTime.of(2018, 5, 14, 12, 13), TimestampNTZType, JString("2018-05-14 12:13:00"))

   // Complex types
   testJson(
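For context, a minimal sketch of how the reproduction is expected to behave once the new case is in place. This is not part of the patch: the `GenericRowWithSchema` construction, the field name `value`, and the rendered timestamp string are illustrative assumptions modeled on the expectation used in `RowJsonSuite` (`"2018-05-14 12:13:00"`), since the exact JSON shape depends on the row's schema and the configured timestamp formatter.

```scala
import java.time.LocalDateTime

import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types.{StructType, TimestampNTZType}

// Attach an explicit TIMESTAMP_NTZ schema so Row.jsonValue can resolve the target SQL type.
val schema = new StructType().add("value", TimestampNTZType)
val row = new GenericRowWithSchema(Array[Any](LocalDateTime.of(2024, 8, 10, 12, 33)), schema)

// Before this patch: SparkIllegalArgumentException with [FAILED_ROW_TO_JSON].
// After this patch: the LocalDateTime is rendered by the timestamp formatter,
// e.g. {"value":"2024-08-10 12:33:00"} (assumed output, not taken from the patch).
println(row.json)
```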