[SPARK-47261][SQL] Assign better name for errors _LEGACY_ERROR_TEMP_1172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174

### What changes were proposed in this pull request?

Assign better names for the errors _LEGACY_ERROR_TEMP_1172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174: they become PARQUET_TYPE_NOT_SUPPORTED, PARQUET_TYPE_ILLEGAL, and PARQUET_TYPE_NOT_RECOGNIZED, respectively.

### Why are the changes needed?

Proper names improve the user experience with Spark SQL.

### Does this PR introduce _any_ user-facing change?

Yes.

### How was this patch tested?

Added new tests and ran all the tests in the suites:
```
org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaSuite
org.apache.spark.SparkThrowableSuite
```
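
For local reproduction, the suites can be run with sbt roughly as follows (module names are assumed and may need adjusting for your checkout):
```
build/sbt "sql/testOnly org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaSuite"
build/sbt "core/testOnly org.apache.spark.SparkThrowableSuite"
```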

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#47421 from junyuc25/SPARK-47261.

Authored-by: junyuc25 <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
junyuc25 authored and MaxGekk committed Aug 9, 2024
1 parent 81948bb commit 731a104
Showing 5 changed files with 52 additions and 22 deletions.
33 changes: 18 additions & 15 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -3543,6 +3543,24 @@
],
"sqlState" : "42805"
},
"PARQUET_TYPE_ILLEGAL" : {
"message" : [
"Illegal Parquet type: <parquetType>."
],
"sqlState" : "42846"
},
"PARQUET_TYPE_NOT_RECOGNIZED" : {
"message" : [
"Unrecognized Parquet type: <field>."
],
"sqlState" : "42846"
},
"PARQUET_TYPE_NOT_SUPPORTED" : {
"message" : [
"Parquet type not yet supported: <parquetType>."
],
"sqlState" : "42846"
},
"PARSE_EMPTY_STATEMENT" : {
"message" : [
"Syntax error, unexpected empty statement."
@@ -5881,21 +5899,6 @@
"createTableColumnTypes option column <col> not found in schema <schema>."
]
},
"_LEGACY_ERROR_TEMP_1172" : {
"message" : [
"Parquet type not yet supported: <parquetType>."
]
},
"_LEGACY_ERROR_TEMP_1173" : {
"message" : [
"Illegal Parquet type: <parquetType>."
]
},
"_LEGACY_ERROR_TEMP_1174" : {
"message" : [
"Unrecognized Parquet type: <field>."
]
},
"_LEGACY_ERROR_TEMP_1181" : {
"message" : [
"Stream-stream join without equality predicate is not supported."
@@ -1995,19 +1995,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat

def parquetTypeUnsupportedYetError(parquetType: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1172",
errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
messageParameters = Map("parquetType" -> parquetType))
}

def illegalParquetTypeError(parquetType: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1173",
errorClass = "PARQUET_TYPE_ILLEGAL",
messageParameters = Map("parquetType" -> parquetType))
}

def unrecognizedParquetTypeError(field: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1174",
errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
messageParameters = Map("field" -> field))
}
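
With the renamed error classes above, downstream code that previously matched on raw message text can branch on the stable error class instead. A hedged sketch, assuming the `SparkThrowable` accessors `getErrorClass` and `getMessageParameters` exposed by `AnalysisException`, a `spark` session in scope, and a hypothetical file path:
```scala
import org.apache.spark.sql.AnalysisException

// Sketch only (not part of this PR): detect the renamed error class from caller code.
try {
  spark.read.parquet("/path/to/timestamp-nanos.parquet").collect()
} catch {
  case e: AnalysisException if e.getErrorClass == "PARQUET_TYPE_ILLEGAL" =>
    // The offending type is carried as a message parameter rather than only in free text.
    println(s"Illegal Parquet type: ${e.getMessageParameters.get("parquetType")}")
}
```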

Binary file not shown.
Binary file not shown.
@@ -1111,10 +1111,37 @@ class ParquetSchemaSuite extends ParquetSchemaTest

test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default nanosAsLong=false)") {
val testDataPath = testFile("test-data/timestamp-nanos.parquet")
val e = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
}
assert(e.getMessage.contains("Illegal Parquet type: INT64 (TIMESTAMP(NANOS,true))."))
checkError(
exception = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
},
errorClass = "PARQUET_TYPE_ILLEGAL",
parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
)
}

test("SPARK-47261: parquet file with unsupported type") {
val testDataPath = testFile("test-data/interval-using-fixed-len-byte-array.parquet")
checkError(
exception = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
},
errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
parameters = Map("parquetType" -> "FIXED_LEN_BYTE_ARRAY (INTERVAL)")
)
}

test("SPARK-47261: parquet file with unrecognized parquet type") {
val testDataPath = testFile("test-data/group-field-with-enum-as-logical-annotation.parquet")
val expectedParameter = "required group my_list (ENUM) {\n repeated group list {\n" +
" optional binary element (STRING);\n }\n}"
checkError(
exception = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
},
errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
parameters = Map("field" -> expectedParameter)
)
}

// =======================================================
