From 4418f24a03f483aff5da040b13bf920f1e00e3e1 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 6 Nov 2024 09:01:18 +0100
Subject: [PATCH] [SPARK-50156][SQL] Integrate `_LEGACY_ERROR_TEMP_2113` into
 `UNRECOGNIZED_STATISTIC`

### What changes were proposed in this pull request?

This PR proposes to integrate `_LEGACY_ERROR_TEMP_2113` into `UNRECOGNIZED_STATISTIC`.

### Why are the changes needed?

To improve the error message by assigning a proper error condition and SQLSTATE.

### Does this PR introduce _any_ user-facing change?

No, only the user-facing error message is improved.

### How was this patch tested?

Updated the existing tests.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #48692 from itholic/LEGACY_2113.

Authored-by: Haejoon Lee
Signed-off-by: Max Gekk
---
 .../utils/src/main/resources/error/error-conditions.json   | 7 +------
 .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 6 +++---
 .../scala/org/apache/spark/sql/DataFrameStatSuite.scala    | 6 +++---
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index 18603b61d8ae1..b8722f26105df 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4765,7 +4765,7 @@
   },
   "UNRECOGNIZED_STATISTIC" : {
     "message" : [
-      "The statistic <stats> is not recognized. Valid statistics include `count`, `count_distinct`, `approx_count_distinct`, `mean`, `stddev`, `min`, `max`, and percentile values."
+      "The statistic <stats> is not recognized. Valid statistics include `count`, `count_distinct`, `approx_count_distinct`, `mean`, `stddev`, `min`, `max`, and percentile values. Percentile must be a numeric value followed by '%', within the range 0% to 100%."
     ],
     "sqlState" : "42704"
   },
@@ -7164,11 +7164,6 @@
       "Unexpected window function frame <frame>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_2113" : {
-    "message" : [
-      "Unable to parse <stats> as a percentile."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_2115" : {
     "message" : [
       "Unknown column: <unknownColumn>."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 2cc223ba69fa7..a32016eee61f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1155,15 +1155,15 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
   def cannotParseStatisticAsPercentileError(
       stats: String, e: NumberFormatException): SparkIllegalArgumentException = {
     new SparkIllegalArgumentException(
-      errorClass = "_LEGACY_ERROR_TEMP_2113",
-      messageParameters = Map("stats" -> stats),
+      errorClass = "UNRECOGNIZED_STATISTIC",
+      messageParameters = Map("stats" -> toSQLValue(stats)),
       cause = e)
   }

   def statisticNotRecognizedError(stats: String): SparkIllegalArgumentException = {
     new SparkIllegalArgumentException(
       errorClass = "UNRECOGNIZED_STATISTIC",
-      messageParameters = Map("stats" -> toSQLId(stats)))
+      messageParameters = Map("stats" -> toSQLValue(stats)))
   }

   def unknownColumnError(unknownColumn: String): SparkIllegalArgumentException = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index 37319de0b6624..6581cf07d8e9f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -589,15 +589,15 @@ class DataFrameStatSuite extends QueryTest with SharedSparkSession {
         person2.summary("foo")
       },
       condition = "UNRECOGNIZED_STATISTIC",
-      parameters = Map("stats" -> "`foo`")
+      parameters = Map("stats" -> "'foo'")
     )

     checkError(
       exception = intercept[SparkIllegalArgumentException] {
         person2.summary("foo%")
       },
-      condition = "_LEGACY_ERROR_TEMP_2113",
-      parameters = Map("stats" -> "foo%")
+      condition = "UNRECOGNIZED_STATISTIC",
+      parameters = Map("stats" -> "'foo%'")
     )
   }
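
Illustration (not part of the patch): a minimal sketch of how the reworked error surfaces to a user after this change. It assumes a running `SparkSession` named `spark`; the DataFrame and column name below are hypothetical and only mirror what the updated test exercises.

```scala
import org.apache.spark.SparkIllegalArgumentException

// Hypothetical DataFrame; any DataFrame works the same way for summary().
val df = spark.range(10).toDF("id")

// An unparsable percentile such as "foo%" now fails with UNRECOGNIZED_STATISTIC
// (previously _LEGACY_ERROR_TEMP_2113); the offending value is quoted via toSQLValue.
try {
  df.summary("foo%").show()
} catch {
  case e: SparkIllegalArgumentException =>
    // Expected error condition after this patch: UNRECOGNIZED_STATISTIC
    println(e.getErrorClass)
    println(e.getMessage)
}
```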