diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index cdee8c906054d..759779b535760 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -40,6 +40,11 @@ options { tokenVocab = SqlBaseLexer; } * When true, double quoted literals are identifiers rather than STRINGs. */ public boolean double_quoted_identifiers = false; + + /** + * When true, SELECT lists (and other named expression lists) support an optional trailing comma. + */ + public boolean optional_trailing_comma_in_named_expression_lists = true; } compoundOrSingleStatement @@ -1025,7 +1030,10 @@ namedExpression ; namedExpressionSeq - : namedExpression (COMMA namedExpression)* + : namedExpression + {optional_trailing_comma_in_named_expression_lists}? (COMMA namedExpression)* COMMA? + | namedExpression + {!optional_trailing_comma_in_named_expression_lists}? (COMMA namedExpression)* ; partitionFieldList diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala index 54af195847dac..cb0dcc30ad043 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala @@ -70,6 +70,8 @@ abstract class AbstractParser extends DataTypeParserInterface with Logging { parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords parser.double_quoted_identifiers = conf.doubleQuotedIdentifiers + parser.optional_trailing_comma_in_named_expression_lists = + conf.optionalTrailingCommaInNamedExpressionLists // https://github.com/antlr/antlr4/issues/192#issuecomment-15238595 // Save a great deal of time on correct inputs by using a two-stage parsing strategy. diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index d5668cc721750..cc148197831fe 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -47,6 +47,7 @@ private[sql] trait SqlApiConf { def stackTracesInDataFrameContext: Int def dataFrameQueryContextEnabled: Boolean def legacyAllowUntypedScalaUDFs: Boolean + def optionalTrailingCommaInNamedExpressionLists: Boolean } private[sql] object SqlApiConf { @@ -87,4 +88,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def stackTracesInDataFrameContext: Int = 1 override def dataFrameQueryContextEnabled: Boolean = true override def legacyAllowUntypedScalaUDFs: Boolean = false + override def optionalTrailingCommaInNamedExpressionLists: Boolean = true } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ba0a37541e490..c51e5f442ab71 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -5268,6 +5268,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val OPTIONAL_TRAILING_COMMA_IN_NAMED_EXPRESSION_LISTS = + buildConf("spark.sql.optionalTrailingCommaInNamedExpressionLists") + .internal() + .doc("When set to true, SELECT lists (and other places named expression lists are " + + "supported) allow an optional trailing comma at the end of the list.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + /** * Holds information about keys that have been deprecated. * @@ -6107,6 +6116,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf { override def setOpsPrecedenceEnforced: Boolean = getConf(SQLConf.LEGACY_SETOPS_PRECEDENCE_ENABLED) + override def optionalTrailingCommaInNamedExpressionLists: Boolean = + getConf(SQLConf.OPTIONAL_TRAILING_COMMA_IN_NAMED_EXPRESSION_LISTS) + override def exponentLiteralAsDecimalEnabled: Boolean = getConf(SQLConf.LEGACY_EXPONENT_LITERAL_AS_DECIMAL_ENABLED) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index c556a92373954..1317d0cf68dd2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1974,4 +1974,16 @@ class PlanParserSuite extends AnalysisTest { assert(unresolvedRelation2.options == CaseInsensitiveStringMap.empty) assert(unresolvedRelation2.isStreaming) } + + test("SPARK-50418: Support an optional trailing comma at the end of SELECT lists") { + withSQLConf(SQLConf.OPTIONAL_TRAILING_COMMA_IN_NAMED_EXPRESSION_LISTS.key -> "true") { + assertEqual("select 1, ", OneRowRelation().select(1)) + } + withSQLConf(SQLConf.OPTIONAL_TRAILING_COMMA_IN_NAMED_EXPRESSION_LISTS.key -> "false") { + checkError( + exception = parseException("select 1,"), + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "','", "hint" -> "")) + } + } }