diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
index 65a7a0ebbd916..23555c98135f6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
@@ -140,7 +140,7 @@ case class AnalyzeColumnCommand(
     case DoubleType | FloatType => true
     case BooleanType => true
     case _: DatetimeType => true
-    case BinaryType | StringType => true
+    case BinaryType | _: StringType => true
     case _ => false
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
index 91454c79df600..48d98c14c3889 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
@@ -411,7 +411,7 @@ object CommandUtils extends Logging {
       case DoubleType | FloatType => fixedLenTypeStruct
       case BooleanType => fixedLenTypeStruct
       case _: DatetimeType => fixedLenTypeStruct
-      case BinaryType | StringType =>
+      case BinaryType | _: StringType =>
         // For string and binary type, we don't compute min, max or histogram
         val nullLit = Literal(null, col.dataType)
         struct(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
index 948a0e3444cd1..8d7ada15381bf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
@@ -678,6 +678,21 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
     }
   }
 
+  test("analyze stats for collated strings") {
+    val tableName = "collated_strings"
+    Seq[String]("sr_CI").foreach { collation =>
+      withTable(tableName) {
+        sql(s"CREATE TABLE $tableName (c STRING COLLATE $collation) USING PARQUET")
+        sql(s"INSERT INTO $tableName VALUES ('a'), ('A')")
+        sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS c")
+
+        val table = getCatalogTable(tableName)
+        assert(table.stats.get.colStats("c") ==
+          CatalogColumnStat(Some(1), None, None, Some(0), Some(1), Some(1)))
+      }
+    }
+  }
+
   test("analyzes table statistics in cached catalog view") {
     def getTableStats(tableName: String): Statistics = {
       spark.table(tableName).queryExecution.optimizedPlan.stats
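
Note on the pattern change above: with collation support, StringType is a class whose instances carry a collation id, while the companion case object StringType is only the default (UTF8_BINARY) instance. A stable-identifier pattern like `case BinaryType | StringType` therefore matches only the default-collation singleton, so collated string columns fell through to `case _` and were rejected by ANALYZE; the type test `_: StringType` matches every instance. A minimal, self-contained Scala sketch of that difference follows — it uses simplified stand-ins for the real org.apache.spark.sql.types hierarchy and is not part of the patch:

// Sketch only: simplified stand-ins for Spark's DataType hierarchy.
sealed trait DataType
case object BinaryType extends DataType
class StringType(val collationId: Int) extends DataType
// The companion case object is the default-collation instance,
// mirroring Spark's `case object StringType extends StringType(...)`.
case object StringType extends StringType(0)

object CollationMatchDemo extends App {
  // Old pattern: a stable identifier, compiled to an equality check
  // against the singleton, so only the default instance matches.
  def supportedOld(dt: DataType): Boolean = dt match {
    case BinaryType | StringType => true
    case _ => false
  }

  // New pattern: a type test that matches any StringType instance,
  // whatever its collation.
  def supportedNew(dt: DataType): Boolean = dt match {
    case BinaryType | _: StringType => true
    case _ => false
  }

  val collated = new StringType(42) // stand-in for STRING COLLATE sr_CI
  println(supportedOld(StringType)) // true
  println(supportedOld(collated))   // false -- the bug: collated strings fall through
  println(supportedNew(collated))   // true  -- the fix
}

The added test exercises exactly this path: a sr_CI (case-insensitive) column is analyzed, and the expected CatalogColumnStat carries only distinctCount, nullCount, avgLen and maxLen, consistent with the CommandUtils hunk where min, max and histogram are skipped for binary and string types.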