diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
index 32eb884942763..a8f73cebf31e4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
@@ -475,6 +475,31 @@ abstract class ExternalCatalogSuite extends SparkFunSuite {
     assert(catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "unknown"))).isEmpty)
   }
 
+  test("SPARK-45054: list partitions should restore stats") {
+    val catalog = newBasicCatalog()
+    val stats = Some(CatalogStatistics(sizeInBytes = 1))
+    val newPart = CatalogTablePartition(Map("a" -> "1", "b" -> "2"), storageFormat, stats = stats)
+    catalog.alterPartitions("db2", "tbl2", Seq(newPart))
+    val parts = catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "1")))
+
+    assert(parts.length == 1)
+    val part = parts.head
+    assert(part.stats.exists(_.sizeInBytes == 1))
+  }
+
+  test("SPARK-45054: list partitions by filter should restore stats") {
+    val catalog = newBasicCatalog()
+    val stats = Some(CatalogStatistics(sizeInBytes = 1))
+    val newPart = CatalogTablePartition(Map("a" -> "1", "b" -> "2"), storageFormat, stats = stats)
+    catalog.alterPartitions("db2", "tbl2", Seq(newPart))
+    val tz = TimeZone.getDefault.getID
+    val parts = catalog.listPartitionsByFilter("db2", "tbl2", Seq($"a".int === 1), tz)
+
+    assert(parts.length == 1)
+    val part = parts.head
+    assert(part.stats.exists(_.sizeInBytes == 1))
+  }
+
   test("SPARK-21457: list partitions with special chars") {
     val catalog = newBasicCatalog()
     assert(catalog.listPartitions("db2", "tbl1").isEmpty)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 67b780f13c431..e4325989b7066 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -1275,13 +1275,14 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       db: String,
       table: String,
       partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = withClient {
-    val partColNameMap = buildLowerCasePartColNameMap(getTable(db, table))
+    val catalogTable = getTable(db, table)
+    val partColNameMap = buildLowerCasePartColNameMap(catalogTable)
     val metaStoreSpec = partialSpec.map(toMetaStorePartitionSpec)
     val res = client.getPartitions(db, table, metaStoreSpec)
       .map { part => part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) }
 
-    metaStoreSpec match {
+    val parts = metaStoreSpec match {
       // This might be a bug of Hive: When the partition value inside the partial partition spec
       // contains dot, and we ask Hive to list partitions w.r.t. the partial partition spec, Hive
       // treats dot as matching any single character and may return more partitions than we
@@ -1290,6 +1291,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         res.filter(p => isPartialPartitionSpec(spec, toMetaStorePartitionSpec(p.spec)))
       case _ => res
     }
+    parts.map(restorePartitionMetadata(_, catalogTable))
   }
 
   override def listPartitionsByFilter(
@@ -1303,6 +1305,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     val clientPrunedPartitions =
       client.getPartitionsByFilter(rawHiveTable, predicates).map { part =>
         part.copy(spec = restorePartitionSpec(part.spec, partColNameMap))
+        restorePartitionMetadata(part, catalogTable)
       }
     prunePartitionsByFilter(catalogTable, clientPrunedPartitions, predicates, defaultTimeZoneId)
   }
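For context: both added calls go through restorePartitionMetadata, which already exists in HiveExternalCatalog but is not shown in this diff. Roughly, it rebuilds a Spark-side CatalogStatistics from the "spark.sql.statistics.*" entries that Spark keeps in the partition's Hive parameters, so that stats set by ANALYZE survive a listPartitions round trip. The sketch below only illustrates that idea; the property keys and the helper name are assumptions for illustration, not the actual Spark implementation.

// Illustrative sketch only: approximates what a stats-restoring step could look
// like, assuming stats are stored as "spark.sql.statistics.*" keys in
// part.parameters (key names and helper name are assumptions).
import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTablePartition}

object PartitionStatsSketch {
  private val totalSizeKey = "spark.sql.statistics.totalSize" // assumed key
  private val numRowsKey = "spark.sql.statistics.numRows"     // assumed key

  def restoreStats(part: CatalogTablePartition): CatalogTablePartition = {
    part.parameters.get(totalSizeKey) match {
      case Some(size) =>
        // Rebuild the Spark-side statistics object from the stored parameters.
        val stats = CatalogStatistics(
          sizeInBytes = BigInt(size),
          rowCount = part.parameters.get(numRowsKey).map(BigInt(_)))
        part.copy(stats = Some(stats))
      case None =>
        part // no Spark stats recorded for this partition; return it unchanged
    }
  }
}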