From ae58bb96dee26d4ef346cfd29f8c7f598db00fd8 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 5 Dec 2024 14:46:45 -0500 Subject: [PATCH] fix: Consistent metadata table column names for DataFusion stats (#1577) Fixes #1576. --- vortex-array/src/stats/mod.rs | 30 +++++++++++-------- .../src/persistent/statistics.rs | 9 +++--- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/vortex-array/src/stats/mod.rs b/vortex-array/src/stats/mod.rs index 517f7a57d1..afd50a4070 100644 --- a/vortex-array/src/stats/mod.rs +++ b/vortex-array/src/stats/mod.rs @@ -98,6 +98,22 @@ impl Stat { Stat::UncompressedSizeInBytes => DType::Primitive(PType::U64, NonNullable), } } + + pub fn name(&self) -> &str { + match self { + Self::BitWidthFreq => "bit_width_frequency", + Self::TrailingZeroFreq => "trailing_zero_frequency", + Self::IsConstant => "is_constant", + Self::IsSorted => "is_sorted", + Self::IsStrictSorted => "is_strict_sorted", + Self::Max => "max", + Self::Min => "min", + Self::RunCount => "run_count", + Self::TrueCount => "true_count", + Self::NullCount => "null_count", + Self::UncompressedSizeInBytes => "uncompressed_size_in_bytes", + } + } } pub fn as_stat_bitset_bytes(stats: &[Stat]) -> Vec { @@ -134,19 +150,7 @@ pub fn stats_from_bitset_bytes(bytes: &[u8]) -> Vec { impl Display for Stat { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::BitWidthFreq => write!(f, "bit_width_frequency"), - Self::TrailingZeroFreq => write!(f, "trailing_zero_frequency"), - Self::IsConstant => write!(f, "is_constant"), - Self::IsSorted => write!(f, "is_sorted"), - Self::IsStrictSorted => write!(f, "is_strict_sorted"), - Self::Max => write!(f, "max"), - Self::Min => write!(f, "min"), - Self::RunCount => write!(f, "run_count"), - Self::TrueCount => write!(f, "true_count"), - Self::NullCount => write!(f, "null_count"), - Self::UncompressedSizeInBytes => write!(f, "uncompressed_size_in_bytes"), - } + write!(f, "{}", self.name()) } } diff --git a/vortex-datafusion/src/persistent/statistics.rs b/vortex-datafusion/src/persistent/statistics.rs index e181bf6f5a..99409676db 100644 --- a/vortex-datafusion/src/persistent/statistics.rs +++ b/vortex-datafusion/src/persistent/statistics.rs @@ -5,6 +5,7 @@ use datafusion_common::stats::Precision; use datafusion_common::ColumnStatistics; use datafusion_expr::Accumulator; use vortex_array::array::StructArray; +use vortex_array::stats::Stat; use vortex_array::variants::StructArrayTrait as _; use vortex_array::IntoCanonical; use vortex_error::VortexResult; @@ -12,7 +13,7 @@ use vortex_error::VortexResult; pub fn array_to_col_statistics(array: &StructArray) -> VortexResult { let mut stats = ColumnStatistics::new_unknown(); - if let Some(null_count_array) = array.field_by_name("null_count") { + if let Some(null_count_array) = array.field_by_name(Stat::NullCount.name()) { let array = null_count_array.into_canonical()?.into_arrow()?; let array = array.as_primitive::(); @@ -20,7 +21,7 @@ pub fn array_to_col_statistics(array: &StructArray) -> VortexResult VortexResult VortexResult VortexResult> { - match array.field_by_name("uncompressed_size") { + match array.field_by_name(Stat::UncompressedSizeInBytes.name()) { None => Ok(None), Some(array) => { let array = array.into_canonical()?.into_arrow()?;