From 61e838e709fbafbdb7c01d84bcf8c5b7472ad544 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 28 Nov 2024 16:19:04 -0500 Subject: [PATCH] Short-circuit BoolArray null count (#1509) I believe this is the regression from https://github.com/spiraldb/vortex/commit/10a1f2172fd70a77c66b6d81d7bb0c8d912dedae It checks both true_count and null_count, but the null count was triggering a full stats compute. --- vortex-array/src/array/bool/stats.rs | 7 ++++--- vortex-array/src/stats/statsset.rs | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/vortex-array/src/array/bool/stats.rs b/vortex-array/src/array/bool/stats.rs index 39a3003633..b819be5fdb 100644 --- a/vortex-array/src/array/bool/stats.rs +++ b/vortex-array/src/array/bool/stats.rs @@ -43,11 +43,12 @@ impl StatisticsVTable> for BoolEncoding { // Fast-path if we just want the true-count if matches!( stat, - Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant + Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant | Stat::NullCount ) { + let _null_count = array.1.count_set_bits(); return Ok(StatsSet::bools_with_true_and_null_count( array.0.bitand(array.1).count_set_bits(), - array.1.count_set_bits(), + array.1.len() - array.1.count_set_bits(), array.0.len(), )); } @@ -85,7 +86,7 @@ impl StatisticsVTable for BoolEncoding { // Fast-path if we just want the true-count if matches!( stat, - Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant + Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant | Stat::NullCount ) { return Ok(StatsSet::bools_with_true_and_null_count( buffer.count_set_bits(), diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index bea81aaf62..0c2b12ed65 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -90,9 +90,10 @@ impl StatsSet { true_count: usize, null_count: usize, len: usize, - ) -> StatsSet { + ) -> Self { StatsSet::from_iter([ (Stat::TrueCount, true_count.into()), + (Stat::NullCount, null_count.into()), (Stat::Min, (true_count == len).into()), (Stat::Max, (true_count > 0).into()), (