From b1b3c71282c9e8d994b7942c0ad18165e46815c7 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Wed, 3 Apr 2024 17:30:26 +0100 Subject: [PATCH] Skip codecs where can_compress on sample is null (#188) **Before:** This can cause errors when an entire sample is null because calculated stats (eg Min) will be null, and compress() will error out on null scalar/stats. **After:** Run can_compress on samples and skip if the result is None --- vortex-array/src/compress.rs | 6 ++++++ vortex-fastlanes/src/for/compress.rs | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/vortex-array/src/compress.rs b/vortex-array/src/compress.rs index 0ae7249644..0084bcea22 100644 --- a/vortex-array/src/compress.rs +++ b/vortex-array/src/compress.rs @@ -361,6 +361,12 @@ fn find_best_compression<'a>( compression.id(), sample ); + if compression + .can_compress(sample, ctx.options.as_ref()) + .is_none() + { + continue; + } let compressed_sample = compression.compress(sample, None, ctx.for_encoding(compression))?; let compressed_size = compression.compressed_nbytes(compressed_sample.as_ref()); diff --git a/vortex-fastlanes/src/for/compress.rs b/vortex-fastlanes/src/for/compress.rs index 0e08c95c01..4f0a2961b6 100644 --- a/vortex-fastlanes/src/for/compress.rs +++ b/vortex-fastlanes/src/for/compress.rs @@ -10,7 +10,7 @@ use vortex::match_each_integer_ptype; use vortex::ptype::{NativePType, PType}; use vortex::scalar::ListScalarVec; use vortex::stats::Stat; -use vortex_error::VortexResult; +use vortex_error::{vortex_err, VortexResult}; use crate::downcast::DowncastFastlanes; use crate::{FoRArray, FoREncoding}; @@ -51,11 +51,16 @@ impl EncodingCompression for FoREncoding { ) -> VortexResult { let parray = array.as_primitive(); let shift = trailing_zeros(parray); + let min = parray + .stats() + .get_or_compute(&Stat::Min) + .ok_or_else(|| vortex_err!("Min stat not found"))?; + let child = match_each_integer_ptype!(parray.ptype(), |$T| { if shift == <$T>::PTYPE.bit_width() as u8 { ConstantArray::new($T::default(), parray.len()).into_array() } else { - compress_primitive::<$T>(parray, shift).into_array() + compress_primitive::<$T>(parray, shift, $T::try_from(min.clone())?).into_array() } }); @@ -63,21 +68,16 @@ impl EncodingCompression for FoREncoding { .named("for") .excluding(&FoREncoding) .compress(&child, like.map(|l| l.as_for().encoded()))?; - let reference = parray.stats().get(&Stat::Min).unwrap(); - Ok(FoRArray::try_new(compressed_child, reference, shift)?.into_array()) + Ok(FoRArray::try_new(compressed_child, min, shift)?.into_array()) } } fn compress_primitive( parray: &PrimitiveArray, shift: u8, + min: T, ) -> PrimitiveArray { assert!(shift < T::PTYPE.bit_width() as u8); - let min = parray - .stats() - .get_or_compute_as::(&Stat::Min) - .unwrap_or_default(); - let values = if shift > 0 { let shifted_min = min >> shift as usize; parray