Skip to content

Commit

Permalink
Skip codecs where can_compress on sample is null (#188)
Browse files Browse the repository at this point in the history
**Before:**
This can cause errors when an entire sample is null because calculated
stats (eg Min) will be null, and compress() will error out on null
scalar/stats.
**After:**
Run can_compress on samples and skip if the result is None
  • Loading branch information
jdcasale authored Apr 3, 2024
1 parent 74ed32d commit b1b3c71
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 9 deletions.
6 changes: 6 additions & 0 deletions vortex-array/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,12 @@ fn find_best_compression<'a>(
compression.id(),
sample
);
if compression
.can_compress(sample, ctx.options.as_ref())
.is_none()
{
continue;
}
let compressed_sample =
compression.compress(sample, None, ctx.for_encoding(compression))?;
let compressed_size = compression.compressed_nbytes(compressed_sample.as_ref());
Expand Down
18 changes: 9 additions & 9 deletions vortex-fastlanes/src/for/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use vortex::match_each_integer_ptype;
use vortex::ptype::{NativePType, PType};
use vortex::scalar::ListScalarVec;
use vortex::stats::Stat;
use vortex_error::VortexResult;
use vortex_error::{vortex_err, VortexResult};

use crate::downcast::DowncastFastlanes;
use crate::{FoRArray, FoREncoding};
Expand Down Expand Up @@ -51,33 +51,33 @@ impl EncodingCompression for FoREncoding {
) -> VortexResult<ArrayRef> {
let parray = array.as_primitive();
let shift = trailing_zeros(parray);
let min = parray
.stats()
.get_or_compute(&Stat::Min)
.ok_or_else(|| vortex_err!("Min stat not found"))?;

let child = match_each_integer_ptype!(parray.ptype(), |$T| {
if shift == <$T>::PTYPE.bit_width() as u8 {
ConstantArray::new($T::default(), parray.len()).into_array()
} else {
compress_primitive::<$T>(parray, shift).into_array()
compress_primitive::<$T>(parray, shift, $T::try_from(min.clone())?).into_array()
}
});

let compressed_child = ctx
.named("for")
.excluding(&FoREncoding)
.compress(&child, like.map(|l| l.as_for().encoded()))?;
let reference = parray.stats().get(&Stat::Min).unwrap();
Ok(FoRArray::try_new(compressed_child, reference, shift)?.into_array())
Ok(FoRArray::try_new(compressed_child, min, shift)?.into_array())
}
}

fn compress_primitive<T: NativePType + WrappingSub + PrimInt>(
parray: &PrimitiveArray,
shift: u8,
min: T,
) -> PrimitiveArray {
assert!(shift < T::PTYPE.bit_width() as u8);
let min = parray
.stats()
.get_or_compute_as::<T>(&Stat::Min)
.unwrap_or_default();

let values = if shift > 0 {
let shifted_min = min >> shift as usize;
parray
Expand Down

0 comments on commit b1b3c71

Please sign in to comment.