diff --git a/bench-vortex/src/bin/serde.rs b/bench-vortex/src/bin/serde.rs index 3667e1504b..ba78d1add1 100644 --- a/bench-vortex/src/bin/serde.rs +++ b/bench-vortex/src/bin/serde.rs @@ -5,6 +5,6 @@ use log::LevelFilter; pub fn main() { setup_logger(LevelFilter::Debug); let taxi_spiral = write_taxi_data(); - let rows = take_taxi_data(&taxi_spiral, &[10, 11, 12, 13]); //, 100_000, 3_000_000]); + let rows = take_taxi_data(&taxi_spiral, &[10, 11, 12, 13, 100_000, 3_000_000]); println!("TAKE TAXI DATA: {:?}", rows); } diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 342d8e94b1..3336ae5f12 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -18,7 +18,8 @@ use vortex::compress::{CompressConfig, CompressCtx}; use vortex::formatter::display_tree; use vortex_alp::ALPEncoding; use vortex_datetime::DateTimeEncoding; -use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding}; +use vortex_dict::DictEncoding; +use vortex_fastlanes::{BitPackedEncoding, FoREncoding}; use vortex_ree::REEEncoding; use vortex_roaring::RoaringBoolEncoding; use vortex_schema::DType; @@ -51,11 +52,11 @@ pub fn enumerate_arrays() -> Vec { println!("FOUND {:?}", ENCODINGS.iter().map(|e| e.id()).collect_vec()); vec![ &ALPEncoding, - //&DictEncoding, + &DictEncoding, &BitPackedEncoding, &FoREncoding, &DateTimeEncoding, - &DeltaEncoding, + // &DeltaEncoding, Blows up the search space too much. &REEEncoding, &RoaringBoolEncoding, // RoaringIntEncoding, diff --git a/vortex-fastlanes/src/bitpacking/compress.rs b/vortex-fastlanes/src/bitpacking/compress.rs index 3e0bfb0fcf..df52cde228 100644 --- a/vortex-fastlanes/src/bitpacking/compress.rs +++ b/vortex-fastlanes/src/bitpacking/compress.rs @@ -120,13 +120,13 @@ fn bitpack_primitive(array: &[T], bit_width: usize) } // How many fastlanes vectors we will process. - let num_chunks = (array.len() + 1023) / 1024; + let num_chunks = array.len() / 1024; // Allocate a result byte array. let mut output = Vec::with_capacity(num_chunks * bit_width * 128); // Loop over all but the last chunk. - (0..num_chunks - 1).for_each(|i| { + (0..num_chunks).for_each(|i| { let start_elem = i * 1024; let chunk: &[T; 1024] = array_ref![array, start_elem, 1024]; TryBitPack::try_bitpack_into(chunk, bit_width, &mut output).unwrap(); @@ -211,14 +211,14 @@ fn bitunpack_primitive( } // How many fastlanes vectors we will process. - let num_chunks = (length + 1023) / 1024; + let num_chunks = length / 1024; // Allocate a result vector. let mut output = Vec::with_capacity(length); // Loop over all but the last chunk. let bytes_per_chunk = 128 * bit_width; - (0..num_chunks - 1).for_each(|i| { + (0..num_chunks).for_each(|i| { let chunk: &[u8] = &packed[i * bytes_per_chunk..][0..bytes_per_chunk]; TryBitPack::try_bitunpack_into(chunk, bit_width, &mut output).unwrap(); }); @@ -228,7 +228,7 @@ fn bitunpack_primitive( if last_chunk_size > 0 { let mut last_output = Vec::with_capacity(1024); TryBitPack::try_bitunpack_into( - &packed[(num_chunks - 1) * bytes_per_chunk..], + &packed[num_chunks * bytes_per_chunk..], bit_width, &mut last_output, ) @@ -309,11 +309,18 @@ mod test { } #[test] - fn test_decompress() { + fn test_compression_roundtrip() { + compression_roundtrip(125); + compression_roundtrip(1024); + compression_roundtrip(10_000); + compression_roundtrip(10_240); + } + + fn compression_roundtrip(n: usize) { let cfg = CompressConfig::new().with_enabled([&BitPackedEncoding as EncodingRef]); let ctx = CompressCtx::new(Arc::new(cfg)); - let values = PrimitiveArray::from(Vec::from_iter((0..10_000).map(|i| (i % 63) as u8))); + let values = PrimitiveArray::from(Vec::from_iter((0..n).map(|i| (i % 63) as u8))); let compressed = ctx.compress(&values, None).unwrap(); assert_eq!(compressed.encoding().id(), BitPackedEncoding.id());