From be175378c7159a22c3f7555aaf6666fff01ef5c7 Mon Sep 17 00:00:00 2001
From: Robert Kruszewski
Date: Tue, 25 Jun 2024 12:56:05 +0100
Subject: [PATCH] Fix Slice and SearchSorted on BitPackedArray

---
Review notes (placed below the three-dash marker, so they are not part of the
commit message):

 * This patch was recovered from a copy whose line breaks had been collapsed
   and whose angle-bracket contents had been stripped. Line structure was
   rebuilt from the hunk headers and diffstat; `Option<usize>`,
   `maybe_null_slice::<T>()` and `collect::<Vec<_>>()` were restored.
   Indentation and the position of blank context lines are reconstructed --
   confirm against the target tree before applying.
 * The author e-mail address in the `From:` header and any generic parameters
   in the `impl IndexOrd for BitPackedSearch` hunk-header text were lost in
   the same way and are NOT restored here (the hunk-header text is
   informational only).
 * NOTE(review): in mod.rs the expected packed size adds one whole block for
   any non-zero offset. slice.rs keeps the blocks from `start - offset` up to
   `stop` rounded up to 1024, i.e. ceil((offset + length) / 1024) blocks, so a
   slice with `offset + length <= 1024` (e.g. `search_sliced`: offset 3,
   length 2) appears to be counted one block too high. Presumably
   `(length + offset + 1023) / 1024` is the intended block count -- confirm.

 .../src/bitpacking/compute/search_sorted.rs   | 30 +++++++++++++-
 .../fastlanes/src/bitpacking/compute/slice.rs | 39 ++++++++++++++++++-
 encodings/fastlanes/src/bitpacking/mod.rs     | 10 ++++-
 3 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs
index 85b7dd3daf..d5097378ef 100644
--- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs
+++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs
@@ -35,6 +35,7 @@ impl SearchSortedFn for BitPackedArray {
 #[derive(Debug)]
 struct BitPackedSearch {
     packed: PrimitiveArray,
+    offset: usize,
     length: usize,
     bit_width: usize,
     min_patch_offset: Option<usize>,
@@ -44,6 +45,7 @@ impl BitPackedSearch {
     pub fn new(array: &BitPackedArray) -> Self {
         Self {
             packed: array.packed().into_primitive().unwrap(),
+            offset: array.offset(),
             length: array.len(),
             bit_width: array.bit_width(),
             min_patch_offset: array.patches().map(|p| {
@@ -64,8 +66,12 @@ impl IndexOrd for BitPackedSearch {
         }
         // SAFETY: Used in search_sorted_by which ensures that idx is within bounds
         let val: T = unsafe {
-            unpack_single_primitive(self.packed.maybe_null_slice::<T>(), self.bit_width, idx)
-                .unwrap()
+            unpack_single_primitive(
+                self.packed.maybe_null_slice::<T>(),
+                self.bit_width,
+                idx + self.offset,
+            )
+            .unwrap()
         };
         val.partial_cmp(elem)
     }
@@ -81,6 +87,7 @@ impl Len for BitPackedSearch {
 mod test {
     use vortex::array::primitive::PrimitiveArray;
     use vortex::compute::search_sorted::{search_sorted, SearchResult, SearchSortedSide};
+    use vortex::compute::slice::slice;
     use vortex::IntoArray;
 
     use crate::BitPackedArray;
@@ -110,4 +117,23 @@ mod test {
             SearchResult::NotFound(0)
         );
     }
+
+    #[test]
+    fn search_sliced() {
+        let bitpacked = slice(
+            &BitPackedArray::encode(
+                &PrimitiveArray::from(vec![1u32, 2, 3, 4, 5]).into_array(),
+                2,
+            )
+            .unwrap()
+            .into_array(),
+            3,
+            5,
+        )
+        .unwrap();
+        assert_eq!(
+            search_sorted(&bitpacked, 4, SearchSortedSide::Left).unwrap(),
+            SearchResult::Found(0)
+        );
+    }
 }
diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs
index 693f7df404..1fa7bdbc68 100644
--- a/encodings/fastlanes/src/bitpacking/compute/slice.rs
+++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs
@@ -12,8 +12,8 @@ impl SliceFn for BitPackedArray {
         let block_start = max(0, start - offset);
         let block_stop = ((stop + 1023) / 1024) * 1024;
 
-        let encoded_start = (block_start / 8) * self.bit_width();
-        let encoded_stop = (block_stop / 8) * self.bit_width();
+        let encoded_start = (block_start / 8) * self.bit_width() / self.ptype().byte_width();
+        let encoded_stop = (block_stop / 8) * self.bit_width() / self.ptype().byte_width();
         Self::try_new_from_offset(
             slice(&self.packed(), encoded_start, encoded_stop)?,
             self.validity().slice(start, stop)?,
@@ -25,3 +25,38 @@ impl SliceFn for BitPackedArray {
         .map(|a| a.into_array())
     }
 }
+
+#[cfg(test)]
+mod test {
+    use vortex::array::primitive::PrimitiveArray;
+    use vortex::compute::slice::slice;
+    use vortex::{ArrayTrait, IntoArray};
+
+    use crate::BitPackedArray;
+
+    #[test]
+    pub fn slice_block() {
+        let arr = BitPackedArray::encode(
+            &PrimitiveArray::from((0u32..2048).map(|v| v % 64).collect::<Vec<_>>()).into_array(),
+            6,
+        )
+        .unwrap()
+        .into_array();
+        let sliced = BitPackedArray::try_from(slice(&arr, 1024, 2048).unwrap()).unwrap();
+        assert_eq!(sliced.offset(), 0);
+        assert_eq!(sliced.len(), 1024);
+    }
+
+    #[test]
+    pub fn slice_within_block() {
+        let arr = BitPackedArray::encode(
+            &PrimitiveArray::from((0u32..2048).map(|v| v % 64).collect::<Vec<_>>()).into_array(),
+            6,
+        )
+        .unwrap()
+        .into_array();
+        let sliced = BitPackedArray::try_from(slice(&arr, 512, 1434).unwrap()).unwrap();
+        assert_eq!(sliced.offset(), 512);
+        assert_eq!(sliced.len(), 922);
+    }
+}
diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs
index 5bcee29017..641cdcd993 100644
--- a/encodings/fastlanes/src/bitpacking/mod.rs
+++ b/encodings/fastlanes/src/bitpacking/mod.rs
@@ -50,10 +50,16 @@ impl BitPackedArray {
         if bit_width > 64 {
             vortex_bail!("Unsupported bit width {}", bit_width);
         }
+        if offset > 1023 {
+            vortex_bail!(
+                "Offset must be less than full block, i.e. 1024, got {}",
+                offset
+            );
+        }
 
         let ptype: PType = (&dtype).try_into()?;
-        let expected_packed_size =
-            ((length + 1023) / 1024) * (128 * bit_width / ptype.byte_width());
+        let expected_packed_size = (((length + 1023) / 1024) + if offset == 0 { 0 } else { 1 })
+            * (128 * bit_width / ptype.byte_width());
         if packed.len() != expected_packed_size {
             return Err(vortex_err!(
                 "Expected {} packed bytes, got {}",