From 44a9277d5b232dc6d209e5d5c73c514c81737e66 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 19 Dec 2024 11:18:26 -0500 Subject: [PATCH] fix: BitPackedArray ptype changed by compute funcs (#1724) We need to reinterpret_cast the array before we patch it --- bench-vortex/src/bin/clickbench.rs | 4 +- .../fastlanes/src/bitpacking/compress.rs | 2 +- .../src/bitpacking/compute/filter.rs | 3 -- .../fastlanes/src/bitpacking/compute/take.rs | 32 +++++++++++++-- encodings/fastlanes/src/bitpacking/mod.rs | 32 ++++++++++++++- vortex-array/src/array/primitive/patch.rs | 41 +++++++++++++------ 6 files changed, 93 insertions(+), 21 deletions(-) diff --git a/bench-vortex/src/bin/clickbench.rs b/bench-vortex/src/bin/clickbench.rs index 9bfe74248..c9f702508 100644 --- a/bench-vortex/src/bin/clickbench.rs +++ b/bench-vortex/src/bin/clickbench.rs @@ -163,7 +163,9 @@ fn main() { for _ in 0..args.iterations { let exec_duration = runtime.block_on(async { let start = Instant::now(); - execute_query(&context, &query).await.unwrap(); + execute_query(&context, &query) + .await + .unwrap_or_else(|e| panic!("executing query {query_idx}: {e}")); start.elapsed() }); diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs index 346019042..715b51d2a 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -194,7 +194,7 @@ pub fn unpack(array: BitPackedArray) -> VortexResult { let length = array.len(); let offset = array.offset() as usize; let ptype = array.ptype(); - let mut unpacked = match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |$P| { + let mut unpacked = match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |$P| { PrimitiveArray::from_vec( unpack_primitive::<$P>(array.packed_slice::<$P>(), bit_width, offset, length), array.validity(), diff --git a/encodings/fastlanes/src/bitpacking/compute/filter.rs b/encodings/fastlanes/src/bitpacking/compute/filter.rs index 3ea778de5..88a72562c 100644 --- a/encodings/fastlanes/src/bitpacking/compute/filter.rs +++ b/encodings/fastlanes/src/bitpacking/compute/filter.rs @@ -178,9 +178,6 @@ mod test { #[test] fn filter_bitpacked_signed() { - // Elements 0..=499 are negative integers (patches) - // Element 500 = 0 (packed) - // Elements 501..999 are positive integers (packed) let values: Vec = (0..500).collect_vec(); let unpacked = PrimitiveArray::from(values.clone()); let bitpacked = BitPackedArray::encode(unpacked.as_ref(), 9).unwrap(); diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs index 3e085cd99..1939e491c 100644 --- a/encodings/fastlanes/src/bitpacking/compute/take.rs +++ b/encodings/fastlanes/src/bitpacking/compute/take.rs @@ -28,12 +28,12 @@ impl TakeFn for BitPackedEncoding { // NOTE: we use the unsigned PType because all values in the BitPackedArray must // be non-negative (pre-condition of creating the BitPackedArray). - let ptype: PType = PType::try_from(array.dtype())?.to_unsigned(); + let ptype: PType = PType::try_from(array.dtype())?; let validity = array.validity(); let taken_validity = validity.take(indices)?; let indices = indices.clone().into_primitive()?; - let taken = match_each_unsigned_integer_ptype!(ptype, |$T| { + let taken = match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |$T| { match_each_integer_ptype!(indices.ptype(), |$I| { take_primitive::<$T, $I>(array, &indices, taken_validity)? }) @@ -107,7 +107,11 @@ fn take_primitive( } }); - let unpatched_taken = PrimitiveArray::from_vec(output, taken_validity); + let mut unpatched_taken = PrimitiveArray::from_vec(output, taken_validity); + // Flip back to signed type before patching. + if array.ptype().is_signed_int() { + unpatched_taken = unpatched_taken.reinterpret_cast(array.ptype()); + } if let Some(patches) = array.patches() { if let Some(patches) = patches.take(&indices.to_array())? { return unpatched_taken.patch(patches); @@ -125,6 +129,7 @@ mod test { use rand::{thread_rng, Rng}; use vortex_array::array::PrimitiveArray; use vortex_array::compute::{scalar_at, slice, take}; + use vortex_array::validity::Validity; use vortex_array::{IntoArrayData, IntoArrayVariant}; use crate::BitPackedArray; @@ -210,4 +215,25 @@ mod test { ); }); } + + #[test] + #[cfg_attr(miri, ignore)] + fn take_signed_with_patches() { + let start = BitPackedArray::encode( + &PrimitiveArray::from(vec![1i32, 2i32, 3i32, 4i32]).into_array(), + 1, + ) + .unwrap(); + + let taken_primitive = super::take_primitive::( + &start, + &PrimitiveArray::from(vec![0u64, 1, 2, 3]), + Validity::NonNullable, + ) + .unwrap(); + assert_eq!( + taken_primitive.into_maybe_null_slice::(), + vec![1i32, 2, 3, 4] + ); + } } diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index edd1ea95a..1250ad223 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -100,6 +100,17 @@ impl BitPackedArray { ); } + if let Some(ref patches) = patches { + // Ensure that array and patches have same PType + if !patches.dtype().eq_ignore_nullability(ptype.into()) { + vortex_bail!( + "Patches DType {} does not match BitPackedArray dtype {}", + patches.dtype().as_nonnullable(), + ptype + ) + } + } + // expected packed size is in bytes let expected_packed_size = ((length + offset as usize + 1023) / 1024) * (128 * bit_width as usize); @@ -276,8 +287,9 @@ impl PrimitiveArrayTrait for BitPackedArray {} #[cfg(test)] mod test { + use itertools::Itertools; use vortex_array::array::PrimitiveArray; - use vortex_array::{IntoArrayData, IntoArrayVariant}; + use vortex_array::{IntoArrayData, IntoArrayVariant, IntoCanonical}; use crate::BitPackedArray; @@ -305,4 +317,22 @@ mod test { let _packed = BitPackedArray::encode(uncompressed.as_ref(), 9) .expect_err("Cannot pack value into larger width"); } + + #[test] + fn signed_with_patches() { + let values = (0i32..=512).collect_vec(); + let parray = PrimitiveArray::from(values.clone()).into_array(); + + let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap(); + assert!(packed_with_patches.patches().is_some()); + assert_eq!( + packed_with_patches + .into_canonical() + .unwrap() + .into_primitive() + .unwrap() + .into_maybe_null_slice::(), + values + ); + } } diff --git a/vortex-array/src/array/primitive/patch.rs b/vortex-array/src/array/primitive/patch.rs index c37d94a37..4b6dc0419 100644 --- a/vortex-array/src/array/primitive/patch.rs +++ b/vortex-array/src/array/primitive/patch.rs @@ -1,33 +1,50 @@ -use itertools::Itertools; -use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype}; +use arrow_buffer::ArrowNativeType; +use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType}; use vortex_error::VortexResult; use crate::array::PrimitiveArray; use crate::patches::Patches; +use crate::validity::Validity; use crate::variants::PrimitiveArrayTrait; use crate::{ArrayLen, IntoArrayVariant}; impl PrimitiveArray { #[allow(clippy::cognitive_complexity)] pub fn patch(self, patches: Patches) -> VortexResult { - let (_, indices, values) = patches.into_parts(); - let indices = indices.into_primitive()?; - let values = values.into_primitive()?; + let (_, patch_indices, patch_values) = patches.into_parts(); + let patch_indices = patch_indices.into_primitive()?; + let patch_values = patch_values.into_primitive()?; let patched_validity = self.validity() - .patch(self.len(), indices.as_ref(), values.validity())?; + .patch(self.len(), patch_indices.as_ref(), patch_values.validity())?; - match_each_integer_ptype!(indices.ptype(), |$I| { + match_each_integer_ptype!(patch_indices.ptype(), |$I| { match_each_native_ptype!(self.ptype(), |$T| { - let mut own_values = self.into_maybe_null_slice::<$T>(); - for (idx, value) in indices.into_maybe_null_slice::<$I>().into_iter().zip_eq(values.into_maybe_null_slice::<$T>().into_iter()) { - own_values[idx as usize] = value; - } - Ok(Self::from_vec(own_values, patched_validity)) + self.patch_typed::<$T, $I>(patch_indices, patch_values, patched_validity) }) }) } + + fn patch_typed( + self, + patch_indices: PrimitiveArray, + patch_values: PrimitiveArray, + patched_validity: Validity, + ) -> VortexResult + where + T: NativePType + ArrowNativeType, + I: NativePType + ArrowNativeType, + { + let mut own_values = self.into_maybe_null_slice::(); + + let patch_indices = patch_indices.into_maybe_null_slice::(); + let patch_values = patch_values.into_maybe_null_slice::(); + for (idx, value) in itertools::zip_eq(patch_indices, patch_values) { + own_values[idx.as_usize()] = value; + } + Ok(Self::from_vec(own_values, patched_validity)) + } } #[cfg(test)]