Skip to content

Commit

Permalink
Fix Slice and SearchSorted for BitPackedArray (#410)
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 authored Jun 25, 2024
1 parent 5c1e544 commit af10b71
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 46 deletions.
36 changes: 33 additions & 3 deletions encodings/fastlanes/src/bitpacking/compute/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ impl SearchSortedFn for BitPackedArray {
let unwrapped_value: $P = value.cast(self.dtype())?.try_into().unwrap();
if let Some(patches_array) = self.patches() {
if unwrapped_value as usize >= self.max_packed_value() {
search_sorted(&patches_array, value.clone(), side)
Ok(search_sorted(&patches_array, value.clone(), side)?.map(|i| i - self.offset()))
} else {
Ok(SearchSorted::search_sorted(&BitPackedSearch::new(self), &unwrapped_value, side))
}
Expand All @@ -35,6 +35,7 @@ impl SearchSortedFn for BitPackedArray {
#[derive(Debug)]
struct BitPackedSearch {
packed: PrimitiveArray,
offset: usize,
length: usize,
bit_width: usize,
min_patch_offset: Option<usize>,
Expand All @@ -44,6 +45,7 @@ impl BitPackedSearch {
pub fn new(array: &BitPackedArray) -> Self {
Self {
packed: array.packed().into_primitive().unwrap(),
offset: array.offset(),
length: array.len(),
bit_width: array.bit_width(),
min_patch_offset: array.patches().map(|p| {
Expand All @@ -64,8 +66,12 @@ impl<T: BitPacking + NativePType> IndexOrd<T> for BitPackedSearch {
}
// SAFETY: Used in search_sorted_by which ensures that idx is within bounds
let val: T = unsafe {
unpack_single_primitive(self.packed.maybe_null_slice::<T>(), self.bit_width, idx)
.unwrap()
unpack_single_primitive(
self.packed.maybe_null_slice::<T>(),
self.bit_width,
idx + self.offset,
)
.unwrap()
};
val.partial_cmp(elem)
}
Expand All @@ -81,6 +87,7 @@ impl Len for BitPackedSearch {
mod test {
use vortex::array::primitive::PrimitiveArray;
use vortex::compute::search_sorted::{search_sorted, SearchResult, SearchSortedSide};
use vortex::compute::slice::slice;
use vortex::IntoArray;

use crate::BitPackedArray;
Expand Down Expand Up @@ -110,4 +117,27 @@ mod test {
SearchResult::NotFound(0)
);
}

/// Searching a sliced bit-packed array must report positions relative to the
/// start of the slice, not to the start of the underlying packed data.
#[test]
fn search_sliced() {
    // Encode five ascending values (second `encode` argument is presumably the
    // bit width), then keep only the window `2..4`, i.e. the values 3 and 4.
    let source = PrimitiveArray::from(vec![1u32, 2, 3, 4, 5]).into_array();
    let encoded = BitPackedArray::encode(&source, 2).unwrap().into_array();
    let window = slice(&encoded, 2, 4).unwrap();

    // (needle, expected position inside the sliced window)
    for (needle, expected) in [(3, 0), (4, 1)] {
        assert_eq!(
            search_sorted(&window, needle, SearchSortedSide::Left).unwrap(),
            SearchResult::Found(expected)
        );
    }
}
}
88 changes: 86 additions & 2 deletions encodings/fastlanes/src/bitpacking/compute/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ impl SliceFn for BitPackedArray {
let block_start = max(0, start - offset);
let block_stop = ((stop + 1023) / 1024) * 1024;

let encoded_start = (block_start / 8) * self.bit_width();
let encoded_stop = (block_stop / 8) * self.bit_width();
let encoded_start = (block_start / 8) * self.bit_width() / self.ptype().byte_width();
let encoded_stop = (block_stop / 8) * self.bit_width() / self.ptype().byte_width();
Self::try_new_from_offset(
slice(&self.packed(), encoded_start, encoded_stop)?,
self.validity().slice(start, stop)?,
Expand All @@ -25,3 +25,87 @@ impl SliceFn for BitPackedArray {
.map(|a| a.into_array())
}
}

/// Tests for slicing a `BitPackedArray`, covering ranges that line up with
/// 1024-element boundaries as well as ranges that start or end in between.
#[cfg(test)]
mod test {
    use vortex::array::primitive::PrimitiveArray;
    use vortex::compute::slice::slice;
    use vortex::compute::unary::scalar_at::scalar_at;
    use vortex::{ArrayTrait, IntoArray};

    use crate::BitPackedArray;

    /// Slicing `1024..2048` out of 2048 values yields 1024 elements with a
    /// zero offset, and the first/last elements match the source values.
    #[test]
    pub fn slice_block() {
        let values: Vec<u32> = (0u32..2048).map(|v| v % 64).collect();
        let primitive = PrimitiveArray::from(values);
        let packed = BitPackedArray::encode(primitive.array(), 6)
            .unwrap()
            .into_array();

        let window = slice(&packed, 1024, 2048).unwrap();
        let sliced = BitPackedArray::try_from(window).unwrap();

        assert_eq!(sliced.len(), 1024);
        assert_eq!(sliced.offset(), 0);
        assert_eq!(scalar_at(sliced.array(), 0).unwrap(), (1024u32 % 64).into());
        assert_eq!(
            scalar_at(sliced.array(), 1023).unwrap(),
            (2047u32 % 64).into()
        );
    }

    /// Slicing `512..1434` yields 922 elements with an offset of 512, and the
    /// first/last elements match the source values.
    #[test]
    pub fn slice_within_block() {
        let values: Vec<u32> = (0u32..2048).map(|v| v % 64).collect();
        let primitive = PrimitiveArray::from(values);
        let packed = BitPackedArray::encode(primitive.array(), 6)
            .unwrap()
            .into_array();

        let window = slice(&packed, 512, 1434).unwrap();
        let sliced = BitPackedArray::try_from(window).unwrap();

        assert_eq!(sliced.len(), 922);
        assert_eq!(sliced.offset(), 512);
        assert_eq!(scalar_at(sliced.array(), 0).unwrap(), (512u32 % 64).into());
        assert_eq!(
            scalar_at(sliced.array(), 921).unwrap(),
            (1433u32 % 64).into()
        );
    }

    /// Slicing `768..9999` of a `u8` array: the first and last elements of the
    /// result must equal the source values at positions 768 and 9998.
    #[test]
    fn slice_within_block_u8s() {
        let values: Vec<u8> = (0..10_000).map(|i| (i % 63) as u8).collect();
        let primitive = PrimitiveArray::from(values);
        let packed = BitPackedArray::encode(primitive.array(), 7).unwrap();

        let compressed = slice(packed.array(), 768, 9999).unwrap();
        let last = compressed.len() - 1;

        assert_eq!(
            scalar_at(&compressed, 0).unwrap(),
            ((768 % 63) as u8).into()
        );
        assert_eq!(
            scalar_at(&compressed, last).unwrap(),
            ((9998 % 63) as u8).into()
        );
    }

    /// Slicing `7168..9216` of a `u8` array (both bounds are multiples of
    /// 1024): the first and last elements of the result must equal the source
    /// values at positions 7168 and 9215.
    #[test]
    fn slice_block_boundary_u8s() {
        let values: Vec<u8> = (0..10_000).map(|i| (i % 63) as u8).collect();
        let primitive = PrimitiveArray::from(values);
        let packed = BitPackedArray::encode(primitive.array(), 7).unwrap();

        let compressed = slice(packed.array(), 7168, 9216).unwrap();
        let last = compressed.len() - 1;

        assert_eq!(
            scalar_at(&compressed, 0).unwrap(),
            ((7168 % 63) as u8).into()
        );
        assert_eq!(
            scalar_at(&compressed, last).unwrap(),
            ((9215 % 63) as u8).into()
        );
    }
}
48 changes: 7 additions & 41 deletions encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,16 @@ impl BitPackedArray {
if bit_width > 64 {
vortex_bail!("Unsupported bit width {}", bit_width);
}
if offset > 1023 {
vortex_bail!(
"Offset must be less than full block, i.e. 1024, got {}",
offset
);
}

let ptype: PType = (&dtype).try_into()?;
let expected_packed_size =
((length + 1023) / 1024) * (128 * bit_width / ptype.byte_width());
((length + offset + 1023) / 1024) * (128 * bit_width / ptype.byte_width());
if packed.len() != expected_packed_size {
return Err(vortex_err!(
"Expected {} packed bytes, got {}",
Expand Down Expand Up @@ -180,50 +186,10 @@ impl ArrayTrait for BitPackedArray {
#[cfg(test)]
mod test {
use vortex::array::primitive::PrimitiveArray;
use vortex::compute::slice::slice;
use vortex::compute::unary::scalar_at::scalar_at;
use vortex::{IntoArray, IntoCanonical};

use crate::BitPackedArray;

#[test]
fn slice_within_block() {
let packed = BitPackedArray::encode(
PrimitiveArray::from((0..10_000).map(|i| (i % 63) as u8).collect::<Vec<_>>()).array(),
7,
)
.unwrap();

let compressed = slice(packed.array(), 768, 9999).unwrap();
assert_eq!(
scalar_at(&compressed, 0).unwrap(),
((768 % 63) as u8).into()
);
assert_eq!(
scalar_at(&compressed, compressed.len() - 1).unwrap(),
((9998 % 63) as u8).into()
);
}

#[test]
fn slice_block_boundary() {
let packed = BitPackedArray::encode(
PrimitiveArray::from((0..10_000).map(|i| (i % 63) as u8).collect::<Vec<_>>()).array(),
7,
)
.unwrap();

let compressed = slice(packed.array(), 7168, 9216).unwrap();
assert_eq!(
scalar_at(&compressed, 0).unwrap(),
((7168 % 63) as u8).into()
);
assert_eq!(
scalar_at(&compressed, compressed.len() - 1).unwrap(),
((9215 % 63) as u8).into()
);
}

#[test]
fn test_encode() {
let values = vec![Some(1), None, Some(1), None, Some(1), None, Some(u64::MAX)];
Expand Down
7 changes: 7 additions & 0 deletions vortex-array/src/compute/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ impl SearchResult {
Self::NotFound(i) => i,
}
}

/// Transforms the index carried by this result with `f`, keeping the variant:
/// `Found(i)` becomes `Found(f(i))` and `NotFound(i)` becomes `NotFound(f(i))`.
pub fn map<F>(self, f: F) -> Self
where
    F: FnOnce(usize) -> usize,
{
    match self {
        Self::NotFound(idx) => Self::NotFound(f(idx)),
        Self::Found(idx) => Self::Found(f(idx)),
    }
}
}

pub trait SearchSortedFn {
Expand Down

0 comments on commit af10b71

Please sign in to comment.