Skip to content

Commit

Permalink
added arb list type and arb slice check
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs committed Dec 20, 2024
1 parent 0cc9df2 commit 2cf4f98
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ cargo-fuzz = true

[dependencies]
libfuzzer-sys = { workspace = true }
arrow-buffer = { workspace = true }
vortex-array = { workspace = true, features = ["arbitrary"] }
vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true, features = ["arbitrary"] }
Expand Down
59 changes: 51 additions & 8 deletions fuzz/src/slice.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use arrow_buffer::ArrowNativeType;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::array::{BoolArray, ListArray, PrimitiveArray, StructArray, VarBinViewArray};
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::variants::StructArrayTrait;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait};
use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::VortexExpect;

pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> ArrayData {
Expand All @@ -28,11 +29,12 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
.vortex_expect("Validity length cannot mismatch")
.into_array()
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
DType::Primitive(p, _) => {
let primitive_array = array.clone().into_primitive().unwrap();
let vec_values = primitive_array.into_maybe_null_slice::<$P>();
PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array()
}),
match_each_native_ptype!(p, |$P| {
slice_primitive::<$P>(primitive_array, validity, start, stop)
})
}
DType::Utf8(_) | DType::Binary(_) => {
let utf8 = array.clone().into_varbinview().unwrap();
let values = utf8
Expand All @@ -56,6 +58,47 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar
.unwrap()
.into_array()
}
DType::List(..) => {
let list_array = array.clone().into_list().unwrap();
let offsets = slice_canonical_array(&list_array.offsets(), start, stop)
.into_primitive()
.unwrap();

let elements = slice_canonical_array(
&list_array.elements(),
offsets.get_as_cast::<u64>(0) as usize,
offsets.get_as_cast::<u64>(offsets.len()) as usize,
);
let offsets = match_each_native_ptype!(offsets.ptype(), |$P| {
shift_offsets::<$P>(offsets)
})
.into_array();
ListArray::try_new(elements, offsets, validity)
.unwrap()
.into_array()
}
_ => unreachable!("Not a canonical array"),
}
}

/// Rebases a run of offsets so that the first one becomes zero.
///
/// The values are read as `O`, the first value is subtracted from every
/// entry, and the result is wrapped in a new `PrimitiveArray` built with
/// `Validity::NonNullable`. An empty input is handed back untouched because
/// there is no first value to subtract.
fn shift_offsets<O: NativePType + ArrowNativeType>(offsets: PrimitiveArray) -> PrimitiveArray {
    if offsets.is_empty() {
        offsets
    } else {
        let raw = offsets.into_maybe_null_slice::<O>();
        // Every entry is expressed relative to the first one.
        let base = raw[0];
        let rebased: Vec<_> = raw.into_iter().map(|offset| offset - base).collect();
        PrimitiveArray::from_vec(rebased, Validity::NonNullable)
    }
}

/// Copies the half-open range `start..stop` out of a primitive array.
///
/// The values of `prim` are read as `T`, the requested window is copied into
/// a fresh vector, and that vector is paired with `validity` to build the
/// returned array.
///
/// `validity` is forwarded unchanged; presumably the caller has already cut
/// it down to the same window (TODO confirm against the call site).
///
/// # Panics
///
/// Panics if `start..stop` is not a valid range within the array's values.
fn slice_primitive<T: NativePType + ArrowNativeType>(
    prim: PrimitiveArray,
    validity: Validity,
    start: usize,
    stop: usize,
) -> ArrayData {
    let all_values = prim.into_maybe_null_slice::<T>();
    let window = all_values[start..stop].to_vec();
    let sliced = PrimitiveArray::from_vec(window, validity);
    sliced.into_array()
}
2 changes: 1 addition & 1 deletion vortex-dtype/src/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ fn random_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result<DType> {
2 => DType::Utf8(u.arbitrary()?),
3 => DType::Binary(u.arbitrary()?),
4 => DType::Struct(random_struct_dtype(u, depth - 1)?, u.arbitrary()?),
5 => DType::List(Arc::new(u.arbitrary()?), u.arbitrary()?),
// Null,
// List(Arc<DType>, Nullability),
// Extension(ExtDType, Nullability),
_ => unreachable!("Number out of range"),
})
Expand Down

0 comments on commit 2cf4f98

Please sign in to comment.