From 2cf4f98a1a3eef887685ffcae20c3f288cd0c525 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 20 Dec 2024 14:14:49 +0000 Subject: [PATCH 1/2] added arb list type and arb slice check --- Cargo.lock | 1 + fuzz/Cargo.toml | 1 + fuzz/src/slice.rs | 59 ++++++++++++++++++++++++++++++----- vortex-dtype/src/arbitrary.rs | 2 +- 4 files changed, 54 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba562e550e..4a60f7bbfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5155,6 +5155,7 @@ dependencies = [ name = "vortex-fuzz" version = "0.21.1" dependencies = [ + "arrow-buffer", "libfuzzer-sys", "vortex-array", "vortex-buffer", diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 4460e73d9b..925e178d62 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -19,6 +19,7 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = { workspace = true } +arrow-buffer = { workspace = true } vortex-array = { workspace = true, features = ["arbitrary"] } vortex-buffer = { workspace = true } vortex-dtype = { workspace = true, features = ["arbitrary"] } diff --git a/fuzz/src/slice.rs b/fuzz/src/slice.rs index 00b82029f2..3ca18b45e1 100644 --- a/fuzz/src/slice.rs +++ b/fuzz/src/slice.rs @@ -1,9 +1,10 @@ +use arrow_buffer::ArrowNativeType; use vortex_array::accessor::ArrayAccessor; -use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray}; +use vortex_array::array::{BoolArray, ListArray, PrimitiveArray, StructArray, VarBinViewArray}; use vortex_array::validity::{ArrayValidity, Validity}; -use vortex_array::variants::StructArrayTrait; -use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant}; -use vortex_dtype::{match_each_native_ptype, DType}; +use vortex_array::variants::{PrimitiveArrayTrait, StructArrayTrait}; +use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant}; +use vortex_dtype::{match_each_native_ptype, DType, NativePType}; use vortex_error::VortexExpect; pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> ArrayData { @@ -28,11 +29,12 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar .vortex_expect("Validity length cannot mismatch") .into_array() } - DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| { + DType::Primitive(p, _) => { let primitive_array = array.clone().into_primitive().unwrap(); - let vec_values = primitive_array.into_maybe_null_slice::<$P>(); - PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array() - }), + match_each_native_ptype!(p, |$P| { + slice_primitive::<$P>(primitive_array, validity, start, stop) + }) + } DType::Utf8(_) | DType::Binary(_) => { let utf8 = array.clone().into_varbinview().unwrap(); let values = utf8 @@ -56,6 +58,47 @@ pub fn slice_canonical_array(array: &ArrayData, start: usize, stop: usize) -> Ar .unwrap() .into_array() } + DType::List(..) => { + let list_array = array.clone().into_list().unwrap(); + let offsets = slice_canonical_array(&list_array.offsets(), start, stop) + .into_primitive() + .unwrap(); + + let elements = slice_canonical_array( + &list_array.elements(), + offsets.get_as_cast::(0) as usize, + offsets.get_as_cast::(offsets.len()) as usize, + ); + let offsets = match_each_native_ptype!(offsets.ptype(), |$P| { + shift_offsets::<$P>(offsets) + }) + .into_array(); + ListArray::try_new(elements, offsets, validity) + .unwrap() + .into_array() + } _ => unreachable!("Not a canonical array"), } } + +fn shift_offsets(offsets: PrimitiveArray) -> PrimitiveArray { + if offsets.is_empty() { + return offsets; + } + let offsets = offsets.into_maybe_null_slice::(); + let start = offsets[0]; + PrimitiveArray::from_vec( + offsets.into_iter().map(|o| o - start).collect::>(), + Validity::NonNullable, + ) +} + +fn slice_primitive( + prim: PrimitiveArray, + validity: Validity, + start: usize, + stop: usize, +) -> ArrayData { + let vec_values = prim.into_maybe_null_slice::(); + PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array() +} diff --git a/vortex-dtype/src/arbitrary.rs b/vortex-dtype/src/arbitrary.rs index 161141dae6..3cd3fe08ef 100644 --- a/vortex-dtype/src/arbitrary.rs +++ b/vortex-dtype/src/arbitrary.rs @@ -18,8 +18,8 @@ fn random_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result { 2 => DType::Utf8(u.arbitrary()?), 3 => DType::Binary(u.arbitrary()?), 4 => DType::Struct(random_struct_dtype(u, depth - 1)?, u.arbitrary()?), + 5 => DType::List(Arc::new(u.arbitrary()?), u.arbitrary()?), // Null, - // List(Arc, Nullability), // Extension(ExtDType, Nullability), _ => unreachable!("Number out of range"), }) From 0a2b50bab6684f8060a8a965c96193ef05cf41ef Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 20 Dec 2024 14:37:23 +0000 Subject: [PATCH 2/2] wip --- fuzz/src/slice.rs | 4 ++-- fuzz/src/take.rs | 31 +++++++++++++++++++++---------- vortex-dtype/src/arbitrary.rs | 2 +- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/fuzz/src/slice.rs b/fuzz/src/slice.rs index 3ca18b45e1..41614d59b5 100644 --- a/fuzz/src/slice.rs +++ b/fuzz/src/slice.rs @@ -94,11 +94,11 @@ fn shift_offsets(offsets: PrimitiveArray) -> P } fn slice_primitive( - prim: PrimitiveArray, + primitive_array: PrimitiveArray, validity: Validity, start: usize, stop: usize, ) -> ArrayData { - let vec_values = prim.into_maybe_null_slice::(); + let vec_values = primitive_array.into_maybe_null_slice::(); PrimitiveArray::from_vec(vec_values[start..stop].into(), validity).into_array() } diff --git a/fuzz/src/take.rs b/fuzz/src/take.rs index 65ac9cdcf1..d0da4df0f2 100644 --- a/fuzz/src/take.rs +++ b/fuzz/src/take.rs @@ -1,9 +1,10 @@ +use arrow_buffer::ArrowNativeType; use vortex_array::accessor::ArrayAccessor; use vortex_array::array::{BoolArray, PrimitiveArray, StructArray, VarBinViewArray}; use vortex_array::validity::{ArrayValidity, Validity}; use vortex_array::variants::StructArrayTrait; use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant}; -use vortex_dtype::{match_each_native_ptype, DType}; +use vortex_dtype::{match_each_native_ptype, DType, NativePType}; use vortex_error::VortexExpect; pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { @@ -30,16 +31,12 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { .vortex_expect("Validity length cannot mismatch") .into_array() } - DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| { + DType::Primitive(p, _) => { let primitive_array = array.clone().into_primitive().unwrap(); - let vec_values = primitive_array - .maybe_null_slice::<$P>() - .iter() - .copied() - .collect::>(); - PrimitiveArray::from_vec(indices.iter().map(|i| vec_values[*i]).collect(),validity) - .into_array() - }), + match_each_native_ptype!(p, |$P| { + take_primitive::<$P>(primitive_array, validity, indices) + }) + } DType::Utf8(_) | DType::Binary(_) => { let utf8 = array.clone().into_varbinview().unwrap(); let values = utf8 @@ -70,3 +67,17 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { _ => unreachable!("Not a canonical array"), } } + +fn take_primitive( + primitive_array: PrimitiveArray, + validity: Validity, + indices: &[usize], +) -> ArrayData { + let vec_values = primitive_array + .maybe_null_slice::() + .iter() + .copied() + .collect::>(); + PrimitiveArray::from_vec(indices.iter().map(|i| vec_values[*i]).collect(), validity) + .into_array() +} diff --git a/vortex-dtype/src/arbitrary.rs b/vortex-dtype/src/arbitrary.rs index 3cd3fe08ef..b04d4348c8 100644 --- a/vortex-dtype/src/arbitrary.rs +++ b/vortex-dtype/src/arbitrary.rs @@ -18,7 +18,7 @@ fn random_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result { 2 => DType::Utf8(u.arbitrary()?), 3 => DType::Binary(u.arbitrary()?), 4 => DType::Struct(random_struct_dtype(u, depth - 1)?, u.arbitrary()?), - 5 => DType::List(Arc::new(u.arbitrary()?), u.arbitrary()?), + 5 => DType::List(Arc::new(random_dtype(u, depth - 1)?), u.arbitrary()?), // Null, // Extension(ExtDType, Nullability), _ => unreachable!("Number out of range"),