From 3a27f4a43d57682e918a3ff4be58c94d5c84511c Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Fri, 6 Dec 2024 07:24:55 -0500 Subject: [PATCH] Fix: fuzzer reference take implementation respects generated values nullability (#1586) --- fuzz/src/take.rs | 53 +++++++++++++---------------------- vortex-dtype/src/arbitrary.rs | 9 ++++-- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/fuzz/src/take.rs b/fuzz/src/take.rs index b2e70c7793..65ac9cdcf1 100644 --- a/fuzz/src/take.rs +++ b/fuzz/src/take.rs @@ -7,24 +7,28 @@ use vortex_dtype::{match_each_native_ptype, DType}; use vortex_error::VortexExpect; pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { + let validity = if array.dtype().is_nullable() { + let validity_idx = array + .logical_validity() + .into_array() + .into_bool() + .unwrap() + .boolean_buffer() + .iter() + .collect::>(); + + Validity::from_iter(indices.iter().map(|i| validity_idx[*i])) + } else { + Validity::NonNullable + }; + match array.dtype() { DType::Bool(_) => { let bool_array = array.clone().into_bool().unwrap(); let vec_values = bool_array.boolean_buffer().iter().collect::>(); - let vec_validity = bool_array - .logical_validity() + BoolArray::try_new(indices.iter().map(|i| vec_values[*i]).collect(), validity) + .vortex_expect("Validity length cannot mismatch") .into_array() - .into_bool() - .unwrap() - .boolean_buffer() - .iter() - .collect::>(); - BoolArray::try_new( - indices.iter().map(|i| vec_values[*i]).collect(), - Validity::from_iter(indices.iter().map(|i| vec_validity[*i])), - ) - .vortex_expect("Validity length cannot mismatch") - .into_array() } DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| { let primitive_array = array.clone().into_primitive().unwrap(); @@ -33,19 +37,8 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { .iter() .copied() .collect::>(); - let vec_validity = primitive_array - .logical_validity() + PrimitiveArray::from_vec(indices.iter().map(|i| vec_values[*i]).collect(),validity) .into_array() - .into_bool() - .unwrap() - .boolean_buffer() - .iter() - .collect::>(); - PrimitiveArray::from_vec( - indices.iter().map(|i| vec_values[*i]).collect(), - Validity::from_iter(indices.iter().map(|i| vec_validity[*i])) - ) - .into_array() }), DType::Utf8(_) | DType::Binary(_) => { let utf8 = array.clone().into_varbinview().unwrap(); @@ -64,20 +57,12 @@ pub fn take_canonical_array(array: &ArrayData, indices: &[usize]) -> ArrayData { .children() .map(|c| take_canonical_array(&c, indices)) .collect::>(); - let vec_validity = struct_array - .logical_validity() - .into_array() - .into_bool() - .unwrap() - .boolean_buffer() - .iter() - .collect::>(); StructArray::try_new( struct_array.names().clone(), taken_children, indices.len(), - Validity::from_iter(indices.iter().map(|i| vec_validity[*i])), + validity, ) .unwrap() .into_array() diff --git a/vortex-dtype/src/arbitrary.rs b/vortex-dtype/src/arbitrary.rs index 5ba93c9a0e..161141dae6 100644 --- a/vortex-dtype/src/arbitrary.rs +++ b/vortex-dtype/src/arbitrary.rs @@ -1,6 +1,8 @@ +use std::sync::Arc; + use arbitrary::{Arbitrary, Result, Unstructured}; -use crate::{DType, FieldNames, Nullability, PType, StructDType}; +use crate::{DType, FieldName, FieldNames, Nullability, PType, StructDType}; impl<'a> Arbitrary<'a> for DType { fn arbitrary(u: &mut Unstructured<'a>) -> Result { @@ -59,7 +61,10 @@ impl<'a> Arbitrary<'a> for StructDType { } fn random_struct_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result { - let names: FieldNames = u.arbitrary()?; + let field_count = u.choose_index(3)?; + let names: FieldNames = (0..field_count) + .map(|_| FieldName::arbitrary(u)) + .collect::>>()?; let dtypes = (0..names.len()) .map(|_| random_dtype(u, depth)) .collect::>>()?;