Skip to content

Commit

Permalink
Fuzz filter implementation respects data validity (#1573)
Browse files Browse the repository at this point in the history
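`Validity::from_iter` never produces `Validity::NonNullable`, so the fuzz filter now uses `Validity::NonNullable` directly when the array's dtype is not nullable. This fixes the following assertion failure: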

```
assertion `left == right` failed: {} != {} at index 0, lhs is vortex.struct(0x04) rhs is vortex.struct(0x04) in step 0
  left: Scalar { dtype: Struct(StructDType { names: [], dtypes: [] }, Nullable), value: ScalarValue(List([])) }
 right: Scalar { dtype: Struct(StructDType { names: [], dtypes: [] }, NonNullable), value: ScalarValue(List([])) }
```
  • Loading branch information
robert3005 authored Dec 5, 2024
1 parent 0942d55 commit 22c4c5e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 40 deletions.
60 changes: 21 additions & 39 deletions fuzz/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,27 @@ use vortex_dtype::{match_each_native_ptype, DType};
use vortex_error::VortexExpect;

pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
let validity = if array.dtype().is_nullable() {
let validity_buff = array
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer();
Validity::from_iter(
filter
.iter()
.zip(validity_buff.iter())
.filter(|(f, _)| **f)
.map(|(_, v)| v),
)
} else {
Validity::NonNullable
};

match array.dtype() {
DType::Bool(_) => {
let bool_array = array.clone().into_bool().unwrap();
let vec_validity = bool_array
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer();
BoolArray::try_new(
BooleanBuffer::from_iter(
filter
Expand All @@ -24,39 +36,21 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
.filter(|(f, _)| **f)
.map(|(_, v)| v),
),
Validity::from_iter(
filter
.iter()
.zip(vec_validity.iter())
.filter(|(f, _)| **f)
.map(|(_, v)| v),
),
validity,
)
.vortex_expect("Validity length cannot mismatch")
.into_array()
}
DType::Primitive(p, _) => match_each_native_ptype!(p, |$P| {
let primitive_array = array.clone().into_primitive().unwrap();
let vec_validity = primitive_array
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer();
PrimitiveArray::from_vec(
filter
.iter()
.zip(primitive_array.maybe_null_slice::<$P>().iter().copied())
.filter(|(f, _)| **f)
.map(|(_, v)| v)
.collect::<Vec<_>>(),
Validity::from_iter(
filter
.iter()
.zip(vec_validity.iter())
.filter(|(f, _)| **f)
.map(|(_, v)| v)
),
validity,
)
.into_array()
}),
Expand All @@ -78,24 +72,12 @@ pub fn filter_canonical_array(array: &ArrayData, filter: &[bool]) -> ArrayData {
.children()
.map(|c| filter_canonical_array(&c, filter))
.collect::<Vec<_>>();
let vec_validity = struct_array
.logical_validity()
.into_array()
.into_bool()
.unwrap()
.boolean_buffer();

StructArray::try_new(
struct_array.names().clone(),
filtered_children,
filter.iter().filter(|b| **b).map(|b| *b as usize).sum(),
Validity::from_iter(
filter
.iter()
.zip(vec_validity.iter())
.filter(|(f, _)| **f)
.map(|(_, v)| v),
),
validity,
)
.unwrap()
.into_array()
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ impl FromIterator<LogicalValidity> for Validity {

// Builds a `Validity` from an iterator of per-element `bool` validity bits.
//
// NOTE(review): this text is a diff rendering without +/- markers. The two
// body lines below look like the old and the new version of a single
// expression (the page reports "1 addition & 1 deletion" for this file), so
// presumably only one of them exists in the real source — confirm against
// the repository.
impl FromIterator<bool> for Validity {
fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
// Presumably the removed line: wraps a `BoolArray` built from the iterator
// in `Validity::Array`.
Self::Array(BoolArray::from_iter(iter).into_array())
// Presumably the added line: collects the bits into a `BooleanBuffer` and
// converts that buffer with `Validity::from`.
Validity::from(BooleanBuffer::from_iter(iter))
}
}

Expand Down

0 comments on commit 22c4c5e

Please sign in to comment.