Skip to content

Commit

Permalink
Teach RunEnd take to respect its own validity (#691)
Browse files Browse the repository at this point in the history
Before this change, the validity is completely ignored. The test added
in this PR fails in the develop branch. This change also takes the
desired indices from the validity and converts from an array of Booleans
to an array of set indices. This array of indices can be used as the
indices of a sprase array with the fill values as null.

Resolves #609.
  • Loading branch information
danking authored Aug 27, 2024
1 parent 960048e commit 81d529c
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 9 deletions.
44 changes: 37 additions & 7 deletions encodings/runend/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use vortex::array::PrimitiveArray;
use vortex::array::{ConstantArray, PrimitiveArray, SparseArray};
use vortex::compute::unary::{scalar_at, scalar_at_unchecked, ScalarAtFn};
use vortex::compute::{slice, take, ArrayCompute, SliceFn, TakeFn};
use vortex::{Array, IntoArray, IntoArrayVariant};
use vortex::compute::{filter, slice, take, ArrayCompute, SliceFn, TakeFn};
use vortex::validity::Validity;
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::match_each_integer_ptype;
use vortex_error::{vortex_bail, VortexResult};
use vortex_scalar::Scalar;
Expand Down Expand Up @@ -49,12 +50,41 @@ impl TakeFn for RunEndArray {
}
self.find_physical_index(idx).map(|loc| loc as u64)
})

.collect::<VortexResult<Vec<_>>>()?
});
take(
&self.values(),
&PrimitiveArray::from(physical_indices).into_array(),
)
let physical_indices_array = PrimitiveArray::from(physical_indices).into_array();
let dense_values = take(&self.values(), &physical_indices_array)?;

Ok(match self.validity() {
Validity::NonNullable => dense_values,
Validity::AllValid => dense_values,
Validity::AllInvalid => {
ConstantArray::new(Scalar::null(self.dtype().clone()), indices.len()).into_array()
}
Validity::Array(original_validity) => {
let dense_validity = take(&original_validity, indices)?;
let dense_nonnull_indices = PrimitiveArray::from(
dense_validity
.clone()
.into_bool()?
.boolean_buffer()
.set_indices()
.map(|idx| idx as u64)
.collect::<Vec<u64>>(),
)
.into_array();
let length = dense_validity.len();

SparseArray::try_new(
dense_nonnull_indices,
filter(&dense_values, &dense_validity)?,
length,
Scalar::null(self.dtype().clone()),
)?
.into_array()
}
})
}
}

Expand Down
35 changes: 33 additions & 2 deletions encodings/runend/src/runend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,10 @@ impl ArrayStatisticsCompute for RunEndArray {}

#[cfg(test)]
mod test {
use vortex::compute::slice;
use vortex::array::{BoolArray, PrimitiveArray};
use vortex::compute::unary::scalar_at;
use vortex::validity::Validity;
use vortex::compute::{slice, take};
use vortex::validity::{ArrayValidity, Validity};
use vortex::{ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{DType, Nullability, PType};

Expand Down Expand Up @@ -262,4 +263,34 @@ mod test {
vec![1, 1, 2, 2, 2, 3, 3, 3, 3, 3]
);
}

#[test]
fn with_nulls() {
let uncompressed = PrimitiveArray::from_vec(vec![1i32, 0, 3], Validity::AllValid);
let validity = BoolArray::from_vec(
vec![
true, true, false, false, false, true, true, true, true, true,
],
Validity::NonNullable,
);
let arr = RunEndArray::try_new(
vec![2u32, 5, 10].into_array(),
uncompressed.into(),
Validity::Array(validity.into()),
)
.unwrap();

let test_indices = PrimitiveArray::from_vec(vec![0, 2, 4, 6], Validity::NonNullable);
let taken = take(arr.array(), test_indices.array()).unwrap();

assert_eq!(taken.len(), test_indices.len());

let parray = taken.into_primitive().unwrap();
assert_eq!(
(0..4)
.map(|idx| parray.is_valid(idx).then(|| parray.get_as_cast::<i32>(idx)))
.collect::<Vec<Option<i32>>>(),
vec![Some(1), None, None, Some(3),]
);
}
}

0 comments on commit 81d529c

Please sign in to comment.