Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slightly faster iter of LogicalValidity to Validity #673

Merged
merged 3 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 46 additions & 9 deletions vortex-array/src/array/chunked/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,12 @@ use crate::{

impl IntoCanonical for ChunkedArray {
fn into_canonical(self) -> VortexResult<Canonical> {
try_canonicalize_chunks(
self.chunks().collect(),
if self.dtype().is_nullable() {
self.logical_validity().into_validity()
} else {
Validity::NonNullable
},
self.dtype(),
)
let validity = if self.dtype().is_nullable() {
self.logical_validity().into_validity()
} else {
Validity::NonNullable
};
try_canonicalize_chunks(self.chunks().collect(), validity, self.dtype())
}
}

Expand Down Expand Up @@ -240,3 +237,43 @@ fn pack_varbin(chunks: &[Array], validity: Validity, dtype: &DType) -> VortexRes
validity,
)
}

#[cfg(test)]
mod tests {
use rstest::{fixture, rstest};
use vortex_dtype::Nullability;

use super::*;

#[fixture]
fn binary_array() -> Array {
let values = PrimitiveArray::from(
"hello worldhello world this is a long string"
.as_bytes()
.to_vec(),
);
let offsets = PrimitiveArray::from(vec![0, 11, 44]);

VarBinArray::try_new(
offsets.into_array(),
values.into_array(),
DType::Utf8(Nullability::NonNullable),
Validity::NonNullable,
)
.unwrap()
.into_array()
}

#[rstest]
fn test_pack_varbin(binary_array: Array) {
let arrays = vec![binary_array.clone(), binary_array.clone()];
let packed_array = pack_varbin(
&arrays,
Validity::NonNullable,
&DType::Utf8(Nullability::NonNullable),
)
.unwrap();

assert_eq!(packed_array.len(), binary_array.len() * arrays.len());
}
}
4 changes: 2 additions & 2 deletions vortex-array/src/array/chunked/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,10 @@ impl ArrayValidity for ChunkedArray {
}

fn logical_validity(&self) -> LogicalValidity {
let validity: Validity = self
let validity = self
.chunks()
.map(|a| a.with_dyn(|arr| arr.logical_validity()))
.collect();
.collect::<Validity>();
validity.to_logical(self.len())
}
}
Expand Down
22 changes: 14 additions & 8 deletions vortex-array/src/array/sparse/flatten.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
use itertools::Itertools;
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::{VortexError, VortexResult};
Expand All @@ -15,12 +15,18 @@ impl IntoCanonical for SparseArray {
// Resolve our indices into a vector of usize applying the offset
let indices = self.resolved_indices();

let mut validity = BooleanBufferBuilder::new(self.len());
validity.append_n(self.len(), false);
let validity_buffer =
BooleanBufferBuilder::new_from_buffer(MutableBuffer::new_null(self.len()), self.len());

if matches!(self.dtype(), DType::Bool(_)) {
let values = self.values().into_bool()?.boolean_buffer();
canonicalize_sparse_bools(values, &indices, self.len(), self.fill_value(), validity)
canonicalize_sparse_bools(
values,
&indices,
self.len(),
self.fill_value(),
validity_buffer,
)
} else {
let values = self.values().into_primitive()?;
match_each_native_ptype!(values.ptype(), |$P| {
Expand All @@ -29,7 +35,7 @@ impl IntoCanonical for SparseArray {
&indices,
self.len(),
self.fill_value(),
validity
validity_buffer
)
})
}
Expand All @@ -41,7 +47,7 @@ fn canonicalize_sparse_bools(
indices: &[usize],
len: usize,
fill_value: &Scalar,
mut validity: BooleanBufferBuilder,
mut validity_buffer: BooleanBufferBuilder,
) -> VortexResult<Canonical> {
let fill_bool: bool = if fill_value.is_null() {
bool::default()
Expand All @@ -51,10 +57,10 @@ fn canonicalize_sparse_bools(
let mut flat_bools = vec![fill_bool; len];
for (i, idx) in indices.iter().enumerate() {
flat_bools[*idx] = values.value(i);
validity.set_bit(*idx, true);
validity_buffer.set_bit(*idx, true);
}

let validity = Validity::from(validity.finish());
let validity = Validity::from(validity_buffer.finish());
let bool_values = BoolArray::from_vec(flat_bools, validity);

Ok(Canonical::Bool(bool_values))
Expand Down
19 changes: 10 additions & 9 deletions vortex-array/src/array/varbin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ pub fn varbin_scalar(value: Buffer, dtype: &DType) -> Scalar {

#[cfg(test)]
mod test {
use rstest::{fixture, rstest};
use vortex_dtype::{DType, Nullability};

use crate::array::primitive::PrimitiveArray;
Expand All @@ -236,6 +237,7 @@ mod test {
use crate::validity::Validity;
use crate::{Array, IntoArray};

#[fixture]
fn binary_array() -> Array {
let values = PrimitiveArray::from(
"hello worldhello world this is a long string"
Expand All @@ -254,20 +256,19 @@ mod test {
.into_array()
}

#[test]
pub fn test_scalar_at() {
let binary_arr = binary_array();
assert_eq!(binary_arr.len(), 2);
assert_eq!(scalar_at(&binary_arr, 0).unwrap(), "hello world".into());
#[rstest]
pub fn test_scalar_at(binary_array: Array) {
assert_eq!(binary_array.len(), 2);
assert_eq!(scalar_at(&binary_array, 0).unwrap(), "hello world".into());
assert_eq!(
scalar_at(&binary_arr, 1).unwrap(),
scalar_at(&binary_array, 1).unwrap(),
"hello world this is a long string".into()
)
}

#[test]
pub fn slice_array() {
let binary_arr = slice(&binary_array(), 1, 2).unwrap();
#[rstest]
pub fn slice_array(binary_array: Array) {
let binary_arr = slice(&binary_array, 1, 2).unwrap();
assert_eq!(
scalar_at(&binary_arr, 0).unwrap(),
"hello world this is a long string".into()
Expand Down
21 changes: 11 additions & 10 deletions vortex-array/src/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,18 +197,19 @@ impl FromIterator<LogicalValidity> for Validity {
// Else, construct the boolean buffer
let mut buffer = BooleanBufferBuilder::new(validities.iter().map(|v| v.len()).sum());
for validity in validities {
let present = match validity {
LogicalValidity::AllValid(count) => BooleanBuffer::new_set(count),
LogicalValidity::AllInvalid(count) => BooleanBuffer::new_unset(count),
LogicalValidity::Array(array) => array
.into_bool()
.expect("validity must flatten to BoolArray")
.boolean_buffer(),
match validity {
LogicalValidity::AllValid(count) => buffer.append_n(count, true),
LogicalValidity::AllInvalid(count) => buffer.append_n(count, false),
LogicalValidity::Array(array) => {
let array_buffer = array
.into_bool()
.expect("validity must flatten to BoolArray")
.boolean_buffer();
buffer.append_buffer(&array_buffer);
}
};
buffer.append_buffer(&present);
}
let bool_array = BoolArray::try_new(buffer.finish(), Validity::NonNullable)
.expect("BoolArray::try_new from BooleanBuffer should always succeed");
let bool_array = BoolArray::from(buffer.finish());
Self::Array(bool_array.into_array())
}
}
Expand Down
Loading