Skip to content

Commit

Permalink
Added bool iterators index and slice and filtering across some array …
Browse files Browse the repository at this point in the history
…types (#505)

Implemented for (bool, primitive, varbin and constant).
  • Loading branch information
joseph-isaacs authored Jul 24, 2024
1 parent 71e446f commit 056f1b4
Show file tree
Hide file tree
Showing 21 changed files with 673 additions and 20 deletions.
10 changes: 9 additions & 1 deletion encodings/byte-bool/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,15 @@ impl ArrayVariants for ByteBoolArray {
}
}

impl BoolArrayTrait for ByteBoolArray {}
impl BoolArrayTrait for ByteBoolArray {
    /// Stub: index iteration is not yet implemented for `ByteBoolArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
        todo!("ByteBoolArray::maybe_null_indices_iter")
    }

    /// Stub: slice iteration is not yet implemented for `ByteBoolArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
        todo!("ByteBoolArray::maybe_null_slices_iter")
    }
}

impl From<Vec<bool>> for ByteBoolArray {
fn from(value: Vec<bool>) -> Self {
Expand Down
10 changes: 9 additions & 1 deletion encodings/roaring/src/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,15 @@ impl ArrayVariants for RoaringBoolArray {
}
}

impl BoolArrayTrait for RoaringBoolArray {}
impl BoolArrayTrait for RoaringBoolArray {
    /// Stub: index iteration is not yet implemented for `RoaringBoolArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
        todo!("RoaringBoolArray::maybe_null_indices_iter")
    }

    /// Stub: slice iteration is not yet implemented for `RoaringBoolArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
        todo!("RoaringBoolArray::maybe_null_slices_iter")
    }
}

impl AcceptArrayVisitor for RoaringBoolArray {
fn accept(&self, _visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
Expand Down
10 changes: 9 additions & 1 deletion encodings/runend-bool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,15 @@ impl RunEndBoolArray {
}
}

impl BoolArrayTrait for RunEndBoolArray {}
impl BoolArrayTrait for RunEndBoolArray {
    /// Stub: index iteration is not yet implemented for `RunEndBoolArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
        todo!("RunEndBoolArray::maybe_null_indices_iter")
    }

    /// Stub: slice iteration is not yet implemented for `RunEndBoolArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
        todo!("RunEndBoolArray::maybe_null_slices_iter")
    }
}

impl ArrayVariants for RunEndBoolArray {
fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> {
Expand Down
102 changes: 102 additions & 0 deletions vortex-array/src/array/bool/compute/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
use vortex_error::{vortex_err, VortexResult};

use crate::array::bool::BoolArray;
use crate::compute::FilterFn;
use crate::validity::filter_validity;
use crate::variants::BoolArrayTrait;
use crate::{Array, IntoArray};

impl FilterFn for BoolArray {
    /// Filters this array by `predicate`, delegating to `filter_select_bool`
    /// and converting the resulting `BoolArray` into a generic `Array`.
    fn filter(&self, predicate: &Array) -> VortexResult<Array> {
        let filtered = filter_select_bool(self, predicate)?;
        Ok(filtered.into_array())
    }
}

/// Filters `arr` down to the positions selected by `predicate`.
///
/// The filtered validity is computed with `filter_validity`, then the predicate
/// is viewed as a boolean array. If more than half of `arr`'s positions are
/// selected (`true_count() * 2 > arr.len()`), values are copied with
/// `filter_select_bool_by_slice`; otherwise with `filter_select_bool_by_index`.
///
/// # Errors
///
/// Returns a `NotImplemented` error when `predicate` cannot be viewed as a
/// boolean array, and propagates any error from `filter_validity` or
/// `BoolArray::try_new`.
fn filter_select_bool(arr: &BoolArray, predicate: &Array) -> VortexResult<BoolArray> {
    predicate.with_dyn(|b| {
        let validity = filter_validity(arr.validity(), predicate)?;
        // Build the error lazily so the success path does not pay for constructing
        // (and immediately dropping) an error value on every call.
        let mask = b.as_bool_array().ok_or_else(|| {
            vortex_err!(
                NotImplemented: "as_bool_array",
                predicate.encoding().id()
            )
        })?;
        let selection_count = mask.true_count();
        let values = arr.boolean_buffer();
        let out = if selection_count * 2 > arr.len() {
            filter_select_bool_by_slice(&values, mask, selection_count)
        } else {
            filter_select_bool_by_index(&values, mask, selection_count)
        };
        BoolArray::try_new(out, validity)
    })
}

/// Builds the filtered buffer by copying whole `(start, end)` runs.
///
/// Each pair yielded by `predicate.maybe_null_slices_iter()` is copied from
/// `values` as the slice starting at `start` with length `end - start`.
/// `selection_count` is used as the initial capacity of the output builder.
fn filter_select_bool_by_slice(
    values: &BooleanBuffer,
    predicate: &dyn BoolArrayTrait,
    selection_count: usize,
) -> BooleanBuffer {
    let mut builder = BooleanBufferBuilder::new(selection_count);
    for (run_start, run_end) in predicate.maybe_null_slices_iter() {
        let run = values.slice(run_start, run_end - run_start);
        builder.append_buffer(&run);
    }
    builder.finish()
}

/// Builds the filtered buffer one position at a time.
///
/// For every index yielded by `predicate.maybe_null_indices_iter()` the
/// corresponding bit of `values` is appended to the output.
/// `selection_count` is used as the initial capacity of the output builder.
fn filter_select_bool_by_index(
    values: &BooleanBuffer,
    predicate: &dyn BoolArrayTrait,
    selection_count: usize,
) -> BooleanBuffer {
    let mut builder = BooleanBufferBuilder::new(selection_count);
    for position in predicate.maybe_null_indices_iter() {
        builder.append(values.value(position));
    }
    builder.finish()
}

#[cfg(test)]
mod test {
    use itertools::Itertools;

    use crate::array::bool::compute::filter::{
        filter_select_bool, filter_select_bool_by_index, filter_select_bool_by_slice,
    };
    use crate::array::bool::BoolArray;
    use crate::ToArray;

    /// Values `[true, true, false]` paired with the mask `[true, false, true]`;
    /// the expected filtered output is `[true, false]`.
    fn values_and_mask() -> (BoolArray, BoolArray) {
        (
            BoolArray::from(vec![true, true, false]),
            BoolArray::from(vec![true, false, true]),
        )
    }

    #[test]
    fn filter_bool_test() {
        let (values, mask) = values_and_mask();

        let result = filter_select_bool(&values, &mask.to_array()).unwrap();

        assert_eq!(2, result.len());
        assert_eq!(
            vec![true, false],
            result.boolean_buffer().iter().collect_vec()
        )
    }

    #[test]
    fn filter_bool_by_slice_test() {
        let (values, mask) = values_and_mask();

        let result = filter_select_bool_by_slice(&values.boolean_buffer(), &mask, 2);

        assert_eq!(2, result.len());
        assert_eq!(vec![true, false], result.iter().collect_vec())
    }

    #[test]
    fn filter_bool_by_index_test() {
        let (values, mask) = values_and_mask();

        let result = filter_select_bool_by_index(&values.boolean_buffer(), &mask, 2);

        assert_eq!(2, result.len());
        assert_eq!(vec![true, false], result.iter().collect_vec())
    }
}
1 change: 1 addition & 0 deletions vortex-array/src/array/bool/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::compute::{ArrayCompute, CompareFn, SliceFn, TakeFn};
mod boolean;
mod compare;
mod fill;
mod filter;
mod flatten;
mod scalar_at;
mod slice;
Expand Down
38 changes: 37 additions & 1 deletion vortex-array/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use arrow_buffer::bit_iterator::{BitIndexIterator, BitSliceIterator};
use arrow_buffer::BooleanBuffer;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -83,7 +84,15 @@ impl ArrayVariants for BoolArray {
}
}

impl BoolArrayTrait for BoolArray {}
impl BoolArrayTrait for BoolArray {
    /// Iterates over the positions of set bits in the values buffer, scanning
    /// `self.len()` bits starting at bit offset 0. Validity is not consulted here.
    fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a> {
        let bit_len = self.len();
        let set_bits = BitIndexIterator::new(self.buffer(), 0, bit_len);
        Box::new(set_bits)
    }

    /// Iterates over `(start, end)` runs of consecutive set bits in the values
    /// buffer, scanning `self.len()` bits starting at bit offset 0. Validity is
    /// not consulted here.
    fn maybe_null_slices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (usize, usize)> + 'a> {
        let bit_len = self.len();
        let set_runs = BitSliceIterator::new(self.buffer(), 0, bit_len);
        Box::new(set_runs)
    }
}

impl From<BooleanBuffer> for BoolArray {
fn from(value: BooleanBuffer) -> Self {
Expand Down Expand Up @@ -139,8 +148,11 @@ impl AcceptArrayVisitor for BoolArray {

#[cfg(test)]
mod tests {
use itertools::Itertools;

use crate::array::bool::BoolArray;
use crate::compute::unary::scalar_at;
use crate::variants::BoolArrayTrait;
use crate::IntoArray;

#[test]
Expand Down Expand Up @@ -170,4 +182,28 @@ mod tests {
let scalar = scalar_at(&arr, 4).unwrap();
assert!(scalar.is_null());
}

#[test]
fn constant_iter_true_test() {
    // All bits set: every index is reported and the runs collapse to one span.
    let all_true = BoolArray::from(vec![true, true, true]);

    let indices = all_true.maybe_null_indices_iter().collect_vec();
    let runs = all_true.maybe_null_slices_iter().collect_vec();

    assert_eq!(vec![0, 1, 2], indices);
    assert_eq!(vec![(0, 3)], runs);
}

#[test]
fn constant_iter_true_false_test() {
    // A cleared bit in the middle splits the set bits into two single-bit runs.
    let mixed = BoolArray::from(vec![true, false, true]);

    let indices = mixed.maybe_null_indices_iter().collect_vec();
    let runs = mixed.maybe_null_slices_iter().collect_vec();

    assert_eq!(vec![0, 2], indices);
    assert_eq!(vec![(0, 1), (2, 3)], runs);
}

#[test]
fn constant_iter_false_test() {
    // No bits set: both iterators must be empty.
    let all_false = BoolArray::from(vec![false, false, false]);

    assert_eq!(0, all_false.maybe_null_indices_iter().count());
    assert_eq!(0, all_false.maybe_null_slices_iter().count());
}
}
10 changes: 9 additions & 1 deletion vortex-array/src/array/chunked/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,15 @@ impl ArrayVariants for ChunkedArray {

impl NullArrayTrait for ChunkedArray {}

impl BoolArrayTrait for ChunkedArray {}
impl BoolArrayTrait for ChunkedArray {
    /// Stub: index iteration is not yet implemented for `ChunkedArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_indices_iter(&self) -> Box<dyn Iterator<Item = usize>> {
        todo!("ChunkedArray::maybe_null_indices_iter")
    }

    /// Stub: slice iteration is not yet implemented for `ChunkedArray`.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`; the message names this method so the missing
    /// implementation can be identified from the panic output.
    fn maybe_null_slices_iter(&self) -> Box<dyn Iterator<Item = (usize, usize)>> {
        todo!("ChunkedArray::maybe_null_slices_iter")
    }
}

impl PrimitiveArrayTrait for ChunkedArray {}

Expand Down
21 changes: 19 additions & 2 deletions vortex-array/src/array/constant/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ use std::sync::Arc;

use arrow_array::Datum;
use vortex_dtype::Nullability;
use vortex_error::{vortex_bail, VortexResult};
use vortex_error::{vortex_bail, vortex_err, VortexResult};
use vortex_expr::Operator;
use vortex_scalar::Scalar;

use crate::array::constant::ConstantArray;
use crate::arrow::FromArrowArray;
use crate::compute::unary::{scalar_at, ScalarAtFn};
use crate::compute::{
scalar_cmp, AndFn, ArrayCompute, CompareFn, OrFn, SearchResult, SearchSortedFn,
scalar_cmp, AndFn, ArrayCompute, CompareFn, FilterFn, OrFn, SearchResult, SearchSortedFn,
SearchSortedSide, SliceFn, TakeFn,
};
use crate::stats::{ArrayStatistics, Stat};
Expand Down Expand Up @@ -65,6 +65,23 @@ impl SliceFn for ConstantArray {
}
}

impl FilterFn for ConstantArray {
    /// Filters a constant array: the result is another constant array holding
    /// the same scalar, whose length is the predicate's `true_count()`.
    ///
    /// # Errors
    ///
    /// Returns a `NotImplemented` error when `predicate` cannot be viewed as a
    /// boolean array.
    fn filter(&self, predicate: &Array) -> VortexResult<Array> {
        // Resolve the output length first so the scalar is only cloned on success,
        // and build the error lazily so the success path never constructs it.
        let selected = predicate.with_dyn(|p| {
            p.as_bool_array()
                .map(|mask| mask.true_count())
                .ok_or_else(|| {
                    vortex_err!(
                        NotImplemented: "as_bool_array",
                        predicate.encoding().id()
                    )
                })
        })?;
        Ok(Self::new(self.scalar().clone(), selected).into_array())
    }
}

impl SearchSortedFn for ConstantArray {
fn search_sorted(&self, value: &Scalar, side: SearchSortedSide) -> VortexResult<SearchResult> {
match self.scalar().partial_cmp(value).unwrap_or(Ordering::Less) {
Expand Down
48 changes: 47 additions & 1 deletion vortex-array/src/array/constant/variants.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::iter;

use vortex_dtype::DType;
use vortex_scalar::StructScalar;

Expand Down Expand Up @@ -77,7 +79,27 @@ impl ArrayVariants for ConstantArray {

impl NullArrayTrait for ConstantArray {}

impl BoolArrayTrait for ConstantArray {}
impl BoolArrayTrait for ConstantArray {
    /// Yields every index `0..self.len()` when the constant is `true`; yields
    /// nothing when it is `false` or absent (treated as `false`).
    ///
    /// # Panics
    ///
    /// Panics if the scalar value cannot be read as a boolean.
    fn maybe_null_indices_iter(&self) -> Box<dyn Iterator<Item = usize>> {
        // Must be a boolean scalar
        let is_set = self.scalar().value().as_bool().unwrap().unwrap_or(false);
        if !is_set {
            return Box::new(iter::empty());
        }
        Box::new(0..self.len())
    }

    /// Yields the single run `(0, self.len())` when the constant is `true`;
    /// yields nothing when it is `false` or absent (treated as `false`).
    ///
    /// # Panics
    ///
    /// Panics if the scalar value cannot be read as a boolean.
    fn maybe_null_slices_iter(&self) -> Box<dyn Iterator<Item = (usize, usize)>> {
        // Must be a boolean scalar
        let is_set = self.scalar().value().as_bool().unwrap().unwrap_or(false);
        if !is_set {
            return Box::new(iter::empty());
        }
        Box::new(iter::once((0, self.len())))
    }
}

impl PrimitiveArrayTrait for ConstantArray {}

Expand All @@ -97,3 +119,27 @@ impl StructArrayTrait for ConstantArray {
impl ListArrayTrait for ConstantArray {}

impl ExtensionArrayTrait for ConstantArray {}

#[cfg(test)]
mod test {
    use itertools::Itertools;
    use vortex_dtype::Nullability;
    use vortex_scalar::Scalar;

    use crate::array::constant::ConstantArray;
    use crate::variants::BoolArrayTrait;

    /// Builds a non-nullable constant boolean array of length 3.
    fn constant_of(value: bool) -> ConstantArray {
        ConstantArray::new(Scalar::bool(value, Nullability::NonNullable), 3)
    }

    #[test]
    fn constant_iter_true_test() {
        let all_true = constant_of(true);

        let indices = all_true.maybe_null_indices_iter().collect_vec();
        let runs = all_true.maybe_null_slices_iter().collect_vec();

        assert_eq!(vec![0, 1, 2], indices);
        assert_eq!(vec![(0, 3)], runs);
    }

    #[test]
    fn constant_iter_false_test() {
        let all_false = constant_of(false);

        assert_eq!(0, all_false.maybe_null_indices_iter().count());
        assert_eq!(0, all_false.maybe_null_slices_iter().count());
    }
}
Loading

0 comments on commit 056f1b4

Please sign in to comment.