Skip to content

Commit

Permalink
Added a bool true count function and updated error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs committed Jul 24, 2024
1 parent 20d5b91 commit 25fe1e5
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 51 deletions.
31 changes: 11 additions & 20 deletions vortex-array/src/array/bool/compute/filter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
use vortex_error::{vortex_bail, VortexResult};
use vortex_error::{vortex_err, VortexResult};

use crate::array::bool::BoolArray;
use crate::compute::FilterFn;
use crate::stats::ArrayStatistics;
use crate::validity::filter_validity;
use crate::variants::BoolArrayTrait;
use crate::{Array, IntoArray};
Expand All @@ -15,27 +14,19 @@ impl FilterFn for BoolArray {
}

/// Filters `arr` by `predicate`, producing a new `BoolArray` of the selected values.
///
/// # Errors
///
/// Returns a `NotImplemented` error carrying the predicate's encoding id when the
/// predicate cannot be viewed as a bool array (`as_bool_array` yields `None`).
/// Errors from `filter_validity` and `BoolArray::try_new` are propagated.
fn filter_select_bool(arr: &BoolArray, predicate: &Array) -> VortexResult<BoolArray> {
let Some(selection_count) = predicate.statistics().compute_true_count() else {
vortex_bail!(
NotImplemented: "compute_true_count",
predicate.encoding().id()
)
};
predicate.with_dyn(|b| {
let validity = filter_validity(arr.validity(), predicate)?;
if let Some(predicate) = b.as_bool_array() {
let out = if selection_count * 2 > arr.len() {
filter_select_bool_by_slice(&arr.boolean_buffer(), predicate, selection_count)
} else {
filter_select_bool_by_index(&arr.boolean_buffer(), predicate, selection_count)
};
BoolArray::try_new(out, validity)
// Build the error lazily: `ok_or` would construct it even when the predicate
// is a bool array, whereas `ok_or_else` only does so on the `None` path.
let predicate = b.as_bool_array().ok_or_else(|| vortex_err!(
NotImplemented: "as_bool_array",
predicate.encoding().id()
))?;
// The selection count now comes from the bool-array view of the predicate.
let selection_count = predicate.true_count();
// More than half of `arr` selected -> slice-based kernel, otherwise index-based.
let out = if selection_count * 2 > arr.len() {
filter_select_bool_by_slice(&arr.boolean_buffer(), predicate, selection_count)
} else {
vortex_bail!(
NotImplemented: "as_bool_array",
predicate.encoding().id()
)
}
filter_select_bool_by_index(&arr.boolean_buffer(), predicate, selection_count)
};
BoolArray::try_new(out, validity)
})
}

Expand Down
11 changes: 9 additions & 2 deletions vortex-array/src/array/constant/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;

use arrow_array::Datum;
use vortex_dtype::Nullability;
use vortex_error::{vortex_bail, VortexResult};
use vortex_error::{vortex_bail, vortex_err, VortexResult};
use vortex_expr::Operator;
use vortex_scalar::Scalar;

Expand Down Expand Up @@ -69,7 +69,14 @@ impl FilterFn for ConstantArray {
/// Filters this constant array by `predicate`.
///
/// Builds a new constant array from a clone of this array's scalar and the
/// predicate's true count (presumably the new length; confirm against
/// `ConstantArray::new`).
///
/// # Errors
///
/// Returns a `NotImplemented` error carrying the predicate's encoding id when the
/// predicate cannot be viewed as a bool array.
fn filter(&self, predicate: &Array) -> VortexResult<Array> {
Ok(Self::new(
self.scalar().clone(),
predicate.statistics().compute_true_count().unwrap(),
predicate.with_dyn(|p| {
p.as_bool_array()
// Lazy form: the error is only constructed when the view is unavailable.
.ok_or_else(|| vortex_err!(
NotImplemented: "as_bool_array",
predicate.encoding().id()
))
.map(|x| x.true_count())
})?,
)
.into_array())
}
Expand Down
25 changes: 8 additions & 17 deletions vortex-array/src/array/primitive/compute/filter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use vortex_dtype::{match_each_native_ptype, NativePType};
use vortex_error::{vortex_bail, VortexResult};
use vortex_error::{vortex_err, VortexResult};

use crate::array::primitive::PrimitiveArray;
use crate::compute::FilterFn;
use crate::stats::ArrayStatistics;
use crate::validity::filter_validity;
use crate::variants::BoolArrayTrait;
use crate::{Array, IntoArray};
Expand All @@ -18,25 +17,17 @@ fn filter_select_primitive(
arr: &PrimitiveArray,
predicate: &Array,
) -> VortexResult<PrimitiveArray> {
// Filters `arr` by `predicate` and returns the selected values as a new
// `PrimitiveArray`; validity is filtered separately via `filter_validity`.
let Some(selection_count) = predicate.statistics().compute_true_count() else {
vortex_bail!(
NotImplemented: "compute_true_count",
predicate.encoding().id()
)
};
predicate.with_dyn(|b| {
let validity = filter_validity(arr.validity(), predicate)?;
if let Some(bb) = b.as_bool_array() {
match_each_native_ptype!(arr.ptype(), |$T| {
let slice = arr.maybe_null_slice::<$T>();
Ok(PrimitiveArray::from_vec(filter_primitive_slice(slice, bb, selection_count), validity))
})
} else {
vortex_bail!(
// Fail with `NotImplemented` (tagged with the predicate's encoding id) when the
// predicate has no bool-array view; the error is only built on that path.
let predicate = b.as_bool_array().ok_or_else(||vortex_err!(
NotImplemented: "as_bool_array",
predicate.encoding().id()
)
}
))?;
// Selection count is taken from the bool-array view of the predicate.
let selection_count = predicate.true_count();
// Dispatch on the array's primitive type and filter its typed slice.
match_each_native_ptype!(arr.ptype(), |$T| {
let slice = arr.maybe_null_slice::<$T>();
Ok(PrimitiveArray::from_vec(filter_primitive_slice(slice, predicate, selection_count), validity))
})
})
}

Expand Down
13 changes: 8 additions & 5 deletions vortex-array/src/array/varbin/compute/filter.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use itertools::Itertools;
use num_traits::{AsPrimitive, Zero};
use vortex_dtype::{match_each_integer_ptype, DType, NativePType};
use vortex_error::VortexResult;
use vortex_error::{vortex_err, VortexResult};

use crate::array::varbin::builder::VarBinBuilder;
use crate::array::varbin::VarBinArray;
use crate::compute::FilterFn;
use crate::stats::ArrayStatistics;
use crate::validity::Validity;
use crate::variants::BoolArrayTrait;
use crate::{Array, ArrayDType, IntoArray, IntoArrayVariant};
Expand All @@ -18,10 +17,14 @@ impl FilterFn for VarBinArray {
}

fn filter_select_var_bin(arr: &VarBinArray, predicate: &Array) -> VortexResult<VarBinArray> {
let selection_count = predicate.statistics().compute_true_count().unwrap();
predicate.with_dyn(|p| {
let predicate = p.as_bool_array_unchecked();

let predicate = p.as_bool_array().ok_or_else(|| {
vortex_err!(
NotImplemented: "as_bool_array",
predicate.encoding().id()
)
})?;
let selection_count = predicate.true_count();
if selection_count * 2 > predicate.len() {
filter_select_var_bin_by_slice(arr, predicate, selection_count)
} else {
Expand Down
10 changes: 5 additions & 5 deletions vortex-array/src/compute/unary/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
mod cast;
mod fill_forward;
mod scalar_at;
mod scalar_subtract;

pub use cast::{try_cast, CastFn};
pub use fill_forward::{fill_forward, FillForwardFn};
pub use scalar_at::{scalar_at, ScalarAtFn};
pub use scalar_subtract::{subtract_scalar, SubtractScalarFn};

mod cast;
mod fill_forward;
mod scalar_at;
mod scalar_subtract;
3 changes: 1 addition & 2 deletions vortex-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@ use std::fmt::{Debug, Display, Formatter};
use std::future::ready;

pub use ::paste;
use itertools::Itertools;

pub use canonical::*;
pub use context::*;
pub use data::*;
pub use implementation::*;
use itertools::Itertools;
pub use metadata::*;
pub use typed::*;
pub use view::*;
Expand Down
6 changes: 6 additions & 0 deletions vortex-array/src/variants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ pub trait ArrayVariants {
pub trait NullArrayTrait: ArrayTrait {}

pub trait BoolArrayTrait: ArrayTrait {
/// Returns the true count of this boolean array.
///
/// Uses the count computed by the array's statistics when one is available;
/// otherwise falls back to counting the indices yielded by
/// `maybe_null_indices_iter`.
fn true_count(&self) -> usize {
    if let Some(count) = self.statistics().compute_true_count() {
        return count;
    }
    self.maybe_null_indices_iter().count()
}

/// Returns an iterator over the sorted indices of set values in the underlying
/// boolean array. Well suited to arrays with a low number of set values.
fn maybe_null_indices_iter<'a>(&'a self) -> Box<dyn Iterator<Item = usize> + 'a>;
Expand Down

0 comments on commit 25fe1e5

Please sign in to comment.