From f8cd0167c76e3eaae86fe42b81fc62b26fc333dc Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Thu, 16 May 2024 16:31:14 +0100 Subject: [PATCH 01/17] initial sketch --- Cargo.lock | 1 + vortex-array/Cargo.toml | 1 + .../array/primitive/compute/filter_indices.rs | 234 ++++++++++++++++++ .../src/array/primitive/compute/mod.rs | 1 + vortex-array/src/compute/filter_indices.rs | 31 +++ vortex-array/src/compute/mod.rs | 6 + vortex-dtype/src/field_paths.rs | 31 ++- vortex-expr/src/display.rs | 6 +- 8 files changed, 300 insertions(+), 11 deletions(-) create mode 100644 vortex-array/src/array/primitive/compute/filter_indices.rs create mode 100644 vortex-array/src/compute/filter_indices.rs diff --git a/Cargo.lock b/Cargo.lock index b7185c6b7d..d67e1be9c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5045,6 +5045,7 @@ dependencies = [ "vortex-buffer", "vortex-dtype", "vortex-error", + "vortex-expr", "vortex-flatbuffers", "vortex-scalar", ] diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 1f2b783afd..8f1de4b41f 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -36,6 +36,7 @@ rand = { workspace = true } vortex-buffer = { path = "../vortex-buffer" } vortex-dtype = { path = "../vortex-dtype", features = ["flatbuffers", "serde"] } vortex-error = { path = "../vortex-error", features = ["flexbuffers"] } +vortex-expr = { path = "../vortex-expr" } vortex-flatbuffers = { path = "../vortex-flatbuffers" } vortex-scalar = { path = "../vortex-scalar", features = ["flatbuffers", "serde"] } serde = { workspace = true, features = ["derive"] } diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs new file mode 100644 index 0000000000..4a751eead1 --- /dev/null +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -0,0 +1,234 @@ +use itertools::{Itertools}; +use vortex_dtype::{match_each_native_ptype, NativePType}; +use vortex_error::{vortex_bail, VortexResult}; +use vortex_expr::expressions::{Conjunction, Disjunction, Predicate, Value}; +use vortex_expr::operators::Operator; + +use crate::{Array, ArrayTrait, IntoArray}; +use crate::array::primitive::PrimitiveArray; +use crate::compute::filter_indices::FilterIndicesFn; +use crate::validity::Validity; + +impl FilterIndicesFn for PrimitiveArray { + fn apply_disjunctive_filter(&self, predicate: &Disjunction) -> VortexResult { + let map = predicate.conjunctions.iter() + .map(|conj| { + BoolMultiZip( + conj.predicates.iter().map(|pred| { + self.indices_matching_predicate(pred).unwrap() + }) + .map(|a| a.into_iter()).collect_vec(), + Comparison::All).collect_vec().into_iter() + }).collect_vec(); + let bitmask = BoolMultiZip(map, Comparison::Any).collect_vec(); + let indices = bitmask.iter() + .enumerate() + .filter(|(_, &v)| v) + .map(|(idx, _)| (idx + 1) as u64) + .collect_vec(); + Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) + } + + fn apply_conjunctive_filter(&self, conj: &Conjunction) -> VortexResult { + let bitmask = BoolMultiZip( + conj.predicates.iter().map(|pred| { + self.indices_matching_predicate(pred).unwrap() + }) + .map(|a| a.into_iter()).collect_vec(), + Comparison::All) + .collect_vec(); + let indices = bitmask.iter() + .enumerate() + .filter(|(_, &v)| v) + .map(|(idx, _)| (idx + 1) as u64) + .collect_vec(); + Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) + } + + fn indices_matching_predicate(&self, predicate: &Predicate) -> VortexResult> { + if predicate.left.first().is_some() { + vortex_bail!("Invalid path for primitive array") + } + let validity = self.validity(); + let rhs = match &predicate.right { + Value::Field(_) => { vortex_bail!("") } + Value::Literal(scalar) => { scalar } + }; + + let matching_idxs = match_each_native_ptype!(self.ptype(), |$T| { + let rhs_typed: $T = rhs.try_into().unwrap(); + let predicate_fn = get_predicate::<$T>(&predicate.op); + self.typed_data::<$T>().iter().enumerate().filter(|(idx, &v)| { + predicate_fn(&v, &rhs_typed) + }) + .filter(|(idx, _)| validity.is_valid(idx.clone())) + .map(|(idx, _)| idx ) + .collect_vec() + }); + let mut bitmap = vec![false; self.len()]; + matching_idxs.into_iter().for_each(|idx| bitmap[idx] = true); + + Ok(bitmap) + } +} + +fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { + match op { + Operator::EqualTo => { + PartialEq::eq + } + Operator::NotEqualTo => { + PartialEq::ne + } + Operator::GreaterThan => { + PartialOrd::gt + } + Operator::GreaterThanOrEqualTo => { + PartialOrd::ge + } + Operator::LessThan => { + PartialOrd::lt + } + Operator::LessThanOrEqualTo => { + PartialOrd::le + } + } +} + +enum Comparison { + Any, + All, +} + +/// Zip together an arbitrary number of boolean iterators +struct BoolMultiZip(Vec>, Comparison); + +impl Iterator for BoolMultiZip { + type Item = bool; + + fn next(&mut self) -> Option { + let zipped = self.0 + .iter_mut() + .map(|iter| iter.next()) + .collect::>>(); + + match self.1 { + Comparison::Any => zipped.map(|inner| inner.iter().any(|&v| v)), + Comparison::All => zipped.map(|inner| inner.iter().all(|&v| v)) + } + } +} + +#[cfg(test)] +mod test { + use vortex_dtype::field_paths::{FieldPathBuilder}; + use vortex_expr::expressions::lit; + use vortex_expr::field_paths::FieldPathOperations; + use crate::validity::Validity; + use super::*; + + #[test] + fn test_basic_filter() { + let arr = PrimitiveArray::from_vec( + vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9], Validity::AllValid); + + let field = FieldPathBuilder::new().build(); + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![field.clone().lt(lit(5u32))] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [1u64, 2, 3, 4]); + + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![field.clone().gt(lit(5u32))] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [6u64, 7, 8, 9]); + + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![field.clone().eq(lit(5u32))] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [5]); + + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![field.clone().gte(lit(5u32))] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [5u64, 6, 7, 8, 9]); + + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![field.clone().lte(lit(5u32))] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [1u64, 2, 3, 4, 5]); + } + + #[test] + fn test_multiple_predicates() { + let arr = PrimitiveArray::from_vec( + vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let field = FieldPathBuilder::new().build(); + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![ + field.clone().lt(lit(5u32)), + field.clone().gt(lit(2u32)), + ] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [3, 4]) + } + + #[test] + fn test_disjoint_predicates() { + let arr = PrimitiveArray::from_vec( + vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let field = FieldPathBuilder::new().build(); + let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { + predicates: vec![ + field.clone().lt(lit(5u32)), + field.clone().gt(lit(5u32)), + ] + }).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, []) + } + + #[test] + fn test_disjunctive_predicate() { + let arr = PrimitiveArray::from_vec( + vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let field = FieldPathBuilder::new().build(); + let c1 = Conjunction { + predicates: vec![ + field.clone().lt(lit(5u32)), + ] + }; + let c2 = Conjunction { + predicates: vec![ + field.clone().gt(lit(5u32)), + ] + }; + + let disj = Disjunction { conjunctions: vec![c1, c2] }; + let filtered_primitive = arr.apply_disjunctive_filter(&disj).unwrap() + .flatten_primitive().unwrap(); + let filtered = filtered_primitive + .typed_data::(); + assert_eq!(filtered, [1u64, 2, 3, 4, 6, 7, 8, 9, 10]) + } +} diff --git a/vortex-array/src/array/primitive/compute/mod.rs b/vortex-array/src/array/primitive/compute/mod.rs index cbf64331dc..000abd39f1 100644 --- a/vortex-array/src/array/primitive/compute/mod.rs +++ b/vortex-array/src/array/primitive/compute/mod.rs @@ -19,6 +19,7 @@ mod search_sorted; mod slice; mod subtract_scalar; mod take; +mod filter_indices; impl ArrayCompute for PrimitiveArray { fn as_arrow(&self) -> Option<&dyn AsArrowArray> { diff --git a/vortex-array/src/compute/filter_indices.rs b/vortex-array/src/compute/filter_indices.rs new file mode 100644 index 0000000000..4543be16a9 --- /dev/null +++ b/vortex-array/src/compute/filter_indices.rs @@ -0,0 +1,31 @@ +use vortex_dtype::DType; +use vortex_error::{vortex_err, VortexResult}; +use vortex_expr::expressions::{Conjunction, Disjunction, Predicate}; + +use crate::{Array, ArrayDType}; + +pub trait FilterIndicesFn { + fn apply_disjunctive_filter(&self, predicate: &Disjunction) -> VortexResult; + fn apply_conjunctive_filter(&self, predicate: &Conjunction) -> VortexResult; + fn indices_matching_predicate(&self, predicate: &Predicate) -> VortexResult>; +} + +pub fn filter_indices(array: &Array, predicate: &Conjunction) -> VortexResult { + if let Some(subtraction_result) = + array.with_dyn(|c| c.filter_indices().map(|t| t.apply_conjunctive_filter(predicate))) + { + return subtraction_result; + } + // if filter is not implemented for the given array type, but the array has a numeric + // DType, we can flatten the array and apply filter to the flattened primitive array + match array.dtype() { + DType::Primitive(..) => { + let flat = array.clone().flatten_primitive()?; + flat.apply_conjunctive_filter(predicate) + } + _ => Err(vortex_err!( + NotImplemented: "filter_indices", + array.encoding().id() + )), + } +} diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index 9974e63236..cb395367ab 100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -7,6 +7,7 @@ use scalar_at::ScalarAtFn; use search_sorted::SearchSortedFn; use slice::SliceFn; use take::TakeFn; +use crate::compute::filter_indices::FilterIndicesFn; use crate::compute::scalar_subtract::SubtractScalarFn; @@ -20,6 +21,7 @@ pub mod scalar_subtract; pub mod search_sorted; pub mod slice; pub mod take; +pub mod filter_indices; pub trait ArrayCompute { fn as_arrow(&self) -> Option<&dyn AsArrowArray> { @@ -38,6 +40,10 @@ pub trait ArrayCompute { None } + fn filter_indices(&self) -> Option<&dyn FilterIndicesFn> { + None + } + fn patch(&self) -> Option<&dyn PatchFn> { None } diff --git a/vortex-dtype/src/field_paths.rs b/vortex-dtype/src/field_paths.rs index d1497fc985..5b17c6f88b 100644 --- a/vortex-dtype/src/field_paths.rs +++ b/vortex-dtype/src/field_paths.rs @@ -1,8 +1,6 @@ use core::fmt; use std::fmt::{Display, Formatter}; -use vortex_error::{vortex_bail, VortexResult}; - #[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct FieldPath { @@ -13,6 +11,20 @@ impl FieldPath { pub fn builder() -> FieldPathBuilder { FieldPathBuilder::default() } + + pub fn first(&self) -> Option<&FieldIdentifier> { + self.field_names + .first() + } + + pub fn tail(&self) -> Option { + if self.first().is_none() { + None + } else { + let new_field_names = self.field_names[1..self.field_names.len()].to_vec(); + Some(Self::builder().join_all(new_field_names).build()) + } + } } #[derive(Clone, Debug, PartialEq)] @@ -38,13 +50,16 @@ impl FieldPathBuilder { self } - pub fn build(self) -> VortexResult { - if self.field_names.is_empty() { - vortex_bail!("Cannot build empty path"); - } - Ok(FieldPath { + pub fn join_all(mut self, identifiers: Vec>) -> Self { + self.field_names + .extend(identifiers.into_iter().map(|v| v.into())); + self + } + + pub fn build(self) -> FieldPath { + FieldPath { field_names: self.field_names, - }) + } } } diff --git a/vortex-expr/src/display.rs b/vortex-expr/src/display.rs index 90a8acbee1..cc37916207 100644 --- a/vortex-expr/src/display.rs +++ b/vortex-expr/src/display.rs @@ -69,13 +69,13 @@ mod tests { assert_eq!(format!("{}", !lit(1u32).lte(f1)), "($field <= 1)"); // nested field path - let f2 = FieldPath::builder().join("field").join(0).build().unwrap(); + let f2 = FieldPath::builder().join("field").join(0).build(); assert_eq!(format!("{}", !f2.lte(lit(1u32))), "($field.[0] > 1)"); } #[test] fn test_dnf_formatting() { - let path = FieldPath::builder().join(2).join("col1").build().unwrap(); + let path = FieldPath::builder().join(2).join("col1").build(); let d1 = Conjunction { predicates: vec![ lit(1u32).lt(path.clone()), @@ -83,7 +83,7 @@ mod tests { !lit(1u32).lte(path), ], }; - let path2 = FieldPath::builder().join("col1").join(2).build().unwrap(); + let path2 = FieldPath::builder().join("col1").join(2).build(); let d2 = Conjunction { predicates: vec![ lit(2u32).lt(path2), From 3a927407db77591ccc436065d8aee067feec4783 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Thu, 16 May 2024 18:17:12 +0100 Subject: [PATCH 02/17] review --- .../array/primitive/compute/filter_indices.rs | 347 +++++++++--------- .../src/array/primitive/compute/mod.rs | 2 +- vortex-array/src/compute/filter_indices.rs | 12 +- vortex-array/src/compute/mod.rs | 4 +- vortex-dtype/src/field_paths.rs | 3 +- 5 files changed, 188 insertions(+), 180 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 4a751eead1..c36d31f3f6 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,234 +1,245 @@ -use itertools::{Itertools}; +use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; -use vortex_expr::expressions::{Conjunction, Disjunction, Predicate, Value}; +use vortex_expr::expressions::{Disjunction, Predicate, Value}; use vortex_expr::operators::Operator; -use crate::{Array, ArrayTrait, IntoArray}; use crate::array::primitive::PrimitiveArray; use crate::compute::filter_indices::FilterIndicesFn; use crate::validity::Validity; +use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { - fn apply_disjunctive_filter(&self, predicate: &Disjunction) -> VortexResult { - let map = predicate.conjunctions.iter() + fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { + let conjunction_indices = predicate + .conjunctions + .iter() .map(|conj| { - BoolMultiZip( - conj.predicates.iter().map(|pred| { - self.indices_matching_predicate(pred).unwrap() - }) - .map(|a| a.into_iter()).collect_vec(), - Comparison::All).collect_vec().into_iter() - }).collect_vec(); - let bitmask = BoolMultiZip(map, Comparison::Any).collect_vec(); - let indices = bitmask.iter() - .enumerate() - .filter(|(_, &v)| v) - .map(|(idx, _)| (idx + 1) as u64) - .collect_vec(); - Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) - } - - fn apply_conjunctive_filter(&self, conj: &Conjunction) -> VortexResult { - let bitmask = BoolMultiZip( - conj.predicates.iter().map(|pred| { - self.indices_matching_predicate(pred).unwrap() + MergeOp::All( + conj.predicates + .iter() + .map(|pred| indices_matching_predicate(self, pred).unwrap()) + .map(|a| a.into_iter()) + .collect_vec(), + ) + .collect_vec() + .into_iter() }) - .map(|a| a.into_iter()).collect_vec(), - Comparison::All) .collect_vec(); - let indices = bitmask.iter() + let indices = MergeOp::Any(conjunction_indices) .enumerate() - .filter(|(_, &v)| v) - .map(|(idx, _)| (idx + 1) as u64) + .filter(|(_, v)| *v) + .map(|(idx, _)| idx as u64) .collect_vec(); Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) } +} - fn indices_matching_predicate(&self, predicate: &Predicate) -> VortexResult> { - if predicate.left.first().is_some() { - vortex_bail!("Invalid path for primitive array") - } - let validity = self.validity(); - let rhs = match &predicate.right { - Value::Field(_) => { vortex_bail!("") } - Value::Literal(scalar) => { scalar } - }; - - let matching_idxs = match_each_native_ptype!(self.ptype(), |$T| { - let rhs_typed: $T = rhs.try_into().unwrap(); - let predicate_fn = get_predicate::<$T>(&predicate.op); - self.typed_data::<$T>().iter().enumerate().filter(|(idx, &v)| { - predicate_fn(&v, &rhs_typed) - }) - .filter(|(idx, _)| validity.is_valid(idx.clone())) - .map(|(idx, _)| idx ) - .collect_vec() - }); - let mut bitmap = vec![false; self.len()]; - matching_idxs.into_iter().for_each(|idx| bitmap[idx] = true); - - Ok(bitmap) +fn indices_matching_predicate( + arr: &PrimitiveArray, + predicate: &Predicate, +) -> VortexResult> { + if predicate.left.first().is_some() { + vortex_bail!("Invalid path for primitive array") } + let validity = arr.validity(); + let rhs = match &predicate.right { + Value::Field(_) => { + vortex_bail!("") + } + Value::Literal(scalar) => scalar, + }; + + let matching_idxs = match_each_native_ptype!(arr.ptype(), |$T| { + let rhs_typed: $T = rhs.try_into().unwrap(); + let predicate_fn = get_predicate::<$T>(&predicate.op); + arr.typed_data::<$T>().iter().enumerate().filter(|(idx, &v)| { + predicate_fn(&v, &rhs_typed) + }) + .filter(|(idx, _)| validity.is_valid(idx.clone())) + .map(|(idx, _)| idx ) + .collect_vec() + }); + let mut bitmap = vec![false; arr.len()]; + matching_idxs.into_iter().for_each(|idx| bitmap[idx] = true); + + Ok(bitmap) } fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { match op { - Operator::EqualTo => { - PartialEq::eq - } - Operator::NotEqualTo => { - PartialEq::ne - } - Operator::GreaterThan => { - PartialOrd::gt - } - Operator::GreaterThanOrEqualTo => { - PartialOrd::ge - } - Operator::LessThan => { - PartialOrd::lt - } - Operator::LessThanOrEqualTo => { - PartialOrd::le - } + Operator::EqualTo => PartialEq::eq, + Operator::NotEqualTo => PartialEq::ne, + Operator::GreaterThan => PartialOrd::gt, + Operator::GreaterThanOrEqualTo => PartialOrd::ge, + Operator::LessThan => PartialOrd::lt, + Operator::LessThanOrEqualTo => PartialOrd::le, } } -enum Comparison { - Any, - All, +/// Merge an arbitrary number of boolean iterators +enum MergeOp { + Any(Vec>), + All(Vec>), } -/// Zip together an arbitrary number of boolean iterators -struct BoolMultiZip(Vec>, Comparison); - -impl Iterator for BoolMultiZip { +impl Iterator for MergeOp { type Item = bool; fn next(&mut self) -> Option { - let zipped = self.0 - .iter_mut() - .map(|iter| iter.next()) - .collect::>>(); - - match self.1 { - Comparison::Any => zipped.map(|inner| inner.iter().any(|&v| v)), - Comparison::All => zipped.map(|inner| inner.iter().all(|&v| v)) + let zipped = match self { + MergeOp::Any(vecs) => vecs, + MergeOp::All(vecs) => vecs, + } + .iter_mut() + .map(|iter| iter.next()) + .collect::>>(); + + match self { + MergeOp::Any(_) => zipped.map(|inner| inner.iter().any(|&v| v)), + MergeOp::All(_) => zipped.map(|inner| inner.iter().all(|&v| v)), } } } #[cfg(test)] mod test { - use vortex_dtype::field_paths::{FieldPathBuilder}; - use vortex_expr::expressions::lit; + use vortex_dtype::field_paths::FieldPathBuilder; + use vortex_expr::expressions::{lit, Conjunction}; use vortex_expr::field_paths::FieldPathOperations; - use crate::validity::Validity; + use super::*; + use crate::validity::Validity; + + fn apply_conjunctive_filter(arr: &PrimitiveArray, conj: Conjunction) -> VortexResult { + arr.filter_indices(&Disjunction { + conjunctions: vec![conj], + }) + } #[test] fn test_basic_filter() { - let arr = PrimitiveArray::from_vec( - vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9], Validity::AllValid); + let arr = PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9], Validity::AllValid); let field = FieldPathBuilder::new().build(); - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![field.clone().lt(lit(5u32))] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [1u64, 2, 3, 4]); - - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![field.clone().gt(lit(5u32))] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [6u64, 7, 8, 9]); - - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![field.clone().eq(lit(5u32))] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [5]); - - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![field.clone().gte(lit(5u32))] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [5u64, 6, 7, 8, 9]); - - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![field.clone().lte(lit(5u32))] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [1u64, 2, 3, 4, 5]); + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().lt(lit(5u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [0u64, 1, 2, 3]); + + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().gt(lit(5u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [5u64, 6, 7, 8]); + + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().eq(lit(5u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [4]); + + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().gte(lit(5u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [4u64, 5, 6, 7, 8]); + + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().lte(lit(5u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [0u64, 1, 2, 3, 4]); } #[test] fn test_multiple_predicates() { - let arr = PrimitiveArray::from_vec( - vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let arr = + PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); let field = FieldPathBuilder::new().build(); - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![ - field.clone().lt(lit(5u32)), - field.clone().gt(lit(2u32)), - ] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [3, 4]) + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [2, 3]) } #[test] fn test_disjoint_predicates() { - let arr = PrimitiveArray::from_vec( - vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let arr = + PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); let field = FieldPathBuilder::new().build(); - let filtered_primitive = arr.apply_conjunctive_filter(&Conjunction { - predicates: vec![ - field.clone().lt(lit(5u32)), - field.clone().gt(lit(5u32)), - ] - }).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], + }, + ) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, []) } #[test] fn test_disjunctive_predicate() { - let arr = PrimitiveArray::from_vec( - vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let arr = + PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); let field = FieldPathBuilder::new().build(); let c1 = Conjunction { - predicates: vec![ - field.clone().lt(lit(5u32)), - ] + predicates: vec![field.clone().lt(lit(5u32))], }; let c2 = Conjunction { - predicates: vec![ - field.clone().gt(lit(5u32)), - ] + predicates: vec![field.clone().gt(lit(5u32))], }; - let disj = Disjunction { conjunctions: vec![c1, c2] }; - let filtered_primitive = arr.apply_disjunctive_filter(&disj).unwrap() - .flatten_primitive().unwrap(); - let filtered = filtered_primitive - .typed_data::(); - assert_eq!(filtered, [1u64, 2, 3, 4, 6, 7, 8, 9, 10]) + let disj = Disjunction { + conjunctions: vec![c1, c2], + }; + let filtered_primitive = arr + .filter_indices(&disj) + .unwrap() + .flatten_primitive() + .unwrap(); + let filtered = filtered_primitive.typed_data::(); + assert_eq!(filtered, [0u64, 1, 2, 3, 5, 6, 7, 8, 9]) } } diff --git a/vortex-array/src/array/primitive/compute/mod.rs b/vortex-array/src/array/primitive/compute/mod.rs index 000abd39f1..8aa141a4de 100644 --- a/vortex-array/src/array/primitive/compute/mod.rs +++ b/vortex-array/src/array/primitive/compute/mod.rs @@ -14,12 +14,12 @@ mod as_arrow; mod as_contiguous; mod cast; mod fill; +mod filter_indices; mod scalar_at; mod search_sorted; mod slice; mod subtract_scalar; mod take; -mod filter_indices; impl ArrayCompute for PrimitiveArray { fn as_arrow(&self) -> Option<&dyn AsArrowArray> { diff --git a/vortex-array/src/compute/filter_indices.rs b/vortex-array/src/compute/filter_indices.rs index 4543be16a9..30b5c4538a 100644 --- a/vortex-array/src/compute/filter_indices.rs +++ b/vortex-array/src/compute/filter_indices.rs @@ -1,18 +1,16 @@ use vortex_dtype::DType; use vortex_error::{vortex_err, VortexResult}; -use vortex_expr::expressions::{Conjunction, Disjunction, Predicate}; +use vortex_expr::expressions::Disjunction; use crate::{Array, ArrayDType}; pub trait FilterIndicesFn { - fn apply_disjunctive_filter(&self, predicate: &Disjunction) -> VortexResult; - fn apply_conjunctive_filter(&self, predicate: &Conjunction) -> VortexResult; - fn indices_matching_predicate(&self, predicate: &Predicate) -> VortexResult>; + fn filter_indices(&self, predicate: &Disjunction) -> VortexResult; } -pub fn filter_indices(array: &Array, predicate: &Conjunction) -> VortexResult { +pub fn filter_indices(array: &Array, predicate: &Disjunction) -> VortexResult { if let Some(subtraction_result) = - array.with_dyn(|c| c.filter_indices().map(|t| t.apply_conjunctive_filter(predicate))) + array.with_dyn(|c| c.filter_indices().map(|t| t.filter_indices(predicate))) { return subtraction_result; } @@ -21,7 +19,7 @@ pub fn filter_indices(array: &Array, predicate: &Conjunction) -> VortexResult { let flat = array.clone().flatten_primitive()?; - flat.apply_conjunctive_filter(predicate) + flat.filter_indices(predicate) } _ => Err(vortex_err!( NotImplemented: "filter_indices", diff --git a/vortex-array/src/compute/mod.rs b/vortex-array/src/compute/mod.rs index cb395367ab..8853d27d53 100644 --- a/vortex-array/src/compute/mod.rs +++ b/vortex-array/src/compute/mod.rs @@ -7,21 +7,21 @@ use scalar_at::ScalarAtFn; use search_sorted::SearchSortedFn; use slice::SliceFn; use take::TakeFn; -use crate::compute::filter_indices::FilterIndicesFn; +use crate::compute::filter_indices::FilterIndicesFn; use crate::compute::scalar_subtract::SubtractScalarFn; pub mod as_arrow; pub mod as_contiguous; pub mod cast; pub mod fill; +pub mod filter_indices; pub mod patch; pub mod scalar_at; pub mod scalar_subtract; pub mod search_sorted; pub mod slice; pub mod take; -pub mod filter_indices; pub trait ArrayCompute { fn as_arrow(&self) -> Option<&dyn AsArrowArray> { diff --git a/vortex-dtype/src/field_paths.rs b/vortex-dtype/src/field_paths.rs index 5b17c6f88b..a6dd3ed143 100644 --- a/vortex-dtype/src/field_paths.rs +++ b/vortex-dtype/src/field_paths.rs @@ -13,8 +13,7 @@ impl FieldPath { } pub fn first(&self) -> Option<&FieldIdentifier> { - self.field_names - .first() + self.field_names.first() } pub fn tail(&self) -> Option { From f316729797703c24c5881c47595f34b93913a18b Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Thu, 16 May 2024 18:24:37 +0100 Subject: [PATCH 03/17] nit --- vortex-array/src/array/primitive/compute/filter_indices.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index c36d31f3f6..363ea81b30 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -216,7 +216,8 @@ mod test { .flatten_primitive() .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, []) + let expected: [u64; 0] = []; + assert_eq!(filtered, expected) } #[test] From 7254956c2ac51a728f839a0e8790018f2f0c314d Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Thu, 16 May 2024 23:13:02 +0100 Subject: [PATCH 04/17] fixes --- Cargo.lock | 1 + vortex-array/Cargo.toml | 1 + .../array/primitive/compute/filter_indices.rs | 82 ++++++++----------- vortex-array/src/compute/filter_indices.rs | 4 +- vortex-dtype/src/field_paths.rs | 4 +- 5 files changed, 39 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d67e1be9c1..2dbd8ad6f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5029,6 +5029,7 @@ dependencies = [ "arrow-schema", "build-vortex", "criterion", + "croaring", "enum-iterator", "flatbuffers", "flexbuffers", diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 8f1de4b41f..e66ee5deb5 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -22,6 +22,7 @@ workspace = true arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-schema = { workspace = true } +croaring = { workspace = true } enum-iterator = { workspace = true } flatbuffers = { workspace = true } flexbuffers = { workspace = true } diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 363ea81b30..a4457a06cb 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,3 +1,4 @@ +use croaring::Bitmap; use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; @@ -11,57 +12,50 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { - let conjunction_indices = predicate - .conjunctions - .iter() - .map(|conj| { - MergeOp::All( - conj.predicates - .iter() - .map(|pred| indices_matching_predicate(self, pred).unwrap()) - .map(|a| a.into_iter()) - .collect_vec(), - ) - .collect_vec() - .into_iter() - }) - .collect_vec(); - let indices = MergeOp::Any(conjunction_indices) - .enumerate() - .filter(|(_, v)| *v) - .map(|(idx, _)| idx as u64) - .collect_vec(); + let mut conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { + MergeOp::All( + &mut conj + .predicates + .iter() + .map(|pred| indices_matching_predicate(self, pred).unwrap()), + ) + .merge() + }); + let indices = MergeOp::Any(&mut conjunction_indices) + .merge() + .map(|bitmap| bitmap.iter().map(|idx| idx as u64).collect_vec()) + .unwrap_or(Vec::new()); Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) } } -fn indices_matching_predicate( - arr: &PrimitiveArray, - predicate: &Predicate, -) -> VortexResult> { - if predicate.left.first().is_some() { +fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { + if predicate.left.head().is_some() { vortex_bail!("Invalid path for primitive array") } let validity = arr.validity(); let rhs = match &predicate.right { Value::Field(_) => { - vortex_bail!("") + vortex_bail!("Right-hand-side fields not yet supported.") } Value::Literal(scalar) => scalar, }; - let matching_idxs = match_each_native_ptype!(arr.ptype(), |$T| { - let rhs_typed: $T = rhs.try_into().unwrap(); - let predicate_fn = get_predicate::<$T>(&predicate.op); + let matching_idxs: Vec = match_each_native_ptype!(arr.ptype(), |$T| { + let rhs_typed: $T = rhs.try_into().unwrap(); + let predicate_fn = get_predicate::<$T>(&predicate.op); + arr.typed_data::<$T>().iter().enumerate().filter(|(idx, &v)| { predicate_fn(&v, &rhs_typed) }) .filter(|(idx, _)| validity.is_valid(idx.clone())) - .map(|(idx, _)| idx ) + .map(|(idx, _)| idx as u32) .collect_vec() }); - let mut bitmap = vec![false; arr.len()]; - matching_idxs.into_iter().for_each(|idx| bitmap[idx] = true); + + let mut bitmap = Bitmap::with_container_capacity(arr.len() as u32); + + matching_idxs.into_iter().for_each(|idx| bitmap.add(idx)); Ok(bitmap) } @@ -78,26 +72,16 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { } /// Merge an arbitrary number of boolean iterators -enum MergeOp { - Any(Vec>), - All(Vec>), +enum MergeOp<'a> { + Any(&'a mut dyn Iterator), + All(&'a mut dyn Iterator), } -impl Iterator for MergeOp { - type Item = bool; - - fn next(&mut self) -> Option { - let zipped = match self { - MergeOp::Any(vecs) => vecs, - MergeOp::All(vecs) => vecs, - } - .iter_mut() - .map(|iter| iter.next()) - .collect::>>(); - +impl MergeOp<'_> { + fn merge(self) -> Option { match self { - MergeOp::Any(_) => zipped.map(|inner| inner.iter().any(|&v| v)), - MergeOp::All(_) => zipped.map(|inner| inner.iter().all(|&v| v)), + MergeOp::Any(bitmaps) => bitmaps.reduce(|a, b| a.or(&b)), + MergeOp::All(bitmaps) => bitmaps.reduce(|a, b| a.and(&b)), } } } diff --git a/vortex-array/src/compute/filter_indices.rs b/vortex-array/src/compute/filter_indices.rs index 30b5c4538a..0f399786eb 100644 --- a/vortex-array/src/compute/filter_indices.rs +++ b/vortex-array/src/compute/filter_indices.rs @@ -9,10 +9,10 @@ pub trait FilterIndicesFn { } pub fn filter_indices(array: &Array, predicate: &Disjunction) -> VortexResult { - if let Some(subtraction_result) = + if let Some(matching_indices) = array.with_dyn(|c| c.filter_indices().map(|t| t.filter_indices(predicate))) { - return subtraction_result; + return matching_indices; } // if filter is not implemented for the given array type, but the array has a numeric // DType, we can flatten the array and apply filter to the flattened primitive array diff --git a/vortex-dtype/src/field_paths.rs b/vortex-dtype/src/field_paths.rs index a6dd3ed143..00f2052fe7 100644 --- a/vortex-dtype/src/field_paths.rs +++ b/vortex-dtype/src/field_paths.rs @@ -12,12 +12,12 @@ impl FieldPath { FieldPathBuilder::default() } - pub fn first(&self) -> Option<&FieldIdentifier> { + pub fn head(&self) -> Option<&FieldIdentifier> { self.field_names.first() } pub fn tail(&self) -> Option { - if self.first().is_none() { + if self.head().is_none() { None } else { let new_field_names = self.field_names[1..self.field_names.len()].to_vec(); From 44f226c0702bf36db3cb0a99cc9df896d954a9ce Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Thu, 16 May 2024 23:14:54 +0100 Subject: [PATCH 05/17] nit --- .../src/array/primitive/compute/filter_indices.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index a4457a06cb..b28e06666b 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -13,7 +13,7 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { let mut conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { - MergeOp::All( + BitmapMergeOp::All( &mut conj .predicates .iter() @@ -21,7 +21,7 @@ impl FilterIndicesFn for PrimitiveArray { ) .merge() }); - let indices = MergeOp::Any(&mut conjunction_indices) + let indices = BitmapMergeOp::Any(&mut conjunction_indices) .merge() .map(|bitmap| bitmap.iter().map(|idx| idx as u64).collect_vec()) .unwrap_or(Vec::new()); @@ -71,17 +71,17 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { } } -/// Merge an arbitrary number of boolean iterators -enum MergeOp<'a> { +/// Merge an arbitrary number of bitmaps +enum BitmapMergeOp<'a> { Any(&'a mut dyn Iterator), All(&'a mut dyn Iterator), } -impl MergeOp<'_> { +impl BitmapMergeOp<'_> { fn merge(self) -> Option { match self { - MergeOp::Any(bitmaps) => bitmaps.reduce(|a, b| a.or(&b)), - MergeOp::All(bitmaps) => bitmaps.reduce(|a, b| a.and(&b)), + BitmapMergeOp::Any(bitmaps) => bitmaps.reduce(|a, b| a.or(&b)), + BitmapMergeOp::All(bitmaps) => bitmaps.reduce(|a, b| a.and(&b)), } } } From 655ea7a9615a1f26c7e907a1746553858d4fe80c Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Thu, 16 May 2024 23:33:11 +0100 Subject: [PATCH 06/17] comment --- vortex-array/src/array/primitive/compute/filter_indices.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index b28e06666b..69b31bbfde 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -49,10 +49,11 @@ fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> Vo predicate_fn(&v, &rhs_typed) }) .filter(|(idx, _)| validity.is_valid(idx.clone())) + //todo(@jcasale): 64-bit RoaringBitmap? .map(|(idx, _)| idx as u32) .collect_vec() }); - + //todo(@jcasale): 64-bit RoaringBitmap? let mut bitmap = Bitmap::with_container_capacity(arr.len() as u32); matching_idxs.into_iter().for_each(|idx| bitmap.add(idx)); From f9e9268df199801549e7629554168b3eccdd47b1 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 13:06:57 +0100 Subject: [PATCH 07/17] temp --- .../array/primitive/compute/filter_indices.rs | 132 +++++++++++------- 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 69b31bbfde..effce7d57f 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,4 +1,5 @@ -use croaring::Bitmap; +use std::ops::{BitAnd, BitOr}; +use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder}; use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; @@ -13,27 +14,34 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { let mut conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { - BitmapMergeOp::All( + BooleanBufferMergeOp::All( &mut conj .predicates .iter() .map(|pred| indices_matching_predicate(self, pred).unwrap()), ) - .merge() + .merge() }); - let indices = BitmapMergeOp::Any(&mut conjunction_indices) + let indices = BooleanBufferMergeOp::Any(&mut conjunction_indices) .merge() - .map(|bitmap| bitmap.iter().map(|idx| idx as u64).collect_vec()) + .map(|bitset| bitset.iter() + .enumerate() + .flat_map(|(idx, v)| if v { + Some(idx as u64) + } else { + None + }) + .collect_vec()) .unwrap_or(Vec::new()); Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) } } -fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { +fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { if predicate.left.head().is_some() { vortex_bail!("Invalid path for primitive array") } - let validity = arr.validity(); + let rhs = match &predicate.right { Value::Field(_) => { vortex_bail!("Right-hand-side fields not yet supported.") @@ -41,24 +49,29 @@ fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> Vo Value::Literal(scalar) => scalar, }; - let matching_idxs: Vec = match_each_native_ptype!(arr.ptype(), |$T| { + let validity_buf = arr.validity() + .to_logical(arr.len()) + .to_present_null_buffer()?.into_inner(); + + let matching_idxs = match_each_native_ptype!(arr.ptype(), |$T| { let rhs_typed: $T = rhs.try_into().unwrap(); let predicate_fn = get_predicate::<$T>(&predicate.op); - - arr.typed_data::<$T>().iter().enumerate().filter(|(idx, &v)| { - predicate_fn(&v, &rhs_typed) - }) - .filter(|(idx, _)| validity.is_valid(idx.clone())) - //todo(@jcasale): 64-bit RoaringBitmap? - .map(|(idx, _)| idx as u32) - .collect_vec() + apply_predicate(arr.typed_data::<$T>(), &rhs_typed, predicate_fn) }); - //todo(@jcasale): 64-bit RoaringBitmap? - let mut bitmap = Bitmap::with_container_capacity(arr.len() as u32); - matching_idxs.into_iter().for_each(|idx| bitmap.add(idx)); + Ok(matching_idxs.bitand(&validity_buf)) +} - Ok(bitmap) +fn apply_predicate bool>(lhs: &[T], rhs: &T, f: F) -> BooleanBuffer { + let matches = lhs.iter() + .filter(|lhs| f(lhs, rhs)) + .enumerate() + .map(|(idx, _)| idx) + .collect_vec(); + let mut matching_idx_bitset = BooleanBufferBuilder::new(lhs.len()); + matching_idx_bitset.resize(lhs.len()); + matches.into_iter().for_each(|idx| matching_idx_bitset.set_bit(idx, true)); + matching_idx_bitset.finish() } fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { @@ -72,17 +85,21 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { } } -/// Merge an arbitrary number of bitmaps -enum BitmapMergeOp<'a> { - Any(&'a mut dyn Iterator), - All(&'a mut dyn Iterator), +/// Merge an arbitrary number of bitsets +enum BooleanBufferMergeOp<'a> { + Any(&'a mut dyn Iterator), + All(&'a mut dyn Iterator), } -impl BitmapMergeOp<'_> { - fn merge(self) -> Option { +impl BooleanBufferMergeOp<'_> { + fn merge(self) -> Option { match self { - BitmapMergeOp::Any(bitmaps) => bitmaps.reduce(|a, b| a.or(&b)), - BitmapMergeOp::All(bitmaps) => bitmaps.reduce(|a, b| a.and(&b)), + BooleanBufferMergeOp::Any(bitsets) => bitsets.reduce(|a, b| { + a.bitor(&b) + }), + BooleanBufferMergeOp::All(bitsets) => bitsets.reduce(|a, b| { + a.bitand(&b) + }), } } } @@ -104,7 +121,14 @@ mod test { #[test] fn test_basic_filter() { - let arr = PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9], Validity::AllValid); + let arr = PrimitiveArray::from_nullable_vec(vec![ + Some(1u32), Some(2), Some(3), Some(4), + None, + Some(5), Some(6), Some(7), Some(8), + None, + Some(9), + None, + ]); let field = FieldPathBuilder::new().build(); let filtered_primitive = apply_conjunctive_filter( @@ -113,9 +137,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [0u64, 1, 2, 3]); @@ -125,11 +149,11 @@ mod test { predicates: vec![field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, [5u64, 6, 7, 8]); + assert_eq!(filtered, [6u64, 7, 8, 10]); let filtered_primitive = apply_conjunctive_filter( &arr, @@ -137,11 +161,11 @@ mod test { predicates: vec![field.clone().eq(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, [4]); + assert_eq!(filtered, [5]); let filtered_primitive = apply_conjunctive_filter( &arr, @@ -149,11 +173,11 @@ mod test { predicates: vec![field.clone().gte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, [4u64, 5, 6, 7, 8]); + assert_eq!(filtered, [5u64, 6, 7, 8, 10]); let filtered_primitive = apply_conjunctive_filter( &arr, @@ -161,11 +185,11 @@ mod test { predicates: vec![field.clone().lte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, [0u64, 1, 2, 3, 4]); + assert_eq!(filtered, [0u64, 1, 2, 3, 5]); } #[test] @@ -179,9 +203,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [2, 3]) } @@ -197,9 +221,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); let expected: [u64; 0] = []; assert_eq!(filtered, expected) From daf268ec88dab753ddc8f2ceab47dc621dfeb1a6 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 13:19:07 +0100 Subject: [PATCH 08/17] mergeops uses booleanbuffer and tree_fold --- .../array/primitive/compute/filter_indices.rs | 127 +++++++++--------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index effce7d57f..cdc4b1c039 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,5 +1,6 @@ use std::ops::{BitAnd, BitOr}; -use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder}; + +use arrow_buffer::BooleanBuffer; use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; @@ -14,30 +15,31 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { let mut conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { - BooleanBufferMergeOp::All( + BooleanBufferMergeOp::All.merge( &mut conj .predicates .iter() .map(|pred| indices_matching_predicate(self, pred).unwrap()), ) - .merge() }); - let indices = BooleanBufferMergeOp::Any(&mut conjunction_indices) - .merge() - .map(|bitset| bitset.iter() - .enumerate() - .flat_map(|(idx, v)| if v { - Some(idx as u64) - } else { - None - }) - .collect_vec()) - .unwrap_or(Vec::new()); + let indices = BooleanBufferMergeOp::Any + .merge(&mut conjunction_indices) + .map(|bitset| { + bitset + .iter() + .enumerate() + .flat_map(|(idx, v)| if v { Some(idx as u64) } else { None }) + .collect_vec() + }) + .unwrap_or_default(); Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) } } -fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { +fn indices_matching_predicate( + arr: &PrimitiveArray, + predicate: &Predicate, +) -> VortexResult { if predicate.left.head().is_some() { vortex_bail!("Invalid path for primitive array") } @@ -49,9 +51,11 @@ fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> Vo Value::Literal(scalar) => scalar, }; - let validity_buf = arr.validity() + let present_buf = arr + .validity() .to_logical(arr.len()) - .to_present_null_buffer()?.into_inner(); + .to_present_null_buffer()? + .into_inner(); let matching_idxs = match_each_native_ptype!(arr.ptype(), |$T| { let rhs_typed: $T = rhs.try_into().unwrap(); @@ -59,19 +63,16 @@ fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> Vo apply_predicate(arr.typed_data::<$T>(), &rhs_typed, predicate_fn) }); - Ok(matching_idxs.bitand(&validity_buf)) + Ok(matching_idxs.bitand(&present_buf)) } -fn apply_predicate bool>(lhs: &[T], rhs: &T, f: F) -> BooleanBuffer { - let matches = lhs.iter() - .filter(|lhs| f(lhs, rhs)) - .enumerate() - .map(|(idx, _)| idx) - .collect_vec(); - let mut matching_idx_bitset = BooleanBufferBuilder::new(lhs.len()); - matching_idx_bitset.resize(lhs.len()); - matches.into_iter().for_each(|idx| matching_idx_bitset.set_bit(idx, true)); - matching_idx_bitset.finish() +fn apply_predicate bool>( + lhs: &[T], + rhs: &T, + f: F, +) -> BooleanBuffer { + let matches = lhs.iter().map(|lhs| f(lhs, rhs)); + BooleanBuffer::from_iter(matches) } fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { @@ -86,20 +87,16 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { } /// Merge an arbitrary number of bitsets -enum BooleanBufferMergeOp<'a> { - Any(&'a mut dyn Iterator), - All(&'a mut dyn Iterator), +enum BooleanBufferMergeOp { + Any, + All, } -impl BooleanBufferMergeOp<'_> { - fn merge(self) -> Option { +impl BooleanBufferMergeOp { + fn merge(self, bitsets: &mut dyn Iterator) -> Option { match self { - BooleanBufferMergeOp::Any(bitsets) => bitsets.reduce(|a, b| { - a.bitor(&b) - }), - BooleanBufferMergeOp::All(bitsets) => bitsets.reduce(|a, b| { - a.bitand(&b) - }), + BooleanBufferMergeOp::Any => bitsets.tree_fold1(|a, b| a.bitor(&b)), + BooleanBufferMergeOp::All => bitsets.tree_fold1(|a, b| a.bitand(&b)), } } } @@ -122,9 +119,15 @@ mod test { #[test] fn test_basic_filter() { let arr = PrimitiveArray::from_nullable_vec(vec![ - Some(1u32), Some(2), Some(3), Some(4), + Some(1u32), + Some(2), + Some(3), + Some(4), None, - Some(5), Some(6), Some(7), Some(8), + Some(5), + Some(6), + Some(7), + Some(8), None, Some(9), None, @@ -137,9 +140,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [0u64, 1, 2, 3]); @@ -149,9 +152,9 @@ mod test { predicates: vec![field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [6u64, 7, 8, 10]); @@ -161,9 +164,9 @@ mod test { predicates: vec![field.clone().eq(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [5]); @@ -173,9 +176,9 @@ mod test { predicates: vec![field.clone().gte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [5u64, 6, 7, 8, 10]); @@ -185,9 +188,9 @@ mod test { predicates: vec![field.clone().lte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [0u64, 1, 2, 3, 5]); } @@ -203,9 +206,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [2, 3]) } @@ -221,9 +224,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); let expected: [u64; 0] = []; assert_eq!(filtered, expected) From c31925deffb395219ef4187a988055e7a1abb0c6 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 13:22:03 +0100 Subject: [PATCH 09/17] nit --- vortex-array/src/array/primitive/compute/filter_indices.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index cdc4b1c039..9234176346 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -168,7 +168,7 @@ mod test { .flatten_primitive() .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, [5]); + assert_eq!(filtered, [5u64]); let filtered_primitive = apply_conjunctive_filter( &arr, @@ -210,7 +210,7 @@ mod test { .flatten_primitive() .unwrap(); let filtered = filtered_primitive.typed_data::(); - assert_eq!(filtered, [2, 3]) + assert_eq!(filtered, [2u64, 3]) } #[test] From 8c3a2119f9fc7596821f76c7fa4d4790f94e51c5 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 14:04:52 +0100 Subject: [PATCH 10/17] use bitset instead of booleanbuffer because allocations --- .../array/primitive/compute/filter_indices.rs | 69 ++++++++++--------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 9234176346..583f742590 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,6 +1,6 @@ -use std::ops::{BitAnd, BitOr}; +use std::ops::{BitAndAssign, BitOrAssign}; -use arrow_buffer::BooleanBuffer; +use croaring::Bitset; use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; @@ -15,31 +15,35 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { let mut conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { - BooleanBufferMergeOp::All.merge( + BitsetMergeOp::All.merge( &mut conj .predicates .iter() .map(|pred| indices_matching_predicate(self, pred).unwrap()), ) }); - let indices = BooleanBufferMergeOp::Any + let present_buf = Bitset::from_iter( + self.validity() + .to_logical(self.len()) + .to_present_null_buffer()? + .into_inner() + .set_indices(), + ); + + let indices = BitsetMergeOp::Any .merge(&mut conjunction_indices) - .map(|bitset| { + .map(|mut bitset| { + bitset.bitand_assign(&present_buf); bitset - .iter() - .enumerate() - .flat_map(|(idx, v)| if v { Some(idx as u64) } else { None }) - .collect_vec() }) + .map(|bitset| bitset.iter().map(|idx| idx as u64).collect_vec()) .unwrap_or_default(); + Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) } } -fn indices_matching_predicate( - arr: &PrimitiveArray, - predicate: &Predicate, -) -> VortexResult { +fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { if predicate.left.head().is_some() { vortex_bail!("Invalid path for primitive array") } @@ -50,29 +54,22 @@ fn indices_matching_predicate( } Value::Literal(scalar) => scalar, }; - - let present_buf = arr - .validity() - .to_logical(arr.len()) - .to_present_null_buffer()? - .into_inner(); - let matching_idxs = match_each_native_ptype!(arr.ptype(), |$T| { let rhs_typed: $T = rhs.try_into().unwrap(); let predicate_fn = get_predicate::<$T>(&predicate.op); apply_predicate(arr.typed_data::<$T>(), &rhs_typed, predicate_fn) }); - Ok(matching_idxs.bitand(&present_buf)) + Ok(matching_idxs) } -fn apply_predicate bool>( - lhs: &[T], - rhs: &T, - f: F, -) -> BooleanBuffer { - let matches = lhs.iter().map(|lhs| f(lhs, rhs)); - BooleanBuffer::from_iter(matches) +fn apply_predicate bool>(lhs: &[T], rhs: &T, f: F) -> Bitset { + let matches = lhs + .iter() + .map(|lhs| f(lhs, rhs)) + .enumerate() + .flat_map(|(idx, v)| if v { Some(idx) } else { None }); + Bitset::from_iter(matches) } fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { @@ -87,16 +84,22 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { } /// Merge an arbitrary number of bitsets -enum BooleanBufferMergeOp { +enum BitsetMergeOp { Any, All, } -impl BooleanBufferMergeOp { - fn merge(self, bitsets: &mut dyn Iterator) -> Option { +impl BitsetMergeOp { + fn merge(self, bitsets: &mut dyn Iterator) -> Option { match self { - BooleanBufferMergeOp::Any => bitsets.tree_fold1(|a, b| a.bitor(&b)), - BooleanBufferMergeOp::All => bitsets.tree_fold1(|a, b| a.bitand(&b)), + BitsetMergeOp::Any => bitsets.tree_fold1(|mut a, b| { + a.bitor_assign(&b); + a + }), + BitsetMergeOp::All => bitsets.tree_fold1(|mut a, b| { + a.bitand_assign(&b); + a + }), } } } From 9dc3ae3b9ecabbb5b40b7ea01552ca0ade74ced2 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 16:09:15 +0100 Subject: [PATCH 11/17] temp --- Cargo.lock | 1 - vortex-array/Cargo.toml | 1 - .../array/primitive/compute/filter_indices.rs | 120 +++++++----------- 3 files changed, 49 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2dbd8ad6f8..d67e1be9c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5029,7 +5029,6 @@ dependencies = [ "arrow-schema", "build-vortex", "criterion", - "croaring", "enum-iterator", "flatbuffers", "flexbuffers", diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index e66ee5deb5..8f1de4b41f 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -22,7 +22,6 @@ workspace = true arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-schema = { workspace = true } -croaring = { workspace = true } enum-iterator = { workspace = true } flatbuffers = { workspace = true } flexbuffers = { workspace = true } diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 583f742590..55b179f68b 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,6 +1,7 @@ -use std::ops::{BitAndAssign, BitOrAssign}; +use std::ops::{BitAnd, BitOr}; + +use arrow_buffer::BooleanBuffer; -use croaring::Bitset; use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; @@ -14,36 +15,35 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { - let mut conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { - BitsetMergeOp::All.merge( - &mut conj - .predicates - .iter() - .map(|pred| indices_matching_predicate(self, pred).unwrap()), - ) + let conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { + conj + .predicates + .iter() + .map(|pred| indices_matching_predicate(self, pred).unwrap()) + .reduce(|a, b| { + a.bitand(&b) + }) }); - let present_buf = Bitset::from_iter( - self.validity() - .to_logical(self.len()) - .to_present_null_buffer()? - .into_inner() - .set_indices(), - ); - - let indices = BitsetMergeOp::Any - .merge(&mut conjunction_indices) - .map(|mut bitset| { - bitset.bitand_assign(&present_buf); - bitset + let present_buf = self.validity() + .to_logical(self.len()) + .to_present_null_buffer()? + .into_inner(); + + let indices = conjunction_indices + .reduce(|a, b| { + a.bitor(&b) + }) + .map(|bitset| { + bitset.bitand(&present_buf) }) .map(|bitset| bitset.iter().map(|idx| idx as u64).collect_vec()) .unwrap_or_default(); - + Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) } } -fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { +fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { if predicate.left.head().is_some() { vortex_bail!("Invalid path for primitive array") } @@ -63,13 +63,12 @@ fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> Vo Ok(matching_idxs) } -fn apply_predicate bool>(lhs: &[T], rhs: &T, f: F) -> Bitset { +fn apply_predicate bool>(lhs: &[T], rhs: &T, f: F) -> BooleanBuffer { let matches = lhs .iter() - .map(|lhs| f(lhs, rhs)) - .enumerate() - .flat_map(|(idx, v)| if v { Some(idx) } else { None }); - Bitset::from_iter(matches) + .map(|lhs| f(lhs, rhs)); + + BooleanBuffer::from_iter(matches) } fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { @@ -83,27 +82,6 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { } } -/// Merge an arbitrary number of bitsets -enum BitsetMergeOp { - Any, - All, -} - -impl BitsetMergeOp { - fn merge(self, bitsets: &mut dyn Iterator) -> Option { - match self { - BitsetMergeOp::Any => bitsets.tree_fold1(|mut a, b| { - a.bitor_assign(&b); - a - }), - BitsetMergeOp::All => bitsets.tree_fold1(|mut a, b| { - a.bitand_assign(&b); - a - }), - } - } -} - #[cfg(test)] mod test { use vortex_dtype::field_paths::FieldPathBuilder; @@ -143,9 +121,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [0u64, 1, 2, 3]); @@ -155,9 +133,9 @@ mod test { predicates: vec![field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [6u64, 7, 8, 10]); @@ -167,9 +145,9 @@ mod test { predicates: vec![field.clone().eq(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [5u64]); @@ -179,9 +157,9 @@ mod test { predicates: vec![field.clone().gte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [5u64, 6, 7, 8, 10]); @@ -191,9 +169,9 @@ mod test { predicates: vec![field.clone().lte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [0u64, 1, 2, 3, 5]); } @@ -209,9 +187,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); assert_eq!(filtered, [2u64, 3]) } @@ -227,9 +205,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); + .unwrap() + .flatten_primitive() + .unwrap(); let filtered = filtered_primitive.typed_data::(); let expected: [u64; 0] = []; assert_eq!(filtered, expected) From 08720d4183457ac69fd4be37f4f1c1544c5a2840 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 16:19:01 +0100 Subject: [PATCH 12/17] use booleanbuffers (again), return boolarray --- .../array/primitive/compute/filter_indices.rs | 121 +++++++++--------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 55b179f68b..f024682c30 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -1,49 +1,43 @@ use std::ops::{BitAnd, BitOr}; use arrow_buffer::BooleanBuffer; - -use itertools::Itertools; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; use vortex_expr::expressions::{Disjunction, Predicate, Value}; use vortex_expr::operators::Operator; +use crate::array::bool::BoolArray; use crate::array::primitive::PrimitiveArray; use crate::compute::filter_indices::FilterIndicesFn; -use crate::validity::Validity; use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { let conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { - conj - .predicates + conj.predicates .iter() .map(|pred| indices_matching_predicate(self, pred).unwrap()) - .reduce(|a, b| { - a.bitand(&b) - }) + .reduce(|a, b| a.bitand(&b)) }); - let present_buf = self.validity() + let present_buf = self + .validity() .to_logical(self.len()) .to_present_null_buffer()? .into_inner(); - let indices = conjunction_indices - .reduce(|a, b| { - a.bitor(&b) - }) - .map(|bitset| { - bitset.bitand(&present_buf) - }) - .map(|bitset| bitset.iter().map(|idx| idx as u64).collect_vec()) - .unwrap_or_default(); - - Ok(PrimitiveArray::from_vec(indices, Validity::AllValid).into_array()) + let bitset = conjunction_indices + .reduce(|a, b| a.bitor(&b)) + .map(|bitset| bitset.bitand(&present_buf)) + .unwrap_or_else(|| BooleanBuffer::new_set(self.len())); + + Ok(BoolArray::from(bitset).into_array()) } } -fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> VortexResult { +fn indices_matching_predicate( + arr: &PrimitiveArray, + predicate: &Predicate, +) -> VortexResult { if predicate.left.head().is_some() { vortex_bail!("Invalid path for primitive array") } @@ -63,10 +57,12 @@ fn indices_matching_predicate(arr: &PrimitiveArray, predicate: &Predicate) -> Vo Ok(matching_idxs) } -fn apply_predicate bool>(lhs: &[T], rhs: &T, f: F) -> BooleanBuffer { - let matches = lhs - .iter() - .map(|lhs| f(lhs, rhs)); +fn apply_predicate bool>( + lhs: &[T], + rhs: &T, + f: F, +) -> BooleanBuffer { + let matches = lhs.iter().map(|lhs| f(lhs, rhs)); BooleanBuffer::from_iter(matches) } @@ -84,6 +80,7 @@ fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { #[cfg(test)] mod test { + use itertools::Itertools; use vortex_dtype::field_paths::FieldPathBuilder; use vortex_expr::expressions::{lit, Conjunction}; use vortex_expr::field_paths::FieldPathOperations; @@ -97,6 +94,16 @@ mod test { }) } + fn to_int_indices(filtered_primitive: BoolArray) -> Vec { + let filtered = filtered_primitive + .boolean_buffer() + .iter() + .enumerate() + .flat_map(|(idx, v)| if v { Some(idx as u64) } else { None }) + .collect_vec(); + filtered + } + #[test] fn test_basic_filter() { let arr = PrimitiveArray::from_nullable_vec(vec![ @@ -121,10 +128,10 @@ mod test { predicates: vec![field.clone().lt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3]); let filtered_primitive = apply_conjunctive_filter( @@ -133,10 +140,10 @@ mod test { predicates: vec![field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [6u64, 7, 8, 10]); let filtered_primitive = apply_conjunctive_filter( @@ -145,10 +152,10 @@ mod test { predicates: vec![field.clone().eq(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [5u64]); let filtered_primitive = apply_conjunctive_filter( @@ -157,10 +164,10 @@ mod test { predicates: vec![field.clone().gte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [5u64, 6, 7, 8, 10]); let filtered_primitive = apply_conjunctive_filter( @@ -169,10 +176,10 @@ mod test { predicates: vec![field.clone().lte(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3, 5]); } @@ -187,10 +194,10 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [2u64, 3]) } @@ -205,10 +212,10 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + .unwrap() + .flatten_bool() + .unwrap(); + let filtered = to_int_indices(filtered_primitive); let expected: [u64; 0] = []; assert_eq!(filtered, expected) } @@ -228,12 +235,8 @@ mod test { let disj = Disjunction { conjunctions: vec![c1, c2], }; - let filtered_primitive = arr - .filter_indices(&disj) - .unwrap() - .flatten_primitive() - .unwrap(); - let filtered = filtered_primitive.typed_data::(); + let filtered_primitive = arr.filter_indices(&disj).unwrap().flatten_bool().unwrap(); + let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3, 5, 6, 7, 8, 9]) } } From 5fbaa2addc105cd13a7426bb349a5c0a20ebf152 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 16:37:17 +0100 Subject: [PATCH 13/17] nit --- .../array/primitive/compute/filter_indices.rs | 20 +++++-------------- vortex-expr/src/operators.rs | 13 ++++++++++++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index f024682c30..9eb4f516d7 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -4,7 +4,6 @@ use arrow_buffer::BooleanBuffer; use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_error::{vortex_bail, VortexResult}; use vortex_expr::expressions::{Disjunction, Predicate, Value}; -use vortex_expr::operators::Operator; use crate::array::bool::BoolArray; use crate::array::primitive::PrimitiveArray; @@ -44,13 +43,16 @@ fn indices_matching_predicate( let rhs = match &predicate.right { Value::Field(_) => { - vortex_bail!("Right-hand-side fields not yet supported.") + vortex_bail!( + "Cannot apply predicate with right-hand-side reference to primitive array." + ) } Value::Literal(scalar) => scalar, }; + let matching_idxs = match_each_native_ptype!(arr.ptype(), |$T| { let rhs_typed: $T = rhs.try_into().unwrap(); - let predicate_fn = get_predicate::<$T>(&predicate.op); + let predicate_fn = &predicate.op.to_predicate::<$T>(); apply_predicate(arr.typed_data::<$T>(), &rhs_typed, predicate_fn) }); @@ -63,21 +65,9 @@ fn apply_predicate bool>( f: F, ) -> BooleanBuffer { let matches = lhs.iter().map(|lhs| f(lhs, rhs)); - BooleanBuffer::from_iter(matches) } -fn get_predicate(op: &Operator) -> fn(&T, &T) -> bool { - match op { - Operator::EqualTo => PartialEq::eq, - Operator::NotEqualTo => PartialEq::ne, - Operator::GreaterThan => PartialOrd::gt, - Operator::GreaterThanOrEqualTo => PartialOrd::ge, - Operator::LessThan => PartialOrd::lt, - Operator::LessThanOrEqualTo => PartialOrd::le, - } -} - #[cfg(test)] mod test { use itertools::Itertools; diff --git a/vortex-expr/src/operators.rs b/vortex-expr/src/operators.rs index da762dc586..b6c6c289af 100644 --- a/vortex-expr/src/operators.rs +++ b/vortex-expr/src/operators.rs @@ -1,5 +1,7 @@ use std::ops; +use vortex_dtype::NativePType; + use crate::expressions::Predicate; #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd)] @@ -45,4 +47,15 @@ impl Operator { Operator::LessThanOrEqualTo => Operator::GreaterThan, } } + + pub fn to_predicate(&self) -> fn(&T, &T) -> bool { + match self { + Operator::EqualTo => PartialEq::eq, + Operator::NotEqualTo => PartialEq::ne, + Operator::GreaterThan => PartialOrd::gt, + Operator::GreaterThanOrEqualTo => PartialOrd::ge, + Operator::LessThan => PartialOrd::lt, + Operator::LessThanOrEqualTo => PartialOrd::le, + } + } } From affde69569ff59c880409432aa8d2915db0f1104 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 16:39:42 +0100 Subject: [PATCH 14/17] nit --- .../array/primitive/compute/filter_indices.rs | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 9eb4f516d7..e4ca2377fc 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -44,7 +44,7 @@ fn indices_matching_predicate( let rhs = match &predicate.right { Value::Field(_) => { vortex_bail!( - "Cannot apply predicate with right-hand-side reference to primitive array." + "Cannot apply predicate with right-hand-side field reference to primitive array." ) } Value::Literal(scalar) => scalar, @@ -118,9 +118,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3]); @@ -130,9 +130,9 @@ mod test { predicates: vec![field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [6u64, 7, 8, 10]); @@ -142,9 +142,9 @@ mod test { predicates: vec![field.clone().eq(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [5u64]); @@ -154,9 +154,9 @@ mod test { predicates: vec![field.clone().gte(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [5u64, 6, 7, 8, 10]); @@ -166,9 +166,9 @@ mod test { predicates: vec![field.clone().lte(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3, 5]); } @@ -184,9 +184,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [2u64, 3]) } @@ -202,9 +202,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); let expected: [u64; 0] = []; assert_eq!(filtered, expected) From 3f1c16a40ab82a57cc738b8892e70ee1c0b8a45f Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 16:41:52 +0100 Subject: [PATCH 15/17] nit --- .../array/primitive/compute/filter_indices.rs | 60 +++++++++++-------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index e4ca2377fc..94160880c1 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -43,9 +43,7 @@ fn indices_matching_predicate( let rhs = match &predicate.right { Value::Field(_) => { - vortex_bail!( - "Cannot apply predicate with right-hand-side field reference to primitive array." - ) + vortex_bail!("Cannot apply field reference to primitive array") } Value::Literal(scalar) => scalar, }; @@ -118,9 +116,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3]); @@ -130,9 +128,9 @@ mod test { predicates: vec![field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [6u64, 7, 8, 10]); @@ -142,9 +140,9 @@ mod test { predicates: vec![field.clone().eq(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [5u64]); @@ -154,9 +152,9 @@ mod test { predicates: vec![field.clone().gte(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [5u64, 6, 7, 8, 10]); @@ -166,9 +164,9 @@ mod test { predicates: vec![field.clone().lte(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3, 5]); } @@ -184,9 +182,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [2u64, 3]) } @@ -202,9 +200,9 @@ mod test { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], }, ) - .unwrap() - .flatten_bool() - .unwrap(); + .unwrap() + .flatten_bool() + .unwrap(); let filtered = to_int_indices(filtered_primitive); let expected: [u64; 0] = []; assert_eq!(filtered, expected) @@ -229,4 +227,18 @@ mod test { let filtered = to_int_indices(filtered_primitive); assert_eq!(filtered, [0u64, 1, 2, 3, 5, 6, 7, 8, 9]) } + + #[test] + fn test_invalid_path_err() { + let arr = + PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); + let field = FieldPathBuilder::new().join("some_field").build(); + let filtered_primitive = apply_conjunctive_filter( + &arr, + Conjunction { + predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], + }, + ) + .expect_err("Cannot apply field reference to primitive array"); + } } From 09b28b8b96e80d288846fc5af23ebd489569d33b Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 17 May 2024 17:01:43 +0100 Subject: [PATCH 16/17] nit --- .../array/primitive/compute/filter_indices.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index 94160880c1..a49834d5d3 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -12,11 +12,12 @@ use crate::{Array, ArrayTrait, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, predicate: &Disjunction) -> VortexResult { - let conjunction_indices = predicate.conjunctions.iter().flat_map(|conj| { + let conjunction_indices = predicate.conjunctions.iter().map(|conj| { conj.predicates .iter() - .map(|pred| indices_matching_predicate(self, pred).unwrap()) - .reduce(|a, b| a.bitand(&b)) + .map(|pred| indices_matching_predicate(self, pred)) + .reduce(|a, b| Ok(a?.bitand(&b?))) + .unwrap() }); let present_buf = self .validity() @@ -24,12 +25,12 @@ impl FilterIndicesFn for PrimitiveArray { .to_present_null_buffer()? .into_inner(); - let bitset = conjunction_indices - .reduce(|a, b| a.bitor(&b)) - .map(|bitset| bitset.bitand(&present_buf)) - .unwrap_or_else(|| BooleanBuffer::new_set(self.len())); + let bitset: VortexResult = conjunction_indices + .reduce(|a, b| Ok(a?.bitor(&b?))) + .map(|bitset| Ok(bitset?.bitand(&present_buf))) + .unwrap_or_else(|| Ok(BooleanBuffer::new_set(self.len()))); - Ok(BoolArray::from(bitset).into_array()) + Ok(BoolArray::from(bitset?).into_array()) } } @@ -233,7 +234,7 @@ mod test { let arr = PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid); let field = FieldPathBuilder::new().join("some_field").build(); - let filtered_primitive = apply_conjunctive_filter( + apply_conjunctive_filter( &arr, Conjunction { predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))], From 180af051c20a80c38b23e6ebb90d53eb1db2c0b5 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Mon, 20 May 2024 10:27:19 +0100 Subject: [PATCH 17/17] benchmark --- vortex-array/Cargo.toml | 4 +++ vortex-array/benches/filter_indices.rs | 38 ++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 vortex-array/benches/filter_indices.rs diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 8f1de4b41f..838b5e105a 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -58,3 +58,7 @@ harness = false [[bench]] name = "scalar_subtract" harness = false + +[[bench]] +name = "filter_indices" +harness = false \ No newline at end of file diff --git a/vortex-array/benches/filter_indices.rs b/vortex-array/benches/filter_indices.rs new file mode 100644 index 0000000000..7f5e7127ff --- /dev/null +++ b/vortex-array/benches/filter_indices.rs @@ -0,0 +1,38 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use itertools::Itertools; +use rand::distributions::Uniform; +use rand::{thread_rng, Rng}; +use vortex::IntoArray; +use vortex_dtype::field_paths::FieldPath; +use vortex_error::VortexError; +use vortex_expr::expressions::{lit, Conjunction, Disjunction}; +use vortex_expr::field_paths::FieldPathOperations; + +fn filter_indices(c: &mut Criterion) { + let mut group = c.benchmark_group("filter_indices"); + + let mut rng = thread_rng(); + let range = Uniform::new(0i64, 100_000_000); + let arr = (0..10_000_000) + .map(|_| rng.sample(range)) + .collect_vec() + .into_array(); + + let predicate = Disjunction { + conjunctions: vec![Conjunction { + predicates: vec![FieldPath::builder().build().lt(lit(50_000_000i64))], + }], + }; + + group.bench_function("vortex", |b| { + b.iter(|| { + let indices = + vortex::compute::filter_indices::filter_indices(&arr, &predicate).unwrap(); + black_box(indices); + Ok::<(), VortexError>(()) + }); + }); +} + +criterion_group!(benches, filter_indices); +criterion_main!(benches);