-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add FilterFn trait + default implementation #458
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
use arrow_array::cast::AsArray; | ||
use vortex_dtype::{DType, Nullability}; | ||
use vortex_error::VortexResult; | ||
|
||
use crate::arrow::FromArrowArray; | ||
use crate::{Array, ArrayDType, ArrayData, IntoArray, IntoCanonical}; | ||
|
||
pub trait FilterFn { | ||
/// Filter an array by the provided predicate. | ||
fn filter(&self, predicate: &Array) -> Array; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you think about moving to this taking a |
||
} | ||
|
||
/// Return a new array by applying a boolean predicate to select items from a base Array. | ||
/// | ||
/// # Performance | ||
/// | ||
/// This function attempts to amortize the cost of copying | ||
/// | ||
/// # Panics | ||
/// | ||
/// The `predicate` must receive an Array with type non-nullable bool, and will panic if this is | ||
/// not the case. | ||
pub fn filter(array: &Array, predicate: &Array) -> VortexResult<Array> { | ||
assert_eq!( | ||
predicate.dtype(), | ||
&DType::Bool(Nullability::NonNullable), | ||
"predicate must be non-nullable bool" | ||
); | ||
assert_eq!( | ||
predicate.len(), | ||
array.len(), | ||
"predicate.len() must equal array.len()" | ||
); | ||
|
||
array.with_dyn(|a| { | ||
if let Some(ref filter_fn) = a.filter() { | ||
Ok(filter_fn.filter(array)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whoops, yes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ill fix it in a follow up. |
||
} else { | ||
// Fallback: implement using Arrow kernels. | ||
let array_ref = array.clone().into_canonical()?.into_arrow(); | ||
let predicate_ref = predicate.clone().into_canonical()?.into_arrow(); | ||
let filtered = | ||
arrow_select::filter::filter(array_ref.as_ref(), predicate_ref.as_boolean())?; | ||
|
||
Ok(ArrayData::from_arrow(filtered, array.dtype().is_nullable()).into_array()) | ||
} | ||
}) | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use crate::array::bool::BoolArray; | ||
use crate::array::primitive::PrimitiveArray; | ||
use crate::compute::filter::filter; | ||
use crate::validity::Validity; | ||
use crate::{IntoArray, IntoCanonical}; | ||
|
||
#[test] | ||
fn test_filter() { | ||
let items = | ||
PrimitiveArray::from_nullable_vec(vec![Some(0i32), None, Some(1i32), None, Some(2i32)]) | ||
.into_array(); | ||
let predicate = | ||
BoolArray::from_vec(vec![true, false, true, false, true], Validity::NonNullable) | ||
.into_array(); | ||
|
||
let filtered = filter(&items, &predicate).unwrap(); | ||
assert_eq!( | ||
filtered | ||
.into_canonical() | ||
.unwrap() | ||
.into_primitive() | ||
.unwrap() | ||
.into_maybe_null_slice::<i32>(), | ||
vec![0i32, 1i32, 2i32] | ||
); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these were cleanups to make
cargo doc -p vortex-array
complete cleanly