-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Basic predicate pushdown support for Datafusion (#472)
Enables basic support for predicate pushdown over in-memory vortex arrays for `eq` operations under fairly limited conditions.
- Loading branch information
Showing
17 changed files
with
203 additions
and
161 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,32 @@ | ||
use vortex_dtype::DType; | ||
use vortex_error::{vortex_err, VortexResult}; | ||
use arrow_ord::cmp; | ||
use vortex_error::VortexResult; | ||
use vortex_expr::Operator; | ||
|
||
use crate::{Array, ArrayDType, IntoArrayVariant}; | ||
use crate::{arrow::FromArrowArray, Array, ArrayData, IntoArray, IntoCanonical}; | ||
|
||
pub trait CompareFn { | ||
fn compare(&self, array: &Array, predicate: Operator) -> VortexResult<Array>; | ||
fn compare(&self, array: &Array, operator: Operator) -> VortexResult<Array>; | ||
} | ||
|
||
pub fn compare(left: &Array, right: &Array, operator: Operator) -> VortexResult<Array> { | ||
if let Some(matching_indices) = | ||
left.with_dyn(|lhs| lhs.compare().map(|rhs| rhs.compare(right, operator))) | ||
if let Some(selection) = | ||
left.with_dyn(|lhs| lhs.compare().map(|lhs| lhs.compare(right, operator))) | ||
{ | ||
return matching_indices; | ||
return selection; | ||
} | ||
|
||
// if compare is not implemented for the given array type, but the array has a numeric | ||
// DType, we can flatten the array and apply filter to the flattened primitive array | ||
match left.dtype() { | ||
DType::Primitive(..) => { | ||
let flat = left.clone().into_primitive()?; | ||
flat.compare(right, operator) | ||
} | ||
_ => Err(vortex_err!( | ||
NotImplemented: "compare", | ||
left.encoding().id() | ||
)), | ||
} | ||
// Fall back to Arrow on canonical types | ||
let lhs = left.clone().into_canonical()?.into_arrow(); | ||
let rhs = right.clone().into_canonical()?.into_arrow(); | ||
|
||
let array = match operator { | ||
Operator::Eq => cmp::eq(&lhs.as_ref(), &rhs.as_ref())?, | ||
Operator::NotEq => cmp::neq(&lhs.as_ref(), &rhs.as_ref())?, | ||
Operator::Gt => cmp::gt(&lhs.as_ref(), &rhs.as_ref())?, | ||
Operator::Gte => cmp::gt_eq(&lhs.as_ref(), &rhs.as_ref())?, | ||
Operator::Lt => cmp::lt(&lhs.as_ref(), &rhs.as_ref())?, | ||
Operator::Lte => cmp::lt_eq(&lhs.as_ref(), &rhs.as_ref())?, | ||
}; | ||
|
||
Ok(ArrayData::from_arrow(&array, true).into_array()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
use datafusion_expr::{Expr, Operator as DFOperator}; | ||
use vortex::{ | ||
array::{bool::BoolArray, constant::ConstantArray}, | ||
compute::compare, | ||
Array, IntoArray, IntoArrayVariant, | ||
}; | ||
use vortex_error::{vortex_bail, vortex_err, VortexResult}; | ||
use vortex_expr::Operator; | ||
|
||
pub struct ExpressionEvaluator; | ||
|
||
impl ExpressionEvaluator { | ||
pub fn eval(array: Array, expr: &Expr) -> VortexResult<Array> { | ||
match expr { | ||
Expr::BinaryExpr(expr) => { | ||
let lhs = expr.left.as_ref(); | ||
let rhs = expr.right.as_ref(); | ||
|
||
// TODO(adamg): turn and/or into more general compute functions | ||
match expr.op { | ||
DFOperator::And => { | ||
let lhs = ExpressionEvaluator::eval(array.clone(), lhs)?.into_bool()?; | ||
let rhs = ExpressionEvaluator::eval(array, rhs)?.into_bool()?; | ||
let buffer = &lhs.boolean_buffer() & &rhs.boolean_buffer(); | ||
Ok(BoolArray::from(buffer).into_array()) | ||
} | ||
DFOperator::Or => { | ||
let lhs = ExpressionEvaluator::eval(array.clone(), lhs)?.into_bool()?; | ||
let rhs = ExpressionEvaluator::eval(array.clone(), rhs)?.into_bool()?; | ||
let buffer = &lhs.boolean_buffer() | &rhs.boolean_buffer(); | ||
Ok(BoolArray::from(buffer).into_array()) | ||
} | ||
DFOperator::Eq => { | ||
let lhs = ExpressionEvaluator::eval(array.clone(), lhs)?; | ||
let rhs = ExpressionEvaluator::eval(array.clone(), rhs)?; | ||
compare(&lhs, &rhs, Operator::Eq) | ||
} | ||
_ => vortex_bail!("{} is an unsupported operator", expr.op), | ||
} | ||
} | ||
Expr::Column(col) => { | ||
// TODO(adamg): Use variant trait once its merged | ||
let array = array.clone().into_struct()?; | ||
let name = col.name(); | ||
array | ||
.field_by_name(name) | ||
.ok_or(vortex_err!("Missing field {name} in struct")) | ||
} | ||
Expr::Literal(lit) => Ok(ConstantArray::new(lit.clone(), array.len()).into_array()), | ||
_ => unreachable!(), | ||
} | ||
} | ||
} |
Oops, something went wrong.