-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Minimal expressions API for vortex (#318)
Minimal set of expressions and operators for defining predicates over vortex arrays. This does **_not_** express recursive expression trees, but instead restricts the algebra to disjunction-of-conjunction (OR-of-AND) of field/value comparison operations. I tried to restrict this to a manageable set of expressions that we will definitely need, so as to avoid bloat; the expectation here is that this may grow over time. Subsequent changes will introduce: - [ ] A pushdown API for vortex arrays - [ ] Array pushdown implementations for the various expressions. Supersedes #308
- Loading branch information
Showing
9 changed files
with
298 additions
and
1 deletion.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
[package] | ||
name = "vortex-expr" | ||
version = { workspace = true } | ||
description = "Vortex Expressions" | ||
homepage = { workspace = true } | ||
repository = { workspace = true } | ||
authors = { workspace = true } | ||
license = { workspace = true } | ||
keywords = { workspace = true } | ||
include = { workspace = true } | ||
edition = { workspace = true } | ||
rust-version = { workspace = true } | ||
|
||
[lints] | ||
workspace = true | ||
|
||
[dependencies] | ||
vortex-dtype = { path = "../vortex-dtype" } | ||
vortex-error = { path = "../vortex-error" } | ||
vortex-scalar = { path = "../vortex-scalar" } | ||
serde = { workspace = true, optional = true, features = ["derive"] } | ||
|
||
|
||
[dev-dependencies] | ||
|
||
|
||
[features] | ||
serde = ["dep:serde", "vortex-dtype/serde", "vortex-scalar/serde"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Vortex Expressions | ||
|
||
A crate defining serializable predicate expressions. Used predominantly for filter push-down. | ||
|
||
Takes inspiration from PostgreSQL (https://www.postgresql.org/docs/current/sql-expressions.html) | ||
and DataFusion (https://github.com/apache/datafusion/tree/5fac581efbaffd0e6a9edf931182517524526afd/datafusion/expr). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
use core::fmt; | ||
use std::fmt::{Display, Formatter}; | ||
|
||
use crate::expressions::{Conjunction, Disjunction, Predicate, Value}; | ||
use crate::operators::Operator; | ||
|
||
impl Display for Disjunction { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
self.conjunctions | ||
.iter() | ||
.map(|v| format!("{}", v)) | ||
.intersperse("\nOR \n".to_string()) | ||
.try_for_each(|s| write!(f, "{}", s)) | ||
} | ||
} | ||
|
||
impl Display for Conjunction { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
self.predicates | ||
.iter() | ||
.map(|v| format!("{}", v)) | ||
.intersperse(" AND ".to_string()) | ||
.try_for_each(|s| write!(f, "{}", s)) | ||
} | ||
} | ||
|
||
impl Display for Predicate { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
write!(f, "({} {} {})", self.left, self.op, self.right) | ||
} | ||
} | ||
|
||
impl Display for Value { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Value::Field(expr) => std::fmt::Display::fmt(expr, f), | ||
Value::Literal(scalar) => scalar.fmt(f), | ||
} | ||
} | ||
} | ||
|
||
impl Display for Operator { | ||
fn fmt(&self, f: &mut Formatter) -> fmt::Result { | ||
let display = match &self { | ||
Operator::EqualTo => "=", | ||
Operator::NotEqualTo => "!=", | ||
Operator::GreaterThan => ">", | ||
Operator::GreaterThanOrEqualTo => ">=", | ||
Operator::LessThan => "<", | ||
Operator::LessThanOrEqualTo => "<=", | ||
}; | ||
write!(f, "{display}") | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::expressions::{lit, Conjunction, Disjunction};

    /// Single predicates render as `(<left> <op> <right>)`.
    #[test]
    fn test_predicate_formatting() {
        // Less-than
        assert_eq!(lit(1u32).lt(lit(2u32)).to_string(), "(1 < 2)");
        // Greater-than-or-equal
        assert_eq!(lit(1u32).gte(lit(2u32)).to_string(), "(1 >= 2)");
        // Negation: `!(a <= b)` is rendered as `a > b`
        assert_eq!((!lit(1u32).lte(lit(2u32))).to_string(), "(1 > 2)");
    }

    /// A disjunction renders its conjunctions joined by `"\nOR \n"`, and each
    /// conjunction renders its predicates joined by `" AND "`.
    #[test]
    fn test_dnf_formatting() {
        let first = Conjunction {
            predicates: vec![
                lit(1u32).lt(lit(2u32)),
                lit(1u32).gte(lit(2u32)),
                !lit(1u32).lte(lit(2u32)),
            ],
        };
        let second = Conjunction {
            predicates: vec![
                lit(2u32).lt(lit(3u32)),
                lit(3u32).gte(lit(4u32)),
                !lit(5u32).lte(lit(6u32)),
            ],
        };

        let dnf = Disjunction {
            conjunctions: vec![first, second],
        };

        assert_eq!(
            dnf.to_string(),
            "(1 < 2) AND (1 >= 2) AND (1 > 2)\nOR \n(2 < 3) AND (3 >= 4) AND (5 > 6)"
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
use vortex_dtype::FieldName; | ||
use vortex_scalar::Scalar; | ||
|
||
use crate::expressions::Value::Field; | ||
use crate::operators::Operator; | ||
|
||
#[cfg_attr( | ||
feature = "serde", | ||
derive(serde::Serialize, serde::Deserialize), | ||
serde(transparent) | ||
)] | ||
pub struct Disjunction { | ||
pub conjunctions: Vec<Conjunction>, | ||
} | ||
|
||
#[derive(Clone, Debug, PartialEq)] | ||
#[cfg_attr( | ||
feature = "serde", | ||
derive(serde::Serialize, serde::Deserialize), | ||
serde(transparent) | ||
)] | ||
pub struct Conjunction { | ||
pub predicates: Vec<Predicate>, | ||
} | ||
|
||
#[derive(Clone, Debug, PartialEq)] | ||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] | ||
pub enum Value { | ||
/// A named reference to a qualified field in a dtype. | ||
Field(FieldName), | ||
/// A constant scalar value. | ||
Literal(Scalar), | ||
} | ||
|
||
impl Value { | ||
pub fn field(field_name: impl Into<FieldName>) -> Value { | ||
Field(field_name.into()) | ||
} | ||
// comparisons | ||
pub fn eq(self, other: Value) -> Predicate { | ||
Predicate { | ||
left: self, | ||
op: Operator::EqualTo, | ||
right: other, | ||
} | ||
} | ||
|
||
pub fn not_eq(self, other: Value) -> Predicate { | ||
Predicate { | ||
left: self, | ||
op: Operator::NotEqualTo, | ||
right: other, | ||
} | ||
} | ||
|
||
pub fn gt(self, other: Value) -> Predicate { | ||
Predicate { | ||
left: self, | ||
op: Operator::GreaterThan, | ||
right: other, | ||
} | ||
} | ||
|
||
pub fn gte(self, other: Value) -> Predicate { | ||
Predicate { | ||
left: self, | ||
op: Operator::GreaterThanOrEqualTo, | ||
right: other, | ||
} | ||
} | ||
|
||
pub fn lt(self, other: Value) -> Predicate { | ||
Predicate { | ||
left: self, | ||
op: Operator::LessThan, | ||
right: other, | ||
} | ||
} | ||
|
||
pub fn lte(self, other: Value) -> Predicate { | ||
Predicate { | ||
left: self, | ||
op: Operator::LessThanOrEqualTo, | ||
right: other, | ||
} | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug, PartialEq)] | ||
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] | ||
pub struct Predicate { | ||
pub left: Value, | ||
pub op: Operator, | ||
pub right: Value, | ||
} | ||
|
||
pub fn lit<T: Into<Scalar>>(n: T) -> Value { | ||
Value::Literal(n.into()) | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use super::*;

    /// A field compared against a literal renders as `(<field> = <literal>)`.
    #[test]
    fn test_lit() {
        let one: Scalar = 1.into();
        let predicate = Value::field("id").eq(lit(one));
        assert_eq!(format!("{}", predicate), "(id = 1)");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#![feature(iter_intersperse)] | ||
extern crate core; | ||
|
||
mod display; | ||
pub mod expressions; | ||
pub mod operators; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
use std::ops; | ||
|
||
use crate::expressions::Predicate; | ||
|
||
/// Comparison operators that can appear in a predicate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Operator {
    /// Equality comparison.
    EqualTo,
    /// Inequality comparison.
    NotEqualTo,
    /// Strictly-greater-than comparison.
    GreaterThan,
    /// Greater-than-or-equal comparison.
    GreaterThanOrEqualTo,
    /// Strictly-less-than comparison.
    LessThan,
    /// Less-than-or-equal comparison.
    LessThanOrEqualTo,
}
|
||
impl ops::Not for Predicate { | ||
type Output = Self; | ||
|
||
fn not(self) -> Self::Output { | ||
let inverse_op = match self.op { | ||
Operator::EqualTo => Operator::NotEqualTo, | ||
Operator::NotEqualTo => Operator::EqualTo, | ||
Operator::GreaterThan => Operator::LessThanOrEqualTo, | ||
Operator::GreaterThanOrEqualTo => Operator::LessThan, | ||
Operator::LessThan => Operator::GreaterThanOrEqualTo, | ||
Operator::LessThanOrEqualTo => Operator::GreaterThan, | ||
}; | ||
Predicate { | ||
left: self.left, | ||
op: inverse_op, | ||
right: self.right, | ||
} | ||
} | ||
} |