Skip to content

Commit

Permalink
Minimal expressions API for vortex (#318)
Browse files Browse the repository at this point in the history
Minimal set of expressions and operators for defining predicates over
vortex arrays.

This does **_not_** express recursive expression trees, but instead
restricts the algebra to disjunction-of-conjunction (OR-of-AND) of
field/value comparison operations.

I tried to restrict this to a manageable set of expressions that we will
definitely need, so as to avoid bloat; the expectation is that this set may
grow over time.

Subsequent changes will introduce:

- [ ] A pushdown API for vortex arrays
- [ ] Array pushdown implementations for the various expressions

Supersedes #308
  • Loading branch information
jdcasale authored May 15, 2024
1 parent 0ad2232 commit ae401e9
Show file tree
Hide file tree
Showing 9 changed files with 298 additions and 1 deletion.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ members = [
"vortex-datetime-parts",
"vortex-dict",
"vortex-error",
"vortex-expr",
"vortex-fastlanes",
"vortex-flatbuffers",
"vortex-ipc",
Expand Down
3 changes: 2 additions & 1 deletion vortex-dtype/src/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use DType::*;
use crate::nullability::Nullability;
use crate::{ExtDType, PType};

pub type FieldNames = Arc<[Arc<str>]>;
pub type FieldName = Arc<str>;
pub type FieldNames = Arc<[FieldName]>;

pub type Metadata = Vec<u8>;

Expand Down
28 changes: 28 additions & 0 deletions vortex-expr/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[package]
name = "vortex-expr"
version = { workspace = true }
description = "Vortex Expressions"
homepage = { workspace = true }
repository = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
include = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }

[lints]
workspace = true

[dependencies]
vortex-dtype = { path = "../vortex-dtype" }
vortex-error = { path = "../vortex-error" }
vortex-scalar = { path = "../vortex-scalar" }
serde = { workspace = true, optional = true, features = ["derive"] }


[dev-dependencies]


[features]
serde = ["dep:serde", "vortex-dtype/serde", "vortex-scalar/serde"]
6 changes: 6 additions & 0 deletions vortex-expr/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Vortex Expressions

A crate defining serializable predicate expressions. Used predominantly for filter push-down.

Takes inspiration from PostgreSQL (https://www.postgresql.org/docs/current/sql-expressions.html)
and DataFusion (https://github.com/apache/datafusion/tree/5fac581efbaffd0e6a9edf931182517524526afd/datafusion/expr).
98 changes: 98 additions & 0 deletions vortex-expr/src/display.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use core::fmt;
use std::fmt::{Display, Formatter};

use crate::expressions::{Conjunction, Disjunction, Predicate, Value};
use crate::operators::Operator;

/// Renders the conjunctions in order, separated by `"\nOR \n"`.
///
/// A disjunction without conjunctions renders as the empty string.
impl Display for Disjunction {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Stream every conjunction straight into the formatter: this avoids building a
        // temporary `String` per item, propagates formatting errors instead of panicking
        // inside `format!`, and does not need the unstable `Iterator::intersperse` adaptor.
        for (idx, conjunction) in self.conjunctions.iter().enumerate() {
            if idx > 0 {
                f.write_str("\nOR \n")?;
            }
            // `write!` starts from a fresh format spec, so width/fill flags given to the
            // disjunction are not forwarded to the individual conjunctions.
            write!(f, "{}", conjunction)?;
        }
        Ok(())
    }
}

/// Renders the predicates in order, separated by `" AND "`.
///
/// A conjunction without predicates renders as the empty string.
impl Display for Conjunction {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Stream every predicate straight into the formatter: this avoids building a
        // temporary `String` per item, propagates formatting errors instead of panicking
        // inside `format!`, and does not need the unstable `Iterator::intersperse` adaptor.
        for (idx, predicate) in self.predicates.iter().enumerate() {
            if idx > 0 {
                f.write_str(" AND ")?;
            }
            // `write!` starts from a fresh format spec, so width/fill flags given to the
            // conjunction are not forwarded to the individual predicates.
            write!(f, "{}", predicate)?;
        }
        Ok(())
    }
}

impl Display for Predicate {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "({} {} {})", self.left, self.op, self.right)
}
}

/// Delegates to the `Display` implementation of the wrapped field name or scalar.
impl Display for Value {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Value::Field(name) => Display::fmt(name, f),
            Value::Literal(scalar) => Display::fmt(scalar, f),
        }
    }
}

/// Renders each comparison operator as its infix symbol.
impl Display for Operator {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Operator::EqualTo => "=",
            Operator::NotEqualTo => "!=",
            Operator::GreaterThan => ">",
            Operator::GreaterThanOrEqualTo => ">=",
            Operator::LessThan => "<",
            Operator::LessThanOrEqualTo => "<=",
        })
    }
}

#[cfg(test)]
mod tests {
    use crate::expressions::{lit, Conjunction, Disjunction};

    /// A single predicate renders as `(<left> <op> <right>)`; negating a predicate flips
    /// its operator rather than wrapping it.
    #[test]
    fn test_predicate_formatting() {
        // Less-than
        assert_eq!(format!("{}", lit(1u32).lt(lit(2u32))), "(1 < 2)");
        // Greater-than-or-equal
        assert_eq!(format!("{}", lit(1u32).gte(lit(2u32))), "(1 >= 2)");
        // Negation: `!(1 <= 2)` becomes `(1 > 2)`
        assert_eq!(format!("{}", !lit(1u32).lte(lit(2u32))), "(1 > 2)");
    }

    /// A disjunction of conjunctions renders as `AND`-joined predicates, with the
    /// conjunctions separated by `"\nOR \n"`.
    #[test]
    fn test_dnf_formatting() {
        let first = Conjunction {
            predicates: vec![
                lit(1u32).lt(lit(2u32)),
                lit(1u32).gte(lit(2u32)),
                !lit(1u32).lte(lit(2u32)),
            ],
        };
        let second = Conjunction {
            predicates: vec![
                lit(2u32).lt(lit(3u32)),
                lit(3u32).gte(lit(4u32)),
                !lit(5u32).lte(lit(6u32)),
            ],
        };

        let dnf = Disjunction {
            conjunctions: vec![first, second],
        };

        assert_eq!(
            format!("{}", dnf),
            "(1 < 2) AND (1 >= 2) AND (1 > 2)\nOR \n(2 < 3) AND (3 >= 4) AND (5 > 6)"
        );
    }
}
112 changes: 112 additions & 0 deletions vortex-expr/src/expressions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use vortex_dtype::FieldName;
use vortex_scalar::Scalar;

use crate::expressions::Value::Field;
use crate::operators::Operator;

/// A logical OR over a list of [`Conjunction`]s.
///
/// Derives `Clone`, `Debug` and `PartialEq` like the other expression types in this module,
/// so a whole expression can be cloned, printed for debugging and compared in tests.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(transparent)
)]
pub struct Disjunction {
    /// The alternatives that are OR-ed together.
    pub conjunctions: Vec<Conjunction>,
}

/// A logical AND over a list of [`Predicate`]s.
///
/// With the `serde` feature enabled this (de)serializes transparently as the
/// `predicates` list itself.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(
feature = "serde",
derive(serde::Serialize, serde::Deserialize),
serde(transparent)
)]
pub struct Conjunction {
/// The predicates that are AND-ed together.
pub predicates: Vec<Predicate>,
}

/// One operand of a [`Predicate`]: either a field reference or a literal scalar.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Value {
/// A named reference to a qualified field in a dtype.
Field(FieldName),
/// A constant scalar value.
Literal(Scalar),
}

impl Value {
pub fn field(field_name: impl Into<FieldName>) -> Value {
Field(field_name.into())
}
// comparisons
pub fn eq(self, other: Value) -> Predicate {
Predicate {
left: self,
op: Operator::EqualTo,
right: other,
}
}

pub fn not_eq(self, other: Value) -> Predicate {
Predicate {
left: self,
op: Operator::NotEqualTo,
right: other,
}
}

pub fn gt(self, other: Value) -> Predicate {
Predicate {
left: self,
op: Operator::GreaterThan,
right: other,
}
}

pub fn gte(self, other: Value) -> Predicate {
Predicate {
left: self,
op: Operator::GreaterThanOrEqualTo,
right: other,
}
}

pub fn lt(self, other: Value) -> Predicate {
Predicate {
left: self,
op: Operator::LessThan,
right: other,
}
}

pub fn lte(self, other: Value) -> Predicate {
Predicate {
left: self,
op: Operator::LessThanOrEqualTo,
right: other,
}
}
}

/// A single binary comparison of the form `left op right`.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Predicate {
/// Left-hand operand.
pub left: Value,
/// Comparison operator applied to the two operands.
pub op: Operator,
/// Right-hand operand.
pub right: Value,
}

/// Wraps anything convertible into a [`Scalar`] as a literal [`Value`].
pub fn lit<T>(n: T) -> Value
where
    T: Into<Scalar>,
{
    let scalar: Scalar = n.into();
    Value::Literal(scalar)
}

#[cfg(test)]
mod test {
    use super::*;

    /// A field compared for equality against a literal renders as `(<field> = <literal>)`.
    #[test]
    fn test_lit() {
        let one: Scalar = 1.into();
        let predicate = Value::field("id").eq(lit(one));
        assert_eq!(predicate.to_string(), "(id = 1)");
    }
}
6 changes: 6 additions & 0 deletions vortex-expr/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//! Serializable predicate expressions, used predominantly for filter push-down.
//!
//! The expression types live in [`expressions`] and the comparison operators in
//! [`operators`].

// Opts in to the unstable `Iterator::intersperse` adaptor (requires a nightly toolchain).
#![feature(iter_intersperse)]
extern crate core;

// `Display` implementations for the expression and operator types.
mod display;
pub mod expressions;
pub mod operators;
35 changes: 35 additions & 0 deletions vortex-expr/src/operators.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use std::ops;

use crate::expressions::Predicate;

/// Comparison operators that relate the two operands of a [`Predicate`].
#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Operator {
// comparison
/// `left = right`
EqualTo,
/// `left != right`
NotEqualTo,
/// `left > right`
GreaterThan,
/// `left >= right`
GreaterThanOrEqualTo,
/// `left < right`
LessThan,
/// `left <= right`
LessThanOrEqualTo,
}

/// Logical negation of a predicate.
///
/// The operands are kept as they are; only the operator is swapped for its complement
/// (`=` ↔ `!=`, `>` ↔ `<=`, `>=` ↔ `<`).
impl ops::Not for Predicate {
    type Output = Self;

    fn not(self) -> Self::Output {
        let negated = match self.op {
            Operator::EqualTo => Operator::NotEqualTo,
            Operator::NotEqualTo => Operator::EqualTo,
            Operator::GreaterThan => Operator::LessThanOrEqualTo,
            Operator::LessThanOrEqualTo => Operator::GreaterThan,
            Operator::GreaterThanOrEqualTo => Operator::LessThan,
            Operator::LessThan => Operator::GreaterThanOrEqualTo,
        };
        Predicate { op: negated, ..self }
    }
}

0 comments on commit ae401e9

Please sign in to comment.