Skip to content

Commit

Permalink
feat: RowFilter is a valid pruning predicate (#1438)
Browse files Browse the repository at this point in the history
The appearance of `union` makes me think this whole file needs to be
cleaned up. These functions should probably all live on the rewriter, which
can accumulate all the references. I figure we can clean this up at some
later point.
  • Loading branch information
danking authored Nov 21, 2024
1 parent 21f5216 commit 918ec30
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
28 changes: 28 additions & 0 deletions vortex-file/src/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ use vortex_error::{VortexExpect as _, VortexResult};
use vortex_expr::{BinaryExpr, Column, ExprRef, Literal, Not, Operator};
use vortex_scalar::Scalar;

use crate::RowFilter;

#[derive(Debug, Clone)]
pub struct Relation<K, V> {
map: HashMap<K, HashSet<V>>,
Expand All @@ -39,6 +41,17 @@ impl<K: Hash + Eq, V: Hash + Eq> Relation<K, V> {
}
}

/// Combine every relation yielded by `iter` into a single relation.
///
/// The first relation is used as the accumulator and each subsequent one is
/// folded into it via [`Relation::extend`]. An empty iterator yields a fresh
/// relation from [`Relation::new`].
pub fn union(mut iter: impl Iterator<Item = Relation<K, V>>) -> Relation<K, V> {
    iter.by_ref()
        .reduce(|mut merged, next| {
            merged.extend(next);
            merged
        })
        .unwrap_or_else(Relation::new)
}

pub fn extend(&mut self, other: Relation<K, V>) {
for (l, rs) in other.map.into_iter() {
self.map.entry(l).or_default().extend(rs.into_iter())
Expand Down Expand Up @@ -183,6 +196,21 @@ fn convert_to_pruning_expression(expr: &ExprRef) -> PruningPredicateStats {
}
}

if let Some(RowFilter { conjunction }) = expr.as_any().downcast_ref::<RowFilter>() {
let (rewritten_conjunction, refses): (Vec<ExprRef>, Vec<Relation<Field, Stat>>) =
conjunction
.iter()
.map(convert_to_pruning_expression)
.unzip();

let refs = Relation::union(refses.into_iter());

return (
RowFilter::from_conjunction_expr(rewritten_conjunction),
refs,
);
}

not_prunable()
}

Expand Down
7 changes: 6 additions & 1 deletion vortex-file/src/read/filtering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::read::expr_project::expr_project;

#[derive(Debug, Clone)]
pub struct RowFilter {
conjunction: Vec<ExprRef>,
pub(crate) conjunction: Vec<ExprRef>,
}

impl RowFilter {
Expand All @@ -37,6 +37,11 @@ impl RowFilter {
Self { conjunction }
}

/// Create a new row filter from a conjunction and return it wrapped in an [`Arc`].
///
/// The filter itself is built by [`Self::from_conjunction`]. The conjunction
/// **must** have length > 0.
pub fn from_conjunction_expr(conjunction: Vec<ExprRef>) -> Arc<Self> {
    let filter = Self::from_conjunction(conjunction);
    Arc::new(filter)
}

pub fn only_fields(&self, fields: &[Field]) -> Option<Self> {
let conj = self
.conjunction
Expand Down

0 comments on commit 918ec30

Please sign in to comment.