Skip to content

Commit

Permalink
Clean up fields / field paths (#353)
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn authored Jun 12, 2024
1 parent d9635b7 commit 192c0eb
Show file tree
Hide file tree
Showing 16 changed files with 769 additions and 560 deletions.
671 changes: 450 additions & 221 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions vortex-array/benches/filter_indices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use itertools::Itertools;
use rand::distributions::Uniform;
use rand::{thread_rng, Rng};
use vortex::IntoArray;
use vortex_dtype::field_paths::FieldPath;
use vortex_dtype::field::FieldPath;
use vortex_error::VortexError;
use vortex_expr::FieldPathOperations;
use vortex_expr::{lit, Conjunction, Disjunction};
Expand All @@ -18,11 +18,7 @@ fn filter_indices(c: &mut Criterion) {
.collect_vec()
.into_array();

let predicate = Disjunction {
conjunctions: vec![Conjunction {
predicates: vec![FieldPath::builder().build().lt(lit(50_000_000i64))],
}],
};
let predicate = Disjunction::from(Conjunction::from(FieldPath::root().lt(lit(50_000_000i64))));

group.bench_function("vortex", |b| {
b.iter(|| {
Expand Down
126 changes: 45 additions & 81 deletions vortex-array/src/array/primitive/compute/filter_indices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@ use crate::compute::filter_indices::FilterIndicesFn;
use crate::{Array, ArrayTrait, IntoArray};

impl FilterIndicesFn for PrimitiveArray {
fn filter_indices(&self, predicate: &Disjunction) -> VortexResult<Array> {
let conjunction_indices = predicate.conjunctions.iter().map(|conj| {
conj.predicates
.iter()
fn filter_indices(&self, disjunction: &Disjunction) -> VortexResult<Array> {
let conjunction_indices = disjunction.iter().map(|conj| {
conj.iter()
.map(|pred| indices_matching_predicate(self, pred))
.reduce(|a, b| Ok(a?.bitand(&b?)))
.unwrap()
Expand All @@ -38,11 +37,11 @@ fn indices_matching_predicate(
arr: &PrimitiveArray,
predicate: &Predicate,
) -> VortexResult<BooleanBuffer> {
if predicate.left.head().is_some() {
if !predicate.lhs.path().is_empty() {
vortex_bail!("Invalid path for primitive array")
}

let rhs = match &predicate.right {
let rhs = match &predicate.rhs {
Value::Field(_) => {
vortex_bail!("Cannot apply field reference to primitive array")
}
Expand Down Expand Up @@ -70,17 +69,14 @@ fn apply_predicate<T: NativePType, F: Fn(&T, &T) -> bool>(
#[cfg(test)]
mod test {
use itertools::Itertools;
use vortex_dtype::field_paths::FieldPathBuilder;
use vortex_expr::FieldPathOperations;
use vortex_expr::{lit, Conjunction};
use vortex_dtype::field::FieldPath;
use vortex_expr::{lit, Conjunction, FieldPathOperations};

use super::*;
use crate::validity::Validity;

fn apply_conjunctive_filter(arr: &PrimitiveArray, conj: Conjunction) -> VortexResult<Array> {
arr.filter_indices(&Disjunction {
conjunctions: vec![conj],
})
arr.filter_indices(&Disjunction::from_iter([conj]))
}

fn to_int_indices(filtered_primitive: BoolArray) -> Vec<u64> {
Expand Down Expand Up @@ -110,64 +106,44 @@ mod test {
None,
]);

let field = FieldPathBuilder::new().build();
let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().lt(lit(5u32))],
},
)
.unwrap()
.flatten_bool()
.unwrap();
let field = FieldPath::root();
let filtered_primitive =
apply_conjunctive_filter(&arr, Conjunction::from(field.lt(lit(5u32))))
.unwrap()
.flatten_bool()
.unwrap();
let filtered = to_int_indices(filtered_primitive);
assert_eq!(filtered, [0u64, 1, 2, 3]);

let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().gt(lit(5u32))],
},
)
.unwrap()
.flatten_bool()
.unwrap();
let filtered_primitive =
apply_conjunctive_filter(&arr, Conjunction::from(field.gt(lit(5u32))))
.unwrap()
.flatten_bool()
.unwrap();
let filtered = to_int_indices(filtered_primitive);
assert_eq!(filtered, [6u64, 7, 8, 10]);

let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().eq(lit(5u32))],
},
)
.unwrap()
.flatten_bool()
.unwrap();
let filtered_primitive =
apply_conjunctive_filter(&arr, Conjunction::from(field.equal(lit(5u32))))
.unwrap()
.flatten_bool()
.unwrap();
let filtered = to_int_indices(filtered_primitive);
assert_eq!(filtered, [5u64]);

let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().gte(lit(5u32))],
},
)
.unwrap()
.flatten_bool()
.unwrap();
let filtered_primitive =
apply_conjunctive_filter(&arr, Conjunction::from(field.gte(lit(5u32))))
.unwrap()
.flatten_bool()
.unwrap();
let filtered = to_int_indices(filtered_primitive);
assert_eq!(filtered, [5u64, 6, 7, 8, 10]);

let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().lte(lit(5u32))],
},
)
.unwrap()
.flatten_bool()
.unwrap();
let filtered_primitive =
apply_conjunctive_filter(&arr, Conjunction::from(field.lte(lit(5u32))))
.unwrap()
.flatten_bool()
.unwrap();
let filtered = to_int_indices(filtered_primitive);
assert_eq!(filtered, [0u64, 1, 2, 3, 5]);
}
Expand All @@ -176,12 +152,10 @@ mod test {
fn test_multiple_predicates() {
let arr =
PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid);
let field = FieldPathBuilder::new().build();
let field = FieldPath::root();
let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(2u32))],
},
Conjunction::from_iter([field.lt(lit(5u32)), field.gt(lit(2u32))]),
)
.unwrap()
.flatten_bool()
Expand All @@ -194,12 +168,10 @@ mod test {
fn test_disjoint_predicates() {
let arr =
PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid);
let field = FieldPathBuilder::new().build();
let field = FieldPath::root();
let filtered_primitive = apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))],
},
Conjunction::from_iter([field.lt(lit(5u32)), field.gt(lit(5u32))]),
)
.unwrap()
.flatten_bool()
Expand All @@ -213,17 +185,11 @@ mod test {
fn test_disjunctive_predicate() {
let arr =
PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid);
let field = FieldPathBuilder::new().build();
let c1 = Conjunction {
predicates: vec![field.clone().lt(lit(5u32))],
};
let c2 = Conjunction {
predicates: vec![field.clone().gt(lit(5u32))],
};

let disj = Disjunction {
conjunctions: vec![c1, c2],
};
let field = FieldPath::root();
let c1 = Conjunction::from(field.lt(lit(5u32)));
let c2 = Conjunction::from(field.gt(lit(5u32)));

let disj = Disjunction::from_iter([c1, c2]);
let filtered_primitive = arr.filter_indices(&disj).unwrap().flatten_bool().unwrap();
let filtered = to_int_indices(filtered_primitive);
assert_eq!(filtered, [0u64, 1, 2, 3, 5, 6, 7, 8, 9])
Expand All @@ -233,12 +199,10 @@ mod test {
fn test_invalid_path_err() {
let arr =
PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5, 6, 7, 8, 9, 10], Validity::AllValid);
let field = FieldPathBuilder::new().join("some_field").build();
let field = FieldPath::from_name("some_field");
apply_conjunctive_filter(
&arr,
Conjunction {
predicates: vec![field.clone().lt(lit(5u32)), field.clone().gt(lit(5u32))],
},
Conjunction::from_iter([field.lt(lit(5u32)), field.gt(lit(5u32))]),
)
.expect_err("Cannot apply field reference to primitive array");
}
Expand Down
11 changes: 11 additions & 0 deletions vortex-dtype/proto/vortex/dtype/dtype.proto
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,15 @@ message DType {
List list = 8;
Extension extension = 9;
}
}

// A single element of a field path, identified either by name or by index.
message Field {
// At most one of the alternatives below is set.
oneof field_type {
// The field is referenced by its name.
string name = 1;
// The field is referenced by an integer index.
int32 index = 2;
}
}

// An ordered list of Field elements.
message FieldPath {
// The elements of the path, in order.
repeated Field path = 1;
}
14 changes: 0 additions & 14 deletions vortex-dtype/proto/vortex/dtype/field_path.proto

This file was deleted.

86 changes: 86 additions & 0 deletions vortex-dtype/src/field.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
use core::fmt;
use std::fmt::{Display, Formatter};

/// A single step in a [`FieldPath`]: a field referenced either by name or by index.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Field {
    /// A field identified by its name.
    Name(String),
    /// A field identified by an integer index.
    Index(i32),
}

impl From<&str> for Field {
fn from(value: &str) -> Self {
Field::Name(value.into())
}
}

impl From<i32> for Field {
fn from(value: i32) -> Self {
Field::Index(value)
}
}

impl Display for Field {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Field::Name(name) => write!(f, "${name}"),
Field::Index(idx) => write!(f, "[{idx}]"),
}
}
}

/// An ordered sequence of [`Field`] steps.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FieldPath(Vec<Field>);

impl FieldPath {
    /// Creates the empty path, i.e. a path containing no fields.
    pub fn root() -> Self {
        Self(Vec::new())
    }

    /// Creates a single-element path consisting of the field with the given name.
    pub fn from_name(name: &str) -> Self {
        let field = Field::from(name);
        Self(vec![field])
    }

    /// Returns the fields that make up this path, in order.
    pub fn path(&self) -> &[Field] {
        self.0.as_slice()
    }

    /// Returns the name of the only field in this path.
    ///
    /// # Panics
    ///
    /// Panics if the path does not contain exactly one field, or if that
    /// field is a [`Field::Index`] rather than a [`Field::Name`].
    pub fn to_name(&self) -> &str {
        assert_eq!(self.0.len(), 1);
        match &self.0[0] {
            Field::Name(name) => name.as_str(),
            Field::Index(_) => panic!("FieldPath is not a name"),
        }
    }
}

impl FromIterator<Field> for FieldPath {
fn from_iter<T: IntoIterator<Item = Field>>(iter: T) -> Self {
FieldPath(iter.into_iter().collect())
}
}

impl From<Field> for FieldPath {
fn from(value: Field) -> Self {
FieldPath(vec![value])
}
}

impl From<Vec<Field>> for FieldPath {
fn from(value: Vec<Field>) -> Self {
FieldPath(value)
}
}

/// Renders the path as its fields' `Display` output joined by `.`.
///
/// A path with no fields renders as the empty string.
impl Display for FieldPath {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Write each field straight into the formatter instead of building a
        // temporary `String` per field plus a joined `String`.
        for (position, field) in self.0.iter().enumerate() {
            if position > 0 {
                f.write_str(".")?;
            }
            write!(f, "{field}")?;
        }
        Ok(())
    }
}
Loading

0 comments on commit 192c0eb

Please sign in to comment.