Skip to content

Commit

Permalink
Move expression filters out of datafusion (#638)
Browse files (browse the repository at this point in the history)
  • Loading branch information
robert3005 authored Aug 16, 2024
1 parent f583ef3 commit 19e993c
Show file tree
Hide file tree
Showing 63 changed files with 1,098 additions and 1,371 deletions.
22 changes: 20 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"vortex-array",
"vortex-buffer",
"vortex-datafusion",
"vortex-datetime-dtype",
"vortex-dtype",
"vortex-error",
"vortex-expr",
Expand Down Expand Up @@ -130,6 +131,7 @@ vortex-array = { version = "0.7.0", path = "./vortex-array" }
vortex-buffer = { version = "0.7.0", path = "./vortex-buffer" }
vortex-byte-bool = { version = "0.7.0", path = "./encodings/byte-bool" }
vortex-datafusion = { version = "0.7.0", path = "./vortex-datafusion" }
vortex-datetime-dtype = { version = "0.7.0", path = "./vortex-datetime-dtype" }
vortex-datetime-parts = { version = "0.7.0", path = "./encodings/datetime-parts" }
vortex-dict = { version = "0.7.0", path = "./encodings/dict" }
vortex-dtype = { version = "0.7.0", path = "./vortex-dtype", default-features = false }
Expand Down
1 change: 0 additions & 1 deletion encodings/byte-bool/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-expr = { workspace = true }
vortex-scalar = { workspace = true }

[dev-dependencies]
Expand Down
3 changes: 1 addition & 2 deletions encodings/byte-bool/src/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ use std::ops::{BitAnd, BitOr, BitXor, Not};
use arrow_buffer::BooleanBuffer;
use num_traits::AsPrimitive;
use vortex::compute::unary::{FillForwardFn, ScalarAtFn};
use vortex::compute::{ArrayCompute, CompareFn, SliceFn, TakeFn};
use vortex::compute::{ArrayCompute, CompareFn, Operator, SliceFn, TakeFn};
use vortex::validity::{ArrayValidity, Validity};
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_integer_ptype, Nullability};
use vortex_error::{vortex_err, VortexResult};
use vortex_expr::Operator;
use vortex_scalar::{Scalar, ScalarValue};

use super::ByteBoolArray;
Expand Down
1 change: 1 addition & 0 deletions encodings/datetime-parts/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ workspace = true
log = { workspace = true }
serde = { workspace = true, features = ["derive"] }
vortex-array = { workspace = true }
vortex-datetime-dtype = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-scalar = { workspace = true }
3 changes: 2 additions & 1 deletion encodings/datetime-parts/src/compress.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use vortex::array::{PrimitiveArray, TemporalArray, TimeUnit};
use vortex::array::{PrimitiveArray, TemporalArray};
use vortex::compute::unary::try_cast;
use vortex::{Array, IntoArray, IntoArrayVariant};
use vortex_datetime_dtype::TimeUnit;
use vortex_dtype::PType;
use vortex_error::{vortex_bail, VortexResult};

Expand Down
7 changes: 4 additions & 3 deletions encodings/datetime-parts/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use vortex::array::temporal::TemporalMetadata;
use vortex::array::{PrimitiveArray, TemporalArray, TimeUnit};
use vortex::array::{PrimitiveArray, TemporalArray};
use vortex::compute::unary::{scalar_at, ScalarAtFn};
use vortex::compute::{slice, take, ArrayCompute, SliceFn, TakeFn};
use vortex::validity::ArrayValidity;
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_datetime_dtype::{TemporalMetadata, TimeUnit};
use vortex_dtype::DType;
use vortex_error::{vortex_bail, VortexResult};
use vortex_scalar::Scalar;
Expand Down Expand Up @@ -120,8 +120,9 @@ pub fn decode_to_temporal(array: &DateTimePartsArray) -> VortexResult<TemporalAr

#[cfg(test)]
mod test {
use vortex::array::{PrimitiveArray, TemporalArray, TimeUnit};
use vortex::array::{PrimitiveArray, TemporalArray};
use vortex::{IntoArray, IntoArrayVariant};
use vortex_datetime_dtype::TimeUnit;
use vortex_dtype::{DType, Nullability};

use crate::compute::decode_to_temporal;
Expand Down
22 changes: 9 additions & 13 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,24 @@ pin-project = { workspace = true }
rand = { workspace = true }
serde = { workspace = true, features = ["derive"] }
vortex-buffer = { workspace = true }
vortex-datetime-dtype = { workspace = true }
vortex-dtype = { workspace = true }
vortex-error = { workspace = true }
vortex-expr = { workspace = true }
vortex-flatbuffers = { workspace = true, optional = true }
vortex-scalar = { workspace = true }

[features]
default = ["flatbuffers", "serde"]
arbitrary = ["dep:arbitrary"]
flatbuffers = [
"dep:flatbuffers",
"dep:flexbuffers",
"dep:vortex-flatbuffers",
"vortex-flatbuffers/array",
"vortex-dtype/flatbuffers",
"vortex-error/flatbuffers",
"vortex-error/flexbuffers",
"vortex-scalar/flatbuffers",
"dep:flatbuffers",
"dep:flexbuffers",
"dep:vortex-flatbuffers",
"vortex-flatbuffers/array",
"vortex-dtype/flatbuffers",
"vortex-error/flatbuffers",
"vortex-error/flexbuffers",
"vortex-scalar/flatbuffers",
]
serde = ["vortex-dtype/serde", "vortex-scalar/serde"]

Expand All @@ -82,10 +82,6 @@ harness = false
name = "scalar_subtract"
harness = false

[[bench]]
name = "filter_indices"
harness = false

[[bench]]
name = "compare"
harness = false
8 changes: 4 additions & 4 deletions vortex-array/benches/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ use itertools::Itertools;
use rand::distributions::Uniform;
use rand::{thread_rng, Rng};
use vortex::array::BoolArray;
use vortex::compute::Operator;
use vortex::IntoArray;
use vortex_error::VortexError;
use vortex_expr::Operator;

fn filter_bool_indices(c: &mut Criterion) {
fn compare_bool(c: &mut Criterion) {
let mut group = c.benchmark_group("compare");

let mut rng = thread_rng();
Expand All @@ -34,7 +34,7 @@ fn filter_bool_indices(c: &mut Criterion) {
});
}

fn filter_indices(c: &mut Criterion) {
fn compare_primitive(c: &mut Criterion) {
let mut group = c.benchmark_group("compare");

let mut rng = thread_rng();
Expand All @@ -58,5 +58,5 @@ fn filter_indices(c: &mut Criterion) {
});
}

criterion_group!(benches, filter_indices, filter_bool_indices);
criterion_group!(benches, compare_primitive, compare_bool);
criterion_main!(benches);
32 changes: 0 additions & 32 deletions vortex-array/benches/filter_indices.rs

This file was deleted.

37 changes: 26 additions & 11 deletions vortex-array/src/array/bool/compute/compare.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use std::ops::{BitAnd, BitOr, BitXor, Not};

use vortex_error::VortexResult;
use vortex_expr::Operator;

use crate::array::BoolArray;
use crate::compute::CompareFn;
use crate::compute::{CompareFn, Operator};
use crate::{Array, IntoArray, IntoArrayVariant};

impl CompareFn for BoolArray {
Expand Down Expand Up @@ -34,7 +33,6 @@ impl CompareFn for BoolArray {
}

#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod test {
use itertools::Itertools;

Expand All @@ -54,17 +52,23 @@ mod test {
}

#[test]
fn test_basic_comparisons() -> VortexResult<()> {
fn test_basic_comparisons() {
let arr = BoolArray::from_vec(
vec![true, true, false, true, false],
Validity::Array(BoolArray::from(vec![false, true, true, true, true]).into_array()),
)
.into_array();

let matches = compare(&arr, &arr, Operator::Eq)?.into_bool()?;
let matches = compare(&arr, &arr, Operator::Eq)
.unwrap()
.into_bool()
.unwrap();
assert_eq!(to_int_indices(matches), [1u64, 2, 3, 4]);

let matches = compare(&arr, &arr, Operator::NotEq)?.into_bool()?;
let matches = compare(&arr, &arr, Operator::NotEq)
.unwrap()
.into_bool()
.unwrap();
let empty: [u64; 0] = [];
assert_eq!(to_int_indices(matches), empty);

Expand All @@ -74,17 +78,28 @@ mod test {
)
.into_array();

let matches = compare(&arr, &other, Operator::Lte)?.into_bool()?;
let matches = compare(&arr, &other, Operator::Lte)
.unwrap()
.into_bool()
.unwrap();
assert_eq!(to_int_indices(matches), [2u64, 3, 4]);

let matches = compare(&arr, &other, Operator::Lt)?.into_bool()?;
let matches = compare(&arr, &other, Operator::Lt)
.unwrap()
.into_bool()
.unwrap();
assert_eq!(to_int_indices(matches), [4u64]);

let matches = compare(&other, &arr, Operator::Gte)?.into_bool()?;
let matches = compare(&other, &arr, Operator::Gte)
.unwrap()
.into_bool()
.unwrap();
assert_eq!(to_int_indices(matches), [2u64, 3, 4]);

let matches = compare(&other, &arr, Operator::Gt)?.into_bool()?;
let matches = compare(&other, &arr, Operator::Gt)
.unwrap()
.into_bool()
.unwrap();
assert_eq!(to_int_indices(matches), [4u64]);
Ok(())
}
}
18 changes: 9 additions & 9 deletions vortex-array/src/array/constant/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ use std::cmp::Ordering;
use std::sync::Arc;

use arrow_array::Datum;
use arrow_ord::cmp;
use vortex_dtype::Nullability;
use vortex_error::{vortex_bail, vortex_err, VortexResult};
use vortex_expr::Operator;
use vortex_scalar::Scalar;

use crate::array::constant::ConstantArray;
use crate::arrow::FromArrowArray;
use crate::compute::unary::{scalar_at, ScalarAtFn};
use crate::compute::{
scalar_cmp, AndFn, ArrayCompute, CompareFn, FilterFn, OrFn, SearchResult, SearchSortedFn,
SearchSortedSide, SliceFn, TakeFn,
scalar_cmp, AndFn, ArrayCompute, CompareFn, FilterFn, Operator, OrFn, SearchResult,
SearchSortedFn, SearchSortedSide, SliceFn, TakeFn,
};
use crate::stats::{ArrayStatistics, Stat};
use crate::{Array, ArrayDType, AsArray, IntoArray, IntoCanonical};
Expand Down Expand Up @@ -110,12 +110,12 @@ impl CompareFn for ConstantArray {
let rhs = rhs.as_ref();

let boolean_array = match operator {
Operator::Eq => arrow_ord::cmp::eq(datum.as_ref(), &rhs)?,
Operator::NotEq => arrow_ord::cmp::neq(datum.as_ref(), &rhs)?,
Operator::Gt => arrow_ord::cmp::gt(datum.as_ref(), &rhs)?,
Operator::Gte => arrow_ord::cmp::gt_eq(datum.as_ref(), &rhs)?,
Operator::Lt => arrow_ord::cmp::lt(datum.as_ref(), &rhs)?,
Operator::Lte => arrow_ord::cmp::lt_eq(datum.as_ref(), &rhs)?,
Operator::Eq => cmp::eq(datum.as_ref(), &rhs)?,
Operator::NotEq => cmp::neq(datum.as_ref(), &rhs)?,
Operator::Gt => cmp::gt(datum.as_ref(), &rhs)?,
Operator::Gte => cmp::gt_eq(datum.as_ref(), &rhs)?,
Operator::Lt => cmp::lt(datum.as_ref(), &rhs)?,
Operator::Lte => cmp::lt_eq(datum.as_ref(), &rhs)?,
};

Ok(Array::from_arrow(&boolean_array, true))
Expand Down
Loading

0 comments on commit 19e993c

Please sign in to comment.