Skip to content

Commit

Permalink
cleanup, tests, benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
jdcasale committed May 30, 2024
1 parent 8d9b8c9 commit 5fc96c1
Show file tree
Hide file tree
Showing 13 changed files with 277 additions and 184 deletions.
19 changes: 4 additions & 15 deletions vortex-array/benches/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,12 @@ fn compare_bool(c: &mut Criterion) {
let mut group = c.benchmark_group("compare");

let mut rng = thread_rng();
let range = Uniform::new(0u8, 1);
let arr = BoolArray::from(
(0..10_000_000)
.map(|_| rng.sample(range) == 0)
.collect_vec(),
)
.into_array();
let arr2 = BoolArray::from(
(0..10_000_000)
.map(|_| rng.sample(range) == 0)
.collect_vec(),
)
.into_array();
let arr = BoolArray::from((0..10_000_000).map(|_| rng.gen()).collect_vec()).into_array();
let arr2 = BoolArray::from((0..10_000_000).map(|_| rng.gen()).collect_vec()).into_array();

group.bench_function("compare_bool", |b| {
b.iter(|| {
let indices = compare(&arr, &arr2, Operator::GreaterThanOrEqualTo).unwrap();
let indices = compare(&arr, &arr2, Operator::LessThan).unwrap();
black_box(indices);
Ok::<(), VortexError>(())
});
Expand All @@ -52,7 +41,7 @@ fn compare_int(c: &mut Criterion) {

group.bench_function("compare_int", |b| {
b.iter(|| {
let indices = compare(&arr, &arr2, Operator::GreaterThanOrEqualTo).unwrap();
let indices = compare(&arr, &arr2, Operator::LessThan).unwrap();
black_box(indices);
Ok::<(), VortexError>(())
});
Expand Down
14 changes: 3 additions & 11 deletions vortex-array/benches/compare_scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,11 @@ fn compare_bool_scalar(c: &mut Criterion) {
let mut group = c.benchmark_group("compare_scalar");

let mut rng = thread_rng();
let range = Uniform::new(0u8, 1);
let arr = BoolArray::from(
(0..10_000_000)
.map(|_| rng.sample(range) == 0)
.collect_vec(),
)
.into_array();
let arr = BoolArray::from((0..10_000_000).map(|_| rng.gen()).collect_vec()).into_array();

group.bench_function("compare_bool", |b| {
b.iter(|| {
let indices =
compare_scalar(&arr, Operator::GreaterThanOrEqualTo, &false.into()).unwrap();
let indices = compare_scalar(&arr, Operator::LessThan, &false.into()).unwrap();
black_box(indices);
Ok::<(), VortexError>(())
});
Expand All @@ -42,8 +35,7 @@ fn compare_int_scalar(c: &mut Criterion) {

group.bench_function("compare_int", |b| {
b.iter(|| {
let indices =
compare_scalar(&arr, Operator::GreaterThanOrEqualTo, &50_000_000.into()).unwrap();
let indices = compare_scalar(&arr, Operator::LessThan, &50_000_000.into()).unwrap();
black_box(indices);
Ok::<(), VortexError>(())
});
Expand Down
47 changes: 43 additions & 4 deletions vortex-array/benches/filter_indices.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
use std::sync::Arc;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use itertools::Itertools;
use rand::distributions::Uniform;
use rand::{thread_rng, Rng};
use vortex::array::r#struct::StructArray;
use vortex::validity::Validity;
use vortex::IntoArray;
use vortex_dtype::field_paths::FieldPath;
use vortex_dtype::field_paths::{field, FieldPath};
use vortex_error::VortexError;
use vortex_expr::expressions::{lit, Conjunction, Disjunction};
use vortex_expr::field_paths::FieldPathOperations;
use vortex_expr::operators::{field_comparison, Operator};

fn filter_indices(c: &mut Criterion) {
let mut group = c.benchmark_group("filter_indices");
fn filter_indices_primitive(c: &mut Criterion) {
let mut group = c.benchmark_group("filter_indices_primitive");

let mut rng = thread_rng();
let range = Uniform::new(0i64, 100_000_000);
Expand All @@ -34,5 +39,39 @@ fn filter_indices(c: &mut Criterion) {
});
}

criterion_group!(benches, filter_indices);
/// Benchmarks `filter_indices` on a two-column struct array with the predicate
/// `field_a < field_b`.
///
/// Both columns hold 10_000_000 `i64` values sampled from
/// `Uniform::new(0i64, 100_000_000)`; only the filtering call itself sits inside the
/// measured closure.
fn filter_indices_struct(c: &mut Criterion) {
    let mut group = c.benchmark_group("filter_indices_struct");

    // The predicate does not depend on the data, so build it up front.
    let predicate = field_comparison(Operator::LessThan, field("field_a"), field("field_b"));

    let mut rng = thread_rng();
    let range = Uniform::new(0i64, 100_000_000);
    let column_a = (0..10_000_000)
        .map(|_| rng.sample(range))
        .collect_vec()
        .into_array();
    let column_b = (0..10_000_000)
        .map(|_| rng.sample(range))
        .collect_vec()
        .into_array();

    // Capture the row count before the columns are moved into the struct array.
    let row_count = column_b.len();
    let structs = StructArray::try_new(
        Arc::new([Arc::from("field_a"), Arc::from("field_b")]),
        vec![column_a, column_b],
        row_count,
        Validity::AllValid,
    )
    .unwrap()
    .into_array();

    group.bench_function("vortex", |bencher| {
        bencher.iter(|| {
            let matched =
                vortex::compute::filter_indices::filter_indices(&structs, &predicate).unwrap();
            // Keep the optimizer from discarding the computed result.
            black_box(matched);
            Ok::<(), VortexError>(())
        });
    });
}

criterion_group!(benches, filter_indices_primitive, filter_indices_struct);
criterion_main!(benches);
17 changes: 5 additions & 12 deletions vortex-array/src/array/bool/compute/compare.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::ops::{BitAnd, BitOr, BitXor, Not};
use std::ops::BitAnd;

use vortex_error::{vortex_err, VortexResult};
use vortex_expr::operators::Operator;

use crate::array::bool::BoolArray;
use crate::array::bool::{apply_comparison_op, BoolArray};
use crate::compute::compare::CompareFn;
use crate::{Array, ArrayTrait, IntoArray};

Expand All @@ -15,21 +15,14 @@ impl CompareFn for BoolArray {
.map_err(|_| vortex_err!("Cannot compare boolean array with non-boolean array"))?;
let lhs = self.boolean_buffer();
let rhs = flattened.boolean_buffer();
let result_buf = match op {
Operator::EqualTo => lhs.bitxor(&rhs).not(),
Operator::NotEqualTo => lhs.bitxor(&rhs),
let comparison_result = apply_comparison_op(lhs, rhs, op);

Operator::GreaterThan => lhs.bitand(&rhs.not()),
Operator::GreaterThanOrEqualTo => lhs.bitor(&rhs.not()),
Operator::LessThan => lhs.not().bitand(&rhs),
Operator::LessThanOrEqualTo => lhs.not().bitor(&rhs),
};
Ok(BoolArray::from(
self.validity()
.to_logical(self.len())
.to_null_buffer()?
.map(|nulls| result_buf.bitand(&nulls.into_inner()))
.unwrap_or(result_buf),
.map(|nulls| comparison_result.bitand(&nulls.into_inner()))
.unwrap_or(comparison_result),
)
.into_array())
}
Expand Down
53 changes: 29 additions & 24 deletions vortex-array/src/array/bool/compute/compare_scalar.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
use std::ops::{BitAnd, BitOr, BitXor, Not};
use std::ops::BitAnd;

use arrow_buffer::BooleanBufferBuilder;
use vortex_dtype::DType;
use vortex_error::{vortex_bail, vortex_err, VortexResult};
use vortex_expr::operators::Operator;
use vortex_scalar::Scalar;

use crate::array::bool::BoolArray;
use crate::array::bool::{apply_comparison_op, BoolArray};
use crate::compute::compare_scalar::CompareScalarFn;
use crate::{Array, ArrayTrait, IntoArray};

impl CompareScalarFn for BoolArray {
fn compare_scalar(&self, op: Operator, scalar: &Scalar) -> VortexResult<Array> {
match scalar.dtype() {
DType::Bool(_) => {}
_ => {
vortex_bail!("Invalid dtype for boolean scalar comparison")
}
if let DType::Bool(_) = scalar.dtype() {
} else {
vortex_bail!("Invalid dtype for boolean scalar comparison")
}
let lhs = self.boolean_buffer();

let lhs = self.boolean_buffer();
let scalar_val = scalar
.value()
.as_bool()?
Expand All @@ -28,22 +26,14 @@ impl CompareScalarFn for BoolArray {
let mut rhs = BooleanBufferBuilder::new(self.len());
rhs.append_n(self.len(), scalar_val);
let rhs = rhs.finish();
let result_buf = match op {
Operator::EqualTo => lhs.bitxor(&rhs).not(),
Operator::NotEqualTo => lhs.bitxor(&rhs),
Operator::GreaterThan => lhs.bitand(&rhs.not()),
Operator::GreaterThanOrEqualTo => lhs.bitor(&rhs.not()),
Operator::LessThan => lhs.not().bitand(&rhs),
Operator::LessThanOrEqualTo => lhs.not().bitor(&rhs),
};

let present = self
.validity()
.to_logical(self.len())
.to_present_null_buffer()?
.into_inner();

Ok(BoolArray::from(result_buf.bitand(&present)).into_array())
let comparison_result = apply_comparison_op(lhs, rhs, op);

let present = self.validity().to_logical(self.len()).to_null_buffer()?;
let with_validity_applied = present
.map(|p| comparison_result.bitand(&p.into_inner()))
.unwrap_or(comparison_result);

Ok(BoolArray::from(with_validity_applied).into_array())
}
}

Expand Down Expand Up @@ -78,6 +68,21 @@ mod test {

let matches = compare_scalar(&arr, Operator::NotEqualTo, &false.into())?.flatten_bool()?;
assert_eq!(to_int_indices(matches), [1u64, 3]);

let matches = compare_scalar(&arr, Operator::GreaterThan, &false.into())?.flatten_bool()?;
assert_eq!(to_int_indices(matches), [1u64, 3]);

let matches =
compare_scalar(&arr, Operator::GreaterThanOrEqualTo, &false.into())?.flatten_bool()?;
assert_eq!(to_int_indices(matches), [1u64, 2, 3]);

let matches = compare_scalar(&arr, Operator::LessThan, &false.into())?.flatten_bool()?;
let empty: [u64; 0] = [];
assert_eq!(to_int_indices(matches), empty);

let matches =
compare_scalar(&arr, Operator::LessThanOrEqualTo, &false.into())?.flatten_bool()?;
assert_eq!(to_int_indices(matches), [2u64]);
Ok(())
}
}
14 changes: 14 additions & 0 deletions vortex-array/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use std::ops::{BitAnd, BitOr, BitXor, Not};

use arrow_buffer::BooleanBuffer;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use vortex_buffer::Buffer;
use vortex_expr::operators::Operator;

use crate::validity::{ArrayValidity, ValidityMetadata};
use crate::validity::{LogicalValidity, Validity};
Expand Down Expand Up @@ -57,6 +60,17 @@ impl BoolArray {
}
}

/// Evaluates `lhs <op> rhs` position by position over two boolean buffers.
///
/// Booleans are ordered with `false < true`, so every operator reduces to a bitwise
/// expression over the two buffers. Only the raw bits are compared: this function
/// receives no validity information, so any null masking is left to the caller.
pub fn apply_comparison_op(lhs: BooleanBuffer, rhs: BooleanBuffer, op: Operator) -> BooleanBuffer {
    match op {
        // Equal exactly where the two bits do not differ.
        Operator::EqualTo => {
            let differing = lhs.bitxor(&rhs);
            differing.not()
        }
        Operator::NotEqualTo => lhs.bitxor(&rhs),
        // lhs > rhs only for (true, false).
        Operator::GreaterThan => {
            let rhs_unset = rhs.not();
            lhs.bitand(&rhs_unset)
        }
        // lhs >= rhs everywhere except (false, true).
        Operator::GreaterThanOrEqualTo => {
            let rhs_unset = rhs.not();
            lhs.bitor(&rhs_unset)
        }
        // lhs < rhs only for (false, true).
        Operator::LessThan => {
            let lhs_unset = lhs.not();
            lhs_unset.bitand(&rhs)
        }
        // lhs <= rhs everywhere except (true, false).
        Operator::LessThanOrEqualTo => {
            let lhs_unset = lhs.not();
            lhs_unset.bitor(&rhs)
        }
    }
}

impl From<BooleanBuffer> for BoolArray {
fn from(value: BooleanBuffer) -> Self {
Self::try_new(value, Validity::NonNullable).unwrap()
Expand Down
21 changes: 13 additions & 8 deletions vortex-array/src/array/primitive/compute/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ use crate::compute::compare::CompareFn;
use crate::{Array, ArrayTrait, IntoArray};

impl CompareFn for PrimitiveArray {
// @TODO(@jcasale) take stats into account here, which may allow us to elide some comparison
// work based on sortedness/min/max/etc.
fn compare(&self, other: &Array, predicate: Operator) -> VortexResult<Array> {
let flattened = other
.clone()
Expand All @@ -22,18 +24,21 @@ impl CompareFn for PrimitiveArray {
apply_predicate(self.typed_data::<$T>(), flattened.typed_data::<$T>(), predicate_fn)
});

let present = self
.validity()
.to_logical(self.len())
.to_present_null_buffer()?
.into_inner();
let present = self.validity().to_logical(self.len()).to_null_buffer()?;
let with_validity_applied = present
.map(|p| matching_idxs.bitand(&p.into_inner()))
.unwrap_or(matching_idxs);

let present_other = flattened
.validity()
.to_logical(self.len())
.to_present_null_buffer()?
.into_inner();
.to_null_buffer()?;

let with_other_validity_applied = present_other
.map(|p| with_validity_applied.bitand(&p.into_inner()))
.unwrap_or(with_validity_applied);

Ok(BoolArray::from(matching_idxs.bitand(&present).bitand(&present_other)).into_array())
Ok(BoolArray::from(with_other_validity_applied).into_array())
}
}

Expand Down
27 changes: 15 additions & 12 deletions vortex-array/src/array/primitive/compute/compare_scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@ use vortex_scalar::Scalar;
use crate::array::bool::BoolArray;
use crate::array::primitive::PrimitiveArray;
use crate::compute::compare_scalar::CompareScalarFn;
use crate::{Array, ArrayDType, ArrayTrait, IntoArray};
use crate::{Array, ArrayTrait, IntoArray};

impl CompareScalarFn for PrimitiveArray {
// @TODO(@jcasale) take stats into account here, which may allow us to elide some comparison
// work based on sortedness/min/max/etc.
fn compare_scalar(&self, op: Operator, scalar: &Scalar) -> VortexResult<Array> {
match self.dtype() {
DType::Primitive(..) => {}
_ => {
vortex_bail!("Invalid scalar dtype for primitive comparison")
}
if let DType::Primitive(..) = scalar.dtype() {
} else {
vortex_bail!("Invalid scalar dtype for boolean scalar comparison")
}

let p_val = scalar
.value()
.as_pvalue()?
Expand All @@ -29,13 +30,12 @@ impl CompareScalarFn for PrimitiveArray {
apply_predicate(self.typed_data::<$T>(), &rhs, predicate_fn)
});

let present = self
.validity()
.to_logical(self.len())
.to_present_null_buffer()?
.into_inner();
let present = self.validity().to_logical(self.len()).to_null_buffer()?;
let with_validity_applied = present
.map(|p| matching_idxs.bitand(&p.into_inner()))
.unwrap_or(matching_idxs);

Ok(BoolArray::from(matching_idxs.bitand(&present)).into_array())
Ok(BoolArray::from(with_validity_applied).into_array())
}
}

Expand Down Expand Up @@ -86,6 +86,9 @@ mod test {
let matches = compare_scalar(&arr, Operator::EqualTo, &5.into())?.flatten_bool()?;
assert_eq!(to_int_indices(matches), [5u64]);

let matches = compare_scalar(&arr, Operator::NotEqualTo, &5.into())?.flatten_bool()?;
assert_eq!(to_int_indices(matches), [0u64, 1, 2, 3, 6, 7, 8, 10]);

let matches = compare_scalar(&arr, Operator::EqualTo, &11.into())?.flatten_bool()?;
let empty: [u64; 0] = [];
assert_eq!(to_int_indices(matches), empty);
Expand Down
Loading

0 comments on commit 5fc96c1

Please sign in to comment.