From e43ec57455771852e15d749e97a90e63e05c9131 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 24 Jun 2024 23:23:19 +0300 Subject: [PATCH] Add `ByteBoolArray` type and fixe a bug in `BoolArray` (#383) The main two contributions in this PR are: 1. A new `ByteBoolArray` type (closes #335) 2. An important fix for slicing and initialization of `BoolArray` --- Cargo.lock | 18 ++ encodings/byte_bool/Cargo.toml | 31 +++ encodings/byte_bool/src/compute/mod.rs | 265 +++++++++++++++++++ encodings/byte_bool/src/lib.rs | 158 +++++++++++ encodings/byte_bool/src/stats.rs | 102 +++++++ vortex-array/src/array/bool/compute/slice.rs | 29 ++ vortex-array/src/array/bool/mod.rs | 44 ++- vortex-array/src/compute/compare.rs | 6 +- vortex-array/src/validity.rs | 2 +- vortex-scalar/src/lib.rs | 4 + 10 files changed, 651 insertions(+), 8 deletions(-) create mode 100644 encodings/byte_bool/Cargo.toml create mode 100644 encodings/byte_bool/src/compute/mod.rs create mode 100644 encodings/byte_bool/src/lib.rs create mode 100644 encodings/byte_bool/src/stats.rs diff --git a/Cargo.lock b/Cargo.lock index 09e1105b3b..32297e9ff6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3816,6 +3816,24 @@ dependencies = [ "flexbuffers", ] +[[package]] +name = "vortex-bytebool" +version = "0.1.0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "divan", + "itertools 0.13.0", + "num-traits", + "serde", + "vortex-array", + "vortex-buffer", + "vortex-dtype", + "vortex-error", + "vortex-expr", + "vortex-scalar", +] + [[package]] name = "vortex-datafusion" version = "0.1.0" diff --git a/encodings/byte_bool/Cargo.toml b/encodings/byte_bool/Cargo.toml new file mode 100644 index 0000000000..9f610ddc3d --- /dev/null +++ b/encodings/byte_bool/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "vortex-bytebool" +version = { workspace = true } +description = "Vortex byte-boolean array" +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +include = { workspace = true } +edition = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +itertools = { workspace = true } +num-traits = { workspace = true } +serde = { workspace = true, features = ["derive"] } +vortex-array = { path = "../../vortex-array" } +vortex-buffer = { path = "../../vortex-buffer" } +vortex-error = { path = "../../vortex-error" } +vortex-dtype = { path = "../../vortex-dtype" } +vortex-scalar = { path = "../../vortex-scalar" } +vortex-expr = { path = "../../vortex-expr" } + +[dev-dependencies] +divan = { workspace = true } diff --git a/encodings/byte_bool/src/compute/mod.rs b/encodings/byte_bool/src/compute/mod.rs new file mode 100644 index 0000000000..eb0e273f87 --- /dev/null +++ b/encodings/byte_bool/src/compute/mod.rs @@ -0,0 +1,265 @@ +use std::ops::{BitAnd, BitOr, BitXor, Not}; +use std::sync::Arc; + +use arrow_buffer::BooleanBuffer; +use num_traits::AsPrimitive; +use vortex::validity::Validity; +use vortex::ToArrayData; +use vortex::{ + compute::{ + compare::CompareFn, slice::SliceFn, take::TakeFn, unary::fill_forward::FillForwardFn, + unary::scalar_at::ScalarAtFn, ArrayCompute, + }, + encoding::ArrayEncodingRef, + stats::StatsSet, + validity::ArrayValidity, + ArrayDType, ArrayData, ArrayTrait, IntoArray, +}; +use vortex::{Array, IntoCanonical}; +use vortex_dtype::{match_each_integer_ptype, Nullability}; +use vortex_error::{vortex_bail, VortexResult}; +use vortex_expr::Operator; +use vortex_scalar::{Scalar, ScalarValue}; + +use super::{ByteBoolArray, ByteBoolMetadata}; + +impl ArrayCompute for ByteBoolArray { + fn compare(&self) -> Option<&dyn CompareFn> { + Some(self) + } + + fn fill_forward(&self) -> Option<&dyn FillForwardFn> { + None + } + + fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { + Some(self) + } + + fn slice(&self) -> Option<&dyn SliceFn> { + Some(self) + } + + fn take(&self) -> Option<&dyn TakeFn> { + Some(self) + } +} + +impl ScalarAtFn for ByteBoolArray { + fn scalar_at(&self, index: usize) -> VortexResult { + if index >= self.len() { + vortex_bail!(OutOfBounds: index, 0, self.len()); + } + + let scalar = match self.is_valid(index).then(|| self.buffer()[index] == 1) { + Some(b) => Scalar::new(self.dtype().clone(), ScalarValue::Bool(b)), + None => Scalar::null(self.dtype().clone()), + }; + + Ok(scalar) + } +} + +impl SliceFn for ByteBoolArray { + fn slice(&self, start: usize, stop: usize) -> VortexResult { + let length = stop - start; + + let validity = self.validity().slice(start, stop)?; + + let slice_metadata = Arc::new(ByteBoolMetadata { + validity: validity.to_metadata(length)?, + }); + + ArrayData::try_new( + self.encoding(), + self.dtype().clone(), + slice_metadata, + Some(self.buffer().slice(start..stop)), + validity.into_array().into_iter().collect::>().into(), + StatsSet::new(), + ) + .map(Array::Data) + } +} + +impl TakeFn for ByteBoolArray { + fn take(&self, indices: &Array) -> VortexResult { + let validity = self.validity(); + let indices = indices.clone().as_primitive(); + let bools = self.maybe_null_slice(); + + let arr = match validity { + Validity::AllValid | Validity::NonNullable => { + let bools = match_each_integer_ptype!(indices.ptype(), |$I| { + indices.maybe_null_slice::<$I>() + .iter() + .map(|&idx| { + let idx: usize = idx.as_(); + bools[idx] + }) + .collect::>() + }); + + Self::from(bools).into_array() + } + Validity::AllInvalid => Self::from(vec![None; indices.len()]).into_array(), + + Validity::Array(_) => { + let bools = match_each_integer_ptype!(indices.ptype(), |$I| { + indices.maybe_null_slice::<$I>() + .iter() + .map(|&idx| { + let idx = idx.as_(); + if validity.is_valid(idx) { + Some(bools[idx]) + } else { + None + } + }) + .collect::>>() + }); + + Self::from(bools).into_array() + } + }; + + Ok(arr) + } +} + +impl CompareFn for ByteBoolArray { + fn compare(&self, other: &Array, op: Operator) -> VortexResult { + let canonical = other.clone().into_canonical()?.into_bool()?; + let lhs = BooleanBuffer::from(self.maybe_null_slice()); + let rhs = canonical.boolean_buffer(); + + let result_buf = match op { + Operator::Eq => lhs.bitxor(&rhs).not(), + Operator::NotEq => lhs.bitxor(&rhs), + + Operator::Gt => lhs.bitand(&rhs.not()), + Operator::Gte => lhs.bitor(&rhs.not()), + Operator::Lt => lhs.not().bitand(&rhs), + Operator::Lte => lhs.not().bitor(&rhs), + }; + + let mut validity = Vec::with_capacity(self.len()); + + let lhs_validity = self.validity(); + let rhs_validity = canonical.validity(); + + for idx in 0..self.len() { + let l = lhs_validity.is_valid(idx); + let r = rhs_validity.is_valid(idx); + validity.push(l & r); + } + + ByteBoolArray::try_from_vec(Vec::from_iter(result_buf.iter()), validity) + .map(ByteBoolArray::into_array) + } +} + +impl FillForwardFn for ByteBoolArray { + fn fill_forward(&self) -> VortexResult { + if self.dtype().nullability() == Nullability::NonNullable { + return Ok(self.to_array_data().into_array()); + } + + let validity = self.logical_validity().to_null_buffer()?.unwrap(); + let bools = self.maybe_null_slice(); + let mut last_value = bool::default(); + + let filled = bools + .iter() + .zip(validity.inner().iter()) + .map(|(&v, is_valid)| { + if is_valid { + last_value = v + } + + last_value + }) + .collect::>(); + + Ok(Self::from(filled).into_array()) + } +} + +#[cfg(test)] +mod tests { + use vortex::{ + compute::{compare::compare, slice::slice, unary::scalar_at::scalar_at}, + AsArray as _, + }; + + use super::*; + + #[test] + fn test_slice() { + let original = vec![Some(true), Some(true), None, Some(false), None]; + let vortex_arr = ByteBoolArray::from(original.clone()); + + let sliced_arr = slice(vortex_arr.as_array_ref(), 1, 4).unwrap(); + let sliced_arr = ByteBoolArray::try_from(sliced_arr).unwrap(); + + let s = scalar_at(sliced_arr.as_array_ref(), 0).unwrap(); + assert_eq!(s.into_value().as_bool().unwrap(), Some(true)); + + let s = scalar_at(sliced_arr.as_array_ref(), 1).unwrap(); + assert!(!sliced_arr.is_valid(1)); + assert!(s.is_null()); + assert_eq!(s.into_value().as_bool().unwrap(), None); + + let s = scalar_at(sliced_arr.as_array_ref(), 2).unwrap(); + assert_eq!(s.into_value().as_bool().unwrap(), Some(false)); + } + + #[test] + fn test_compare_all_equal() { + let lhs = ByteBoolArray::from(vec![true; 5]); + let rhs = ByteBoolArray::from(vec![true; 5]); + + let arr = compare(lhs.as_array_ref(), rhs.as_array_ref(), Operator::Eq).unwrap(); + + for i in 0..arr.len() { + let s = scalar_at(arr.as_array_ref(), i).unwrap(); + assert!(s.is_valid()); + assert_eq!(s.value(), &ScalarValue::Bool(true)); + } + } + + #[test] + fn test_compare_all_different() { + let lhs = ByteBoolArray::from(vec![false; 5]); + let rhs = ByteBoolArray::from(vec![true; 5]); + + let arr = compare(lhs.as_array_ref(), rhs.as_array_ref(), Operator::Eq).unwrap(); + + for i in 0..arr.len() { + let s = scalar_at(&arr, i).unwrap(); + assert!(s.is_valid()); + assert_eq!(s.value(), &ScalarValue::Bool(false)); + } + } + + #[test] + fn test_compare_with_nulls() { + let lhs = ByteBoolArray::from(vec![true; 5]); + let rhs = ByteBoolArray::from(vec![Some(true), Some(true), Some(true), Some(false), None]); + + let arr = compare(lhs.as_array_ref(), rhs.as_array_ref(), Operator::Eq).unwrap(); + + for i in 0..3 { + let s = scalar_at(&arr, i).unwrap(); + assert!(s.is_valid()); + assert_eq!(s.value(), &ScalarValue::Bool(true)); + } + + let s = scalar_at(&arr, 3).unwrap(); + assert!(s.is_valid()); + assert_eq!(s.value(), &ScalarValue::Bool(false)); + + let s = scalar_at(&arr, 4).unwrap(); + assert!(s.is_null()); + } +} diff --git a/encodings/byte_bool/src/lib.rs b/encodings/byte_bool/src/lib.rs new file mode 100644 index 0000000000..4aac1433e9 --- /dev/null +++ b/encodings/byte_bool/src/lib.rs @@ -0,0 +1,158 @@ +use std::mem::ManuallyDrop; + +use arrow_buffer::BooleanBuffer; +use serde::{Deserialize, Serialize}; +use vortex::array::bool::BoolArray; +use vortex::{ + impl_encoding, + validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}, + visitor::{AcceptArrayVisitor, ArrayVisitor}, +}; +use vortex::{Canonical, IntoCanonical}; +use vortex_buffer::Buffer; + +mod compute; +mod stats; + +impl_encoding!("vortex.byte_bool", ByteBool); + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ByteBoolMetadata { + validity: ValidityMetadata, +} + +impl ByteBoolArray { + pub fn validity(&self) -> Validity { + self.metadata() + .validity + .to_validity(self.array().child(0, &Validity::DTYPE)) + } + + pub fn try_new(buffer: Buffer, validity: Validity) -> VortexResult { + let length = buffer.len(); + + let typed = TypedArray::try_from_parts( + DType::Bool(validity.nullability()), + ByteBoolMetadata { + validity: validity.to_metadata(length)?, + }, + Some(buffer), + validity.into_array().into_iter().collect::>().into(), + StatsSet::new(), + )?; + + Ok(typed.into()) + } + + pub fn try_from_vec>(data: Vec, validity: V) -> VortexResult { + let validity = validity.into(); + let mut vec = ManuallyDrop::new(data); + vec.shrink_to_fit(); + + let ptr = vec.as_mut_ptr() as *mut u8; + let length = vec.len(); + let capacity = vec.capacity(); + + let bytes = unsafe { Vec::from_raw_parts(ptr, length, capacity) }; + + let buffer = Buffer::from(bytes); + + Self::try_new(buffer, validity) + } + + pub fn buffer(&self) -> &Buffer { + self.array().buffer().expect("missing mandatory buffer") + } + + fn maybe_null_slice(&self) -> &[bool] { + // Safety: The internal buffer contains byte-sized bools + unsafe { std::mem::transmute(self.buffer().as_slice()) } + } +} + +impl From> for ByteBoolArray { + fn from(value: Vec) -> Self { + Self::try_from_vec(value, Validity::AllValid).unwrap() + } +} + +impl From>> for ByteBoolArray { + fn from(value: Vec>) -> Self { + let validity = Validity::from_iter(value.iter()); + + // This doesn't reallocate, and the compiler even vectorizes it + let data = value.into_iter().map(|b| b.unwrap_or_default()).collect(); + + Self::try_from_vec(data, validity).unwrap() + } +} + +impl ArrayTrait for ByteBoolArray { + fn len(&self) -> usize { + self.buffer().len() + } +} + +impl IntoCanonical for ByteBoolArray { + fn into_canonical(self) -> VortexResult { + let boolean_buffer = BooleanBuffer::from(self.maybe_null_slice()); + let validity = self.validity(); + + BoolArray::try_new(boolean_buffer, validity).map(Canonical::Bool) + } +} + +impl ArrayValidity for ByteBoolArray { + fn is_valid(&self, index: usize) -> bool { + self.validity().is_valid(index) + } + + fn logical_validity(&self) -> LogicalValidity { + self.validity().to_logical(self.len()) + } +} + +impl AcceptArrayVisitor for ByteBoolArray { + fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { + visitor.visit_buffer(self.buffer())?; + visitor.visit_validity(&self.validity()) + } +} + +impl EncodingCompression for ByteBoolEncoding {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validity_construction() { + let v = vec![true, false]; + let v_len = v.len(); + + let arr = ByteBoolArray::from(v); + assert_eq!(v_len, arr.len()); + + for idx in 0..arr.len() { + assert!(arr.is_valid(idx)); + } + + let v = vec![Some(true), None, Some(false)]; + let arr = ByteBoolArray::from(v); + assert!(arr.is_valid(0)); + assert!(!arr.is_valid(1)); + assert!(arr.is_valid(2)); + assert_eq!(arr.len(), 3); + + let v: Vec> = vec![None, None]; + let v_len = v.len(); + + let arr = ByteBoolArray::from(v); + assert_eq!(v_len, arr.len()); + + for idx in 0..arr.len() { + assert!(!arr.is_valid(idx)); + } + assert_eq!(arr.len(), 2); + } +} diff --git a/encodings/byte_bool/src/stats.rs b/encodings/byte_bool/src/stats.rs new file mode 100644 index 0000000000..e7124dc0ad --- /dev/null +++ b/encodings/byte_bool/src/stats.rs @@ -0,0 +1,102 @@ +use vortex::{ + stats::{ArrayStatisticsCompute, Stat, StatsSet}, + ArrayTrait, AsArray, IntoCanonical, +}; +use vortex_error::VortexResult; + +use super::ByteBoolArray; + +impl ArrayStatisticsCompute for ByteBoolArray { + fn compute_statistics(&self, stat: Stat) -> VortexResult { + if self.is_empty() { + return Ok(StatsSet::new()); + } + + // TODO(adamgs): This is slightly wasteful and could be optimized in the future + let bools = self.as_array_ref().clone().into_canonical()?.into_bool()?; + bools.compute_statistics(stat) + } +} + +#[cfg(test)] +mod tests { + use vortex::stats::ArrayStatistics; + use vortex_dtype::{DType, Nullability}; + use vortex_scalar::Scalar; + + use super::*; + + #[test] + fn bool_stats() { + let bool_arr = + ByteBoolArray::from(vec![false, false, true, true, false, true, true, false]); + assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap()); + assert!(!bool_arr.statistics().compute_is_sorted().unwrap()); + assert!(!bool_arr.statistics().compute_is_constant().unwrap()); + assert!(!bool_arr.statistics().compute_min::().unwrap()); + assert!(bool_arr.statistics().compute_max::().unwrap()); + assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 5); + assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 4); + } + + #[test] + fn strict_sorted() { + let bool_arr_1 = ByteBoolArray::from(vec![false, true]); + assert!(bool_arr_1.statistics().compute_is_strict_sorted().unwrap()); + assert!(bool_arr_1.statistics().compute_is_sorted().unwrap()); + + let bool_arr_2 = ByteBoolArray::from(vec![true]); + assert!(bool_arr_2.statistics().compute_is_strict_sorted().unwrap()); + assert!(bool_arr_2.statistics().compute_is_sorted().unwrap()); + + let bool_arr_3 = ByteBoolArray::from(vec![false]); + assert!(bool_arr_3.statistics().compute_is_strict_sorted().unwrap()); + assert!(bool_arr_3.statistics().compute_is_sorted().unwrap()); + + let bool_arr_4 = ByteBoolArray::from(vec![true, false]); + assert!(!bool_arr_4.statistics().compute_is_strict_sorted().unwrap()); + assert!(!bool_arr_4.statistics().compute_is_sorted().unwrap()); + + let bool_arr_5 = ByteBoolArray::from(vec![false, true, true]); + assert!(!bool_arr_5.statistics().compute_is_strict_sorted().unwrap()); + assert!(bool_arr_5.statistics().compute_is_sorted().unwrap()); + } + + #[test] + fn nullable_stats() { + let bool_arr = ByteBoolArray::from(vec![ + Some(false), + Some(true), + None, + Some(true), + Some(false), + None, + None, + ]); + assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap()); + assert!(!bool_arr.statistics().compute_is_sorted().unwrap()); + assert!(!bool_arr.statistics().compute_is_constant().unwrap()); + assert!(!bool_arr.statistics().compute_min::().unwrap()); + assert!(bool_arr.statistics().compute_max::().unwrap()); + assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 3); + assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 2); + } + + #[test] + fn all_nullable_stats() { + let bool_arr = ByteBoolArray::from(vec![None, None, None, None, None]); + assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap()); + assert!(bool_arr.statistics().compute_is_sorted().unwrap()); + assert!(bool_arr.statistics().compute_is_constant().unwrap()); + assert_eq!( + bool_arr.statistics().compute(Stat::Min).unwrap(), + Scalar::null(DType::Bool(Nullability::Nullable)) + ); + assert_eq!( + bool_arr.statistics().compute(Stat::Max).unwrap(), + Scalar::null(DType::Bool(Nullability::Nullable)) + ); + assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 1); + assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0); + } +} diff --git a/vortex-array/src/array/bool/compute/slice.rs b/vortex-array/src/array/bool/compute/slice.rs index ce3e94f0d5..6939c41a13 100644 --- a/vortex-array/src/array/bool/compute/slice.rs +++ b/vortex-array/src/array/bool/compute/slice.rs @@ -13,3 +13,32 @@ impl SliceFn for BoolArray { .map(|a| a.into_array()) } } + +#[cfg(test)] +mod tests { + + use super::*; + use crate::compute::slice::slice; + use crate::validity::ArrayValidity; + use crate::ArrayTrait; + use crate::{compute::unary::scalar_at::scalar_at, AsArray}; + + #[test] + fn test_slice() { + let arr = BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]); + let sliced_arr = slice(arr.as_array_ref(), 1, 4).unwrap(); + let sliced_arr = BoolArray::try_from(sliced_arr).unwrap(); + + assert_eq!(sliced_arr.len(), 3); + + let s = scalar_at(sliced_arr.as_array_ref(), 0).unwrap(); + assert_eq!(s.into_value().as_bool().unwrap(), Some(true)); + + let s = scalar_at(sliced_arr.as_array_ref(), 1).unwrap(); + assert!(!sliced_arr.is_valid(1)); + assert!(s.is_null()); + + let s = scalar_at(sliced_arr.as_array_ref(), 2).unwrap(); + assert_eq!(s.into_value().as_bool().unwrap(), Some(false)); + } +} diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 79246f9f0e..7bbfbae119 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -18,6 +18,7 @@ impl_encoding!("vortex.bool", Bool); pub struct BoolMetadata { validity: ValidityMetadata, length: usize, + bit_offset: usize, } impl BoolArray { @@ -26,7 +27,11 @@ impl BoolArray { } pub fn boolean_buffer(&self) -> BooleanBuffer { - BooleanBuffer::new(self.buffer().clone().into(), 0, self.len()) + BooleanBuffer::new( + self.buffer().clone().into(), + self.metadata().bit_offset, + self.len(), + ) } pub fn validity(&self) -> Validity { @@ -38,14 +43,24 @@ impl BoolArray { impl BoolArray { pub fn try_new(buffer: BooleanBuffer, validity: Validity) -> VortexResult { + let buffer_len = buffer.len(); + let buffer_offset = buffer.offset(); + let last_byte_bit_offset = buffer_offset % 8; + let buffer_byte_offset = buffer_offset - last_byte_bit_offset; + + let inner = buffer + .into_inner() + .bit_slice(buffer_byte_offset, buffer_len); + Ok(Self { typed: TypedArray::try_from_parts( DType::Bool(validity.nullability()), BoolMetadata { - validity: validity.to_metadata(buffer.len())?, - length: buffer.len(), + validity: validity.to_metadata(buffer_len)?, + length: buffer_len, + bit_offset: last_byte_bit_offset, }, - Some(Buffer::from(buffer.into_inner())), + Some(Buffer::from(inner)), validity.into_array().into_iter().collect_vec().into(), StatsSet::new(), )?, @@ -130,4 +145,25 @@ mod tests { let scalar = bool::try_from(&scalar_at(&arr, 0).unwrap()).unwrap(); assert!(scalar); } + + #[test] + fn test_bool_from_iter() { + let arr = + BoolArray::from_iter([Some(true), Some(true), None, Some(false), None]).into_array(); + + let scalar = bool::try_from(&scalar_at(&arr, 0).unwrap()).unwrap(); + assert!(scalar); + + let scalar = bool::try_from(&scalar_at(&arr, 1).unwrap()).unwrap(); + assert!(scalar); + + let scalar = scalar_at(&arr, 2).unwrap(); + assert!(scalar.is_null()); + + let scalar = bool::try_from(&scalar_at(&arr, 3).unwrap()).unwrap(); + assert!(!scalar); + + let scalar = scalar_at(&arr, 4).unwrap(); + assert!(scalar.is_null()); + } } diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index 4231cca34f..80459f887d 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -8,9 +8,9 @@ pub trait CompareFn { fn compare(&self, array: &Array, predicate: Operator) -> VortexResult; } -pub fn compare(left: &Array, right: &Array, predicate: Operator) -> VortexResult { +pub fn compare(left: &Array, right: &Array, operator: Operator) -> VortexResult { if let Some(matching_indices) = - left.with_dyn(|lhs| lhs.compare().map(|rhs| rhs.compare(right, predicate))) + left.with_dyn(|lhs| lhs.compare().map(|rhs| rhs.compare(right, operator))) { return matching_indices; } @@ -20,7 +20,7 @@ pub fn compare(left: &Array, right: &Array, predicate: Operator) -> VortexResult match left.dtype() { DType::Primitive(..) => { let flat = left.clone().into_primitive()?; - flat.compare(right, predicate) + flat.compare(right, operator) } _ => Err(vortex_err!( NotImplemented: "compare", diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index a4353d8af0..09c82584a1 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -169,7 +169,7 @@ impl From> for Validity { } else if !bools.iter().any(|b| *b) { Self::AllInvalid } else { - Self::Array(BoolArray::from_vec(bools, Self::NonNullable).into_array()) + Self::Array(BoolArray::from(bools).into_array()) } } } diff --git a/vortex-scalar/src/lib.rs b/vortex-scalar/src/lib.rs index e628c098de..1612839a72 100644 --- a/vortex-scalar/src/lib.rs +++ b/vortex-scalar/src/lib.rs @@ -83,6 +83,10 @@ impl Scalar { self.value } + pub fn is_valid(&self) -> bool { + !self.value.is_null() + } + pub fn is_null(&self) -> bool { self.value.is_null() }