From 91487e594393a835ce6dd18f20a87ea6f3e2d1a5 Mon Sep 17 00:00:00 2001
From: Adam Gutglick
Date: Fri, 14 Jun 2024 12:40:53 +0300
Subject: [PATCH] Initial work on ByteBoolArray

---
Review notes (text after the three-dash line is not part of the commit message):
- This copy of the patch was recovered from a whitespace-collapsed extraction
  that had also dropped every `<...>` span; generic arguments were restored
  from the surrounding code and tests. `VortexResult<Flattened>` on
  `ArrayFlatten::flatten` is the one restoration not provable from the patch
  itself - TODO confirm against the trait definition.
- Both `From` impls hand the input vector's allocation to a `Vec<u8>` through
  `Vec::from_raw_parts`; only the `Vec<Option<bool>>` impl carries a
  `// SAFETY:` comment.
- The `Vec<Option<bool>>` conversion assumes `Option<bool>` occupies a single
  byte like `bool` - TODO confirm this is a layout the code may rely on.
- `ArrayFlatten::flatten` and the non-empty path of
  `ArrayStatisticsCompute::compute_statistics` are still `todo!()` and will
  panic when reached.

 .../src/array/byte_bool/compute/mod.rs        |   4 +
 vortex-array/src/array/byte_bool/mod.rs       | 158 ++++++++++++++++++
 vortex-array/src/array/byte_bool/stats.rs     |  27 +++
 vortex-array/src/array/mod.rs                 |   1 +
 4 files changed, 190 insertions(+)
 create mode 100644 vortex-array/src/array/byte_bool/compute/mod.rs
 create mode 100644 vortex-array/src/array/byte_bool/mod.rs
 create mode 100644 vortex-array/src/array/byte_bool/stats.rs

diff --git a/vortex-array/src/array/byte_bool/compute/mod.rs b/vortex-array/src/array/byte_bool/compute/mod.rs
new file mode 100644
index 0000000000..ab20e30a32
--- /dev/null
+++ b/vortex-array/src/array/byte_bool/compute/mod.rs
@@ -0,0 +1,4 @@
+use super::ByteBoolArray;
+use crate::compute::ArrayCompute;
+
+impl ArrayCompute for ByteBoolArray {}
diff --git a/vortex-array/src/array/byte_bool/mod.rs b/vortex-array/src/array/byte_bool/mod.rs
new file mode 100644
index 0000000000..e2f503f2d1
--- /dev/null
+++ b/vortex-array/src/array/byte_bool/mod.rs
@@ -0,0 +1,158 @@
+use std::mem::ManuallyDrop;
+
+use itertools::Itertools;
+use serde::{Deserialize, Serialize};
+use vortex_buffer::Buffer;
+use vortex_dtype::Nullability;
+
+use crate::{
+    impl_encoding,
+    validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata},
+    visitor::{AcceptArrayVisitor, ArrayVisitor},
+    ArrayFlatten,
+};
+
+mod compute;
+mod stats;
+
+impl_encoding!("vortex.byte_bool", ByteBool);
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ByteBoolMetadata {
+    validity: ValidityMetadata,
+    length: usize,
+}
+
+impl ByteBoolArray {
+    pub fn validity(&self) -> Validity {
+        self.metadata()
+            .validity
+            .to_validity(self.array().child(0, &Validity::DTYPE))
+    }
+
+    pub fn buffer(&self) -> &Buffer {
+        self.array().buffer().expect("missing mandatory buffer")
+    }
+}
+
+impl From<Vec<bool>> for ByteBoolArray {
+    fn from(value: Vec<bool>) -> Self {
+        let mut value = ManuallyDrop::new(value);
+        let ptr = value.as_mut_ptr() as *mut u8;
+        let length = value.len();
+        let capacity = value.capacity();
+
+        let bytes_vec = unsafe { Vec::from_raw_parts(ptr, length, capacity) };
+
+        let buffer = Buffer::from(bytes_vec);
+        let typed = TypedArray::try_from_parts(
+            DType::Bool(Nullability::NonNullable),
+            ByteBoolMetadata {
+                validity: ValidityMetadata::NonNullable,
+                length,
+            },
+            Some(buffer),
+            Validity::NonNullable
+                .into_array_data()
+                .into_iter()
+                .collect_vec()
+                .into(),
+            StatsSet::new(),
+        )
+        .unwrap();
+
+        typed.into()
+    }
+}
+
+impl From<Vec<Option<bool>>> for ByteBoolArray {
+    fn from(value: Vec<Option<bool>>) -> Self {
+        let mut value = ManuallyDrop::new(value);
+        let ptr = value.as_mut_ptr() as *mut u8;
+        let length = value.len();
+        let capacity = value.capacity();
+
+        let validity = Validity::from_iter(value.iter());
+
+        // SAFETY: `Option<bool>` is the same as `bool`, so as long as we keep the validity data the data is still valid.
+        // If we ever want to turn this Array back to a Vec, we might have to do some work
+        let bytes_vec = unsafe { Vec::from_raw_parts(ptr, length, capacity) };
+
+        let buffer = Buffer::from(bytes_vec);
+        let typed = TypedArray::try_from_parts(
+            DType::Bool(Nullability::Nullable),
+            ByteBoolMetadata {
+                validity: validity.to_metadata(length).unwrap(),
+                length,
+            },
+            Some(buffer),
+            validity.into_array_data().into_iter().collect_vec().into(),
+            StatsSet::new(),
+        )
+        .unwrap();
+
+        typed.into()
+    }
+}
+
+impl ArrayTrait for ByteBoolArray {
+    fn len(&self) -> usize {
+        self.metadata().length
+    }
+}
+
+impl ArrayFlatten for ByteBoolArray {
+    fn flatten(self) -> VortexResult<Flattened> {
+        todo!()
+        // Err(VortexError::NotImplemented((), (), ()))
+    }
+}
+
+impl ArrayValidity for ByteBoolArray {
+    fn is_valid(&self, index: usize) -> bool {
+        self.validity().is_valid(index)
+    }
+
+    fn logical_validity(&self) -> LogicalValidity {
+        self.validity().to_logical(self.len())
+    }
+}
+
+impl AcceptArrayVisitor for ByteBoolArray {
+    fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
+        visitor.visit_buffer(self.buffer())?;
+        visitor.visit_validity(&self.validity())
+    }
+}
+
+impl EncodingCompression for ByteBoolEncoding {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_validity_construction() {
+        let v = vec![true, false];
+
+        let arr = ByteBoolArray::from(v);
+        for idx in 0..arr.len() {
+            assert!(arr.is_valid(idx));
+        }
+
+        let v = vec![Some(true), None, Some(false)];
+        let arr = ByteBoolArray::from(v);
+        assert!(arr.is_valid(0));
+        assert!(!arr.is_valid(1));
+        assert!(arr.is_valid(2));
+        assert_eq!(arr.len(), 3);
+
+        let v: Vec<Option<bool>> = vec![None, None];
+
+        let arr = ByteBoolArray::from(v);
+        for idx in 0..arr.len() {
+            assert!(!arr.is_valid(idx));
+        }
+        assert_eq!(arr.len(), 2);
+    }
+}
diff --git a/vortex-array/src/array/byte_bool/stats.rs b/vortex-array/src/array/byte_bool/stats.rs
new file mode 100644
index 0000000000..28cd02938b
--- /dev/null
+++ b/vortex-array/src/array/byte_bool/stats.rs
@@ -0,0 +1,27 @@
+use vortex_error::VortexResult;
+
+use super::ByteBoolArray;
+use crate::{
+    stats::{ArrayStatisticsCompute, Stat, StatsSet},
+    // validity::{ArrayValidity, LogicalValidity},
+    ArrayTrait,
+};
+
+impl ArrayStatisticsCompute for ByteBoolArray {
+    fn compute_statistics(&self, _stat: Stat) -> VortexResult<StatsSet> {
+        if self.is_empty() {
+            return Ok(StatsSet::new());
+        }
+
+        // match self.logical_validity() {
+        //     LogicalValidity::AllValid(_) => self.boolean_buffer().compute_statistics(stat),
+        //     LogicalValidity::AllInvalid(v) => all_null_stats(v),
+        //     LogicalValidity::Array(a) => NullableBools(
+        //         &self.boolean_buffer(),
+        //         &a.into_array().flatten_bool()?.boolean_buffer(),
+        //     )
+        //     .compute_statistics(stat),
+        // }
+        todo!()
+    }
+}
diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs
index f42cbf28b1..90f61f24bf 100644
--- a/vortex-array/src/array/mod.rs
+++ b/vortex-array/src/array/mod.rs
@@ -1,4 +1,5 @@
 pub mod bool;
+pub mod byte_bool;
 pub mod chunked;
 pub mod constant;
 pub mod datetime;