diff --git a/encodings/alp/src/array.rs b/encodings/alp/src/array.rs index 95fc44f5be..c62ba337bf 100644 --- a/encodings/alp/src/array.rs +++ b/encodings/alp/src/array.rs @@ -17,6 +17,7 @@ pub struct ALPMetadata { exponents: Exponents, encoded_dtype: DType, patches_dtype: Option, + patches_len: usize, } impl ALPArray { @@ -32,7 +33,10 @@ impl ALPArray { d => vortex_bail!(MismatchedTypes: "int32 or int64", d), }; + let length = encoded.len(); + let patches_dtype = patches.as_ref().map(|a| a.dtype().as_nullable()); + let patches_len = patches.as_ref().map(|a| a.len()).unwrap_or(0); let mut children = Vec::with_capacity(2); children.push(encoded); if let Some(patch) = patches { @@ -41,10 +45,12 @@ impl ALPArray { Self::try_from_parts( dtype, + length, ALPMetadata { exponents, encoded_dtype, patches_dtype, + patches_len, }, children.into(), Default::default(), @@ -61,7 +67,7 @@ impl ALPArray { pub fn encoded(&self) -> Array { self.array() - .child(0, &self.metadata().encoded_dtype) + .child(0, &self.metadata().encoded_dtype, self.len()) .expect("Missing encoded array") } @@ -73,7 +79,7 @@ impl ALPArray { pub fn patches(&self) -> Option { self.metadata().patches_dtype.as_ref().map(|dt| { self.array() - .child(1, dt) + .child(1, dt, self.metadata().patches_len) .expect("Missing patches with present metadata flag") }) } @@ -84,6 +90,8 @@ impl ALPArray { } } +impl ArrayTrait for ALPArray {} + impl ArrayValidity for ALPArray { fn is_valid(&self, index: usize) -> bool { self.encoded().with_dyn(|a| a.is_valid(index)) @@ -114,9 +122,3 @@ impl AcceptArrayVisitor for ALPArray { } impl ArrayStatisticsCompute for ALPArray {} - -impl ArrayTrait for ALPArray { - fn len(&self) -> usize { - self.encoded().len() - } -} diff --git a/encodings/byte_bool/src/compute/mod.rs b/encodings/byte_bool/src/compute/mod.rs index eb0e273f87..550c151c4c 100644 --- a/encodings/byte_bool/src/compute/mod.rs +++ b/encodings/byte_bool/src/compute/mod.rs @@ -13,7 +13,7 @@ use vortex::{ encoding::ArrayEncodingRef, stats::StatsSet, validity::ArrayValidity, - ArrayDType, ArrayData, ArrayTrait, IntoArray, + ArrayDType, ArrayData, IntoArray, }; use vortex::{Array, IntoCanonical}; use vortex_dtype::{match_each_integer_ptype, Nullability}; @@ -73,6 +73,7 @@ impl SliceFn for ByteBoolArray { ArrayData::try_new( self.encoding(), self.dtype().clone(), + length, slice_metadata, Some(self.buffer().slice(start..stop)), validity.into_array().into_iter().collect::>().into(), diff --git a/encodings/byte_bool/src/lib.rs b/encodings/byte_bool/src/lib.rs index ce20fa86a0..4eb3d6a7be 100644 --- a/encodings/byte_bool/src/lib.rs +++ b/encodings/byte_bool/src/lib.rs @@ -25,7 +25,7 @@ impl ByteBoolArray { pub fn validity(&self) -> Validity { self.metadata() .validity - .to_validity(self.array().child(0, &Validity::DTYPE)) + .to_validity(self.array().child(0, &Validity::DTYPE, self.len())) } pub fn try_new(buffer: Buffer, validity: Validity) -> VortexResult { @@ -33,6 +33,7 @@ impl ByteBoolArray { let typed = TypedArray::try_from_parts( DType::Bool(validity.nullability()), + length, ByteBoolMetadata { validity: validity.to_metadata(length)?, }, @@ -70,6 +71,8 @@ impl ByteBoolArray { } } +impl ArrayTrait for ByteBoolArray {} + impl From> for ByteBoolArray { fn from(value: Vec) -> Self { Self::try_from_vec(value, Validity::AllValid).unwrap() @@ -87,12 +90,6 @@ impl From>> for ByteBoolArray { } } -impl ArrayTrait for ByteBoolArray { - fn len(&self) -> usize { - self.buffer().len() - } -} - impl IntoCanonical for ByteBoolArray { fn into_canonical(self) -> VortexResult { let boolean_buffer = BooleanBuffer::from(self.maybe_null_slice()); diff --git a/encodings/byte_bool/src/stats.rs b/encodings/byte_bool/src/stats.rs index e7124dc0ad..d5d8ff81b5 100644 --- a/encodings/byte_bool/src/stats.rs +++ b/encodings/byte_bool/src/stats.rs @@ -1,6 +1,6 @@ use vortex::{ stats::{ArrayStatisticsCompute, Stat, StatsSet}, - ArrayTrait, AsArray, IntoCanonical, + AsArray, IntoCanonical, }; use vortex_error::VortexResult; diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 2ca4e8fc35..0dd60cf671 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -47,6 +47,7 @@ impl DateTimePartsArray { Self::try_from_parts( dtype, + length, DateTimePartsMetadata { days_dtype: days.dtype().clone(), seconds_dtype: seconds.dtype().clone(), @@ -59,23 +60,25 @@ impl DateTimePartsArray { pub fn days(&self) -> Array { self.array() - .child(0, &self.metadata().days_dtype) + .child(0, &self.metadata().days_dtype, self.len()) .expect("Missing days array") } pub fn seconds(&self) -> Array { self.array() - .child(1, &self.metadata().seconds_dtype) + .child(1, &self.metadata().seconds_dtype, self.len()) .expect("Missing seconds array") } pub fn subsecond(&self) -> Array { self.array() - .child(2, &self.metadata().subseconds_dtype) + .child(2, &self.metadata().subseconds_dtype, self.len()) .expect("Missing subsecond array") } } +impl ArrayTrait for DateTimePartsArray {} + impl IntoCanonical for DateTimePartsArray { fn into_canonical(self) -> VortexResult { Ok(Canonical::Extension( @@ -103,9 +106,3 @@ impl AcceptArrayVisitor for DateTimePartsArray { } impl ArrayStatisticsCompute for DateTimePartsArray {} - -impl ArrayTrait for DateTimePartsArray { - fn len(&self) -> usize { - self.days().len() - } -} diff --git a/encodings/datetime-parts/src/compress.rs b/encodings/datetime-parts/src/compress.rs index b4811c2c4a..163cd52de4 100644 --- a/encodings/datetime-parts/src/compress.rs +++ b/encodings/datetime-parts/src/compress.rs @@ -1,7 +1,7 @@ use vortex::array::datetime::{LocalDateTimeArray, TimeUnit}; use vortex::array::primitive::PrimitiveArray; use vortex::compute::unary::cast::try_cast; -use vortex::{Array, ArrayTrait, IntoArray, IntoCanonical}; +use vortex::{Array, IntoArray, IntoCanonical}; use vortex_dtype::PType; use vortex_error::VortexResult; diff --git a/encodings/dict/src/dict.rs b/encodings/dict/src/dict.rs index b9cf1a453a..bce7a116bb 100644 --- a/encodings/dict/src/dict.rs +++ b/encodings/dict/src/dict.rs @@ -14,6 +14,7 @@ impl_encoding!("vortex.dict", 20u16, Dict); #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DictMetadata { codes_dtype: DType, + values_len: usize, } impl DictArray { @@ -23,8 +24,10 @@ impl DictArray { } Self::try_from_parts( values.dtype().clone(), + codes.len(), DictMetadata { codes_dtype: codes.dtype().clone(), + values_len: values.len(), }, [values, codes].into(), StatsSet::new(), @@ -33,17 +36,21 @@ impl DictArray { #[inline] pub fn values(&self) -> Array { - self.array().child(0, self.dtype()).expect("Missing values") + self.array() + .child(0, self.dtype(), self.metadata().values_len) + .expect("Missing values") } #[inline] pub fn codes(&self) -> Array { self.array() - .child(1, &self.metadata().codes_dtype) + .child(1, &self.metadata().codes_dtype, self.len()) .expect("Missing codes") } } +impl ArrayTrait for DictArray {} + impl IntoCanonical for DictArray { fn into_canonical(self) -> VortexResult { take(&self.values(), &self.codes())?.into_canonical() @@ -89,9 +96,3 @@ impl AcceptArrayVisitor for DictArray { visitor.visit_child("codes", &self.codes()) } } - -impl ArrayTrait for DictArray { - fn len(&self) -> usize { - self.codes().len() - } -} diff --git a/encodings/fastlanes/src/bitpacking/compress.rs b/encodings/fastlanes/src/bitpacking/compress.rs index 57f612a476..ef5ad9b04d 100644 --- a/encodings/fastlanes/src/bitpacking/compress.rs +++ b/encodings/fastlanes/src/bitpacking/compress.rs @@ -6,7 +6,7 @@ use vortex::array::sparse::{Sparse, SparseArray}; use vortex::stats::ArrayStatistics; use vortex::validity::Validity; use vortex::IntoArrayVariant; -use vortex::{Array, ArrayDType, ArrayDef, ArrayTrait, IntoArray}; +use vortex::{Array, ArrayDType, ArrayDef, IntoArray}; use vortex_dtype::{ match_each_integer_ptype, match_each_unsigned_integer_ptype, NativePType, PType, }; diff --git a/encodings/fastlanes/src/bitpacking/compute/mod.rs b/encodings/fastlanes/src/bitpacking/compute/mod.rs index 546b96709d..6f1a18c519 100644 --- a/encodings/fastlanes/src/bitpacking/compute/mod.rs +++ b/encodings/fastlanes/src/bitpacking/compute/mod.rs @@ -3,7 +3,7 @@ use vortex::compute::slice::SliceFn; use vortex::compute::take::TakeFn; use vortex::compute::unary::scalar_at::{scalar_at, ScalarAtFn}; use vortex::compute::ArrayCompute; -use vortex::{ArrayDType, ArrayTrait}; +use vortex::ArrayDType; use vortex_error::{vortex_err, VortexResult}; use vortex_scalar::Scalar; diff --git a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs index f4f009f370..f3a58b7bd9 100644 --- a/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs +++ b/encodings/fastlanes/src/bitpacking/compute/search_sorted.rs @@ -7,7 +7,7 @@ use vortex::array::sparse::SparseArray; use vortex::compute::search_sorted::{ search_sorted, IndexOrd, Len, SearchResult, SearchSorted, SearchSortedFn, SearchSortedSide, }; -use vortex::{ArrayDType, ArrayTrait, IntoArrayVariant}; +use vortex::{ArrayDType, IntoArrayVariant}; use vortex_dtype::{match_each_unsigned_integer_ptype, NativePType}; use vortex_error::VortexResult; use vortex_scalar::Scalar; diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index 9b37cc1fea..89817584cd 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -31,7 +31,7 @@ mod test { use vortex::array::primitive::PrimitiveArray; use vortex::compute::slice::slice; use vortex::compute::unary::scalar_at::scalar_at; - use vortex::{ArrayTrait, IntoArray}; + use vortex::IntoArray; use crate::BitPackedArray; diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs index 9afb3751a9..df5e3be427 100644 --- a/encodings/fastlanes/src/bitpacking/compute/take.rs +++ b/encodings/fastlanes/src/bitpacking/compute/take.rs @@ -7,7 +7,7 @@ use vortex::array::primitive::PrimitiveArray; use vortex::array::sparse::SparseArray; use vortex::compute::slice::slice; use vortex::compute::take::{take, TakeFn}; -use vortex::{Array, ArrayDType, ArrayTrait, IntoArray, IntoArrayVariant}; +use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant}; use vortex_dtype::{ match_each_integer_ptype, match_each_unsigned_integer_ptype, NativePType, PType, }; diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index 1bc874457a..c1cea11d38 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -17,7 +17,7 @@ impl_encoding!("fastlanes.bitpacked", 14u16, BitPacked); pub struct BitPackedMetadata { // TODO(ngates): serialize into compact form validity: ValidityMetadata, - patches: bool, + patches_len: usize, bit_width: usize, offset: usize, // Know to be <1024 length: usize, // Store end padding instead <1024 @@ -70,7 +70,7 @@ impl BitPackedArray { let metadata = BitPackedMetadata { validity: validity.to_metadata(length)?, - patches: patches.is_some(), + patches_len: patches.as_ref().map(|a| a.len()).unwrap_or(0), offset, length, bit_width, @@ -85,13 +85,22 @@ impl BitPackedArray { children.push(a) } - Self::try_from_parts(dtype, metadata, children.into(), StatsSet::new()) + Self::try_from_parts(dtype, length, metadata, children.into(), StatsSet::new()) + } + + fn packed_len(&self) -> usize { + ((self.len() + self.offset() + 1023) / 1024) + * (128 * self.bit_width() / self.ptype().byte_width()) } #[inline] pub fn packed(&self) -> Array { self.array() - .child(0, &self.dtype().with_nullability(Nullability::NonNullable)) + .child( + 0, + &self.dtype().with_nullability(Nullability::NonNullable), + self.packed_len(), + ) .expect("Missing packed array") } @@ -102,11 +111,19 @@ impl BitPackedArray { #[inline] pub fn patches(&self) -> Option { - self.metadata().patches.then(|| { - self.array() - .child(1, &self.dtype().with_nullability(Nullability::Nullable)) - .expect("Missing patches array") - }) + if self.metadata().patches_len > 0 { + Some( + self.array() + .child( + 1, + &self.dtype().with_nullability(Nullability::Nullable), + self.metadata().patches_len, + ) + .expect("Missing patches array"), + ) + } else { + None + } } #[inline] @@ -116,8 +133,13 @@ impl BitPackedArray { pub fn validity(&self) -> Validity { self.metadata().validity.to_validity(self.array().child( - if self.metadata().patches { 2 } else { 1 }, + if self.metadata().patches_len > 0 { + 2 + } else { + 1 + }, &Validity::DTYPE, + self.len(), )) } @@ -159,7 +181,7 @@ impl ArrayValidity for BitPackedArray { impl AcceptArrayVisitor for BitPackedArray { fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { visitor.visit_child("packed", &self.packed())?; - if self.metadata().patches { + if self.metadata().patches_len > 0 { visitor.visit_child( "patches", &self.patches().expect("Expected patches to be present "), @@ -172,10 +194,6 @@ impl AcceptArrayVisitor for BitPackedArray { impl ArrayStatisticsCompute for BitPackedArray {} impl ArrayTrait for BitPackedArray { - fn len(&self) -> usize { - self.metadata().length - } - fn nbytes(&self) -> usize { // Ignore any overheads like padding or the bit-width flag. let packed_size = ((self.bit_width() * self.len()) + 7) / 8; diff --git a/encodings/fastlanes/src/delta/compress.rs b/encodings/fastlanes/src/delta/compress.rs index 7b8e248541..0073c1271a 100644 --- a/encodings/fastlanes/src/delta/compress.rs +++ b/encodings/fastlanes/src/delta/compress.rs @@ -186,7 +186,6 @@ mod test { let (bases, deltas) = delta_compress(&PrimitiveArray::from(input.clone())).unwrap(); let delta = DeltaArray::try_new( - input.len(), bases.into_array(), deltas.into_array(), Validity::NonNullable, diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index eb91b45949..c17d07cdab 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -20,12 +20,7 @@ pub struct DeltaMetadata { } impl DeltaArray { - pub fn try_new( - len: usize, - bases: Array, - deltas: Array, - validity: Validity, - ) -> VortexResult { + pub fn try_new(bases: Array, deltas: Array, validity: Validity) -> VortexResult { if bases.dtype() != deltas.dtype() { vortex_bail!( "DeltaArray: bases and deltas must have the same dtype, got {:?} and {:?}", @@ -33,15 +28,9 @@ impl DeltaArray { deltas.dtype() ); } - if deltas.len() != len { - vortex_bail!( - "DeltaArray: provided deltas array of len {} does not match array len {}", - deltas.len(), - len - ); - } let dtype = bases.dtype().clone(); + let len = deltas.len(); let metadata = DeltaMetadata { validity: validity.to_metadata(len)?, len, @@ -52,33 +41,32 @@ impl DeltaArray { children.push(varray) } - let delta = Self::try_from_parts(dtype, metadata, children.into(), StatsSet::new())?; - - let expected_bases_len = { - let num_chunks = len / 1024; - let remainder_base_size = if len % 1024 > 0 { 1 } else { 0 }; - num_chunks * delta.lanes() + remainder_base_size - }; - if delta.bases().len() != expected_bases_len { + let delta = Self::try_from_parts(dtype, len, metadata, children.into(), StatsSet::new())?; + if delta.bases().len() != delta.bases_len() { vortex_bail!( "DeltaArray: bases.len() ({}) != expected_bases_len ({}), based on len ({}) and lane count ({})", delta.bases().len(), - expected_bases_len, + delta.bases_len(), len, delta.lanes() ); } + Ok(delta) } #[inline] pub fn bases(&self) -> Array { - self.array().child(0, self.dtype()).expect("Missing bases") + self.array() + .child(0, self.dtype(), self.bases_len()) + .expect("Missing bases") } #[inline] pub fn deltas(&self) -> Array { - self.array().child(1, self.dtype()).expect("Missing deltas") + self.array() + .child(1, self.dtype(), self.len()) + .expect("Missing deltas") } #[inline] @@ -92,10 +80,18 @@ impl DeltaArray { pub fn validity(&self) -> Validity { self.metadata() .validity - .to_validity(self.array().child(2, &Validity::DTYPE)) + .to_validity(self.array().child(2, &Validity::DTYPE, self.len())) + } + + fn bases_len(&self) -> usize { + let num_chunks = self.len() / 1024; + let remainder_base_size = if self.len() % 1024 > 0 { 1 } else { 0 }; + num_chunks * self.lanes() + remainder_base_size } } +impl ArrayTrait for DeltaArray {} + impl IntoCanonical for DeltaArray { fn into_canonical(self) -> VortexResult { delta_decompress(self).map(Canonical::Primitive) @@ -120,9 +116,3 @@ impl AcceptArrayVisitor for DeltaArray { } impl ArrayStatisticsCompute for DeltaArray {} - -impl ArrayTrait for DeltaArray { - fn len(&self) -> usize { - self.metadata().len - } -} diff --git a/encodings/fastlanes/src/for/compress.rs b/encodings/fastlanes/src/for/compress.rs index 9c85daaf50..36e1b5da59 100644 --- a/encodings/fastlanes/src/for/compress.rs +++ b/encodings/fastlanes/src/for/compress.rs @@ -3,7 +3,7 @@ use num_traits::{PrimInt, WrappingAdd, WrappingSub}; use vortex::array::constant::ConstantArray; use vortex::array::primitive::PrimitiveArray; use vortex::stats::{trailing_zeros, ArrayStatistics, Stat}; -use vortex::{Array, ArrayDType, ArrayTrait, IntoArray, IntoArrayVariant}; +use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant}; use vortex_dtype::{match_each_integer_ptype, NativePType}; use vortex_error::{vortex_err, VortexResult}; use vortex_scalar::Scalar; diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 70d8902067..009e5cfb6a 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -31,6 +31,7 @@ impl FoRArray { )?; Self::try_from_parts( reference.dtype().clone(), + child.len(), FoRMetadata { reference, shift }, [child].into(), StatsSet::new(), @@ -44,7 +45,9 @@ impl FoRArray { } else { self.dtype() }; - self.array().child(0, dtype).expect("Missing FoR child") + self.array() + .child(0, dtype, self.len()) + .expect("Missing FoR child") } #[inline] @@ -88,10 +91,6 @@ impl AcceptArrayVisitor for FoRArray { impl ArrayStatisticsCompute for FoRArray {} impl ArrayTrait for FoRArray { - fn len(&self) -> usize { - self.encoded().len() - } - fn nbytes(&self) -> usize { self.encoded().nbytes() } diff --git a/encodings/roaring/src/boolean/compress.rs b/encodings/roaring/src/boolean/compress.rs index df9be99c1d..1bf1f4acb6 100644 --- a/encodings/roaring/src/boolean/compress.rs +++ b/encodings/roaring/src/boolean/compress.rs @@ -1,6 +1,5 @@ use croaring::Bitmap; use vortex::array::bool::BoolArray; -use vortex::ArrayTrait; use vortex_error::VortexResult; use crate::RoaringBoolArray; diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index b2f40d2463..03f5809ef4 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -31,6 +31,7 @@ impl RoaringBoolArray { Ok(Self { typed: TypedArray::try_from_parts( DType::Bool(NonNullable), + length, RoaringBoolMetadata { length }, Some(Buffer::from(bitmap.serialize::())), vec![].into(), @@ -58,6 +59,9 @@ impl RoaringBoolArray { } } } + +impl ArrayTrait for RoaringBoolArray {} + impl AcceptArrayVisitor for RoaringBoolArray { fn accept(&self, _visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { // TODO(ngates): should we store a buffer in memory? Or delay serialization? @@ -68,12 +72,6 @@ impl AcceptArrayVisitor for RoaringBoolArray { } } -impl ArrayTrait for RoaringBoolArray { - fn len(&self) -> usize { - self.metadata().length - } -} - impl ArrayStatisticsCompute for RoaringBoolArray {} impl ArrayValidity for RoaringBoolArray { diff --git a/encodings/roaring/src/integer/mod.rs b/encodings/roaring/src/integer/mod.rs index e454d99c4e..91c0dcefdd 100644 --- a/encodings/roaring/src/integer/mod.rs +++ b/encodings/roaring/src/integer/mod.rs @@ -19,9 +19,6 @@ impl_encoding!("vortex.roaring_int", 18u16, RoaringInt); #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RoaringIntMetadata { ptype: PType, - // NB: this is stored because we want to avoid the overhead of deserializing the bitmap - // on every len() call. It's CRITICAL that this is kept up-to date. - length: usize, } impl RoaringIntArray { @@ -29,13 +26,12 @@ impl RoaringIntArray { if !ptype.is_unsigned_int() { vortex_bail!("RoaringInt expected unsigned int"); } + let length = bitmap.statistics().cardinality as usize; Ok(Self { typed: TypedArray::try_from_parts( DType::Bool(NonNullable), - RoaringIntMetadata { - ptype, - length: bitmap.statistics().cardinality as usize, - }, + length, + RoaringIntMetadata { ptype }, Some(Buffer::from(bitmap.serialize::())), vec![].into(), StatsSet::new(), @@ -66,6 +62,8 @@ impl RoaringIntArray { } } +impl ArrayTrait for RoaringIntArray {} + impl ArrayValidity for RoaringIntArray { fn is_valid(&self, _index: usize) -> bool { true @@ -90,12 +88,6 @@ impl AcceptArrayVisitor for RoaringIntArray { impl ArrayStatisticsCompute for RoaringIntArray {} -impl ArrayTrait for RoaringIntArray { - fn len(&self) -> usize { - self.metadata().length - } -} - #[cfg(test)] mod test { use vortex::array::primitive::PrimitiveArray; diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs index 6a2ff4dbb1..1acfc34009 100644 --- a/encodings/runend/src/compress.rs +++ b/encodings/runend/src/compress.rs @@ -5,7 +5,7 @@ use num_traits::{AsPrimitive, FromPrimitive}; use vortex::array::primitive::PrimitiveArray; use vortex::stats::{ArrayStatistics, Stat}; use vortex::validity::Validity; -use vortex::{ArrayDType, ArrayTrait}; +use vortex::ArrayDType; use vortex_dtype::Nullability; use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType}; use vortex_error::VortexResult; @@ -111,7 +111,7 @@ pub fn runend_decode_primitive< mod test { use vortex::array::primitive::PrimitiveArray; use vortex::validity::{ArrayValidity, Validity}; - use vortex::{ArrayTrait, IntoArray}; + use vortex::IntoArray; use crate::compress::{runend_decode, runend_encode}; use crate::RunEndArray; diff --git a/encodings/runend/src/runend.rs b/encodings/runend/src/runend.rs index 1b6fc08725..e2edcc3ce0 100644 --- a/encodings/runend/src/runend.rs +++ b/encodings/runend/src/runend.rs @@ -16,6 +16,7 @@ impl_encoding!("vortex.runend", 19u16, RunEnd); pub struct RunEndMetadata { validity: ValidityMetadata, ends_dtype: DType, + num_runs: usize, offset: usize, length: usize, } @@ -48,6 +49,7 @@ impl RunEndArray { let metadata = RunEndMetadata { validity: validity.to_metadata(length)?, ends_dtype: ends.dtype().clone(), + num_runs: ends.len(), offset, length, }; @@ -59,7 +61,7 @@ impl RunEndArray { children.push(a) } - Self::try_from_parts(dtype, metadata, children.into(), StatsSet::new()) + Self::try_from_parts(dtype, length, metadata, children.into(), StatsSet::new()) } pub fn find_physical_index(&self, index: usize) -> VortexResult { @@ -80,7 +82,7 @@ impl RunEndArray { pub fn validity(&self) -> Validity { self.metadata() .validity - .to_validity(self.array().child(2, &Validity::DTYPE)) + .to_validity(self.array().child(2, &Validity::DTYPE, self.len())) } #[inline] @@ -91,16 +93,20 @@ impl RunEndArray { #[inline] pub fn ends(&self) -> Array { self.array() - .child(0, &self.metadata().ends_dtype) + .child(0, &self.metadata().ends_dtype, self.metadata().num_runs) .expect("missing ends") } #[inline] pub fn values(&self) -> Array { - self.array().child(1, self.dtype()).expect("missing values") + self.array() + .child(1, self.dtype(), self.metadata().num_runs) + .expect("missing values") } } +impl ArrayTrait for RunEndArray {} + impl ArrayValidity for RunEndArray { fn is_valid(&self, index: usize) -> bool { self.validity().is_valid(index) @@ -130,18 +136,12 @@ impl AcceptArrayVisitor for RunEndArray { impl ArrayStatisticsCompute for RunEndArray {} -impl ArrayTrait for RunEndArray { - fn len(&self) -> usize { - self.metadata().length - } -} - #[cfg(test)] mod test { use vortex::compute::slice::slice; use vortex::compute::unary::scalar_at::scalar_at; use vortex::validity::Validity; - use vortex::{ArrayDType, ArrayTrait, IntoArray, IntoCanonical}; + use vortex::{ArrayDType, IntoArray, IntoCanonical}; use vortex_dtype::{DType, Nullability, PType}; use crate::RunEndArray; diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index 6f90f8b779..3c0b99f47f 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -28,8 +28,10 @@ impl ZigZagArray { let dtype = DType::from(PType::try_from(&encoded_dtype).expect("ptype").to_signed()) .with_nullability(encoded_dtype.nullability()); + let len = encoded.len(); let children = [encoded]; - Self::try_from_parts(dtype, ZigZagMetadata, children.into(), StatsSet::new()) + + Self::try_from_parts(dtype, len, ZigZagMetadata, children.into(), StatsSet::new()) } pub fn encode(array: &Array) -> VortexResult { @@ -43,11 +45,13 @@ impl ZigZagArray { let ptype = PType::try_from(self.dtype()).expect("ptype"); let encoded = DType::from(ptype.to_unsigned()).with_nullability(self.dtype().nullability()); self.array() - .child(0, &encoded) + .child(0, &encoded, self.len()) .expect("Missing encoded array") } } +impl ArrayTrait for ZigZagArray {} + impl ArrayValidity for ZigZagArray { fn is_valid(&self, index: usize) -> bool { self.encoded().with_dyn(|a| a.is_valid(index)) @@ -71,9 +75,3 @@ impl IntoCanonical for ZigZagArray { todo!("ZigZagArray::flatten") } } - -impl ArrayTrait for ZigZagArray { - fn len(&self) -> usize { - self.encoded().len() - } -} diff --git a/vortex-array/src/array/bool/accessors.rs b/vortex-array/src/array/bool/accessors.rs index 6c464f1cff..5a904dadd3 100644 --- a/vortex-array/src/array/bool/accessors.rs +++ b/vortex-array/src/array/bool/accessors.rs @@ -3,7 +3,7 @@ use vortex_error::VortexResult; use crate::accessor::ArrayAccessor; use crate::array::bool::BoolArray; use crate::validity::Validity; -use crate::{ArrayTrait, IntoArrayVariant}; +use crate::IntoArrayVariant; static TRUE: bool = true; static FALSE: bool = false; diff --git a/vortex-array/src/array/bool/compute/compare.rs b/vortex-array/src/array/bool/compute/compare.rs index a63e432cae..372265ee53 100644 --- a/vortex-array/src/array/bool/compute/compare.rs +++ b/vortex-array/src/array/bool/compute/compare.rs @@ -5,7 +5,7 @@ use vortex_expr::Operator; use crate::array::bool::BoolArray; use crate::compute::compare::CompareFn; -use crate::{Array, ArrayTrait, IntoArray, IntoArrayVariant}; +use crate::{Array, IntoArray, IntoArrayVariant}; impl CompareFn for BoolArray { // TODO(aduffy): replace these with Arrow compute kernels. diff --git a/vortex-array/src/array/bool/compute/slice.rs b/vortex-array/src/array/bool/compute/slice.rs index 6939c41a13..10c74d4d30 100644 --- a/vortex-array/src/array/bool/compute/slice.rs +++ b/vortex-array/src/array/bool/compute/slice.rs @@ -20,7 +20,6 @@ mod tests { use super::*; use crate::compute::slice::slice; use crate::validity::ArrayValidity; - use crate::ArrayTrait; use crate::{compute::unary::scalar_at::scalar_at, AsArray}; #[test] diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 927f731a06..5f4c160a99 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -37,11 +37,9 @@ impl BoolArray { pub fn validity(&self) -> Validity { self.metadata() .validity - .to_validity(self.array().child(0, &Validity::DTYPE)) + .to_validity(self.array().child(0, &Validity::DTYPE, self.len())) } -} -impl BoolArray { pub fn try_new(buffer: BooleanBuffer, validity: Validity) -> VortexResult { let buffer_len = buffer.len(); let buffer_offset = buffer.offset(); @@ -55,6 +53,7 @@ impl BoolArray { Ok(Self { typed: TypedArray::try_from_parts( DType::Bool(validity.nullability()), + buffer_len, BoolMetadata { validity: validity.to_metadata(buffer_len)?, length: buffer_len, @@ -73,6 +72,8 @@ impl BoolArray { } } +impl ArrayTrait for BoolArray {} + impl From for BoolArray { fn from(value: BooleanBuffer) -> Self { Self::try_new(value, Validity::NonNullable).unwrap() @@ -102,12 +103,6 @@ impl FromIterator> for BoolArray { } } -impl ArrayTrait for BoolArray { - fn len(&self) -> usize { - self.metadata().length - } -} - impl IntoCanonical for BoolArray { fn into_canonical(self) -> VortexResult { Ok(Canonical::Bool(self)) diff --git a/vortex-array/src/array/bool/stats.rs b/vortex-array/src/array/bool/stats.rs index 22588e9f59..70b971c630 100644 --- a/vortex-array/src/array/bool/stats.rs +++ b/vortex-array/src/array/bool/stats.rs @@ -6,7 +6,7 @@ use vortex_error::VortexResult; use crate::array::bool::BoolArray; use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity}; -use crate::{ArrayDType, ArrayTrait, IntoArrayVariant}; +use crate::{ArrayDType, IntoArrayVariant}; impl ArrayStatisticsCompute for BoolArray { fn compute_statistics(&self, stat: Stat) -> VortexResult { diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index f038b2f465..2d88d7fdd9 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -14,8 +14,7 @@ use crate::array::varbin::builder::VarBinBuilder; use crate::array::varbin::VarBinArray; use crate::validity::Validity; use crate::{ - Array, ArrayDType, ArrayTrait, ArrayValidity, Canonical, IntoArray, IntoArrayVariant, - IntoCanonical, + Array, ArrayDType, ArrayValidity, Canonical, IntoArray, IntoArrayVariant, IntoCanonical, }; impl IntoCanonical for ChunkedArray { diff --git a/vortex-array/src/array/chunked/compute/take.rs b/vortex-array/src/array/chunked/compute/take.rs index 08647131d0..9afd96649a 100644 --- a/vortex-array/src/array/chunked/compute/take.rs +++ b/vortex-array/src/array/chunked/compute/take.rs @@ -5,8 +5,8 @@ use crate::array::chunked::ChunkedArray; use crate::array::primitive::PrimitiveArray; use crate::compute::take::{take, TakeFn}; use crate::compute::unary::cast::try_cast; +use crate::ArrayDType; use crate::{Array, IntoArray, ToArray}; -use crate::{ArrayDType, ArrayTrait}; impl TakeFn for ChunkedArray { fn take(&self, indices: &Array) -> VortexResult { @@ -55,7 +55,7 @@ impl TakeFn for ChunkedArray { mod test { use crate::array::chunked::ChunkedArray; use crate::compute::take::take; - use crate::{ArrayDType, ArrayTrait, AsArray, IntoArray, IntoCanonical}; + use crate::{ArrayDType, AsArray, IntoArray, IntoCanonical}; #[test] fn test_take() { diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index 7077e47149..d56b24e49c 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -26,7 +26,9 @@ mod stats; impl_encoding!("vortex.chunked", 11u16, Chunked); #[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ChunkedMetadata; +pub struct ChunkedMetadata { + num_chunks: usize, +} impl ChunkedArray { const ENDS_DTYPE: DType = DType::Primitive(PType::U64, Nullability::NonNullable); @@ -38,38 +40,48 @@ impl ChunkedArray { } } - let chunk_ends = PrimitiveArray::from_vec( - [0u64] - .into_iter() - .chain(chunks.iter().map(|c| c.len() as u64)) - .scan(0, |acc, c| { - *acc += c; - Some(*acc) - }) - .collect_vec(), - NonNullable, - ); + let chunk_ends = [0u64] + .into_iter() + .chain(chunks.iter().map(|c| c.len() as u64)) + .scan(0, |acc, c| { + *acc += c; + Some(*acc) + }) + .collect_vec(); - let mut children = vec![chunk_ends.into_array()]; + let num_chunks = chunk_ends.len() - 1; + let length = (*chunk_ends.last().unwrap()) as usize; + + let mut children = vec![PrimitiveArray::from_vec(chunk_ends, NonNullable).into_array()]; children.extend(chunks); - Self::try_from_parts(dtype, ChunkedMetadata, children.into(), StatsSet::new()) + Self::try_from_parts( + dtype, + length, + ChunkedMetadata { num_chunks }, + children.into(), + StatsSet::new(), + ) } #[inline] pub fn chunk(&self, idx: usize) -> Option { + let chunk_start = usize::try_from(&scalar_at(&self.chunk_ends(), idx).unwrap()).unwrap(); + let chunk_end = usize::try_from(&scalar_at(&self.chunk_ends(), idx + 1).unwrap()).unwrap(); + // Offset the index since chunk_ends is child 0. - self.array().child(idx + 1, self.array().dtype()) + self.array() + .child(idx + 1, self.array().dtype(), chunk_end - chunk_start) } pub fn nchunks(&self) -> usize { - self.chunk_ends().len() - 1 + self.metadata().num_chunks } #[inline] pub fn chunk_ends(&self) -> Array { self.array() - .child(0, &Self::ENDS_DTYPE) + .child(0, &Self::ENDS_DTYPE, self.nchunks() + 1) .expect("missing chunk ends") } @@ -101,6 +113,8 @@ impl ChunkedArray { } } +impl ArrayTrait for ChunkedArray {} + impl FromIterator for ChunkedArray { fn from_iter>(iter: T) -> Self { let chunks: Vec = iter.into_iter().collect(); @@ -122,12 +136,6 @@ impl AcceptArrayVisitor for ChunkedArray { } } -impl ArrayTrait for ChunkedArray { - fn len(&self) -> usize { - usize::try_from(&scalar_at(&self.chunk_ends(), self.nchunks()).unwrap()).unwrap() - } -} - impl ArrayValidity for ChunkedArray { fn is_valid(&self, _index: usize) -> bool { todo!() diff --git a/vortex-array/src/array/constant/canonical.rs b/vortex-array/src/array/constant/canonical.rs index 661eaeaa7b..18fd8234c1 100644 --- a/vortex-array/src/array/constant/canonical.rs +++ b/vortex-array/src/array/constant/canonical.rs @@ -6,7 +6,7 @@ use crate::array::bool::BoolArray; use crate::array::constant::ConstantArray; use crate::array::primitive::PrimitiveArray; use crate::validity::Validity; -use crate::{ArrayDType, ArrayTrait}; +use crate::ArrayDType; use crate::{Canonical, IntoCanonical}; impl IntoCanonical for ConstantArray { diff --git a/vortex-array/src/array/constant/compute.rs b/vortex-array/src/array/constant/compute.rs index 019556f932..bcbffafe52 100644 --- a/vortex-array/src/array/constant/compute.rs +++ b/vortex-array/src/array/constant/compute.rs @@ -9,7 +9,7 @@ use crate::compute::slice::SliceFn; use crate::compute::take::TakeFn; use crate::compute::unary::scalar_at::ScalarAtFn; use crate::compute::ArrayCompute; -use crate::{Array, ArrayTrait, IntoArray}; +use crate::{Array, IntoArray}; impl ArrayCompute for ConstantArray { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index b5bf1aa565..706aa21b8d 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -37,6 +37,7 @@ impl ConstantArray { ])); Self::try_from_parts( scalar.dtype().clone(), + length, ConstantMetadata { scalar, length }, [].into(), stats, @@ -49,6 +50,8 @@ impl ConstantArray { } } +impl ArrayTrait for ConstantArray {} + impl ArrayValidity for ConstantArray { fn is_valid(&self, _index: usize) -> bool { match self.metadata().scalar.dtype().is_nullable() { @@ -70,9 +73,3 @@ impl AcceptArrayVisitor for ConstantArray { Ok(()) } } - -impl ArrayTrait for ConstantArray { - fn len(&self) -> usize { - self.metadata().length - } -} diff --git a/vortex-array/src/array/constant/stats.rs b/vortex-array/src/array/constant/stats.rs index 7214245497..96792ab37e 100644 --- a/vortex-array/src/array/constant/stats.rs +++ b/vortex-array/src/array/constant/stats.rs @@ -5,7 +5,6 @@ use vortex_scalar::BoolScalar; use crate::array::constant::ConstantArray; use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; -use crate::ArrayTrait; impl ArrayStatisticsCompute for ConstantArray { fn compute_statistics(&self, _stat: Stat) -> VortexResult { diff --git a/vortex-array/src/array/extension/mod.rs b/vortex-array/src/array/extension/mod.rs index 170aab6d32..f074e9e51d 100644 --- a/vortex-array/src/array/extension/mod.rs +++ b/vortex-array/src/array/extension/mod.rs @@ -19,6 +19,7 @@ impl ExtensionArray { pub fn new(ext_dtype: ExtDType, storage: Array) -> Self { Self::try_from_parts( DType::Extension(ext_dtype, storage.dtype().nullability()), + storage.len(), ExtensionMetadata { storage_dtype: storage.dtype().clone(), }, @@ -30,7 +31,7 @@ impl ExtensionArray { pub fn storage(&self) -> Array { self.array() - .child(0, &self.metadata().storage_dtype) + .child(0, &self.metadata().storage_dtype, self.len()) .expect("Missing storage array") } @@ -49,6 +50,8 @@ impl ExtensionArray { } } +impl ArrayTrait for ExtensionArray {} + impl IntoCanonical for ExtensionArray { fn into_canonical(self) -> VortexResult { Ok(Canonical::Extension(self)) @@ -74,9 +77,3 @@ impl AcceptArrayVisitor for ExtensionArray { impl ArrayStatisticsCompute for ExtensionArray { // TODO(ngates): pass through stats to the underlying and cast the scalars. } - -impl ArrayTrait for ExtensionArray { - fn len(&self) -> usize { - self.storage().len() - } -} diff --git a/vortex-array/src/array/null/compute.rs b/vortex-array/src/array/null/compute.rs index acf6e21188..157a37be68 100644 --- a/vortex-array/src/array/null/compute.rs +++ b/vortex-array/src/array/null/compute.rs @@ -7,7 +7,7 @@ use crate::compute::slice::SliceFn; use crate::compute::take::TakeFn; use crate::compute::unary::scalar_at::ScalarAtFn; use crate::compute::ArrayCompute; -use crate::{Array, ArrayTrait, IntoArray, IntoArrayVariant}; +use crate::{Array, IntoArray, IntoArrayVariant}; impl ArrayCompute for NullArray { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { @@ -62,7 +62,7 @@ mod test { use crate::compute::take::take; use crate::compute::unary::scalar_at::scalar_at; use crate::validity::{ArrayValidity, LogicalValidity}; - use crate::{ArrayTrait, IntoArray}; + use crate::IntoArray; #[test] fn test_slice_nulls() { diff --git a/vortex-array/src/array/null/mod.rs b/vortex-array/src/array/null/mod.rs index 15b6f16b40..9311d32e7c 100644 --- a/vortex-array/src/array/null/mod.rs +++ b/vortex-array/src/array/null/mod.rs @@ -18,6 +18,7 @@ impl NullArray { pub fn new(len: usize) -> Self { Self::try_from_parts( DType::Null, + len, NullMetadata { len }, Arc::new([]), StatsSet::nulls(len, &DType::Null), @@ -55,10 +56,6 @@ impl AcceptArrayVisitor for NullArray { } impl ArrayTrait for NullArray { - fn len(&self) -> usize { - self.metadata().len - } - fn nbytes(&self) -> usize { 0 } diff --git a/vortex-array/src/array/primitive/compute/compare.rs b/vortex-array/src/array/primitive/compute/compare.rs index 2feb092ec2..37d80b72c6 100644 --- a/vortex-array/src/array/primitive/compute/compare.rs +++ b/vortex-array/src/array/primitive/compute/compare.rs @@ -8,7 +8,7 @@ use vortex_expr::Operator; use crate::array::bool::BoolArray; use crate::array::primitive::PrimitiveArray; use crate::compute::compare::CompareFn; -use crate::{Array, ArrayTrait, IntoArray, IntoArrayVariant}; +use crate::{Array, IntoArray, IntoArrayVariant}; impl CompareFn for PrimitiveArray { fn compare(&self, other: &Array, predicate: Operator) -> VortexResult { diff --git a/vortex-array/src/array/primitive/compute/filter_indices.rs b/vortex-array/src/array/primitive/compute/filter_indices.rs index bc5ece0282..b6f2c91552 100644 --- a/vortex-array/src/array/primitive/compute/filter_indices.rs +++ b/vortex-array/src/array/primitive/compute/filter_indices.rs @@ -8,7 +8,7 @@ use vortex_expr::{Disjunction, Predicate, Value}; use crate::array::bool::BoolArray; use crate::array::primitive::PrimitiveArray; use crate::compute::filter_indices::FilterIndicesFn; -use crate::{Array, ArrayTrait, IntoArray}; +use crate::{Array, IntoArray}; impl FilterIndicesFn for PrimitiveArray { fn filter_indices(&self, disjunction: &Disjunction) -> VortexResult { diff --git a/vortex-array/src/array/primitive/compute/subtract_scalar.rs b/vortex-array/src/array/primitive/compute/subtract_scalar.rs index e724cb0beb..bfbce0e86d 100644 --- a/vortex-array/src/array/primitive/compute/subtract_scalar.rs +++ b/vortex-array/src/array/primitive/compute/subtract_scalar.rs @@ -11,7 +11,7 @@ use crate::array::primitive::PrimitiveArray; use crate::compute::unary::scalar_subtract::SubtractScalarFn; use crate::stats::{ArrayStatistics, Stat}; use crate::validity::ArrayValidity; -use crate::{Array, ArrayDType, ArrayTrait, IntoArray}; +use crate::{Array, ArrayDType, IntoArray}; impl SubtractScalarFn for PrimitiveArray { fn subtract_scalar(&self, to_subtract: &Scalar) -> VortexResult { @@ -106,7 +106,7 @@ mod test { use crate::array::primitive::PrimitiveArray; use crate::compute::unary::scalar_subtract::subtract_scalar; - use crate::{ArrayTrait, IntoArray, IntoCanonical}; + use crate::{IntoArray, IntoCanonical}; #[test] fn test_scalar_subtract_unsigned() { diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 7d30f42d38..ca03f19f69 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -31,6 +31,7 @@ impl PrimitiveArray { Ok(Self { typed: TypedArray::try_from_parts( DType::from(T::PTYPE).with_nullability(validity.nullability()), + buffer.len(), PrimitiveMetadata { validity: validity.to_metadata(buffer.len())?, }, @@ -58,7 +59,7 @@ impl PrimitiveArray { pub fn validity(&self) -> Validity { self.metadata() .validity - .to_validity(self.array().child(0, &Validity::DTYPE)) + .to_validity(self.array().child(0, &Validity::DTYPE, self.len())) } pub fn ptype(&self) -> PType { @@ -165,6 +166,8 @@ impl PrimitiveArray { } } +impl ArrayTrait for PrimitiveArray {} + impl From> for PrimitiveArray { fn from(values: Vec) -> Self { Self::from_vec(values, Validity::NonNullable) @@ -183,12 +186,6 @@ impl IntoCanonical for PrimitiveArray { } } -impl ArrayTrait for PrimitiveArray { - fn len(&self) -> usize { - self.buffer().len() / self.ptype().byte_width() - } -} - impl ArrayValidity for PrimitiveArray { fn is_valid(&self, index: usize) -> bool { self.validity().is_valid(index) diff --git a/vortex-array/src/array/sparse/compute/take.rs b/vortex-array/src/array/sparse/compute/take.rs index d487c9e0d8..66fd50001c 100644 --- a/vortex-array/src/array/sparse/compute/take.rs +++ b/vortex-array/src/array/sparse/compute/take.rs @@ -92,7 +92,7 @@ mod test { use crate::array::sparse::SparseArray; use crate::compute::take::take; use crate::validity::Validity; - use crate::{Array, ArrayTrait, IntoArray, IntoArrayVariant}; + use crate::{Array, IntoArray, IntoArrayVariant}; fn sparse_array() -> Array { SparseArray::try_new( diff --git a/vortex-array/src/array/sparse/flatten.rs b/vortex-array/src/array/sparse/flatten.rs index bf382b4a6b..eff5645068 100644 --- a/vortex-array/src/array/sparse/flatten.rs +++ b/vortex-array/src/array/sparse/flatten.rs @@ -8,7 +8,7 @@ use crate::array::bool::BoolArray; use crate::array::primitive::PrimitiveArray; use crate::array::sparse::SparseArray; use crate::validity::Validity; -use crate::{ArrayDType, ArrayTrait, Canonical, IntoArrayVariant, IntoCanonical}; +use crate::{ArrayDType, Canonical, IntoArrayVariant, IntoCanonical}; impl IntoCanonical for SparseArray { fn into_canonical(self) -> VortexResult { diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index b16ac0015a..62aa6e2852 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -21,6 +21,7 @@ pub struct SparseMetadata { indices_dtype: DType, // Offset value for patch indices as a result of slicing indices_offset: usize, + indices_len: usize, len: usize, fill_value: Scalar, } @@ -55,9 +56,11 @@ impl SparseArray { Self::try_from_parts( values.dtype().clone(), + len, SparseMetadata { indices_dtype: indices.dtype().clone(), indices_offset, + indices_len: indices.len(), len, fill_value, }, @@ -76,14 +79,18 @@ impl SparseArray { #[inline] pub fn values(&self) -> Array { self.array() - .child(1, self.dtype()) + .child(1, self.dtype(), self.indices().len()) .expect("missing child array") } #[inline] pub fn indices(&self) -> Array { self.array() - .child(0, &self.metadata().indices_dtype) + .child( + 0, + &self.metadata().indices_dtype, + self.metadata().indices_len, + ) .expect("missing indices array") } @@ -129,11 +136,7 @@ impl SparseArray { } } -impl ArrayTrait for SparseArray { - fn len(&self) -> usize { - self.metadata().len - } -} +impl ArrayTrait for SparseArray {} impl AcceptArrayVisitor for SparseArray { fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 68333ec6a9..e8494b92f2 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -24,7 +24,7 @@ impl StructArray { unreachable!() }; let dtype = st.dtypes().get(idx)?; - self.array().child(idx, dtype) + self.array().child(idx, dtype, self.len()) } pub fn field_by_name(&self, name: &str) -> Option { @@ -55,9 +55,11 @@ impl StructArray { } pub fn validity(&self) -> Validity { - self.metadata() - .validity - .to_validity(self.array().child(self.nfields(), &Validity::DTYPE)) + self.metadata().validity.to_validity(self.array().child( + self.nfields(), + &Validity::DTYPE, + self.len(), + )) } } @@ -78,7 +80,7 @@ impl StructArray { vortex_bail!("Got {} names and {} fields", names.len(), fields.len()); } - if fields.iter().any(|a| a.with_dyn(|a| a.len()) != length) { + if fields.iter().any(|a| a.len() != length) { vortex_bail!("Expected all struct fields to have length {}", length); } @@ -97,6 +99,7 @@ impl StructArray { StructDType::new(names, field_dtypes), Nullability::NonNullable, ), + length, StructMetadata { length, validity: validity_metadata, @@ -153,6 +156,8 @@ impl StructArray { } } +impl ArrayTrait for StructArray {} + impl IntoCanonical for StructArray { /// StructEncoding is the canonical form for a [DType::Struct] array, so return self. fn into_canonical(self) -> VortexResult { @@ -160,12 +165,6 @@ impl IntoCanonical for StructArray { } } -impl ArrayTrait for StructArray { - fn len(&self) -> usize { - self.metadata().length - } -} - impl ArrayValidity for StructArray { fn is_valid(&self, index: usize) -> bool { self.validity().is_valid(index) @@ -197,7 +196,7 @@ mod test { use crate::array::struct_::StructArray; use crate::array::varbin::VarBinArray; use crate::validity::Validity; - use crate::{ArrayTrait, IntoArray}; + use crate::IntoArray; #[test] fn test_project() { diff --git a/vortex-array/src/array/varbin/array.rs b/vortex-array/src/array/varbin/array.rs index 5841c7a4d6..655377d6c8 100644 --- a/vortex-array/src/array/varbin/array.rs +++ b/vortex-array/src/array/varbin/array.rs @@ -3,7 +3,6 @@ use vortex_error::VortexResult; use crate::array::varbin::VarBinArray; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; -use crate::ArrayTrait; impl ArrayValidity for VarBinArray { fn is_valid(&self, index: usize) -> bool { @@ -22,9 +21,3 @@ impl AcceptArrayVisitor for VarBinArray { visitor.visit_validity(&self.validity()) } } - -impl ArrayTrait for VarBinArray { - fn len(&self) -> usize { - self.offsets().len() - 1 - } -} diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index 5069bb7303..06726a0211 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -27,6 +27,7 @@ impl_encoding!("vortex.varbin", 4u16, VarBin); pub struct VarBinMetadata { validity: ValidityMetadata, offsets_dtype: DType, + bytes_len: usize, } impl VarBinArray { @@ -49,9 +50,12 @@ impl VarBinArray { vortex_bail!("incorrect validity {:?}", validity); } + let length = offsets.len() - 1; + let metadata = VarBinMetadata { validity: validity.to_metadata(offsets.len() - 1)?, offsets_dtype: offsets.dtype().clone(), + bytes_len: bytes.len(), }; let mut children = Vec::with_capacity(3); @@ -61,13 +65,13 @@ impl VarBinArray { children.push(a) } - Self::try_from_parts(dtype, metadata, children.into(), StatsSet::new()) + Self::try_from_parts(dtype, length, metadata, children.into(), StatsSet::new()) } #[inline] pub fn offsets(&self) -> Array { self.array() - .child(0, &self.metadata().offsets_dtype) + .child(0, &self.metadata().offsets_dtype, self.len() + 1) .expect("missing offsets") } @@ -82,13 +86,15 @@ impl VarBinArray { #[inline] pub fn bytes(&self) -> Array { - self.array().child(1, &DType::BYTES).expect("missing bytes") + self.array() + .child(1, &DType::BYTES, self.metadata().bytes_len) + .expect("missing bytes") } pub fn validity(&self) -> Validity { self.metadata() .validity - .to_validity(self.array().child(2, &Validity::DTYPE)) + .to_validity(self.array().child(2, &Validity::DTYPE, self.len())) } pub fn sliced_bytes(&self) -> VortexResult { @@ -157,6 +163,8 @@ impl VarBinArray { } } +impl ArrayTrait for VarBinArray {} + impl From> for VarBinArray { fn from(value: Vec<&[u8]>) -> Self { Self::from_vec(value, DType::Binary(Nullability::NonNullable)) diff --git a/vortex-array/src/array/varbin/stats.rs b/vortex-array/src/array/varbin/stats.rs index 5a8ff73b15..8c22fa92a6 100644 --- a/vortex-array/src/array/varbin/stats.rs +++ b/vortex-array/src/array/varbin/stats.rs @@ -7,7 +7,7 @@ use vortex_error::VortexResult; use crate::accessor::ArrayAccessor; use crate::array::varbin::{varbin_scalar, VarBinArray}; use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; -use crate::{ArrayDType, ArrayTrait}; +use crate::ArrayDType; impl ArrayStatisticsCompute for VarBinArray { fn compute_statistics(&self, _stat: Stat) -> VortexResult { diff --git a/vortex-array/src/array/varbinview/accessor.rs b/vortex-array/src/array/varbinview/accessor.rs index cf2ef05578..8740ac0b48 100644 --- a/vortex-array/src/array/varbinview/accessor.rs +++ b/vortex-array/src/array/varbinview/accessor.rs @@ -12,7 +12,7 @@ impl ArrayAccessor<[u8]> for VarBinViewArray { f: F, ) -> VortexResult { let views = self.view_slice(); - let bytes: Vec = (0..self.metadata().n_children) + let bytes: Vec = (0..self.metadata().data_lens.len()) .map(|i| { self.bytes(i) .into_canonical() diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs index cf9a06b352..b6263d3be9 100644 --- a/vortex-array/src/array/varbinview/compute.rs +++ b/vortex-array/src/array/varbinview/compute.rs @@ -36,7 +36,7 @@ impl SliceFn for VarBinViewArray { slice(&self.views(), start * VIEW_SIZE, stop * VIEW_SIZE)? .into_array_data() .into_array(), - (0..self.metadata().n_children) + (0..self.metadata().data_lens.len()) .map(|i| self.bytes(i)) .collect::>(), self.dtype().clone(), diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 339f05b9b3..f5e08f83c0 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -108,7 +108,7 @@ impl_encoding!("vortex.varbinview", 5u16, VarBinView); #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VarBinViewMetadata { validity: ValidityMetadata, - n_children: usize, + data_lens: Vec, } impl VarBinViewArray { @@ -136,9 +136,11 @@ impl VarBinViewArray { vortex_bail!("incorrect validity {:?}", validity); } + let length = views.len() / VIEW_SIZE; + let metadata = VarBinViewMetadata { validity: validity.to_metadata(views.len() / VIEW_SIZE)?, - n_children: data.len(), + data_lens: data.iter().map(|a| a.len()).collect_vec(), }; let mut children = Vec::with_capacity(data.len() + 2); @@ -148,7 +150,7 @@ impl VarBinViewArray { children.push(a) } - Self::try_from_parts(dtype, metadata, children.into(), StatsSet::new()) + Self::try_from_parts(dtype, length, metadata, children.into(), StatsSet::new()) } fn view_slice(&self) -> &[BinaryView] { @@ -169,21 +171,24 @@ impl VarBinViewArray { #[inline] pub fn views(&self) -> Array { - self.array().child(0, &DType::BYTES).expect("missing views") + self.array() + .child(0, &DType::BYTES, self.len() * VIEW_SIZE) + .expect("missing views") } #[inline] pub fn bytes(&self, idx: usize) -> Array { self.array() - .child(idx + 1, &DType::BYTES) + .child(idx + 1, &DType::BYTES, self.metadata().data_lens[idx]) .expect("Missing data buffer") } pub fn validity(&self) -> Validity { - self.metadata().validity.to_validity( - self.array() - .child(self.metadata().n_children + 1, &Validity::DTYPE), - ) + self.metadata().validity.to_validity(self.array().child( + self.metadata().data_lens.len() + 1, + &Validity::DTYPE, + self.len(), + )) } pub fn from_vec>(vec: Vec, dtype: DType) -> Self { @@ -225,6 +230,8 @@ impl VarBinViewArray { } } +impl ArrayTrait for VarBinViewArray {} + impl IntoCanonical for VarBinViewArray { fn into_canonical(self) -> VortexResult { let nullable = self.dtype().is_nullable(); @@ -251,7 +258,7 @@ fn as_arrow(var_bin_view: VarBinViewArray) -> ArrayRef { .to_null_buffer() .expect("null buffer"); - let data = (0..var_bin_view.metadata().n_children) + let data = (0..var_bin_view.metadata().data_lens.len()) .map(|i| { var_bin_view .bytes(i) @@ -299,19 +306,13 @@ impl ArrayValidity for VarBinViewArray { impl AcceptArrayVisitor for VarBinViewArray { fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { visitor.visit_child("views", &self.views())?; - for i in 0..self.metadata().n_children { + for i in 0..self.metadata().data_lens.len() { visitor.visit_child(format!("bytes_{i}").as_str(), &self.bytes(i))?; } visitor.visit_validity(&self.validity()) } } -impl ArrayTrait for VarBinViewArray { - fn len(&self) -> usize { - self.view_slice().len() - } -} - impl From> for VarBinViewArray { fn from(value: Vec<&[u8]>) -> Self { Self::from_vec(value, DType::Binary(Nullability::NonNullable)) @@ -367,7 +368,7 @@ mod test { use crate::array::varbinview::VarBinViewArray; use crate::compute::slice::slice; use crate::compute::unary::scalar_at::scalar_at; - use crate::{ArrayTrait, Canonical, IntoArray, IntoCanonical}; + use crate::{Canonical, IntoArray, IntoCanonical}; #[test] pub fn varbin_view() { diff --git a/vortex-array/src/array/varbinview/stats.rs b/vortex-array/src/array/varbinview/stats.rs index 3eaf25af50..0afaa0f764 100644 --- a/vortex-array/src/array/varbinview/stats.rs +++ b/vortex-array/src/array/varbinview/stats.rs @@ -4,7 +4,7 @@ use crate::accessor::ArrayAccessor; use crate::array::varbin::compute_stats; use crate::array::varbinview::VarBinViewArray; use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet}; -use crate::{ArrayDType, ArrayTrait}; +use crate::ArrayDType; impl ArrayStatisticsCompute for VarBinViewArray { fn compute_statistics(&self, _stat: Stat) -> VortexResult { diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 2d737243fb..c21497fe2e 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -26,7 +26,7 @@ use crate::arrow::wrappers::as_offset_buffer; use crate::compute::unary::cast::try_cast; use crate::encoding::ArrayEncoding; use crate::validity::ArrayValidity; -use crate::{Array, ArrayDType, ArrayTrait, IntoArray, ToArray}; +use crate::{Array, ArrayDType, IntoArray, ToArray}; /// The set of canonical array encodings, also the set of encodings that can be transferred to /// Arrow with zero-copy. diff --git a/vortex-array/src/data.rs b/vortex-array/src/data.rs index 288245144b..269e482355 100644 --- a/vortex-array/src/data.rs +++ b/vortex-array/src/data.rs @@ -13,6 +13,7 @@ use crate::{Array, ArrayDType, ArrayMetadata, IntoArray, ToArray}; pub struct ArrayData { encoding: EncodingRef, dtype: DType, // FIXME(ngates): Arc? + len: usize, metadata: Arc, buffer: Option, children: Arc<[Array]>, @@ -23,6 +24,7 @@ impl ArrayData { pub fn try_new( encoding: EncodingRef, dtype: DType, + len: usize, metadata: Arc, buffer: Option, children: Arc<[Array]>, @@ -31,6 +33,7 @@ impl ArrayData { let data = Self { encoding, dtype, + len, metadata, buffer, children, @@ -53,6 +56,14 @@ impl ArrayData { &self.dtype } + pub fn len(&self) -> usize { + self.len + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + pub fn metadata(&self) -> &Arc { &self.metadata } @@ -65,11 +76,12 @@ impl ArrayData { self.buffer } - pub fn child(&self, index: usize, dtype: &DType) -> Option<&Array> { + pub fn child(&self, index: usize, dtype: &DType, len: usize) -> Option<&Array> { match self.children.get(index) { None => None, Some(child) => { assert_eq!(child.dtype(), dtype, "Child requested with incorrect dtype"); + assert_eq!(child.len(), len, "Child requested with incorrect length"); Some(child) } } diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 6dca0708a0..7ccc9fa9bf 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -77,14 +77,23 @@ macro_rules! impl_encoding { self.typed.metadata() } + pub fn len(&self) -> usize { + self.typed.array().len() + } + + pub fn is_empty(&self) -> bool { + self.typed.array().is_empty() + } + #[allow(dead_code)] fn try_from_parts( dtype: DType, + len: usize, metadata: [<$Name Metadata>], children: Arc<[Array]>, stats: StatsSet, ) -> VortexResult { - Ok(Self { typed: TypedArray::try_from_parts(dtype, metadata, None, children, stats)? }) + Ok(Self { typed: TypedArray::try_from_parts(dtype, len, metadata, None, children, stats)? }) } } impl GetArrayMetadata for [<$Name Array>] { @@ -237,6 +246,7 @@ impl IntoA ArrayData::try_new( encoding, array.dtype().clone(), + array.len(), metadata, visitor.buffer, visitor.children.into(), diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index c30b3521b2..b85421a8b0 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -92,21 +92,27 @@ impl Array { } pub fn len(&self) -> usize { - self.with_dyn(|a| a.len()) + match self { + Self::Data(d) => d.len(), + Self::View(v) => v.len(), + } } - pub fn nbytes(&self) -> usize { - self.with_dyn(|a| a.nbytes()) + pub fn is_empty(&self) -> bool { + match self { + Self::Data(d) => d.is_empty(), + Self::View(v) => v.is_empty(), + } } - pub fn is_empty(&self) -> bool { - self.with_dyn(|a| a.is_empty()) + pub fn nbytes(&self) -> usize { + self.with_dyn(|a| a.nbytes()) } - pub fn child<'a>(&'a self, idx: usize, dtype: &'a DType) -> Option { + pub fn child<'a>(&'a self, idx: usize, dtype: &'a DType, len: usize) -> Option { match self { - Self::Data(d) => d.child(idx, dtype).cloned(), - Self::View(v) => v.child(idx, dtype).map(Array::View), + Self::Data(d) => d.child(idx, dtype, len).cloned(), + Self::View(v) => v.child(idx, dtype, len).map(Array::View), } } @@ -229,13 +235,6 @@ pub trait ArrayTrait: + ArrayStatisticsCompute + ToArrayData { - fn len(&self) -> usize; - - fn is_empty(&self) -> bool { - // TODO(ngates): remove this default impl to encourage explicit implementation - self.len() == 0 - } - fn nbytes(&self) -> usize { let mut visitor = NBytesVisitor(0); self.accept(&mut visitor).unwrap(); diff --git a/vortex-array/src/typed.rs b/vortex-array/src/typed.rs index e82cf6b233..d7e5a74f7d 100644 --- a/vortex-array/src/typed.rs +++ b/vortex-array/src/typed.rs @@ -16,6 +16,7 @@ pub struct TypedArray { impl TypedArray { pub fn try_from_parts( dtype: DType, + len: usize, metadata: D::Metadata, buffer: Option, children: Arc<[Array]>, @@ -24,6 +25,7 @@ impl TypedArray { let array = Array::Data(ArrayData::try_new( D::ENCODING, dtype, + len, Arc::new(metadata.clone()), buffer, children, diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 09c82584a1..638632972a 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -62,7 +62,7 @@ impl Validity { Self::AllInvalid => Ok(ValidityMetadata::AllInvalid), Self::Array(a) => { // We force the caller to validate the length here. - let validity_len = a.with_dyn(|a| a.len()); + let validity_len = a.len(); if validity_len != length { vortex_bail!( "Validity array length {} doesn't match array length {}", diff --git a/vortex-array/src/view.rs b/vortex-array/src/view.rs index d1765c0f58..30075ea541 100644 --- a/vortex-array/src/view.rs +++ b/vortex-array/src/view.rs @@ -20,6 +20,7 @@ use crate::{Array, IntoArray, ToArray}; pub struct ArrayView { encoding: EncodingRef, dtype: DType, + len: usize, flatbuffer: Buffer, flatbuffer_loc: usize, // TODO(ngates): create an RC'd vector that can be lazily sliced. @@ -45,6 +46,7 @@ impl ArrayView { pub fn try_new( ctx: Arc, dtype: DType, + len: usize, flatbuffer: Buffer, flatbuffer_init: F, buffers: Vec, @@ -69,6 +71,7 @@ impl ArrayView { let view = Self { encoding, dtype, + len, flatbuffer, flatbuffer_loc, buffers, @@ -98,12 +101,20 @@ impl ArrayView { &self.dtype } + pub fn len(&self) -> usize { + self.len + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + pub fn metadata(&self) -> Option<&[u8]> { self.flatbuffer().metadata().map(|m| m.bytes()) } // TODO(ngates): should we separate self and DType lifetimes? Should DType be cloned? - pub fn child(&self, idx: usize, dtype: &DType) -> Option { + pub fn child(&self, idx: usize, dtype: &DType, len: usize) -> Option { let child = self.array_child(idx)?; let flatbuffer_loc = child._tab.loc(); @@ -123,6 +134,7 @@ impl ArrayView { Some(Self { encoding, dtype: dtype.clone(), + len, flatbuffer: self.flatbuffer.clone(), flatbuffer_loc, buffers: self.buffers[buffer_offset..][0..buffer_count].to_vec(), diff --git a/vortex-ipc/flatbuffers/message.fbs b/vortex-ipc/flatbuffers/message.fbs index 01ca8fef2f..8836537d23 100644 --- a/vortex-ipc/flatbuffers/message.fbs +++ b/vortex-ipc/flatbuffers/message.fbs @@ -23,6 +23,7 @@ struct Buffer { table Chunk { array: vortex.array.Array; + length: uint64; buffers: [Buffer]; buffer_size: uint64; } diff --git a/vortex-ipc/src/chunked_reader/take_rows.rs b/vortex-ipc/src/chunked_reader/take_rows.rs index 93d9a33f48..562e515899 100644 --- a/vortex-ipc/src/chunked_reader/take_rows.rs +++ b/vortex-ipc/src/chunked_reader/take_rows.rs @@ -222,7 +222,7 @@ mod test { use itertools::Itertools; use vortex::array::chunked::ChunkedArray; use vortex::array::primitive::PrimitiveArray; - use vortex::{ArrayTrait, Context, IntoArray, IntoCanonical}; + use vortex::{Context, IntoArray, IntoCanonical}; use vortex_buffer::Buffer; use vortex_dtype::PType; use vortex_error::VortexResult; diff --git a/vortex-ipc/src/message_reader.rs b/vortex-ipc/src/message_reader.rs index 1b1273470a..df07f1642e 100644 --- a/vortex-ipc/src/message_reader.rs +++ b/vortex-ipc/src/message_reader.rs @@ -159,9 +159,10 @@ impl MessageReader { ctx: Arc, dtype: DType, ) -> VortexResult> { - if self.peek().and_then(|m| m.header_as_chunk()).is_none() { - return Ok(None); - } + let length = match self.peek().and_then(|m| m.header_as_chunk()) { + None => return Ok(None), + Some(chunk) => chunk.length() as usize, + }; let buffers = self.read_buffers().await?; let flatbuffer = self.next_raw().await?; @@ -169,6 +170,7 @@ impl MessageReader { let view = ArrayView::try_new( ctx, dtype, + length, flatbuffer, |flatbuffer| { unsafe { root_unchecked::(flatbuffer) } diff --git a/vortex-ipc/src/messages.rs b/vortex-ipc/src/messages.rs index 8cbc25dfb9..5462b823da 100644 --- a/vortex-ipc/src/messages.rs +++ b/vortex-ipc/src/messages.rs @@ -70,6 +70,8 @@ impl<'a> WriteFlatBuffer for IPCChunk<'a> { let array_data = self.0; let array = Some(IPCArray(array_data).write_flatbuffer(fbb)); + let length = array_data.len() as u64; + // Walk the ColumnData depth-first to compute the buffer offsets. let mut buffers = vec![]; let mut offset = 0; @@ -90,6 +92,7 @@ impl<'a> WriteFlatBuffer for IPCChunk<'a> { fbb, &fb::ChunkArgs { array, + length, buffers, buffer_size: offset as u64, }, diff --git a/vortex-sampling-compressor/src/compressors/bitpacked.rs b/vortex-sampling-compressor/src/compressors/bitpacked.rs index d16edfca08..4c5e9461f6 100644 --- a/vortex-sampling-compressor/src/compressors/bitpacked.rs +++ b/vortex-sampling-compressor/src/compressors/bitpacked.rs @@ -1,6 +1,6 @@ use vortex::array::primitive::PrimitiveArray; use vortex::stats::ArrayStatistics; -use vortex::{Array, ArrayDef, ArrayTrait, IntoArray}; +use vortex::{Array, ArrayDef, IntoArray}; use vortex_error::{vortex_err, VortexResult}; use vortex_fastlanes::{ bitpack, bitpack_patches, count_exceptions, find_best_bit_width, BitPacked, BitPackedArray, diff --git a/vortex-sampling-compressor/src/compressors/delta.rs b/vortex-sampling-compressor/src/compressors/delta.rs index d5bf0f1ce1..507c74703e 100644 --- a/vortex-sampling-compressor/src/compressors/delta.rs +++ b/vortex-sampling-compressor/src/compressors/delta.rs @@ -47,7 +47,7 @@ impl EncodingCompressor for DeltaCompressor { .compress(deltas.array(), like.as_ref().and_then(|l| l.child(1)))?; Ok(CompressedArray::new( - DeltaArray::try_new(array.len(), bases.array, deltas.array, validity)?.into_array(), + DeltaArray::try_new(bases.array, deltas.array, validity)?.into_array(), Some(CompressionTree::new(self, vec![bases.path, deltas.path])), )) } diff --git a/vortex-sampling-compressor/src/compressors/sparse.rs b/vortex-sampling-compressor/src/compressors/sparse.rs index 2206b46b02..7c099d15f1 100644 --- a/vortex-sampling-compressor/src/compressors/sparse.rs +++ b/vortex-sampling-compressor/src/compressors/sparse.rs @@ -1,5 +1,5 @@ use vortex::array::sparse::{Sparse, SparseArray}; -use vortex::{Array, ArrayDef, ArrayTrait, IntoArray}; +use vortex::{Array, ArrayDef, IntoArray}; use vortex_error::VortexResult; use crate::compressors::{CompressedArray, CompressionTree, EncodingCompressor}; diff --git a/vortex-sampling-compressor/src/lib.rs b/vortex-sampling-compressor/src/lib.rs index 344b0d2dc7..a948d92932 100644 --- a/vortex-sampling-compressor/src/lib.rs +++ b/vortex-sampling-compressor/src/lib.rs @@ -8,7 +8,7 @@ use vortex::array::struct_::{Struct, StructArray}; use vortex::compress::{check_dtype_unchanged, check_validity_unchanged, CompressionStrategy}; use vortex::compute::slice::slice; use vortex::validity::Validity; -use vortex::{Array, ArrayDType, ArrayDef, ArrayTrait, IntoArray, IntoCanonical}; +use vortex::{Array, ArrayDType, ArrayDef, IntoArray, IntoCanonical}; use vortex_error::VortexResult; use crate::compressors::alp::ALPCompressor;