From 9a78dd486b83e35d991427545e02fa1d006e26b4 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 28 Nov 2024 16:55:23 -0500 Subject: [PATCH] Variants VTable (#1501) --- encodings/alp/src/alp/array.rs | 8 +- encodings/alp/src/alp_rd/variants.rs | 14 +- encodings/bytebool/src/array.rs | 8 +- encodings/datetime-parts/src/array.rs | 11 +- encodings/dict/src/variants.rs | 24 +- encodings/fastlanes/src/bitpacking/mod.rs | 11 +- encodings/fastlanes/src/delta/mod.rs | 8 +- encodings/fastlanes/src/for/mod.rs | 8 +- encodings/fsst/src/array.rs | 12 +- encodings/roaring/src/boolean/mod.rs | 8 +- encodings/roaring/src/integer/mod.rs | 11 +- encodings/runend-bool/src/array.rs | 8 +- encodings/runend/src/array.rs | 15 +- encodings/zigzag/src/array.rs | 11 +- vortex-array/src/array/bool/mod.rs | 8 +- vortex-array/src/array/chunked/canonical.rs | 10 +- vortex-array/src/array/chunked/variants.rs | 68 +++--- vortex-array/src/array/constant/variants.rs | 48 ++-- vortex-array/src/array/extension/mod.rs | 11 +- vortex-array/src/array/null/mod.rs | 8 +- vortex-array/src/array/primitive/mod.rs | 11 +- vortex-array/src/array/sparse/variants.rs | 66 +++--- vortex-array/src/array/struct_/mod.rs | 8 +- vortex-array/src/array/varbin/variants.rs | 16 +- vortex-array/src/array/varbinview/variants.rs | 16 +- vortex-array/src/data/mod.rs | 63 +++-- vortex-array/src/encoding/mod.rs | 2 + vortex-array/src/encoding/opaque.rs | 3 + vortex-array/src/lib.rs | 2 - vortex-array/src/variants.rs | 221 ++++++++++++------ vortex-datafusion/src/memory/plans.rs | 9 +- vortex-datafusion/src/memory/statistics.rs | 81 +++---- vortex-expr/src/select.rs | 56 +++-- vortex-file/src/pruning.rs | 17 +- vortex-file/src/read/layouts/columnar.rs | 32 ++- vortex-file/src/read/metadata.rs | 122 +++++----- vortex-file/src/tests.rs | 42 +--- vortex-sampling-compressor/tests/smoketest.rs | 3 +- 38 files changed, 586 insertions(+), 494 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 3938fe785b..91e1f67d3c 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -5,7 +5,7 @@ use vortex_array::array::PrimitiveArray; use vortex_array::encoding::ids; use vortex_array::stats::StatisticsVTable; use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -124,9 +124,9 @@ impl ALPArray { impl ArrayTrait for ALPArray {} -impl ArrayVariants for ALPArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for ALPEncoding { + fn as_primitive_array<'a>(&self, array: &'a ALPArray) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/alp/src/alp_rd/variants.rs b/encodings/alp/src/alp_rd/variants.rs index 521800b4f6..5aeed5adc0 100644 --- a/encodings/alp/src/alp_rd/variants.rs +++ b/encodings/alp/src/alp_rd/variants.rs @@ -1,14 +1,10 @@ -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; -use crate::ALPRDArray; +use crate::{ALPRDArray, ALPRDEncoding}; -impl ArrayVariants for ALPRDArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) - } - - fn as_primitive_array_unchecked(&self) -> &dyn PrimitiveArrayTrait { - self +impl VariantsVTable for ALPRDEncoding { + fn as_primitive_array<'a>(&self, array: &'a ALPRDArray) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index e4518121c8..9f4c5f4fea 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -8,7 +8,7 @@ use vortex_array::array::BoolArray; use vortex_array::encoding::ids; use vortex_array::stats::StatsSet; use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, BoolArrayTrait}; +use vortex_array::variants::{BoolArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical}; use vortex_buffer::Buffer; @@ -84,9 +84,9 @@ impl ByteBoolArray { impl ArrayTrait for ByteBoolArray {} -impl ArrayVariants for ByteBoolArray { - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - Some(self) +impl VariantsVTable for ByteBoolEncoding { + fn as_bool_array<'a>(&self, array: &'a ByteBoolArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } } diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 1c24369b25..cc4d672426 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -6,7 +6,7 @@ use vortex_array::compute::try_cast; use vortex_array::encoding::ids; use vortex_array::stats::{Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, ExtensionArrayTrait}; +use vortex_array::variants::{ExtensionArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -114,9 +114,12 @@ impl DateTimePartsArray { impl ArrayTrait for DateTimePartsArray {} -impl ArrayVariants for DateTimePartsArray { - fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { - Some(self) +impl VariantsVTable for DateTimePartsEncoding { + fn as_extension_array<'a>( + &self, + array: &'a DateTimePartsArray, + ) -> Option<&'a dyn ExtensionArrayTrait> { + Some(array) } } diff --git a/encodings/dict/src/variants.rs b/encodings/dict/src/variants.rs index 5725a07370..1e4fc15a10 100644 --- a/encodings/dict/src/variants.rs +++ b/encodings/dict/src/variants.rs @@ -1,26 +1,24 @@ use vortex_array::variants::{ - ArrayVariants, BinaryArrayTrait, BoolArrayTrait, PrimitiveArrayTrait, Utf8ArrayTrait, + BinaryArrayTrait, BoolArrayTrait, PrimitiveArrayTrait, Utf8ArrayTrait, VariantsVTable, }; -use vortex_array::ArrayDType; -use vortex_dtype::DType; -use crate::DictArray; +use crate::{DictArray, DictEncoding}; -impl ArrayVariants for DictArray { - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - matches!(self.dtype(), DType::Bool(..)).then_some(self) +impl VariantsVTable for DictEncoding { + fn as_bool_array<'a>(&self, array: &'a DictArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - matches!(self.dtype(), DType::Primitive(..)).then_some(self) + fn as_primitive_array<'a>(&self, array: &'a DictArray) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - matches!(self.dtype(), DType::Utf8(..)).then_some(self) + fn as_utf8_array<'a>(&self, array: &'a DictArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - matches!(self.dtype(), DType::Binary(..)).then_some(self) + fn as_binary_array<'a>(&self, array: &'a DictArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } } diff --git a/encodings/fastlanes/src/bitpacking/mod.rs b/encodings/fastlanes/src/bitpacking/mod.rs index 2ca9be458f..3cd8171332 100644 --- a/encodings/fastlanes/src/bitpacking/mod.rs +++ b/encodings/fastlanes/src/bitpacking/mod.rs @@ -8,7 +8,7 @@ use vortex_array::array::{PrimitiveArray, SparseArray}; use vortex_array::encoding::ids; use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical, @@ -232,9 +232,12 @@ impl StatisticsVTable for BitPackedEncoding {} impl ArrayTrait for BitPackedArray {} -impl ArrayVariants for BitPackedArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for BitPackedEncoding { + fn as_primitive_array<'a>( + &self, + array: &'a BitPackedArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/fastlanes/src/delta/mod.rs b/encodings/fastlanes/src/delta/mod.rs index 58b17bb381..4c15f888e3 100644 --- a/encodings/fastlanes/src/delta/mod.rs +++ b/encodings/fastlanes/src/delta/mod.rs @@ -6,7 +6,7 @@ use vortex_array::array::PrimitiveArray; use vortex_array::encoding::ids; use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -218,9 +218,9 @@ impl DeltaArray { impl ArrayTrait for DeltaArray {} -impl ArrayVariants for DeltaArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for DeltaEncoding { + fn as_primitive_array<'a>(&self, array: &'a DeltaArray) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index 7b8a2488c8..6292b4638c 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use vortex_array::encoding::ids; use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical, @@ -104,9 +104,9 @@ impl StatisticsVTable for FoREncoding {} impl ArrayTrait for FoRArray {} -impl ArrayVariants for FoRArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for FoREncoding { + fn as_primitive_array<'a>(&self, array: &'a FoRArray) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 5cec37e609..44fc5d197a 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -7,7 +7,7 @@ use vortex_array::array::{VarBin, VarBinArray}; use vortex_array::encoding::ids; use vortex_array::stats::{StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait}; +use vortex_array::variants::{BinaryArrayTrait, Utf8ArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayDef, ArrayLen, ArrayTrait, IntoCanonical, @@ -212,13 +212,13 @@ impl ValidityVTable for FSSTEncoding { } } -impl ArrayVariants for FSSTArray { - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - Some(self) +impl VariantsVTable for FSSTEncoding { + fn as_utf8_array<'a>(&self, array: &'a FSSTArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - Some(self) + fn as_binary_array<'a>(&self, array: &'a FSSTArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } } diff --git a/encodings/roaring/src/boolean/mod.rs b/encodings/roaring/src/boolean/mod.rs index 3b82db257b..7f191d4880 100644 --- a/encodings/roaring/src/boolean/mod.rs +++ b/encodings/roaring/src/boolean/mod.rs @@ -10,7 +10,7 @@ use vortex_array::array::BoolArray; use vortex_array::encoding::ids; use vortex_array::stats::StatsSet; use vortex_array::validity::{LogicalValidity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, BoolArrayTrait}; +use vortex_array::variants::{BoolArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical, @@ -84,9 +84,9 @@ impl RoaringBoolArray { impl ArrayTrait for RoaringBoolArray {} -impl ArrayVariants for RoaringBoolArray { - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - Some(self) +impl VariantsVTable for RoaringBoolEncoding { + fn as_bool_array<'a>(&self, array: &'a RoaringBoolArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } } diff --git a/encodings/roaring/src/integer/mod.rs b/encodings/roaring/src/integer/mod.rs index b749c7a7ff..b40ffd3f43 100644 --- a/encodings/roaring/src/integer/mod.rs +++ b/encodings/roaring/src/integer/mod.rs @@ -9,7 +9,7 @@ use vortex_array::compute::try_cast; use vortex_array::encoding::ids; use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{LogicalValidity, Validity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -99,9 +99,12 @@ impl RoaringIntArray { impl ArrayTrait for RoaringIntArray {} -impl ArrayVariants for RoaringIntArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for RoaringIntEncoding { + fn as_primitive_array<'a>( + &self, + array: &'a RoaringIntArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/runend-bool/src/array.rs b/encodings/runend-bool/src/array.rs index 552821721d..970ba769fc 100644 --- a/encodings/runend-bool/src/array.rs +++ b/encodings/runend-bool/src/array.rs @@ -6,7 +6,7 @@ use vortex_array::compute::{scalar_at, search_sorted, SearchSortedSide}; use vortex_array::encoding::ids; use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, BoolArrayTrait, PrimitiveArrayTrait}; +use vortex_array::variants::{BoolArrayTrait, PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -178,9 +178,9 @@ pub(crate) fn value_at_index(idx: usize, start: bool) -> bool { impl BoolArrayTrait for RunEndBoolArray {} -impl ArrayVariants for RunEndBoolArray { - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - Some(self) +impl VariantsVTable for RunEndBoolEncoding { + fn as_bool_array<'a>(&self, array: &'a RunEndBoolArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } } diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 415ba99109..8230a91962 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -8,7 +8,7 @@ use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ ArrayValidity, LogicalValidity, Validity, ValidityMetadata, ValidityVTable, }; -use vortex_array::variants::{ArrayVariants, BoolArrayTrait, PrimitiveArrayTrait}; +use vortex_array::variants::{BoolArrayTrait, PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -187,9 +187,16 @@ impl RunEndArray { impl ArrayTrait for RunEndArray {} -impl ArrayVariants for RunEndArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for RunEndEncoding { + fn as_bool_array<'a>(&self, array: &'a RunEndArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) + } + + fn as_primitive_array<'a>( + &self, + array: &'a RunEndArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 63a8b022a4..c3af17ee4f 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -5,7 +5,7 @@ use vortex_array::array::PrimitiveArray; use vortex_array::encoding::ids; use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; -use vortex_array::variants::{ArrayVariants, PrimitiveArrayTrait}; +use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable}; use vortex_array::visitor::{ArrayVisitor, VisitorVTable}; use vortex_array::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayVariant, @@ -71,9 +71,12 @@ impl ZigZagArray { impl ArrayTrait for ZigZagArray {} -impl ArrayVariants for ZigZagArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for ZigZagEncoding { + fn as_primitive_array<'a>( + &self, + array: &'a ZigZagArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 5e2db6a519..8c37be6f15 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -13,7 +13,7 @@ use vortex_error::{vortex_bail, VortexExpect as _, VortexResult}; use crate::encoding::ids; use crate::stats::StatsSet; use crate::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use crate::variants::{ArrayVariants, BoolArrayTrait}; +use crate::variants::{BoolArrayTrait, VariantsVTable}; use crate::visitor::{ArrayVisitor, VisitorVTable}; use crate::{ impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical, @@ -178,9 +178,9 @@ impl BoolArray { impl ArrayTrait for BoolArray {} -impl ArrayVariants for BoolArray { - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - Some(self) +impl VariantsVTable for BoolEncoding { + fn as_bool_array<'a>(&self, array: &'a BoolArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs index f837f6455d..dc7ba9212c 100644 --- a/vortex-array/src/array/chunked/canonical.rs +++ b/vortex-array/src/array/chunked/canonical.rs @@ -1,6 +1,6 @@ use arrow_buffer::{BooleanBufferBuilder, Buffer, MutableBuffer, ScalarBuffer}; use vortex_dtype::{DType, PType, StructDType}; -use vortex_error::{vortex_bail, vortex_err, ErrString, VortexResult}; +use vortex_error::{vortex_bail, vortex_err, ErrString, VortexExpect, VortexResult}; use crate::array::chunked::ChunkedArray; use crate::array::extension::ExtensionArray; @@ -131,11 +131,11 @@ fn swizzle_struct_chunks( let mut field_arrays = Vec::new(); for (field_idx, field_dtype) in struct_dtype.dtypes().iter().enumerate() { - let field_chunks = chunks.iter().map(|c| c.with_dyn(|d| - d.as_struct_array_unchecked() + let field_chunks = chunks.iter().map(|c| c.as_struct_array() + .vortex_expect("Chunk was not a StructArray") .field(field_idx) - .ok_or_else(|| vortex_err!("All chunks must have same dtype; missing field at index {}, current chunk dtype: {}", field_idx, c.dtype())), - )).collect::>>()?; + .ok_or_else(|| vortex_err!("All chunks must have same dtype; missing field at index {}, current chunk dtype: {}", field_idx, c.dtype())) + ).collect::>>()?; let field_array = ChunkedArray::try_new(field_chunks, field_dtype.clone())?; field_arrays.push(field_array.into_array()); } diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index 0efd383d8d..f39c94e59c 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -1,46 +1,53 @@ use vortex_dtype::field::Field; use vortex_dtype::DType; -use vortex_error::{vortex_err, vortex_panic, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexExpect, VortexResult}; use crate::array::chunked::ChunkedArray; +use crate::array::ChunkedEncoding; use crate::variants::{ - ArrayVariants, BinaryArrayTrait, BoolArrayTrait, ExtensionArrayTrait, ListArrayTrait, - NullArrayTrait, PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait, + BinaryArrayTrait, BoolArrayTrait, ExtensionArrayTrait, ListArrayTrait, NullArrayTrait, + PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait, VariantsVTable, }; use crate::{ArrayDType, ArrayData, IntoArrayData}; /// Chunked arrays support all DTypes -impl ArrayVariants for ChunkedArray { - fn as_null_array(&self) -> Option<&dyn NullArrayTrait> { - matches!(self.dtype(), DType::Null).then_some(self) +impl VariantsVTable for ChunkedEncoding { + fn as_null_array<'a>(&self, array: &'a ChunkedArray) -> Option<&'a dyn NullArrayTrait> { + Some(array) } - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - matches!(self.dtype(), DType::Bool(_)).then_some(self) + fn as_bool_array<'a>(&self, array: &'a ChunkedArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - matches!(self.dtype(), DType::Primitive(..)).then_some(self) + fn as_primitive_array<'a>( + &self, + array: &'a ChunkedArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - matches!(self.dtype(), DType::Utf8(_)).then_some(self) + fn as_utf8_array<'a>(&self, array: &'a ChunkedArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - matches!(self.dtype(), DType::Binary(_)).then_some(self) + fn as_binary_array<'a>(&self, array: &'a ChunkedArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } - fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { - matches!(self.dtype(), DType::Struct(..)).then_some(self) + fn as_struct_array<'a>(&self, array: &'a ChunkedArray) -> Option<&'a dyn StructArrayTrait> { + Some(array) } - fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { - matches!(self.dtype(), DType::List(..)).then_some(self) + fn as_list_array<'a>(&self, array: &'a ChunkedArray) -> Option<&'a dyn ListArrayTrait> { + Some(array) } - fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { - matches!(self.dtype(), DType::Extension(..)).then_some(self) + fn as_extension_array<'a>( + &self, + array: &'a ChunkedArray, + ) -> Option<&'a dyn ExtensionArrayTrait> { + Some(array) } } @@ -58,7 +65,7 @@ impl StructArrayTrait for ChunkedArray { fn field(&self, idx: usize) -> Option { let mut chunks = Vec::with_capacity(self.nchunks()); for chunk in self.chunks() { - chunks.push(chunk.with_dyn(|a| a.as_struct_array().and_then(|s| s.field(idx)))?); + chunks.push(chunk.as_struct_array().and_then(|s| s.field(idx))?); } let projected_dtype = self.dtype().as_struct().and_then(|s| s.dtypes().get(idx))?; @@ -77,11 +84,12 @@ impl StructArrayTrait for ChunkedArray { fn project(&self, projection: &[Field]) -> VortexResult { let mut chunks = Vec::with_capacity(self.nchunks()); for chunk in self.chunks() { - chunks.push(chunk.with_dyn(|a| { - a.as_struct_array() + chunks.push( + chunk + .as_struct_array() .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? - .project(projection) - })?); + .project(projection)?, + ); } let projected_dtype = self @@ -101,10 +109,12 @@ impl ListArrayTrait for ChunkedArray {} impl ExtensionArrayTrait for ChunkedArray { fn storage_data(&self) -> ArrayData { - ChunkedArray::from_iter( - self.chunks() - .map(|chunk| chunk.with_dyn(|a| a.as_extension_array_unchecked().storage_data())), - ) + ChunkedArray::from_iter(self.chunks().map(|chunk| { + chunk + .as_extension_array() + .vortex_expect("Expected extension array") + .storage_data() + })) .into_array() } } diff --git a/vortex-array/src/array/constant/variants.rs b/vortex-array/src/array/constant/variants.rs index 42f926d6a5..c127194ead 100644 --- a/vortex-array/src/array/constant/variants.rs +++ b/vortex-array/src/array/constant/variants.rs @@ -1,49 +1,55 @@ use vortex_dtype::field::Field; -use vortex_dtype::DType; use vortex_error::{VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::Scalar; use crate::array::constant::ConstantArray; +use crate::array::ConstantEncoding; use crate::iter::Accessor; use crate::validity::{ArrayValidity, Validity}; use crate::variants::{ - ArrayVariants, BinaryArrayTrait, BoolArrayTrait, ExtensionArrayTrait, ListArrayTrait, - NullArrayTrait, PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait, + BinaryArrayTrait, BoolArrayTrait, ExtensionArrayTrait, ListArrayTrait, NullArrayTrait, + PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait, VariantsVTable, }; -use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData}; +use crate::{ArrayData, ArrayLen, IntoArrayData}; /// Constant arrays support all DTypes -impl ArrayVariants for ConstantArray { - fn as_null_array(&self) -> Option<&dyn NullArrayTrait> { - matches!(self.dtype(), DType::Null).then_some(self) +impl VariantsVTable for ConstantEncoding { + fn as_null_array<'a>(&self, array: &'a ConstantArray) -> Option<&'a dyn NullArrayTrait> { + Some(array) } - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - matches!(self.dtype(), DType::Bool(_)).then_some(self) + fn as_bool_array<'a>(&self, array: &'a ConstantArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - matches!(self.dtype(), DType::Primitive(..)).then_some(self) + fn as_primitive_array<'a>( + &self, + array: &'a ConstantArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - matches!(self.dtype(), DType::Utf8(_)).then_some(self) + fn as_utf8_array<'a>(&self, array: &'a ConstantArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - matches!(self.dtype(), DType::Binary(_)).then_some(self) + fn as_binary_array<'a>(&self, array: &'a ConstantArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } - fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { - matches!(self.dtype(), DType::Struct(..)).then_some(self) + fn as_struct_array<'a>(&self, array: &'a ConstantArray) -> Option<&'a dyn StructArrayTrait> { + Some(array) } - fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { - matches!(self.dtype(), DType::List(..)).then_some(self) + fn as_list_array<'a>(&self, array: &'a ConstantArray) -> Option<&'a dyn ListArrayTrait> { + Some(array) } - fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { - matches!(self.dtype(), DType::Extension(..)).then_some(self) + fn as_extension_array<'a>( + &self, + array: &'a ConstantArray, + ) -> Option<&'a dyn ExtensionArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/extension/mod.rs b/vortex-array/src/array/extension/mod.rs index 6da478afa8..845d7b3d8e 100644 --- a/vortex-array/src/array/extension/mod.rs +++ b/vortex-array/src/array/extension/mod.rs @@ -9,7 +9,7 @@ use vortex_error::{VortexExpect as _, VortexResult}; use crate::encoding::ids; use crate::stats::{ArrayStatistics as _, Stat, StatisticsVTable, StatsSet}; use crate::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; -use crate::variants::{ArrayVariants, ExtensionArrayTrait}; +use crate::variants::{ExtensionArrayTrait, VariantsVTable}; use crate::visitor::{ArrayVisitor, VisitorVTable}; use crate::{impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical}; @@ -59,9 +59,12 @@ impl ExtensionArray { impl ArrayTrait for ExtensionArray {} -impl ArrayVariants for ExtensionArray { - fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { - Some(self) +impl VariantsVTable for ExtensionEncoding { + fn as_extension_array<'a>( + &self, + array: &'a ExtensionArray, + ) -> Option<&'a dyn ExtensionArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/null/mod.rs b/vortex-array/src/array/null/mod.rs index 7032123068..a4087d45a2 100644 --- a/vortex-array/src/array/null/mod.rs +++ b/vortex-array/src/array/null/mod.rs @@ -8,7 +8,7 @@ use crate::encoding::ids; use crate::nbytes::ArrayNBytes; use crate::stats::{Stat, StatisticsVTable, StatsSet}; use crate::validity::{LogicalValidity, Validity, ValidityVTable}; -use crate::variants::{ArrayVariants, NullArrayTrait}; +use crate::variants::{NullArrayTrait, VariantsVTable}; use crate::visitor::{ArrayVisitor, VisitorVTable}; use crate::{impl_encoding, ArrayLen, ArrayTrait, Canonical, IntoCanonical}; @@ -72,9 +72,9 @@ impl VisitorVTable for NullEncoding { impl ArrayTrait for NullArray {} -impl ArrayVariants for NullArray { - fn as_null_array(&self) -> Option<&dyn NullArrayTrait> { - Some(self) +impl VariantsVTable for NullEncoding { + fn as_null_array<'a>(&self, array: &'a NullArray) -> Option<&'a dyn NullArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 856dd9b86d..2497bc1858 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -16,7 +16,7 @@ use crate::encoding::ids; use crate::iter::Accessor; use crate::stats::StatsSet; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use crate::variants::{ArrayVariants, PrimitiveArrayTrait}; +use crate::variants::{PrimitiveArrayTrait, VariantsVTable}; use crate::visitor::{ArrayVisitor, VisitorVTable}; use crate::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -200,9 +200,12 @@ impl PrimitiveArray { impl ArrayTrait for PrimitiveArray {} -impl ArrayVariants for PrimitiveArray { - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - Some(self) +impl VariantsVTable for PrimitiveEncoding { + fn as_primitive_array<'a>( + &self, + array: &'a PrimitiveArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/sparse/variants.rs b/vortex-array/src/array/sparse/variants.rs index 62f65d55ba..b6d7520003 100644 --- a/vortex-array/src/array/sparse/variants.rs +++ b/vortex-array/src/array/sparse/variants.rs @@ -1,47 +1,53 @@ use vortex_dtype::field::Field; -use vortex_dtype::DType; use vortex_error::{vortex_err, VortexExpect, VortexResult}; use vortex_scalar::StructScalar; use crate::array::sparse::SparseArray; +use crate::array::SparseEncoding; use crate::variants::{ - ArrayVariants, BinaryArrayTrait, BoolArrayTrait, ExtensionArrayTrait, ListArrayTrait, - NullArrayTrait, PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait, + BinaryArrayTrait, BoolArrayTrait, ExtensionArrayTrait, ListArrayTrait, NullArrayTrait, + PrimitiveArrayTrait, StructArrayTrait, Utf8ArrayTrait, VariantsVTable, }; -use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData}; +use crate::{ArrayData, ArrayLen, IntoArrayData}; /// Sparse arrays support all DTypes -impl ArrayVariants for SparseArray { - fn as_null_array(&self) -> Option<&dyn NullArrayTrait> { - matches!(self.dtype(), DType::Null).then_some(self) +impl VariantsVTable for SparseEncoding { + fn as_null_array<'a>(&self, array: &'a SparseArray) -> Option<&'a dyn NullArrayTrait> { + Some(array) } - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { - matches!(self.dtype(), DType::Bool(_)).then_some(self) + fn as_bool_array<'a>(&self, array: &'a SparseArray) -> Option<&'a dyn BoolArrayTrait> { + Some(array) } - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { - matches!(self.dtype(), DType::Primitive(..)).then_some(self) + fn as_primitive_array<'a>( + &self, + array: &'a SparseArray, + ) -> Option<&'a dyn PrimitiveArrayTrait> { + Some(array) } - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - matches!(self.dtype(), DType::Utf8(_)).then_some(self) + fn as_utf8_array<'a>(&self, array: &'a SparseArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - matches!(self.dtype(), DType::Binary(_)).then_some(self) + fn as_binary_array<'a>(&self, array: &'a SparseArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } - fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { - matches!(self.dtype(), DType::Struct(..)).then_some(self) + fn as_struct_array<'a>(&self, array: &'a SparseArray) -> Option<&'a dyn StructArrayTrait> { + Some(array) } - fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { - matches!(self.dtype(), DType::List(..)).then_some(self) + fn as_list_array<'a>(&self, array: &'a SparseArray) -> Option<&'a dyn ListArrayTrait> { + Some(array) } - fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { - matches!(self.dtype(), DType::Extension(..)).then_some(self) + fn as_extension_array<'a>( + &self, + array: &'a SparseArray, + ) -> Option<&'a dyn ExtensionArrayTrait> { + Some(array) } } @@ -57,9 +63,7 @@ impl BinaryArrayTrait for SparseArray {} impl StructArrayTrait for SparseArray { fn field(&self, idx: usize) -> Option { - let values = self - .values() - .with_dyn(|s| s.as_struct_array().and_then(|s| s.field(idx)))?; + let values = self.values().as_struct_array().and_then(|s| s.field(idx))?; let scalar = StructScalar::try_from(&self.fill_scalar()) .ok()? .field_by_idx(idx)?; @@ -78,11 +82,11 @@ impl StructArrayTrait for SparseArray { } fn project(&self, projection: &[Field]) -> VortexResult { - let values = self.values().with_dyn(|s| { - s.as_struct_array() - .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? - .project(projection) - })?; + let values = self + .values() + .as_struct_array() + .ok_or_else(|| vortex_err!("Chunk was not a StructArray"))? + .project(projection)?; let scalar = StructScalar::try_from(&self.fill_scalar())?.project(projection)?; SparseArray::try_new_with_offset( @@ -103,7 +107,9 @@ impl ExtensionArrayTrait for SparseArray { SparseArray::try_new_with_offset( self.indices(), self.values() - .with_dyn(|a| a.as_extension_array_unchecked().storage_data()), + .as_extension_array() + .vortex_expect("Expected extension array") + .storage_data(), self.len(), self.indices_offset(), self.fill_scalar(), diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index e9da403be9..d79ccfcdf4 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -8,7 +8,7 @@ use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexExpect as _, Vor use crate::encoding::ids; use crate::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet}; use crate::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable}; -use crate::variants::{ArrayVariants, StructArrayTrait}; +use crate::variants::{StructArrayTrait, VariantsVTable}; use crate::visitor::{ArrayVisitor, VisitorVTable}; use crate::{ impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, @@ -143,9 +143,9 @@ impl StructArray { impl ArrayTrait for StructArray {} -impl ArrayVariants for StructArray { - fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { - Some(self) +impl VariantsVTable for StructEncoding { + fn as_struct_array<'a>(&self, array: &'a StructArray) -> Option<&'a dyn StructArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/varbin/variants.rs b/vortex-array/src/array/varbin/variants.rs index dadd9ea99f..b78132ec9f 100644 --- a/vortex-array/src/array/varbin/variants.rs +++ b/vortex-array/src/array/varbin/variants.rs @@ -1,16 +1,14 @@ -use vortex_dtype::DType; - use crate::array::varbin::VarBinArray; -use crate::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait}; -use crate::ArrayDType; +use crate::array::VarBinEncoding; +use crate::variants::{BinaryArrayTrait, Utf8ArrayTrait, VariantsVTable}; -impl ArrayVariants for VarBinArray { - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - matches!(self.dtype(), DType::Utf8(..)).then_some(self) +impl VariantsVTable for VarBinEncoding { + fn as_utf8_array<'a>(&self, array: &'a VarBinArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - matches!(self.dtype(), DType::Binary(..)).then_some(self) + fn as_binary_array<'a>(&self, array: &'a VarBinArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/array/varbinview/variants.rs b/vortex-array/src/array/varbinview/variants.rs index 23a43b5524..2226306921 100644 --- a/vortex-array/src/array/varbinview/variants.rs +++ b/vortex-array/src/array/varbinview/variants.rs @@ -1,16 +1,14 @@ -use vortex_dtype::DType; - use crate::array::varbinview::VarBinViewArray; -use crate::variants::{ArrayVariants, BinaryArrayTrait, Utf8ArrayTrait}; -use crate::ArrayDType; +use crate::array::VarBinViewEncoding; +use crate::variants::{BinaryArrayTrait, Utf8ArrayTrait, VariantsVTable}; -impl ArrayVariants for VarBinViewArray { - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { - matches!(self.dtype(), DType::Utf8(..)).then_some(self) +impl VariantsVTable for VarBinViewEncoding { + fn as_utf8_array<'a>(&self, array: &'a VarBinViewArray) -> Option<&'a dyn Utf8ArrayTrait> { + Some(array) } - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - matches!(self.dtype(), DType::Binary(..)).then_some(self) + fn as_binary_array<'a>(&self, array: &'a VarBinViewArray) -> Option<&'a dyn BinaryArrayTrait> { + Some(array) } } diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index d2db222744..4d9bf05d2e 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -8,7 +8,7 @@ use owned::OwnedArrayData; use viewed::ViewedArrayData; use vortex_buffer::Buffer; use vortex_dtype::DType; -use vortex_error::{vortex_err, vortex_panic, VortexError, VortexExpect, VortexResult}; +use vortex_error::{vortex_err, vortex_panic, VortexExpect, VortexResult}; use vortex_scalar::Scalar; use crate::array::{ @@ -65,7 +65,7 @@ impl ArrayData { children: Arc<[ArrayData]>, statistics: StatsSet, ) -> VortexResult { - let data = OwnedArrayData { + Self::try_new(InnerArrayData::Owned(OwnedArrayData { encoding, dtype, len, @@ -73,14 +73,7 @@ impl ArrayData { buffer, children, stats_set: Arc::new(RwLock::new(statistics)), - }; - - let array = ArrayData(InnerArrayData::Owned(data)); - // Validate here that the metadata correctly parses, so that an encoding can infallibly - // FIXME(robert): Encoding::with_dyn no longer eagerly validates metadata, come up with a way to validate metadata - encoding.with_dyn(&array, &mut |_| Ok(()))?; - - Ok(array) + })) } pub fn try_new_viewed( @@ -105,6 +98,7 @@ impl ArrayData { }, )?; + // Parse the array metadata let metadata = encoding.load_metadata(array.metadata().map(|v| v.bytes()))?; let view = ViewedArrayData { @@ -118,12 +112,34 @@ impl ArrayData { ctx, }; - // Validate here that the metadata correctly parses, so that an encoding can infallibly - // implement Encoding::with_view(). - // FIXME(ngates): validate the metadata - ArrayData::from(view.clone()).with_dyn(|_| Ok::<(), VortexError>(()))?; + Self::try_new(InnerArrayData::Viewed(view)) + } + + /// Shared constructor that performs common array validation. + fn try_new(inner: InnerArrayData) -> VortexResult { + let array = ArrayData(inner); - Ok(view.into()) + // Sanity check that the encoding implements the correct array trait + debug_assert!( + match array.dtype() { + DType::Null => array.as_null_array().is_some(), + DType::Bool(_) => array.as_bool_array().is_some(), + DType::Primitive(..) => array.as_primitive_array().is_some(), + DType::Utf8(_) => array.as_utf8_array().is_some(), + DType::Binary(_) => array.as_binary_array().is_some(), + DType::Struct(..) => array.as_struct_array().is_some(), + DType::List(..) => array.as_list_array().is_some(), + DType::Extension(..) => array.as_extension_array().is_some(), + }, + "Encoding {} does not implement the variant trait for {}", + array.encoding().id(), + array.dtype() + ); + + // TODO(ngates): we should run something like encoding.validate(array) so the encoding + // can check the number of children, number of buffers, and metadata values etc. + + Ok(array) } /// Return the array's encoding @@ -329,23 +345,6 @@ impl ArrayData { self.encoding() .with_dyn(self, &mut |array| { - // Sanity check that the encoding implements the correct array trait - debug_assert!( - match array.dtype() { - DType::Null => array.as_null_array().is_some(), - DType::Bool(_) => array.as_bool_array().is_some(), - DType::Primitive(..) => array.as_primitive_array().is_some(), - DType::Utf8(_) => array.as_utf8_array().is_some(), - DType::Binary(_) => array.as_binary_array().is_some(), - DType::Struct(..) => array.as_struct_array().is_some(), - DType::List(..) => array.as_list_array().is_some(), - DType::Extension(..) => array.as_extension_array().is_some(), - }, - "Encoding {} does not implement the variant trait for {}", - self.encoding().id(), - array.dtype() - ); - result = Some(f(array)); Ok(()) }) diff --git a/vortex-array/src/encoding/mod.rs b/vortex-array/src/encoding/mod.rs index 0d790a9fc4..fe111b69cf 100644 --- a/vortex-array/src/encoding/mod.rs +++ b/vortex-array/src/encoding/mod.rs @@ -9,6 +9,7 @@ use vortex_error::{vortex_panic, VortexResult}; use crate::compute::ComputeVTable; use crate::stats::StatisticsVTable; use crate::validity::ValidityVTable; +use crate::variants::VariantsVTable; use crate::visitor::VisitorVTable; use crate::{ArrayData, ArrayDef, ArrayMetadata, ArrayTrait, IntoCanonicalVTable, MetadataVTable}; @@ -79,6 +80,7 @@ pub trait EncodingVTable: + ComputeVTable + StatisticsVTable + ValidityVTable + + VariantsVTable + VisitorVTable { fn id(&self) -> EncodingId; diff --git a/vortex-array/src/encoding/opaque.rs b/vortex-array/src/encoding/opaque.rs index 5e5ae909ea..166a58da04 100644 --- a/vortex-array/src/encoding/opaque.rs +++ b/vortex-array/src/encoding/opaque.rs @@ -8,6 +8,7 @@ use crate::compute::ComputeVTable; use crate::encoding::{EncodingId, EncodingVTable}; use crate::stats::StatisticsVTable; use crate::validity::{LogicalValidity, ValidityVTable}; +use crate::variants::VariantsVTable; use crate::visitor::{ArrayVisitor, VisitorVTable}; use crate::{ ArrayData, ArrayMetadata, ArrayTrait, Canonical, IntoCanonicalVTable, MetadataVTable, @@ -27,6 +28,8 @@ use crate::{ #[derive(Debug, Clone, Copy)] pub struct OpaqueEncoding(pub u16); +impl VariantsVTable for OpaqueEncoding {} + impl EncodingVTable for OpaqueEncoding { fn id(&self) -> EncodingId { EncodingId::new("vortex.opaque", self.0) diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index a5b58ff5dd..f9d6f0e0b5 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -23,7 +23,6 @@ use crate::encoding::ArrayEncodingRef; use crate::nbytes::ArrayNBytes; use crate::stats::ArrayStatistics; use crate::validity::ArrayValidity; -use crate::variants::ArrayVariants; pub mod accessor; pub mod aliases; @@ -92,7 +91,6 @@ pub trait ArrayTrait: + ArrayDType + ArrayLen + ArrayNBytes - + ArrayVariants + IntoCanonical + ArrayValidity + ArrayStatistics diff --git a/vortex-array/src/variants.rs b/vortex-array/src/variants.rs index 74be72b40e..4e5e7ee6f2 100644 --- a/vortex-array/src/variants.rs +++ b/vortex-array/src/variants.rs @@ -7,126 +7,195 @@ use std::sync::Arc; use vortex_dtype::field::Field; use vortex_dtype::{DType, ExtDType, FieldNames, PType}; -use vortex_error::{vortex_panic, VortexExpect as _, VortexResult}; +use vortex_error::{vortex_panic, VortexError, VortexExpect as _, VortexResult}; -use crate::{ArrayData, ArrayTrait}; +use crate::encoding::Encoding; +use crate::{ArrayDType, ArrayData, ArrayTrait}; -pub trait ArrayVariants { - fn as_null_array(&self) -> Option<&dyn NullArrayTrait> { +/// An Array encoding must declare which DTypes it can be downcast into. +pub trait VariantsVTable { + fn as_null_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn NullArrayTrait> { None } - fn as_null_array_unchecked(&self) -> &dyn NullArrayTrait { - self.as_null_array().vortex_expect("Expected NullArray") + fn as_bool_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn BoolArrayTrait> { + None } - fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { + fn as_primitive_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn PrimitiveArrayTrait> { None } - fn as_bool_array_unchecked(&self) -> &dyn BoolArrayTrait { - self.as_bool_array().vortex_expect("Expected BoolArray") + fn as_utf8_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn Utf8ArrayTrait> { + None } - fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { + fn as_binary_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn BinaryArrayTrait> { None } - fn as_primitive_array_unchecked(&self) -> &dyn PrimitiveArrayTrait { - self.as_primitive_array() - .vortex_expect("Expected PrimitiveArray") + fn as_struct_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn StructArrayTrait> { + None } - fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { + fn as_list_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn ListArrayTrait> { None } - fn as_utf8_array_unchecked(&self) -> &dyn Utf8ArrayTrait { - self.as_utf8_array().vortex_expect("Expected Utf8Array") + fn as_extension_array<'a>(&self, _array: &'a Array) -> Option<&'a dyn ExtensionArrayTrait> { + None } +} - fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { - None +impl VariantsVTable for E +where + E: VariantsVTable, + for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>, +{ + fn as_null_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn NullArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_null_array(encoding, array_ref) } - fn as_binary_array_unchecked(&self) -> &dyn BinaryArrayTrait { - self.as_binary_array().vortex_expect("Expected BinaryArray") + fn as_bool_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn BoolArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_bool_array(encoding, array_ref) } - fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { - None + fn as_primitive_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn PrimitiveArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_primitive_array(encoding, array_ref) } - fn as_struct_array_unchecked(&self) -> &dyn StructArrayTrait { - self.as_struct_array().vortex_expect("Expected StructArray") + fn as_utf8_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn Utf8ArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_utf8_array(encoding, array_ref) } - fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { - None + fn as_binary_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn BinaryArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_binary_array(encoding, array_ref) } - fn as_list_array_unchecked(&self) -> &dyn ListArrayTrait { - self.as_list_array().vortex_expect("Expected ListArray") + fn as_struct_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn StructArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_struct_array(encoding, array_ref) } - fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { - None + fn as_list_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn ListArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_list_array(encoding, array_ref) } - fn as_extension_array_unchecked(&self) -> &dyn ExtensionArrayTrait { - self.as_extension_array() - .vortex_expect("Expected ExtensionArray") + fn as_extension_array<'a>(&self, array: &'a ArrayData) -> Option<&'a dyn ExtensionArrayTrait> { + let array_ref = + <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference"); + let encoding = array + .encoding() + .as_any() + .downcast_ref::() + .vortex_expect("Failed to downcast encoding"); + VariantsVTable::as_extension_array(encoding, array_ref) } } -pub trait NullArrayTrait: ArrayTrait {} +/// Provide functions on type-erased ArrayData to downcast into dtype-specific array variants. +impl ArrayData { + pub fn as_null_array(&self) -> Option<&dyn NullArrayTrait> { + matches!(self.dtype(), DType::Null) + .then(|| self.encoding().as_null_array(self)) + .flatten() + } -pub trait BoolArrayTrait: ArrayTrait {} + pub fn as_bool_array(&self) -> Option<&dyn BoolArrayTrait> { + matches!(self.dtype(), DType::Bool(..)) + .then(|| self.encoding().as_bool_array(self)) + .flatten() + } -/// Iterate over an array of primitives by dispatching at run-time on the array type. -#[macro_export] -macro_rules! iterate_primitive_array { - ($self:expr, | $_1:tt $rust_type:ident, $_2:tt $iterator:ident | $($body:tt)*) => ({ - macro_rules! __with__ {( $_1:tt $rust_type:ident, $_2:tt $iterator:ident ) => ( $($body)* )} - use vortex_error::VortexExpect; - match $self.ptype() { - PType::I8 => __with__! { i8, $self.i8_iter().vortex_expect("i8 array must have i8_iter") }, - PType::I16 => __with__! { i16, $self.i16_iter().vortex_expect("i16 array must have i16_iter") }, - PType::I32 => __with__! { i32, $self.i32_iter().vortex_expect("i32 array must have i32_iter") }, - PType::I64 => __with__! { i64, $self.i64_iter().vortex_expect("i64 array must have i64_iter") }, - PType::U8 => __with__! { u8, $self.u8_iter().vortex_expect("u8 array must have u8_iter") }, - PType::U16 => __with__! { u16, $self.u16_iter().vortex_expect("u16 array must have u16_iter") }, - PType::U32 => __with__! { u32, $self.u32_iter().vortex_expect("u32 array must have u32_iter") }, - PType::U64 => __with__! { u64, $self.u64_iter().vortex_expect("u64 array must have u64_iter") }, - PType::F16 => __with__! { f16, $self.f16_iter().vortex_expect("f16 array must have f16_iter") }, - PType::F32 => __with__! { f32, $self.f32_iter().vortex_expect("f32 array must have f32_iter") }, - PType::F64 => __with__! { f64, $self.f64_iter().vortex_expect("f64 array must have f64_iter") }, - } - }) -} + pub fn as_primitive_array(&self) -> Option<&dyn PrimitiveArrayTrait> { + matches!(self.dtype(), DType::Primitive(..)) + .then(|| self.encoding().as_primitive_array(self)) + .flatten() + } -/// Iterate over an array of integers by dispatching at run-time on the array type. -#[macro_export] -macro_rules! iterate_integer_array { - ($self:expr, | $_1:tt $rust_type:ident, $_2:tt $iterator:ident | $($body:tt)*) => ({ - macro_rules! __with__ {( $_1 $rust_type:ident, $_2 $iterator:expr ) => ( $($body)* )} - use vortex_error::VortexExpect; - match $self.ptype() { - PType::I8 => __with__! { i8, $self.i8_iter().vortex_expect("i8 array must have i8_iter") }, - PType::I16 => __with__! { i16, $self.i16_iter().vortex_expect("i16 array must have i16_iter") }, - PType::I32 => __with__! { i32, $self.i32_iter().vortex_expect("i32 array must have i32_iter") }, - PType::I64 => __with__! { i64, $self.i64_iter().vortex_expect("i64 array must have i64_iter") }, - PType::U8 => __with__! { u8, $self.u8_iter().vortex_expect("u8 array must have u8_iter") }, - PType::U16 => __with__! { u16, $self.u16_iter().vortex_expect("u16 array must have u16_iter") }, - PType::U32 => __with__! { u32, $self.u32_iter().vortex_expect("u32 array must have u32_iter") }, - PType::U64 => __with__! { u64, $self.u64_iter().vortex_expect("u64 array must have u64_iter") }, - PType::F16 => panic!("unsupported type: f16"), - PType::F32 => panic!("unsupported type: f32"), - PType::F64 => panic!("unsupported type: f64"), - } - }) + pub fn as_utf8_array(&self) -> Option<&dyn Utf8ArrayTrait> { + matches!(self.dtype(), DType::Utf8(..)) + .then(|| self.encoding().as_utf8_array(self)) + .flatten() + } + + pub fn as_binary_array(&self) -> Option<&dyn BinaryArrayTrait> { + matches!(self.dtype(), DType::Binary(..)) + .then(|| self.encoding().as_binary_array(self)) + .flatten() + } + + pub fn as_struct_array(&self) -> Option<&dyn StructArrayTrait> { + matches!(self.dtype(), DType::Struct(..)) + .then(|| self.encoding().as_struct_array(self)) + .flatten() + } + + pub fn as_list_array(&self) -> Option<&dyn ListArrayTrait> { + matches!(self.dtype(), DType::List(..)) + .then(|| self.encoding().as_list_array(self)) + .flatten() + } + + pub fn as_extension_array(&self) -> Option<&dyn ExtensionArrayTrait> { + matches!(self.dtype(), DType::Extension(..)) + .then(|| self.encoding().as_extension_array(self)) + .flatten() + } } +pub trait NullArrayTrait: ArrayTrait {} + +pub trait BoolArrayTrait: ArrayTrait {} + pub trait PrimitiveArrayTrait: ArrayTrait { fn ptype(&self) -> PType { if let DType::Primitive(ptype, ..) = self.dtype() { diff --git a/vortex-datafusion/src/memory/plans.rs b/vortex-datafusion/src/memory/plans.rs index 48d09924ee..c999849299 100644 --- a/vortex-datafusion/src/memory/plans.rs +++ b/vortex-datafusion/src/memory/plans.rs @@ -151,11 +151,10 @@ impl Stream for RowIndicesStream { // Get the unfiltered record batch. // Since this is a one-shot, we only want to poll the inner future once, to create the // initial batch for us to process. - let vortex_struct = next_chunk.with_dyn(|a| { - a.as_struct_array() - .ok_or_else(|| vortex_err!("Not a struct array"))? - .project(&this.filter_projection) - })?; + let vortex_struct = next_chunk + .as_struct_array() + .ok_or_else(|| vortex_err!("Not a struct array"))? + .project(&this.filter_projection)?; let selection = this .conjunction_expr diff --git a/vortex-datafusion/src/memory/statistics.rs b/vortex-datafusion/src/memory/statistics.rs index d73e95c6ca..c2c262a2e1 100644 --- a/vortex-datafusion/src/memory/statistics.rs +++ b/vortex-datafusion/src/memory/statistics.rs @@ -3,54 +3,49 @@ use datafusion_common::{ColumnStatistics, Result as DFResult, ScalarValue, Stati use itertools::Itertools; use vortex_array::array::ChunkedArray; use vortex_array::stats::{ArrayStatistics, Stat}; +use vortex_array::variants::StructArrayTrait; use vortex_array::ArrayLen; use vortex_error::{vortex_err, VortexExpect, VortexResult}; pub fn chunked_array_df_stats(array: &ChunkedArray, projection: &[usize]) -> DFResult { let mut nbytes: usize = 0; - let column_statistics = array.as_ref().with_dyn(|a| { - let struct_arr = a - .as_struct_array() - .ok_or_else(|| vortex_err!("Not a struct array"))?; - projection - .iter() - .map(|i| { - struct_arr - .field(*i) - .ok_or_else(|| vortex_err!("Projection references unknown field {i}")) - }) - .map_ok(|arr| { - nbytes += arr.nbytes(); - ColumnStatistics { - null_count: arr - .statistics() - .get_as::(Stat::NullCount) - .map(|n| n as usize) - .map(Precision::Exact) - .unwrap_or(Precision::Absent), - max_value: arr - .statistics() - .get(Stat::Max) - .map(|n| { - ScalarValue::try_from(n) - .vortex_expect("cannot convert scalar to df scalar") - }) - .map(Precision::Exact) - .unwrap_or(Precision::Absent), - min_value: arr - .statistics() - .get(Stat::Min) - .map(|n| { - ScalarValue::try_from(n) - .vortex_expect("cannot convert scalar to df scalar") - }) - .map(Precision::Exact) - .unwrap_or(Precision::Absent), - distinct_count: Precision::Absent, - } - }) - .collect::>>() - })?; + let column_statistics = projection + .iter() + .map(|i| { + array + .field(*i) + .ok_or_else(|| vortex_err!("Projection references unknown field {i}")) + }) + .map_ok(|arr| { + nbytes += arr.nbytes(); + ColumnStatistics { + null_count: arr + .statistics() + .get_as::(Stat::NullCount) + .map(|n| n as usize) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), + max_value: arr + .statistics() + .get(Stat::Max) + .map(|n| { + ScalarValue::try_from(n).vortex_expect("cannot convert scalar to df scalar") + }) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), + min_value: arr + .statistics() + .get(Stat::Min) + .map(|n| { + ScalarValue::try_from(n).vortex_expect("cannot convert scalar to df scalar") + }) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), + distinct_count: Precision::Absent, + } + }) + .collect::>>()?; + Ok(Statistics { num_rows: Precision::Exact(array.len()), total_byte_size: Precision::Exact(nbytes), diff --git a/vortex-expr/src/select.rs b/vortex-expr/src/select.rs index 35fa8653a3..1bb19a7950 100644 --- a/vortex-expr/src/select.rs +++ b/vortex-expr/src/select.rs @@ -40,34 +40,32 @@ impl VortexExpr for Select { } fn evaluate(&self, batch: &ArrayData) -> VortexResult { - batch.with_dyn(|s| { - let st = s - .as_struct_array() - .ok_or_else(|| vortex_err!("Not a struct array"))?; - match self { - Select::Include(f) => st.project(f), - Select::Exclude(e) => { - let normalized_exclusion = e - .iter() - .map(|ef| match ef { - Field::Name(n) => Ok(n.as_str()), - Field::Index(i) => st - .names() - .get(*i) - .map(|s| &**s) - .ok_or_else(|| vortex_err!("Column doesn't exist")), - }) - .collect::>>()?; - let included_names = st - .names() - .iter() - .filter(|f| !normalized_exclusion.contains(&&***f)) - .map(|f| Field::from(&**f)) - .collect::>(); - st.project(&included_names) - } + let st = batch + .as_struct_array() + .ok_or_else(|| vortex_err!("Not a struct array"))?; + match self { + Select::Include(f) => st.project(f), + Select::Exclude(e) => { + let normalized_exclusion = e + .iter() + .map(|ef| match ef { + Field::Name(n) => Ok(n.as_str()), + Field::Index(i) => st + .names() + .get(*i) + .map(|s| &**s) + .ok_or_else(|| vortex_err!("Column doesn't exist")), + }) + .collect::>>()?; + let included_names = st + .names() + .iter() + .filter(|f| !normalized_exclusion.contains(&&***f)) + .map(|f| Field::from(&**f)) + .collect::>(); + st.project(&included_names) } - }) + } } fn collect_references<'a>(&'a self, references: &mut HashSet<&'a Field>) { @@ -109,7 +107,7 @@ mod tests { let st = test_array(); let select = Select::include(vec![Field::from("a")]); let selected = select.evaluate(st.as_ref()).unwrap(); - let selected_names = selected.with_dyn(|a| a.as_struct_array_unchecked().names().clone()); + let selected_names = selected.as_struct_array().unwrap().names().clone(); assert_eq!(selected_names.as_ref(), &["a".into()]); } @@ -118,7 +116,7 @@ mod tests { let st = test_array(); let select = Select::exclude(vec![Field::from("a")]); let selected = select.evaluate(st.as_ref()).unwrap(); - let selected_names = selected.with_dyn(|a| a.as_struct_array_unchecked().names().clone()); + let selected_names = selected.as_struct_array().unwrap().names().clone(); assert_eq!(selected_names.as_ref(), &["b".into()]); } } diff --git a/vortex-file/src/pruning.rs b/vortex-file/src/pruning.rs index 56a7000600..afca9311f8 100644 --- a/vortex-file/src/pruning.rs +++ b/vortex-file/src/pruning.rs @@ -124,15 +124,14 @@ impl PruningPredicate { /// If it returns Ok(Some(array)), the array is a boolean array with the same length as the /// metadata, and the values indicate whether the corresponding chunk can be pruned. pub fn evaluate(&self, metadata: &ArrayData) -> VortexResult> { - let known_stats = metadata.with_dyn(|x| { - HashSet::from_iter( - x.as_struct_array() - .vortex_expect("metadata must be struct array") - .names() - .iter() - .map(|x| x.to_string()), - ) - }); + let known_stats = HashSet::from_iter( + metadata + .as_struct_array() + .vortex_expect("metadata must be struct array") + .names() + .iter() + .map(|x| x.to_string()), + ); let required_stats = self .required_stats() .iter() diff --git a/vortex-file/src/read/layouts/columnar.rs b/vortex-file/src/read/layouts/columnar.rs index 184e4a3926..9a4c28c67f 100644 --- a/vortex-file/src/read/layouts/columnar.rs +++ b/vortex-file/src/read/layouts/columnar.rs @@ -500,14 +500,18 @@ mod tests { let prim_arr = arr .as_ref() .unwrap() - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) + .as_struct_array() + .unwrap() + .field(0) .unwrap() .into_primitive() .unwrap(); let str_arr = arr .as_ref() .unwrap() - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) + .as_struct_array() + .unwrap() + .field(1) .unwrap() .into_varbinview() .unwrap(); @@ -538,14 +542,18 @@ mod tests { let prim_arr = arr .as_ref() .unwrap() - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) + .as_struct_array() + .unwrap() + .field(0) .unwrap() .into_primitive() .unwrap(); let str_arr = arr .as_ref() .unwrap() - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) + .as_struct_array() + .unwrap() + .field(1) .unwrap() .into_varbinview() .unwrap(); @@ -595,12 +603,16 @@ mod tests { assert_eq!(arr.len(), 2); let prim_arr_chunk0 = arr[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) + .as_struct_array() + .unwrap() + .field(0) .unwrap() .into_primitive() .unwrap(); let str_arr_chunk0 = arr[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) + .as_struct_array() + .unwrap() + .field(1) .unwrap() .into_varbinview() .unwrap(); @@ -618,12 +630,16 @@ mod tests { iter::repeat("it").take(50).collect::>() ); let prim_arr_chunk1 = arr[1] - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) + .as_struct_array() + .unwrap() + .field(0) .unwrap() .into_primitive() .unwrap(); let str_arr_chunk1 = arr[1] - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) + .as_struct_array() + .unwrap() + .field(1) .unwrap() .into_varbinview() .unwrap(); diff --git a/vortex-file/src/read/metadata.rs b/vortex-file/src/read/metadata.rs index b967621f68..3ec9470a35 100644 --- a/vortex-file/src/read/metadata.rs +++ b/vortex-file/src/read/metadata.rs @@ -188,78 +188,76 @@ mod test { assert!(metadata_table.len() == 2); assert!(metadata_table.iter().all(Option::is_some)); - metadata_table[0] + let name_metadata_table = metadata_table[0] .as_ref() .unwrap() - .with_dyn(|name_metadata_table| { - let name_metadata_table = name_metadata_table.as_struct_array().unwrap(); + .as_struct_array() + .unwrap(); - let min = name_metadata_table.field_by_name("min").unwrap(); - let chunk1_min = scalar_at(&min, 0).unwrap(); - let chunk2_min = scalar_at(&min, 1).unwrap(); - assert_eq!( - chunk1_min.as_utf8().value(), - Some(BufferString::from("Angela")) - ); - assert_eq!( - chunk2_min.as_utf8().value(), - Some(BufferString::from("Khalil")) - ); + let min = name_metadata_table.field_by_name("min").unwrap(); + let chunk1_min = scalar_at(&min, 0).unwrap(); + let chunk2_min = scalar_at(&min, 1).unwrap(); + assert_eq!( + chunk1_min.as_utf8().value(), + Some(BufferString::from("Angela")) + ); + assert_eq!( + chunk2_min.as_utf8().value(), + Some(BufferString::from("Khalil")) + ); - let max = name_metadata_table.field_by_name("max").unwrap(); - let chunk1_max = scalar_at(&max, 0).unwrap(); - let chunk2_max = scalar_at(&max, 1).unwrap(); - assert_eq!( - chunk1_max.as_utf8().value(), - Some(BufferString::from("Joseph")) - ); - assert_eq!( - chunk2_max.as_utf8().value(), - Some(BufferString::from("Pharrell")) - ); + let max = name_metadata_table.field_by_name("max").unwrap(); + let chunk1_max = scalar_at(&max, 0).unwrap(); + let chunk2_max = scalar_at(&max, 1).unwrap(); + assert_eq!( + chunk1_max.as_utf8().value(), + Some(BufferString::from("Joseph")) + ); + assert_eq!( + chunk2_max.as_utf8().value(), + Some(BufferString::from("Pharrell")) + ); - let null_count = name_metadata_table.field_by_name("null_count").unwrap(); - let chunk1_null_count = scalar_at(&null_count, 0).unwrap(); - let chunk2_null_count = scalar_at(&null_count, 1).unwrap(); - assert_eq!( - chunk1_null_count.as_primitive().typed_value::(), - Some(0) - ); - assert_eq!( - chunk2_null_count.as_primitive().typed_value::(), - Some(1) - ); - }); + let null_count = name_metadata_table.field_by_name("null_count").unwrap(); + let chunk1_null_count = scalar_at(&null_count, 0).unwrap(); + let chunk2_null_count = scalar_at(&null_count, 1).unwrap(); + assert_eq!( + chunk1_null_count.as_primitive().typed_value::(), + Some(0) + ); + assert_eq!( + chunk2_null_count.as_primitive().typed_value::(), + Some(1) + ); - metadata_table[1] + let age_metadata_table = metadata_table[1] .as_ref() .unwrap() - .with_dyn(|age_metadata_table| { - let age_metadata_table = age_metadata_table.as_struct_array().unwrap(); + .as_struct_array() + .unwrap(); - let min = age_metadata_table.field_by_name("min").unwrap(); - let chunk1_min = scalar_at(&min, 0).unwrap(); - let chunk2_min = scalar_at(&min, 1).unwrap(); - assert_eq!(chunk1_min.as_primitive().typed_value::(), Some(25)); - assert_eq!(chunk2_min.as_primitive().typed_value::(), Some(18)); + let min = age_metadata_table.field_by_name("min").unwrap(); + let chunk1_min = scalar_at(&min, 0).unwrap(); + let chunk2_min = scalar_at(&min, 1).unwrap(); + assert_eq!(chunk1_min.as_primitive().typed_value::(), Some(25)); + assert_eq!(chunk2_min.as_primitive().typed_value::(), Some(18)); - let max = age_metadata_table.field_by_name("max").unwrap(); - let chunk1_max = scalar_at(&max, 0).unwrap(); - let chunk2_max = scalar_at(&max, 1).unwrap(); - assert_eq!(chunk1_max.as_primitive().typed_value::(), Some(31)); - assert_eq!(chunk2_max.as_primitive().typed_value::(), Some(57)); + let max = age_metadata_table.field_by_name("max").unwrap(); + let chunk1_max = scalar_at(&max, 0).unwrap(); + let chunk2_max = scalar_at(&max, 1).unwrap(); + assert_eq!(chunk1_max.as_primitive().typed_value::(), Some(31)); + assert_eq!(chunk2_max.as_primitive().typed_value::(), Some(57)); - let null_count = age_metadata_table.field_by_name("null_count").unwrap(); - let chunk1_null_count = scalar_at(&null_count, 0).unwrap(); - let chunk2_null_count = scalar_at(&null_count, 1).unwrap(); - assert_eq!( - chunk1_null_count.as_primitive().typed_value::(), - Some(1) - ); - assert_eq!( - chunk2_null_count.as_primitive().typed_value::(), - Some(1) - ); - }); + let null_count = age_metadata_table.field_by_name("null_count").unwrap(); + let chunk1_null_count = scalar_at(&null_count, 0).unwrap(); + let chunk2_null_count = scalar_at(&null_count, 1).unwrap(); + assert_eq!( + chunk1_null_count.as_primitive().typed_value::(), + Some(1) + ); + assert_eq!( + chunk2_null_count.as_primitive().typed_value::(), + Some(1) + ); } } diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 2dedad5003..7fb57056ef 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -319,7 +319,7 @@ async fn unequal_batches() { item_count += array.len(); batch_count += 1; - let numbers = array.with_dyn(|a| a.as_struct_array_unchecked().field_by_name("numbers")); + let numbers = array.as_struct_array().unwrap().field_by_name("numbers"); if let Some(numbers) = numbers { let numbers = numbers.into_primitive().unwrap(); @@ -414,9 +414,7 @@ async fn filter_string() { result.push(array.unwrap()); } assert_eq!(result.len(), 1); - let names = result[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) - .unwrap(); + let names = result[0].as_struct_array().unwrap().field(0).unwrap(); assert_eq!( names .into_varbinview() @@ -428,9 +426,7 @@ async fn filter_string() { .unwrap(), vec!["Joseph".to_string()] ); - let ages = result[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) - .unwrap(); + let ages = result[0].as_struct_array().unwrap().field(1).unwrap(); assert_eq!( ages.into_primitive().unwrap().maybe_null_slice::(), vec![25] @@ -487,9 +483,7 @@ async fn filter_or() { result.push(array.unwrap()); } assert_eq!(result.len(), 1); - let names = result[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) - .unwrap(); + let names = result[0].as_struct_array().unwrap().field(0).unwrap(); assert_eq!( names .into_varbinview() @@ -501,9 +495,7 @@ async fn filter_or() { .unwrap(), vec!["Joseph".to_string(), "Angela".to_string()] ); - let ages = result[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) - .unwrap(); + let ages = result[0].as_struct_array().unwrap().field(1).unwrap(); assert_eq!( ages.into_primitive() .unwrap() @@ -555,9 +547,7 @@ async fn filter_and() { result.push(array.unwrap()); } assert_eq!(result.len(), 1); - let names = result[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) - .unwrap(); + let names = result[0].as_struct_array().unwrap().field(0).unwrap(); assert_eq!( names .into_varbinview() @@ -568,9 +558,7 @@ async fn filter_and() { .unwrap(), vec![Some("Joseph".to_string()), None] ); - let ages = result[0] - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) - .unwrap(); + let ages = result[0].as_struct_array().unwrap().field(1).unwrap(); assert_eq!( ages.into_primitive().unwrap().maybe_null_slice::(), vec![25, 31] @@ -872,9 +860,7 @@ async fn filter_string_chunked() { .unwrap(); assert_eq!(actual_array.len(), 1); - let names = actual_array - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) - .unwrap(); + let names = actual_array.as_struct_array().unwrap().field(0).unwrap(); assert_eq!( names .into_varbinview() @@ -886,9 +872,7 @@ async fn filter_string_chunked() { .unwrap(), vec!["Joseph".to_string()] ); - let ages = actual_array - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) - .unwrap(); + let ages = actual_array.as_struct_array().unwrap().field(1).unwrap(); assert_eq!( ages.into_primitive().unwrap().maybe_null_slice::(), vec![25] @@ -974,9 +958,7 @@ async fn test_pruning_with_or() { .unwrap(); assert_eq!(actual_array.len(), 10); - let letters = actual_array - .with_dyn(|a| a.as_struct_array_unchecked().field(0)) - .unwrap(); + let letters = actual_array.as_struct_array().unwrap().field(0).unwrap(); assert_eq!( letters .into_varbinview() @@ -998,9 +980,7 @@ async fn test_pruning_with_or() { Some("P".to_string()) ] ); - let numbers = actual_array - .with_dyn(|a| a.as_struct_array_unchecked().field(1)) - .unwrap(); + let numbers = actual_array.as_struct_array().unwrap().field(1).unwrap(); assert_eq!( (0..numbers.len()) .map(|index| -> Option { diff --git a/vortex-sampling-compressor/tests/smoketest.rs b/vortex-sampling-compressor/tests/smoketest.rs index 6a818e30c6..0b6632e476 100644 --- a/vortex-sampling-compressor/tests/smoketest.rs +++ b/vortex-sampling-compressor/tests/smoketest.rs @@ -13,7 +13,7 @@ use vortex_sampling_compressor::{CompressConfig, SamplingCompressor}; mod tests { use vortex_array::array::{Bool, BooleanBuffer, ChunkedArray, VarBin}; use vortex_array::stats::{ArrayStatistics, Stat}; - use vortex_array::variants::{ArrayVariants, StructArrayTrait}; + use vortex_array::variants::StructArrayTrait; use vortex_array::ArrayDef; use vortex_datetime_dtype::TimeUnit; use vortex_datetime_parts::DateTimeParts; @@ -114,7 +114,6 @@ mod tests { assert_eq!(compressed.dtype(), to_compress.dtype()); let struct_array: StructArray = compressed.try_into().unwrap(); - let struct_array: &dyn StructArrayTrait = struct_array.as_struct_array().unwrap(); let prim_col: ChunkedArray = struct_array .field_by_name("prim_col")