spiraldb · joseph-isaacs · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/bench-vortex/benches/compress.rs b/bench-vortex/benches/compress.rs
@@ -34,7 +34,7 @@ use vortex::error::VortexResult;
 use vortex::file::{LayoutContext, LayoutDeserializer, VortexFileWriter, VortexReadBuilder};
 use vortex::sampling_compressor::compressors::fsst::FSSTCompressor;
 use vortex::sampling_compressor::{SamplingCompressor, ALL_ENCODINGS_CONTEXT};
-use vortex::{ArrayDType, ArrayData, IntoArrayData, IntoCanonical};
+use vortex::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant, IntoCanonical};
 
 use crate::tokio_runtime::TOKIO_RUNTIME;
 
@@ -412,12 +412,7 @@ fn tpc_h_l_comment(c: &mut Criterion) {
         "TPC-H l_comment chunked",
     );
 
-    let comments_canonical = comments
-        .into_canonical()
-        .unwrap()
-        .into_struct()
-        .unwrap()
-        .into_array();
+    let comments_canonical = comments.into_struct().unwrap().into_array();
     let dtype = comments_canonical.dtype().clone();
     let comments_canonical_chunked =
         ChunkedArray::try_new(vec![comments_canonical], dtype).unwrap();

diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs
@@ -8,7 +8,8 @@ use vortex_array::stats::{StatisticsVTable, StatsSet};
 use vortex_array::validity::{ArrayValidity, LogicalValidity, ValidityVTable};
 use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
 use vortex_array::{
-    impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
+    impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayVariant,
+    IntoCanonical,
 };
 use vortex_dtype::{DType, Nullability, PType};
 use vortex_error::{vortex_bail, VortexExpect, VortexResult};
@@ -192,8 +193,8 @@ impl ALPRDArray {
 
 impl IntoCanonical for ALPRDArray {
     fn into_canonical(self) -> VortexResult<Canonical> {
-        let left_parts = self.left_parts().into_canonical()?.into_primitive()?;
-        let right_parts = self.right_parts().into_canonical()?.into_primitive()?;
+        let left_parts = self.left_parts().into_primitive()?;
+        let right_parts = self.right_parts().into_primitive()?;
 
         // Decode the left_parts using our builtin dictionary.
         let left_parts_dict = &self.metadata().dict[0..self.metadata().dict_len as usize];
@@ -258,7 +259,7 @@ impl ArrayTrait for ALPRDArray {}
 mod test {
     use rstest::rstest;
     use vortex_array::array::PrimitiveArray;
-    use vortex_array::{IntoArrayData, IntoCanonical};
+    use vortex_array::{IntoArrayData, IntoArrayVariant};
 
     use crate::{alp_rd, ALPRDFloat};
 
@@ -284,12 +285,7 @@ mod test {
 
         let rd_array = encoder.encode(&real_array);
 
-        let decoded = rd_array
-            .into_array()
-            .into_canonical()
-            .unwrap()
-            .into_primitive()
-            .unwrap();
+        let decoded = rd_array.into_array().into_primitive().unwrap();
 
         let maybe_null_reals: Vec<T> = reals.into_iter().map(|v| v.unwrap_or_default()).collect();
         assert_eq!(decoded.maybe_null_slice::<T>(), &maybe_null_reals);

diff --git a/encodings/dict/src/compress.rs b/encodings/dict/src/compress.rs
@@ -10,7 +10,7 @@ use vortex_array::array::{
 };
 use vortex_array::validity::Validity;
 use vortex_array::variants::PrimitiveArrayTrait;
-use vortex_array::{ArrayDType, IntoArrayData, IntoCanonical};
+use vortex_array::{ArrayDType, IntoArrayData, IntoArrayVariant};
 use vortex_dtype::{match_each_native_ptype, DType, NativePType, ToBytes};
 use vortex_error::{VortexExpect as _, VortexUnwrap};
 use vortex_scalar::Scalar;
@@ -98,10 +98,8 @@ pub fn dict_encode_varbinview(array: &VarBinViewArray) -> (PrimitiveArray, VarBi
     (
         codes,
         values
-            .into_canonical()
-            .vortex_expect("VarBin to canonical")
             .into_varbinview()
-            .vortex_expect("VarBinView"),
+            .vortex_expect("VarBin to canonical to VarBinView"),
     )
 }
 

diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs
@@ -3,9 +3,7 @@ use vortex_array::array::PrimitiveArray;
 use vortex_array::compute::{take, TakeFn};
 use vortex_array::validity::Validity;
 use vortex_array::variants::PrimitiveArrayTrait;
-use vortex_array::{
-    ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, IntoCanonical, ToArrayData,
-};
+use vortex_array::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData};
 use vortex_dtype::{
     match_each_integer_ptype, match_each_unsigned_integer_ptype, NativePType, PType,
 };
@@ -23,7 +21,7 @@ impl TakeFn<BitPackedArray> for BitPackedEncoding {
     fn take(&self, array: &BitPackedArray, indices: &ArrayData) -> VortexResult<ArrayData> {
         // If the indices are large enough, it's faster to flatten and take the primitive array.
         if indices.len() * UNPACK_CHUNK_THRESHOLD > array.len() {
-            return take(array.clone().into_canonical()?.into_primitive()?, indices);
+            return take(array.clone().into_primitive()?, indices);
         }
 
         // NOTE: we use the unsigned PType because all values in the BitPackedArray must

diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs
@@ -9,7 +9,7 @@ use vortex_array::stats::{StatisticsVTable, StatsSet};
 use vortex_array::validity::{ArrayValidity, LogicalValidity, Validity, ValidityVTable};
 use vortex_array::variants::{BinaryArrayTrait, Utf8ArrayTrait, VariantsVTable};
 use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
-use vortex_array::{impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, IntoCanonical};
+use vortex_array::{impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, IntoArrayVariant};
 use vortex_dtype::{DType, Nullability, PType};
 use vortex_error::{vortex_bail, VortexExpect, VortexResult};
 
@@ -163,20 +163,14 @@ impl FSSTArray {
         F: FnOnce(Decompressor) -> VortexResult<R>,
     {
         // canonicalize the symbols child array, so we can view it contiguously
-        let symbols_array = self
-            .symbols()
-            .into_canonical()
-            .map_err(|err| err.with_context("Failed to canonicalize symbols array"))?
-            .into_primitive()
-            .map_err(|err| err.with_context("Symbols must be a Primitive Array"))?;
+        let symbols_array = self.symbols().into_primitive().map_err(|err| {
+            err.with_context("Failed to canonicalize symbols array into a Primitive Array")
+        })?;
         let symbols = symbols_array.maybe_null_slice::<u64>();
 
-        let symbol_lengths_array = self
-            .symbol_lengths()
-            .into_canonical()
-            .map_err(|err| err.with_context("Failed to canonicalize symbol_lengths array"))?
-            .into_primitive()
-            .map_err(|err| err.with_context("Symbol lengths must be a Primitive Array"))?;
+        let symbol_lengths_array = self.symbol_lengths().into_primitive().map_err(|err| {
+            err.with_context("Failed to canonicalize symbol_lengths array into a Primitive Array")
+        })?;
         let symbol_lengths = symbol_lengths_array.maybe_null_slice::<u8>();
 
         // Transmute the 64-bit symbol values into fsst `Symbol`s.

diff --git a/encodings/fsst/src/canonical.rs b/encodings/fsst/src/canonical.rs
@@ -31,10 +31,7 @@ impl IntoCanonical for FSSTArray {
             let uncompressed_bytes =
                 decompressor.decompress(compressed_bytes.maybe_null_slice::<u8>());
 
-            let uncompressed_lens_array = self
-                .uncompressed_lengths()
-                .into_canonical()?
-                .into_primitive()?;
+            let uncompressed_lens_array = self.uncompressed_lengths().into_primitive()?;
 
             // Directly create the binary views.
             let views: Vec<u128> = match_each_integer_ptype!(uncompressed_lens_array.ptype(), |$P| {

diff --git a/encodings/fsst/tests/fsst_tests.rs b/encodings/fsst/tests/fsst_tests.rs
@@ -5,7 +5,7 @@ use vortex_array::array::PrimitiveArray;
 use vortex_array::compute::{filter, scalar_at, slice, take, FilterMask};
 use vortex_array::encoding::Encoding;
 use vortex_array::validity::Validity;
-use vortex_array::{ArrayData, IntoArrayData, IntoCanonical};
+use vortex_array::{ArrayData, IntoArrayData, IntoArrayVariant};
 use vortex_dtype::{DType, Nullability};
 use vortex_fsst::{fsst_compress, fsst_train_compressor, FSSTEncoding};
 
@@ -97,13 +97,7 @@ fn test_fsst_array_ops() {
     );
 
     // test into_canonical
-    let canonical_array = fsst_array
-        .clone()
-        .into_canonical()
-        .unwrap()
-        .into_varbinview()
-        .unwrap()
-        .into_array();
+    let canonical_array = fsst_array.clone().into_varbinview().unwrap().into_array();
 
     assert_eq!(canonical_array.len(), fsst_array.len());
 

diff --git a/encodings/runend-bool/src/array.rs b/encodings/runend-bool/src/array.rs
@@ -259,7 +259,7 @@ mod test {
     use vortex_array::stats::ArrayStatistics;
     use vortex_array::validity::Validity;
     use vortex_array::{
-        ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical, ToArrayData,
+        ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData,
     };
     use vortex_dtype::{DType, Nullability};
 
@@ -353,8 +353,6 @@ mod test {
 
     fn to_bool_vec(arr: &ArrayData) -> Vec<bool> {
         arr.clone()
-            .into_canonical()
-            .unwrap()
             .into_bool()
             .unwrap()
             .boolean_buffer()
@@ -382,7 +380,7 @@ mod test {
 
         let arr =
             RunEndBoolArray::try_new(ends.into_array(), start, Validity::NonNullable).unwrap();
-        let bools = arr.clone().into_canonical().unwrap().into_bool().unwrap();
+        let bools = arr.clone().into_bool().unwrap();
         for stat in [
             Stat::IsConstant,
             Stat::NullCount,

diff --git a/vortex-array/benches/take_patches.rs b/vortex-array/benches/take_patches.rs
@@ -4,7 +4,7 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng as _};
 use vortex_array::patches::Patches;
-use vortex_array::{ArrayData, IntoCanonical as _};
+use vortex_array::{ArrayData, IntoArrayVariant};
 
 fn fixture(len: usize, sparsity: f64, rng: &mut StdRng) -> Patches {
     // NB: indices are always ordered
@@ -53,8 +53,6 @@ fn bench_take(c: &mut Criterion) {
                         patches.take_search(
                             <&ArrayData>::clone(indices)
                                 .clone()
-                                .into_canonical()
-                                .unwrap()
                                 .into_primitive()
                                 .unwrap(),
                         )
@@ -76,8 +74,6 @@ fn bench_take(c: &mut Criterion) {
                         patches.take_map(
                             <&ArrayData>::clone(indices)
                                 .clone()
-                                .into_canonical()
-                                .unwrap()
                                 .into_primitive()
                                 .unwrap(),
                         )

diff --git a/vortex-array/src/array/chunked/canonical.rs b/vortex-array/src/array/chunked/canonical.rs
@@ -251,7 +251,7 @@ fn pack_views(
         let canonical_chunk = chunk.clone().into_varbinview()?;
 
         for buffer in canonical_chunk.buffers() {
-            let canonical_buffer = buffer.into_canonical()?.into_primitive()?.into_array();
+            let canonical_buffer = buffer.into_primitive()?.into_array();
             buffers.push(canonical_buffer);
         }
 

diff --git a/vortex-array/src/array/chunked/compute/filter.rs b/vortex-array/src/array/chunked/compute/filter.rs
@@ -3,7 +3,7 @@ use vortex_error::{VortexExpect, VortexResult, VortexUnwrap};
 
 use crate::array::{ChunkedArray, ChunkedEncoding, PrimitiveArray};
 use crate::compute::{filter, take, FilterFn, FilterMask, SearchSorted, SearchSortedSide};
-use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoCanonical};
+use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant};
 
 // This is modeled after the constant with the equivalent name in arrow-rs.
 const FILTER_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.8;
@@ -65,7 +65,7 @@ fn filter_slices(array: &ChunkedArray, mask: FilterMask) -> VortexResult<Vec<Arr
 
     // Pre-materialize the chunk ends for performance.
     // The chunk ends is nchunks+1, which is expected to be in the hundreds or at most thousands.
-    let chunk_ends = array.chunk_offsets().into_canonical()?.into_primitive()?;
+    let chunk_ends = array.chunk_offsets().into_primitive()?;
     let chunk_ends = chunk_ends.maybe_null_slice::<u64>();
 
     let mut chunk_filters = vec![ChunkFilter::None; array.nchunks()];
@@ -143,7 +143,7 @@ fn filter_indices(array: &ChunkedArray, mask: FilterMask) -> VortexResult<Vec<Ar
 
     // Avoid find_chunk_idx and use our own to avoid the overhead.
     // The array should only be some thousands of values in the general case.
-    let chunk_ends = array.chunk_offsets().into_canonical()?.into_primitive()?;
+    let chunk_ends = array.chunk_offsets().into_primitive()?;
     let chunk_ends = chunk_ends.maybe_null_slice::<u64>();
 
     for set_index in mask.iter_indices()? {

diff --git a/vortex-array/src/array/sparse/canonical.rs b/vortex-array/src/array/sparse/canonical.rs
@@ -92,7 +92,7 @@ mod test {
     use crate::array::sparse::SparseArray;
     use crate::array::{BoolArray, PrimitiveArray};
     use crate::validity::Validity;
-    use crate::{ArrayDType, IntoArrayData, IntoCanonical};
+    use crate::{ArrayDType, IntoArrayData, IntoArrayVariant};
 
     #[rstest]
     #[case(Some(true))]
@@ -106,7 +106,7 @@ mod test {
             SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
         assert_eq!(*sparse_bools.dtype(), DType::Bool(Nullability::Nullable));
 
-        let flat_bools = sparse_bools.into_canonical().unwrap().into_bool().unwrap();
+        let flat_bools = sparse_bools.into_bool().unwrap();
         let expected = bool_array_from_nullable_vec(
             vec![
                 Some(true),
@@ -169,11 +169,7 @@ mod test {
             DType::Primitive(PType::I32, Nullability::Nullable)
         );
 
-        let flat_ints = sparse_ints
-            .into_canonical()
-            .unwrap()
-            .into_primitive()
-            .unwrap();
+        let flat_ints = sparse_ints.into_primitive().unwrap();
         let expected = PrimitiveArray::from_nullable_vec(vec![
             Some(0i32),
             None,

diff --git a/vortex-array/src/array/varbin/canonical.rs b/vortex-array/src/array/varbin/canonical.rs
@@ -37,7 +37,7 @@ mod test {
 
     use crate::array::varbin::builder::VarBinBuilder;
     use crate::validity::ArrayValidity;
-    use crate::{ArrayDType, IntoCanonical};
+    use crate::{ArrayDType, IntoArrayVariant};
 
     #[rstest]
     #[case(DType::Utf8(Nullability::Nullable))]
@@ -52,7 +52,7 @@ mod test {
         varbin.push_value("1234567890123".as_bytes());
         let varbin = varbin.finish(dtype.clone());
 
-        let canonical = varbin.into_canonical().unwrap().into_varbinview().unwrap();
+        let canonical = varbin.into_varbinview().unwrap();
         assert_eq!(canonical.dtype(), &dtype);
 
         assert!(!canonical.is_valid(0));

diff --git a/vortex-array/src/array/varbinview/accessor.rs b/vortex-array/src/array/varbinview/accessor.rs
@@ -6,15 +6,15 @@ use crate::array::primitive::PrimitiveArray;
 use crate::array::varbinview::VarBinViewArray;
 use crate::array::BinaryView;
 use crate::validity::ArrayValidity;
-use crate::IntoCanonical;
+use crate::IntoArrayVariant;
 
 impl ArrayAccessor<[u8]> for VarBinViewArray {
     fn with_iterator<F: for<'a> FnOnce(&mut dyn Iterator<Item = Option<&'a [u8]>>) -> R, R>(
         &self,
         f: F,
     ) -> VortexResult<R> {
         let bytes: Vec<PrimitiveArray> = (0..self.metadata().buffer_lens.len())
-            .map(|i| self.buffer(i).into_canonical()?.into_primitive())
+            .map(|i| self.buffer(i).into_primitive())
             .try_collect()?;
         let bytes_slices: Vec<&[u8]> = bytes.iter().map(|b| b.maybe_null_slice::<u8>()).collect();
         let views: Vec<BinaryView> = self.binary_views()?.collect();

diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs
@@ -49,7 +49,7 @@ impl TryFrom<StructArray> for RecordBatch {
     type Error = VortexError;
 
     fn try_from(value: StructArray) -> VortexResult<Self> {
-        let array_ref = value.into_canonical()?.into_arrow()?;
+        let array_ref = value.into_arrow()?;
         let struct_array = as_struct_array(array_ref.as_ref());
         Ok(Self::from(struct_array))
     }

diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs
@@ -198,9 +198,8 @@ fn struct_to_arrow(struct_array: StructArray) -> VortexResult<ArrayRef> {
         .iter()
         .zip(struct_array.children())
         .map(|(name, f)| {
-            f.into_canonical()
+            f.into_arrow()
                 .map_err(|err| err.with_context(format!("Failed to canonicalize field {}", name)))
-                .and_then(|c| c.into_arrow())
         })
         .collect::<VortexResult<Vec<_>>>()?;
 

diff --git a/vortex-array/src/compute/binary_numeric.rs b/vortex-array/src/compute/binary_numeric.rs
@@ -199,14 +199,12 @@ mod test {
 
     use crate::array::PrimitiveArray;
     use crate::compute::{scalar_at, sub_scalar};
-    use crate::{ArrayLen as _, IntoArrayData, IntoCanonical};
+    use crate::{ArrayLen as _, IntoArrayData, IntoArrayVariant};
 
     #[test]
     fn test_scalar_subtract_unsigned() {
         let values = vec![1u16, 2, 3].into_array();
         let results = sub_scalar(&values, 1u16.into())
-            .unwrap()
-            .into_canonical()
             .unwrap()
             .into_primitive()
             .unwrap()
@@ -219,8 +217,6 @@ mod test {
     fn test_scalar_subtract_signed() {
         let values = vec![1i64, 2, 3].into_array();
         let results = sub_scalar(&values, (-1i64).into())
-            .unwrap()
-            .into_canonical()
             .unwrap()
             .into_primitive()
             .unwrap()
@@ -234,8 +230,6 @@ mod test {
         let values = PrimitiveArray::from_nullable_vec(vec![Some(1u16), Some(2), None, Some(3)])
             .into_array();
         let result = sub_scalar(&values, Some(1u16).into())
-            .unwrap()
-            .into_canonical()
             .unwrap()
             .into_primitive()
             .unwrap();
@@ -259,8 +253,6 @@ mod test {
         let values = vec![1.0f64, 2.0, 3.0].into_array();
         let to_subtract = -1f64;
         let results = sub_scalar(&values, to_subtract.into())
-            .unwrap()
-            .into_canonical()
             .unwrap()
             .into_primitive()
             .unwrap()