From 516bf272fe13ac9d93d623b1d97e93d36224447b Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 5 Jun 2024 17:39:59 +0100 Subject: [PATCH] lord give me the strength --- bench-vortex/Cargo.toml | 4 +-- bench-vortex/benches/random_access.rs | 1 + bench-vortex/src/public_bi_data.rs | 1 + vortex-alp/src/compute.rs | 31 ++++++++++++++----- .../src/array/datetime/localdatetime.rs | 1 - vortex-array/src/array/struct/compute.rs | 3 +- vortex-array/src/array/struct/mod.rs | 5 ++- vortex-array/src/compute/as_contiguous.rs | 9 ++++-- vortex-array/src/flatten.rs | 2 +- vortex-datetime-parts/src/array.rs | 5 ++- vortex-datetime-parts/src/compute.rs | 24 +++++++++++--- vortex-dict/src/compute.rs | 4 +-- vortex-fastlanes/src/for/compute.rs | 2 +- 13 files changed, 67 insertions(+), 25 deletions(-) diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 7a17e33825..47228faed0 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -11,8 +11,8 @@ include = { workspace = true } edition = { workspace = true } rust-version = { workspace = true } -#[lints] -#workspace = true +[lints] +workspace = true [dependencies] arrow-array = { workspace = true } diff --git a/bench-vortex/benches/random_access.rs b/bench-vortex/benches/random_access.rs index f472683366..eaff19ecf6 100644 --- a/bench-vortex/benches/random_access.rs +++ b/bench-vortex/benches/random_access.rs @@ -27,6 +27,7 @@ fn random_access(c: &mut Criterion) { }); let dataset = BenchmarkDatasets::PBI(Medicare1); + dataset.write_as_parquet(); dataset.write_as_lance(); // NB: our parquet benchmarks read from a single file, and we (currently) write each // file to an individual lance dataset for comparison parity. diff --git a/bench-vortex/src/public_bi_data.rs b/bench-vortex/src/public_bi_data.rs index 8ede0b3500..053f58884a 100644 --- a/bench-vortex/src/public_bi_data.rs +++ b/bench-vortex/src/public_bi_data.rs @@ -503,6 +503,7 @@ impl BenchmarkDataset for BenchmarkDatasets { self.as_uncompressed(); for f in self.list_files(FileType::Csv) { info!("Compressing {} to lance", f.to_str().unwrap()); + println!("Compressing {} to lance", f.to_str().unwrap()); let output_fname = f .file_name() .unwrap() diff --git a/vortex-alp/src/compute.rs b/vortex-alp/src/compute.rs index 9b2752e810..db81ab7d48 100644 --- a/vortex-alp/src/compute.rs +++ b/vortex-alp/src/compute.rs @@ -1,9 +1,9 @@ +use vortex::compute::as_contiguous::AsContiguousFn; use vortex::compute::scalar_at::{scalar_at, ScalarAtFn}; use vortex::compute::slice::{slice, SliceFn}; use vortex::compute::take::{take, TakeFn}; use vortex::compute::ArrayCompute; -use vortex::{Array, ArrayDType, impl_default_as_contiguous_fn, IntoArray}; -use vortex::compute::as_contiguous::AsContiguousFn; +use vortex::{impl_default_as_contiguous_fn, Array, ArrayDType, IntoArray}; use vortex_error::VortexResult; use vortex_scalar::Scalar; @@ -74,8 +74,9 @@ mod test { use vortex::array::primitive::PrimitiveArray; use vortex::compute::as_contiguous::AsContiguousFn; use vortex::compute::scalar_at::scalar_at; - use vortex::IntoArray; use vortex::validity::Validity; + use vortex::IntoArray; + use crate::ALPArray; #[test] @@ -85,20 +86,34 @@ mod test { let encoded = ALPArray::encode(primitives.into_array()).unwrap(); let alp = ALPArray::try_from(&encoded).unwrap(); - let flat = alp.as_contiguous(&[encoded]).unwrap(); - let a = scalar_at(&flat, 0).unwrap().value().as_pvalue().unwrap().unwrap(); + let a = scalar_at(&flat, 0) + .unwrap() + .value() + .as_pvalue() + .unwrap() + .unwrap(); let a: f64 = a.try_into().unwrap(); - let b = scalar_at(&flat, 1).unwrap().value().as_pvalue().unwrap().unwrap(); + let b = scalar_at(&flat, 1) + .unwrap() + .value() + .as_pvalue() + .unwrap() + .unwrap(); let b: f64 = b.try_into().unwrap(); - let c = scalar_at(&flat, 2).unwrap().value().as_pvalue().unwrap().unwrap(); + let c = scalar_at(&flat, 2) + .unwrap() + .value() + .as_pvalue() + .unwrap() + .unwrap(); let c: f64 = c.try_into().unwrap(); assert_eq!(a, 1.0); assert_eq!(b, 2.0); assert_eq!(c, 3.0); } -} \ No newline at end of file +} diff --git a/vortex-array/src/array/datetime/localdatetime.rs b/vortex-array/src/array/datetime/localdatetime.rs index d909acd078..849d84cf9c 100644 --- a/vortex-array/src/array/datetime/localdatetime.rs +++ b/vortex-array/src/array/datetime/localdatetime.rs @@ -62,7 +62,6 @@ impl TryFrom for ExtensionArray { } } - impl TryFrom<&LocalDateTimeArray> for ExtensionArray { type Error = VortexError; diff --git a/vortex-array/src/array/struct/compute.rs b/vortex-array/src/array/struct/compute.rs index d7fc07d84f..e7751652f1 100644 --- a/vortex-array/src/array/struct/compute.rs +++ b/vortex-array/src/array/struct/compute.rs @@ -81,7 +81,8 @@ impl AsContiguousFn for StructArray { } } - let fields_len = fields.first() + let fields_len = fields + .first() .map(|field| field.iter().map(|a| a.len()).sum()) .unwrap_or_default(); diff --git a/vortex-array/src/array/struct/mod.rs b/vortex-array/src/array/struct/mod.rs index 194e83e390..e61a96e001 100644 --- a/vortex-array/src/array/struct/mod.rs +++ b/vortex-array/src/array/struct/mod.rs @@ -70,7 +70,10 @@ impl StructArray { } if fields.iter().any(|a| a.with_dyn(|a| a.len()) != length) { - println!("FIELD LENGTHS: {:?}", fields.iter().map(|field| field.len()).collect::>()); + println!( + "FIELD LENGTHS: {:?}", + fields.iter().map(|field| field.len()).collect::>() + ); vortex_bail!("Expected all struct fields to have length {}", length); } diff --git a/vortex-array/src/compute/as_contiguous.rs b/vortex-array/src/compute/as_contiguous.rs index 1518ea928a..80572d68a8 100644 --- a/vortex-array/src/compute/as_contiguous.rs +++ b/vortex-array/src/compute/as_contiguous.rs @@ -3,7 +3,6 @@ use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::{Array, ArrayDType}; - /// Trait for typed array variants which support the process of unfurling to somewhere else. pub trait AsContiguousFn { fn as_contiguous(&self, arrays: &[Array]) -> VortexResult; @@ -46,7 +45,13 @@ pub fn as_contiguous(arrays: &[Array]) -> VortexResult { vortex_bail!(ComputeError: "No arrays to concatenate"); } if !arrays.iter().map(|chunk| chunk.encoding().id()).all_equal() { - println!("ENCODINGS: {:?}", arrays.iter().map(|chunk| chunk.encoding().id()).collect_vec()); + println!( + "ENCODINGS: {:?}", + arrays + .iter() + .map(|chunk| chunk.encoding().id()) + .collect_vec() + ); vortex_bail!(ComputeError: "Chunks have differing encodings"); } if !arrays.iter().map(|chunk| chunk.dtype()).all_equal() { diff --git a/vortex-array/src/flatten.rs b/vortex-array/src/flatten.rs index c3a8cde329..a51f31fbcc 100644 --- a/vortex-array/src/flatten.rs +++ b/vortex-array/src/flatten.rs @@ -1,6 +1,5 @@ use vortex_error::VortexResult; -use crate::{Array, IntoArray}; use crate::array::bool::BoolArray; use crate::array::extension::ExtensionArray; use crate::array::primitive::PrimitiveArray; @@ -8,6 +7,7 @@ use crate::array::r#struct::StructArray; use crate::array::varbin::VarBinArray; use crate::array::varbinview::VarBinViewArray; use crate::encoding::ArrayEncoding; +use crate::{Array, IntoArray}; /// The set of encodings that can be converted to Arrow with zero-copy. pub enum Flattened { diff --git a/vortex-datetime-parts/src/array.rs b/vortex-datetime-parts/src/array.rs index f37d7a48be..32abbe2443 100644 --- a/vortex-datetime-parts/src/array.rs +++ b/vortex-datetime-parts/src/array.rs @@ -4,6 +4,7 @@ use vortex::validity::{ArrayValidity, LogicalValidity}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, ArrayDType, ArrayFlatten, ToArrayData}; use vortex_error::vortex_bail; + use crate::compute::decode_to_localdatetime; impl_encoding!("vortex.datetimeparts", DateTimeParts); @@ -82,7 +83,9 @@ impl DateTimePartsArray { impl ArrayFlatten for DateTimePartsArray { fn flatten(self) -> VortexResult { - Ok(Flattened::Extension(decode_to_localdatetime(&self.into_array())?.try_into()?)) + Ok(Flattened::Extension( + decode_to_localdatetime(&self.into_array())?.try_into()?, + )) } } diff --git a/vortex-datetime-parts/src/compute.rs b/vortex-datetime-parts/src/compute.rs index b0c8fa2351..9090f29cd4 100644 --- a/vortex-datetime-parts/src/compute.rs +++ b/vortex-datetime-parts/src/compute.rs @@ -123,9 +123,24 @@ pub fn decode_to_localdatetime(array: &Array) -> VortexResult 1, }; - let days_buf = array.days().flatten()?.into_array().as_primitive().scalar_buffer::(); - let seconds_buf = array.seconds().flatten()?.into_array().as_primitive().scalar_buffer::(); - let subsecond_buf = array.subsecond().flatten()?.into_array().as_primitive().scalar_buffer::(); + let days_buf = array + .days() + .flatten()? + .into_array() + .as_primitive() + .scalar_buffer::(); + let seconds_buf = array + .seconds() + .flatten()? + .into_array() + .as_primitive() + .scalar_buffer::(); + let subsecond_buf = array + .subsecond() + .flatten()? + .into_array() + .as_primitive() + .scalar_buffer::(); // TODO(aduffy): replace with vectorized implementation? let values = days_buf @@ -137,7 +152,7 @@ pub fn decode_to_localdatetime(array: &Array) -> VortexResult