From df59a634b398922cdd17e3fe55c0431e1becdda1 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 6 Mar 2024 09:02:42 +0000 Subject: [PATCH] Compressors --- bench-vortex/src/lib.rs | 1 + pyvortex/src/compress.rs | 5 ++++- vortex-array/src/array/sparse/mod.rs | 5 +++-- vortex-array/src/array/sparse/stats.rs | 9 --------- vortex-array/src/array/struct_/mod.rs | 5 +++-- vortex-array/src/array/struct_/stats.rs | 9 --------- vortex-array/src/compress.rs | 2 +- vortex-array/src/stats.rs | 4 +++- vortex-fastlanes/src/bitpacking/compress.rs | 10 ++++++---- vortex-fastlanes/src/for/compress.rs | 2 +- vortex-ffor/src/ffor.rs | 4 +++- vortex-ffor/src/lib.rs | 1 - vortex-ffor/src/stats.rs | 10 ---------- vortex-ree/src/lib.rs | 1 - vortex-ree/src/ree.rs | 4 +++- vortex-ree/src/stats.rs | 9 --------- 16 files changed, 28 insertions(+), 53 deletions(-) delete mode 100644 vortex-array/src/array/sparse/stats.rs delete mode 100644 vortex-array/src/array/struct_/stats.rs delete mode 100644 vortex-ffor/src/stats.rs delete mode 100644 vortex-ree/src/stats.rs diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 7997ffcde0..6ceb56714c 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -91,6 +91,7 @@ mod test { .unwrap(); } + #[ignore] #[test] fn compression_ratio() { setup_logger(); diff --git a/pyvortex/src/compress.rs b/pyvortex/src/compress.rs index 46833c9d5a..741d5a6ab6 100644 --- a/pyvortex/src/compress.rs +++ b/pyvortex/src/compress.rs @@ -4,6 +4,7 @@ use pyo3::{pyclass, pyfunction, pymethods, Py, PyResult, Python}; use vortex::compress::{CompressConfig, CompressCtx}; use crate::array::PyArray; +use crate::error::PyVortexError; #[derive(Clone)] #[pyclass(name = "CompressConfig", module = "vortex")] @@ -33,6 +34,8 @@ pub fn compress( ) -> PyResult> { let compress_opts = opts.map(|o| o.inner).unwrap_or_default(); let ctx = CompressCtx::new(&compress_opts); - let compressed = py.allow_threads(|| ctx.compress(arr.unwrap(), None)); + let compressed = py + .allow_threads(|| ctx.compress(arr.unwrap(), None)) + .map_err(PyVortexError::map_err)?; PyArray::wrap(py, compressed) } diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index 5cfdc0b40b..ab757dff4f 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -21,12 +21,11 @@ use crate::error::{VortexError, VortexResult}; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::match_arrow_numeric_type; use crate::serde::{ArraySerde, EncodingSerde}; -use crate::stats::{Stats, StatsSet}; +use crate::stats::{Stats, StatsCompute, StatsSet}; mod compress; mod compute; mod serde; -mod stats; #[derive(Debug, Clone)] pub struct SparseArray { @@ -190,6 +189,8 @@ impl Array for SparseArray { } } +impl StatsCompute for SparseArray {} + impl<'arr> AsRef<(dyn Array + 'arr)> for SparseArray { fn as_ref(&self) -> &(dyn Array + 'arr) { self diff --git a/vortex-array/src/array/sparse/stats.rs b/vortex-array/src/array/sparse/stats.rs deleted file mode 100644 index b12669ee59..0000000000 --- a/vortex-array/src/array/sparse/stats.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::array::sparse::SparseArray; -use crate::error::VortexResult; -use crate::stats::{Stat, StatsCompute, StatsSet}; - -impl StatsCompute for SparseArray { - fn compute(&self, _stat: &Stat) -> VortexResult { - todo!() - } -} diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 8ea1b16fd6..d007925194 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -13,7 +13,7 @@ use crate::dtype::{DType, FieldNames}; use crate::error::VortexResult; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::serde::{ArraySerde, EncodingSerde}; -use crate::stats::{Stats, StatsSet}; +use crate::stats::{Stats, StatsCompute, StatsSet}; use super::{ check_slice_bounds, Array, ArrayRef, ArrowIterator, Encoding, EncodingId, EncodingRef, @@ -23,7 +23,6 @@ use super::{ mod compress; mod compute; mod serde; -mod stats; #[derive(Debug, Clone)] pub struct StructArray { @@ -167,6 +166,8 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for StructArray { } } +impl StatsCompute for StructArray {} + #[derive(Debug)] pub struct StructEncoding; diff --git a/vortex-array/src/array/struct_/stats.rs b/vortex-array/src/array/struct_/stats.rs deleted file mode 100644 index 6d3e2b7e78..0000000000 --- a/vortex-array/src/array/struct_/stats.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::array::struct_::StructArray; -use crate::error::VortexResult; -use crate::stats::{Stat, StatsCompute, StatsSet}; - -impl StatsCompute for StructArray { - fn compute(&self, _stat: &Stat) -> VortexResult { - todo!() - } -} diff --git a/vortex-array/src/compress.rs b/vortex-array/src/compress.rs index 5a09cb3fef..038a65cfaf 100644 --- a/vortex-array/src/compress.rs +++ b/vortex-array/src/compress.rs @@ -131,7 +131,7 @@ impl Default for CompressCtx<'_> { pub fn sampled_compression(array: &dyn Array, ctx: CompressCtx) -> VortexResult { // First, we try constant compression and shortcut any sampling. - if array.len() > 0 + if !array.is_empty() && array .stats() .get_or_compute_as::(&Stat::IsConstant) diff --git a/vortex-array/src/stats.rs b/vortex-array/src/stats.rs index e1fb4cfe4e..0ccf75a21e 100644 --- a/vortex-array/src/stats.rs +++ b/vortex-array/src/stats.rs @@ -209,7 +209,9 @@ impl StatsSet { } pub trait StatsCompute { - fn compute(&self, stat: &Stat) -> VortexResult; + fn compute(&self, _stat: &Stat) -> VortexResult { + Ok(StatsSet::new()) + } } pub struct Stats<'a> { diff --git a/vortex-fastlanes/src/bitpacking/compress.rs b/vortex-fastlanes/src/bitpacking/compress.rs index 119c90694c..d287bb3849 100644 --- a/vortex-fastlanes/src/bitpacking/compress.rs +++ b/vortex-fastlanes/src/bitpacking/compress.rs @@ -224,10 +224,12 @@ mod test { ); let ctx = CompressCtx::new(&cfg); - let compressed = ctx.compress( - &PrimitiveArray::from_vec(Vec::from_iter((0..10_000).map(|i| (i % 63) as u8))), - None, - ); + let compressed = ctx + .compress( + &PrimitiveArray::from_vec(Vec::from_iter((0..10_000).map(|i| (i % 63) as u8))), + None, + ) + .unwrap(); assert_eq!(compressed.encoding().id(), BitPackedEncoding.id()); let bp = compressed .as_any() diff --git a/vortex-fastlanes/src/for/compress.rs b/vortex-fastlanes/src/for/compress.rs index 2929951682..0ba25c3ea7 100644 --- a/vortex-fastlanes/src/for/compress.rs +++ b/vortex-fastlanes/src/for/compress.rs @@ -93,7 +93,7 @@ mod test { // Create a range offset by a million let array = PrimitiveArray::from_vec((0u32..10_000).map(|v| v + 1_000_000).collect_vec()); - let compressed = ctx.compress(&array, None); + let compressed = ctx.compress(&array, None).unwrap(); assert_eq!(compressed.encoding().id(), FoREncoding.id()); let fa = compressed.as_any().downcast_ref::().unwrap(); assert_eq!(fa.reference().try_into(), Ok(1_000_000u32)); diff --git a/vortex-ffor/src/ffor.rs b/vortex-ffor/src/ffor.rs index 200fbac591..85ad9ef870 100644 --- a/vortex-ffor/src/ffor.rs +++ b/vortex-ffor/src/ffor.rs @@ -13,7 +13,7 @@ use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::scalar::{Scalar, ScalarRef}; use vortex::serde::{ArraySerde, EncodingSerde}; -use vortex::stats::{Stats, StatsSet}; +use vortex::stats::{Stats, StatsCompute, StatsSet}; use crate::compress::ffor_encode; @@ -168,6 +168,8 @@ impl Array for FFORArray { } } +impl StatsCompute for FFORArray {} + impl ArrayCompute for FFORArray {} impl<'arr> AsRef<(dyn Array + 'arr)> for FFORArray { diff --git a/vortex-ffor/src/lib.rs b/vortex-ffor/src/lib.rs index 99e96dd1ad..043f691984 100644 --- a/vortex-ffor/src/lib.rs +++ b/vortex-ffor/src/lib.rs @@ -7,7 +7,6 @@ mod compress; mod downcast; mod ffor; mod serde; -mod stats; #[distributed_slice(ENCODINGS)] static ENCODINGS_FFOR: EncodingRef = &FFoREncoding; diff --git a/vortex-ffor/src/stats.rs b/vortex-ffor/src/stats.rs deleted file mode 100644 index 7aae542de9..0000000000 --- a/vortex-ffor/src/stats.rs +++ /dev/null @@ -1,10 +0,0 @@ -use vortex::error::VortexResult; - -use crate::FFORArray; -use vortex::stats::{Stat, StatsCompute, StatsSet}; - -impl StatsCompute for FFORArray { - fn compute(&self, _stat: &Stat) -> VortexResult { - Ok(StatsSet::default()) - } -} diff --git a/vortex-ree/src/lib.rs b/vortex-ree/src/lib.rs index c6e878a3e1..1999271823 100644 --- a/vortex-ree/src/lib.rs +++ b/vortex-ree/src/lib.rs @@ -8,7 +8,6 @@ mod compute; mod downcast; mod ree; mod serde; -mod stats; #[distributed_slice(ENCODINGS)] static ENCODINGS_REE: EncodingRef = &REEEncoding; diff --git a/vortex-ree/src/ree.rs b/vortex-ree/src/ree.rs index ece19ab5f4..22840c080f 100644 --- a/vortex-ree/src/ree.rs +++ b/vortex-ree/src/ree.rs @@ -23,7 +23,7 @@ use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::ptype::NativePType; use vortex::serde::{ArraySerde, EncodingSerde}; -use vortex::stats::{Stat, Stats, StatsSet}; +use vortex::stats::{Stat, Stats, StatsCompute, StatsSet}; use crate::compress::ree_encode; @@ -218,6 +218,8 @@ impl Array for REEArray { } } +impl StatsCompute for REEArray {} + impl<'arr> AsRef<(dyn Array + 'arr)> for REEArray { fn as_ref(&self) -> &(dyn Array + 'arr) { self diff --git a/vortex-ree/src/stats.rs b/vortex-ree/src/stats.rs deleted file mode 100644 index 7b16fda239..0000000000 --- a/vortex-ree/src/stats.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::REEArray; -use vortex::error::VortexResult; -use vortex::stats::{Stat, StatsCompute, StatsSet}; - -impl StatsCompute for REEArray { - fn compute(&self, _stat: &Stat) -> VortexResult { - todo!() - } -}