diff --git a/Cargo.lock b/Cargo.lock index a15748123b..cc7cc731cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1147,6 +1147,7 @@ dependencies = [ "simplelog", "tokio", "uuid", + "vortex-alp", "vortex-array", "vortex-datetime-parts", "vortex-dict", @@ -1521,6 +1522,7 @@ checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstyle", "clap_lex", + "terminal_size", ] [[package]] @@ -1549,6 +1551,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "const-random" version = "0.1.18" @@ -1982,6 +1990,31 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "divan" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d567df2c9c2870a43f3f2bd65aaeb18dbce1c18f217c3e564b4fbaeb3ee56c" +dependencies = [ + "cfg-if", + "clap", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + +[[package]] +name = "divan-macros" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27540baf49be0d484d8f0130d7d8da3011c32a44d4fc873368154f1510e574a2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.59", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -4264,6 +4297,7 @@ dependencies = [ "paste", "pyo3", "pyo3-log", + "vortex-alp", "vortex-array", "vortex-dict", "vortex-error", @@ -4424,6 +4458,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" + [[package]] name = "regex-syntax" version = "0.8.3" @@ -5169,6 +5209,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "thiserror" version = "1.0.58" @@ -5568,6 +5618,22 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "vortex-alp" +version = "0.1.0" +dependencies = [ + "divan", + "itertools 0.12.1", + "linkme", + "log", + "num-traits", + "paste", + "serde", + "vortex-array", + "vortex-error", + "vortex-schema", +] + [[package]] name = "vortex-array" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index f6b243d13a..e0e474eea3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ members = [ "fastlanez-sys", "pyvortex", "vortex-alloc", - #"vortex-alp", + "vortex-alp", "vortex-array", "vortex-datetime-parts", "vortex-dict", diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 01fcc99c69..204fbd82c1 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -40,6 +40,7 @@ reqwest = { workspace = true } simplelog = { workspace = true } tokio = { workspace = true } uuid = { workspace = true } +vortex-alp = { path = "../vortex-alp" } vortex-array = { path = "../vortex-array" } vortex-datetime-parts = { path = "../vortex-datetime-parts" } vortex-dict = { path = "../vortex-dict" } diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 75daea52bb..b0e4dc79cf 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -15,6 +15,7 @@ use vortex::arrow::FromArrowType; use vortex::compress::{CompressConfig, CompressCtx}; use vortex::encoding::{EncodingRef, VORTEX_ENCODINGS}; use vortex::{IntoArray, OwnedArray, ToArrayData}; +use vortex_alp::ALPEncoding; use vortex_datetime_parts::DateTimePartsEncoding; use vortex_dict::DictEncoding; use vortex_fastlanes::{BitPackedEncoding, FoREncoding}; @@ -106,7 +107,7 @@ pub fn enumerate_arrays() -> Vec { VORTEX_ENCODINGS.iter().map(|e| e.id()).collect_vec() ); vec![ - //&ALPEncoding, + &ALPEncoding, &DictEncoding, &BitPackedEncoding, &FoREncoding, diff --git a/pyvortex/Cargo.toml b/pyvortex/Cargo.toml index 5870a73b32..556bcd9c2d 100644 --- a/pyvortex/Cargo.toml +++ b/pyvortex/Cargo.toml @@ -21,7 +21,7 @@ crate-type = ["rlib", "cdylib"] [dependencies] arrow = { workspace = true } vortex-array = { path = "../vortex-array" } -#vortex-alp = { path = "../vortex-alp" } +vortex-alp = { path = "../vortex-alp" } vortex-dict = { path = "../vortex-dict" } vortex-error = { path = "../vortex-error" } vortex-fastlanes = { path = "../vortex-fastlanes" } diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 957a3bab62..e2324ab71c 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -16,6 +16,7 @@ use vortex::encoding::EncodingRef; use vortex::ToStatic; use vortex::{ArrayDType, ArrayData, IntoArray, OwnedArray}; use vortex::{ArrayDef, IntoArrayData}; +use vortex_alp::{ALPArray, ALPEncoding, OwnedALPArray, ALP}; use vortex_dict::{Dict, DictArray, DictEncoding, OwnedDictArray}; use vortex_fastlanes::{ BitPacked, BitPackedArray, BitPackedEncoding, Delta, DeltaArray, DeltaEncoding, FoR, FoRArray, @@ -67,7 +68,7 @@ pyarray!(StructEncoding, StructArray, "StructArray"); pyarray!(VarBinEncoding, VarBinArray, "VarBinArray"); pyarray!(VarBinViewEncoding, VarBinViewArray, "VarBinViewArray"); -// pyarray!(ALPEncoding, ALPArray, "ALPArray"); +pyarray!(ALPEncoding, ALPArray, "ALPArray"); pyarray!(BitPackedEncoding, BitPackedArray, "BitPackedArray"); pyarray!(FoREncoding, FoRArray, "FoRArray"); pyarray!(DeltaEncoding, DeltaArray, "DeltaArray"); @@ -156,6 +157,11 @@ impl PyArray { )? .extract(py), + ALP::ID => PyALPArray::wrap( + py, + OwnedALPArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + )? + .extract(py), _ => Py::new( py, Self { diff --git a/pyvortex/src/lib.rs b/pyvortex/src/lib.rs index 219aaf3e04..964ddca02e 100644 --- a/pyvortex/src/lib.rs +++ b/pyvortex/src/lib.rs @@ -47,6 +47,7 @@ fn _lib(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; // m.add_class::()?; + m.add_class::()?; m.add_class::()?; diff --git a/vortex-alp/Cargo.toml b/vortex-alp/Cargo.toml index 9f9cee7fc2..5740705e83 100644 --- a/vortex-alp/Cargo.toml +++ b/vortex-alp/Cargo.toml @@ -15,13 +15,15 @@ rust-version = { workspace = true } workspace = true [dependencies] +itertools = { workspace = true } +linkme = { workspace = true } +log = { workspace = true } +num-traits = { workspace = true } +paste = { workspace = true } +serde = { workspace = true, features = ["derive"] } vortex-array = { path = "../vortex-array" } vortex-error = { path = "../vortex-error" } vortex-schema = { path = "../vortex-schema" } -linkme = { workspace = true } -itertools = { workspace = true } -num-traits = { workspace = true } -log = { workspace = true } [dev-dependencies] divan = { workspace = true } diff --git a/vortex-alp/src/alp.rs b/vortex-alp/src/alp.rs index 6a3a5c7095..f8251a96da 100644 --- a/vortex-alp/src/alp.rs +++ b/vortex-alp/src/alp.rs @@ -2,10 +2,11 @@ use std::mem::size_of; use itertools::Itertools; use num_traits::{Float, NumCast, PrimInt, Zero}; +use serde::{Deserialize, Serialize}; const SAMPLE_SIZE: usize = 32; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct Exponents { pub e: u8, pub f: u8, diff --git a/vortex-alp/src/array.rs b/vortex-alp/src/array.rs index 4de69a3e4a..841df11db5 100644 --- a/vortex-alp/src/array.rs +++ b/vortex-alp/src/array.rs @@ -1,39 +1,31 @@ -use std::sync::{Arc, RwLock}; - -use vortex::array::{Array, ArrayKind, ArrayRef}; -use vortex::compress::EncodingCompression; -use vortex::compute::ArrayCompute; -use vortex::encoding::{Encoding, EncodingId, EncodingRef}; -use vortex::formatter::{ArrayDisplay, ArrayFormatter}; -use vortex::serde::{ArraySerde, EncodingSerde}; -use vortex::stats::{Stats, StatsSet}; -use vortex::validity::{ArrayValidity, Validity}; -use vortex::{impl_array, ArrayWalker}; -use vortex_error::{vortex_bail, vortex_err, VortexResult}; -use vortex_schema::{DType, IntWidth, Signedness}; +use serde::{Deserialize, Serialize}; +use vortex::array::primitive::{Primitive, PrimitiveArray}; +use vortex::stats::ArrayStatisticsCompute; +use vortex::validity::{ArrayValidity, LogicalValidity}; +use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; +use vortex::{impl_encoding, ArrayDType, ArrayFlatten, IntoArrayData, OwnedArray, ToArrayData}; +use vortex_error::{vortex_bail, VortexResult}; +use vortex_schema::{IntWidth, Signedness}; use crate::alp::Exponents; -use crate::compress::alp_encode; +use crate::compress::{alp_encode, decompress}; -#[derive(Debug, Clone)] -pub struct ALPArray { - encoded: ArrayRef, +impl_encoding!("vortex.alp", ALP); + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ALPMetadata { exponents: Exponents, - patches: Option, - dtype: DType, - stats: Arc>, + has_patches: bool, + encoded_dtype: DType, } -impl ALPArray { - pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option) -> Self { - Self::try_new(encoded, exponents, patches).unwrap() - } - +impl ALPArray<'_> { pub fn try_new( - encoded: ArrayRef, + encoded: Array, exponents: Exponents, - patches: Option, + patches: Option, ) -> VortexResult { + let encoded_dtype = encoded.dtype().clone(); let dtype = match encoded.dtype() { DType::Int(IntWidth::_32, Signedness::Signed, nullability) => { DType::Float(32.into(), *nullability) @@ -43,120 +35,89 @@ impl ALPArray { } d => vortex_bail!(MismatchedTypes: "int32 or int64", d), }; - Ok(Self { - encoded, - exponents, - patches, - dtype, - stats: Arc::new(RwLock::new(StatsSet::new())), - }) - } - pub fn encode(array: &dyn Array) -> VortexResult { - match ArrayKind::from(array) { - ArrayKind::Primitive(p) => Ok(alp_encode(p)?.into_array()), - _ => Err(vortex_err!("ALP can only encoding primitive arrays")), + let mut children = Vec::with_capacity(2); + children.push(encoded.into_array_data()); + + if let Some(ref patch) = patches { + children.push(patch.to_array_data()); + } + + Self::try_from_parts( + dtype, + ALPMetadata { + exponents, + has_patches: patches.is_some(), + encoded_dtype, + }, + children.into(), + Default::default(), + ) + } + + pub fn encode(array: Array<'_>) -> VortexResult { + if array.encoding().id() == Primitive::ID { + Ok(alp_encode(&PrimitiveArray::try_from(array)?)?.into_array()) + } else { + vortex_bail!("ALP can only encode primitive arrays"); } } - pub fn encoded(&self) -> &ArrayRef { - &self.encoded + pub fn encoded(&self) -> Array { + self.array() + .child(0, &self.metadata().encoded_dtype) + .expect("Missing encoded array") } pub fn exponents(&self) -> &Exponents { - &self.exponents + &self.metadata().exponents } - pub fn patches(&self) -> Option<&ArrayRef> { - self.patches.as_ref() + pub fn patches(&self) -> Option { + self.metadata().has_patches.then(|| { + self.array() + .child(1, self.dtype()) + .expect("Missing patches with present metadata flag") + }) } } -impl Array for ALPArray { - impl_array!(); - - #[inline] - fn len(&self) -> usize { - self.encoded.len() - } - - #[inline] - fn is_empty(&self) -> bool { - self.encoded.is_empty() - } - - #[inline] - fn dtype(&self) -> &DType { - &self.dtype - } - - #[inline] - fn stats(&self) -> Stats { - Stats::new(&self.stats, self) - } - - #[inline] - fn with_compute_mut( - &self, - f: &mut dyn FnMut(&dyn ArrayCompute) -> VortexResult<()>, - ) -> VortexResult<()> { - f(self) - } - - #[inline] - fn encoding(&self) -> EncodingRef { - &ALPEncoding - } - - #[inline] - fn nbytes(&self) -> usize { - self.encoded().nbytes() + self.patches().map(|p| p.nbytes()).unwrap_or(0) - } - - fn serde(&self) -> Option<&dyn ArraySerde> { - Some(self) +impl ArrayValidity for ALPArray<'_> { + fn is_valid(&self, index: usize) -> bool { + self.encoded().with_dyn(|a| a.is_valid(index)) } - fn walk(&self, walker: &mut dyn ArrayWalker) -> VortexResult<()> { - walker.visit_child(self.encoded()) + fn logical_validity(&self) -> LogicalValidity { + self.encoded().with_dyn(|a| a.logical_validity()) } } -impl ArrayDisplay for ALPArray { - fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result { - f.property("exponents", format!("{:?}", self.exponents()))?; - f.child("encoded", self.encoded())?; - f.maybe_child("patches", self.patches()) +impl ArrayFlatten for ALPArray<'_> { + fn flatten<'a>(self) -> VortexResult> + where + Self: 'a, + { + decompress(self).map(Flattened::Primitive) } } -impl ArrayValidity for ALPArray { - fn logical_validity(&self) -> Validity { - self.encoded().logical_validity() - } - - fn is_valid(&self, index: usize) -> bool { - self.encoded().is_valid(index) +impl AcceptArrayVisitor for ALPArray<'_> { + fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { + visitor.visit_child("encoded", &self.encoded())?; + if self.patches().is_some() { + visitor.visit_child( + "patches", + &self.patches().expect("Expected patches to be present "), + )?; + } + Ok(()) } } -#[derive(Debug)] -pub struct ALPEncoding; +impl ArrayStatisticsCompute for ALPArray<'_> {} -impl ALPEncoding { - pub const ID: EncodingId = EncodingId::new("vortex.alp"); -} - -impl Encoding for ALPEncoding { - fn id(&self) -> EncodingId { - Self::ID - } - - fn compression(&self) -> Option<&dyn EncodingCompression> { - Some(self) - } - - fn serde(&self) -> Option<&dyn EncodingSerde> { - Some(self) +impl ArrayTrait for ALPArray<'_> { + fn len(&self) -> usize { + self.encoded().len() } } diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs index a6bd6e6a10..a1b2ce51c1 100644 --- a/vortex-alp/src/compress.rs +++ b/vortex-alp/src/compress.rs @@ -1,20 +1,15 @@ use itertools::Itertools; -use vortex::array::downcast::DowncastArrayBuiltin; use vortex::array::primitive::PrimitiveArray; -use vortex::array::sparse::{SparseArray, SparseEncoding}; -use vortex::array::{Array, ArrayRef}; +use vortex::array::sparse::{Sparse, SparseArray}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::compute::flatten::flatten_primitive; use vortex::ptype::{NativePType, PType}; use vortex::scalar::Scalar; -use vortex::validity::OwnedValidity; -use vortex::view::ToOwnedView; +use vortex::{Array, ArrayDType, ArrayDef, AsArray, IntoArray, OwnedArray}; use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::alp::ALPFloat; use crate::array::{ALPArray, ALPEncoding}; -use crate::downcast::DowncastALP; -use crate::Exponents; +use crate::{Exponents, OwnedALPArray}; #[macro_export] macro_rules! match_each_alp_float_ptype { @@ -34,11 +29,11 @@ macro_rules! match_each_alp_float_ptype { impl EncodingCompression for ALPEncoding { fn can_compress( &self, - array: &dyn Array, + array: &Array, _config: &CompressConfig, ) -> Option<&dyn EncodingCompression> { // Only support primitive arrays - let parray = array.maybe_primitive()?; + let parray = PrimitiveArray::try_from(array).ok()?; // Only supports f32 and f64 if !matches!(parray.ptype(), PType::F32 | PType::F64) { @@ -50,41 +45,44 @@ impl EncodingCompression for ALPEncoding { fn compress( &self, - array: &dyn Array, - like: Option<&dyn Array>, + array: &Array, + like: Option<&Array>, ctx: CompressCtx, - ) -> VortexResult { - let like_alp = like.map(|like_array| like_array.as_alp()); + ) -> VortexResult> { + let like_alp = like.map(|like_array| like_array.as_array_ref()); + let like_exponents = like + .map(|like_array| ALPArray::try_from(like_array).unwrap()) + .map(|a| a.exponents().to_owned()); // TODO(ngates): fill forward nulls - let parray = array.as_primitive(); + let parray = PrimitiveArray::try_from(array)?; let (exponents, encoded, patches) = match_each_alp_float_ptype!( parray.ptype(), |$T| { - encode_to_array::<$T>(parray, like_alp.map(|l| l.exponents())) + encode_to_array::<$T>(&parray, like_exponents.as_ref()) })?; let compressed_encoded = ctx .named("packed") .excluding(&ALPEncoding) - .compress(encoded.as_ref(), like_alp.map(|a| a.encoded()))?; + .compress(encoded.as_array_ref(), like_alp)?; let compressed_patches = patches .map(|p| { ctx.auxiliary("patches") .excluding(&ALPEncoding) - .compress(p.as_ref(), like_alp.and_then(|a| a.patches())) + .compress(p.as_array_ref(), like_alp) }) .transpose()?; - Ok(ALPArray::new(compressed_encoded, exponents, compressed_patches).into_array()) + ALPArray::try_new(compressed_encoded, exponents, compressed_patches).map(|a| a.into_array()) } } fn encode_to_array( values: &PrimitiveArray, exponents: Option<&Exponents>, -) -> (Exponents, ArrayRef, Option) +) -> (Exponents, OwnedArray, Option) where T: ALPFloat + NativePType, T::ALPInt: NativePType, @@ -93,9 +91,7 @@ where let len = encoded.len(); ( exponents, - PrimitiveArray::from(encoded) - .into_nullable(values.nullability()) - .into_array(), + PrimitiveArray::from(encoded).into_array(), (!exc.is_empty()).then(|| { SparseArray::new( PrimitiveArray::from(exc_pos).into_array(), @@ -108,41 +104,46 @@ where ) } -pub(crate) fn alp_encode(parray: &PrimitiveArray) -> VortexResult { +pub(crate) fn alp_encode(parray: &PrimitiveArray) -> VortexResult { let (exponents, encoded, patches) = match parray.ptype() { PType::F32 => encode_to_array::(parray, None), PType::F64 => encode_to_array::(parray, None), _ => vortex_bail!("ALP can only encode f32 and f64"), }; - Ok(ALPArray::new(encoded, exponents, patches)) + ALPArray::try_new(encoded, exponents, patches) } -pub fn decompress(array: &ALPArray) -> VortexResult { - let encoded = flatten_primitive(array.encoded())?; +pub fn decompress(array: ALPArray) -> VortexResult { + let encoded = array.encoded().clone().flatten_primitive()?; + let decoded = match_each_alp_float_ptype!(array.dtype().try_into().unwrap(), |$T| { - PrimitiveArray::from_nullable( + PrimitiveArray::from_vec( decompress_primitive::<$T>(encoded.typed_data(), array.exponents()), - encoded.validity().to_owned_view(), + encoded.validity(), ) })?; if let Some(patches) = array.patches() { - patch_decoded(decoded, patches) + patch_decoded(decoded, &patches) } else { Ok(decoded) } } -fn patch_decoded(array: PrimitiveArray, patches: &dyn Array) -> VortexResult { +fn patch_decoded<'a>( + array: PrimitiveArray<'a>, + patches: &Array, +) -> VortexResult> { match patches.encoding().id() { - SparseEncoding::ID => { + Sparse::ID => { match_each_alp_float_ptype!(array.ptype(), |$T| { + let typed_patches = SparseArray::try_from(patches).unwrap(); array.patch( - &patches.as_sparse().resolved_indices(), - flatten_primitive(patches.as_sparse().values())?.typed_data::<$T>())? + &typed_patches.resolved_indices(), + typed_patches.values().flatten_primitive()?.typed_data::<$T>())? }) } - _ => panic!("can't patch alp array with {}", patches), + _ => panic!("can't patch ALP array with {}", patches), } } @@ -166,28 +167,32 @@ mod tests { let encoded = alp_encode(&array).unwrap(); assert!(encoded.patches().is_none()); assert_eq!( - encoded.encoded().as_primitive().typed_data::(), + PrimitiveArray::try_from(encoded.encoded()) + .unwrap() + .typed_data::(), vec![1234; 1025] ); assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 }); - let decoded = decompress(&encoded).unwrap(); + let decoded = decompress(encoded).unwrap(); assert_eq!(array.typed_data::(), decoded.typed_data::()); } #[test] fn test_nullable_compress() { - let array = PrimitiveArray::from_iter(vec![None, Some(1.234f32), None]); + let array = PrimitiveArray::from_nullable_vec(vec![None, Some(1.234f32), None]); let encoded = alp_encode(&array).unwrap(); println!("Encoded {:?}", encoded); assert!(encoded.patches().is_none()); assert_eq!( - encoded.encoded().as_primitive().typed_data::(), + PrimitiveArray::try_from(encoded.encoded()) + .unwrap() + .typed_data::(), vec![0, 1234, 0] ); assert_eq!(encoded.exponents(), &Exponents { e: 4, f: 1 }); - let decoded = decompress(&encoded).unwrap(); + let decoded = decompress(encoded).unwrap(); let expected = vec![0f32, 1.234f32, 0f32]; assert_eq!(decoded.typed_data::(), expected.as_slice()); } @@ -201,12 +206,17 @@ mod tests { println!("Encoded {:?}", encoded); assert!(encoded.patches().is_some()); assert_eq!( - encoded.encoded().as_primitive().typed_data::(), + encoded + .encoded() + .clone() + .flatten_primitive() + .unwrap() + .typed_data::(), vec![1234i64, 2718, 2718, 4000] // fill forward ); assert_eq!(encoded.exponents(), &Exponents { e: 3, f: 0 }); - let decoded = decompress(&encoded).unwrap(); + let decoded = decompress(encoded).unwrap(); assert_eq!(values, decoded.typed_data::()); } } diff --git a/vortex-alp/src/compute.rs b/vortex-alp/src/compute.rs index 37920557e9..b5710c03ea 100644 --- a/vortex-alp/src/compute.rs +++ b/vortex-alp/src/compute.rs @@ -1,20 +1,14 @@ -use vortex::array::{Array, ArrayRef}; -use vortex::compute::flatten::{FlattenFn, FlattenedArray}; use vortex::compute::scalar_at::{scalar_at, ScalarAtFn}; use vortex::compute::slice::{slice, SliceFn}; use vortex::compute::take::{take, TakeFn}; use vortex::compute::ArrayCompute; use vortex::scalar::Scalar; +use vortex::{Array, ArrayDType, IntoArray, OwnedArray}; use vortex_error::VortexResult; -use crate::compress::decompress; -use crate::{match_each_alp_float_ptype, ALPArray, ALPFloat}; - -impl ArrayCompute for ALPArray { - fn flatten(&self) -> Option<&dyn FlattenFn> { - Some(self) - } +use crate::{match_each_alp_float_ptype, ALPArray}; +impl ArrayCompute for ALPArray<'_> { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } @@ -28,19 +22,13 @@ impl ArrayCompute for ALPArray { } } -impl FlattenFn for ALPArray { - fn flatten(&self) -> VortexResult { - decompress(self).map(FlattenedArray::Primitive) - } -} - -impl ScalarAtFn for ALPArray { +impl ScalarAtFn for ALPArray<'_> { fn scalar_at(&self, index: usize) -> VortexResult { - if let Some(patch) = self.patches().and_then(|p| scalar_at(p, index).ok()) { + if let Some(patch) = self.patches().and_then(|p| scalar_at(&p, index).ok()) { return Ok(patch); } - - let encoded_val = scalar_at(self.encoded(), index)?; + use crate::ALPFloat; + let encoded_val = scalar_at(&self.encoded(), index)?; match_each_alp_float_ptype!(self.dtype().try_into().unwrap(), |$T| { let encoded_val: <$T as ALPFloat>::ALPInt = encoded_val.try_into().unwrap(); Scalar::from(<$T as ALPFloat>::decode_single( @@ -51,24 +39,24 @@ impl ScalarAtFn for ALPArray { } } -impl TakeFn for ALPArray { - fn take(&self, indices: &dyn Array) -> VortexResult { +impl TakeFn for ALPArray<'_> { + fn take(&self, indices: &Array) -> VortexResult { // TODO(ngates): wrap up indices in an array that caches decompression? - Ok(ALPArray::new( - take(self.encoded(), indices)?, + Ok(ALPArray::try_new( + take(&self.encoded(), indices)?, self.exponents().clone(), - self.patches().map(|p| take(p, indices)).transpose()?, - ) + self.patches().map(|p| take(&p, indices)).transpose()?, + )? .into_array()) } } -impl SliceFn for ALPArray { - fn slice(&self, start: usize, stop: usize) -> VortexResult { +impl SliceFn for ALPArray<'_> { + fn slice(&self, start: usize, end: usize) -> VortexResult { Ok(ALPArray::try_new( - slice(self.encoded(), start, stop)?, + slice(&self.encoded(), start, end)?, self.exponents().clone(), - self.patches().map(|p| slice(p, start, stop)).transpose()?, + self.patches().map(|p| slice(&p, start, end)).transpose()?, )? .into_array()) } diff --git a/vortex-alp/src/downcast.rs b/vortex-alp/src/downcast.rs deleted file mode 100644 index 1eee5d4ddb..0000000000 --- a/vortex-alp/src/downcast.rs +++ /dev/null @@ -1,31 +0,0 @@ -use vortex::array::{Array, ArrayRef}; - -use crate::ALPArray; - -mod private { - pub trait Sealed {} -} - -pub trait DowncastALP: private::Sealed { - fn maybe_alp(&self) -> Option<&ALPArray>; - - fn as_alp(&self) -> &ALPArray { - self.maybe_alp().unwrap() - } -} - -impl private::Sealed for dyn Array + '_ {} - -impl DowncastALP for dyn Array + '_ { - fn maybe_alp(&self) -> Option<&ALPArray> { - self.as_any().downcast_ref() - } -} - -impl private::Sealed for ArrayRef {} - -impl DowncastALP for ArrayRef { - fn maybe_alp(&self) -> Option<&ALPArray> { - self.as_any().downcast_ref() - } -} diff --git a/vortex-alp/src/lib.rs b/vortex-alp/src/lib.rs index 9428b3cb98..b255ef0074 100644 --- a/vortex-alp/src/lib.rs +++ b/vortex-alp/src/lib.rs @@ -1,15 +1,7 @@ pub use alp::*; pub use array::*; -use linkme::distributed_slice; -use vortex::encoding::{EncodingRef, ENCODINGS}; mod alp; mod array; mod compress; mod compute; -mod downcast; -mod serde; -mod stats; - -#[distributed_slice(ENCODINGS)] -static ENCODINGS_ALP: EncodingRef = &ALPEncoding; diff --git a/vortex-alp/src/serde.rs b/vortex-alp/src/serde.rs deleted file mode 100644 index 846ba87125..0000000000 --- a/vortex-alp/src/serde.rs +++ /dev/null @@ -1,92 +0,0 @@ -use vortex::array::{Array, ArrayRef}; -use vortex::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx}; -use vortex_error::{vortex_bail, VortexResult}; -use vortex_schema::{DType, FloatWidth, Signedness}; - -use crate::alp::Exponents; -use crate::ALPArray; -use crate::ALPEncoding; - -impl ArraySerde for ALPArray { - fn write(&self, ctx: &mut WriteCtx) -> VortexResult<()> { - ctx.write_optional_array(self.patches())?; - ctx.write_fixed_slice([self.exponents().e, self.exponents().f])?; - ctx.write(self.encoded()) - } - - fn metadata(&self) -> VortexResult>> { - Ok(Some(vec![self.exponents().e, self.exponents().f])) - } -} - -impl EncodingSerde for ALPEncoding { - fn read(&self, ctx: &mut ReadCtx) -> VortexResult { - let patches = ctx.read_optional_array()?; - let exponents = ctx.read_nbytes::<2>()?; - let encoded_dtype = match ctx.schema() { - DType::Float(FloatWidth::_32, nullability) => { - DType::Int(32.into(), Signedness::Signed, *nullability) - } - DType::Float(FloatWidth::_64, nullability) => { - DType::Int(64.into(), Signedness::Signed, *nullability) - } - _ => vortex_bail!(MismatchedTypes: "f32 or f64", ctx.schema()), - }; - let encoded = ctx.with_schema(&encoded_dtype).read()?; - Ok(ALPArray::new( - encoded, - Exponents { - e: exponents[0], - f: exponents[1], - }, - patches, - ) - .into_array()) - } -} - -#[cfg(test)] -mod test { - use vortex::array::downcast::DowncastArrayBuiltin; - use vortex::array::primitive::PrimitiveArray; - use vortex::array::{Array, ArrayRef}; - use vortex::serde::{ReadCtx, WriteCtx}; - use vortex_error::VortexResult; - - use crate::compress::alp_encode; - use crate::downcast::DowncastALP; - - fn roundtrip_array(array: &dyn Array) -> VortexResult { - let mut buf = Vec::::new(); - let mut write_ctx = WriteCtx::new(&mut buf); - write_ctx.write(array)?; - let mut read = buf.as_slice(); - let mut read_ctx = ReadCtx::new(array.dtype(), &mut read); - read_ctx.read() - } - - #[test] - fn roundtrip() { - let arr = alp_encode(&PrimitiveArray::from(vec![ - 0.00001f64, - 0.0004f64, - 1000000.0f64, - 0.33f64, - ])) - .unwrap(); - let read_arr = roundtrip_array(&arr).unwrap(); - - let read_alp = read_arr.as_alp(); - assert_eq!( - arr.encoded().as_primitive().buffer().typed_data::(), - read_alp - .encoded() - .as_primitive() - .buffer() - .typed_data::() - ); - - assert_eq!(arr.exponents().e, read_alp.exponents().e); - assert_eq!(arr.exponents().f, read_alp.exponents().f); - } -} diff --git a/vortex-alp/src/stats.rs b/vortex-alp/src/stats.rs deleted file mode 100644 index 36a544530b..0000000000 --- a/vortex-alp/src/stats.rs +++ /dev/null @@ -1,13 +0,0 @@ -use std::collections::HashMap; - -use vortex::stats::{Stat, StatsCompute, StatsSet}; -use vortex_error::VortexResult; - -use crate::ALPArray; - -impl StatsCompute for ALPArray { - fn compute(&self, _stat: &Stat) -> VortexResult { - // TODO(ngates): implement based on the encoded array - Ok(StatsSet::from(HashMap::new())) - } -}