From 9d7b929cea4d5216bb31453bf8bb52a7e4c29792 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Tue, 30 Apr 2024 09:21:29 +0100 Subject: [PATCH] Move PType into vortex-dtype (#274) Next step is to change to `DType::Primitive(PType)`. This aligns our flattened arrays, our scalars and our dtypes to have essentially the same enum variants. We also vendor the half crate into vortex_dtype. --- Cargo.lock | 6 +- vortex-alp/src/compress.rs | 4 +- vortex-array/Cargo.toml | 1 - vortex-array/src/array/bool/compute/take.rs | 2 +- .../src/array/chunked/compute/take.rs | 2 +- vortex-array/src/array/chunked/mod.rs | 2 +- vortex-array/src/array/composite/array.rs | 3 +- vortex-array/src/array/constant/flatten.rs | 4 +- .../src/array/datetime/localdatetime.rs | 2 +- vortex-array/src/array/primitive/accessor.rs | 2 +- .../src/array/primitive/compute/as_arrow.rs | 2 +- .../array/primitive/compute/as_contiguous.rs | 3 +- .../src/array/primitive/compute/cast.rs | 8 +-- .../src/array/primitive/compute/fill.rs | 2 +- .../src/array/primitive/compute/scalar_at.rs | 2 +- .../array/primitive/compute/search_sorted.rs | 2 +- .../src/array/primitive/compute/slice.rs | 2 +- .../src/array/primitive/compute/take.rs | 4 +- vortex-array/src/array/primitive/mod.rs | 20 +++--- vortex-array/src/array/primitive/stats.rs | 22 ++++--- vortex-array/src/array/sparse/compute/mod.rs | 3 +- vortex-array/src/array/sparse/flatten.rs | 8 +-- vortex-array/src/array/sparse/mod.rs | 3 +- vortex-array/src/array/varbin/accessor.rs | 2 +- vortex-array/src/array/varbin/builder.rs | 2 +- vortex-array/src/array/varbin/compute/mod.rs | 2 +- vortex-array/src/array/varbin/compute/take.rs | 4 +- vortex-array/src/array/varbin/mod.rs | 8 ++- vortex-array/src/array/varbinview/compute.rs | 2 +- vortex-array/src/arrow/array.rs | 2 +- .../src/arrow/{dtypes.rs => dtype.rs} | 12 ++-- vortex-array/src/arrow/mod.rs | 8 ++- vortex-array/src/arrow/wrappers.rs | 12 ++-- vortex-array/src/buffer.rs | 15 ++++- vortex-array/src/lib.rs | 1 - vortex-array/src/scalar/mod.rs | 21 +------ vortex-array/src/scalar/primitive.rs | 23 ++++--- vortex-array/src/scalar/serde.rs | 4 +- vortex-array/src/stats.rs | 4 +- vortex-datetime-parts/src/compress.rs | 2 +- vortex-dict/Cargo.toml | 1 - vortex-dict/benches/dict_compress.rs | 3 +- vortex-dict/src/compress.rs | 8 +-- vortex-dict/src/dict.rs | 4 +- vortex-dtype/Cargo.toml | 3 +- vortex-dtype/src/lib.rs | 4 +- {vortex-array => vortex-dtype}/src/ptype.rs | 63 +++++-------------- vortex-fastlanes/src/bitpacking/compress.rs | 9 +-- .../src/bitpacking/compute/mod.rs | 4 +- vortex-fastlanes/src/bitpacking/mod.rs | 2 +- vortex-fastlanes/src/delta/compress.rs | 4 +- vortex-fastlanes/src/delta/mod.rs | 3 +- vortex-fastlanes/src/for/compress.rs | 4 +- vortex-fastlanes/src/for/compute.rs | 3 +- vortex-ipc/src/reader.rs | 7 +-- vortex-ree/Cargo.toml | 1 - vortex-ree/src/compress.rs | 11 ++-- vortex-ree/src/compute.rs | 3 +- vortex-roaring/src/boolean/mod.rs | 3 +- vortex-roaring/src/integer/compress.rs | 2 +- vortex-roaring/src/integer/compute.rs | 2 +- vortex-roaring/src/integer/mod.rs | 2 +- vortex-zigzag/src/compress.rs | 2 +- 63 files changed, 179 insertions(+), 202 deletions(-) rename vortex-array/src/arrow/{dtypes.rs => dtype.rs} (95%) rename {vortex-array => vortex-dtype}/src/ptype.rs (80%) diff --git a/Cargo.lock b/Cargo.lock index c0a2ba41b0..9d10735412 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5185,7 +5185,6 @@ dependencies = [ "flatbuffers", "flatc", "flexbuffers", - "half", "humansize", "itertools 0.12.1", "leb128", @@ -5222,7 +5221,6 @@ version = "0.1.0" dependencies = [ "ahash", "criterion", - "half", "hashbrown", "linkme", "log", @@ -5240,11 +5238,12 @@ dependencies = [ name = "vortex-dtype" version = "0.1.0" dependencies = [ - "arrow-schema 51.0.0", "flatbuffers", "flatc", + "half", "itertools 0.12.1", "linkme", + "num-traits", "serde", "thiserror", "vortex-error", @@ -5325,7 +5324,6 @@ version = "0.1.0" dependencies = [ "arrow-array 51.0.0", "arrow-buffer 51.0.0", - "half", "itertools 0.12.1", "linkme", "num-traits", diff --git a/vortex-alp/src/compress.rs b/vortex-alp/src/compress.rs index 389eb1044d..c469fda60f 100644 --- a/vortex-alp/src/compress.rs +++ b/vortex-alp/src/compress.rs @@ -2,10 +2,10 @@ use itertools::Itertools; use vortex::array::primitive::PrimitiveArray; use vortex::array::sparse::{Sparse, SparseArray}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::ptype::{NativePType, PType}; use vortex::scalar::Scalar; use vortex::validity::Validity; use vortex::{Array, ArrayDType, ArrayDef, AsArray, IntoArray, OwnedArray}; +use vortex_dtype::{NativePType, PType}; use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::alp::ALPFloat; @@ -16,8 +16,8 @@ use crate::{Exponents, OwnedALPArray}; macro_rules! match_each_alp_float_ptype { ($self:expr, | $_:tt $enc:ident | $($body:tt)*) => ({ macro_rules! __with__ {( $_ $enc:ident ) => ( $($body)* )} + use vortex_dtype::PType; use vortex_error::vortex_err; - use vortex::ptype::PType; let ptype = $self; match ptype { PType::F32 => Ok(__with__! { f32 }), diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 979e671fb3..be54dc3056 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -25,7 +25,6 @@ arrow-buffer = { workspace = true } arrow-schema = { workspace = true } flatbuffers = { workspace = true } flexbuffers = { workspace = true } -half = { workspace = true } humansize = { workspace = true } itertools = { workspace = true } leb128 = { workspace = true } diff --git a/vortex-array/src/array/bool/compute/take.rs b/vortex-array/src/array/bool/compute/take.rs index feea250e60..1de669cce9 100644 --- a/vortex-array/src/array/bool/compute/take.rs +++ b/vortex-array/src/array/bool/compute/take.rs @@ -1,10 +1,10 @@ use arrow_buffer::BooleanBuffer; use num_traits::AsPrimitive; +use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use crate::array::bool::BoolArray; use crate::compute::take::TakeFn; -use crate::match_each_integer_ptype; use crate::AsArray; use crate::IntoArray; use crate::{Array, OwnedArray}; diff --git a/vortex-array/src/array/chunked/compute/take.rs b/vortex-array/src/array/chunked/compute/take.rs index f3a5b67766..5f057f5464 100644 --- a/vortex-array/src/array/chunked/compute/take.rs +++ b/vortex-array/src/array/chunked/compute/take.rs @@ -1,9 +1,9 @@ +use vortex_dtype::PType; use vortex_error::VortexResult; use crate::array::chunked::ChunkedArray; use crate::compute::cast::cast; use crate::compute::take::{take, TakeFn}; -use crate::ptype::PType; use crate::{Array, IntoArray, OwnedArray, ToArray, ToStatic}; use crate::{ArrayDType, ArrayTrait}; diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index a9c3e9d43a..b1acacff23 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -145,10 +145,10 @@ impl EncodingCompression for ChunkedEncoding {} #[cfg(test)] mod test { + use vortex_dtype::NativePType; use vortex_dtype::{DType, IntWidth, Nullability, Signedness}; use crate::array::chunked::{ChunkedArray, OwnedChunkedArray}; - use crate::ptype::NativePType; use crate::{Array, IntoArray}; #[allow(dead_code)] diff --git a/vortex-array/src/array/composite/array.rs b/vortex-array/src/array/composite/array.rs index b5b526c162..f3a1897cad 100644 --- a/vortex-array/src/array/composite/array.rs +++ b/vortex-array/src/array/composite/array.rs @@ -6,7 +6,6 @@ use vortex_flatbuffers::{FlatBufferToBytes, ReadFlatBuffer}; use crate::array::composite::{find_extension, CompositeExtensionRef, TypedCompositeArray}; use crate::compute::ArrayCompute; -use crate::scalar::AsBytes; use crate::stats::ArrayStatisticsCompute; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; @@ -141,7 +140,7 @@ impl CompositeArray<'_> { impl<'a> CompositeArray<'a> { pub fn as_typed(&'a self) -> VortexResult> { Ok(TypedCompositeArray::new( - M::try_deserialize_metadata(Some(self.underlying_metadata().as_bytes()))?, + M::try_deserialize_metadata(Some(self.underlying_metadata()))?, self.underlying().clone(), )) } diff --git a/vortex-array/src/array/constant/flatten.rs b/vortex-array/src/array/constant/flatten.rs index b2c40b75c8..05037a2175 100644 --- a/vortex-array/src/array/constant/flatten.rs +++ b/vortex-array/src/array/constant/flatten.rs @@ -1,4 +1,4 @@ -use vortex_dtype::Nullability; +use vortex_dtype::{match_each_native_ptype, Nullability}; use vortex_error::VortexResult; use crate::array::bool::BoolArray; @@ -6,7 +6,7 @@ use crate::array::constant::ConstantArray; use crate::array::primitive::PrimitiveArray; use crate::scalar::Scalar; use crate::validity::Validity; -use crate::{match_each_native_ptype, ArrayDType, ArrayTrait}; +use crate::{ArrayDType, ArrayTrait}; use crate::{ArrayFlatten, Flattened}; impl ArrayFlatten for ConstantArray<'_> { diff --git a/vortex-array/src/array/datetime/localdatetime.rs b/vortex-array/src/array/datetime/localdatetime.rs index ccdd828efe..a1140c62f2 100644 --- a/vortex-array/src/array/datetime/localdatetime.rs +++ b/vortex-array/src/array/datetime/localdatetime.rs @@ -5,13 +5,13 @@ use arrow_array::{ TimestampNanosecondArray, TimestampSecondArray, }; use serde::{Deserialize, Serialize}; +use vortex_dtype::PType; use vortex_error::VortexResult; use crate::array::datetime::TimeUnit; use crate::compute::as_arrow::AsArrowArray; use crate::compute::cast::cast; use crate::impl_composite; -use crate::ptype::PType; use crate::validity::ArrayValidity; impl_composite!("vortex.localdatetime", LocalDateTime); diff --git a/vortex-array/src/array/primitive/accessor.rs b/vortex-array/src/array/primitive/accessor.rs index 4e43cbf6c2..fdf15891f2 100644 --- a/vortex-array/src/array/primitive/accessor.rs +++ b/vortex-array/src/array/primitive/accessor.rs @@ -1,8 +1,8 @@ +use vortex_dtype::NativePType; use vortex_error::VortexResult; use crate::accessor::ArrayAccessor; use crate::array::primitive::PrimitiveArray; -use crate::ptype::NativePType; use crate::validity::ArrayValidity; impl ArrayAccessor for PrimitiveArray<'_> { diff --git a/vortex-array/src/array/primitive/compute/as_arrow.rs b/vortex-array/src/array/primitive/compute/as_arrow.rs index 9893a70ae4..12c2fd65fc 100644 --- a/vortex-array/src/array/primitive/compute/as_arrow.rs +++ b/vortex-array/src/array/primitive/compute/as_arrow.rs @@ -4,11 +4,11 @@ use arrow_array::{ ArrayRef as ArrowArrayRef, ArrowPrimitiveType, PrimitiveArray as ArrowPrimitiveArray, }; use arrow_buffer::ScalarBuffer; +use vortex_dtype::PType; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::as_arrow::AsArrowArray; -use crate::ptype::PType; use crate::validity::ArrayValidity; use crate::ArrayTrait; diff --git a/vortex-array/src/array/primitive/compute/as_contiguous.rs b/vortex-array/src/array/primitive/compute/as_contiguous.rs index a1ab4cfee5..583d652578 100644 --- a/vortex-array/src/array/primitive/compute/as_contiguous.rs +++ b/vortex-array/src/array/primitive/compute/as_contiguous.rs @@ -1,10 +1,11 @@ use arrow_buffer::{MutableBuffer, ScalarBuffer}; +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::as_contiguous::AsContiguousFn; use crate::validity::Validity; -use crate::{match_each_native_ptype, ArrayDType}; +use crate::ArrayDType; use crate::{Array, IntoArray, OwnedArray}; impl AsContiguousFn for PrimitiveArray<'_> { diff --git a/vortex-array/src/array/primitive/compute/cast.rs b/vortex-array/src/array/primitive/compute/cast.rs index 7d8da8532b..5b8af758d9 100644 --- a/vortex-array/src/array/primitive/compute/cast.rs +++ b/vortex-array/src/array/primitive/compute/cast.rs @@ -1,11 +1,11 @@ -use vortex_dtype::DType; +use vortex_dtype::{match_each_native_ptype, DType}; +use vortex_dtype::{NativePType, PType}; use vortex_error::{vortex_err, VortexResult}; use crate::array::primitive::PrimitiveArray; use crate::compute::cast::CastFn; -use crate::ptype::{NativePType, PType}; use crate::validity::Validity; -use crate::{match_each_native_ptype, ArrayDType}; +use crate::ArrayDType; use crate::{IntoArray, OwnedArray}; impl CastFn for PrimitiveArray<'_> { @@ -49,9 +49,9 @@ fn cast(array: &PrimitiveArray) -> VortexResult> { #[cfg(test)] mod test { + use vortex_dtype::PType; use vortex_error::VortexError; - use crate::ptype::PType; use crate::{compute, IntoArray}; #[test] diff --git a/vortex-array/src/array/primitive/compute/fill.rs b/vortex-array/src/array/primitive/compute/fill.rs index 252309cd6e..7667c9b22f 100644 --- a/vortex-array/src/array/primitive/compute/fill.rs +++ b/vortex-array/src/array/primitive/compute/fill.rs @@ -1,8 +1,8 @@ +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::fill::FillForwardFn; -use crate::match_each_native_ptype; use crate::validity::ArrayValidity; use crate::{IntoArray, OwnedArray, ToArrayData}; diff --git a/vortex-array/src/array/primitive/compute/scalar_at.rs b/vortex-array/src/array/primitive/compute/scalar_at.rs index a7b02b73c1..0987054460 100644 --- a/vortex-array/src/array/primitive/compute/scalar_at.rs +++ b/vortex-array/src/array/primitive/compute/scalar_at.rs @@ -1,8 +1,8 @@ +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::scalar_at::ScalarAtFn; -use crate::match_each_native_ptype; use crate::scalar::PrimitiveScalar; use crate::scalar::Scalar; use crate::validity::ArrayValidity; diff --git a/vortex-array/src/array/primitive/compute/search_sorted.rs b/vortex-array/src/array/primitive/compute/search_sorted.rs index eb7e20d82f..3251908780 100644 --- a/vortex-array/src/array/primitive/compute/search_sorted.rs +++ b/vortex-array/src/array/primitive/compute/search_sorted.rs @@ -1,9 +1,9 @@ +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::search_sorted::{SearchResult, SearchSorted}; use crate::compute::search_sorted::{SearchSortedFn, SearchSortedSide}; -use crate::match_each_native_ptype; use crate::scalar::Scalar; impl SearchSortedFn for PrimitiveArray<'_> { diff --git a/vortex-array/src/array/primitive/compute/slice.rs b/vortex-array/src/array/primitive/compute/slice.rs index 68644dfbfd..8dc1cc90e7 100644 --- a/vortex-array/src/array/primitive/compute/slice.rs +++ b/vortex-array/src/array/primitive/compute/slice.rs @@ -1,8 +1,8 @@ +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::slice::SliceFn; -use crate::match_each_native_ptype; use crate::IntoArray; use crate::OwnedArray; diff --git a/vortex-array/src/array/primitive/compute/take.rs b/vortex-array/src/array/primitive/compute/take.rs index 70206fa1b1..1aaf9782e5 100644 --- a/vortex-array/src/array/primitive/compute/take.rs +++ b/vortex-array/src/array/primitive/compute/take.rs @@ -1,11 +1,11 @@ use num_traits::PrimInt; +use vortex_dtype::NativePType; +use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype}; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::take::TakeFn; -use crate::ptype::NativePType; use crate::IntoArray; -use crate::{match_each_integer_ptype, match_each_native_ptype}; use crate::{Array, OwnedArray}; impl TakeFn for PrimitiveArray<'_> { diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index ba0f5d9897..42ed2cb411 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -2,14 +2,14 @@ use arrow_buffer::{ArrowNativeType, ScalarBuffer}; use itertools::Itertools; use num_traits::AsPrimitive; use serde::{Deserialize, Serialize}; +use vortex_dtype::{match_each_native_ptype, NativePType, PType}; use vortex_error::{vortex_bail, VortexResult}; use crate::buffer::Buffer; -use crate::ptype::{NativePType, PType}; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; +use crate::ArrayFlatten; use crate::{impl_encoding, ArrayDType, OwnedArray}; -use crate::{match_each_native_ptype, ArrayFlatten}; mod accessor; mod compute; @@ -23,6 +23,7 @@ pub struct PrimitiveMetadata { } impl PrimitiveArray<'_> { + // TODO(ngates): remove the Arrow types from this API. pub fn try_new( buffer: ScalarBuffer, validity: Validity, @@ -40,11 +41,15 @@ impl PrimitiveArray<'_> { }) } - pub fn from_vec(values: Vec, validity: Validity) -> Self { - Self::try_new(ScalarBuffer::from(values), validity).unwrap() + pub fn from_vec(values: Vec, validity: Validity) -> Self { + match_each_native_ptype!(T::PTYPE, |$P| { + Self::try_new(ScalarBuffer::<$P>::from( + unsafe { std::mem::transmute::, Vec<$P>>(values) } + ), validity).unwrap() + }) } - pub fn from_nullable_vec(values: Vec>) -> Self { + pub fn from_nullable_vec(values: Vec>) -> Self { let elems: Vec = values.iter().map(|v| v.unwrap_or_default()).collect(); let validity = Validity::from(values.iter().map(|v| v.is_some()).collect::>()); Self::from_vec(elems, validity) @@ -65,7 +70,8 @@ impl PrimitiveArray<'_> { self.array().buffer().expect("missing buffer") } - pub fn scalar_buffer(&self) -> ScalarBuffer { + // TODO(ngates): deprecated, remove this. + pub fn scalar_buffer(&self) -> ScalarBuffer { assert_eq!( T::PTYPE, self.ptype(), @@ -126,7 +132,7 @@ impl PrimitiveArray<'_> { for (idx, value) in positions.iter().zip_eq(values.iter()) { own_values[(*idx).as_()] = *value; } - Self::try_new(ScalarBuffer::from(own_values), validity) + Ok(Self::from_vec(own_values, validity)) } } diff --git a/vortex-array/src/array/primitive/stats.rs b/vortex-array/src/array/primitive/stats.rs index 1ee2af70a9..80fb9dfa76 100644 --- a/vortex-array/src/array/primitive/stats.rs +++ b/vortex-array/src/array/primitive/stats.rs @@ -2,18 +2,20 @@ use std::collections::HashMap; use std::mem::size_of; use arrow_buffer::buffer::BooleanBuffer; +use vortex_dtype::match_each_native_ptype; use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; -use crate::match_each_native_ptype; -use crate::ptype::NativePType; -use crate::scalar::Scalar; use crate::scalar::{ListScalarVec, PScalar}; +use crate::scalar::{PScalarType, Scalar}; use crate::stats::{ArrayStatisticsCompute, Stat}; use crate::validity::ArrayValidity; use crate::validity::LogicalValidity; use crate::IntoArray; +trait PStatsType: PScalarType + Into {} +impl> PStatsType for T {} + impl ArrayStatisticsCompute for PrimitiveArray<'_> { fn compute_statistics(&self, stat: Stat) -> VortexResult> { match_each_native_ptype!(self.ptype(), |$P| { @@ -30,7 +32,7 @@ impl ArrayStatisticsCompute for PrimitiveArray<'_> { } } -impl ArrayStatisticsCompute for &[T] { +impl ArrayStatisticsCompute for &[T] { fn compute_statistics(&self, _stat: Stat) -> VortexResult> { if self.is_empty() { return Ok(HashMap::default()); @@ -41,7 +43,7 @@ impl ArrayStatisticsCompute for &[T] { } } -fn all_null_stats(len: usize) -> VortexResult> { +fn all_null_stats(len: usize) -> VortexResult> { Ok(HashMap::from([ (Stat::Min, Option::::None.into()), (Stat::Max, Option::::None.into()), @@ -61,9 +63,9 @@ fn all_null_stats(len: usize) -> VortexResult(&'a [T], &'a BooleanBuffer); +struct NullableValues<'a, T: PStatsType>(&'a [T], &'a BooleanBuffer); -impl<'a, T: NativePType> ArrayStatisticsCompute for NullableValues<'a, T> { +impl<'a, T: PStatsType> ArrayStatisticsCompute for NullableValues<'a, T> { fn compute_statistics(&self, _stat: Stat) -> VortexResult> { let values = self.0; if values.is_empty() { @@ -98,7 +100,7 @@ trait BitWidth { fn trailing_zeros(self) -> usize; } -impl> BitWidth for T { +impl BitWidth for T { fn bit_width(self) -> usize { let bit_width = size_of::() * 8; let scalar: PScalar = self.into(); @@ -135,7 +137,7 @@ impl> BitWidth for T { } } -struct StatsAccumulator { +struct StatsAccumulator { prev: T, min: T, max: T, @@ -147,7 +149,7 @@ struct StatsAccumulator { trailing_zeros: Vec, } -impl StatsAccumulator { +impl StatsAccumulator { fn new(first_value: T) -> Self { let mut stats = Self { prev: first_value, diff --git a/vortex-array/src/array/sparse/compute/mod.rs b/vortex-array/src/array/sparse/compute/mod.rs index 590ade8876..1342475424 100644 --- a/vortex-array/src/array/sparse/compute/mod.rs +++ b/vortex-array/src/array/sparse/compute/mod.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use itertools::Itertools; +use vortex_dtype::match_each_integer_ptype; use vortex_error::{vortex_bail, VortexResult}; use crate::array::primitive::{OwnedPrimitiveArray, PrimitiveArray}; @@ -11,7 +12,7 @@ use crate::compute::slice::SliceFn; use crate::compute::take::{take, TakeFn}; use crate::compute::ArrayCompute; use crate::scalar::Scalar; -use crate::{match_each_integer_ptype, Array, ArrayDType, ArrayTrait, IntoArray, OwnedArray}; +use crate::{Array, ArrayDType, ArrayTrait, IntoArray, OwnedArray}; mod slice; diff --git a/vortex-array/src/array/sparse/flatten.rs b/vortex-array/src/array/sparse/flatten.rs index 5273418ed6..78af21a34c 100644 --- a/vortex-array/src/array/sparse/flatten.rs +++ b/vortex-array/src/array/sparse/flatten.rs @@ -1,13 +1,13 @@ use arrow_buffer::BooleanBufferBuilder; use itertools::Itertools; -use vortex_error::VortexResult; +use vortex_dtype::{match_each_native_ptype, NativePType}; +use vortex_error::{VortexError, VortexResult}; use crate::array::primitive::PrimitiveArray; use crate::array::sparse::SparseArray; -use crate::ptype::NativePType; use crate::scalar::Scalar; use crate::validity::Validity; -use crate::{match_each_native_ptype, ArrayFlatten, ArrayTrait, Flattened}; +use crate::{ArrayFlatten, ArrayTrait, Flattened}; impl ArrayFlatten for SparseArray<'_> { fn flatten<'a>(self) -> VortexResult> @@ -32,7 +32,7 @@ impl ArrayFlatten for SparseArray<'_> { } } -fn flatten_sparse_values( +fn flatten_sparse_values TryFrom<&'a Scalar, Error = VortexError>>( values: &[T], indices: &[usize], len: usize, diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index c00e1f403f..2b9e50ac4a 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -1,4 +1,5 @@ use ::serde::{Deserialize, Serialize}; +use vortex_dtype::match_each_integer_ptype; use vortex_error::{vortex_bail, VortexResult}; use crate::array::constant::ConstantArray; @@ -6,7 +7,7 @@ use crate::compute::search_sorted::{search_sorted, SearchSortedSide}; use crate::stats::ArrayStatisticsCompute; use crate::validity::{ArrayValidity, LogicalValidity}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; -use crate::{impl_encoding, match_each_integer_ptype, ArrayDType, IntoArrayData, ToArrayData}; +use crate::{impl_encoding, ArrayDType, IntoArrayData, ToArrayData}; mod compress; mod compute; diff --git a/vortex-array/src/array/varbin/accessor.rs b/vortex-array/src/array/varbin/accessor.rs index efdf4949e0..118f158d7c 100644 --- a/vortex-array/src/array/varbin/accessor.rs +++ b/vortex-array/src/array/varbin/accessor.rs @@ -1,8 +1,8 @@ +use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use crate::accessor::ArrayAccessor; use crate::array::varbin::VarBinArray; -use crate::match_each_integer_ptype; use crate::validity::ArrayValidity; impl ArrayAccessor<[u8]> for VarBinArray<'_> { diff --git a/vortex-array/src/array/varbin/builder.rs b/vortex-array/src/array/varbin/builder.rs index 1ebcf3decd..c59309b911 100644 --- a/vortex-array/src/array/varbin/builder.rs +++ b/vortex-array/src/array/varbin/builder.rs @@ -2,10 +2,10 @@ use std::mem; use arrow_buffer::NullBufferBuilder; use vortex_dtype::DType; +use vortex_dtype::NativePType; use crate::array::primitive::PrimitiveArray; use crate::array::varbin::{OwnedVarBinArray, VarBinArray}; -use crate::ptype::NativePType; use crate::validity::Validity; use crate::IntoArray; diff --git a/vortex-array/src/array/varbin/compute/mod.rs b/vortex-array/src/array/varbin/compute/mod.rs index 9ec6e53e2f..49558d9037 100644 --- a/vortex-array/src/array/varbin/compute/mod.rs +++ b/vortex-array/src/array/varbin/compute/mod.rs @@ -5,6 +5,7 @@ use arrow_array::{ }; use itertools::Itertools; use vortex_dtype::DType; +use vortex_dtype::PType; use vortex_error::{vortex_bail, VortexResult}; use crate::array::primitive::PrimitiveArray; @@ -17,7 +18,6 @@ use crate::compute::scalar_at::ScalarAtFn; use crate::compute::slice::SliceFn; use crate::compute::take::TakeFn; use crate::compute::ArrayCompute; -use crate::ptype::PType; use crate::scalar::Scalar; use crate::validity::{ArrayValidity, Validity}; use crate::{Array, ArrayDType, IntoArray, OwnedArray, ToArray}; diff --git a/vortex-array/src/array/varbin/compute/take.rs b/vortex-array/src/array/varbin/compute/take.rs index e352a2d223..4bbffed921 100644 --- a/vortex-array/src/array/varbin/compute/take.rs +++ b/vortex-array/src/array/varbin/compute/take.rs @@ -1,12 +1,12 @@ use arrow_buffer::NullBuffer; +use vortex_dtype::match_each_integer_ptype; use vortex_dtype::DType; +use vortex_dtype::NativePType; use vortex_error::VortexResult; use crate::array::varbin::builder::VarBinBuilder; use crate::array::varbin::{OwnedVarBinArray, VarBinArray}; use crate::compute::take::TakeFn; -use crate::match_each_integer_ptype; -use crate::ptype::NativePType; use crate::validity::Validity; use crate::ArrayDType; use crate::IntoArray; diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index c110e7dc33..4a1389db76 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -1,16 +1,16 @@ use num_traits::AsPrimitive; use serde::{Deserialize, Serialize}; +use vortex_dtype::{match_each_native_ptype, NativePType}; use vortex_dtype::{IntWidth, Nullability, Signedness}; use vortex_error::{vortex_bail, VortexResult}; use crate::array::varbin::builder::VarBinBuilder; use crate::compute::scalar_at::scalar_at; use crate::compute::slice::slice; -use crate::ptype::NativePType; use crate::scalar::{BinaryScalar, Utf8Scalar}; use crate::validity::{Validity, ValidityMetadata}; +use crate::ArrayDType; use crate::{impl_encoding, OwnedArray, ToArrayData}; -use crate::{match_each_native_ptype, ArrayDType}; mod accessor; mod array; @@ -75,7 +75,9 @@ impl VarBinArray<'_> { .expect("missing offsets") } - pub fn first_offset(&self) -> VortexResult { + pub fn first_offset>( + &self, + ) -> VortexResult { scalar_at(&self.offsets(), 0)? .cast(&DType::from(T::PTYPE))? .try_into() diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs index 84ed0aa027..ae5caa1758 100644 --- a/vortex-array/src/array/varbinview/compute.rs +++ b/vortex-array/src/array/varbinview/compute.rs @@ -5,6 +5,7 @@ use arrow_buffer::Buffer as ArrowBuffer; use arrow_buffer::ScalarBuffer; use itertools::Itertools; use vortex_dtype::DType; +use vortex_dtype::PType; use vortex_error::{vortex_bail, VortexResult}; use crate::array::varbin::varbin_scalar; @@ -13,7 +14,6 @@ use crate::compute::as_arrow::AsArrowArray; use crate::compute::scalar_at::ScalarAtFn; use crate::compute::slice::{slice, SliceFn}; use crate::compute::ArrayCompute; -use crate::ptype::PType; use crate::scalar::Scalar; use crate::validity::ArrayValidity; use crate::{ArrayDType, IntoArray, IntoArrayData, OwnedArray}; diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index 1761398069..a620e5c731 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -22,6 +22,7 @@ use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; use arrow_buffer::{ArrowNativeType, Buffer, ScalarBuffer}; use arrow_schema::{DataType, TimeUnit}; use vortex_dtype::DType; +use vortex_dtype::NativePType; use crate::array::bool::BoolArray; use crate::array::constant::ConstantArray; @@ -31,7 +32,6 @@ use crate::array::r#struct::StructArray; use crate::array::varbin::VarBinArray; use crate::array::varbinview::VarBinViewArray; use crate::arrow::FromArrowArray; -use crate::ptype::NativePType; use crate::scalar::NullScalar; use crate::stats::{Stat, Statistics}; use crate::validity::Validity; diff --git a/vortex-array/src/arrow/dtypes.rs b/vortex-array/src/arrow/dtype.rs similarity index 95% rename from vortex-array/src/arrow/dtypes.rs rename to vortex-array/src/arrow/dtype.rs index 2a63178c2d..133e0afb98 100644 --- a/vortex-array/src/arrow/dtypes.rs +++ b/vortex-array/src/arrow/dtype.rs @@ -3,17 +3,15 @@ use std::sync::Arc; use arrow_schema::TimeUnit as ArrowTimeUnit; use arrow_schema::{DataType, Field, SchemaRef}; use itertools::Itertools; +use vortex_dtype::PType; use vortex_dtype::{DType, FloatWidth, IntWidth, Nullability}; -use vortex_error::{vortex_err, VortexError, VortexResult}; +use vortex_error::{vortex_err, VortexResult}; use crate::array::datetime::{LocalDateTimeExtension, TimeUnit}; -use crate::arrow::FromArrowType; -use crate::ptype::PType; +use crate::arrow::{FromArrowType, TryFromArrowType}; -impl TryFrom<&DataType> for PType { - type Error = VortexError; - - fn try_from(value: &DataType) -> VortexResult { +impl TryFromArrowType<&DataType> for PType { + fn try_from_arrow(value: &DataType) -> VortexResult { match value { DataType::Int8 => Ok(PType::I8), DataType::Int16 => Ok(PType::I16), diff --git a/vortex-array/src/arrow/mod.rs b/vortex-array/src/arrow/mod.rs index 9b5eaaa2a0..926760b553 100644 --- a/vortex-array/src/arrow/mod.rs +++ b/vortex-array/src/arrow/mod.rs @@ -1,5 +1,7 @@ +use vortex_error::VortexResult; + mod array; -mod dtypes; +mod dtype; mod recordbatch; pub mod wrappers; @@ -10,3 +12,7 @@ pub trait FromArrowArray { pub trait FromArrowType: Sized { fn from_arrow(value: T) -> Self; } + +pub trait TryFromArrowType: Sized { + fn try_from_arrow(value: T) -> VortexResult; +} diff --git a/vortex-array/src/arrow/wrappers.rs b/vortex-array/src/arrow/wrappers.rs index 3621eb810a..95af62d2a0 100644 --- a/vortex-array/src/arrow/wrappers.rs +++ b/vortex-array/src/arrow/wrappers.rs @@ -1,13 +1,17 @@ -use arrow_buffer::{Buffer as ArrowBuffer, OffsetBuffer, ScalarBuffer}; +use arrow_buffer::{ArrowNativeType, Buffer as ArrowBuffer, OffsetBuffer, ScalarBuffer}; +use vortex_dtype::NativePType; use crate::array::primitive::PrimitiveArray; -use crate::ptype::NativePType; -pub fn as_scalar_buffer(array: PrimitiveArray<'_>) -> ScalarBuffer { +pub fn as_scalar_buffer( + array: PrimitiveArray<'_>, +) -> ScalarBuffer { assert_eq!(array.ptype(), T::PTYPE); ScalarBuffer::from(ArrowBuffer::from(array.buffer())) } -pub fn as_offset_buffer(array: PrimitiveArray<'_>) -> OffsetBuffer { +pub fn as_offset_buffer( + array: PrimitiveArray<'_>, +) -> OffsetBuffer { OffsetBuffer::new(as_scalar_buffer(array)) } diff --git a/vortex-array/src/buffer.rs b/vortex-array/src/buffer.rs index ceb733f00e..5d0de01394 100644 --- a/vortex-array/src/buffer.rs +++ b/vortex-array/src/buffer.rs @@ -1,6 +1,6 @@ use arrow_buffer::Buffer as ArrowBuffer; +use vortex_dtype::{match_each_native_ptype, NativePType}; -use crate::ptype::NativePType; use crate::ToStatic; #[derive(Debug, Clone)] @@ -32,7 +32,11 @@ impl Buffer<'_> { pub fn typed_data(&self) -> &[T] { match self { - Buffer::Owned(buffer) => buffer.typed_data::(), + Buffer::Owned(buffer) => unsafe { + match_each_native_ptype!(T::PTYPE, |$T| { + std::mem::transmute(buffer.typed_data::<$T>()) + }) + }, Buffer::View(slice) => { // From ArrowBuffer::typed_data let (prefix, offsets, suffix) = unsafe { slice.align_to::() }; @@ -46,7 +50,12 @@ impl Buffer<'_> { impl<'a> Buffer<'a> { pub fn into_vec(self) -> Result, Buffer<'a>> { match self { - Buffer::Owned(buffer) => buffer.into_vec().map_err(Buffer::Owned), + Buffer::Owned(buffer) => match_each_native_ptype!(T::PTYPE, |$T| { + buffer + .into_vec() + .map(|vec| unsafe { std::mem::transmute::, Vec>(vec) }) + .map_err(Buffer::Owned) + }), Buffer::View(_) => Err(self), } } diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index f821516172..fcd242de5a 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -10,7 +10,6 @@ pub mod encoding; mod flatten; mod implementation; mod metadata; -pub mod ptype; mod sampling; pub mod scalar; pub mod stats; diff --git a/vortex-array/src/scalar/mod.rs b/vortex-array/src/scalar/mod.rs index 7c0fbd79a5..b38ea6201c 100644 --- a/vortex-array/src/scalar/mod.rs +++ b/vortex-array/src/scalar/mod.rs @@ -3,17 +3,16 @@ use std::fmt::{Debug, Display, Formatter}; pub use binary::*; pub use bool::*; pub use composite::*; -use half::f16; pub use list::*; pub use null::*; pub use primitive::*; pub use struct_::*; pub use utf8::*; +use vortex_dtype::half::f16; +use vortex_dtype::NativePType; use vortex_dtype::{DType, FloatWidth, IntWidth, Nullability, Signedness}; use vortex_error::VortexResult; -use crate::ptype::NativePType; - mod binary; mod bool; mod composite; @@ -145,22 +144,6 @@ pub trait AsBytes { fn as_bytes(&self) -> &[u8]; } -impl AsBytes for [T] { - #[inline] - fn as_bytes(&self) -> &[u8] { - let raw_ptr = self.as_ptr() as *const u8; - unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of_val(self)) } - } -} - -impl AsBytes for &[T] { - #[inline] - fn as_bytes(&self) -> &[u8] { - let raw_ptr = (*self).as_ptr() as *const u8; - unsafe { std::slice::from_raw_parts(raw_ptr, std::mem::size_of_val(*self)) } - } -} - impl AsBytes for T { #[inline] fn as_bytes(&self) -> &[u8] { diff --git a/vortex-array/src/scalar/primitive.rs b/vortex-array/src/scalar/primitive.rs index 07234edceb..5570c55370 100644 --- a/vortex-array/src/scalar/primitive.rs +++ b/vortex-array/src/scalar/primitive.rs @@ -2,13 +2,16 @@ use std::any; use std::fmt::{Display, Formatter}; use std::mem::size_of; -use half::f16; +use vortex_dtype::half::f16; +use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype}; use vortex_dtype::{DType, Nullability}; +use vortex_dtype::{NativePType, PType}; use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; -use crate::ptype::{NativePType, PType}; use crate::scalar::Scalar; -use crate::{match_each_integer_ptype, match_each_native_ptype}; + +pub trait PScalarType: NativePType + Into + TryFrom {} +impl + TryFrom> PScalarType for T {} #[derive(Debug, Clone, PartialEq, PartialOrd)] pub struct PrimitiveScalar { @@ -19,7 +22,7 @@ pub struct PrimitiveScalar { } impl PrimitiveScalar { - pub fn try_new( + pub fn try_new( value: Option, nullability: Nullability, ) -> VortexResult { @@ -34,15 +37,15 @@ impl PrimitiveScalar { }) } - pub fn nullable(value: Option) -> Self { + pub fn nullable(value: Option) -> Self { Self::try_new(value, Nullability::Nullable).unwrap() } - pub fn some(value: T) -> Self { + pub fn some(value: T) -> Self { Self::try_new::(Some(value), Nullability::default()).unwrap() } - pub fn none() -> Self { + pub fn none() -> Self { Self::try_new::(None, Nullability::Nullable).unwrap() } @@ -51,7 +54,7 @@ impl PrimitiveScalar { self.value } - pub fn typed_value(&self) -> Option { + pub fn typed_value(&self) -> Option { assert_eq!( T::PTYPE, self.ptype, @@ -264,7 +267,7 @@ pscalar!(f16, F16); pscalar!(f32, F32); pscalar!(f64, F64); -impl From> for Scalar { +impl From> for Scalar { fn from(value: Option) -> Self { PrimitiveScalar::nullable(value).into() } @@ -337,10 +340,10 @@ impl Display for PScalar { #[cfg(test)] mod test { + use vortex_dtype::PType; use vortex_dtype::{DType, IntWidth, Nullability, Signedness}; use vortex_error::VortexError; - use crate::ptype::PType; use crate::scalar::Scalar; #[test] diff --git a/vortex-array/src/scalar/serde.rs b/vortex-array/src/scalar/serde.rs index d7f5474bd4..984490562e 100644 --- a/vortex-array/src/scalar/serde.rs +++ b/vortex-array/src/scalar/serde.rs @@ -1,13 +1,13 @@ use flatbuffers::{root, FlatBufferBuilder, WIPOffset}; use serde::de::Visitor; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use vortex_dtype::match_each_native_ptype; +use vortex_dtype::PType; use vortex_dtype::{DTypeSerdeContext, Nullability}; use vortex_error::{vortex_bail, VortexError}; use vortex_flatbuffers::{FlatBufferRoot, FlatBufferToBytes, ReadFlatBuffer, WriteFlatBuffer}; use crate::flatbuffers::scalar as fb; -use crate::match_each_native_ptype; -use crate::ptype::PType; use crate::scalar::{PScalar, PrimitiveScalar, Scalar, Utf8Scalar}; impl FlatBufferRoot for Scalar {} diff --git a/vortex-array/src/stats.rs b/vortex-array/src/stats.rs index 5d9ae9ed76..e4a3ffa306 100644 --- a/vortex-array/src/stats.rs +++ b/vortex-array/src/stats.rs @@ -1,9 +1,9 @@ use std::collections::HashMap; use vortex_dtype::DType; +use vortex_dtype::NativePType; use vortex_error::VortexResult; -use crate::ptype::NativePType; use crate::scalar::Scalar; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -45,7 +45,7 @@ impl dyn Statistics + '_ { self.compute(stat).and_then(|s| T::try_from(s).ok()) } - pub fn compute_as_cast(&self, stat: Stat) -> Option { + pub fn compute_as_cast>(&self, stat: Stat) -> Option { self.compute(stat) .and_then(|s| s.cast(&DType::from(T::PTYPE)).ok()) .and_then(|s| T::try_from(s).ok()) diff --git a/vortex-datetime-parts/src/compress.rs b/vortex-datetime-parts/src/compress.rs index c949791a1a..35d56ccc0b 100644 --- a/vortex-datetime-parts/src/compress.rs +++ b/vortex-datetime-parts/src/compress.rs @@ -3,8 +3,8 @@ use vortex::array::datetime::{LocalDateTimeArray, LocalDateTimeExtension, TimeUn use vortex::array::primitive::PrimitiveArray; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; use vortex::compute::cast::cast; -use vortex::ptype::PType; use vortex::{Array, ArrayDType, ArrayDef, ArrayTrait, IntoArray, OwnedArray}; +use vortex_dtype::PType; use vortex_error::VortexResult; use crate::{DateTimePartsArray, DateTimePartsEncoding}; diff --git a/vortex-dict/Cargo.toml b/vortex-dict/Cargo.toml index d7554d53c0..e2f2dd1c2e 100644 --- a/vortex-dict/Cargo.toml +++ b/vortex-dict/Cargo.toml @@ -13,7 +13,6 @@ rust-version = { workspace = true } [dependencies] ahash = { workspace = true } -half = { workspace = true } hashbrown = { workspace = true } linkme = { workspace = true } log = { workspace = true } diff --git a/vortex-dict/benches/dict_compress.rs b/vortex-dict/benches/dict_compress.rs index 872dc71bf4..ceceb4c2d7 100644 --- a/vortex-dict/benches/dict_compress.rs +++ b/vortex-dict/benches/dict_compress.rs @@ -4,8 +4,9 @@ use rand::prelude::SliceRandom; use rand::{thread_rng, Rng}; use vortex::array::primitive::PrimitiveArray; use vortex::array::varbin::VarBinArray; -use vortex::{match_each_native_ptype, ArrayTrait}; +use vortex::ArrayTrait; use vortex_dict::dict_encode_typed_primitive; +use vortex_dtype::match_each_native_ptype; fn gen_primitive_dict<'a>(len: usize, uniqueness: f64) -> PrimitiveArray<'a> { let mut rng = thread_rng(); diff --git a/vortex-dict/src/compress.rs b/vortex-dict/src/compress.rs index 4fcbbc4f9d..b4ed6d5033 100644 --- a/vortex-dict/src/compress.rs +++ b/vortex-dict/src/compress.rs @@ -8,14 +8,12 @@ use vortex::accessor::ArrayAccessor; use vortex::array::primitive::{Primitive, PrimitiveArray}; use vortex::array::varbin::{VarBin, VarBinArray}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::ptype::NativePType; use vortex::scalar::AsBytes; use vortex::stats::{ArrayStatistics, Stat}; use vortex::validity::Validity; -use vortex::{ - match_each_native_ptype, Array, ArrayDType, ArrayDef, IntoArray, OwnedArray, ToArray, -}; -use vortex_dtype::DType; +use vortex::{Array, ArrayDType, ArrayDef, IntoArray, OwnedArray, ToArray}; +use vortex_dtype::NativePType; +use vortex_dtype::{match_each_native_ptype, DType}; use vortex_error::VortexResult; use crate::dict::{DictArray, DictEncoding}; diff --git a/vortex-dict/src/dict.rs b/vortex-dict/src/dict.rs index 8844a3c0b3..ec85706e8b 100644 --- a/vortex-dict/src/dict.rs +++ b/vortex-dict/src/dict.rs @@ -6,8 +6,8 @@ use vortex::compute::take::take; use vortex::validity::{ArrayValidity, LogicalValidity}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::IntoArrayData; -use vortex::{impl_encoding, match_each_integer_ptype, ArrayDType, ArrayFlatten, ToArrayData}; -use vortex_dtype::Signedness; +use vortex::{impl_encoding, ArrayDType, ArrayFlatten, ToArrayData}; +use vortex_dtype::{match_each_integer_ptype, Signedness}; use vortex_error::{vortex_bail, VortexResult}; impl_encoding!("vortex.dict", Dict); diff --git a/vortex-dtype/Cargo.toml b/vortex-dtype/Cargo.toml index fc01da2a11..e129426438 100644 --- a/vortex-dtype/Cargo.toml +++ b/vortex-dtype/Cargo.toml @@ -16,10 +16,11 @@ name = "vortex_dtype" path = "src/lib.rs" [dependencies] -arrow-schema = { workspace = true } flatbuffers = { workspace = true } +half = { workspace = true } itertools = { workspace = true } linkme = { workspace = true } +num-traits = { workspace = true } serde = { workspace = true, optional = true } thiserror = { workspace = true } vortex-error = { path = "../vortex-error" } diff --git a/vortex-dtype/src/lib.rs b/vortex-dtype/src/lib.rs index 1b2698ea83..b7909b6ed3 100644 --- a/vortex-dtype/src/lib.rs +++ b/vortex-dtype/src/lib.rs @@ -1,9 +1,11 @@ use std::fmt::{Display, Formatter}; pub use dtype::*; - +pub use half; +pub use ptype::*; mod deserialize; mod dtype; +mod ptype; mod serde; mod serialize; diff --git a/vortex-array/src/ptype.rs b/vortex-dtype/src/ptype.rs similarity index 80% rename from vortex-array/src/ptype.rs rename to vortex-dtype/src/ptype.rs index 71c02de223..59a67d7c22 100644 --- a/vortex-array/src/ptype.rs +++ b/vortex-dtype/src/ptype.rs @@ -1,19 +1,15 @@ use std::fmt::{Debug, Display, Formatter}; use std::panic::RefUnwindSafe; -use arrow_array::types::*; -use arrow_buffer::ArrowNativeType; -use half::f16; -use num_traits::{Num, NumCast}; -use serde::{Deserialize, Serialize}; -use vortex_dtype::DType::*; -use vortex_dtype::{DType, FloatWidth, IntWidth}; +use num_traits::{FromPrimitive, Num, NumCast}; use vortex_error::{vortex_err, VortexError, VortexResult}; -use crate::scalar::{PScalar, Scalar}; +use crate::half::f16; +use crate::DType::*; +use crate::{DType, FloatWidth, IntWidth}; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash)] pub enum PType { U8, U16, @@ -39,15 +35,10 @@ pub trait NativePType: + PartialEq + PartialOrd + Default - + ArrowNativeType + RefUnwindSafe + Num + NumCast - + Into - + TryFrom - + for<'a> TryFrom<&'a Scalar, Error = VortexError> - + Into - + TryFrom + + FromPrimitive { const PTYPE: PType; } @@ -72,36 +63,12 @@ native_ptype!(f16, F16); native_ptype!(f32, F32); native_ptype!(f64, F64); -pub trait AsArrowPrimitiveType { - type ArrowType: ArrowPrimitiveType; -} - -macro_rules! impl_as_arrow_primitive_type { - ($T:ty, $A:ty) => { - impl AsArrowPrimitiveType for $T { - type ArrowType = $A; - } - }; -} - -impl_as_arrow_primitive_type!(u8, UInt8Type); -impl_as_arrow_primitive_type!(u16, UInt16Type); -impl_as_arrow_primitive_type!(u32, UInt32Type); -impl_as_arrow_primitive_type!(u64, UInt64Type); -impl_as_arrow_primitive_type!(i8, Int8Type); -impl_as_arrow_primitive_type!(i16, Int16Type); -impl_as_arrow_primitive_type!(i32, Int32Type); -impl_as_arrow_primitive_type!(i64, Int64Type); -impl_as_arrow_primitive_type!(f16, Float16Type); -impl_as_arrow_primitive_type!(f32, Float32Type); -impl_as_arrow_primitive_type!(f64, Float64Type); - #[macro_export] macro_rules! match_each_native_ptype { ($self:expr, | $_:tt $enc:ident | $($body:tt)*) => ({ macro_rules! __with__ {( $_ $enc:ident ) => ( $($body)* )} - use $crate::ptype::PType; - use half::f16; + use $crate::PType; + use $crate::half::f16; match $self { PType::I8 => __with__! { i8 }, PType::I16 => __with__! { i16 }, @@ -117,13 +84,12 @@ macro_rules! match_each_native_ptype { } }) } -pub use match_each_native_ptype; #[macro_export] macro_rules! match_each_integer_ptype { ($self:expr, | $_:tt $enc:ident | $($body:tt)*) => ({ macro_rules! __with__ {( $_ $enc:ident ) => ( $($body)* )} - use $crate::ptype::PType; + use $crate::PType; match $self { PType::I8 => __with__! { i8 }, PType::I16 => __with__! { i16 }, @@ -137,7 +103,6 @@ macro_rules! match_each_integer_ptype { } }) } -pub use match_each_integer_ptype; impl PType { pub const fn is_unsigned_int(self) -> bool { @@ -207,7 +172,7 @@ impl TryFrom<&DType> for PType { type Error = VortexError; fn try_from(value: &DType) -> VortexResult { - use vortex_dtype::Signedness::*; + use crate::Signedness::*; match value { Int(w, s, _) => match (w, s) { (IntWidth::_8, Signed) => Ok(PType::I8), @@ -231,8 +196,8 @@ impl TryFrom<&DType> for PType { impl From for &DType { fn from(item: PType) -> Self { - use vortex_dtype::Nullability::*; - use vortex_dtype::Signedness::*; + use crate::Nullability::*; + use crate::Signedness::*; match item { PType::I8 => &Int(IntWidth::_8, Signed, NonNullable), @@ -252,8 +217,8 @@ impl From for &DType { impl From for DType { fn from(item: PType) -> Self { - use vortex_dtype::Nullability::*; - use vortex_dtype::Signedness::*; + use crate::Nullability::*; + use crate::Signedness::*; match item { PType::I8 => Int(IntWidth::_8, Signed, NonNullable), diff --git a/vortex-fastlanes/src/bitpacking/compress.rs b/vortex-fastlanes/src/bitpacking/compress.rs index 6f1e61d044..59136a127f 100644 --- a/vortex-fastlanes/src/bitpacking/compress.rs +++ b/vortex-fastlanes/src/bitpacking/compress.rs @@ -4,15 +4,12 @@ use vortex::array::primitive::PrimitiveArray; use vortex::array::sparse::{Sparse, SparseArray}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; use vortex::compute::cast::cast; -use vortex::ptype::PType::U8; -use vortex::ptype::{NativePType, PType}; use vortex::scalar::{ListScalarVec, Scalar}; use vortex::stats::{ArrayStatistics, Stat}; use vortex::validity::Validity; -use vortex::{ - match_each_integer_ptype, Array, ArrayDType, ArrayDef, ArrayTrait, IntoArray, OwnedArray, - ToStatic, -}; +use vortex::{Array, ArrayDType, ArrayDef, ArrayTrait, IntoArray, OwnedArray, ToStatic}; +use vortex_dtype::PType::U8; +use vortex_dtype::{match_each_integer_ptype, NativePType, PType}; use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::{match_integers_by_width, BitPackedArray, BitPackedEncoding}; diff --git a/vortex-fastlanes/src/bitpacking/compute/mod.rs b/vortex-fastlanes/src/bitpacking/compute/mod.rs index cf26e0c7e7..407d58bf02 100644 --- a/vortex-fastlanes/src/bitpacking/compute/mod.rs +++ b/vortex-fastlanes/src/bitpacking/compute/mod.rs @@ -9,9 +9,9 @@ use vortex::compute::scalar_at::{scalar_at, ScalarAtFn}; use vortex::compute::slice::{slice, SliceFn}; use vortex::compute::take::{take, TakeFn}; use vortex::compute::ArrayCompute; -use vortex::ptype::NativePType; use vortex::scalar::Scalar; -use vortex::{match_each_integer_ptype, Array, ArrayDType, ArrayTrait, IntoArray, OwnedArray}; +use vortex::{Array, ArrayDType, ArrayTrait, IntoArray, OwnedArray}; +use vortex_dtype::{match_each_integer_ptype, NativePType}; use vortex_error::{vortex_bail, vortex_err, VortexResult}; use crate::bitpacking::compress::unpack_single; diff --git a/vortex-fastlanes/src/bitpacking/mod.rs b/vortex-fastlanes/src/bitpacking/mod.rs index 6927b69b86..a362670bb3 100644 --- a/vortex-fastlanes/src/bitpacking/mod.rs +++ b/vortex-fastlanes/src/bitpacking/mod.rs @@ -182,7 +182,7 @@ impl ArrayTrait for BitPackedArray<'_> { macro_rules! match_integers_by_width { ($self:expr, | $_:tt $enc:ident | $($body:tt)*) => ({ macro_rules! __with__ {( $_ $enc:ident ) => ( $($body)* )} - use vortex::ptype::PType; + use vortex_dtype::PType; use vortex_error::vortex_bail; match $self { PType::I8 | PType::U8 => __with__! { u8 }, diff --git a/vortex-fastlanes/src/delta/compress.rs b/vortex-fastlanes/src/delta/compress.rs index a205a5af3e..1cd6e32705 100644 --- a/vortex-fastlanes/src/delta/compress.rs +++ b/vortex-fastlanes/src/delta/compress.rs @@ -6,10 +6,10 @@ use num_traits::{WrappingAdd, WrappingSub}; use vortex::array::primitive::PrimitiveArray; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; use vortex::compute::fill::fill_forward; -use vortex::ptype::NativePType; use vortex::validity::Validity; -use vortex::{match_each_integer_ptype, Array, IntoArray, OwnedArray}; +use vortex::{Array, IntoArray, OwnedArray}; use vortex_dtype::Nullability; +use vortex_dtype::{match_each_integer_ptype, NativePType}; use vortex_error::VortexResult; use crate::{DeltaArray, DeltaEncoding}; diff --git a/vortex-fastlanes/src/delta/mod.rs b/vortex-fastlanes/src/delta/mod.rs index 7dd25a0cb0..c796c5d430 100644 --- a/vortex-fastlanes/src/delta/mod.rs +++ b/vortex-fastlanes/src/delta/mod.rs @@ -3,7 +3,8 @@ use vortex::stats::ArrayStatisticsCompute; use vortex::validity::ValidityMetadata; use vortex::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; -use vortex::{impl_encoding, match_each_integer_ptype, ArrayDType, ArrayFlatten, IntoArrayData}; +use vortex::{impl_encoding, ArrayDType, ArrayFlatten, IntoArrayData}; +use vortex_dtype::match_each_integer_ptype; use vortex_error::{vortex_bail, VortexResult}; use crate::delta::compress::decompress; diff --git a/vortex-fastlanes/src/for/compress.rs b/vortex-fastlanes/src/for/compress.rs index f7049efc45..dfc02cfda9 100644 --- a/vortex-fastlanes/src/for/compress.rs +++ b/vortex-fastlanes/src/for/compress.rs @@ -3,10 +3,10 @@ use num_traits::{PrimInt, WrappingAdd, WrappingSub}; use vortex::array::constant::ConstantArray; use vortex::array::primitive::PrimitiveArray; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::ptype::{NativePType, PType}; use vortex::scalar::ListScalarVec; use vortex::stats::{ArrayStatistics, Stat}; -use vortex::{match_each_integer_ptype, Array, ArrayDType, ArrayTrait, IntoArray, OwnedArray}; +use vortex::{Array, ArrayDType, ArrayTrait, IntoArray, OwnedArray}; +use vortex_dtype::{match_each_integer_ptype, NativePType, PType}; use vortex_error::{vortex_err, VortexResult}; use crate::{FoRArray, FoREncoding}; diff --git a/vortex-fastlanes/src/for/compute.rs b/vortex-fastlanes/src/for/compute.rs index 35080b0726..5025409422 100644 --- a/vortex-fastlanes/src/for/compute.rs +++ b/vortex-fastlanes/src/for/compute.rs @@ -3,7 +3,8 @@ use vortex::compute::slice::{slice, SliceFn}; use vortex::compute::take::{take, TakeFn}; use vortex::compute::ArrayCompute; use vortex::scalar::{PrimitiveScalar, Scalar}; -use vortex::{match_each_integer_ptype, Array, IntoArray, OwnedArray}; +use vortex::{Array, IntoArray, OwnedArray}; +use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use crate::FoRArray; diff --git a/vortex-ipc/src/reader.rs b/vortex-ipc/src/reader.rs index a9d0fdb077..b8379613a1 100644 --- a/vortex-ipc/src/reader.rs +++ b/vortex-ipc/src/reader.rs @@ -15,10 +15,9 @@ use vortex::compute::slice::slice; use vortex::compute::take::take; use vortex::stats::{ArrayStatistics, Stat}; use vortex::{ - match_each_integer_ptype, Array, ArrayDType, ArrayView, IntoArray, OwnedArray, SerdeContext, - ToArray, ToStatic, + Array, ArrayDType, ArrayView, IntoArray, OwnedArray, SerdeContext, ToArray, ToStatic, }; -use vortex_dtype::{DType, DTypeSerdeContext, Signedness}; +use vortex_dtype::{match_each_integer_ptype, DType, DTypeSerdeContext, Signedness}; use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; use vortex_flatbuffers::ReadFlatBuffer; @@ -376,9 +375,9 @@ mod tests { use vortex::array::chunked::{Chunked, ChunkedArray, ChunkedEncoding}; use vortex::array::primitive::{Primitive, PrimitiveArray, PrimitiveEncoding}; use vortex::encoding::{ArrayEncoding, EncodingId}; - use vortex::ptype::NativePType; use vortex::{Array, ArrayDType, ArrayDef, IntoArray, OwnedArray, SerdeContext}; use vortex_alp::{ALPArray, ALPEncoding}; + use vortex_dtype::NativePType; use vortex_error::VortexResult; use vortex_fastlanes::BitPackedArray; diff --git a/vortex-ree/Cargo.toml b/vortex-ree/Cargo.toml index d83fcab7f3..1184fd8327 100644 --- a/vortex-ree/Cargo.toml +++ b/vortex-ree/Cargo.toml @@ -14,7 +14,6 @@ rust-version = { workspace = true } [dependencies] arrow-array = { workspace = true } arrow-buffer = { workspace = true } -half = { workspace = true } itertools = { workspace = true } linkme = { workspace = true } num-traits = { workspace = true } diff --git a/vortex-ree/src/compress.rs b/vortex-ree/src/compress.rs index 7615377c25..77d83eed95 100644 --- a/vortex-ree/src/compress.rs +++ b/vortex-ree/src/compress.rs @@ -1,16 +1,16 @@ use std::cmp::min; use itertools::Itertools; -use num_traits::AsPrimitive; +use num_traits::{AsPrimitive, FromPrimitive}; use vortex::array::primitive::{Primitive, PrimitiveArray}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::ptype::{match_each_native_ptype, NativePType}; use vortex::stats::{ArrayStatistics, Stat}; use vortex::validity::Validity; use vortex::ArrayDType; use vortex::ArrayTrait; -use vortex::{match_each_integer_ptype, Array, ArrayDef, IntoArray, OwnedArray}; +use vortex::{Array, ArrayDef, IntoArray, OwnedArray}; use vortex_dtype::Nullability; +use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType}; use vortex_error::VortexResult; use crate::{REEArray, REEEncoding}; @@ -138,7 +138,10 @@ pub fn ree_decode<'a>( }) } -pub fn ree_decode_primitive + Ord, T: NativePType>( +pub fn ree_decode_primitive< + E: NativePType + AsPrimitive + FromPrimitive + Ord, + T: NativePType, +>( run_ends: &[E], values: &[T], offset: usize, diff --git a/vortex-ree/src/compute.rs b/vortex-ree/src/compute.rs index 034598f7bf..2e701d11f6 100644 --- a/vortex-ree/src/compute.rs +++ b/vortex-ree/src/compute.rs @@ -4,7 +4,8 @@ use vortex::compute::slice::{slice, SliceFn}; use vortex::compute::take::{take, TakeFn}; use vortex::compute::ArrayCompute; use vortex::scalar::Scalar; -use vortex::{match_each_integer_ptype, Array, IntoArray, OwnedArray}; +use vortex::{Array, IntoArray, OwnedArray}; +use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexResult; use crate::REEArray; diff --git a/vortex-roaring/src/boolean/mod.rs b/vortex-roaring/src/boolean/mod.rs index aa757b7261..791ba2f1a0 100644 --- a/vortex-roaring/src/boolean/mod.rs +++ b/vortex-roaring/src/boolean/mod.rs @@ -5,7 +5,6 @@ use croaring::{Bitmap, Portable}; use serde::{Deserialize, Serialize}; use vortex::array::bool::{Bool, BoolArray}; use vortex::buffer::Buffer; -use vortex::scalar::AsBytes; use vortex::stats::ArrayStatisticsCompute; use vortex::validity::{ArrayValidity, LogicalValidity, Validity}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; @@ -100,7 +99,7 @@ impl ArrayFlatten for RoaringBoolArray<'_> { .to_bitset() .ok_or(vortex_err!("Failed to convert RoaringBitmap to Bitset"))?; - let bytes = &bitset.as_slice().as_bytes()[0..bitset.size_in_bytes()]; + let bytes = &bitset.as_slice()[0..bitset.size_in_bytes()]; let buffer = ArrowBuffer::from_slice_ref(bytes); Ok(Flattened::Bool(BoolArray::try_new( BooleanBuffer::new(buffer, 0, bitset.size_in_bits()), diff --git a/vortex-roaring/src/integer/compress.rs b/vortex-roaring/src/integer/compress.rs index 9b16c3d99e..02f15ebace 100644 --- a/vortex-roaring/src/integer/compress.rs +++ b/vortex-roaring/src/integer/compress.rs @@ -3,12 +3,12 @@ use log::debug; use num_traits::NumCast; use vortex::array::primitive::PrimitiveArray; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::ptype::{NativePType, PType}; use vortex::stats::{ArrayStatistics, Stat}; use vortex::{Array, ArrayDType, ArrayDef, IntoArray, OwnedArray, ToStatic}; use vortex_dtype::DType; use vortex_dtype::Nullability::NonNullable; use vortex_dtype::Signedness::Unsigned; +use vortex_dtype::{NativePType, PType}; use vortex_error::VortexResult; use crate::{OwnedRoaringIntArray, RoaringInt, RoaringIntArray, RoaringIntEncoding}; diff --git a/vortex-roaring/src/integer/compute.rs b/vortex-roaring/src/integer/compute.rs index 69d87863c5..fe3855ae2a 100644 --- a/vortex-roaring/src/integer/compute.rs +++ b/vortex-roaring/src/integer/compute.rs @@ -1,7 +1,7 @@ use vortex::compute::scalar_at::ScalarAtFn; use vortex::compute::ArrayCompute; -use vortex::ptype::PType; use vortex::scalar::Scalar; +use vortex_dtype::PType; use vortex_error::VortexResult; use crate::RoaringIntArray; diff --git a/vortex-roaring/src/integer/mod.rs b/vortex-roaring/src/integer/mod.rs index 98ddd57f31..5cbf483e7f 100644 --- a/vortex-roaring/src/integer/mod.rs +++ b/vortex-roaring/src/integer/mod.rs @@ -3,12 +3,12 @@ use croaring::{Bitmap, Portable}; use serde::{Deserialize, Serialize}; use vortex::array::primitive::{Primitive, PrimitiveArray}; use vortex::buffer::Buffer; -use vortex::ptype::PType; use vortex::stats::ArrayStatisticsCompute; use vortex::validity::{ArrayValidity, LogicalValidity}; use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor}; use vortex::{impl_encoding, ArrayFlatten, OwnedArray}; use vortex_dtype::Nullability::NonNullable; +use vortex_dtype::PType; use vortex_error::{vortex_bail, vortex_err, VortexResult}; mod compress; diff --git a/vortex-zigzag/src/compress.rs b/vortex-zigzag/src/compress.rs index f29d99e254..79688de135 100644 --- a/vortex-zigzag/src/compress.rs +++ b/vortex-zigzag/src/compress.rs @@ -1,10 +1,10 @@ use vortex::array::primitive::PrimitiveArray; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::ptype::{NativePType, PType}; use vortex::stats::{ArrayStatistics, Stat}; use vortex::validity::Validity; use vortex::{Array, IntoArray, OwnedArray}; use vortex_alloc::{AlignedVec, ALIGNED_ALLOCATOR}; +use vortex_dtype::{NativePType, PType}; use vortex_error::VortexResult; use zigzag::ZigZag as ExternalZigZag;