From db107208512a0cd4341a479d6e83d79f910edeb6 Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Tue, 7 May 2024 09:12:07 -0400 Subject: [PATCH] review 2/n: favor concrete types in flatbuffer stats table --- vortex-array/flatbuffers/array.fbs | 16 ++++---- vortex-array/src/view.rs | 65 +++++------------------------- vortex-ipc/src/messages.rs | 46 +++++---------------- 3 files changed, 28 insertions(+), 99 deletions(-) diff --git a/vortex-array/flatbuffers/array.fbs b/vortex-array/flatbuffers/array.fbs index 768f271e72..c309cd525c 100644 --- a/vortex-array/flatbuffers/array.fbs +++ b/vortex-array/flatbuffers/array.fbs @@ -18,14 +18,14 @@ table Array { table ArrayStats { min: vortex.scalar.Scalar; max: vortex.scalar.Scalar; - is_sorted: vortex.scalar.Scalar; - is_strict_sorted: vortex.scalar.Scalar; - is_constant: vortex.scalar.Scalar; - run_count: vortex.scalar.Scalar; - true_count: vortex.scalar.Scalar; - null_count: vortex.scalar.Scalar; - bit_width_freq: [vortex.scalar.Scalar]; - trailing_zero_freq: [vortex.scalar.Scalar]; + is_sorted: bool = null; + is_strict_sorted: bool = null; + is_constant: bool = null; + run_count: uint64 = null; + true_count: uint64 = null; + null_count: uint64 = null; + bit_width_freq: [uint64]; + trailing_zero_freq: [uint64]; } diff --git a/vortex-array/src/view.rs b/vortex-array/src/view.rs index b2a694c8de..36c95e58c3 100644 --- a/vortex-array/src/view.rs +++ b/vortex-array/src/view.rs @@ -8,9 +8,9 @@ use vortex_dtype::flatbuffers::PType; use vortex_dtype::half::f16; use vortex_dtype::{DType, Nullability}; use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult}; -use vortex_scalar::flatbuffers as fbs; +use vortex_scalar::Scalar; use vortex_scalar::Scalar::List; -use vortex_scalar::{ListScalar, Scalar}; +use vortex_scalar::{flatbuffers as fbs, ListScalar}; use crate::encoding::{EncodingId, EncodingRef}; use crate::flatbuffers as fb; @@ -149,24 +149,6 @@ impl<'v> ArrayView<'v> { impl Statistics for ArrayView<'_> { fn get(&self, stat: Stat) -> Option { match stat { - Stat::IsConstant => { - let is_constant = self.array.stats()?.is_constant(); - is_constant - .and_then(|v| v.type__as_bool()) - .map(|v| v.value().into()) - } - Stat::IsSorted => self - .array - .stats()? - .is_sorted() - .and_then(|v| v.type__as_bool()) - .map(|v| v.value().into()), - Stat::IsStrictSorted => self - .array - .stats()? - .is_strict_sorted() - .and_then(|v| v.type__as_bool()) - .map(|v| v.value().into()), Stat::Max => { let max = self.array.stats()?.max(); max.and_then(|v| v.type__as_primitive()) @@ -177,35 +159,17 @@ impl Statistics for ArrayView<'_> { min.and_then(|v| v.type__as_primitive()) .and_then(primitive_to_scalar) } - Stat::RunCount => { - let rc = self.array.stats()?.run_count(); - rc.and_then(|v| v.type__as_primitive()) - .and_then(primitive_to_scalar) - } - Stat::TrueCount => { - let tc = self.array.stats()?.true_count(); - tc.and_then(|v| v.type__as_primitive()) - .and_then(primitive_to_scalar) - } - Stat::NullCount => { - let nc = self.array.stats()?.null_count(); - nc.and_then(|v| v.type__as_primitive()) - .and_then(primitive_to_scalar) - } + Stat::IsConstant => self.array.stats()?.is_constant().map(bool::into), + Stat::IsSorted => self.array.stats()?.is_sorted().map(bool::into), + Stat::IsStrictSorted => self.array.stats()?.is_strict_sorted().map(bool::into), + Stat::RunCount => self.array.stats()?.run_count().map(u64::into), + Stat::TrueCount => self.array.stats()?.true_count().map(u64::into), + Stat::NullCount => self.array.stats()?.null_count().map(u64::into), Stat::BitWidthFreq => self .array .stats()? .bit_width_freq() - .map(|v| { - v.iter() - .flat_map(|v| { - primitive_to_scalar( - v.type__as_primitive() - .expect("Should only ever produce primitives"), - ) - }) - .collect_vec() - }) + .map(|v| v.iter().map(u64::into).collect_vec()) .map(|v| { List(ListScalar::new( DType::Primitive(vortex_dtype::PType::U64, Nullability::NonNullable), @@ -216,16 +180,7 @@ impl Statistics for ArrayView<'_> { .array .stats()? .trailing_zero_freq() - .map(|v| { - v.iter() - .flat_map(|v| { - primitive_to_scalar( - v.type__as_primitive() - .expect("Should only ever produce primitives"), - ) - }) - .collect_vec() - }) + .map(|v| v.iter().map(u64::into).collect_vec()) .map(|v| { List(ListScalar::new( DType::Primitive(vortex_dtype::PType::U64, Nullability::NonNullable), diff --git a/vortex-ipc/src/messages.rs b/vortex-ipc/src/messages.rs index 2c886a7aeb..ba313d49ec 100644 --- a/vortex-ipc/src/messages.rs +++ b/vortex-ipc/src/messages.rs @@ -5,8 +5,8 @@ use vortex::{ArrayData, Context, ViewContext}; use vortex_dtype::{match_each_native_ptype, DType}; use vortex_error::{vortex_err, VortexError}; use vortex_flatbuffers::{FlatBufferRoot, WriteFlatBuffer}; -use vortex_scalar::Scalar::{Bool, Primitive}; -use vortex_scalar::{BoolScalar, PrimitiveScalar}; +use vortex_scalar::PrimitiveScalar; +use vortex_scalar::Scalar::Primitive; use crate::flatbuffers::ipc as fbi; use crate::flatbuffers::ipc::Compression; @@ -233,64 +233,38 @@ fn compute_and_build_stats<'a>( }) }); - let is_constant = array - .statistics() - .compute_is_constant() - .ok() - .map(|v| Bool(BoolScalar::some(v)).write_flatbuffer(fbb)); - let is_sorted = array - .statistics() - .compute_is_sorted() - .ok() - .map(|v| Bool(BoolScalar::some(v)).write_flatbuffer(fbb)); - let is_strict_sorted = array - .statistics() - .compute_is_strict_sorted() - .ok() - .map(|v| Bool(BoolScalar::some(v)).write_flatbuffer(fbb)); + let is_constant = array.statistics().compute_is_constant().ok(); + let is_sorted = array.statistics().compute_is_sorted().ok(); + let is_strict_sorted = array.statistics().compute_is_strict_sorted().ok(); let run_count = array .statistics() .compute_run_count() .ok() - .map(|v| Primitive(PrimitiveScalar::some(v as u64)).write_flatbuffer(fbb)); + .map(|v| v as u64); let true_count = array .statistics() .compute_true_count() .ok() - .map(|v| Primitive(PrimitiveScalar::some(v as u64)).write_flatbuffer(fbb)); + .map(|v| v as u64); let null_count = array .statistics() .compute_null_count() .ok() - .map(|v| Primitive(PrimitiveScalar::some(v as u64)).write_flatbuffer(fbb)); + .map(|v| v as u64); let bit_width_freq = array .statistics() .compute_bit_width_freq() .ok() - .map(|v| { - v.iter() - .map(|&inner| inner as u64) - .map(PrimitiveScalar::some) - .map(Primitive) - .map(|v| v.write_flatbuffer(fbb)) - .collect_vec() - }) + .map(|v| v.iter().map(|&inner| inner as u64).collect_vec()) .map(|v| fbb.create_vector(v.as_slice())); let trailing_zero_freq = array .statistics() .compute_trailing_zero_freq() .ok() - .map(|v| { - v.iter() - .map(|&inner| inner as u64) - .map(PrimitiveScalar::some) - .map(Primitive) - .map(|v| v.write_flatbuffer(fbb)) - .collect_vec() - }) + .map(|v| v.iter().map(|&inner| inner as u64).collect_vec()) .map(|v| fbb.create_vector(v.as_slice())); let stat_args = &fb::ArrayStatsArgs {