Skip to content

Commit

Permalink
review 2/n: favor concrete types in flatbuffer stats table
Browse files Browse the repository at this point in the history
  • Loading branch information
jdcasale committed May 7, 2024
1 parent 388343b commit 9c4e336
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 99 deletions.
16 changes: 8 additions & 8 deletions vortex-array/flatbuffers/array.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ table Array {
table ArrayStats {
min: vortex.scalar.Scalar;
max: vortex.scalar.Scalar;
is_sorted: vortex.scalar.Scalar;
is_strict_sorted: vortex.scalar.Scalar;
is_constant: vortex.scalar.Scalar;
run_count: vortex.scalar.Scalar;
true_count: vortex.scalar.Scalar;
null_count: vortex.scalar.Scalar;
bit_width_freq: [vortex.scalar.Scalar];
trailing_zero_freq: [vortex.scalar.Scalar];
is_sorted: bool = null;
is_strict_sorted: bool = null;
is_constant: bool = null;
run_count: uint64 = null;
true_count: uint64 = null;
null_count: uint64 = null;
bit_width_freq: [uint64];
trailing_zero_freq: [uint64];
}


Expand Down
66 changes: 11 additions & 55 deletions vortex-array/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ use vortex_dtype::flatbuffers::PType;
use vortex_dtype::half::f16;
use vortex_dtype::{DType, Nullability};
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult};
use vortex_scalar::flatbuffers as fbs;
use vortex_scalar::Scalar;
use vortex_scalar::Scalar::List;
use vortex_scalar::{ListScalar, Scalar};
use vortex_scalar::{flatbuffers as fbs, ListScalar};

use crate::encoding::{EncodingId, EncodingRef};
use crate::flatbuffers as fb;
Expand Down Expand Up @@ -149,24 +149,6 @@ impl<'v> ArrayView<'v> {
impl Statistics for ArrayView<'_> {
fn get(&self, stat: Stat) -> Option<Scalar> {
match stat {
Stat::IsConstant => {
let is_constant = self.array.stats()?.is_constant();
is_constant
.and_then(|v| v.type__as_bool())
.map(|v| v.value().into())
}
Stat::IsSorted => self
.array
.stats()?
.is_sorted()
.and_then(|v| v.type__as_bool())
.map(|v| v.value().into()),
Stat::IsStrictSorted => self
.array
.stats()?
.is_strict_sorted()
.and_then(|v| v.type__as_bool())
.map(|v| v.value().into()),
Stat::Max => {
let max = self.array.stats()?.max();
max.and_then(|v| v.type__as_primitive())
Expand All @@ -177,35 +159,17 @@ impl Statistics for ArrayView<'_> {
min.and_then(|v| v.type__as_primitive())
.and_then(primitive_to_scalar)
}
Stat::RunCount => {
let rc = self.array.stats()?.run_count();
rc.and_then(|v| v.type__as_primitive())
.and_then(primitive_to_scalar)
}
Stat::TrueCount => {
let tc = self.array.stats()?.true_count();
tc.and_then(|v| v.type__as_primitive())
.and_then(primitive_to_scalar)
}
Stat::NullCount => {
let nc = self.array.stats()?.null_count();
nc.and_then(|v| v.type__as_primitive())
.and_then(primitive_to_scalar)
}
Stat::IsConstant => self.array.stats()?.is_constant().map(bool::into),
Stat::IsSorted => self.array.stats()?.is_sorted().map(bool::into),
Stat::IsStrictSorted => self.array.stats()?.is_strict_sorted().map(bool::into),
Stat::RunCount => self.array.stats()?.run_count().map(u64::into),
Stat::TrueCount => self.array.stats()?.true_count().map(u64::into),
Stat::NullCount => self.array.stats()?.null_count().map(u64::into),
Stat::BitWidthFreq => self
.array
.stats()?
.bit_width_freq()
.map(|v| {
v.iter()
.flat_map(|v| {
primitive_to_scalar(
v.type__as_primitive()
.expect("Should only ever produce primitives"),
)
})
.collect_vec()
})
.map(|v| v.iter().map(u64::into).collect_vec())
.map(|v| {
List(ListScalar::new(
DType::Primitive(vortex_dtype::PType::U64, Nullability::NonNullable),
Expand All @@ -216,16 +180,7 @@ impl Statistics for ArrayView<'_> {
.array
.stats()?
.trailing_zero_freq()
.map(|v| {
v.iter()
.flat_map(|v| {
primitive_to_scalar(
v.type__as_primitive()
.expect("Should only ever produce primitives"),
)
})
.collect_vec()
})
.map(|v| v.iter().map(u64::into).collect_vec())
.map(|v| {
List(ListScalar::new(
DType::Primitive(vortex_dtype::PType::U64, Nullability::NonNullable),
Expand Down Expand Up @@ -291,6 +246,7 @@ impl Statistics for ArrayView<'_> {
}
}

#[allow(dead_code)]
// TODO(@jcasale): move this to serde and make serde crate public?
fn primitive_to_scalar(v: fbs::Primitive) -> Option<Scalar> {
let err_msg = "failed to deserialize invalid primitive scalar";
Expand Down
46 changes: 10 additions & 36 deletions vortex-ipc/src/messages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use vortex::{ArrayData, Context, ViewContext};
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_error::{vortex_err, VortexError};
use vortex_flatbuffers::{FlatBufferRoot, WriteFlatBuffer};
use vortex_scalar::Scalar::{Bool, Primitive};
use vortex_scalar::{BoolScalar, PrimitiveScalar};
use vortex_scalar::PrimitiveScalar;
use vortex_scalar::Scalar::Primitive;

use crate::flatbuffers::ipc as fbi;
use crate::flatbuffers::ipc::Compression;
Expand Down Expand Up @@ -233,64 +233,38 @@ fn compute_and_build_stats<'a>(
})
});

let is_constant = array
.statistics()
.compute_is_constant()
.ok()
.map(|v| Bool(BoolScalar::some(v)).write_flatbuffer(fbb));
let is_sorted = array
.statistics()
.compute_is_sorted()
.ok()
.map(|v| Bool(BoolScalar::some(v)).write_flatbuffer(fbb));
let is_strict_sorted = array
.statistics()
.compute_is_strict_sorted()
.ok()
.map(|v| Bool(BoolScalar::some(v)).write_flatbuffer(fbb));
let is_constant = array.statistics().compute_is_constant().ok();
let is_sorted = array.statistics().compute_is_sorted().ok();
let is_strict_sorted = array.statistics().compute_is_strict_sorted().ok();

let run_count = array
.statistics()
.compute_run_count()
.ok()
.map(|v| Primitive(PrimitiveScalar::some(v as u64)).write_flatbuffer(fbb));
.map(|v| v as u64);
let true_count = array
.statistics()
.compute_true_count()
.ok()
.map(|v| Primitive(PrimitiveScalar::some(v as u64)).write_flatbuffer(fbb));
.map(|v| v as u64);
let null_count = array
.statistics()
.compute_null_count()
.ok()
.map(|v| Primitive(PrimitiveScalar::some(v as u64)).write_flatbuffer(fbb));
.map(|v| v as u64);

let bit_width_freq = array
.statistics()
.compute_bit_width_freq()
.ok()
.map(|v| {
v.iter()
.map(|&inner| inner as u64)
.map(PrimitiveScalar::some)
.map(Primitive)
.map(|v| v.write_flatbuffer(fbb))
.collect_vec()
})
.map(|v| v.iter().map(|&inner| inner as u64).collect_vec())
.map(|v| fbb.create_vector(v.as_slice()));

let trailing_zero_freq = array
.statistics()
.compute_trailing_zero_freq()
.ok()
.map(|v| {
v.iter()
.map(|&inner| inner as u64)
.map(PrimitiveScalar::some)
.map(Primitive)
.map(|v| v.write_flatbuffer(fbb))
.collect_vec()
})
.map(|v| v.iter().map(|&inner| inner as u64).collect_vec())
.map(|v| fbb.create_vector(v.as_slice()));

let stat_args = &fb::ArrayStatsArgs {
Expand Down

0 comments on commit 9c4e336

Please sign in to comment.