Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NullArray + statsset cleanup #350

Merged
merged 2 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 2 additions & 17 deletions vortex-array/src/array/bool/stats.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use std::collections::HashMap;

use arrow_buffer::BooleanBuffer;
use vortex_dtype::{DType, Nullability};
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::array::bool::BoolArray;
use crate::stats::{ArrayStatisticsCompute, Stat, StatsSet};
use crate::validity::{ArrayValidity, LogicalValidity};
use crate::{ArrayTrait, IntoArray};
use crate::{ArrayDType, ArrayTrait, IntoArray};

impl ArrayStatisticsCompute for BoolArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
Expand All @@ -18,7 +16,7 @@ impl ArrayStatisticsCompute for BoolArray {

match self.logical_validity() {
LogicalValidity::AllValid(_) => self.boolean_buffer().compute_statistics(stat),
LogicalValidity::AllInvalid(v) => all_null_stats(v),
LogicalValidity::AllInvalid(v) => Ok(StatsSet::nulls(v, self.dtype())),
LogicalValidity::Array(a) => NullableBools(
&self.boolean_buffer(),
&a.into_array().flatten_bool()?.boolean_buffer(),
Expand All @@ -28,19 +26,6 @@ impl ArrayStatisticsCompute for BoolArray {
}
}

/// Builds the statistics for a boolean array in which all `len` entries are null.
///
/// Min and max are reported as null nullable-bool scalars, the array is treated as
/// constant and sorted, the null count equals `len`, and the true count is zero.
fn all_null_stats(len: usize) -> VortexResult<StatsSet> {
    // Min and max use the same null scalar, so build both from one closure.
    let null_bool = || Scalar::null(DType::Bool(Nullability::Nullable));

    let entries = [
        (Stat::Min, null_bool()),
        (Stat::Max, null_bool()),
        (Stat::IsConstant, true.into()),
        (Stat::IsSorted, true.into()),
        // Strict sortedness is only reported when there are fewer than two entries.
        (Stat::IsStrictSorted, (len < 2).into()),
        (Stat::RunCount, 1.into()),
        (Stat::NullCount, len.into()),
        (Stat::TrueCount, 0.into()),
    ];

    Ok(StatsSet::from(HashMap::from(entries)))
}

/// Borrowed pair of boolean buffers used to compute statistics for a `BoolArray`
/// whose validity is itself an array: field `0` is the array's value buffer and
/// field `1` is the boolean buffer of the flattened validity array.
struct NullableBools<'a>(&'a BooleanBuffer, &'a BooleanBuffer);

impl ArrayStatisticsCompute for NullableBools<'_> {
Expand Down
7 changes: 3 additions & 4 deletions vortex-array/src/array/chunked/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@ use arrow_buffer::{BooleanBuffer, MutableBuffer, ScalarBuffer};
use itertools::Itertools;
use vortex_dtype::{match_each_native_ptype, DType, Nullability, PType, StructDType};
use vortex_error::{vortex_bail, ErrString, VortexResult};
use vortex_scalar::Scalar;

use crate::accessor::ArrayAccessor;
use crate::array::bool::BoolArray;
use crate::array::chunked::ChunkedArray;
use crate::array::constant::ConstantArray;
use crate::array::extension::ExtensionArray;
use crate::array::null::NullArray;
use crate::array::primitive::PrimitiveArray;
use crate::array::r#struct::StructArray;
use crate::array::varbin::builder::VarBinBuilder;
Expand Down Expand Up @@ -73,8 +72,8 @@ pub(crate) fn try_flatten_chunks(chunks: Vec<Array>, dtype: DType) -> VortexResu
}
DType::Null => {
let len = chunks.iter().map(|chunk| chunk.len()).sum();
let const_array = ConstantArray::new(Scalar::null(DType::Null), len);
Ok(Flattened::Null(const_array))
let null_array = NullArray::new(len);
Ok(Flattened::Null(null_array))
}
}
}
Expand Down
50 changes: 0 additions & 50 deletions vortex-array/src/array/constant/as_arrow.rs

This file was deleted.

1 change: 0 additions & 1 deletion vortex-array/src/array/constant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use crate::impl_encoding;
use crate::stats::Stat;
use crate::validity::{ArrayValidity, LogicalValidity};
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
mod as_arrow;
mod compute;
mod flatten;
mod stats;
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod chunked;
pub mod constant;
pub mod datetime;
pub mod extension;
pub mod null;
pub mod primitive;
pub mod sparse;
pub mod r#struct;
Expand Down
53 changes: 53 additions & 0 deletions vortex-array/src/array/null/as_arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//! Implementation of the [AsArrowArray] trait for [NullArray], which represents
//! [DType::Null] values.

use std::sync::Arc;

use arrow_array::{ArrayRef as ArrowArrayRef, NullArray as ArrowNullArray};
use vortex_dtype::DType;
use vortex_error::{vortex_bail, VortexResult};

use crate::array::null::NullArray;
use crate::compute::as_arrow::AsArrowArray;
use crate::{ArrayDType, ArrayTrait};

impl AsArrowArray for NullArray {
fn as_arrow(&self) -> VortexResult<ArrowArrayRef> {
if self.dtype() != &DType::Null {
a10y marked this conversation as resolved.
Show resolved Hide resolved
vortex_bail!(InvalidArgument: "only null ConstantArrays convert to arrow");
}

let arrow_null = ArrowNullArray::new(self.len());
Ok(Arc::new(arrow_null))
}
}

#[cfg(test)]
mod test {
    use arrow_array::{Array, NullArray as ArrowNullArray};

    use crate::array::null::NullArray;
    use crate::arrow::FromArrowArray;
    use crate::compute::as_arrow::AsArrowArray;
    use crate::validity::{ArrayValidity, LogicalValidity};
    use crate::{ArrayData, ArrayTrait, IntoArray};

    /// Arrow nulls -> Vortex `NullArray` -> Arrow nulls keeps the length and
    /// the all-invalid validity, and compares equal to the starting array.
    #[test]
    fn test_round_trip() {
        let expected = ArrowNullArray::new(10);

        // Import from Arrow and narrow to the concrete NullArray encoding.
        let imported = ArrayData::from_arrow(&expected, true).into_array();
        let nulls = NullArray::try_from(imported).unwrap();
        assert_eq!(nulls.len(), 10);
        assert!(matches!(
            nulls.logical_validity(),
            LogicalValidity::AllInvalid(10)
        ));

        // Export back to Arrow and compare against the array we started with.
        let exported = nulls.as_arrow().unwrap();
        let exported_nulls = exported.as_any().downcast_ref::<ArrowNullArray>().unwrap();
        assert_eq!(*exported_nulls, expected);
    }
}
101 changes: 101 additions & 0 deletions vortex-array/src/array/null/compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
use vortex_dtype::{match_each_integer_ptype, DType};
use vortex_error::VortexResult;
use vortex_scalar::Scalar;

use crate::array::null::NullArray;
use crate::compute::scalar_at::ScalarAtFn;
use crate::compute::slice::SliceFn;
use crate::compute::take::TakeFn;
use crate::compute::ArrayCompute;
use crate::{Array, ArrayTrait, IntoArray};

/// Compute kernels that `NullArray` implements itself: `take`, `slice` and `scalar_at`.
impl ArrayCompute for NullArray {
    /// `NullArray` is its own [`TakeFn`].
    fn take(&self) -> Option<&dyn TakeFn> {
        Some(self)
    }

    /// `NullArray` is its own [`SliceFn`].
    fn slice(&self) -> Option<&dyn SliceFn> {
        Some(self)
    }

    /// `NullArray` is its own [`ScalarAtFn`].
    fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
        Some(self)
    }
}

impl SliceFn for NullArray {
    /// Returns a new [`NullArray`] holding `stop - start` nulls.
    ///
    /// `stop` is an exclusive bound, so `stop == self.len()` is valid and slices
    /// through the last element.
    ///
    /// # Panics
    ///
    /// Panics if `start > stop` or if `stop > self.len()`.
    fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> {
        // Without this check `stop - start` would underflow: a panic in debug builds
        // and a wrapped, enormous length in release builds.
        assert!(start <= stop, "cannot slice with start greater than stop");
        // `<=` rather than `<`: the end bound is exclusive.
        assert!(stop <= self.len(), "cannot slice past end of the array");
        Ok(NullArray::new(stop - start).into_array())
    }
}

impl ScalarAtFn for NullArray {
    /// Returns a null scalar of [`DType::Null`] for any in-bounds `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than the array length.
    fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
        let len = self.len();
        assert!(index < len, "cannot index past end of the array");

        let null_scalar = Scalar::null(DType::Null);
        Ok(null_scalar)
    }
}

impl TakeFn for NullArray {
    /// Returns a [`NullArray`] with one null per entry of `indices`.
    ///
    /// The indices are flattened to a primitive array and each one is checked
    /// against the array length before the result is built.
    ///
    /// # Panics
    ///
    /// Panics if any index, cast to `usize`, is not less than the array length.
    fn take(&self, indices: &Array) -> VortexResult<Array> {
        let flat_indices = indices.clone().flatten_primitive()?;
        let len = self.len();

        // Every index must be in bounds, whatever integer type it is stored as.
        match_each_integer_ptype!(flat_indices.ptype(), |$T| {
            flat_indices
                .scalar_buffer::<$T>()
                .iter()
                .for_each(|&index| {
                    assert!((index as usize) < len, "cannot take past end of the array")
                });
        });

        Ok(NullArray::new(flat_indices.len()).into_array())
    }
}

#[cfg(test)]
mod test {
    use vortex_dtype::DType;

    use crate::array::null::NullArray;
    use crate::compute::scalar_at::scalar_at;
    use crate::compute::slice::slice;
    use crate::compute::take::take;
    use crate::validity::{ArrayValidity, LogicalValidity};
    use crate::{ArrayTrait, IntoArray};

    /// Slicing the first four of ten nulls yields four nulls.
    #[test]
    fn test_slice_nulls() {
        let source = NullArray::new(10).into_array();

        let prefix = slice(&source, 0, 4).unwrap();
        let prefix = NullArray::try_from(prefix).unwrap();

        assert_eq!(prefix.len(), 4);
        assert!(matches!(
            prefix.logical_validity(),
            LogicalValidity::AllInvalid(4)
        ));
    }

    /// Taking five indices from ten nulls yields five nulls.
    #[test]
    fn test_take_nulls() {
        let source = NullArray::new(10).into_array();
        let picks = vec![0u64, 2, 4, 6, 8].into_array();

        let picked = take(&source, &picks).unwrap();
        let picked = NullArray::try_from(picked).unwrap();

        assert_eq!(picked.len(), 5);
        assert!(matches!(
            picked.logical_validity(),
            LogicalValidity::AllInvalid(5)
        ));
    }

    /// `scalar_at` on a null array returns a null scalar of `DType::Null`.
    #[test]
    fn test_scalar_at_nulls() {
        let source = NullArray::new(10).into_array();

        let first = scalar_at(&source, 0).unwrap();

        assert!(first.is_null());
        assert_eq!(first.dtype().clone(), DType::Null);
    }
}
68 changes: 68 additions & 0 deletions vortex-array/src/array/null/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use serde::{Deserialize, Serialize};

use crate::stats::{ArrayStatisticsCompute, Stat};
use crate::validity::{ArrayValidity, LogicalValidity, Validity};
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
use crate::{impl_encoding, ArrayFlatten};

mod as_arrow;
mod compute;

// Declares the encoding under the id "vortex.null" with the name `Null`.
// NOTE(review): `NullArray` and `NullEncoding` used below are presumably generated
// by this macro; confirm against the macro definition.
impl_encoding!("vortex.null", Null);

/// Serializable metadata for a `NullArray`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NullMetadata {
// Number of (null) entries in the array; returned by `NullArray::len`.
len: usize,
}

impl NullArray {
    /// Creates an array of `len` nulls with dtype [`DType::Null`].
    ///
    /// The statistics are filled in up front from `StatsSet::nulls`.
    ///
    /// # Panics
    ///
    /// Panics with "NullArray::new cannot fail" if assembling the parts returns an error.
    pub fn new(len: usize) -> Self {
        let metadata = NullMetadata { len };
        let stats = StatsSet::nulls(len, &DType::Null);

        Self::try_from_parts(DType::Null, metadata, Arc::new([]), stats)
            .expect("NullArray::new cannot fail")
    }
}

impl ArrayFlatten for NullArray {
    /// Wraps the array unchanged in [`Flattened::Null`].
    fn flatten(self) -> VortexResult<Flattened> {
        let flattened = Flattened::Null(self);
        Ok(flattened)
    }
}

impl ArrayValidity for NullArray {
    /// Reports the whole array as invalid, carrying its length.
    fn logical_validity(&self) -> LogicalValidity {
        let len = self.len();
        LogicalValidity::AllInvalid(len)
    }

    /// No entry is valid, whatever the index.
    fn is_valid(&self, _index: usize) -> bool {
        false
    }
}

impl ArrayStatisticsCompute for NullArray {
    /// Returns the all-null statistics set for this array's length, whichever
    /// statistic was requested.
    fn compute_statistics(&self, _stat: Stat) -> VortexResult<StatsSet> {
        let len = self.len();
        let stats = StatsSet::nulls(len, &DType::Null);
        Ok(stats)
    }
}

impl AcceptArrayVisitor for NullArray {
    /// Visits only the validity, which is always `Validity::AllInvalid`.
    fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
        let validity = Validity::AllInvalid;
        visitor.visit_validity(&validity)
    }
}

impl ArrayTrait for NullArray {
    /// Always reports zero bytes.
    fn nbytes(&self) -> usize {
        0
    }

    /// The length is read from the array's metadata.
    fn len(&self) -> usize {
        let metadata = self.metadata();
        metadata.len
    }
}

// Empty impl: `NullEncoding` overrides nothing and relies on whatever defaults
// `EncodingCompression` provides.
impl EncodingCompression for NullEncoding {}
Loading