forked from spiraldb/vortex
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
NullArray + statsset cleanup (spiraldb#350)
Add a first-class NullArray that maps back and forth with Arrow. Also cleans up StatsSet's handling of all-null stats a bit.
- Loading branch information
Showing
13 changed files
with
267 additions
and
117 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
//! Implementation of the [AsArrowArray] trait for [NullArray], which represents | ||
//! [DType::Null] values. | ||
use std::sync::Arc; | ||
|
||
use arrow_array::{ArrayRef as ArrowArrayRef, NullArray as ArrowNullArray}; | ||
use vortex_error::VortexResult; | ||
|
||
use crate::array::null::NullArray; | ||
use crate::compute::as_arrow::AsArrowArray; | ||
use crate::ArrayTrait; | ||
|
||
impl AsArrowArray for NullArray { | ||
fn as_arrow(&self) -> VortexResult<ArrowArrayRef> { | ||
let arrow_null = ArrowNullArray::new(self.len()); | ||
Ok(Arc::new(arrow_null)) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use arrow_array::{Array, NullArray as ArrowNullArray};

    use crate::array::null::NullArray;
    use crate::arrow::FromArrowArray;
    use crate::compute::as_arrow::AsArrowArray;
    use crate::validity::{ArrayValidity, LogicalValidity};
    use crate::{ArrayData, ArrayTrait, IntoArray};

    /// Arrow nulls -> Vortex `NullArray` -> Arrow nulls must preserve the
    /// length and the all-invalid validity.
    #[test]
    fn test_round_trip() {
        let arrow_nulls = ArrowNullArray::new(10);

        // Import from Arrow, then narrow to the concrete null encoding.
        let imported = ArrayData::from_arrow(&arrow_nulls, true).into_array();
        let vortex_nulls = NullArray::try_from(imported).unwrap();

        assert_eq!(vortex_nulls.len(), 10);
        let validity = vortex_nulls.logical_validity();
        assert!(matches!(validity, LogicalValidity::AllInvalid(10)));

        // Export back to Arrow and compare against the array we started with.
        let exported = vortex_nulls.as_arrow().unwrap();
        let exported_nulls = exported
            .as_any()
            .downcast_ref::<ArrowNullArray>()
            .unwrap();
        assert_eq!(*exported_nulls, arrow_nulls);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
use vortex_dtype::{match_each_integer_ptype, DType}; | ||
use vortex_error::VortexResult; | ||
use vortex_scalar::Scalar; | ||
|
||
use crate::array::null::NullArray; | ||
use crate::compute::scalar_at::ScalarAtFn; | ||
use crate::compute::slice::SliceFn; | ||
use crate::compute::take::TakeFn; | ||
use crate::compute::ArrayCompute; | ||
use crate::{Array, ArrayTrait, IntoArray}; | ||
|
||
impl ArrayCompute for NullArray { | ||
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { | ||
Some(self) | ||
} | ||
|
||
fn slice(&self) -> Option<&dyn SliceFn> { | ||
Some(self) | ||
} | ||
|
||
fn take(&self) -> Option<&dyn TakeFn> { | ||
Some(self) | ||
} | ||
} | ||
|
||
impl SliceFn for NullArray { | ||
fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> { | ||
assert!(stop < self.len(), "cannot slice past end of the array"); | ||
Ok(NullArray::new(stop - start).into_array()) | ||
} | ||
} | ||
|
||
impl ScalarAtFn for NullArray { | ||
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> { | ||
assert!(index < self.len(), "cannot index past end of the array"); | ||
|
||
Ok(Scalar::null(DType::Null)) | ||
} | ||
} | ||
|
||
impl TakeFn for NullArray { | ||
fn take(&self, indices: &Array) -> VortexResult<Array> { | ||
let indices = indices.clone().flatten_primitive()?; | ||
|
||
// Enforce all indices are valid | ||
match_each_integer_ptype!(indices.ptype(), |$T| { | ||
for index in indices.scalar_buffer::<$T>().iter() { | ||
assert!((*index as usize) < self.len(), "cannot take past end of the array"); | ||
} | ||
}); | ||
|
||
Ok(NullArray::new(indices.len()).into_array()) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use vortex_dtype::DType;

    use crate::array::null::NullArray;
    use crate::compute::scalar_at::scalar_at;
    use crate::compute::slice::slice;
    use crate::compute::take::take;
    use crate::validity::{ArrayValidity, LogicalValidity};
    use crate::{ArrayTrait, IntoArray};

    /// Slicing a null array yields a shorter, still all-invalid null array.
    #[test]
    fn test_slice_nulls() {
        let source = NullArray::new(10).into_array();
        let prefix = slice(&source, 0, 4).unwrap();
        let sliced = NullArray::try_from(prefix).unwrap();

        assert_eq!(sliced.len(), 4);
        let validity = sliced.logical_validity();
        assert!(matches!(validity, LogicalValidity::AllInvalid(4)));
    }

    /// Taking N indices from a null array yields an all-invalid array of length N.
    #[test]
    fn test_take_nulls() {
        let source = NullArray::new(10).into_array();
        let picks = vec![0u64, 2, 4, 6, 8].into_array();
        let taken = NullArray::try_from(take(&source, &picks).unwrap()).unwrap();

        assert_eq!(taken.len(), 5);
        let validity = taken.logical_validity();
        assert!(matches!(validity, LogicalValidity::AllInvalid(5)));
    }

    /// Reading any element gives back a null scalar typed as `DType::Null`.
    #[test]
    fn test_scalar_at_nulls() {
        let source = NullArray::new(10).into_array();

        let scalar = scalar_at(&source, 0).unwrap();
        assert!(scalar.is_null());
        assert_eq!(scalar.dtype().clone(), DType::Null);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
use serde::{Deserialize, Serialize}; | ||
|
||
use crate::stats::{ArrayStatisticsCompute, Stat}; | ||
use crate::validity::{ArrayValidity, LogicalValidity, Validity}; | ||
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; | ||
use crate::{impl_encoding, ArrayFlatten}; | ||
|
||
mod as_arrow; | ||
mod compute; | ||
|
||
impl_encoding!("vortex.null", Null); | ||
|
||
#[derive(Debug, Clone, Serialize, Deserialize)] | ||
pub struct NullMetadata { | ||
len: usize, | ||
} | ||
|
||
impl NullArray { | ||
pub fn new(len: usize) -> Self { | ||
Self::try_from_parts( | ||
DType::Null, | ||
NullMetadata { len }, | ||
Arc::new([]), | ||
StatsSet::nulls(len, &DType::Null), | ||
) | ||
.expect("NullArray::new cannot fail") | ||
} | ||
} | ||
|
||
impl ArrayFlatten for NullArray { | ||
fn flatten(self) -> VortexResult<Flattened> { | ||
Ok(Flattened::Null(self)) | ||
} | ||
} | ||
|
||
impl ArrayValidity for NullArray { | ||
fn is_valid(&self, _: usize) -> bool { | ||
false | ||
} | ||
|
||
fn logical_validity(&self) -> LogicalValidity { | ||
LogicalValidity::AllInvalid(self.len()) | ||
} | ||
} | ||
|
||
impl ArrayStatisticsCompute for NullArray { | ||
fn compute_statistics(&self, _stat: Stat) -> VortexResult<StatsSet> { | ||
Ok(StatsSet::nulls(self.len(), &DType::Null)) | ||
} | ||
} | ||
|
||
impl AcceptArrayVisitor for NullArray { | ||
fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> { | ||
visitor.visit_validity(&Validity::AllInvalid) | ||
} | ||
} | ||
|
||
impl ArrayTrait for NullArray { | ||
fn len(&self) -> usize { | ||
self.metadata().len | ||
} | ||
|
||
fn nbytes(&self) -> usize { | ||
0 | ||
} | ||
} | ||
|
||
impl EncodingCompression for NullEncoding {} |
Oops, something went wrong.