Skip to content

Commit

Permalink
Array2: port zigzag (#265)
Browse files Browse the repository at this point in the history
NB: zigzag isn't actually used anywhere, and it never had an implementation
of flatten while that method was still optional, so the now-required
implementation is left as a `todo!()`.
  • Loading branch information
jdcasale authored Apr 25, 2024
1 parent c327896 commit 06d5ba1
Show file tree
Hide file tree
Showing 14 changed files with 159 additions and 256 deletions.
25 changes: 25 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ members = [
"vortex-ree",
"vortex-roaring",
"vortex-schema",
#"vortex-zigzag",
"vortex-zigzag",
]
resolver = "2"

Expand Down
1 change: 1 addition & 0 deletions bench-vortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-ipc = { path = "../vortex-ipc" }
vortex-ree = { path = "../vortex-ree" }
vortex-schema = { path = "../vortex-schema" }
vortex-zigzag = { path = "../vortex-zigzag" }

[dev-dependencies]
criterion = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ pub fn enumerate_arrays() -> Vec<EncodingRef> {
&RoaringBoolEncoding,
// &RoaringIntEncoding,
// Doesn't offer anything more than FoR really
// ZigZagEncoding,
// &ZigZagEncoding,
]
}

Expand Down
2 changes: 1 addition & 1 deletion pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-ree = { path = "../vortex-ree" }
vortex-roaring = { path = "../vortex-roaring" }
vortex-schema = { path = "../vortex-schema" }
#vortex-zigzag = { path = "../vortex-zigzag" }
vortex-zigzag = { path = "../vortex-zigzag" }
itertools = { workspace = true }
log = { workspace = true }
paste = { workspace = true }
Expand Down
8 changes: 7 additions & 1 deletion pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use vortex_roaring::{
OwnedRoaringBoolArray, OwnedRoaringIntArray, RoaringBool, RoaringBoolArray,
RoaringBoolEncoding, RoaringInt, RoaringIntArray, RoaringIntEncoding,
};
use vortex_zigzag::{OwnedZigZagArray, ZigZag, ZigZagArray, ZigZagEncoding};

use crate::dtype::PyDType;
use crate::error::PyVortexError;
Expand Down Expand Up @@ -80,7 +81,7 @@ pyarray!(DictEncoding, DictArray, "DictArray");
pyarray!(REEEncoding, REEArray, "REEArray");
pyarray!(RoaringBoolEncoding, RoaringBoolArray, "RoaringBoolArray");
pyarray!(RoaringIntEncoding, RoaringIntArray, "RoaringIntArray");
// pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray");
pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray");

impl PyArray {
pub fn wrap(py: Python<'_>, inner: ArrayData) -> PyResult<Py<Self>> {
Expand Down Expand Up @@ -178,6 +179,11 @@ impl PyArray {
.map_err(PyVortexError::map_err)?,
)?
.extract(py),
ZigZag::ID => PyZigZagArray::wrap(
py,
OwnedZigZagArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
)?
.extract(py),
_ => Py::new(
py,
Self {
Expand Down
2 changes: 1 addition & 1 deletion pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn _lib(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<PyStructArray>()?;
m.add_class::<PyVarBinArray>()?;
m.add_class::<PyVarBinViewArray>()?;
// m.add_class::<PyZigZagArray>()?;
m.add_class::<PyZigZagArray>()?;
m.add_class::<PyALPArray>()?;

m.add_class::<PyDType>()?;
Expand Down
2 changes: 2 additions & 0 deletions vortex-zigzag/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ linkme = { workspace = true }
vortex-alloc = { path = "../vortex-alloc" }
vortex-array = { path = "../vortex-array" }
vortex-error = { path = "../vortex-error" }
vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-schema = { path = "../vortex-schema" }
zigzag = { workspace = true }
serde = { workspace = true, features = ["derive"] }

[lints]
workspace = true
115 changes: 64 additions & 51 deletions vortex-zigzag/src/compress.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,23 @@
use vortex::array::downcast::DowncastArrayBuiltin;
use vortex::array::primitive::PrimitiveArray;
use vortex::array::{Array, ArrayKind, ArrayRef};
use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression};
use vortex::ptype::{NativePType, PType};
use vortex::stats::Stat;
use vortex::validity::{OwnedValidity, ValidityView};
use vortex::view::ToOwnedView;
use vortex::stats::{ArrayStatistics, Stat};
use vortex::validity::Validity;
use vortex::{Array, IntoArray, OwnedArray};
use vortex_alloc::{AlignedVec, ALIGNED_ALLOCATOR};
use vortex_error::VortexResult;
use zigzag::ZigZag;
use zigzag::ZigZag as ExternalZigZag;

use crate::downcast::DowncastZigzag;
use crate::zigzag::{ZigZagArray, ZigZagEncoding};
use crate::{OwnedZigZagArray, ZigZagArray, ZigZagEncoding};

impl EncodingCompression for ZigZagEncoding {
fn can_compress(
&self,
array: &dyn Array,
array: &Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
// Only support primitive arrays
let parray = array.maybe_primitive()?;
let parray = PrimitiveArray::try_from(array).ok()?;

// Only supports signed integers
if !parray.ptype().is_signed_int() {
Expand All @@ -30,63 +27,54 @@ impl EncodingCompression for ZigZagEncoding {
// Only compress if the array has negative values
// TODO(ngates): also check that Stat::Max is less than half the max value of the type
parray
.stats()
.get_or_compute_cast::<i64>(&Stat::Min)
.statistics()
.compute_as_cast::<i64>(Stat::Min)
.filter(|&min| min < 0)
.map(|_| self as &dyn EncodingCompression)
}

fn compress(
&self,
array: &dyn Array,
like: Option<&dyn Array>,
array: &Array,
like: Option<&Array>,
ctx: CompressCtx,
) -> VortexResult<ArrayRef> {
let zigzag_like = like.map(|like_arr| like_arr.as_zigzag());
let encoded = match ArrayKind::from(array) {
ArrayKind::Primitive(p) => zigzag_encode(p),
_ => unreachable!("This array kind should have been filtered out"),
}
.unwrap();
) -> VortexResult<OwnedArray> {
let zigzag_like = like.map(|like_arr| ZigZagArray::try_from(like_arr).unwrap());
let encoded = zigzag_encode(&array.as_primitive())?;

Ok(
ZigZagArray::new(ctx.compress(encoded.encoded(), zigzag_like.map(|z| z.encoded()))?)
.into_array(),
)
Ok(OwnedZigZagArray::new(ctx.compress(
&encoded.encoded(),
zigzag_like.as_ref().map(|z| z.encoded()).as_ref(),
)?)
.into_array())
}
}

pub fn zigzag_encode(parray: &PrimitiveArray) -> VortexResult<ZigZagArray> {
pub fn zigzag_encode(parray: &PrimitiveArray<'_>) -> VortexResult<OwnedZigZagArray> {
let encoded = match parray.ptype() {
PType::I8 => zigzag_encode_primitive::<i8>(parray.buffer().typed_data(), parray.validity()),
PType::I16 => {
zigzag_encode_primitive::<i16>(parray.buffer().typed_data(), parray.validity())
}
PType::I32 => {
zigzag_encode_primitive::<i32>(parray.buffer().typed_data(), parray.validity())
}
PType::I64 => {
zigzag_encode_primitive::<i64>(parray.buffer().typed_data(), parray.validity())
}
PType::I8 => zigzag_encode_primitive::<i8>(parray.typed_data(), parray.validity()),
PType::I16 => zigzag_encode_primitive::<i16>(parray.typed_data(), parray.validity()),
PType::I32 => zigzag_encode_primitive::<i32>(parray.typed_data(), parray.validity()),
PType::I64 => zigzag_encode_primitive::<i64>(parray.typed_data(), parray.validity()),
_ => panic!("Unsupported ptype {}", parray.ptype()),
};
ZigZagArray::try_new(encoded.into_array())
OwnedZigZagArray::try_new(encoded.into_array())
}

fn zigzag_encode_primitive<T: ZigZag + NativePType>(
values: &[T],
validity: Option<ValidityView>,
) -> PrimitiveArray
fn zigzag_encode_primitive<'a, T: ExternalZigZag + NativePType>(
values: &'a [T],
validity: Validity<'a>,
) -> PrimitiveArray<'a>
where
<T as ZigZag>::UInt: NativePType,
<T as ExternalZigZag>::UInt: NativePType,
{
let mut encoded = AlignedVec::with_capacity_in(values.len(), ALIGNED_ALLOCATOR);
let mut encoded = Vec::with_capacity(values.len());
encoded.extend(values.iter().map(|v| T::encode(*v)));
PrimitiveArray::from_nullable_in(encoded, validity.to_owned_view())
PrimitiveArray::from_vec(encoded.to_vec(), validity.to_owned())
}

#[allow(dead_code)]
pub fn zigzag_decode(parray: &PrimitiveArray) -> PrimitiveArray {
pub fn zigzag_decode<'a>(parray: &'a PrimitiveArray<'a>) -> PrimitiveArray<'a> {
match parray.ptype() {
PType::U8 => zigzag_decode_primitive::<i8>(parray.buffer().typed_data(), parray.validity()),
PType::U16 => {
Expand All @@ -103,14 +91,39 @@ pub fn zigzag_decode(parray: &PrimitiveArray) -> PrimitiveArray {
}

#[allow(dead_code)]
fn zigzag_decode_primitive<T: ZigZag + NativePType>(
values: &[T::UInt],
validity: Option<ValidityView>,
) -> PrimitiveArray
fn zigzag_decode_primitive<'a, T: ExternalZigZag + NativePType>(
values: &'a [T::UInt],
validity: Validity<'a>,
) -> PrimitiveArray<'a>
where
<T as ZigZag>::UInt: NativePType,
<T as ExternalZigZag>::UInt: NativePType,
{
let mut encoded: AlignedVec<T> = AlignedVec::with_capacity_in(values.len(), ALIGNED_ALLOCATOR);
encoded.extend(values.iter().map(|v| T::decode(*v)));
PrimitiveArray::from_nullable_in(encoded, validity.to_owned_view())
PrimitiveArray::from_vec(encoded.to_vec(), validity)
}

// Unit tests for the ZigZag compression path; compiled only for test builds.
#[cfg(test)]
mod test {
use std::sync::Arc;

use vortex::encoding::{ArrayEncoding, EncodingRef};
use vortex_fastlanes::BitPackedEncoding;

use super::*;

// Compresses an i64 array holding the values -10_000..10_000 and checks that
// the outermost encoding of the compressed result is ZigZag.
#[test]
fn test_compress() {
// Restrict the compressor to exactly two encodings: ZigZag and BitPacked.
// NOTE(review): BitPacked is presumably enabled so the zigzag-encoded child
// can itself be compressed further — confirm against the compressor.
let cfg = CompressConfig::new()
.with_enabled([&ZigZagEncoding as EncodingRef, &BitPackedEncoding]);
let ctx = CompressCtx::new(Arc::new(cfg));

// The input range includes negative values; `ZigZagEncoding::can_compress`
// only accepts signed-integer primitive arrays whose `Stat::Min` is < 0.
let compressed = ctx
.compress(
PrimitiveArray::from(Vec::from_iter((-10_000..10_000).map(|i| i as i64))).array(),
None,
)
.unwrap();
// Only the top-level encoding id is asserted; the child encoding is not checked.
assert_eq!(compressed.encoding().id(), ZigZagEncoding.id());
}
}
16 changes: 8 additions & 8 deletions vortex-zigzag/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use vortex::array::{Array, ArrayRef};
use vortex::compute::scalar_at::{scalar_at, ScalarAtFn};
use vortex::compute::slice::{slice, SliceFn};
use vortex::compute::ArrayCompute;
use vortex::scalar::{PScalar, Scalar};
use vortex::{ArrayDType, IntoArray, OwnedArray};
use vortex_error::{vortex_err, VortexResult};
use zigzag::ZigZag;
use zigzag::ZigZag as ExternalZigZag;

use crate::ZigZagArray;

impl ArrayCompute for ZigZagArray {
impl ArrayCompute for ZigZagArray<'_> {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}
Expand All @@ -18,9 +18,9 @@ impl ArrayCompute for ZigZagArray {
}
}

impl ScalarAtFn for ZigZagArray {
impl ScalarAtFn for ZigZagArray<'_> {
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
let scalar = scalar_at(self.encoded(), index)?;
let scalar = scalar_at(&self.encoded(), index)?;
match scalar {
Scalar::Primitive(p) => match p.value() {
None => Ok(Scalar::null(self.dtype())),
Expand All @@ -37,8 +37,8 @@ impl ScalarAtFn for ZigZagArray {
}
}

impl SliceFn for ZigZagArray {
fn slice(&self, start: usize, stop: usize) -> VortexResult<ArrayRef> {
Ok(ZigZagArray::try_new(slice(self.encoded(), start, stop)?)?.into_array())
impl SliceFn for ZigZagArray<'_> {
fn slice(&self, start: usize, stop: usize) -> VortexResult<OwnedArray> {
Ok(ZigZagArray::try_new(slice(&self.encoded(), start, stop)?)?.into_array())
}
}
8 changes: 0 additions & 8 deletions vortex-zigzag/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
use linkme::distributed_slice;
use vortex::encoding::{EncodingRef, ENCODINGS};
pub use zigzag::*;

mod compress;
mod compute;
mod downcast;
mod serde;
mod stats;
mod zigzag;

#[distributed_slice(ENCODINGS)]
static ENCODINGS_ZIGZAG: EncodingRef = &ZigZagEncoding;
Loading

0 comments on commit 06d5ba1

Please sign in to comment.