Skip to content

Commit

Permalink
Compressors
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Mar 6, 2024
1 parent 4da0308 commit df59a63
Show file tree
Hide file tree
Showing 16 changed files with 28 additions and 53 deletions.
1 change: 1 addition & 0 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ mod test {
.unwrap();
}

#[ignore]
#[test]
fn compression_ratio() {
setup_logger();
Expand Down
5 changes: 4 additions & 1 deletion pyvortex/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use pyo3::{pyclass, pyfunction, pymethods, Py, PyResult, Python};
use vortex::compress::{CompressConfig, CompressCtx};

use crate::array::PyArray;
use crate::error::PyVortexError;

#[derive(Clone)]
#[pyclass(name = "CompressConfig", module = "vortex")]
Expand Down Expand Up @@ -33,6 +34,8 @@ pub fn compress(
) -> PyResult<Py<PyArray>> {
let compress_opts = opts.map(|o| o.inner).unwrap_or_default();
let ctx = CompressCtx::new(&compress_opts);
let compressed = py.allow_threads(|| ctx.compress(arr.unwrap(), None));
let compressed = py
.allow_threads(|| ctx.compress(arr.unwrap(), None))
.map_err(PyVortexError::map_err)?;
PyArray::wrap(py, compressed)
}
5 changes: 3 additions & 2 deletions vortex-array/src/array/sparse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@ use crate::error::{VortexError, VortexResult};
use crate::formatter::{ArrayDisplay, ArrayFormatter};
use crate::match_arrow_numeric_type;
use crate::serde::{ArraySerde, EncodingSerde};
use crate::stats::{Stats, StatsSet};
use crate::stats::{Stats, StatsCompute, StatsSet};

mod compress;
mod compute;
mod serde;
mod stats;

#[derive(Debug, Clone)]
pub struct SparseArray {
Expand Down Expand Up @@ -190,6 +189,8 @@ impl Array for SparseArray {
}
}

// Empty impl: `SparseArray` overrides nothing and relies on whatever
// provided methods `StatsCompute` supplies.
impl StatsCompute for SparseArray {}

impl<'arr> AsRef<(dyn Array + 'arr)> for SparseArray {
fn as_ref(&self) -> &(dyn Array + 'arr) {
self
Expand Down
9 changes: 0 additions & 9 deletions vortex-array/src/array/sparse/stats.rs

This file was deleted.

5 changes: 3 additions & 2 deletions vortex-array/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::dtype::{DType, FieldNames};
use crate::error::VortexResult;
use crate::formatter::{ArrayDisplay, ArrayFormatter};
use crate::serde::{ArraySerde, EncodingSerde};
use crate::stats::{Stats, StatsSet};
use crate::stats::{Stats, StatsCompute, StatsSet};

use super::{
check_slice_bounds, Array, ArrayRef, ArrowIterator, Encoding, EncodingId, EncodingRef,
Expand All @@ -23,7 +23,6 @@ use super::{
mod compress;
mod compute;
mod serde;
mod stats;

#[derive(Debug, Clone)]
pub struct StructArray {
Expand Down Expand Up @@ -167,6 +166,8 @@ impl<'arr> AsRef<(dyn Array + 'arr)> for StructArray {
}
}

// Empty impl: `StructArray` overrides nothing and relies on whatever
// provided methods `StatsCompute` supplies.
impl StatsCompute for StructArray {}

// Field-less unit struct.
// NOTE(review): presumably the encoding descriptor paired with `StructArray`
// — confirm against its `Encoding` impl, which is not visible here.
#[derive(Debug)]
pub struct StructEncoding;

Expand Down
9 changes: 0 additions & 9 deletions vortex-array/src/array/struct_/stats.rs

This file was deleted.

2 changes: 1 addition & 1 deletion vortex-array/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl Default for CompressCtx<'_> {

pub fn sampled_compression(array: &dyn Array, ctx: CompressCtx) -> VortexResult<ArrayRef> {
// First, we try constant compression and shortcut any sampling.
if array.len() > 0
if !array.is_empty()
&& array
.stats()
.get_or_compute_as::<bool>(&Stat::IsConstant)
Expand Down
4 changes: 3 additions & 1 deletion vortex-array/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ impl StatsSet {
}

// Trait with a single method, `compute`, which takes a `Stat` and returns a
// `VortexResult<StatsSet>`.
// NOTE(review): this unmarked diff view lists two `compute` signatures back
// to back: one without a body (required method) and one with a provided body.
// The provided body ignores its argument and returns an empty `StatsSet`.
pub trait StatsCompute {
fn compute(&self, stat: &Stat) -> VortexResult<StatsSet>;
fn compute(&self, _stat: &Stat) -> VortexResult<StatsSet> {
Ok(StatsSet::new())
}
}

pub struct Stats<'a> {
Expand Down
10 changes: 6 additions & 4 deletions vortex-fastlanes/src/bitpacking/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,12 @@ mod test {
);
let ctx = CompressCtx::new(&cfg);

let compressed = ctx.compress(
&PrimitiveArray::from_vec(Vec::from_iter((0..10_000).map(|i| (i % 63) as u8))),
None,
);
let compressed = ctx
.compress(
&PrimitiveArray::from_vec(Vec::from_iter((0..10_000).map(|i| (i % 63) as u8))),
None,
)
.unwrap();
assert_eq!(compressed.encoding().id(), BitPackedEncoding.id());
let bp = compressed
.as_any()
Expand Down
2 changes: 1 addition & 1 deletion vortex-fastlanes/src/for/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ mod test {
// Create a range offset by a million
let array = PrimitiveArray::from_vec((0u32..10_000).map(|v| v + 1_000_000).collect_vec());

let compressed = ctx.compress(&array, None);
let compressed = ctx.compress(&array, None).unwrap();
assert_eq!(compressed.encoding().id(), FoREncoding.id());
let fa = compressed.as_any().downcast_ref::<FoRArray>().unwrap();
assert_eq!(fa.reference().try_into(), Ok(1_000_000u32));
Expand Down
4 changes: 3 additions & 1 deletion vortex-ffor/src/ffor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use vortex::error::{VortexError, VortexResult};
use vortex::formatter::{ArrayDisplay, ArrayFormatter};
use vortex::scalar::{Scalar, ScalarRef};
use vortex::serde::{ArraySerde, EncodingSerde};
use vortex::stats::{Stats, StatsSet};
use vortex::stats::{Stats, StatsCompute, StatsSet};

use crate::compress::ffor_encode;

Expand Down Expand Up @@ -168,6 +168,8 @@ impl Array for FFORArray {
}
}

// Empty impl: `FFORArray` overrides nothing and relies on whatever
// provided methods `StatsCompute` supplies.
impl StatsCompute for FFORArray {}

// Empty impl: no `ArrayCompute` methods are overridden for `FFORArray`.
impl ArrayCompute for FFORArray {}

impl<'arr> AsRef<(dyn Array + 'arr)> for FFORArray {
Expand Down
1 change: 0 additions & 1 deletion vortex-ffor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ mod compress;
mod downcast;
mod ffor;
mod serde;
mod stats;

// Adds a reference to `FFoREncoding` as an element of the `ENCODINGS`
// distributed slice.
#[distributed_slice(ENCODINGS)]
static ENCODINGS_FFOR: EncodingRef = &FFoREncoding;
10 changes: 0 additions & 10 deletions vortex-ffor/src/stats.rs

This file was deleted.

1 change: 0 additions & 1 deletion vortex-ree/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ mod compute;
mod downcast;
mod ree;
mod serde;
mod stats;

// Adds a reference to `REEEncoding` as an element of the `ENCODINGS`
// distributed slice.
#[distributed_slice(ENCODINGS)]
static ENCODINGS_REE: EncodingRef = &REEEncoding;
4 changes: 3 additions & 1 deletion vortex-ree/src/ree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use vortex::error::{VortexError, VortexResult};
use vortex::formatter::{ArrayDisplay, ArrayFormatter};
use vortex::ptype::NativePType;
use vortex::serde::{ArraySerde, EncodingSerde};
use vortex::stats::{Stat, Stats, StatsSet};
use vortex::stats::{Stat, Stats, StatsCompute, StatsSet};

use crate::compress::ree_encode;

Expand Down Expand Up @@ -218,6 +218,8 @@ impl Array for REEArray {
}
}

// Empty impl: `REEArray` overrides nothing and relies on whatever
// provided methods `StatsCompute` supplies.
impl StatsCompute for REEArray {}

impl<'arr> AsRef<(dyn Array + 'arr)> for REEArray {
fn as_ref(&self) -> &(dyn Array + 'arr) {
self
Expand Down
9 changes: 0 additions & 9 deletions vortex-ree/src/stats.rs

This file was deleted.

0 comments on commit df59a63

Please sign in to comment.