Skip to content

Commit

Permalink
Fastlanes delta (#57)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Robert Kruszewski <[email protected]>
Co-authored-by: Will Manning <[email protected]>
  • Loading branch information
3 people authored Mar 22, 2024
1 parent 4d7188a commit a6a8b92
Show file tree
Hide file tree
Showing 15 changed files with 638 additions and 24 deletions.
4 changes: 2 additions & 2 deletions bench-vortex/benches/compress_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
use bench_vortex::compress_taxi_data;
use bench_vortex::taxi_data::download_taxi_data;

fn enc_compress(c: &mut Criterion) {
fn vortex_compress(c: &mut Criterion) {
download_taxi_data();
let mut group = c.benchmark_group("end to end");
group.sample_size(10);
group.bench_function("compress", |b| b.iter(|| black_box(compress_taxi_data())));
group.finish()
}

criterion_group!(benches, enc_compress);
criterion_group!(benches, vortex_compress);
criterion_main!(benches);
8 changes: 3 additions & 5 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use arrow_array::RecordBatchReader;
use std::fs::{create_dir_all, File};
use std::path::{Path, PathBuf};
use std::sync::Arc;

use arrow_array::RecordBatchReader;
use itertools::Itertools;
use log::{info, warn, LevelFilter};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
Expand All @@ -19,7 +19,7 @@ use vortex::formatter::display_tree;
use vortex_alp::ALPEncoding;
use vortex_datetime::DateTimeEncoding;
use vortex_dict::DictEncoding;
use vortex_fastlanes::{BitPackedEncoding, FoREncoding};
use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding};
use vortex_ree::REEEncoding;
use vortex_roaring::RoaringBoolEncoding;
use vortex_schema::DType;
Expand Down Expand Up @@ -58,8 +58,7 @@ pub fn enumerate_arrays() -> Vec<EncodingRef> {
&BitPackedEncoding,
&FoREncoding,
&DateTimeEncoding,
// DeltaEncoding,
// FFoREncoding,
&DeltaEncoding,
&REEEncoding,
&RoaringBoolEncoding,
// RoaringIntEncoding,
Expand Down Expand Up @@ -148,7 +147,6 @@ mod test {
use crate::taxi_data::download_taxi_data;
use crate::{compress_ctx, compress_taxi_data, setup_logger};

#[ignore]
#[test]
fn compression_ratio() {
setup_logger(LevelFilter::Debug);
Expand Down
2 changes: 1 addition & 1 deletion deps/fastlanez
123 changes: 121 additions & 2 deletions fastlanez-sys/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#![allow(incomplete_features)]
#![feature(generic_const_exprs)]
#![feature(maybe_uninit_uninit_array)]
#![feature(maybe_uninit_array_assume_init)]
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
Expand All @@ -12,10 +14,61 @@ use uninit::prelude::VecCapacity;

include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

pub struct Pred<const B: bool>;
pub fn transpose<T: Sized>(input: &[T; 1024], output: &mut [T; 1024]) {
unsafe {
match size_of::<T>() {
1 => fl_transpose_u8(
input.as_ptr() as *const [u8; 1024],
output.as_ptr() as *mut [u8; 1024],
),
2 => fl_transpose_u16(
input.as_ptr() as *const [u16; 1024],
output.as_ptr() as *mut [u16; 1024],
),
4 => fl_transpose_u32(
input.as_ptr() as *const [u32; 1024],
output.as_ptr() as *mut [u32; 1024],
),
8 => fl_transpose_u64(
input.as_ptr() as *const [u64; 1024],
output.as_ptr() as *mut [u64; 1024],
),
_ => unreachable!(),
}
}
}

pub trait Satisfied {}
pub fn untranspose<T: Sized>(input: &[T; 1024], output: &mut Vec<T>) {
unsafe {
match size_of::<T>() {
1 => fl_untranspose_u8(
input.as_ptr() as *const [u8; 1024],
array_mut_ref![output.reserve_uninit(1024), 0, 1024]
as *mut [std::mem::MaybeUninit<T>; 1024] as *mut [u8; 1024],
),
2 => fl_untranspose_u16(
input.as_ptr() as *const [u16; 1024],
array_mut_ref![output.reserve_uninit(1024), 0, 1024]
as *mut [std::mem::MaybeUninit<T>; 1024] as *mut [u16; 1024],
),
4 => fl_untranspose_u32(
input.as_ptr() as *const [u32; 1024],
array_mut_ref![output.reserve_uninit(1024), 0, 1024]
as *mut [std::mem::MaybeUninit<T>; 1024] as *mut [u32; 1024],
),
8 => fl_untranspose_u64(
input.as_ptr() as *const [u64; 1024],
array_mut_ref![output.reserve_uninit(1024), 0, 1024]
as *mut [std::mem::MaybeUninit<T>; 1024] as *mut [u64; 1024],
),
_ => unreachable!(),
}
output.set_len(output.len() + input.len());
}
}

pub struct Pred<const B: bool>;
pub trait Satisfied {}
impl Satisfied for Pred<true> {}

/// BitPack into a compile-time known bit-width.
Expand Down Expand Up @@ -147,3 +200,69 @@ bitpack_impl!(u8, 8);
bitpack_impl!(u16, 16);
bitpack_impl!(u32, 32);
bitpack_impl!(u64, 64);

pub trait Delta
where
Self: Sized + Copy + Default,
{
/// input is assumed to already be in the transposed layout
/// call transpose() to convert from the original layout
fn encode_transposed(
input: &[Self; 1024],
base: &mut [Self; 128 / size_of::<Self>()],
output: &mut Vec<Self>,
);

/// output is still in the transposed layout
/// call untranspose() to put it back in the original layout
fn decode_transposed(
input: &[Self; 1024],
base: &mut [Self; 128 / size_of::<Self>()],
output: &mut [Self; 1024],
);

fn lanes() -> usize {
// fastlanez processes 1024 bits (128 bytes) at a time
128 / std::mem::size_of::<Self>()
}
}

macro_rules! delta_impl {
($T:ty) => {
paste::item! {
impl Delta for $T {
fn encode_transposed(
input: &[Self; 1024],
base: &mut [Self; 128 / size_of::<Self>()],
output: &mut Vec<Self>,
) {
unsafe {
[<fl_delta_encode_ $T>](
input,
base,
array_mut_ref![output.reserve_uninit(1024), 0, 1024] as *mut [std::mem::MaybeUninit<Self>; 1024] as *mut [Self; 1024],
);
output.set_len(output.len() + 1024);
}
}

fn decode_transposed(
input: &[Self; 1024],
base: &mut [Self; 128 / size_of::<Self>()],
output: &mut [Self; 1024],
) {
unsafe { [<fl_delta_decode_ $T>](input, base, output); }
}
}
}
};
}

delta_impl!(i8);
delta_impl!(i16);
delta_impl!(i32);
delta_impl!(i64);
delta_impl!(u8);
delta_impl!(u16);
delta_impl!(u32);
delta_impl!(u64);
15 changes: 9 additions & 6 deletions pyvortex/src/array.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use paste::paste;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;

use vortex::array::bool::{BoolArray, BoolEncoding};
Expand All @@ -14,7 +13,9 @@ use vortex::array::varbinview::{VarBinViewArray, VarBinViewEncoding};
use vortex::array::{Array, ArrayKind, ArrayRef, EncodingRef};
use vortex_alp::{ALPArray, ALPEncoding};
use vortex_dict::{DictArray, DictEncoding};
use vortex_fastlanes::{BitPackedArray, BitPackedEncoding, FoRArray, FoREncoding};
use vortex_fastlanes::{
BitPackedArray, BitPackedEncoding, DeltaArray, DeltaEncoding, FoRArray, FoREncoding,
};
use vortex_ree::{REEArray, REEEncoding};
use vortex_roaring::{RoaringBoolArray, RoaringBoolEncoding, RoaringIntArray, RoaringIntEncoding};
use vortex_zigzag::{ZigZagArray, ZigZagEncoding};
Expand Down Expand Up @@ -66,6 +67,7 @@ pyarray!(VarBinViewEncoding, VarBinViewArray, "VarBinViewArray");
pyarray!(ALPEncoding, ALPArray, "ALPArray");
pyarray!(BitPackedEncoding, BitPackedArray, "BitPackedArray");
pyarray!(FoREncoding, FoRArray, "FoRArray");
pyarray!(DeltaEncoding, DeltaArray, "DeltaArray");
pyarray!(DictEncoding, DictArray, "DictArray");
pyarray!(REEEncoding, REEArray, "REEArray");
pyarray!(RoaringBoolEncoding, RoaringBoolArray, "RoaringBoolArray");
Expand Down Expand Up @@ -120,6 +122,10 @@ impl PyArray {
PyALPArray::wrap(py, inner.into_any().downcast::<ALPArray>().unwrap())?
.extract(py)
}
DeltaEncoding::ID => {
PyDeltaArray::wrap(py, inner.into_any().downcast::<DeltaArray>().unwrap())?
.extract(py)
}
DictEncoding::ID => {
PyDictArray::wrap(py, inner.into_any().downcast::<DictArray>().unwrap())?
.extract(py)
Expand Down Expand Up @@ -151,10 +157,7 @@ impl PyArray {
PyZigZagArray::wrap(py, inner.into_any().downcast::<ZigZagArray>().unwrap())?
.extract(py)
}
_ => Err(PyValueError::new_err(format!(
"Cannot convert {:?} to enc array",
inner
))),
_ => Py::new(py, Self { inner }),
},
}
}
Expand Down
7 changes: 3 additions & 4 deletions pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,16 @@ fn _lib(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<PyChunkedArray>()?;
m.add_class::<PyCompositeArray>()?;
m.add_class::<PyConstantArray>()?;
m.add_class::<PyDeltaArray>()?;
m.add_class::<PyFoRArray>()?;
m.add_class::<PyPrimitiveArray>()?;
m.add_class::<PyREEArray>()?;
m.add_class::<PyRoaringBoolArray>()?;
m.add_class::<PyRoaringIntArray>()?;
m.add_class::<PySparseArray>()?;
m.add_class::<PyStructArray>()?;
m.add_class::<PyVarBinArray>()?;
m.add_class::<PyVarBinViewArray>()?;

m.add_class::<PyRoaringBoolArray>()?;
m.add_class::<PyRoaringIntArray>()?;

m.add_class::<PyZigZagArray>()?;

m.add_class::<PyDType>()?;
Expand Down
5 changes: 3 additions & 2 deletions pyvortex/test/test_compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ def test_roaring_bool_encode():
assert rarr.nbytes < a.nbytes


def test_roaring_int_encode():
def test_arange_encode():
a = vortex.encode(pa.array(np.arange(10_000), type=pa.uint32()))
compressed = vortex.compress(a)
assert compressed.encoding == "roaring.int"
assert isinstance(compressed, vortex.DeltaArray) or isinstance(compressed, vortex.RoaringIntArray)
assert compressed.nbytes < a.nbytes


def test_zigzag_encode():
Expand Down
1 change: 0 additions & 1 deletion requirements-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# pre: false
# features: []
# all-features: false
# with-sources: false

-e file:pyvortex
-e file:.
Expand Down
1 change: 0 additions & 1 deletion requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# pre: false
# features: []
# all-features: false
# with-sources: false

-e file:pyvortex
-e file:.
20 changes: 20 additions & 0 deletions vortex-array/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,26 @@ impl PType {
pub const fn bit_width(&self) -> usize {
self.byte_width() * 8
}

pub fn to_signed(self) -> PType {
match self {
PType::U8 => PType::I8,
PType::U16 => PType::I16,
PType::U32 => PType::I32,
PType::U64 => PType::I64,
_ => self,
}
}

pub fn to_unsigned(self) -> PType {
match self {
PType::I8 => PType::U8,
PType::I16 => PType::U16,
PType::I32 => PType::U32,
PType::I64 => PType::U64,
_ => self,
}
}
}

impl Display for PType {
Expand Down
Loading

0 comments on commit a6a8b92

Please sign in to comment.