Skip to content

Commit

Permalink
implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
jdcasale committed Apr 22, 2024
1 parent 0725639 commit be5b5d0
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 117 deletions.
4 changes: 2 additions & 2 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ProjectionMask;
use simplelog::{ColorChoice, Config, TermLogger, TerminalMode};
use vortex::array::chunked::ChunkedArray;
use vortex_roaring::RoaringIntEncoding;
use vortex_roaring::RoaringBoolEncoding;
use vortex::arrow::FromArrowType;
use vortex::compress::{CompressConfig, CompressCtx};
use vortex::encoding::{EncodingRef, VORTEX_ENCODINGS};
use vortex::{IntoArray, OwnedArray, ToArrayData};
use vortex_dict::DictEncoding;
use vortex_fastlanes::{BitPackedEncoding, FoREncoding};
use vortex_ree::REEEncoding;
use vortex_roaring::RoaringBoolEncoding;
use vortex_roaring::RoaringIntEncoding;
use vortex_schema::DType;

use crate::data_downloads::FileType;
Expand Down
5 changes: 4 additions & 1 deletion pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ use vortex_fastlanes::{
FoREncoding, OwnedBitPackedArray, OwnedDeltaArray, OwnedFoRArray,
};
use vortex_ree::{OwnedREEArray, REEArray, REEEncoding, REE};
use vortex_roaring::{RoaringBoolArray, RoaringBoolEncoding, RoaringIntArray, RoaringIntEncoding};
use vortex_roaring::{
OwnedRoaringBoolArray, OwnedRoaringIntArray, RoaringBool, RoaringBoolArray,
RoaringBoolEncoding, RoaringInt, RoaringIntArray, RoaringIntEncoding,
};

use crate::dtype::PyDType;
use crate::error::PyVortexError;
Expand Down
3 changes: 2 additions & 1 deletion vortex-array/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ use arrow_array::types::*;
use arrow_buffer::ArrowNativeType;
use half::f16;
use num_traits::{Num, NumCast};
use serde::{Deserialize, Serialize};
use vortex_error::{vortex_err, VortexError, VortexResult};
use vortex_schema::DType::*;
use vortex_schema::{DType, FloatWidth, IntWidth};

use crate::scalar::{PScalar, Scalar};

#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash, Serialize, Deserialize)]
pub enum PType {
U8,
U16,
Expand Down
15 changes: 7 additions & 8 deletions vortex-roaring/src/boolean/compress.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use croaring::Bitmap;
use vortex::{Array, ArrayDef, ArrayDType, IntoArray};
// use vortex::array::bool::{BoolArray, BoolEncoding};
use vortex::array::primitive::PrimitiveArray;
use vortex::array::bool::BoolArray;
use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression};
use vortex::{Array, ArrayDType, ArrayDef, IntoArray, ToStatic};
use vortex_error::VortexResult;
use vortex_schema::DType;
use vortex_schema::Nullability::NonNullable;

use crate::boolean::{RoaringBoolArray};
use crate::boolean::RoaringBoolArray;
use crate::{RoaringBool, RoaringBoolEncoding};

impl EncodingCompression for RoaringBoolEncoding {
Expand Down Expand Up @@ -39,15 +38,15 @@ impl EncodingCompression for RoaringBoolEncoding {
_like: Option<&Array>,
_ctx: CompressCtx,
) -> VortexResult<Array<'static>> {
Ok(roaring_encode(array.clone().flatten_primitive()?).into_array())
roaring_encode(array.clone().flatten_bool()?).map(move |a| a.into_array().to_static())
}
}

pub fn roaring_encode(bool_array: PrimitiveArray) -> RoaringBoolArray {
pub fn roaring_encode(bool_array: BoolArray) -> VortexResult<RoaringBoolArray> {
let mut bitmap = Bitmap::new();
bitmap.extend(
bool_array
.buffer()
.boolean_buffer()
.iter()
.enumerate()
.filter(|(_, b)| *b)
Expand All @@ -56,5 +55,5 @@ pub fn roaring_encode(bool_array: PrimitiveArray) -> RoaringBoolArray {
bitmap.run_optimize();
bitmap.shrink_to_fit();

RoaringBoolArray::new(bitmap, bool_array.buffer().len())
RoaringBoolArray::try_new(bitmap, bool_array.buffer().len())
}
8 changes: 4 additions & 4 deletions vortex-roaring/src/boolean/compute.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use croaring::Bitmap;
use vortex::{IntoArray, OwnedArray};
use vortex::compute::scalar_at::ScalarAtFn;
use vortex::compute::slice::SliceFn;
use vortex::compute::ArrayCompute;
use vortex::scalar::{Scalar};
use vortex_error::{VortexResult};
use vortex::scalar::Scalar;
use vortex::{IntoArray, OwnedArray};
use vortex_error::VortexResult;

use crate::RoaringBoolArray;

Expand Down Expand Up @@ -58,6 +58,6 @@ impl SliceFn for RoaringBoolArray<'_> {
let slice_bitmap = Bitmap::from_range(start as u32..stop as u32);
let bitmap = self.bitmap().and(&slice_bitmap).add_offset(-(start as i64));

Ok(RoaringBoolArray::new(bitmap, stop - start).into_array())
RoaringBoolArray::try_new(bitmap, stop - start).map(|a| a.into_array())
}
}
86 changes: 41 additions & 45 deletions vortex-roaring/src/boolean/mod.rs
Original file line number Diff line number Diff line change
@@ -1,48 +1,56 @@
use std::sync::{RwLock};

use compress::roaring_encode;
use croaring::Bitmap;
use croaring::{Bitmap, Portable};
use serde::{Deserialize, Serialize};
use vortex::encoding::{ArrayEncodingRef};
use vortex::stats::{ArrayStatistics, ArrayStatisticsCompute};
use vortex::array::bool::{Bool, BoolArray};
use vortex::buffer::Buffer;
use vortex::stats::ArrayStatisticsCompute;
use vortex::validity::{ArrayValidity, LogicalValidity};
use vortex::{impl_encoding, ArrayFlatten, ArrayDType, ToArrayData, OwnedArray};
// use vortex::array::bool::BoolArray;
use vortex::array::primitive::{Primitive, PrimitiveArray};
use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex::{impl_encoding, ArrayFlatten, OwnedArray};
use vortex_error::{vortex_err, VortexResult};
use vortex_schema::Nullability::NonNullable;

mod compress;
mod compute;


impl_encoding!("vortex.roaring_bool", RoaringBool);

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoaringBoolMetadata {
bitmap: Bitmap,
length: usize,
stats: Arc<RwLock<StatsSet>>,
}

impl RoaringBoolArray<'_> {
pub fn new(bitmap: Bitmap, length: usize) -> Self {
Self {
bitmap,
length,
stats: Arc::new(RwLock::new(StatsSet::new())),
}
pub fn try_new(bitmap: Bitmap, _length: usize) -> VortexResult<Self> {
Ok(Self {
typed: TypedArray::try_from_parts(
DType::Bool(NonNullable),
RoaringBoolMetadata {
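// TODO(@jdcasale): `_length` is currently ignored because the value passed in by callers appears to be wrong;
// `cardinality + 1` is a stopgap. Cardinality only counts the set bits, so it is not the array length in general.
// NOTE(review): the caller in compress.rs passes `buffer().len()`, presumably a byte count rather than a bit count -- confirm.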
length: (bitmap.statistics().cardinality + 1) as usize,
},
Some(Buffer::Owned(bitmap.serialize::<Portable>().into())),
vec![].into(),
HashMap::default(),
)?,
})
}

pub fn bitmap(&self) -> &Bitmap {
&self.metadata().bitmap
pub fn bitmap(&self) -> Bitmap {
//TODO(@jdcasale): figure out a way to avoid this deserialization per-call
Bitmap::deserialize::<Portable>(
self.array()
.buffer()
.expect("RoaringBoolArray buffer is missing")
.as_slice(),
)
}

pub fn encode(array: Array) -> VortexResult<OwnedArray> {
if array.encoding().id() == Primitive::ID {
Ok(roaring_encode(PrimitiveArray::try_from(array)?).into_array())
pub fn encode(array: Array<'static>) -> VortexResult<OwnedArray> {
if array.encoding().id() == Bool::ID {
roaring_encode(BoolArray::try_from(array)?).map(|a| a.into_array())
} else {
Err(vortex_err!("RoaringInt can only encode primitive arrays"))
Err(vortex_err!("RoaringInt can only encode boolean arrays"))
}
}
}
Expand All @@ -52,15 +60,9 @@ impl AcceptArrayVisitor for RoaringBoolArray<'_> {
}
}

impl ToArrayData for RoaringBoolArray<'_> {
fn to_array_data(&self) -> ArrayData {
todo!()
}
}

impl ArrayTrait for RoaringBoolArray<'_> {
fn len(&self) -> usize {
todo!()
self.metadata().length
}
}

Expand All @@ -76,16 +78,10 @@ impl ArrayValidity for RoaringBoolArray<'_> {
}
}

// impl ArrayDisplay for RoaringBoolArray {
// fn fmt(&self, f: &mut ArrayFormatter) -> std::fmt::Result {
// f.property("bitmap", format!("{:?}", self.bitmap()))
// }
// }

impl ArrayFlatten for RoaringBoolArray<'_> {
fn flatten<'a>(self) -> VortexResult<Flattened<'a>>
where
Self: 'a,
where
Self: 'a,
{
todo!()
// decompress(self).map(Flattened::Primitive)
Expand All @@ -94,29 +90,29 @@ impl ArrayFlatten for RoaringBoolArray<'_> {

#[cfg(test)]
mod test {
use vortex::Array;
use vortex::array::bool::BoolArray;
use vortex::compute::scalar_at::scalar_at;
use vortex::scalar::Scalar;
use vortex::IntoArray;
use vortex_error::VortexResult;

use crate::RoaringBoolArray;

#[test]
pub fn iter() -> VortexResult<()> {
let bool: Array = &BoolArray::from(vec![true, false, true, true]);
let array = RoaringBoolArray::encode(bool)?;

let values = array.bitmap().to_vec();
let bool: BoolArray = BoolArray::from(vec![true, false, true, true]);
let array = RoaringBoolArray::encode(bool.into_array())?;
let round_trip = RoaringBoolArray::try_from(array.clone())?;
let values = round_trip.bitmap().to_vec();
assert_eq!(values, vec![0, 2, 3]);

Ok(())
}

#[test]
pub fn test_scalar_at() -> VortexResult<()> {
let bool: &dyn Array = &BoolArray::from(vec![true, false, true, true]);
let array = RoaringBoolArray::encode(bool)?;
let bool: BoolArray = BoolArray::from(vec![true, false, true, true]);
let array = RoaringBoolArray::encode(bool.into_array())?;

let truthy: Scalar = true.into();
let falsy: Scalar = false.into();
Expand Down
42 changes: 24 additions & 18 deletions vortex-roaring/src/integer/compress.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
use croaring::Bitmap;
use log::debug;
use num_traits::NumCast;
use vortex::{Array, ArrayDef, ArrayDType, IntoArray};
use vortex::array::primitive::{PrimitiveArray};
use vortex::array::primitive::PrimitiveArray;
use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression};
use vortex::ptype::{NativePType, PType};
use vortex::stats::{ArrayStatistics, Stat};
use vortex::{Array, ArrayDType, ArrayDef, IntoArray, OwnedArray, ToStatic};
use vortex_error::VortexResult;
use vortex_schema::DType;
use vortex_schema::Nullability::NonNullable;
use vortex_schema::Signedness::Unsigned;

use crate::{RoaringInt, RoaringIntArray, RoaringIntEncoding, RoaringIntMetadata};
use crate::{RoaringInt, RoaringIntArray, RoaringIntEncoding};

impl EncodingCompression for RoaringIntEncoding {
fn can_compress(
&self,
array: & Array,
array: &Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
// Only primitive-encoded arrays are supported
Expand All @@ -34,13 +34,18 @@ impl EncodingCompression for RoaringIntEncoding {
if !array
.statistics()
.compute_as(Stat::IsStrictSorted)
.map(|s| s.unwrap_or(false))
.unwrap_or(false)
{
debug!("Skipping roaring int, not strict sorted");
return None;
}

if array.statistics().compute_as(Stat::Max).map(|s| s > u32::MAX as usize).unwrap_or(false) {
if array
.statistics()
.compute_as(Stat::Max)
.map(|s: usize| s > u32::MAX as usize)
.unwrap_or(false)
{
debug!("Skipping roaring int, max is larger than {}", u32::MAX);
return None;
}
Expand All @@ -51,25 +56,26 @@ impl EncodingCompression for RoaringIntEncoding {

fn compress(
&self,
array: & Array,
_like: Option<& Array>,
array: &Array,
_like: Option<&Array>,
_ctx: CompressCtx,
) -> VortexResult<Array<'static>> {
Ok(roaring_encode(array.clone().flatten_primitive()?).into_array())
) -> VortexResult<OwnedArray> {
let parray = array.clone().flatten_primitive()?;
Ok(roaring_encode(parray).into_array().to_static())
}
}

pub fn roaring_encode(primitive_array: PrimitiveArray) -> RoaringIntArray {
match primitive_array.ptype() {
PType::U8 => roaring_encode_primitive::<u8>(primitive_array.buffer().typed_data()),
PType::U16 => roaring_encode_primitive::<u16>(primitive_array.buffer().typed_data()),
PType::U32 => roaring_encode_primitive::<u32>(primitive_array.buffer().typed_data()),
PType::U64 => roaring_encode_primitive::<u64>(primitive_array.buffer().typed_data()),
_ => panic!("Unsupported ptype {}", primitive_array.ptype()),
pub fn roaring_encode(parray: PrimitiveArray) -> RoaringIntArray {
match parray.ptype() {
PType::U8 => roaring_encode_primitive::<u8>(parray.typed_data()),
PType::U16 => roaring_encode_primitive::<u16>(parray.typed_data()),
PType::U32 => roaring_encode_primitive::<u32>(parray.typed_data()),
PType::U64 => roaring_encode_primitive::<u64>(parray.typed_data()),
_ => panic!("Unsupported ptype {}", parray.ptype()),
}
}

fn roaring_encode_primitive<T: NumCast + NativePType>(values: &[T]) -> RoaringIntArray {
fn roaring_encode_primitive<T: NumCast + NativePType>(values: &[T]) -> RoaringIntArray<'static> {
let mut bitmap = Bitmap::new();
bitmap.extend(values.iter().map(|i| i.to_u32().unwrap()));
bitmap.run_optimize();
Expand Down
4 changes: 2 additions & 2 deletions vortex-roaring/src/integer/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ impl ArrayCompute for RoaringIntArray<'_> {
impl ScalarAtFn for RoaringIntArray<'_> {
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
// Unwrap since we know the index is valid
let bitmap_value = self.bitmap.select(index as u32).unwrap();
let scalar: Scalar = match self.ptype {
let bitmap_value = self.bitmap().select(index as u32).unwrap();
let scalar: Scalar = match self.metadata().ptype {
PType::U8 => (bitmap_value as u8).into(),
PType::U16 => (bitmap_value as u16).into(),
PType::U32 => bitmap_value.into(),
Expand Down
Loading

0 comments on commit be5b5d0

Please sign in to comment.