Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Array2: Migrate Roaring boolean and int #256

Merged
merged 5 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ members = [
"vortex-flatbuffers",
"vortex-ipc",
"vortex-ree",
#"vortex-roaring",
"vortex-roaring",
"vortex-schema",
#"vortex-zigzag",
]
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workspace = true

[dependencies]
#vortex-alp = { path = "../vortex-alp" }
#vortex-roaring = { path = "../vortex-roaring" }
vortex-roaring = { path = "../vortex-roaring" }
#vortex-zigzag = { path = "../vortex-zigzag" }
arrow = { workspace = true }
arrow-array = { workspace = true }
Expand Down
5 changes: 3 additions & 2 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use vortex_datetime_parts::DateTimePartsEncoding;
use vortex_dict::DictEncoding;
use vortex_fastlanes::{BitPackedEncoding, FoREncoding};
use vortex_ree::REEEncoding;
use vortex_roaring::RoaringBoolEncoding;
use vortex_schema::DType;

use crate::data_downloads::FileType;
Expand Down Expand Up @@ -114,8 +115,8 @@ pub fn enumerate_arrays() -> Vec<EncodingRef> {
&DateTimePartsEncoding,
// &DeltaEncoding, Blows up the search space too much.
&REEEncoding,
//&RoaringBoolEncoding,
// RoaringIntEncoding,
&RoaringBoolEncoding,
// &RoaringIntEncoding,
// Doesn't offer anything more than FoR really
// ZigZagEncoding,
]
Expand Down
2 changes: 1 addition & 1 deletion pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ vortex-dict = { path = "../vortex-dict" }
vortex-error = { path = "../vortex-error" }
vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-ree = { path = "../vortex-ree" }
#vortex-roaring = { path = "../vortex-roaring" }
vortex-roaring = { path = "../vortex-roaring" }
vortex-schema = { path = "../vortex-schema" }
#vortex-zigzag = { path = "../vortex-zigzag" }
itertools = { workspace = true }
Expand Down
20 changes: 18 additions & 2 deletions pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ use vortex_fastlanes::{
FoREncoding, OwnedBitPackedArray, OwnedDeltaArray, OwnedFoRArray,
};
use vortex_ree::{OwnedREEArray, REEArray, REEEncoding, REE};
use vortex_roaring::{
OwnedRoaringBoolArray, OwnedRoaringIntArray, RoaringBool, RoaringBoolArray,
RoaringBoolEncoding, RoaringInt, RoaringIntArray, RoaringIntEncoding,
};

use crate::dtype::PyDType;
use crate::error::PyVortexError;
Expand Down Expand Up @@ -74,8 +78,8 @@ pyarray!(FoREncoding, FoRArray, "FoRArray");
pyarray!(DeltaEncoding, DeltaArray, "DeltaArray");
pyarray!(DictEncoding, DictArray, "DictArray");
pyarray!(REEEncoding, REEArray, "REEArray");
// pyarray!(RoaringBoolEncoding, RoaringBoolArray, "RoaringBoolArray");
// pyarray!(RoaringIntEncoding, RoaringIntArray, "RoaringIntArray");
pyarray!(RoaringBoolEncoding, RoaringBoolArray, "RoaringBoolArray");
pyarray!(RoaringIntEncoding, RoaringIntArray, "RoaringIntArray");
// pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray");

impl PyArray {
Expand Down Expand Up @@ -162,6 +166,18 @@ impl PyArray {
OwnedALPArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
)?
.extract(py),
// Roaring-encoded arrays are dispatched to their own Python wrapper classes
// (the ones generated by `pyarray!(RoaringBoolEncoding, ...)` and
// `pyarray!(RoaringIntEncoding, ...)`), not to the bit-packed wrapper:
// the encoding id matched here must agree with the owned array type we
// convert into.
RoaringBool::ID => PyRoaringBoolArray::wrap(
    py,
    OwnedRoaringBoolArray::try_from(inner.into_array())
        .map_err(PyVortexError::map_err)?,
)?
.extract(py),
RoaringInt::ID => PyRoaringIntArray::wrap(
    py,
    OwnedRoaringIntArray::try_from(inner.into_array())
        .map_err(PyVortexError::map_err)?,
)?
.extract(py),
_ => Py::new(
py,
Self {
Expand Down
4 changes: 2 additions & 2 deletions pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ fn _lib(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<PyFoRArray>()?;
m.add_class::<PyPrimitiveArray>()?;
m.add_class::<PyREEArray>()?;
// m.add_class::<PyRoaringBoolArray>()?;
// m.add_class::<PyRoaringIntArray>()?;
m.add_class::<PyRoaringBoolArray>()?;
m.add_class::<PyRoaringIntArray>()?;
m.add_class::<PySparseArray>()?;
m.add_class::<PyStructArray>()?;
m.add_class::<PyVarBinArray>()?;
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/bool/compute/as_contiguous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use vortex_error::VortexResult;
use crate::array::bool::BoolArray;
use crate::compute::as_contiguous::AsContiguousFn;
use crate::validity::Validity;
use crate::{Array, ArrayDType, IntoArray};
use crate::{Array, ArrayDType, IntoArray, OwnedArray};

impl AsContiguousFn for BoolArray<'_> {
fn as_contiguous(&self, arrays: &[Array]) -> VortexResult<Array<'static>> {
fn as_contiguous(&self, arrays: &[Array]) -> VortexResult<OwnedArray> {
let validity = if self.dtype().is_nullable() {
Validity::from_iter(arrays.iter().map(|a| a.with_dyn(|a| a.logical_validity())))
} else {
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/bool/compute/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use vortex_schema::Nullability;
use crate::array::bool::BoolArray;
use crate::compute::fill::FillForwardFn;
use crate::validity::ArrayValidity;
use crate::{Array, ArrayDType, IntoArray, ToArrayData};
use crate::{ArrayDType, IntoArray, OwnedArray, ToArrayData};

impl FillForwardFn for BoolArray<'_> {
fn fill_forward(&self) -> VortexResult<Array<'static>> {
fn fill_forward(&self) -> VortexResult<OwnedArray> {
if self.dtype().nullability() == Nullability::NonNullable {
return Ok(self.to_array_data().into_array());
}
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::buffer::Buffer;
use crate::ptype::{NativePType, PType};
use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata};
use crate::visitor::{AcceptArrayVisitor, ArrayVisitor};
use crate::{impl_encoding, ArrayDType};
use crate::{impl_encoding, ArrayDType, OwnedArray};
use crate::{match_each_native_ptype, ArrayFlatten};

mod accessor;
Expand Down Expand Up @@ -129,7 +129,7 @@ impl<T: NativePType> From<Vec<T>> for PrimitiveArray<'_> {
}

impl<T: NativePType> IntoArray<'static> for Vec<T> {
fn into_array(self) -> Array<'static> {
fn into_array(self) -> OwnedArray {
PrimitiveArray::from(self).into_array()
}
}
Expand Down
6 changes: 3 additions & 3 deletions vortex-array/src/compute/as_contiguous.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use itertools::Itertools;
use vortex_error::{vortex_bail, vortex_err, VortexResult};

use crate::{Array, ArrayDType};
use crate::{Array, ArrayDType, OwnedArray};

pub trait AsContiguousFn {
fn as_contiguous(&self, arrays: &[Array]) -> VortexResult<Array<'static>>;
fn as_contiguous(&self, arrays: &[Array]) -> VortexResult<OwnedArray>;
}

pub fn as_contiguous(arrays: &[Array]) -> VortexResult<Array<'static>> {
pub fn as_contiguous(arrays: &[Array]) -> VortexResult<OwnedArray> {
if arrays.is_empty() {
vortex_bail!(ComputeError: "No arrays to concatenate");
}
Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::encoding::EncodingRef;
use crate::scalar::Scalar;
use crate::stats::Stat;
use crate::stats::Statistics;
use crate::{Array, ArrayMetadata, IntoArray, ToArray};
use crate::{Array, ArrayMetadata, IntoArray, OwnedArray, ToArray};

#[derive(Clone, Debug)]
pub struct ArrayData {
Expand Down Expand Up @@ -134,7 +134,7 @@ impl ToArray for ArrayData {
}

impl IntoArray<'static> for ArrayData {
fn into_array(self) -> Array<'static> {
fn into_array(self) -> OwnedArray {
Array::Data(self)
}
}
Expand Down
3 changes: 2 additions & 1 deletion vortex-array/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ use arrow_array::types::*;
use arrow_buffer::ArrowNativeType;
use half::f16;
use num_traits::{Num, NumCast};
use serde::{Deserialize, Serialize};
use vortex_error::{vortex_err, VortexError, VortexResult};
use vortex_schema::DType::*;
use vortex_schema::{DType, FloatWidth, IntWidth};

use crate::scalar::{PScalar, Scalar};

#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Hash, Serialize, Deserialize)]
pub enum PType {
U8,
U16,
Expand Down
2 changes: 2 additions & 0 deletions vortex-roaring/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ linkme = { workspace = true }
croaring = { workspace = true }
num-traits = { workspace = true }
log = { workspace = true }
serde = { workspace = true }
jdcasale marked this conversation as resolved.
Show resolved Hide resolved
paste = { workspace = true }

[lints]
workspace = true
26 changes: 13 additions & 13 deletions vortex-roaring/src/boolean/compress.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
use croaring::Bitmap;
use vortex::array::bool::{BoolArray, BoolEncoding};
use vortex::array::downcast::DowncastArrayBuiltin;
use vortex::array::{Array, ArrayRef};
use vortex::array::bool::BoolArray;
use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression};
use vortex::{Array, ArrayDType, ArrayDef, ArrayTrait, IntoArray, OwnedArray};
use vortex_error::VortexResult;
use vortex_schema::DType;
use vortex_schema::Nullability::NonNullable;

use crate::boolean::{RoaringBoolArray, RoaringBoolEncoding};
use crate::boolean::RoaringBoolArray;
use crate::{OwnedRoaringBoolArray, RoaringBool, RoaringBoolEncoding};

impl EncodingCompression for RoaringBoolEncoding {
fn can_compress(
&self,
array: &dyn Array,
array: &Array,
_config: &CompressConfig,
) -> Option<&dyn EncodingCompression> {
// Only support bool enc arrays
if array.encoding().id() != BoolEncoding::ID {
// The candidate must be a plain bool-encoded array. Comparing against
// `RoaringBool::ID` here would only admit arrays that are already
// roaring-encoded and reject every array this compressor is meant for.
if array.encoding().id() != vortex::array::bool::Bool::ID {
    return None;
}

Expand All @@ -34,19 +34,19 @@ impl EncodingCompression for RoaringBoolEncoding {

fn compress(
&self,
array: &dyn Array,
_like: Option<&dyn Array>,
array: &Array,
_like: Option<&Array>,
_ctx: CompressCtx,
) -> VortexResult<ArrayRef> {
Ok(roaring_encode(array.as_bool()).into_array())
) -> VortexResult<OwnedArray> {
roaring_encode(array.clone().flatten_bool()?).map(move |a| a.into_array())
}
}

pub fn roaring_encode(bool_array: &BoolArray) -> RoaringBoolArray {
pub fn roaring_encode(bool_array: BoolArray) -> VortexResult<OwnedRoaringBoolArray> {
let mut bitmap = Bitmap::new();
bitmap.extend(
bool_array
.buffer()
.boolean_buffer()
.iter()
.enumerate()
.filter(|(_, b)| *b)
Expand All @@ -55,5 +55,5 @@ pub fn roaring_encode(bool_array: &BoolArray) -> RoaringBoolArray {
bitmap.run_optimize();
bitmap.shrink_to_fit();

RoaringBoolArray::new(bitmap, bool_array.buffer().len())
RoaringBoolArray::try_new(bitmap, bool_array.len())
}
Loading