Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an ArrayBuffer that declares alignment #1720

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions encodings/bytebool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ use vortex_array::stats::StatsSet;
use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable};
use vortex_array::variants::{BoolArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical};
use vortex_array::{
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_error::{VortexExpect as _, VortexResult};
Expand Down Expand Up @@ -47,7 +49,7 @@ impl ByteBoolArray {
Arc::new(ByteBoolMetadata {
validity: validity.to_metadata(length)?,
}),
Some(buffer),
Some(ArrayBuffer::new::<u8>(buffer)),
validity.into_array().into_iter().collect::<Vec<_>>().into(),
StatsSet::default(),
)?
Expand All @@ -70,7 +72,7 @@ impl ByteBoolArray {
Self::try_new(buffer, validity)
}

pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("ByteBoolArray is missing the underlying buffer")
Expand Down
8 changes: 5 additions & 3 deletions encodings/bytebool/src/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,11 @@ impl FillForwardFn<ByteBoolArray> for ByteBoolEncoding {
}
// all valid, but we need to convert to non-nullable
if validity.all_valid() {
return Ok(
ByteBoolArray::try_new(array.buffer().clone(), Validity::AllValid)?.into_array(),
);
return Ok(ByteBoolArray::try_new(
array.buffer().clone().into_inner(),
Validity::AllValid,
)?
.into_array());
}
// all invalid => fill with default value (false)
if validity.all_invalid() {
Expand Down
2 changes: 1 addition & 1 deletion encodings/fastlanes/src/bitpacking/compute/scalar_at.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ mod test {
#[test]
fn invalid_patches() {
let packed_array = BitPackedArray::try_new(
Buffer::from(vec![0u8; 128]),
Buffer::from(vec![0u32; 32]),
PType::U32,
Validity::AllInvalid,
Some(Patches::new(
Expand Down
10 changes: 8 additions & 2 deletions encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, Validi
use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical,
IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::{DType, NativePType, PType};
Expand Down Expand Up @@ -88,6 +89,11 @@ impl BitPackedArray {
));
}

// TODO(ngates): enforce 128 byte alignment once we have a ScalarBufferBuilder that can
// enforce custom alignments.
// let packed = ArrayBuffer::new_with_alignment(packed, FASTLANES_ALIGNMENT);
let packed = ArrayBuffer::new_with_alignment(packed, ptype.byte_width());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

new_with_alignment (and new) are weird names for a thing called ArrayBuffer, since I kind of expect them to allocate? a la Vec::new


let metadata = BitPackedMetadata {
validity: validity.to_metadata(length)?,
offset,
Expand Down Expand Up @@ -120,7 +126,7 @@ impl BitPackedArray {
}

#[inline]
pub fn packed(&self) -> &Buffer {
pub fn packed(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("BitPackedArray must contain packed buffer")
Expand Down
9 changes: 6 additions & 3 deletions encodings/roaring/src/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ use vortex_array::validity::{LogicalValidity, ValidityVTable};
use vortex_array::variants::{BoolArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::{DType, Nullability};
Expand Down Expand Up @@ -55,7 +56,9 @@ impl RoaringBoolArray {
DType::Bool(Nullability::NonNullable),
length,
Arc::new(RoaringBoolMetadata),
Some(Buffer::from(bitmap.serialize::<Native>())),
Some(ArrayBuffer::new::<u8>(Buffer::from(
bitmap.serialize::<Native>(),
))),
vec![].into(),
stats,
)?
Expand All @@ -75,7 +78,7 @@ impl RoaringBoolArray {
}
}

pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in PrimitiveArray")
Expand Down
8 changes: 5 additions & 3 deletions encodings/roaring/src/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use vortex_array::validity::{LogicalValidity, Validity, ValidityVTable};
use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
impl_encoding, ArrayBuffer, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical,
IntoArrayData, IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::Nullability::NonNullable;
Expand Down Expand Up @@ -68,7 +68,9 @@ impl RoaringIntArray {
DType::Primitive(ptype, NonNullable),
length,
Arc::new(RoaringIntMetadata { ptype }),
Some(Buffer::from(bitmap.serialize::<Portable>())),
Some(ArrayBuffer::new::<u8>(Buffer::from(
bitmap.serialize::<Portable>(),
))),
vec![].into(),
stats,
)?
Expand Down
4 changes: 2 additions & 2 deletions pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,10 @@ impl PyArray {
/// >>> print(arr.tree_display())
/// root: vortex.primitive(0x03)(i64?, len=4) nbytes=36 B (100.00%)
/// metadata: PrimitiveMetadata { validity: Array }
/// buffer: 32 B
/// buffer (align=8): 32 B
/// validity: vortex.bool(0x02)(bool, len=4) nbytes=3 B (8.33%)
/// metadata: BoolMetadata { validity: NonNullable, first_byte_bit_offset: 0 }
/// buffer: 1 B
/// buffer (align=1): 1 B
/// <BLANKLINE>
///
/// Compressed arrays often have more complex, deeply nested encoding trees.
Expand Down
13 changes: 7 additions & 6 deletions vortex-array/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ use crate::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTabl
use crate::variants::{BoolArrayTrait, VariantsVTable};
use crate::visitor::{ArrayVisitor, VisitorVTable};
use crate::{
impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
};

pub mod compute;
Expand All @@ -40,14 +41,14 @@ impl Display for BoolMetadata {

impl BoolArray {
/// Access internal array buffer
pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in BoolArray")
}

/// Convert array into its internal buffer
pub fn into_buffer(self) -> Buffer {
pub fn into_buffer(self) -> ArrayBuffer {
self.into_array()
.into_buffer()
.vortex_expect("BoolArray must have a buffer")
Expand All @@ -56,7 +57,7 @@ impl BoolArray {
/// Get array values as an arrow [BooleanBuffer]
pub fn boolean_buffer(&self) -> BooleanBuffer {
BooleanBuffer::new(
self.buffer().clone().into_arrow(),
self.buffer().clone().into_inner().into_arrow(),
self.metadata().first_byte_bit_offset as usize,
self.len(),
)
Expand All @@ -71,7 +72,7 @@ impl BoolArray {
pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
let first_byte_bit_offset = self.metadata().first_byte_bit_offset as usize;
let len = self.len();
let arrow_buffer = self.into_buffer().into_arrow();
let arrow_buffer = self.into_buffer().into_inner().into_arrow();
let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
arrow_buffer.into_mutable().unwrap_or_else(|b| {
let mut buf = MutableBuffer::with_capacity(b.len());
Expand Down Expand Up @@ -127,7 +128,7 @@ impl BoolArray {
validity: validity.to_metadata(buffer_len)?,
first_byte_bit_offset,
}),
Some(Buffer::from(inner)),
Some(ArrayBuffer::new::<u8>(Buffer::from(inner))),
validity.into_array().into_iter().collect(),
StatsSet::default(),
)?
Expand Down
10 changes: 6 additions & 4 deletions vortex-array/src/array/primitive/compute/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ impl CastFn<PrimitiveArray> for PrimitiveEncoding {

// If the bit width is the same, we can short-circuit and simply update the validity
if array.ptype() == new_ptype {
return Ok(
PrimitiveArray::new(array.buffer().clone(), array.ptype(), new_validity)
.into_array(),
);
return Ok(PrimitiveArray::new(
array.buffer().clone().into_inner(),
array.ptype(),
new_validity,
)
.into_array());
}

// Otherwise, we need to cast the values one-by-one
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/primitive/compute/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ impl FillForwardFn<PrimitiveArray> for PrimitiveEncoding {
let validity = array.logical_validity();
if validity.all_valid() {
return Ok(PrimitiveArray::new(
array.buffer().clone(),
array.buffer().clone().into_inner(),
array.ptype(),
Validity::AllValid,
)
Expand Down
24 changes: 14 additions & 10 deletions vortex-array/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata
use crate::variants::{PrimitiveArrayTrait, VariantsVTable};
use crate::visitor::{ArrayVisitor, VisitorVTable};
use crate::{
impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
};

mod compute;
Expand Down Expand Up @@ -60,7 +61,7 @@ impl PrimitiveArray {
.to_metadata(length)
.vortex_expect("Invalid validity"),
}),
Some(buffer),
Some(ArrayBuffer::new_with_alignment(buffer, ptype.byte_width())),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should constructor just accept ArrayBuffer directly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should accept a ScalarBuffer, and then the PType can be extracted. For now it's a bit annoying to take an ArrayBuffer and then make sure alignment matches when I'm about to change it anyway

validity.into_array().into_iter().collect_vec().into(),
StatsSet::default(),
)
Expand Down Expand Up @@ -101,7 +102,7 @@ impl PrimitiveArray {
})
}

pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in PrimitiveArray")
Expand Down Expand Up @@ -132,11 +133,14 @@ impl PrimitiveArray {
T::PTYPE,
self.ptype(),
);
self.into_buffer().into_vec::<T>().unwrap_or_else(|b| {
let (prefix, values, suffix) = unsafe { b.as_ref().align_to::<T>() };
assert!(prefix.is_empty() && suffix.is_empty());
Vec::from(values)
})
self.into_buffer()
.into_inner()
.into_vec::<T>()
.unwrap_or_else(|b| {
let (prefix, values, suffix) = unsafe { b.as_ref().align_to::<T>() };
assert!(prefix.is_empty() && suffix.is_empty());
Vec::from(values)
})
}

pub fn get_as_cast<T: NativePType>(&self, idx: usize) -> T {
Expand All @@ -156,10 +160,10 @@ impl PrimitiveArray {
"can't reinterpret cast between integers of two different widths"
);

PrimitiveArray::new(self.buffer().clone(), ptype, self.validity())
PrimitiveArray::new(self.buffer().clone().into_inner(), ptype, self.validity())
}

pub fn into_buffer(self) -> Buffer {
pub fn into_buffer(self) -> ArrayBuffer {
self.into_array()
.into_buffer()
.vortex_expect("PrimitiveArray must have a buffer")
Expand Down
8 changes: 4 additions & 4 deletions vortex-array/src/array/varbin/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ pub(crate) fn varbin_to_arrow(varbin_array: &VarBinArray) -> VortexResult<ArrayR
PType::I32 => Arc::new(unsafe {
BinaryArray::new_unchecked(
as_offset_buffer::<i32>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
PType::I64 => Arc::new(unsafe {
LargeBinaryArray::new_unchecked(
as_offset_buffer::<i64>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
Expand All @@ -64,14 +64,14 @@ pub(crate) fn varbin_to_arrow(varbin_array: &VarBinArray) -> VortexResult<ArrayR
PType::I32 => Arc::new(unsafe {
StringArray::new_unchecked(
as_offset_buffer::<i32>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
PType::I64 => Arc::new(unsafe {
LargeStringArray::new_unchecked(
as_offset_buffer::<i64>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/varbin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ impl VarBinArray {
let start = self.offset_at(index);
let end = self.offset_at(index + 1);
let sliced = slice(self.bytes(), start, end)?;
Ok(sliced.into_primitive()?.buffer().clone())
Ok(sliced.into_primitive()?.buffer().clone().into_inner())
}

/// Consumes self, returning a tuple containing the `DType`, the `bytes` array,
Expand Down
20 changes: 16 additions & 4 deletions vortex-array/src/array/varbinview/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,14 @@ impl TakeFn<VarBinViewArray> for VarBinViewEncoding {
let validity = array.validity().take(indices)?;

// Convert our views array into an Arrow u128 ScalarBuffer (16 bytes per view)
let views_buffer =
ScalarBuffer::<u128>::from(array.views().into_primitive()?.into_buffer().into_arrow());
let views_buffer = ScalarBuffer::<u128>::from(
array
.views()
.into_primitive()?
.into_buffer()
.into_inner()
.into_arrow(),
);

let indices = indices.clone().into_primitive()?;

Expand Down Expand Up @@ -97,8 +103,14 @@ impl TakeFn<VarBinViewArray> for VarBinViewEncoding {
let validity = array.validity().take(indices)?;

// Convert our views array into an Arrow u128 ScalarBuffer (16 bytes per view)
let views_buffer =
ScalarBuffer::<u128>::from(array.views().into_primitive()?.into_buffer().into_arrow());
let views_buffer = ScalarBuffer::<u128>::from(
array
.views()
.into_primitive()?
.into_buffer()
.into_inner()
.into_arrow(),
);

let indices = indices.clone().into_primitive()?;

Expand Down
Loading
Loading