Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an ArrayBuffer that declares alignment #1720

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions encodings/bytebool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ use vortex_array::stats::StatsSet;
use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable};
use vortex_array::variants::{BoolArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical};
use vortex_array::{
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_error::{VortexExpect as _, VortexResult};
Expand Down Expand Up @@ -47,7 +49,7 @@ impl ByteBoolArray {
Arc::new(ByteBoolMetadata {
validity: validity.to_metadata(length)?,
}),
Some(buffer),
Some(ArrayBuffer::new::<u8>(buffer)),
validity.into_array().into_iter().collect::<Vec<_>>().into(),
StatsSet::default(),
)?
Expand All @@ -70,7 +72,7 @@ impl ByteBoolArray {
Self::try_new(buffer, validity)
}

pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("ByteBoolArray is missing the underlying buffer")
Expand Down
8 changes: 5 additions & 3 deletions encodings/bytebool/src/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,11 @@ impl FillForwardFn<ByteBoolArray> for ByteBoolEncoding {
}
// all valid, but we need to convert to non-nullable
if validity.all_valid() {
return Ok(
ByteBoolArray::try_new(array.buffer().clone(), Validity::AllValid)?.into_array(),
);
return Ok(ByteBoolArray::try_new(
array.buffer().clone().into_inner(),
Validity::AllValid,
)?
.into_array());
}
// all invalid => fill with default value (false)
if validity.all_invalid() {
Expand Down
8 changes: 6 additions & 2 deletions encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@ use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, Validi
use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayDType, ArrayData, ArrayLen, ArrayTrait, Canonical,
IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::{DType, NativePType, PType};
use vortex_error::{vortex_bail, vortex_err, VortexExpect as _, VortexResult};

use crate::FASTLANES_ALIGNMENT;

mod compress;
mod compute;

Expand Down Expand Up @@ -87,6 +90,7 @@ impl BitPackedArray {
packed.len()
));
}
let packed = ArrayBuffer::new_with_alignment(packed, FASTLANES_ALIGNMENT);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, I expect this will fail. I can fix this up in the PR to add ScalarBuffer by specifying a runtime alignment.


let metadata = BitPackedMetadata {
validity: validity.to_metadata(length)?,
Expand Down Expand Up @@ -120,7 +124,7 @@ impl BitPackedArray {
}

#[inline]
pub fn packed(&self) -> &Buffer {
pub fn packed(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("BitPackedArray must contain packed buffer")
Expand Down
4 changes: 4 additions & 0 deletions encodings/fastlanes/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ pub use r#for::*;
mod bitpacking;
mod delta;
mod r#for;

/// FastLanes is built around the idea of 1024-bit virtual SIMD registers, therefore we enforce
/// an alignment of 128 bytes.
pub(crate) const FASTLANES_ALIGNMENT: usize = 128;
9 changes: 6 additions & 3 deletions encodings/roaring/src/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ use vortex_array::validity::{LogicalValidity, ValidityVTable};
use vortex_array::variants::{BoolArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::{DType, Nullability};
Expand Down Expand Up @@ -55,7 +56,9 @@ impl RoaringBoolArray {
DType::Bool(Nullability::NonNullable),
length,
Arc::new(RoaringBoolMetadata),
Some(Buffer::from(bitmap.serialize::<Native>())),
Some(ArrayBuffer::new::<u8>(Buffer::from(
bitmap.serialize::<Native>(),
))),
vec![].into(),
stats,
)?
Expand All @@ -75,7 +78,7 @@ impl RoaringBoolArray {
}
}

pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in PrimitiveArray")
Expand Down
8 changes: 5 additions & 3 deletions encodings/roaring/src/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use vortex_array::validity::{LogicalValidity, Validity, ValidityVTable};
use vortex_array::variants::{PrimitiveArrayTrait, VariantsVTable};
use vortex_array::visitor::{ArrayVisitor, VisitorVTable};
use vortex_array::{
impl_encoding, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
impl_encoding, ArrayBuffer, ArrayDType as _, ArrayData, ArrayLen, ArrayTrait, Canonical,
IntoArrayData, IntoCanonical,
};
use vortex_buffer::Buffer;
use vortex_dtype::Nullability::NonNullable;
Expand Down Expand Up @@ -68,7 +68,9 @@ impl RoaringIntArray {
DType::Primitive(ptype, NonNullable),
length,
Arc::new(RoaringIntMetadata { ptype }),
Some(Buffer::from(bitmap.serialize::<Portable>())),
Some(ArrayBuffer::new::<u8>(Buffer::from(
bitmap.serialize::<Portable>(),
))),
vec![].into(),
stats,
)?
Expand Down
13 changes: 7 additions & 6 deletions vortex-array/src/array/bool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ use crate::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTabl
use crate::variants::{BoolArrayTrait, VariantsVTable};
use crate::visitor::{ArrayVisitor, VisitorVTable};
use crate::{
impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
};

pub mod compute;
Expand All @@ -40,14 +41,14 @@ impl Display for BoolMetadata {

impl BoolArray {
/// Access internal array buffer
pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in BoolArray")
}

/// Convert array into its internal buffer
pub fn into_buffer(self) -> Buffer {
pub fn into_buffer(self) -> ArrayBuffer {
self.into_array()
.into_buffer()
.vortex_expect("BoolArray must have a buffer")
Expand All @@ -56,7 +57,7 @@ impl BoolArray {
/// Get array values as an arrow [BooleanBuffer]
pub fn boolean_buffer(&self) -> BooleanBuffer {
BooleanBuffer::new(
self.buffer().clone().into_arrow(),
self.buffer().clone().into_inner().into_arrow(),
self.metadata().first_byte_bit_offset as usize,
self.len(),
)
Expand All @@ -71,7 +72,7 @@ impl BoolArray {
pub fn into_boolean_builder(self) -> (BooleanBufferBuilder, usize) {
let first_byte_bit_offset = self.metadata().first_byte_bit_offset as usize;
let len = self.len();
let arrow_buffer = self.into_buffer().into_arrow();
let arrow_buffer = self.into_buffer().into_inner().into_arrow();
let mutable_buf = if arrow_buffer.ptr_offset() == 0 {
arrow_buffer.into_mutable().unwrap_or_else(|b| {
let mut buf = MutableBuffer::with_capacity(b.len());
Expand Down Expand Up @@ -127,7 +128,7 @@ impl BoolArray {
validity: validity.to_metadata(buffer_len)?,
first_byte_bit_offset,
}),
Some(Buffer::from(inner)),
Some(ArrayBuffer::new::<u8>(Buffer::from(inner))),
validity.into_array().into_iter().collect(),
StatsSet::default(),
)?
Expand Down
10 changes: 6 additions & 4 deletions vortex-array/src/array/primitive/compute/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ impl CastFn<PrimitiveArray> for PrimitiveEncoding {

// If the bit width is the same, we can short-circuit and simply update the validity
if array.ptype() == new_ptype {
return Ok(
PrimitiveArray::new(array.buffer().clone(), array.ptype(), new_validity)
.into_array(),
);
return Ok(PrimitiveArray::new(
array.buffer().clone().into_inner(),
array.ptype(),
new_validity,
)
.into_array());
}

// Otherwise, we need to cast the values one-by-one
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/primitive/compute/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ impl FillForwardFn<PrimitiveArray> for PrimitiveEncoding {
let validity = array.logical_validity();
if validity.all_valid() {
return Ok(PrimitiveArray::new(
array.buffer().clone(),
array.buffer().clone().into_inner(),
array.ptype(),
Validity::AllValid,
)
Expand Down
24 changes: 14 additions & 10 deletions vortex-array/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata
use crate::variants::{PrimitiveArrayTrait, VariantsVTable};
use crate::visitor::{ArrayVisitor, VisitorVTable};
use crate::{
impl_encoding, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData, IntoCanonical,
impl_encoding, ArrayBuffer, ArrayData, ArrayLen, ArrayTrait, Canonical, IntoArrayData,
IntoCanonical,
};

mod compute;
Expand Down Expand Up @@ -60,7 +61,7 @@ impl PrimitiveArray {
.to_metadata(length)
.vortex_expect("Invalid validity"),
}),
Some(buffer),
Some(ArrayBuffer::new_with_alignment(buffer, ptype.byte_width())),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should constructor just accept ArrayBuffer directly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should accept a ScalarBuffer, and then the PType can be extracted. For now it's a bit annoying to take an ArrayBuffer and then make sure alignment matches when I'm about to change it anyway

validity.into_array().into_iter().collect_vec().into(),
StatsSet::default(),
)
Expand Down Expand Up @@ -101,7 +102,7 @@ impl PrimitiveArray {
})
}

pub fn buffer(&self) -> &Buffer {
pub fn buffer(&self) -> &ArrayBuffer {
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in PrimitiveArray")
Expand Down Expand Up @@ -132,11 +133,14 @@ impl PrimitiveArray {
T::PTYPE,
self.ptype(),
);
self.into_buffer().into_vec::<T>().unwrap_or_else(|b| {
let (prefix, values, suffix) = unsafe { b.as_ref().align_to::<T>() };
assert!(prefix.is_empty() && suffix.is_empty());
Vec::from(values)
})
self.into_buffer()
.into_inner()
.into_vec::<T>()
.unwrap_or_else(|b| {
let (prefix, values, suffix) = unsafe { b.as_ref().align_to::<T>() };
assert!(prefix.is_empty() && suffix.is_empty());
Vec::from(values)
})
}

pub fn get_as_cast<T: NativePType>(&self, idx: usize) -> T {
Expand All @@ -156,10 +160,10 @@ impl PrimitiveArray {
"can't reinterpret cast between integers of two different widths"
);

PrimitiveArray::new(self.buffer().clone(), ptype, self.validity())
PrimitiveArray::new(self.buffer().clone().into_inner(), ptype, self.validity())
}

pub fn into_buffer(self) -> Buffer {
pub fn into_buffer(self) -> ArrayBuffer {
self.into_array()
.into_buffer()
.vortex_expect("PrimitiveArray must have a buffer")
Expand Down
8 changes: 4 additions & 4 deletions vortex-array/src/array/varbin/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ pub(crate) fn varbin_to_arrow(varbin_array: &VarBinArray) -> VortexResult<ArrayR
PType::I32 => Arc::new(unsafe {
BinaryArray::new_unchecked(
as_offset_buffer::<i32>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
PType::I64 => Arc::new(unsafe {
LargeBinaryArray::new_unchecked(
as_offset_buffer::<i64>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
Expand All @@ -64,14 +64,14 @@ pub(crate) fn varbin_to_arrow(varbin_array: &VarBinArray) -> VortexResult<ArrayR
PType::I32 => Arc::new(unsafe {
StringArray::new_unchecked(
as_offset_buffer::<i32>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
PType::I64 => Arc::new(unsafe {
LargeStringArray::new_unchecked(
as_offset_buffer::<i64>(offsets),
data.clone().into_arrow(),
data.clone().into_inner().into_arrow(),
nulls,
)
}),
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/varbin/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ impl VarBinArray {
let start = self.offset_at(index);
let end = self.offset_at(index + 1);
let sliced = slice(self.bytes(), start, end)?;
Ok(sliced.into_primitive()?.buffer().clone())
Ok(sliced.into_primitive()?.buffer().clone().into_inner())
}

/// Consumes self, returning a tuple containing the `DType`, the `bytes` array,
Expand Down
20 changes: 16 additions & 4 deletions vortex-array/src/array/varbinview/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,14 @@ impl TakeFn<VarBinViewArray> for VarBinViewEncoding {
let validity = array.validity().take(indices)?;

// Convert our views array into an Arrow u128 ScalarBuffer (16 bytes per view)
let views_buffer =
ScalarBuffer::<u128>::from(array.views().into_primitive()?.into_buffer().into_arrow());
let views_buffer = ScalarBuffer::<u128>::from(
array
.views()
.into_primitive()?
.into_buffer()
.into_inner()
.into_arrow(),
);

let indices = indices.clone().into_primitive()?;

Expand Down Expand Up @@ -97,8 +103,14 @@ impl TakeFn<VarBinViewArray> for VarBinViewEncoding {
let validity = array.validity().take(indices)?;

// Convert our views array into an Arrow u128 ScalarBuffer (16 bytes per view)
let views_buffer =
ScalarBuffer::<u128>::from(array.views().into_primitive()?.into_buffer().into_arrow());
let views_buffer = ScalarBuffer::<u128>::from(
array
.views()
.into_primitive()?
.into_buffer()
.into_inner()
.into_arrow(),
);

let indices = indices.clone().into_primitive()?;

Expand Down
8 changes: 4 additions & 4 deletions vortex-array/src/array/varbinview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ impl VarBinViewArray {
/// only require hitting the prefixes or inline strings.
pub fn binary_views(&self) -> VortexResult<Views> {
Ok(Views {
inner: self.views().into_primitive()?.into_buffer(),
inner: self.views().into_primitive()?.into_buffer().into_inner(),
idx: 0,
len: self.len(),
})
Expand Down Expand Up @@ -567,21 +567,21 @@ pub(crate) fn varbinview_as_arrow(var_bin_view: &VarBinViewArray) -> ArrayRef {

let data = data
.iter()
.map(|p| p.buffer().clone().into_arrow())
.map(|p| p.buffer().clone().into_inner().into_arrow())
.collect::<Vec<_>>();

// Switch on Arrow DType.
match var_bin_view.dtype() {
DType::Binary(_) => Arc::new(unsafe {
BinaryViewArray::new_unchecked(
ScalarBuffer::<u128>::from(views.buffer().clone().into_arrow()),
ScalarBuffer::<u128>::from(views.buffer().clone().into_inner().into_arrow()),
data,
nulls,
)
}),
DType::Utf8(_) => Arc::new(unsafe {
StringViewArray::new_unchecked(
ScalarBuffer::<u128>::from(views.buffer().clone().into_arrow()),
ScalarBuffer::<u128>::from(views.buffer().clone().into_inner().into_arrow()),
data,
nulls,
)
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/arrow/wrappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub fn as_scalar_buffer<T: NativePType + ArrowNativeType>(
array: PrimitiveArray,
) -> ScalarBuffer<T> {
assert_eq!(array.ptype(), T::PTYPE);
ScalarBuffer::from(array.buffer().clone().into_arrow())
ScalarBuffer::from(array.buffer().clone().into_inner().into_arrow())
}

pub fn as_offset_buffer<T: NativePType + ArrowNativeType>(
Expand Down
Loading
Loading