Skip to content

Commit

Permalink
Added a list builder (#1711)
Browse files Browse the repository at this point in the history
  • Loading branch information
joseph-isaacs authored Dec 18, 2024
1 parent 46ec5a5 commit 55396cb
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 6 deletions.
182 changes: 182 additions & 0 deletions vortex-array/src/builders/list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use std::any::Any;
use std::sync::Arc;

use num_traits::{AsPrimitive, PrimInt};
use vortex_dtype::{DType, NativePType, Nullability};
use vortex_error::{VortexExpect, VortexResult};
use vortex_scalar::{ListScalar, Scalar};

use crate::array::ListArray;
use crate::builders::{
builder_with_capacity, ArrayBuilder, ArrayBuilderExt, BoolBuilder, PrimitiveBuilder,
};
use crate::validity::Validity;
use crate::{ArrayData, IntoArrayData};

/// Incrementally builds a list array out of [`ListScalar`] values.
///
/// `O` is the integer type used for the list offsets. The elements of every list are
/// flattened into a single child builder, and a separate offsets builder records where each
/// list ends inside that flattened run.
pub struct ListBuilder<O: PrimInt + NativePType> {
/// Builder for the flattened elements of all lists; created from the element dtype.
value_builder: Box<dyn ArrayBuilder>,
/// Builder for the list offsets. It is seeded with a single zero on construction, and each
/// subsequent entry is the length of `value_builder` at the end of a list.
index_builder: PrimitiveBuilder<O>,
/// Validity bits (`true` = valid, `false` = null); `len()` is reported from this builder.
validity: BoolBuilder,
/// Nullability of the lists themselves; decides whether `finish` emits the validity bits.
nullability: Nullability,
/// `DType::List(value_dtype, nullability)`, returned by `dtype()`.
dtype: DType,
}

impl<O> ListBuilder<O>
where
    O: PrimInt + NativePType,
    Scalar: From<O>,
    usize: AsPrimitive<O>,
{
    /// Creates a builder for lists whose elements have `value_dtype`, pre-sized for
    /// `capacity` lists.
    ///
    /// `nullability` describes the lists themselves (whether a whole list may be null); it is
    /// recorded in the resulting `DType::List`. The offsets are always non-nullable: null lists
    /// are represented by the separate validity bits, never by a null offset.
    pub fn with_capacity(
        value_dtype: Arc<DType>,
        nullability: Nullability,
        capacity: usize,
    ) -> Self {
        // I would expect the list to have more than one value per index
        let value_builder =
            builder_with_capacity(value_dtype.as_ref(), capacity.saturating_mul(2));

        // There is always one more offset than there are lists (the leading zero below), and
        // offsets themselves are never null regardless of the nullability of the lists.
        let mut index_builder = PrimitiveBuilder::with_capacity(
            Nullability::NonNullable,
            capacity.saturating_add(1),
        );

        // The first index of the list, which is always 0 and represents an empty list.
        index_builder.append_zero();

        Self {
            value_builder,
            index_builder,
            validity: BoolBuilder::with_capacity(Nullability::NonNullable, capacity),
            nullability,
            dtype: DType::List(value_dtype, nullability),
        }
    }

    /// Appends one list to the builder.
    ///
    /// A null `value` is appended as a null list (a zero-length span with a `false` validity
    /// bit). Otherwise every element is pushed onto the value builder, the end offset is
    /// recorded, and the list is marked valid.
    ///
    /// Note that for a builder created with `Nullability::NonNullable` the validity bits are
    /// not emitted by `finish`, so a null `value` comes out as a zero-length list.
    ///
    /// # Errors
    ///
    /// Returns an error if an element cannot be appended to the value builder, or if the end
    /// offset cannot be appended to the offsets builder.
    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
        if value.is_null() {
            self.append_null();
            return Ok(());
        }

        for scalar in value.elements() {
            // TODO(joe): This is slow, we should be able to append multiple values at once,
            // or the list scalar should hold an ArrayData
            self.value_builder.append_scalar(&scalar)?;
        }

        // NOTE: `as_` is a plain numeric cast, so an offset that does not fit in `O` is not
        // reported as an error here.
        self.append_index(self.value_builder.len().as_())?;

        // Every appended list needs a validity bit: `len()` and the validity array produced
        // by `finish` are both derived from this builder, so skipping it would leave them
        // shorter than the number of offsets recorded.
        self.validity.append_values(true, 1);
        Ok(())
    }

    /// Records `index` as the end offset of the list that was just appended.
    fn append_index(&mut self, index: O) -> VortexResult<()> {
        self.index_builder.append_scalar(&Scalar::from(index))
    }
}

impl<O> ArrayBuilder for ListBuilder<O>
where
    O: PrimInt + NativePType,
    Scalar: From<O>,
    usize: AsPrimitive<O>,
{
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }

    /// The `DType::List` this builder produces.
    fn dtype(&self) -> &DType {
        &self.dtype
    }

    /// Length of the builder, taken from the number of validity bits recorded so far.
    fn len(&self) -> usize {
        self.validity.len()
    }

    /// Appends `n` valid lists by pushing `n` zeros onto the value builder and giving each
    /// list its own one-wide span of offsets.
    fn append_zeros(&mut self, n: usize) {
        // End offset of the first new list: one past the current end of the values.
        let first_end = self.value_builder.len() + 1;
        self.value_builder.append_zeros(n);

        for end in first_end..first_end + n {
            self.append_index(end.as_())
                .vortex_expect("Failed to append index");
        }

        self.validity.append_values(true, n);
    }

    /// Appends `n` null lists.
    fn append_nulls(&mut self, n: usize) {
        // A null list is stored as a zero-width span (its end offset repeats the current end
        // of the values) together with a `false` validity bit.
        let end: O = self.value_builder.len().as_();

        for _ in 0..n {
            self.append_index(end)
                .vortex_expect("Failed to append index");
        }

        self.validity.append_values(false, n);
    }

    /// Assembles the accumulated elements, offsets and validity into a `ListArray`.
    ///
    /// The validity bits are only emitted for a nullable builder; a non-nullable builder
    /// yields `Validity::NonNullable`.
    fn finish(&mut self) -> VortexResult<ArrayData> {
        let validity = match self.nullability {
            Nullability::Nullable => Validity::Array(self.validity.finish()?),
            Nullability::NonNullable => Validity::NonNullable,
        };

        let elements = self.value_builder.finish()?;
        let offsets = self.index_builder.finish()?;

        let list = ListArray::try_new(elements, offsets, validity)?;
        Ok(list.into_array())
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use vortex_dtype::{DType, Nullability, PType};
    use vortex_scalar::Scalar;

    use crate::builders::list::ListBuilder;
    use crate::builders::ArrayBuilder;
    use crate::IntoArrayVariant;

    /// Finishing a builder that never received a list yields a zero-length array.
    #[test]
    fn test_empty() {
        let element_dtype: Arc<DType> = Arc::new(PType::I32.into());
        let mut builder =
            ListBuilder::<u32>::with_capacity(element_dtype, Nullability::NonNullable, 0);

        let finished = builder.finish().unwrap();
        assert_eq!(finished.len(), 0);
    }

    /// Three appended scalars (three elements, `Scalar::empty`, three elements) come back as
    /// three lists with the matching element counts.
    #[test]
    fn test_values() {
        let element_dtype: Arc<DType> = Arc::new(PType::I32.into());
        let mut builder = ListBuilder::<u32>::with_capacity(
            element_dtype.clone(),
            Nullability::NonNullable,
            0,
        );

        let head = Scalar::list(
            element_dtype.clone(),
            vec![1i32.into(), 2i32.into(), 3i32.into()],
        );
        builder.append_value(head.as_list()).unwrap();

        let gap = Scalar::empty(element_dtype.clone());
        builder.append_value(gap.as_list()).unwrap();

        let tail = Scalar::list(element_dtype, vec![4i32.into(), 5i32.into(), 6i32.into()]);
        builder.append_value(tail.as_list()).unwrap();

        let finished = builder.finish().unwrap();
        assert_eq!(finished.len(), 3);

        let lists = finished.into_list().unwrap();

        let first = lists.elements_at(0).unwrap();
        let second = lists.elements_at(1).unwrap();
        let third = lists.elements_at(2).unwrap();

        assert_eq!(first.len(), 3);
        assert!(second.is_empty());
        assert_eq!(third.len(), 3);
    }
}
19 changes: 14 additions & 5 deletions vortex-array/src/builders/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod binary;
mod bool;
mod extension;
mod list;
mod null;
mod primitive;
mod struct_;
Expand All @@ -17,9 +18,11 @@ pub use utf8::*;
use vortex_dtype::{match_each_native_ptype, DType};
use vortex_error::{vortex_bail, vortex_err, VortexResult};
use vortex_scalar::{
BinaryScalar, BoolScalar, ExtScalar, PrimitiveScalar, Scalar, StructScalar, Utf8Scalar,
BinaryScalar, BoolScalar, ExtScalar, ListScalar, PrimitiveScalar, Scalar, StructScalar,
Utf8Scalar,
};

use crate::builders::list::ListBuilder;
use crate::builders::struct_::StructBuilder;
use crate::ArrayData;

Expand Down Expand Up @@ -71,9 +74,11 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box<dyn ArrayBui
*n,
capacity,
)),
DType::List(..) => {
todo!()
}
DType::List(dtype, n) => Box::new(ListBuilder::<u64>::with_capacity(
dtype.clone(),
*n,
capacity,
)),
DType::Extension(ext_dtype) => {
Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity))
}
Expand Down Expand Up @@ -127,7 +132,11 @@ pub trait ArrayBuilderExt: ArrayBuilder {
.downcast_mut::<StructBuilder>()
.ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))?
.append_value(StructScalar::try_from(scalar)?)?,
DType::List(..) => {}
DType::List(..) => self
.as_any_mut()
.downcast_mut::<ListBuilder<u64>>()
.ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))?
.append_value(ListScalar::try_from(scalar)?)?,
DType::Extension(..) => self
.as_any_mut()
.downcast_mut::<ExtensionBuilder>()
Expand Down
1 change: 1 addition & 0 deletions vortex-dtype/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pub trait NativePType:
+ FromPrimitive
+ ToBytes
+ TryFromBytes
+ 'static
{
/// The PType that corresponds to this native type
const PTYPE: PType;
Expand Down
9 changes: 8 additions & 1 deletion vortex-scalar/src/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::ops::Deref;
use std::sync::Arc;

use vortex_dtype::DType;
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::Nullability::{NonNullable, Nullable};
use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexResult};

use crate::value::{InnerScalarValue, ScalarValue};
Expand Down Expand Up @@ -89,6 +89,13 @@ impl Scalar {
)),
}
}

/// Builds a list scalar with dtype `DType::List(element_dtype, Nullable)` and a null value.
///
/// NOTE(review): despite the name, the value stored is `InnerScalarValue::Null`, i.e. a
/// null list rather than a list holding zero elements — confirm that this is the intended
/// meaning of "empty".
pub fn empty(element_dtype: Arc<DType>) -> Self {
Self {
dtype: DType::List(element_dtype, Nullable),
value: ScalarValue(InnerScalarValue::Null),
}
}
}

impl<'a> TryFrom<&'a Scalar> for ListScalar<'a> {
Expand Down

0 comments on commit 55396cb

Please sign in to comment.