diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs new file mode 100644 index 000000000..91fcb8d0e --- /dev/null +++ b/vortex-array/src/builders/list.rs @@ -0,0 +1,182 @@ +use std::any::Any; +use std::sync::Arc; + +use num_traits::{AsPrimitive, PrimInt}; +use vortex_dtype::{DType, NativePType, Nullability}; +use vortex_error::{VortexExpect, VortexResult}; +use vortex_scalar::{ListScalar, Scalar}; + +use crate::array::ListArray; +use crate::builders::{ + builder_with_capacity, ArrayBuilder, ArrayBuilderExt, BoolBuilder, PrimitiveBuilder, +}; +use crate::validity::Validity; +use crate::{ArrayData, IntoArrayData}; + +pub struct ListBuilder { + value_builder: Box, + index_builder: PrimitiveBuilder, + validity: BoolBuilder, + nullability: Nullability, + dtype: DType, +} + +impl ListBuilder +where + O: PrimInt + NativePType, + Scalar: From, + usize: AsPrimitive, +{ + pub fn with_capacity( + value_dtype: Arc, + nullability: Nullability, + capacity: usize, + ) -> Self { + // I would expect the list to have more than one value per index + let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity); + let mut index_builder = PrimitiveBuilder::with_capacity(nullability, capacity); + + // The first index of the list, which is always 0 and represents an empty list. 
+ index_builder.append_zero(); + + Self { + value_builder, + index_builder, + validity: BoolBuilder::with_capacity(Nullability::NonNullable, capacity), + nullability, + dtype: DType::List(value_dtype, nullability), + } + } + + pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> { + if value.is_null() { + self.append_null(); + Ok(()) + } else { + for scalar in value.elements() { + // TODO(joe): This is slow, we should be able to append multiple values at once, + // or the list scalar should hold an ArrayData + self.value_builder.append_scalar(&scalar)?; + } + self.append_index(self.value_builder.len().as_()) + } + } + + fn append_index(&mut self, index: O) -> VortexResult<()> { + self.index_builder.append_scalar(&Scalar::from(index)) + } +} + +impl ArrayBuilder for ListBuilder +where + O: PrimInt + NativePType, + Scalar: From, + usize: AsPrimitive, +{ + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn dtype(&self) -> &DType { + &self.dtype + } + + fn len(&self) -> usize { + self.validity.len() + } + + fn append_zeros(&mut self, n: usize) { + let count = self.value_builder.len(); + self.value_builder.append_zeros(n); + for i in 0..n { + self.append_index((count + i + 1).as_()) + .vortex_expect("Failed to append index"); + } + self.validity.append_values(true, n); + } + + fn append_nulls(&mut self, n: usize) { + let count = self.value_builder.len(); + for _ in 0..n { + // A list with a null element is can be a list with a zero-span offset and a validity + // bit set + self.append_index(count.as_()) + .vortex_expect("Failed to append index"); + } + self.validity.append_values(false, n); + } + + fn finish(&mut self) -> VortexResult { + let validity = match self.nullability { + Nullability::NonNullable => Validity::NonNullable, + Nullability::Nullable => Validity::Array(self.validity.finish()?), + }; + + ListArray::try_new( + self.value_builder.finish()?, + self.index_builder.finish()?, + 
validity, + ) + .map(ListArray::into_array) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use vortex_dtype::{DType, Nullability, PType}; + use vortex_scalar::Scalar; + + use crate::builders::list::ListBuilder; + use crate::builders::ArrayBuilder; + use crate::IntoArrayVariant; + + #[test] + fn test_empty() { + let mut builder = ListBuilder::::with_capacity( + Arc::new(PType::I32.into()), + Nullability::NonNullable, + 0, + ); + + let list = builder.finish().unwrap(); + assert_eq!(list.len(), 0); + } + + #[test] + fn test_values() { + let dtype: Arc = Arc::new(PType::I32.into()); + let mut builder = + ListBuilder::::with_capacity(dtype.clone(), Nullability::NonNullable, 0); + + builder + .append_value( + Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into(), 3i32.into()]).as_list(), + ) + .unwrap(); + + builder + .append_value(Scalar::empty(dtype.clone()).as_list()) + .unwrap(); + + builder + .append_value( + Scalar::list(dtype, vec![4i32.into(), 5i32.into(), 6i32.into()]).as_list(), + ) + .unwrap(); + + let list = builder.finish().unwrap(); + assert_eq!(list.len(), 3); + + let list_array = list.into_list().unwrap(); + + assert_eq!(list_array.elements_at(0).unwrap().len(), 3); + assert!(list_array.elements_at(1).unwrap().is_empty()); + assert_eq!(list_array.elements_at(2).unwrap().len(), 3); + } +} diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs index e27f85aa7..fe6172c4b 100644 --- a/vortex-array/src/builders/mod.rs +++ b/vortex-array/src/builders/mod.rs @@ -1,6 +1,7 @@ mod binary; mod bool; mod extension; +mod list; mod null; mod primitive; mod struct_; @@ -17,9 +18,11 @@ pub use utf8::*; use vortex_dtype::{match_each_native_ptype, DType}; use vortex_error::{vortex_bail, vortex_err, VortexResult}; use vortex_scalar::{ - BinaryScalar, BoolScalar, ExtScalar, PrimitiveScalar, Scalar, StructScalar, Utf8Scalar, + BinaryScalar, BoolScalar, ExtScalar, ListScalar, PrimitiveScalar, Scalar, StructScalar, + Utf8Scalar, }; 
+use crate::builders::list::ListBuilder; use crate::builders::struct_::StructBuilder; use crate::ArrayData; @@ -71,9 +74,11 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box { - todo!() - } + DType::List(dtype, n) => Box::new(ListBuilder::::with_capacity( + dtype.clone(), + *n, + capacity, + )), DType::Extension(ext_dtype) => { Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity)) } @@ -127,7 +132,11 @@ pub trait ArrayBuilderExt: ArrayBuilder { .downcast_mut::() .ok_or_else(|| vortex_err!("Cannot append struct scalar to non-struct builder"))? .append_value(StructScalar::try_from(scalar)?)?, - DType::List(..) => {} + DType::List(..) => self + .as_any_mut() + .downcast_mut::>() + .ok_or_else(|| vortex_err!("Cannot append list scalar to non-list builder"))? + .append_value(ListScalar::try_from(scalar)?)?, DType::Extension(..) => self .as_any_mut() .downcast_mut::() diff --git a/vortex-dtype/src/ptype.rs b/vortex-dtype/src/ptype.rs index 0b406b6ad..d1ebf2789 100644 --- a/vortex-dtype/src/ptype.rs +++ b/vortex-dtype/src/ptype.rs @@ -60,6 +60,7 @@ pub trait NativePType: + FromPrimitive + ToBytes + TryFromBytes + + 'static { /// The PType that corresponds to this native type const PTYPE: PType; diff --git a/vortex-scalar/src/list.rs b/vortex-scalar/src/list.rs index 925cd0011..bdc1fd021 100644 --- a/vortex-scalar/src/list.rs +++ b/vortex-scalar/src/list.rs @@ -2,7 +2,7 @@ use std::ops::Deref; use std::sync::Arc; use vortex_dtype::DType; -use vortex_dtype::Nullability::NonNullable; +use vortex_dtype::Nullability::{NonNullable, Nullable}; use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexResult}; use crate::value::{InnerScalarValue, ScalarValue}; @@ -89,6 +89,13 @@ impl Scalar { )), } } + + pub fn empty(element_dtype: Arc) -> Self { // Builds a scalar whose dtype is a nullable list of `element_dtype` and whose value is null. NOTE(review): the value is null rather than a zero-length list -- confirm callers of `empty` expect that. + Self { + dtype: DType::List(element_dtype, Nullable), + value: ScalarValue(InnerScalarValue::Null), + } + } } impl<'a> TryFrom<&'a Scalar> for ListScalar<'a> {