Skip to content

Commit

Permalink
Implement Flatten for DictArray
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 committed Mar 26, 2024
1 parent 2d598aa commit eb32ea0
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 1 deletion.
10 changes: 10 additions & 0 deletions vortex-array/src/compute/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ pub fn flatten(array: &dyn Array) -> VortexResult<FlattenedArray> {
})
}

pub fn flatten_varbin(array: &dyn Array) -> VortexResult<VarBinArray> {
if let FlattenedArray::VarBin(vb) = flatten(array)? {
Ok(vb)
} else {
Err(VortexError::InvalidArgument(
format!("Cannot flatten array {} into varbin", array).into(),
))
}
}

pub fn flatten_bool(array: &dyn Array) -> VortexResult<BoolArray> {
if let FlattenedArray::Bool(b) = flatten(array)? {
Ok(b)
Expand Down
78 changes: 77 additions & 1 deletion vortex-dict/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
use std::sync::Arc;

use vortex::array::primitive::PrimitiveArray;
use vortex::array::varbin::VarBinArray;
use vortex::compute::flatten::{flatten, flatten_primitive, FlattenFn, FlattenedArray};
use vortex::compute::scalar_at::{scalar_at, ScalarAtFn};
use vortex::compute::take::take;
use vortex::compute::ArrayCompute;
use vortex::scalar::Scalar;
use vortex_error::VortexResult;
use vortex_error::{VortexError, VortexResult};

use crate::DictArray;

impl ArrayCompute for DictArray {
fn flatten(&self) -> Option<&dyn FlattenFn> {
Some(self)
}

fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}
Expand All @@ -17,3 +27,69 @@ impl ScalarAtFn for DictArray {
scalar_at(self.values(), dict_index)
}
}

impl FlattenFn for DictArray {
fn flatten(&self) -> VortexResult<FlattenedArray> {
let codes = flatten_primitive(self.codes())?;
let values = flatten(self.values())?;

match values {
FlattenedArray::Primitive(v) => take(&v, &codes).map(|r| {
FlattenedArray::Primitive(
Arc::try_unwrap(r.into_any().downcast::<PrimitiveArray>().unwrap())
.expect("Expected take on PrimitiveArray array to produce new array"),
)
}),
FlattenedArray::VarBin(vb) => take(&vb, &codes).map(|r| {
FlattenedArray::VarBin(
Arc::try_unwrap(r.into_any().downcast::<VarBinArray>().unwrap())
.expect("Expected take on VarBin array to produce new array"),
)
}),
_ => Err(VortexError::InvalidArgument(
"Only VarBin and Primitive values array are supported".into(),
)),
}
}
}

#[cfg(test)]
mod test {
use vortex::array::downcast::DowncastArrayBuiltin;
use vortex::array::primitive::PrimitiveArray;
use vortex::array::varbin::VarBinArray;
use vortex::array::Array;
use vortex::compute::flatten::{flatten_primitive, flatten_varbin};
use vortex_schema::{DType, Nullability};

use crate::{dict_encode_typed_primitive, dict_encode_varbin, DictArray};

#[test]
fn flatten_nullable_primitive() {
let reference =
PrimitiveArray::from_iter(vec![Some(42), Some(-9), None, Some(42), None, Some(-9)]);
let (codes, values) = dict_encode_typed_primitive::<i32>(&reference);
let dict = DictArray::new(codes.to_array(), values.to_array());
let flattened_dict = flatten_primitive(&dict).unwrap();
assert_eq!(flattened_dict.buffer(), reference.buffer());
}

#[test]
fn flatten_nullable_varbin() {
let reference = VarBinArray::from_iter(
vec![Some("a"), Some("b"), None, Some("a"), None, Some("b")],
DType::Utf8(Nullability::Nullable),
);
let (codes, values) = dict_encode_varbin(&reference);
let dict = DictArray::new(codes.to_array(), values.to_array());
let flattened_dict = flatten_varbin(&dict).unwrap();
assert_eq!(
flattened_dict.offsets().as_primitive().buffer(),
reference.offsets().as_primitive().buffer()
);
assert_eq!(
flattened_dict.bytes().as_primitive().buffer(),
reference.bytes().as_primitive().buffer()
);
}
}

0 comments on commit eb32ea0

Please sign in to comment.