Skip to content

Commit

Permalink
fix(data): map type ID to child index before indexing a union child a…
Browse files Browse the repository at this point in the history
…rray (#4598)

* test: add a test for `MutableArrayData` and dense union

* fix(data): map type ID to child index before indexing union child array
  • Loading branch information
kawadakk authored Jul 31, 2023
1 parent b597a20 commit c663d88
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 4 deletions.
13 changes: 10 additions & 3 deletions arrow-data/src/transform/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend {
pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
let type_ids = array.buffer::<i8>(0);
let offsets = array.buffer::<i32>(1);
let arrow_schema::DataType::Union(src_fields, _) = array.data_type() else {
unreachable!();
};

Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
Expand All @@ -48,14 +51,18 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
.extend_from_slice(&type_ids[start..start + len]);

(start..start + len).for_each(|i| {
let type_id = type_ids[i] as usize;
let type_id = type_ids[i];
let child_index = src_fields
.iter()
.position(|(r, _)| r == type_id)
.expect("invalid union type ID");
let src_offset = offsets[i] as usize;
let child_data = &mut mutable.child_data[type_id];
let child_data = &mut mutable.child_data[child_index];
let dst_offset = child_data.len();

// Extend offsets
mutable.buffer2.push(dst_offset as i32);
mutable.child_data[type_id].extend(index, src_offset, src_offset + 1)
mutable.child_data[child_index].extend(index, src_offset, src_offset + 1)
})
},
)
Expand Down
59 changes: 58 additions & 1 deletion arrow/tests/array_transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use arrow::array::{
Array, ArrayRef, BooleanArray, Decimal128Array, DictionaryArray,
FixedSizeBinaryArray, Int16Array, Int32Array, Int64Array, Int64Builder, ListArray,
ListBuilder, MapBuilder, NullArray, StringArray, StringBuilder,
StringDictionaryBuilder, StructArray, UInt8Array,
StringDictionaryBuilder, StructArray, UInt8Array, UnionArray,
};
use arrow::datatypes::Int16Type;
use arrow_buffer::Buffer;
Expand Down Expand Up @@ -488,6 +488,63 @@ fn test_struct_many() {
assert_eq!(array, expected)
}

/// Regression test: `MutableArrayData` must copy a slice of a dense union whose
/// type IDs (84, 42) are not the same as the positions of its child arrays.
#[test]
fn test_union_dense() {
    // Every union in this test shares the same type-ID set and child fields,
    // so the construction is factored out; only the buffers and children vary.
    let build_union =
        |type_ids: Buffer, offsets: Buffer, ints: ArrayRef, strings: ArrayRef| {
            UnionArray::try_new(
                &[84, 42],
                type_ids,
                Some(offsets),
                vec![
                    (Field::new("int", DataType::Int32, false), ints),
                    (Field::new("string", DataType::Utf8, false), strings),
                ],
            )
            .unwrap()
        };

    // Source union: eight slots interleaving strings (type ID 42) and ints (type ID 84).
    let source_strings: ArrayRef = Arc::new(StringArray::from(vec![
        Some("joe"),
        Some("mark"),
        Some("doe"),
    ]));
    let source_ints: ArrayRef = Arc::new(Int32Array::from(
        (1..=5).map(Some).collect::<Vec<Option<i32>>>(),
    ));
    let source_data = build_union(
        Buffer::from_slice_ref([42i8, 84, 42, 84, 84, 42, 84, 84]),
        Buffer::from_slice_ref([0i32, 0, 1, 1, 2, 2, 3, 4]),
        source_ints,
        source_strings,
    )
    .into_data();

    // Copy slots through `MutableArrayData` using the same `extend(0, 4, 7)` call.
    let mut mutable = MutableArrayData::new(vec![&source_data], false, 0);
    mutable.extend(0, 4, 7);
    let actual = UnionArray::from(mutable.freeze());

    // Expected result: int 3, string "doe", int 4, with offsets rebased to the
    // freshly built child arrays.
    let expected_strings: ArrayRef = Arc::new(StringArray::from(vec![Some("doe")]));
    let expected_ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(3), Some(4)]));
    let expected = build_union(
        Buffer::from_slice_ref([84i8, 42, 84]),
        Buffer::from_slice_ref([0i32, 0, 1]),
        expected_ints,
        expected_strings,
    );

    assert_eq!(actual.to_data(), expected.to_data());
}

#[test]
fn test_binary_fixed_sized_offsets() {
let array = FixedSizeBinaryArray::try_from_iter(
Expand Down

0 comments on commit c663d88

Please sign in to comment.