Skip to content

Commit

Permalink
VarBin builder
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Mar 26, 2024
1 parent f7abad4 commit 9f98535
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 3 deletions.
2 changes: 1 addition & 1 deletion bench-vortex/src/taxi_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub fn write_taxi_data() -> PathBuf {

// FIXME(ngates): the compressor should handle batch size.
let reader = builder
.with_limit(100)
// .with_limit(100)
// .with_projection(_mask)
.with_batch_size(65_536)
.build()
Expand Down
7 changes: 7 additions & 0 deletions pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::dtype::PyDType;
use crate::error::PyVortexError;
use crate::vortex_arrow;
use std::sync::Arc;
use vortex::compute::take::take;

#[pyclass(name = "Array", module = "vortex", sequence, subclass)]
pub struct PyArray {
Expand Down Expand Up @@ -196,6 +197,12 @@ impl PyArray {
fn dtype(self_: PyRef<Self>) -> PyResult<Py<PyDType>> {
PyDType::wrap(self_.py(), self_.inner.dtype().clone())
}

fn take(&self, indices: PyRef<'_, PyArray>) -> PyResult<Py<PyArray>> {
take(&self.inner, indices.unwrap())
.map_err(PyVortexError::map_err)
.and_then(|arr| PyArray::wrap(indices.py(), arr))
}
}

#[pymethods]
Expand Down
8 changes: 8 additions & 0 deletions pyvortex/test/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ def test_varbin_array_round_trip():
assert arr.to_pyarrow().combine_chunks() == a


def test_varbin_array_take():
a = vortex.encode(pa.array(["a", "b", "c", "d"]))
# TODO(ngates): ensure we correctly round-trip to a string and not large_string
assert a.take(vortex.encode(pa.array([0, 2]))).to_pyarrow().combine_chunks() == pa.array(
["a", "c"], type=pa.large_utf8(),
)


def test_empty_array():
a = pa.array([], type=pa.uint8())
primitive = vortex.encode(a)
Expand Down
40 changes: 39 additions & 1 deletion vortex-array/src/array/varbin/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,46 @@ impl<O: NativePType + PrimInt> VarBinBuilder<O> {
pub fn finish(self, dtype: DType) -> VarBinArray {
let offsets = PrimitiveArray::from(self.offsets);
let data = PrimitiveArray::from(self.data);

// TODO(ngates): create our own ValidityBuilder that doesn't need mut or clone on finish.
let validity = self.validity.finish_cloned().map(Validity::from);
let nulls = self.validity.finish_cloned();

let validity = if dtype.is_nullable() {
Some(
nulls
.map(Validity::from)
.unwrap_or_else(|| Validity::Valid(offsets.len() - 1)),
)
} else {
assert!(nulls.is_none(), "dtype and validity mismatch");
None
};

VarBinArray::new(offsets.into_array(), data.into_array(), dtype, validity)
}
}

#[cfg(test)]
mod test {
use crate::array::varbin::builder::VarBinBuilder;
use crate::array::Array;
use crate::compute::scalar_at::scalar_at;
use crate::scalar::Scalar;
use crate::validity::ArrayValidity;
use vortex_schema::DType;
use vortex_schema::Nullability::Nullable;

#[test]
fn test_builder() {
let mut builder = VarBinBuilder::<i32>::with_capacity(0);
builder.push(Some(b"hello"));
builder.push(None);
builder.push(Some(b"world"));
let array = builder.finish(DType::Utf8(Nullable));

assert_eq!(array.len(), 3);
assert_eq!(array.nullability(), Nullable);
assert_eq!(scalar_at(&array, 0).unwrap(), Scalar::from("hello"));
assert!(scalar_at(&array, 1).unwrap().is_null());
}
}
2 changes: 1 addition & 1 deletion vortex-array/src/array/varbin/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn take<I: NativePType + PrimInt, O: NativePType + PrimInt>(
for &idx in indices {
let idx = idx.to_usize().unwrap();
let start = offsets[idx].to_usize().unwrap();
let stop = offsets[idx].to_usize().unwrap();
let stop = offsets[idx + 1].to_usize().unwrap();
builder.push(Some(&data[start..stop]));
}
builder.finish(dtype)
Expand Down

0 comments on commit 9f98535

Please sign in to comment.