diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index 86576a4992..b8f65db568 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -1,16 +1,14 @@ use paste::paste; use pyo3::prelude::*; -use vortex::array::bool::{Bool, BoolArray, BoolEncoding}; -use vortex::array::chunked::{Chunked, ChunkedArray, ChunkedEncoding}; -use vortex::array::constant::{Constant, ConstantArray, ConstantEncoding}; -use vortex::array::primitive::{Primitive, PrimitiveArray, PrimitiveEncoding}; -use vortex::array::sparse::{Sparse, SparseArray, SparseEncoding}; -use vortex::array::struct_::{Struct, StructArray, StructEncoding}; -use vortex::array::varbin::{VarBin, VarBinArray, VarBinEncoding}; -use vortex::array::varbinview::{VarBinView, VarBinViewArray, VarBinViewEncoding}; +use vortex::array::{ + Bool, BoolArray, BoolEncoding, Chunked, ChunkedArray, ChunkedEncoding, Constant, ConstantArray, + ConstantEncoding, Primitive, PrimitiveArray, PrimitiveEncoding, Sparse, SparseArray, + SparseEncoding, Struct, StructArray, StructEncoding, VarBin, VarBinArray, VarBinEncoding, + VarBinView, VarBinViewArray, VarBinViewEncoding, +}; use vortex::compute::take; use vortex::encoding::EncodingRef; -use vortex::{Array, ArrayDType, ArrayData, ArrayDef, IntoArray, IntoArrayData, ToArray}; +use vortex::{Array, ArrayDType, ArrayData, ArrayDef, ToArray}; use vortex_alp::{ALPArray, ALPEncoding, ALP}; use vortex_dict::{Dict, DictArray, DictEncoding}; use vortex_fastlanes::{ @@ -79,100 +77,97 @@ pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray"); impl PyArray { pub fn wrap(py: Python<'_>, inner: ArrayData) -> PyResult> { + let encoding_id = inner.encoding().id(); + let array = Array::from(inner); // This is the one place where we'd want to have owned kind enum but there's no other place this is used - match inner.encoding().id() { + match encoding_id { Bool::ID => PyBoolArray::wrap( py, - BoolArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BoolArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Chunked::ID => PyChunkedArray::wrap( py, - ChunkedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ChunkedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Constant::ID => PyConstantArray::wrap( py, - ConstantArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ConstantArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Primitive::ID => PyPrimitiveArray::wrap( py, - PrimitiveArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + PrimitiveArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Sparse::ID => PySparseArray::wrap( py, - SparseArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + SparseArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Struct::ID => PyStructArray::wrap( py, - StructArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + StructArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), VarBin::ID => PyVarBinArray::wrap( py, - VarBinArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + VarBinArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), VarBinView::ID => PyVarBinViewArray::wrap( py, - VarBinViewArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + VarBinViewArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Dict::ID => PyDictArray::wrap( py, - DictArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + DictArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), RunEnd::ID => PyRunEndArray::wrap( py, - RunEndArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + RunEndArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Delta::ID => PyDeltaArray::wrap( py, - DeltaArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + DeltaArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), FoR::ID => PyFoRArray::wrap( py, - FoRArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + FoRArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), BitPacked::ID => PyBitPackedArray::wrap( py, - BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), ALP::ID => PyALPArray::wrap( py, - ALPArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ALPArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), RoaringBool::ID => PyBitPackedArray::wrap( py, - BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), RoaringInt::ID => PyBitPackedArray::wrap( py, - BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), ZigZag::ID => PyZigZagArray::wrap( py, - ZigZagArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ZigZagArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), - _ => Py::new( - py, - Self { - inner: inner.into_array(), - }, - ), + _ => Py::new(py, Self { inner: array }), // ArrayKind::Other(other) => match other.encoding().id() { // // PyEnc chooses to expose certain encodings as first-class objects. // // For the remainder, we should have a generic EncArray implementation that supports basic functions. @@ -236,7 +231,7 @@ impl PyArray { fn take(&self, indices: PyRef<'_, Self>) -> PyResult> { take(&self.inner, indices.unwrap()) .map_err(PyVortexError::map_err) - .and_then(|arr| Self::wrap(indices.py(), arr.into_array_data())) + .and_then(|arr| Self::wrap(indices.py(), arr.into())) } } // diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index 911f0ac380..564cabce06 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -5,9 +5,9 @@ use arrow::pyarrow::FromPyArrow; use arrow::record_batch::RecordBatchReader; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use vortex::array::chunked::ChunkedArray; +use vortex::array::ChunkedArray; use vortex::arrow::{FromArrowArray, FromArrowType}; -use vortex::{ArrayData, IntoArray, IntoArrayData, ToArrayData}; +use vortex::Array; use vortex_dtype::DType; use crate::array::PyArray; @@ -25,8 +25,8 @@ pub fn encode(obj: &Bound) -> PyResult> { if obj.is_instance(&pa_array)? { let arrow_array = ArrowArrayData::from_pyarrow_bound(obj).map(make_array)?; - let enc_array = ArrayData::from_arrow(arrow_array, false); - PyArray::wrap(obj.py(), enc_array) + let enc_array = Array::from_arrow(arrow_array, false); + PyArray::wrap(obj.py(), enc_array.into()) } else if obj.is_instance(&chunked_array)? { let chunks: Vec> = obj.getattr("chunks")?.extract()?; let encoded_chunks = chunks @@ -34,7 +34,7 @@ pub fn encode(obj: &Bound) -> PyResult> { .map(|a| { ArrowArrayData::from_pyarrow_bound(a) .map(make_array) - .map(|a| ArrayData::from_arrow(a, false).into_array()) + .map(|a| Array::from_arrow(a, false)) }) .collect::>>()?; let dtype: DType = obj @@ -45,23 +45,20 @@ pub fn encode(obj: &Bound) -> PyResult> { obj.py(), ChunkedArray::try_new(encoded_chunks, dtype) .map_err(PyVortexError::map_err)? - .into_array_data(), + .into(), ) } else if obj.is_instance(&table)? { let array_stream = ArrowArrayStreamReader::from_pyarrow_bound(obj)?; let dtype = DType::from_arrow(array_stream.schema()); let chunks = array_stream .into_iter() - .map(|b| { - b.map(|bb| bb.to_array_data().into_array()) - .map_err(map_arrow_err) - }) + .map(|b| b.map(Array::from).map_err(map_arrow_err)) .collect::>>()?; PyArray::wrap( obj.py(), ChunkedArray::try_new(chunks, dtype) .map_err(PyVortexError::map_err)? - .into_array_data(), + .into(), ) } else { Err(PyValueError::new_err("Cannot convert object to enc array")) diff --git a/pyvortex/src/vortex_arrow.rs b/pyvortex/src/vortex_arrow.rs index cdb1d05a74..d74f049631 100644 --- a/pyvortex/src/vortex_arrow.rs +++ b/pyvortex/src/vortex_arrow.rs @@ -4,7 +4,7 @@ use arrow::pyarrow::ToPyArrow; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyList}; -use vortex::array::chunked::ChunkedArray; +use vortex::array::ChunkedArray; use vortex::{Array, IntoCanonical}; pub fn map_arrow_err(error: ArrowError) -> PyErr {