Refactor specialized conversion traits into From and Into (#560)
We have a few traits that define conversion behavior between types which can instead be expressed with the standard `From`/`Into` traits, making the overall API more idiomatic.
As of now, this PR removes `ToArrayData` and `IntoArrayData`.
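For illustration, a minimal, self-contained sketch of the idiom this change adopts (the `RecordBatch` and `Array` types below are simplified stand-ins, not the real Vortex definitions): implementing `From` once gives callers both `Array::from(x)` and `x.into()` through the standard library's blanket `Into` impl, so no crate-specific conversion trait needs to be imported.

```rust
// Stand-in types for illustration only; not the real Vortex definitions.
struct RecordBatch(Vec<i64>);
struct Array(Vec<i64>);

// One `From` impl replaces a hand-rolled conversion trait like
// `ToArrayData`: the standard library's blanket impl then provides
// `Into<Array> for RecordBatch` automatically.
impl From<RecordBatch> for Array {
    fn from(batch: RecordBatch) -> Self {
        Array(batch.0)
    }
}

fn main() {
    // Both call styles work with no crate-specific trait in scope.
    let a = Array::from(RecordBatch(vec![1, 2, 3]));
    let b: Array = RecordBatch(vec![4, 5]).into();
    assert_eq!(a.0.len() + b.0.len(), 5);
}
```

This is why call sites in the diff below change from `batch.to_array_data().into_array()` to `Array::from(batch)` or a plain `.into()`.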
AdamGS authored Aug 7, 2024
1 parent a253273 commit 7f57019
Showing 27 changed files with 196 additions and 205 deletions.
4 changes: 2 additions & 2 deletions bench-vortex/benches/datafusion_benchmark.rs
@@ -15,7 +15,7 @@ use datafusion::prelude::{col, DataFrame, SessionContext};
use lazy_static::lazy_static;
use vortex::compress::CompressionStrategy;
use vortex::encoding::EncodingRef;
-use vortex::{Array, Context, IntoArray, ToArrayData};
+use vortex::{Array, Context};
use vortex_datafusion::memory::{VortexMemTable, VortexMemTableOptions};
use vortex_dict::DictEncoding;
use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding};
@@ -81,7 +81,7 @@ fn toy_dataset_arrow() -> RecordBatch {
}

fn toy_dataset_vortex(compress: bool) -> Array {
-let uncompressed = toy_dataset_arrow().to_array_data().into_array();
+let uncompressed = toy_dataset_arrow().into();

if !compress {
return uncompressed;
4 changes: 2 additions & 2 deletions bench-vortex/src/data_downloads.rs
@@ -11,7 +11,7 @@ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use tokio::runtime::Runtime;
use vortex::array::ChunkedArray;
use vortex::arrow::FromArrowType;
-use vortex::{IntoArray, ToArrayData};
+use vortex::{Array, IntoArray};
use vortex_dtype::DType;
use vortex_error::{VortexError, VortexResult};
use vortex_serde::io::TokioAdapter;
@@ -46,7 +46,7 @@ pub fn data_vortex_uncompressed(fname_out: &str, downloaded_data: PathBuf) -> Pa
let array = ChunkedArray::try_new(
reader
.into_iter()
-.map(|batch_result| batch_result.unwrap().to_array_data().into_array())
+.map(|batch_result| Array::from(batch_result.unwrap()))
.collect(),
dtype,
)
10 changes: 5 additions & 5 deletions bench-vortex/src/lib.rs
@@ -19,7 +19,7 @@ use vortex::array::ChunkedArray;
use vortex::arrow::FromArrowType;
use vortex::compress::CompressionStrategy;
use vortex::encoding::EncodingRef;
-use vortex::{Array, Context, IntoArray, ToArrayData};
+use vortex::{Array, Context, IntoArray};
use vortex_alp::ALPEncoding;
use vortex_datetime_parts::DateTimePartsEncoding;
use vortex_dict::DictEncoding;
@@ -188,7 +188,7 @@ pub fn compress_taxi_data() -> Array {
let chunks = reader
.into_iter()
.map(|batch_result| batch_result.unwrap())
-.map(|batch| batch.to_array_data().into_array())
+.map(Array::from)
.map(|array| {
uncompressed_size += array.nbytes();
compressor.compress(&array).unwrap()
@@ -262,7 +262,7 @@ mod test {
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use vortex::arrow::FromArrowArray;
use vortex::compress::CompressionStrategy;
-use vortex::{ArrayData, IntoArray, IntoCanonical};
+use vortex::{Array, IntoCanonical};
use vortex_sampling_compressor::SamplingCompressor;

use crate::taxi_data::taxi_data_parquet;
@@ -285,7 +285,7 @@
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
-let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into_array();
+let vortex_array = Array::from_arrow(arrow_array.clone(), false);
let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow();
assert_eq!(vortex_as_arrow.deref(), arrow_array.deref());
}
@@ -304,7 +304,7 @@
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
-let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into_array();
+let vortex_array = Array::from_arrow(arrow_array.clone(), false);

let compressed = compressor.compress(&vortex_array).unwrap();
let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow();
8 changes: 4 additions & 4 deletions bench-vortex/src/reader.rs
@@ -26,7 +26,7 @@ use vortex::array::{ChunkedArray, PrimitiveArray};
use vortex::arrow::FromArrowType;
use vortex::compress::CompressionStrategy;
use vortex::stream::ArrayStreamExt;
-use vortex::{Array, IntoArray, IntoCanonical, ToArrayData};
+use vortex::{Array, IntoArray, IntoCanonical};
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_error::{vortex_err, VortexResult};
@@ -98,7 +98,7 @@ pub fn compress_parquet_to_vortex(parquet_path: &Path) -> VortexResult<ChunkedAr
let chunks = reader
.map(|batch_result| batch_result.unwrap())
.map(|record_batch| {
-let vortex_array = record_batch.to_array_data().into_array();
+let vortex_array = Array::from(record_batch);
compressor.compress(&vortex_array).unwrap()
})
.collect_vec();
@@ -170,7 +170,7 @@ pub async fn take_vortex_object_store(
.take_rows(&indices_array)
.await?;
// For equivalence.... we flatten to make sure we're not cheating too much.
-Ok(taken.into_canonical()?.into_array())
+Ok(taken.into_canonical()?.into())
}

pub async fn take_vortex_tokio(path: &Path, indices: &[u64]) -> VortexResult<Array> {
@@ -181,7 +181,7 @@ pub async fn take_vortex_tokio(path: &Path, indices: &[u64]) -> VortexResult<Arr
.take_rows(&indices_array)
.await?;
// For equivalence.... we flatten to make sure we're not cheating too much.
-Ok(taken.into_canonical()?.into_array())
+Ok(taken.into_canonical()?.into())
}

pub async fn take_parquet_object_store(
4 changes: 2 additions & 2 deletions bench-vortex/src/tpch/mod.rs
@@ -9,7 +9,7 @@ use datafusion::datasource::MemTable;
use datafusion::prelude::{CsvReadOptions, ParquetReadOptions, SessionContext};
use vortex::array::ChunkedArray;
use vortex::arrow::FromArrowArray;
-use vortex::{Array, ArrayDType, ArrayData, IntoArray};
+use vortex::{Array, ArrayDType, IntoArray};
use vortex_datafusion::memory::VortexMemTableOptions;
use vortex_datafusion::SessionContextExt;

@@ -193,7 +193,7 @@ async fn register_vortex(
.iter()
.cloned()
.map(StructArray::from)
-.map(|struct_array| ArrayData::from_arrow(&struct_array, false).into_array())
+.map(|struct_array| Array::from_arrow(&struct_array, false))
.collect();

let dtype = chunks[0].dtype().clone();
4 changes: 2 additions & 2 deletions encodings/byte-bool/src/compute/mod.rs
@@ -5,7 +5,7 @@ use num_traits::AsPrimitive;
use vortex::compute::unary::{FillForwardFn, ScalarAtFn};
use vortex::compute::{ArrayCompute, CompareFn, SliceFn, TakeFn};
use vortex::validity::{ArrayValidity, Validity};
-use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant, ToArrayData};
+use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_integer_ptype, Nullability};
use vortex_error::VortexResult;
use vortex_expr::Operator;
@@ -136,7 +136,7 @@ impl CompareFn for ByteBoolArray {
impl FillForwardFn for ByteBoolArray {
fn fill_forward(&self) -> VortexResult<Array> {
if self.dtype().nullability() == Nullability::NonNullable {
-return Ok(self.to_array_data().into_array());
+return Ok(self.clone().into());
}

let validity = self.logical_validity().to_null_buffer()?.unwrap();
49 changes: 23 additions & 26 deletions pyvortex/src/array.rs
@@ -8,7 +8,7 @@ use vortex::array::{
};
use vortex::compute::take;
use vortex::encoding::EncodingRef;
-use vortex::{Array, ArrayDType, ArrayData, ArrayDef, IntoArray, IntoArrayData, ToArray};
+use vortex::{Array, ArrayDType, ArrayData, ArrayDef, ToArray};
use vortex_alp::{ALPArray, ALPEncoding, ALP};
use vortex_dict::{Dict, DictArray, DictEncoding};
use vortex_fastlanes::{
@@ -77,100 +77,97 @@ pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray");

impl PyArray {
pub fn wrap(py: Python<'_>, inner: ArrayData) -> PyResult<Py<Self>> {
+let encoding_id = inner.encoding().id();
+let array = Array::from(inner);
// This is the one place where we'd want to have owned kind enum but there's no other place this is used
-match inner.encoding().id() {
+match encoding_id {
Bool::ID => PyBoolArray::wrap(
py,
-BoolArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+BoolArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Chunked::ID => PyChunkedArray::wrap(
py,
-ChunkedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+ChunkedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Constant::ID => PyConstantArray::wrap(
py,
-ConstantArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+ConstantArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Primitive::ID => PyPrimitiveArray::wrap(
py,
-PrimitiveArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+PrimitiveArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Sparse::ID => PySparseArray::wrap(
py,
-SparseArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+SparseArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Struct::ID => PyStructArray::wrap(
py,
-StructArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+StructArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
VarBin::ID => PyVarBinArray::wrap(
py,
-VarBinArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+VarBinArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
VarBinView::ID => PyVarBinViewArray::wrap(
py,
-VarBinViewArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+VarBinViewArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Dict::ID => PyDictArray::wrap(
py,
-DictArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+DictArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
RunEnd::ID => PyRunEndArray::wrap(
py,
-RunEndArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+RunEndArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Delta::ID => PyDeltaArray::wrap(
py,
-DeltaArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+DeltaArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
FoR::ID => PyFoRArray::wrap(
py,
-FoRArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+FoRArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
BitPacked::ID => PyBitPackedArray::wrap(
py,
-BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),

ALP::ID => PyALPArray::wrap(
py,
-ALPArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+ALPArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
RoaringBool::ID => PyBitPackedArray::wrap(
py,
-BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
RoaringInt::ID => PyBitPackedArray::wrap(
py,
-BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
ZigZag::ID => PyZigZagArray::wrap(
py,
-ZigZagArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
+ZigZagArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
-_ => Py::new(
-py,
-Self {
-inner: inner.into_array(),
-},
-),
+_ => Py::new(py, Self { inner: array }),
// ArrayKind::Other(other) => match other.encoding().id() {
// // PyEnc chooses to expose certain encodings as first-class objects.
// // For the remainder, we should have a generic EncArray implementation that supports basic functions.
@@ -234,7 +231,7 @@ impl PyArray {
fn take(&self, indices: PyRef<'_, Self>) -> PyResult<Py<Self>> {
take(&self.inner, indices.unwrap())
.map_err(PyVortexError::map_err)
-.and_then(|arr| Self::wrap(indices.py(), arr.into_array_data()))
+.and_then(|arr| Self::wrap(indices.py(), arr.into()))
}
}
//
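One detail of the `PyArray::wrap` change above is worth calling out: `Array::from(inner)` takes `inner` by value, so the encoding id has to be read out before the conversion. A minimal stand-alone sketch of that move-ordering constraint (the types below are simplified stand-ins, not the real Vortex definitions):

```rust
// Simplified stand-ins; the real types live in vortex-array.
#[derive(Clone, Copy, PartialEq, Debug)]
struct EncodingId(u16);

struct ArrayData {
    id: EncodingId,
}

struct Array {
    id: EncodingId,
}

impl From<ArrayData> for Array {
    fn from(data: ArrayData) -> Self {
        Array { id: data.id }
    }
}

fn main() {
    let inner = ArrayData { id: EncodingId(7) };
    // Read the id first: `Array::from` consumes `inner`, so accessing
    // `inner.id` after the conversion would fail to compile.
    let encoding_id = inner.id;
    let array = Array::from(inner);
    assert_eq!(array.id, encoding_id);
}
```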
17 changes: 7 additions & 10 deletions pyvortex/src/encode.rs
@@ -7,7 +7,7 @@ use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use vortex::array::ChunkedArray;
use vortex::arrow::{FromArrowArray, FromArrowType};
-use vortex::{ArrayData, IntoArray, IntoArrayData, ToArrayData};
+use vortex::Array;
use vortex_dtype::DType;

use crate::array::PyArray;
@@ -25,16 +25,16 @@ pub fn encode(obj: &Bound<PyAny>) -> PyResult<Py<PyArray>> {

if obj.is_instance(&pa_array)? {
let arrow_array = ArrowArrayData::from_pyarrow_bound(obj).map(make_array)?;
-let enc_array = ArrayData::from_arrow(arrow_array, false);
-PyArray::wrap(obj.py(), enc_array)
+let enc_array = Array::from_arrow(arrow_array, false);
+PyArray::wrap(obj.py(), enc_array.into())
} else if obj.is_instance(&chunked_array)? {
let chunks: Vec<Bound<PyAny>> = obj.getattr("chunks")?.extract()?;
let encoded_chunks = chunks
.iter()
.map(|a| {
ArrowArrayData::from_pyarrow_bound(a)
.map(make_array)
-.map(|a| ArrayData::from_arrow(a, false).into_array())
+.map(|a| Array::from_arrow(a, false))
})
.collect::<PyResult<Vec<_>>>()?;
let dtype: DType = obj
Expand All @@ -45,23 +45,20 @@ pub fn encode(obj: &Bound<PyAny>) -> PyResult<Py<PyArray>> {
obj.py(),
ChunkedArray::try_new(encoded_chunks, dtype)
.map_err(PyVortexError::map_err)?
-.into_array_data(),
+.into(),
)
} else if obj.is_instance(&table)? {
let array_stream = ArrowArrayStreamReader::from_pyarrow_bound(obj)?;
let dtype = DType::from_arrow(array_stream.schema());
let chunks = array_stream
.into_iter()
-.map(|b| {
-b.map(|bb| bb.to_array_data().into_array())
-.map_err(map_arrow_err)
-})
+.map(|b| b.map(Array::from).map_err(map_arrow_err))
.collect::<PyResult<Vec<_>>>()?;
PyArray::wrap(
obj.py(),
ChunkedArray::try_new(chunks, dtype)
.map_err(PyVortexError::map_err)?
-.into_array_data(),
+.into(),
)
} else {
Err(PyValueError::new_err("Cannot convert object to enc array"))
6 changes: 3 additions & 3 deletions vortex-array/src/array/bool/compute/boolean.rs
@@ -5,7 +5,7 @@ use vortex_error::VortexResult;
use crate::array::BoolArray;
use crate::arrow::FromArrowArray;
use crate::compute::{AndFn, OrFn};
-use crate::{Array, ArrayData, IntoArray, IntoCanonical};
+use crate::{Array, IntoCanonical};

impl OrFn for BoolArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
@@ -17,7 +17,7 @@ impl OrFn for BoolArray {

let array = boolean::or(lhs, rhs)?;

-Ok(ArrayData::from_arrow(&array, true).into_array())
+Ok(Array::from_arrow(&array, true))
}
}

@@ -31,6 +31,6 @@ impl AndFn for BoolArray {

let array = boolean::and(lhs, rhs)?;

-Ok(ArrayData::from_arrow(&array, true).into_array())
+Ok(Array::from_arrow(&array, true))
}
}
4 changes: 2 additions & 2 deletions vortex-array/src/array/bool/compute/fill.rs
@@ -4,12 +4,12 @@ use vortex_error::VortexResult;
use crate::array::BoolArray;
use crate::compute::unary::FillForwardFn;
use crate::validity::ArrayValidity;
-use crate::{Array, ArrayDType, IntoArray, ToArrayData};
+use crate::{Array, ArrayDType, IntoArray};

impl FillForwardFn for BoolArray {
fn fill_forward(&self) -> VortexResult<Array> {
if self.dtype().nullability() == Nullability::NonNullable {
-return Ok(self.to_array_data().into_array());
+return Ok(self.clone().into());
}

let validity = self.logical_validity().to_null_buffer()?.unwrap();