Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor specialized conversion traits into From and Into #560

Merged
merged 6 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bench-vortex/benches/datafusion_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use datafusion::prelude::{col, DataFrame, SessionContext};
use lazy_static::lazy_static;
use vortex::compress::CompressionStrategy;
use vortex::encoding::EncodingRef;
use vortex::{Array, Context, IntoArray, ToArrayData};
use vortex::{Array, Context};
use vortex_datafusion::memory::{VortexMemTable, VortexMemTableOptions};
use vortex_dict::DictEncoding;
use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding};
Expand Down Expand Up @@ -81,7 +81,7 @@ fn toy_dataset_arrow() -> RecordBatch {
}

fn toy_dataset_vortex(compress: bool) -> Array {
let uncompressed = toy_dataset_arrow().to_array_data().into_array();
let uncompressed = toy_dataset_arrow().into();

if !compress {
return uncompressed;
Expand Down
4 changes: 2 additions & 2 deletions bench-vortex/src/data_downloads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use tokio::runtime::Runtime;
use vortex::array::ChunkedArray;
use vortex::arrow::FromArrowType;
use vortex::{IntoArray, ToArrayData};
use vortex::{Array, IntoArray};
use vortex_dtype::DType;
use vortex_error::{VortexError, VortexResult};
use vortex_serde::io::TokioAdapter;
Expand Down Expand Up @@ -46,7 +46,7 @@ pub fn data_vortex_uncompressed(fname_out: &str, downloaded_data: PathBuf) -> Pa
let array = ChunkedArray::try_new(
reader
.into_iter()
.map(|batch_result| batch_result.unwrap().to_array_data().into_array())
.map(|batch_result| Array::from(batch_result.unwrap()))
.collect(),
dtype,
)
Expand Down
10 changes: 5 additions & 5 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use vortex::array::ChunkedArray;
use vortex::arrow::FromArrowType;
use vortex::compress::CompressionStrategy;
use vortex::encoding::EncodingRef;
use vortex::{Array, Context, IntoArray, ToArrayData};
use vortex::{Array, Context, IntoArray};
use vortex_alp::ALPEncoding;
use vortex_datetime_parts::DateTimePartsEncoding;
use vortex_dict::DictEncoding;
Expand Down Expand Up @@ -188,7 +188,7 @@ pub fn compress_taxi_data() -> Array {
let chunks = reader
.into_iter()
.map(|batch_result| batch_result.unwrap())
.map(|batch| batch.to_array_data().into_array())
.map(Array::from)
.map(|array| {
uncompressed_size += array.nbytes();
compressor.compress(&array).unwrap()
Expand Down Expand Up @@ -262,7 +262,7 @@ mod test {
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use vortex::arrow::FromArrowArray;
use vortex::compress::CompressionStrategy;
use vortex::{ArrayData, IntoArray, IntoCanonical};
use vortex::{Array, ArrayData, IntoCanonical};
use vortex_sampling_compressor::SamplingCompressor;

use crate::taxi_data::taxi_data_parquet;
Expand All @@ -285,7 +285,7 @@ mod test {
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into_array();
let vortex_array: Array = ArrayData::from_arrow(arrow_array.clone(), false).into();
let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow();
assert_eq!(vortex_as_arrow.deref(), arrow_array.deref());
}
Expand All @@ -304,7 +304,7 @@ mod test {
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into_array();
let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into();

let compressed = compressor.compress(&vortex_array).unwrap();
let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow();
Expand Down
8 changes: 4 additions & 4 deletions bench-vortex/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use vortex::array::{ChunkedArray, PrimitiveArray};
use vortex::arrow::FromArrowType;
use vortex::compress::CompressionStrategy;
use vortex::stream::ArrayStreamExt;
use vortex::{Array, IntoArray, IntoCanonical, ToArrayData};
use vortex::{Array, IntoArray, IntoCanonical};
use vortex_buffer::Buffer;
use vortex_dtype::DType;
use vortex_error::{vortex_err, VortexResult};
Expand Down Expand Up @@ -98,7 +98,7 @@ pub fn compress_parquet_to_vortex(parquet_path: &Path) -> VortexResult<ChunkedAr
let chunks = reader
.map(|batch_result| batch_result.unwrap())
.map(|record_batch| {
let vortex_array = record_batch.to_array_data().into_array();
let vortex_array = Array::from(record_batch);
compressor.compress(&vortex_array).unwrap()
})
.collect_vec();
Expand Down Expand Up @@ -170,7 +170,7 @@ pub async fn take_vortex_object_store(
.take_rows(&indices_array)
.await?;
// For equivalence.... we flatten to make sure we're not cheating too much.
Ok(taken.into_canonical()?.into_array())
Ok(taken.into_canonical()?.into())
}

pub async fn take_vortex_tokio(path: &Path, indices: &[u64]) -> VortexResult<Array> {
Expand All @@ -181,7 +181,7 @@ pub async fn take_vortex_tokio(path: &Path, indices: &[u64]) -> VortexResult<Arr
.take_rows(&indices_array)
.await?;
// For equivalence.... we flatten to make sure we're not cheating too much.
Ok(taken.into_canonical()?.into_array())
Ok(taken.into_canonical()?.into())
}

pub async fn take_parquet_object_store(
Expand Down
2 changes: 1 addition & 1 deletion bench-vortex/src/tpch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ async fn register_vortex(
.iter()
.cloned()
.map(StructArray::from)
.map(|struct_array| ArrayData::from_arrow(&struct_array, false).into_array())
.map(|struct_array| ArrayData::from_arrow(&struct_array, false).into())
.collect();

let dtype = chunks[0].dtype().clone();
Expand Down
4 changes: 2 additions & 2 deletions encodings/byte-bool/src/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use num_traits::AsPrimitive;
use vortex::compute::unary::{FillForwardFn, ScalarAtFn};
use vortex::compute::{ArrayCompute, CompareFn, SliceFn, TakeFn};
use vortex::validity::{ArrayValidity, Validity};
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant, ToArrayData};
use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant};
use vortex_dtype::{match_each_integer_ptype, Nullability};
use vortex_error::VortexResult;
use vortex_expr::Operator;
Expand Down Expand Up @@ -136,7 +136,7 @@ impl CompareFn for ByteBoolArray {
impl FillForwardFn for ByteBoolArray {
fn fill_forward(&self) -> VortexResult<Array> {
if self.dtype().nullability() == Nullability::NonNullable {
return Ok(self.to_array_data().into_array());
return Ok(self.clone().into());
}

let validity = self.logical_validity().to_null_buffer()?.unwrap();
Expand Down
49 changes: 23 additions & 26 deletions pyvortex/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use vortex::array::{
};
use vortex::compute::take;
use vortex::encoding::EncodingRef;
use vortex::{Array, ArrayDType, ArrayData, ArrayDef, IntoArray, IntoArrayData, ToArray};
use vortex::{Array, ArrayDType, ArrayData, ArrayDef, ToArray};
use vortex_alp::{ALPArray, ALPEncoding, ALP};
use vortex_dict::{Dict, DictArray, DictEncoding};
use vortex_fastlanes::{
Expand Down Expand Up @@ -77,100 +77,97 @@ pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray");

impl PyArray {
pub fn wrap(py: Python<'_>, inner: ArrayData) -> PyResult<Py<Self>> {
let encoding_id = inner.encoding().id();
let array = Array::from(inner);
// This is the one place where we'd want to have owned kind enum but there's no other place this is used
match inner.encoding().id() {
match encoding_id {
Bool::ID => PyBoolArray::wrap(
py,
BoolArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
BoolArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Chunked::ID => PyChunkedArray::wrap(
py,
ChunkedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
ChunkedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Constant::ID => PyConstantArray::wrap(
py,
ConstantArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
ConstantArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Primitive::ID => PyPrimitiveArray::wrap(
py,
PrimitiveArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
PrimitiveArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Sparse::ID => PySparseArray::wrap(
py,
SparseArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
SparseArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Struct::ID => PyStructArray::wrap(
py,
StructArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
StructArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
VarBin::ID => PyVarBinArray::wrap(
py,
VarBinArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
VarBinArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
VarBinView::ID => PyVarBinViewArray::wrap(
py,
VarBinViewArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
VarBinViewArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Dict::ID => PyDictArray::wrap(
py,
DictArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
DictArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
RunEnd::ID => PyRunEndArray::wrap(
py,
RunEndArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
RunEndArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
Delta::ID => PyDeltaArray::wrap(
py,
DeltaArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
DeltaArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
FoR::ID => PyFoRArray::wrap(
py,
FoRArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
FoRArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
BitPacked::ID => PyBitPackedArray::wrap(
py,
BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),

ALP::ID => PyALPArray::wrap(
py,
ALPArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
ALPArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
RoaringBool::ID => PyBitPackedArray::wrap(
py,
BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
RoaringInt::ID => PyBitPackedArray::wrap(
py,
BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
ZigZag::ID => PyZigZagArray::wrap(
py,
ZigZagArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?,
ZigZagArray::try_from(array).map_err(PyVortexError::map_err)?,
)?
.extract(py),
_ => Py::new(
py,
Self {
inner: inner.into_array(),
},
),
_ => Py::new(py, Self { inner: array }),
// ArrayKind::Other(other) => match other.encoding().id() {
// // PyEnc chooses to expose certain encodings as first-class objects.
// // For the remainder, we should have a generic EncArray implementation that supports basic functions.
Expand Down Expand Up @@ -234,7 +231,7 @@ impl PyArray {
fn take(&self, indices: PyRef<'_, Self>) -> PyResult<Py<Self>> {
take(&self.inner, indices.unwrap())
.map_err(PyVortexError::map_err)
.and_then(|arr| Self::wrap(indices.py(), arr.into_array_data()))
.and_then(|arr| Self::wrap(indices.py(), arr.into()))
}
}
//
Expand Down
13 changes: 5 additions & 8 deletions pyvortex/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use vortex::array::ChunkedArray;
use vortex::arrow::{FromArrowArray, FromArrowType};
use vortex::{ArrayData, IntoArray, IntoArrayData, ToArrayData};
use vortex::{Array, ArrayData};
use vortex_dtype::DType;

use crate::array::PyArray;
Expand All @@ -34,7 +34,7 @@ pub fn encode(obj: &Bound<PyAny>) -> PyResult<Py<PyArray>> {
.map(|a| {
ArrowArrayData::from_pyarrow_bound(a)
.map(make_array)
.map(|a| ArrayData::from_arrow(a, false).into_array())
.map(|a| ArrayData::from_arrow(a, false).into())
})
.collect::<PyResult<Vec<_>>>()?;
let dtype: DType = obj
Expand All @@ -45,23 +45,20 @@ pub fn encode(obj: &Bound<PyAny>) -> PyResult<Py<PyArray>> {
obj.py(),
ChunkedArray::try_new(encoded_chunks, dtype)
.map_err(PyVortexError::map_err)?
.into_array_data(),
.into(),
)
} else if obj.is_instance(&table)? {
let array_stream = ArrowArrayStreamReader::from_pyarrow_bound(obj)?;
let dtype = DType::from_arrow(array_stream.schema());
let chunks = array_stream
.into_iter()
.map(|b| {
b.map(|bb| bb.to_array_data().into_array())
.map_err(map_arrow_err)
})
.map(|b| b.map(Array::from).map_err(map_arrow_err))
.collect::<PyResult<Vec<_>>>()?;
PyArray::wrap(
obj.py(),
ChunkedArray::try_new(chunks, dtype)
.map_err(PyVortexError::map_err)?
.into_array_data(),
.into(),
)
} else {
Err(PyValueError::new_err("Cannot convert object to enc array"))
Expand Down
6 changes: 3 additions & 3 deletions vortex-array/src/array/bool/compute/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use vortex_error::VortexResult;
use crate::array::BoolArray;
use crate::arrow::FromArrowArray;
use crate::compute::{AndFn, OrFn};
use crate::{Array, ArrayData, IntoArray, IntoCanonical};
use crate::{Array, ArrayData, IntoCanonical};

impl OrFn for BoolArray {
fn or(&self, array: &Array) -> VortexResult<Array> {
Expand All @@ -17,7 +17,7 @@ impl OrFn for BoolArray {

let array = boolean::or(lhs, rhs)?;

Ok(ArrayData::from_arrow(&array, true).into_array())
Ok(ArrayData::from_arrow(&array, true).into())
}
}

Expand All @@ -31,6 +31,6 @@ impl AndFn for BoolArray {

let array = boolean::and(lhs, rhs)?;

Ok(ArrayData::from_arrow(&array, true).into_array())
Ok(ArrayData::from_arrow(&array, true).into())
}
}
4 changes: 2 additions & 2 deletions vortex-array/src/array/bool/compute/fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ use vortex_error::VortexResult;
use crate::array::BoolArray;
use crate::compute::unary::FillForwardFn;
use crate::validity::ArrayValidity;
use crate::{Array, ArrayDType, IntoArray, ToArrayData};
use crate::{Array, ArrayDType, IntoArray};

impl FillForwardFn for BoolArray {
fn fill_forward(&self) -> VortexResult<Array> {
if self.dtype().nullability() == Nullability::NonNullable {
return Ok(self.to_array_data().into_array());
return Ok(self.clone().into());
}

let validity = self.logical_validity().to_null_buffer()?.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/constant/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ impl CompareFn for ConstantArray {
Operator::Lte => arrow_ord::cmp::lt_eq(datum.as_ref(), &rhs)?,
};

Ok(ArrayData::from_arrow(&boolean_array, true).into_array())
Ok(ArrayData::from_arrow(&boolean_array, true).into())
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions vortex-array/src/array/datetime/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use vortex_dtype::{DType, ExtDType, ExtID};

use crate::array::datetime::TimeUnit;
use crate::array::extension::ExtensionArray;
use crate::{Array, ArrayDType, ArrayData, IntoArrayData};
use crate::{Array, ArrayDType, ArrayData};

mod from;

Expand Down Expand Up @@ -224,8 +224,8 @@ impl TemporalArray {
}
}

impl IntoArrayData for TemporalArray {
fn into_array_data(self) -> ArrayData {
self.ext.into_array_data()
impl From<TemporalArray> for ArrayData {
fn from(value: TemporalArray) -> Self {
value.ext.into()
}
}
Loading
Loading