From 7f570199cd8b99c4476cc2c515cd5355d7bab39f Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 7 Aug 2024 11:13:58 +0100 Subject: [PATCH] Refactor specialized conversion traits into `From` and `Into` (#560) We have a few traits that define conversion behavior between types that can be expressed by the usual `From`/`Into` traits, making the overall API somewhat more idiomatic. As of now, this PR removes `ToArrayData` and `IntoArrayData`. --- bench-vortex/benches/datafusion_benchmark.rs | 4 +- bench-vortex/src/data_downloads.rs | 4 +- bench-vortex/src/lib.rs | 10 +- bench-vortex/src/reader.rs | 8 +- bench-vortex/src/tpch/mod.rs | 4 +- encodings/byte-bool/src/compute/mod.rs | 4 +- pyvortex/src/array.rs | 49 ++++---- pyvortex/src/encode.rs | 17 ++- .../src/array/bool/compute/boolean.rs | 6 +- vortex-array/src/array/bool/compute/fill.rs | 4 +- vortex-array/src/array/constant/compute.rs | 4 +- vortex-array/src/array/datetime/temporal.rs | 14 ++- .../src/array/primitive/compute/fill.rs | 4 +- vortex-array/src/array/varbinview/mod.rs | 28 ++--- vortex-array/src/arrow/array.rs | 105 +++++++++--------- vortex-array/src/arrow/recordbatch.rs | 22 ++-- vortex-array/src/canonical.rs | 20 ++-- vortex-array/src/compute/compare.rs | 4 +- vortex-array/src/compute/filter.rs | 4 +- vortex-array/src/compute/take.rs | 4 +- vortex-array/src/data.rs | 17 ++- vortex-array/src/implementation.rs | 25 +++-- vortex-array/src/lib.rs | 18 --- vortex-array/src/tree.rs | 5 +- vortex-datafusion/src/plans.rs | 5 +- vortex-sampling-compressor/src/lib.rs | 2 +- vortex-sampling-compressor/tests/smoketest.rs | 10 +- 27 files changed, 196 insertions(+), 205 deletions(-) diff --git a/bench-vortex/benches/datafusion_benchmark.rs b/bench-vortex/benches/datafusion_benchmark.rs index 7e2ce50e8f..5e81802bcb 100644 --- a/bench-vortex/benches/datafusion_benchmark.rs +++ b/bench-vortex/benches/datafusion_benchmark.rs @@ -15,7 +15,7 @@ use datafusion::prelude::{col, DataFrame, SessionContext}; use lazy_static::lazy_static; use vortex::compress::CompressionStrategy; use vortex::encoding::EncodingRef; -use vortex::{Array, Context, IntoArray, ToArrayData}; +use vortex::{Array, Context}; use vortex_datafusion::memory::{VortexMemTable, VortexMemTableOptions}; use vortex_dict::DictEncoding; use vortex_fastlanes::{BitPackedEncoding, DeltaEncoding, FoREncoding}; @@ -81,7 +81,7 @@ fn toy_dataset_arrow() -> RecordBatch { } fn toy_dataset_vortex(compress: bool) -> Array { - let uncompressed = toy_dataset_arrow().to_array_data().into_array(); + let uncompressed = toy_dataset_arrow().into(); if !compress { return uncompressed; diff --git a/bench-vortex/src/data_downloads.rs b/bench-vortex/src/data_downloads.rs index ace58af7ec..2cdeea796d 100644 --- a/bench-vortex/src/data_downloads.rs +++ b/bench-vortex/src/data_downloads.rs @@ -11,7 +11,7 @@ use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use tokio::runtime::Runtime; use vortex::array::ChunkedArray; use vortex::arrow::FromArrowType; -use vortex::{IntoArray, ToArrayData}; +use vortex::{Array, IntoArray}; use vortex_dtype::DType; use vortex_error::{VortexError, VortexResult}; use vortex_serde::io::TokioAdapter; @@ -46,7 +46,7 @@ pub fn data_vortex_uncompressed(fname_out: &str, downloaded_data: PathBuf) -> Pa let array = ChunkedArray::try_new( reader .into_iter() - .map(|batch_result| batch_result.unwrap().to_array_data().into_array()) + .map(|batch_result| Array::from(batch_result.unwrap())) .collect(), dtype, ) diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs 
index 0574ae341b..216ef859b1 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -19,7 +19,7 @@ use vortex::array::ChunkedArray; use vortex::arrow::FromArrowType; use vortex::compress::CompressionStrategy; use vortex::encoding::EncodingRef; -use vortex::{Array, Context, IntoArray, ToArrayData}; +use vortex::{Array, Context, IntoArray}; use vortex_alp::ALPEncoding; use vortex_datetime_parts::DateTimePartsEncoding; use vortex_dict::DictEncoding; @@ -188,7 +188,7 @@ pub fn compress_taxi_data() -> Array { let chunks = reader .into_iter() .map(|batch_result| batch_result.unwrap()) - .map(|batch| batch.to_array_data().into_array()) + .map(Array::from) .map(|array| { uncompressed_size += array.nbytes(); compressor.compress(&array).unwrap() @@ -262,7 +262,7 @@ mod test { use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use vortex::arrow::FromArrowArray; use vortex::compress::CompressionStrategy; - use vortex::{ArrayData, IntoArray, IntoCanonical}; + use vortex::{Array, IntoCanonical}; use vortex_sampling_compressor::SamplingCompressor; use crate::taxi_data::taxi_data_parquet; @@ -285,7 +285,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into_array(); + let vortex_array = Array::from_arrow(arrow_array.clone(), false); let vortex_as_arrow = vortex_array.into_canonical().unwrap().into_arrow(); assert_eq!(vortex_as_arrow.deref(), arrow_array.deref()); } @@ -304,7 +304,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayData::from_arrow(arrow_array.clone(), false).into_array(); + let vortex_array = Array::from_arrow(arrow_array.clone(), false); let compressed = compressor.compress(&vortex_array).unwrap(); let compressed_as_arrow = compressed.into_canonical().unwrap().into_arrow(); diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs index 9a3ccc16ae..bef82d4e57 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -26,7 +26,7 @@ use vortex::array::{ChunkedArray, PrimitiveArray}; use vortex::arrow::FromArrowType; use vortex::compress::CompressionStrategy; use vortex::stream::ArrayStreamExt; -use vortex::{Array, IntoArray, IntoCanonical, ToArrayData}; +use vortex::{Array, IntoArray, IntoCanonical}; use vortex_buffer::Buffer; use vortex_dtype::DType; use vortex_error::{vortex_err, VortexResult}; @@ -98,7 +98,7 @@ pub fn compress_parquet_to_vortex(parquet_path: &Path) -> VortexResult VortexResult { @@ -181,7 +181,7 @@ pub async fn take_vortex_tokio(path: &Path, indices: &[u64]) -> VortexResult VortexResult { if self.dtype().nullability() == Nullability::NonNullable { - return Ok(self.to_array_data().into_array()); + return Ok(self.clone().into()); } let validity = self.logical_validity().to_null_buffer()?.unwrap(); diff --git a/pyvortex/src/array.rs b/pyvortex/src/array.rs index cc21aa8f74..b8f65db568 100644 --- a/pyvortex/src/array.rs +++ b/pyvortex/src/array.rs @@ -8,7 +8,7 @@ use vortex::array::{ }; use vortex::compute::take; use vortex::encoding::EncodingRef; -use vortex::{Array, ArrayDType, ArrayData, ArrayDef, IntoArray, IntoArrayData, ToArray}; +use vortex::{Array, ArrayDType, ArrayData, ArrayDef, ToArray}; use 
vortex_alp::{ALPArray, ALPEncoding, ALP}; use vortex_dict::{Dict, DictArray, DictEncoding}; use vortex_fastlanes::{ @@ -77,100 +77,97 @@ pyarray!(ZigZagEncoding, ZigZagArray, "ZigZagArray"); impl PyArray { pub fn wrap(py: Python<'_>, inner: ArrayData) -> PyResult> { + let encoding_id = inner.encoding().id(); + let array = Array::from(inner); // This is the one place where we'd want to have owned kind enum but there's no other place this is used - match inner.encoding().id() { + match encoding_id { Bool::ID => PyBoolArray::wrap( py, - BoolArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BoolArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Chunked::ID => PyChunkedArray::wrap( py, - ChunkedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ChunkedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Constant::ID => PyConstantArray::wrap( py, - ConstantArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ConstantArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Primitive::ID => PyPrimitiveArray::wrap( py, - PrimitiveArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + PrimitiveArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Sparse::ID => PySparseArray::wrap( py, - SparseArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + SparseArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Struct::ID => PyStructArray::wrap( py, - StructArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + StructArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), VarBin::ID => PyVarBinArray::wrap( py, - VarBinArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + VarBinArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), VarBinView::ID => PyVarBinViewArray::wrap( py, - VarBinViewArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + VarBinViewArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Dict::ID => PyDictArray::wrap( py, - DictArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + DictArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), RunEnd::ID => PyRunEndArray::wrap( py, - RunEndArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + RunEndArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), Delta::ID => PyDeltaArray::wrap( py, - DeltaArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + DeltaArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), FoR::ID => PyFoRArray::wrap( py, - FoRArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + FoRArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), BitPacked::ID => PyBitPackedArray::wrap( py, - BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), ALP::ID => PyALPArray::wrap( py, - ALPArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ALPArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), RoaringBool::ID => PyBitPackedArray::wrap( py, - BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, )? 
.extract(py), RoaringInt::ID => PyBitPackedArray::wrap( py, - BitPackedArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + BitPackedArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), ZigZag::ID => PyZigZagArray::wrap( py, - ZigZagArray::try_from(inner.into_array()).map_err(PyVortexError::map_err)?, + ZigZagArray::try_from(array).map_err(PyVortexError::map_err)?, )? .extract(py), - _ => Py::new( - py, - Self { - inner: inner.into_array(), - }, - ), + _ => Py::new(py, Self { inner: array }), // ArrayKind::Other(other) => match other.encoding().id() { // // PyEnc chooses to expose certain encodings as first-class objects. // // For the remainder, we should have a generic EncArray implementation that supports basic functions. @@ -234,7 +231,7 @@ impl PyArray { fn take(&self, indices: PyRef<'_, Self>) -> PyResult> { take(&self.inner, indices.unwrap()) .map_err(PyVortexError::map_err) - .and_then(|arr| Self::wrap(indices.py(), arr.into_array_data())) + .and_then(|arr| Self::wrap(indices.py(), arr.into())) } } // diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index 8b784d052a..564cabce06 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -7,7 +7,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::array::ChunkedArray; use vortex::arrow::{FromArrowArray, FromArrowType}; -use vortex::{ArrayData, IntoArray, IntoArrayData, ToArrayData}; +use vortex::Array; use vortex_dtype::DType; use crate::array::PyArray; @@ -25,8 +25,8 @@ pub fn encode(obj: &Bound) -> PyResult> { if obj.is_instance(&pa_array)? { let arrow_array = ArrowArrayData::from_pyarrow_bound(obj).map(make_array)?; - let enc_array = ArrayData::from_arrow(arrow_array, false); - PyArray::wrap(obj.py(), enc_array) + let enc_array = Array::from_arrow(arrow_array, false); + PyArray::wrap(obj.py(), enc_array.into()) } else if obj.is_instance(&chunked_array)? { let chunks: Vec> = obj.getattr("chunks")?.extract()?; let encoded_chunks = chunks @@ -34,7 +34,7 @@ pub fn encode(obj: &Bound) -> PyResult> { .map(|a| { ArrowArrayData::from_pyarrow_bound(a) .map(make_array) - .map(|a| ArrayData::from_arrow(a, false).into_array()) + .map(|a| Array::from_arrow(a, false)) }) .collect::>>()?; let dtype: DType = obj @@ -45,23 +45,20 @@ pub fn encode(obj: &Bound) -> PyResult> { obj.py(), ChunkedArray::try_new(encoded_chunks, dtype) .map_err(PyVortexError::map_err)? - .into_array_data(), + .into(), ) } else if obj.is_instance(&table)? { let array_stream = ArrowArrayStreamReader::from_pyarrow_bound(obj)?; let dtype = DType::from_arrow(array_stream.schema()); let chunks = array_stream .into_iter() - .map(|b| { - b.map(|bb| bb.to_array_data().into_array()) - .map_err(map_arrow_err) - }) + .map(|b| b.map(Array::from).map_err(map_arrow_err)) .collect::>>()?; PyArray::wrap( obj.py(), ChunkedArray::try_new(chunks, dtype) .map_err(PyVortexError::map_err)? 
- .into_array_data(), + .into(), ) } else { Err(PyValueError::new_err("Cannot convert object to enc array")) diff --git a/vortex-array/src/array/bool/compute/boolean.rs b/vortex-array/src/array/bool/compute/boolean.rs index 8165b11089..9cdce98425 100644 --- a/vortex-array/src/array/bool/compute/boolean.rs +++ b/vortex-array/src/array/bool/compute/boolean.rs @@ -5,7 +5,7 @@ use vortex_error::VortexResult; use crate::array::BoolArray; use crate::arrow::FromArrowArray; use crate::compute::{AndFn, OrFn}; -use crate::{Array, ArrayData, IntoArray, IntoCanonical}; +use crate::{Array, IntoCanonical}; impl OrFn for BoolArray { fn or(&self, array: &Array) -> VortexResult { @@ -17,7 +17,7 @@ impl OrFn for BoolArray { let array = boolean::or(lhs, rhs)?; - Ok(ArrayData::from_arrow(&array, true).into_array()) + Ok(Array::from_arrow(&array, true)) } } @@ -31,6 +31,6 @@ impl AndFn for BoolArray { let array = boolean::and(lhs, rhs)?; - Ok(ArrayData::from_arrow(&array, true).into_array()) + Ok(Array::from_arrow(&array, true)) } } diff --git a/vortex-array/src/array/bool/compute/fill.rs b/vortex-array/src/array/bool/compute/fill.rs index 06c422d338..d88a8c8e63 100644 --- a/vortex-array/src/array/bool/compute/fill.rs +++ b/vortex-array/src/array/bool/compute/fill.rs @@ -4,12 +4,12 @@ use vortex_error::VortexResult; use crate::array::BoolArray; use crate::compute::unary::FillForwardFn; use crate::validity::ArrayValidity; -use crate::{Array, ArrayDType, IntoArray, ToArrayData}; +use crate::{Array, ArrayDType, IntoArray}; impl FillForwardFn for BoolArray { fn fill_forward(&self) -> VortexResult { if self.dtype().nullability() == Nullability::NonNullable { - return Ok(self.to_array_data().into_array()); + return Ok(self.clone().into()); } let validity = self.logical_validity().to_null_buffer()?.unwrap(); diff --git a/vortex-array/src/array/constant/compute.rs b/vortex-array/src/array/constant/compute.rs index 0b367c222c..807f25ef48 100644 --- a/vortex-array/src/array/constant/compute.rs +++ b/vortex-array/src/array/constant/compute.rs @@ -15,7 +15,7 @@ use crate::compute::{ SearchSortedSide, SliceFn, TakeFn, }; use crate::stats::{ArrayStatistics, Stat}; -use crate::{Array, ArrayDType, ArrayData, AsArray, IntoArray, IntoCanonical}; +use crate::{Array, ArrayDType, AsArray, IntoArray, IntoCanonical}; impl ArrayCompute for ConstantArray { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { @@ -118,7 +118,7 @@ impl CompareFn for ConstantArray { Operator::Lte => arrow_ord::cmp::lt_eq(datum.as_ref(), &rhs)?, }; - Ok(ArrayData::from_arrow(&boolean_array, true).into_array()) + Ok(Array::from_arrow(&boolean_array, true)) } } } diff --git a/vortex-array/src/array/datetime/temporal.rs b/vortex-array/src/array/datetime/temporal.rs index 34f2109999..127a8cebbf 100644 --- a/vortex-array/src/array/datetime/temporal.rs +++ b/vortex-array/src/array/datetime/temporal.rs @@ -3,7 +3,7 @@ use vortex_dtype::{DType, ExtDType, ExtID}; use crate::array::datetime::TimeUnit; use crate::array::extension::ExtensionArray; -use crate::{Array, ArrayDType, ArrayData, IntoArrayData}; +use crate::{Array, ArrayDType, ArrayData, IntoArray}; mod from; @@ -224,8 +224,14 @@ impl TemporalArray { } } -impl IntoArrayData for TemporalArray { - fn into_array_data(self) -> ArrayData { - self.ext.into_array_data() +impl From for ArrayData { + fn from(value: TemporalArray) -> Self { + value.ext.into() + } +} + +impl From for Array { + fn from(value: TemporalArray) -> Self { + value.ext.into_array() } } diff --git a/vortex-array/src/array/primitive/compute/fill.rs 
b/vortex-array/src/array/primitive/compute/fill.rs index 1fe8ec5061..fa6bc00623 100644 --- a/vortex-array/src/array/primitive/compute/fill.rs +++ b/vortex-array/src/array/primitive/compute/fill.rs @@ -4,13 +4,13 @@ use vortex_error::VortexResult; use crate::array::primitive::PrimitiveArray; use crate::compute::unary::FillForwardFn; use crate::validity::ArrayValidity; -use crate::{Array, IntoArray, ToArrayData}; +use crate::{Array, IntoArray}; impl FillForwardFn for PrimitiveArray { fn fill_forward(&self) -> VortexResult { let validity = self.logical_validity(); let Some(nulls) = validity.to_null_buffer()? else { - return Ok(self.to_array_data().into_array()); + return Ok(self.clone().into()); }; match_each_native_ptype!(self.ptype(), |$T| { let maybe_null_slice = self.maybe_null_slice::<$T>(); diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 12addc709e..c3f8c2570a 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -20,8 +20,8 @@ use crate::stats::StatsSet; use crate::validity::{ArrayValidity, LogicalValidity, Validity, ValidityMetadata}; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; use crate::{ - impl_encoding, Array, ArrayDType, ArrayData, ArrayDef, ArrayTrait, Canonical, IntoArray, - IntoArrayVariant, IntoCanonical, + impl_encoding, Array, ArrayDType, ArrayDef, ArrayTrait, Canonical, IntoArrayVariant, + IntoCanonical, }; mod accessor; @@ -200,8 +200,8 @@ impl VarBinViewArray { for s in iter { builder.append_value(s); } - let array_data = ArrayData::from_arrow(&builder.finish(), false); - VarBinViewArray::try_from(array_data.into_array()).expect("should be var bin view array") + let array = Array::from_arrow(&builder.finish(), false); + VarBinViewArray::try_from(array).expect("should be var bin view array") } pub fn from_iter_nullable_str, I: IntoIterator>>( @@ -211,8 +211,8 @@ impl VarBinViewArray { let mut builder = StringViewBuilder::with_capacity(iter.size_hint().0); builder.extend(iter); - let array_data = ArrayData::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array_data.into_array()).expect("should be var bin view array") + let array = Array::from_arrow(&builder.finish(), true); + VarBinViewArray::try_from(array).expect("should be var bin view array") } pub fn from_iter_bin, I: IntoIterator>(iter: I) -> Self { @@ -221,8 +221,8 @@ impl VarBinViewArray { for b in iter { builder.append_value(b); } - let array_data = ArrayData::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array_data.into_array()).expect("should be var bin view array") + let array = Array::from_arrow(&builder.finish(), true); + VarBinViewArray::try_from(array).expect("should be var bin view array") } pub fn from_iter_nullable_bin, I: IntoIterator>>( @@ -231,8 +231,8 @@ impl VarBinViewArray { let iter = iter.into_iter(); let mut builder = BinaryViewBuilder::with_capacity(iter.size_hint().0); builder.extend(iter); - let array_data = ArrayData::from_arrow(&builder.finish(), true); - VarBinViewArray::try_from(array_data.into_array()).expect("should be var bin view array") + let array = Array::from_arrow(&builder.finish(), true); + VarBinViewArray::try_from(array).expect("should be var bin view array") } pub fn bytes_at(&self, index: usize) -> VortexResult> { @@ -261,7 +261,7 @@ impl IntoCanonical for VarBinViewArray { let arrow_self = as_arrow(self); let arrow_varbin = arrow_cast::cast(arrow_self.deref(), &DataType::Utf8) .expect("Utf8View must cast to Ut8f"); - let 
vortex_array = ArrayData::from_arrow(arrow_varbin, nullable).into_array(); + let vortex_array = Array::from_arrow(arrow_varbin, nullable); Ok(Canonical::VarBin(VarBinArray::try_from(&vortex_array)?)) } @@ -360,7 +360,7 @@ mod test { use crate::array::varbinview::{BinaryView, Inlined, Ref, VarBinViewArray, VIEW_SIZE}; use crate::compute::slice; use crate::compute::unary::scalar_at; - use crate::{Canonical, IntoArray, IntoCanonical}; + use crate::{Canonical, IntoCanonical}; #[test] pub fn varbin_view() { @@ -381,7 +381,7 @@ mod test { pub fn slice_array() { let binary_arr = slice( &VarBinViewArray::from_iter_str(["hello world", "hello world this is a long string"]) - .into_array(), + .into(), 1, 2, ) @@ -399,7 +399,7 @@ mod test { let flattened = binary_arr.into_canonical().unwrap(); assert!(matches!(flattened, Canonical::VarBin(_))); - let var_bin = flattened.into_array(); + let var_bin = flattened.into(); assert_eq!(scalar_at(&var_bin, 0).unwrap(), Scalar::from("string1")); assert_eq!(scalar_at(&var_bin, 1).unwrap(), Scalar::from("string2")); } diff --git a/vortex-array/src/arrow/array.rs b/vortex-array/src/arrow/array.rs index 3902cfc5ad..0b9ab8986f 100644 --- a/vortex-array/src/arrow/array.rs +++ b/vortex-array/src/arrow/array.rs @@ -26,44 +26,49 @@ use crate::array::{ use crate::arrow::FromArrowArray; use crate::stats::{Stat, Statistics}; use crate::validity::Validity; -use crate::{ArrayData, IntoArray, IntoArrayData}; +use crate::{Array, ArrayData}; -impl IntoArrayData for Buffer { - fn into_array_data(self) -> ArrayData { - PrimitiveArray::new(self.into(), PType::U8, Validity::NonNullable).into_array_data() +impl From for ArrayData { + fn from(value: Buffer) -> Self { + PrimitiveArray::new(value.into(), PType::U8, Validity::NonNullable).into() } } -impl IntoArrayData for NullBuffer { - fn into_array_data(self) -> ArrayData { - BoolArray::try_new(self.into_inner(), Validity::NonNullable) +impl From for ArrayData { + fn from(value: NullBuffer) -> Self { + BoolArray::try_new(value.into_inner(), Validity::NonNullable) .unwrap() - .into_array_data() + .into() } } -impl IntoArrayData for ScalarBuffer { - fn into_array_data(self) -> ArrayData { - PrimitiveArray::new(self.into_inner().into(), T::PTYPE, Validity::NonNullable) - .into_array_data() +impl From> for ArrayData +where + T: ArrowNativeType + NativePType, +{ + fn from(value: ScalarBuffer) -> Self { + PrimitiveArray::new(value.into_inner().into(), T::PTYPE, Validity::NonNullable).into() } } -impl IntoArrayData for OffsetBuffer { - fn into_array_data(self) -> ArrayData { - let array = PrimitiveArray::new( - self.into_inner().into_inner().into(), +impl From> for ArrayData +where + O: NativePType + OffsetSizeTrait, +{ + fn from(value: OffsetBuffer) -> Self { + let array_data: ArrayData = PrimitiveArray::new( + value.into_inner().into_inner().into(), O::PTYPE, Validity::NonNullable, ) - .into_array_data(); - array.set(Stat::IsSorted, true.into()); - array.set(Stat::IsStrictSorted, true.into()); - array + .into(); + array_data.set(Stat::IsSorted, true.into()); + array_data.set(Stat::IsStrictSorted, true.into()); + array_data } } -impl FromArrowArray<&ArrowPrimitiveArray> for ArrayData +impl FromArrowArray<&ArrowPrimitiveArray> for Array where ::Native: NativePType, { @@ -72,41 +77,33 @@ where value.values().clone().into_inner().into(), T::Native::PTYPE, nulls(value.nulls(), nullable), - ) - .into_array_data(); + ); if T::DATA_TYPE.is_numeric() { - return arr; + return arr.into(); } match T::DATA_TYPE { DataType::Timestamp(time_unit, tz) => 
{ let tz = tz.clone().map(|s| s.to_string()); - TemporalArray::new_timestamp(arr.into_array(), from_arrow_time_unit(time_unit), tz) - .into_array_data() + TemporalArray::new_timestamp(arr.into(), from_arrow_time_unit(time_unit), tz).into() } DataType::Time32(time_unit) => { - TemporalArray::new_time(arr.into_array(), from_arrow_time_unit(time_unit)) - .into_array_data() + TemporalArray::new_time(arr.into(), from_arrow_time_unit(time_unit)).into() } DataType::Time64(time_unit) => { - TemporalArray::new_time(arr.into_array(), from_arrow_time_unit(time_unit)) - .into_array_data() - } - DataType::Date32 => { - TemporalArray::new_date(arr.into_array(), TimeUnit::D).into_array_data() - } - DataType::Date64 => { - TemporalArray::new_date(arr.into_array(), TimeUnit::Ms).into_array_data() + TemporalArray::new_time(arr.into(), from_arrow_time_unit(time_unit)).into() } - DataType::Duration(_) => todo!(), - DataType::Interval(_) => todo!(), + DataType::Date32 => TemporalArray::new_date(arr.into(), TimeUnit::D).into(), + DataType::Date64 => TemporalArray::new_date(arr.into(), TimeUnit::Ms).into(), + DataType::Duration(_) => unimplemented!(), + DataType::Interval(_) => unimplemented!(), _ => panic!("Invalid data type for PrimitiveArray"), } } } -impl FromArrowArray<&GenericByteArray> for ArrayData +impl FromArrowArray<&GenericByteArray> for Array where ::Offset: NativePType, { @@ -117,17 +114,17 @@ where _ => panic!("Invalid data type for ByteArray"), }; VarBinArray::try_new( - value.offsets().clone().into_array_data().into_array(), - value.values().clone().into_array_data().into_array(), + ArrayData::from(value.offsets().clone()).into(), + ArrayData::from(value.values().clone()).into(), dtype, nulls(value.nulls(), nullable), ) .unwrap() - .into_array_data() + .into() } } -impl FromArrowArray<&GenericByteViewArray> for ArrayData { +impl FromArrowArray<&GenericByteViewArray> for Array { fn from_arrow(value: &GenericByteViewArray, nullable: bool) -> Self { let dtype = match T::DATA_TYPE { DataType::BinaryView => DType::Binary(nullable.into()), @@ -135,29 +132,29 @@ impl FromArrowArray<&GenericByteViewArray> for ArrayData { _ => panic!("Invalid data type for ByteViewArray"), }; VarBinViewArray::try_new( - value.views().inner().clone().into_array_data().into_array(), + ArrayData::from(value.views().inner().clone()).into(), value .data_buffers() .iter() - .map(|b| b.clone().into_array_data().into_array()) + .map(|b| ArrayData::from(b.clone()).into()) .collect::>(), dtype, nulls(value.nulls(), nullable), ) .unwrap() - .into_array_data() + .into() } } -impl FromArrowArray<&ArrowBooleanArray> for ArrayData { +impl FromArrowArray<&ArrowBooleanArray> for Array { fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::try_new(value.values().clone(), nulls(value.nulls(), nullable)) .unwrap() - .into_array_data() + .into() } } -impl FromArrowArray<&ArrowStructArray> for ArrayData { +impl FromArrowArray<&ArrowStructArray> for Array { fn from_arrow(value: &ArrowStructArray, nullable: bool) -> Self { // TODO(ngates): how should we deal with Arrow "logical nulls"? 
assert!(!nullable); @@ -172,20 +169,20 @@ impl FromArrowArray<&ArrowStructArray> for ArrayData { .columns() .iter() .zip(value.fields()) - .map(|(c, field)| Self::from_arrow(c.clone(), field.is_nullable()).into_array()) + .map(|(c, field)| Self::from_arrow(c.clone(), field.is_nullable())) .collect(), value.len(), nulls(value.nulls(), nullable), ) .unwrap() - .into_array_data() + .into() } } -impl FromArrowArray<&ArrowNullArray> for ArrayData { +impl FromArrowArray<&ArrowNullArray> for Array { fn from_arrow(value: &ArrowNullArray, nullable: bool) -> Self { assert!(nullable); - NullArray::new(value.len()).into_array_data() + NullArray::new(value.len()).into() } } @@ -206,7 +203,7 @@ fn nulls(nulls: Option<&NullBuffer>, nullable: bool) -> Validity { } } -impl FromArrowArray for ArrayData { +impl FromArrowArray for Array { fn from_arrow(array: ArrowArrayRef, nullable: bool) -> Self { match array.data_type() { DataType::Boolean => Self::from_arrow(array.as_boolean(), nullable), diff --git a/vortex-array/src/arrow/recordbatch.rs b/vortex-array/src/arrow/recordbatch.rs index d0b1c9b16a..cb151d0b01 100644 --- a/vortex-array/src/arrow/recordbatch.rs +++ b/vortex-array/src/arrow/recordbatch.rs @@ -4,28 +4,28 @@ use itertools::Itertools; use crate::array::StructArray; use crate::arrow::FromArrowArray; use crate::validity::Validity; -use crate::{ArrayData, IntoArray, IntoArrayData, ToArrayData}; +use crate::Array; -impl ToArrayData for RecordBatch { - fn to_array_data(&self) -> ArrayData { +impl From for Array { + fn from(value: RecordBatch) -> Self { StructArray::try_new( - self.schema() + value + .schema() .fields() .iter() .map(|f| f.name().as_str().into()) .collect_vec() .into(), - self.columns() + value + .columns() .iter() - .zip(self.schema().fields()) - .map(|(array, field)| { - ArrayData::from_arrow(array.clone(), field.is_nullable()).into_array() - }) + .zip(value.schema().fields()) + .map(|(array, field)| Array::from_arrow(array.clone(), field.is_nullable())) .collect(), - self.num_rows(), + value.num_rows(), Validity::AllValid, ) .unwrap() - .into_array_data() + .into() } } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 3e27d1b6ef..6a9f11bb7d 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -420,22 +420,20 @@ impl IntoCanonical for Array { } } -/// Implement the IntoArray for the [Canonical] type. -/// /// This conversion is always "free" and should not touch underlying data. All it does is create an /// owned pointer to the underlying concrete array type. /// /// This combined with the above [IntoCanonical] impl for [Array] allows simple two-way conversions /// between arbitrary Vortex encodings and canonical Arrow-compatible encodings. 
-impl IntoArray for Canonical { - fn into_array(self) -> Array { - match self { - Self::Null(a) => a.into_array(), - Self::Bool(a) => a.into_array(), - Self::Primitive(a) => a.into_array(), - Self::Struct(a) => a.into_array(), - Self::VarBin(a) => a.into_array(), - Self::Extension(a) => a.into_array(), +impl From for Array { + fn from(value: Canonical) -> Self { + match value { + Canonical::Null(a) => a.into(), + Canonical::Bool(a) => a.into(), + Canonical::Primitive(a) => a.into(), + Canonical::Struct(a) => a.into(), + Canonical::VarBin(a) => a.into(), + Canonical::Extension(a) => a.into(), } } } diff --git a/vortex-array/src/compute/compare.rs b/vortex-array/src/compute/compare.rs index 819bf1d316..550ddca309 100644 --- a/vortex-array/src/compute/compare.rs +++ b/vortex-array/src/compute/compare.rs @@ -5,7 +5,7 @@ use vortex_expr::Operator; use vortex_scalar::Scalar; use crate::arrow::FromArrowArray; -use crate::{Array, ArrayDType, ArrayData, IntoArray, IntoCanonical}; +use crate::{Array, ArrayDType, IntoCanonical}; pub trait CompareFn { fn compare(&self, array: &Array, operator: Operator) -> VortexResult; @@ -47,7 +47,7 @@ pub fn compare(left: &Array, right: &Array, operator: Operator) -> VortexResult< Operator::Lte => cmp::lt_eq(&lhs.as_ref(), &rhs.as_ref())?, }; - Ok(ArrayData::from_arrow(&array, true).into_array()) + Ok(Array::from_arrow(&array, true)) } pub fn scalar_cmp(lhs: &Scalar, rhs: &Scalar, operator: Operator) -> Scalar { diff --git a/vortex-array/src/compute/filter.rs b/vortex-array/src/compute/filter.rs index 59de8cd355..9603540784 100644 --- a/vortex-array/src/compute/filter.rs +++ b/vortex-array/src/compute/filter.rs @@ -3,7 +3,7 @@ use vortex_dtype::{DType, Nullability}; use vortex_error::VortexResult; use crate::arrow::FromArrowArray; -use crate::{Array, ArrayDType, ArrayData, IntoArray, IntoCanonical}; +use crate::{Array, ArrayDType, IntoCanonical}; pub trait FilterFn { /// Filter an array by the provided predicate. @@ -42,7 +42,7 @@ pub fn filter(array: &Array, predicate: &Array) -> VortexResult { let filtered = arrow_select::filter::filter(array_ref.as_ref(), predicate_ref.as_boolean())?; - Ok(ArrayData::from_arrow(filtered, array.dtype().is_nullable()).into_array()) + Ok(Array::from_arrow(filtered, array.dtype().is_nullable())) } }) } diff --git a/vortex-array/src/compute/take.rs b/vortex-array/src/compute/take.rs index ca8489da98..6f4136a021 100644 --- a/vortex-array/src/compute/take.rs +++ b/vortex-array/src/compute/take.rs @@ -1,7 +1,7 @@ use log::info; use vortex_error::{vortex_err, VortexResult}; -use crate::{Array, IntoArray, IntoCanonical}; +use crate::{Array, IntoCanonical}; pub trait TakeFn { fn take(&self, indices: &Array) -> VortexResult; @@ -15,7 +15,7 @@ pub fn take(array: &Array, indices: &Array) -> VortexResult { // Otherwise, flatten and try again. 
info!("TakeFn not implemented for {}, flattening", array); - array.clone().into_canonical()?.into_array().with_dyn(|a| { + Array::from(array.clone().into_canonical()?).with_dyn(|a| { a.take() .map(|t| t.take(indices)) .unwrap_or_else(|| Err(vortex_err!(NotImplemented: "take", array.encoding().id()))) diff --git a/vortex-array/src/data.rs b/vortex-array/src/data.rs index 269e482355..afa53411ca 100644 --- a/vortex-array/src/data.rs +++ b/vortex-array/src/data.rs @@ -7,7 +7,7 @@ use vortex_scalar::Scalar; use crate::encoding::EncodingRef; use crate::stats::{Stat, Statistics, StatsSet}; -use crate::{Array, ArrayDType, ArrayMetadata, IntoArray, ToArray}; +use crate::{Array, ArrayDType, ArrayMetadata, ToArray}; #[derive(Clone, Debug)] pub struct ArrayData { @@ -106,9 +106,18 @@ impl ToArray for ArrayData { } } -impl IntoArray for ArrayData { - fn into_array(self) -> Array { - Array::Data(self) +impl From for ArrayData { + fn from(value: Array) -> ArrayData { + match &value { + Array::Data(d) => d.clone(), + Array::View(_) => value.clone().into(), + } + } +} + +impl From for Array { + fn from(value: ArrayData) -> Array { + Array::Data(value) } } diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 4cbaa086c7..aaf693b948 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -7,7 +7,7 @@ use crate::stats::{ArrayStatistics, Statistics}; use crate::visitor::ArrayVisitor; use crate::{ Array, ArrayDType, ArrayData, ArrayLen, ArrayMetadata, ArrayTrait, AsArray, GetArrayMetadata, - IntoArray, IntoArrayData, ToArrayData, TryDeserializeArrayMetadata, + IntoArray, TryDeserializeArrayMetadata, }; /// Trait the defines the set of types relating to an array. @@ -108,6 +108,12 @@ macro_rules! impl_encoding { $crate::TypedArray::<$Name>::try_from(array).map(Self::from) } } + impl From<[<$Name Array>]> for $crate::Array { + fn from(value: [<$Name Array>]) -> $crate::Array { + use $crate::IntoArray; + value.typed.into_array() + } + } /// The array encoding #[derive(std::fmt::Debug)] @@ -198,11 +204,14 @@ impl ArrayStatistics for T { } } -impl IntoArrayData for T { - fn into_array_data(self) -> ArrayData { +impl From for ArrayData +where + D: IntoArray + ArrayEncodingRef + ArrayStatistics + GetArrayMetadata, +{ + fn from(value: D) -> Self { // TODO: move metadata call `Array::View` match - let metadata = self.metadata(); - let array = self.into_array(); + let metadata = value.metadata(); + let array = value.into_array(); match array { Array::Data(d) => d, Array::View(ref view) => { @@ -245,9 +254,3 @@ impl IntoA } } } - -impl ToArrayData for T { - fn to_array_data(&self) -> ArrayData { - self.clone().into_array_data() - } -} diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 5a6adb6195..1158745a1d 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -213,14 +213,6 @@ pub trait IntoArray { fn into_array(self) -> Array; } -pub trait ToArrayData { - fn to_array_data(&self) -> ArrayData; -} - -pub trait IntoArrayData { - fn into_array_data(self) -> ArrayData; -} - pub trait AsArray { fn as_array_ref(&self) -> &Array; } @@ -237,7 +229,6 @@ pub trait ArrayTrait: + AcceptArrayVisitor + ArrayStatistics + ArrayStatisticsCompute - + ToArrayData { fn nbytes(&self) -> usize { let mut visitor = NBytesVisitor(0); @@ -324,12 +315,3 @@ impl Display for Array { ) } } - -impl IntoArrayData for Array { - fn into_array_data(self) -> ArrayData { - match self { - Self::Data(d) => d, - Self::View(_) => self.with_dyn(|a| 
a.to_array_data()), - } - } -} diff --git a/vortex-array/src/tree.rs b/vortex-array/src/tree.rs index 5fdf087a55..ae1f5f0df5 100644 --- a/vortex-array/src/tree.rs +++ b/vortex-array/src/tree.rs @@ -7,7 +7,7 @@ use vortex_error::{VortexError, VortexResult}; use crate::array::ChunkedArray; use crate::visitor::ArrayVisitor; -use crate::{Array, ToArrayData}; +use crate::{Array, ArrayData}; impl Array { pub fn tree_display(&self) -> TreeDisplayWrapper { @@ -55,12 +55,13 @@ impl<'a, 'b: 'a> ArrayVisitor for TreeFormatter<'a, 'b> { 100f64 * nbytes as f64 / total_size as f64 )?; self.indent(|i| { + let array_data = ArrayData::from(array.clone()); writeln!( i.fmt, // TODO(ngates): use Display for metadata "{}metadata: {:?}", i.indent, - array.to_array_data().metadata() + array_data.metadata() ) })?; diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index 333fe48dd2..d9380d0912 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -23,7 +23,7 @@ use pin_project::pin_project; use vortex::array::ChunkedArray; use vortex::arrow::FromArrowArray; use vortex::compute::take; -use vortex::{ArrayDType, ArrayData, IntoArray, IntoArrayVariant, IntoCanonical}; +use vortex::{Array, ArrayDType, IntoArray, IntoArrayVariant, IntoCanonical}; use crate::datatype::infer_schema; use crate::eval::ExpressionEvaluator; @@ -346,8 +346,7 @@ where ); let row_indices = - ArrayData::from_arrow(record_batch.column(0).as_primitive::(), false) - .into_array(); + Array::from_arrow(record_batch.column(0).as_primitive::(), false); // If no columns in the output projection, we send back a RecordBatch with empty schema. // This is common for COUNT queries. diff --git a/vortex-sampling-compressor/src/lib.rs b/vortex-sampling-compressor/src/lib.rs index 229edb7332..ef6f56f82c 100644 --- a/vortex-sampling-compressor/src/lib.rs +++ b/vortex-sampling-compressor/src/lib.rs @@ -298,7 +298,7 @@ fn sampled_compression<'a>( array.dtype().clone(), )? .into_canonical()? - .into_array(); + .into(); find_best_compression(candidates, &sample, compressor)? .into_path() diff --git a/vortex-sampling-compressor/tests/smoketest.rs b/vortex-sampling-compressor/tests/smoketest.rs index 443ee0f879..94e7c1a01f 100644 --- a/vortex-sampling-compressor/tests/smoketest.rs +++ b/vortex-sampling-compressor/tests/smoketest.rs @@ -5,7 +5,7 @@ use chrono::TimeDelta; use vortex::array::builder::VarBinBuilder; use vortex::array::{BoolArray, PrimitiveArray, StructArray, TemporalArray, TimeUnit}; use vortex::validity::Validity; -use vortex::{Array, ArrayDType, IntoArray, IntoArrayData}; +use vortex::{Array, ArrayDType, IntoArray}; use vortex_dtype::{DType, FieldName, FieldNames, Nullability}; use vortex_sampling_compressor::compressors::alp::ALPCompressor; use vortex_sampling_compressor::compressors::bitpacked::BitPackedCompressor; @@ -118,7 +118,9 @@ fn make_timestamp_column(count: usize) -> Array { let storage_array = PrimitiveArray::from_vec(timestamps, Validity::NonNullable).into_array(); - TemporalArray::new_timestamp(storage_array, TimeUnit::Ms, None) - .into_array_data() - .into_array() + Array::from(TemporalArray::new_timestamp( + storage_array, + TimeUnit::Ms, + None, + )) }