From bcfc2e77e6a3250e93467edf3321d77d0131191c Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Wed, 20 Mar 2024 13:21:23 +0000 Subject: [PATCH 1/2] Split vortex-schema from main crate --- Cargo.lock | 91 +++++----- Cargo.toml | 3 +- bench-vortex/Cargo.toml | 3 +- bench-vortex/src/lib.rs | 13 +- pyvortex/Cargo.toml | 3 +- pyvortex/src/dtype.rs | 8 +- pyvortex/src/encode.rs | 19 +- pyvortex/src/lib.rs | 2 +- vortex-alp/Cargo.toml | 1 + vortex-alp/src/array.rs | 2 +- vortex-alp/src/serde.rs | 2 +- vortex-array/Cargo.toml | 1 + vortex-array/src/array/bool/mod.rs | 3 +- vortex-array/src/array/chunked/mod.rs | 4 +- vortex-array/src/array/chunked/serde.rs | 2 +- vortex-array/src/array/composite/array.rs | 6 +- vortex-array/src/array/composite/mod.rs | 16 +- vortex-array/src/array/composite/serde.rs | 6 +- vortex-array/src/array/composite/typed.rs | 5 +- vortex-array/src/array/constant/mod.rs | 2 +- vortex-array/src/array/constant/stats.rs | 2 +- vortex-array/src/array/mod.rs | 2 +- .../src/array/primitive/compute/cast.rs | 2 +- vortex-array/src/array/primitive/mod.rs | 4 +- vortex-array/src/array/sparse/mod.rs | 2 +- vortex-array/src/array/sparse/serde.rs | 2 +- vortex-array/src/array/struct_/mod.rs | 2 +- vortex-array/src/array/struct_/serde.rs | 2 +- vortex-array/src/array/varbin/compute.rs | 2 +- vortex-array/src/array/varbin/mod.rs | 4 +- vortex-array/src/array/varbin/serde.rs | 3 +- vortex-array/src/array/varbin/stats.rs | 4 +- vortex-array/src/array/varbinview/compute.rs | 2 +- vortex-array/src/array/varbinview/mod.rs | 2 +- vortex-array/src/array/varbinview/serde.rs | 2 +- vortex-array/src/arrow/dtypes.rs | 68 +++---- vortex-array/src/arrow/mod.rs | 4 + vortex-array/src/compute/cast.rs | 2 +- vortex-array/src/datetime/localdatetime.rs | 4 +- vortex-array/src/encode.rs | 167 +++++++++++------- vortex-array/src/error.rs | 44 +---- vortex-array/src/lib.rs | 1 - vortex-array/src/ptype.rs | 50 +++++- vortex-array/src/scalar/binary.rs | 2 +- vortex-array/src/scalar/bool.rs | 2 +- vortex-array/src/scalar/composite.rs | 2 +- vortex-array/src/scalar/list.rs | 2 +- vortex-array/src/scalar/mod.rs | 2 +- vortex-array/src/scalar/null.rs | 2 +- vortex-array/src/scalar/primitive.rs | 4 +- vortex-array/src/scalar/serde.rs | 2 +- vortex-array/src/scalar/struct_.rs | 3 +- vortex-array/src/scalar/utf8.rs | 2 +- vortex-array/src/serde/mod.rs | 20 ++- vortex-array/src/stats.rs | 2 +- vortex-dict/Cargo.toml | 1 + vortex-dict/src/compress.rs | 2 +- vortex-dict/src/dict.rs | 2 +- vortex-fastlanes/Cargo.toml | 7 +- vortex-fastlanes/src/bitpacking/mod.rs | 2 +- vortex-fastlanes/src/for/mod.rs | 2 +- vortex-ree/Cargo.toml | 1 + vortex-ree/src/ree.rs | 4 +- vortex-roaring/Cargo.toml | 1 + vortex-roaring/src/boolean/compress.rs | 4 +- vortex-roaring/src/boolean/mod.rs | 4 +- vortex-roaring/src/integer/compress.rs | 6 +- vortex-roaring/src/integer/mod.rs | 2 +- vortex-schema/Cargo.toml | 26 +++ {vortex-array => vortex-schema}/src/dtype.rs | 45 +---- vortex-schema/src/error.rs | 71 ++++++++ vortex-schema/src/lib.rs | 20 +++ .../dtype.rs => vortex-schema/src/serde.rs | 120 +++++++------ vortex-zigzag/Cargo.toml | 3 +- vortex-zigzag/src/serde.rs | 2 +- vortex-zigzag/src/zigzag.rs | 2 +- 76 files changed, 552 insertions(+), 389 deletions(-) create mode 100644 vortex-schema/Cargo.toml rename {vortex-array => vortex-schema}/src/dtype.rs (77%) create mode 100644 vortex-schema/src/error.rs create mode 100644 vortex-schema/src/lib.rs rename vortex-array/src/serde/dtype.rs => vortex-schema/src/serde.rs (73%) diff --git a/Cargo.lock b/Cargo.lock index 1e9c15356c..2b947a186c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,9 +33,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -283,7 +283,7 @@ version = "50.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", ] [[package]] @@ -360,6 +360,7 @@ dependencies = [ "vortex-fastlanes", "vortex-ree", "vortex-roaring", + "vortex-schema", "vortex-zigzag", ] @@ -369,7 +370,7 @@ version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cexpr", "clang-sys", "itertools 0.12.1", @@ -394,15 +395,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "brotli" -version = "3.4.0" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -421,15 +422,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.3" +version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "bytemuck" -version = "1.14.3" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" [[package]] name = "byteorder" @@ -526,9 +527,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.2" +version = "4.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" +checksum = "949626d00e063efc93b6dca932419ceb5432f99769911c0b995f7e884c778813" dependencies = [ "clap_builder", ] @@ -920,9 +921,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" +checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" dependencies = [ "bytes", "fnv", @@ -1485,16 +1486,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "num_enum" version = "0.7.2" @@ -1552,7 +1543,7 @@ version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "cfg-if", "foreign-types", "libc", @@ -1757,9 +1748,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -1854,6 +1845,7 @@ dependencies = [ "vortex-fastlanes", "vortex-ree", "vortex-roaring", + "vortex-schema", "vortex-zigzag", ] @@ -1962,9 +1954,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" -version = "0.11.24" +version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ "base64", "bytes", @@ -2033,11 +2025,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.31" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ - "bitflags 2.4.2", + "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", @@ -2218,9 +2210,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.52" +version = "2.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" dependencies = [ "proc-macro2", "quote", @@ -2293,18 +2285,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.57" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", @@ -2399,7 +2391,6 @@ dependencies = [ "bytes", "libc", "mio", - "num_cpus", "pin-project-lite", "socket2", "windows-sys 0.48.0", @@ -2560,6 +2551,7 @@ dependencies = [ "log", "num-traits", "vortex-array", + "vortex-schema", ] [[package]] @@ -2585,6 +2577,7 @@ dependencies = [ "roaring", "thiserror", "vortex-alloc", + "vortex-schema", ] [[package]] @@ -2601,6 +2594,7 @@ dependencies = [ "rand", "simplelog", "vortex-array", + "vortex-schema", ] [[package]] @@ -2613,6 +2607,7 @@ dependencies = [ "linkme", "num-traits", "vortex-array", + "vortex-schema", ] [[package]] @@ -2626,6 +2621,7 @@ dependencies = [ "linkme", "num-traits", "vortex-array", + "vortex-schema", ] [[package]] @@ -2637,6 +2633,18 @@ dependencies = [ "log", "num-traits", "vortex-array", + "vortex-schema", +] + +[[package]] +name = "vortex-schema" +version = "0.1.0" +dependencies = [ + "arrow-schema", + "itertools 0.12.1", + "leb128", + "num_enum", + "thiserror", ] [[package]] @@ -2646,6 +2654,7 @@ dependencies = [ "linkme", "vortex-alloc", "vortex-array", + "vortex-schema", "zigzag", ] diff --git a/Cargo.toml b/Cargo.toml index 197bb22fe4..11589a5cac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,13 +3,14 @@ members = [ "bench-vortex", "fastlanez-sys", "pyvortex", - "vortex-array", "vortex-alloc", "vortex-alp", + "vortex-array", "vortex-dict", "vortex-fastlanes", "vortex-ree", "vortex-roaring", + "vortex-schema", "vortex-zigzag", ] resolver = "2" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 8bdc8ed232..02d3dca22d 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -16,12 +16,13 @@ workspace = true [dependencies] arrow-array = "50.0.0" -vortex-array = { path = "../vortex-array" } vortex-alp = { path = "../vortex-alp" } +vortex-array = { path = "../vortex-array" } vortex-dict = { path = "../vortex-dict" } vortex-fastlanes = { path = "../vortex-fastlanes" } vortex-ree = { path = "../vortex-ree" } vortex-roaring = { path = "../vortex-roaring" } +vortex-schema = { path = "../vortex-schema" } vortex-zigzag = { path = "../vortex-zigzag" } itertools = "0.12.1" reqwest = { version = "0.11.24", features = ["blocking"] } diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 06f9bf137a..ae35cc0972 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -19,14 +19,15 @@ use vortex::array::struct_::StructEncoding; use vortex::array::varbin::VarBinEncoding; use vortex::array::varbinview::VarBinViewEncoding; use vortex::array::{Array, ArrayRef, Encoding}; +use vortex::arrow::FromArrowType; use vortex::compress::{CompressConfig, CompressCtx}; -use vortex::dtype::DType; use vortex::formatter::display_tree; use vortex_alp::ALPEncoding; use vortex_dict::DictEncoding; use vortex_fastlanes::{BitPackedEncoding, FoREncoding}; use vortex_ree::REEEncoding; use vortex_roaring::RoaringBoolEncoding; +use vortex_schema::DType; pub fn enumerate_arrays() -> Vec<&'static dyn Encoding> { vec![ @@ -116,7 +117,7 @@ pub fn compress_taxi_data() -> ArrayRef { }) .collect_vec(); - let dtype: DType = schema.clone().try_into().unwrap(); + let dtype = DType::from_arrow_type(schema.clone()); let compressed = ChunkedArray::new(chunks.clone(), dtype).boxed(); info!("Compressed array {}", display_tree(compressed.as_ref())); @@ -151,7 +152,7 @@ mod test { use std::sync::Arc; use vortex::array::ArrayRef; use vortex::compute::as_arrow::as_arrow; - use vortex::encode::FromArrow; + use vortex::encode::FromArrowArray; use vortex::serde::{ReadCtx, WriteCtx}; use crate::{compress_ctx, compress_taxi_data, download_taxi_data}; @@ -184,7 +185,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false); + let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false); let mut buf = Vec::::new(); let mut write_ctx = WriteCtx::new(&mut buf); @@ -206,7 +207,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false); + let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false); let vortex_as_arrow = as_arrow(vortex_array.as_ref()).unwrap(); assert_eq!(vortex_as_arrow.deref(), arrow_array.deref()); } @@ -225,7 +226,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false); + let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false); let compressed = ctx.clone().compress(vortex_array.as_ref(), None).unwrap(); let compressed_as_arrow = as_arrow(compressed.as_ref()).unwrap(); diff --git a/pyvortex/Cargo.toml b/pyvortex/Cargo.toml index b0ae53c42a..5f8dc1d3bb 100644 --- a/pyvortex/Cargo.toml +++ b/pyvortex/Cargo.toml @@ -20,12 +20,13 @@ crate-type = ["rlib", "cdylib"] [dependencies] arrow = { version = "50.0.0", features = ["pyarrow"] } -vortex-array = { path = "../vortex-array" } vortex-alp = { path = "../vortex-alp" } +vortex-array = { path = "../vortex-array" } vortex-dict = { path = "../vortex-dict" } vortex-fastlanes = { path = "../vortex-fastlanes" } vortex-ree = { path = "../vortex-ree" } vortex-roaring = { path = "../vortex-roaring" } +vortex-schema = { path = "../vortex-schema" } vortex-zigzag = { path = "../vortex-zigzag" } itertools = "0.12.1" log = "0.4.20" diff --git a/pyvortex/src/dtype.rs b/pyvortex/src/dtype.rs index bbe3b4a377..271baac352 100644 --- a/pyvortex/src/dtype.rs +++ b/pyvortex/src/dtype.rs @@ -2,8 +2,9 @@ use arrow::datatypes::{DataType, Field}; use arrow::pyarrow::FromPyArrow; use pyo3::types::PyType; use pyo3::{pyclass, pymethods, Py, PyAny, PyResult, Python}; +use vortex::arrow::FromArrowType; -use vortex::dtype::DType; +use vortex_schema::DType; #[pyclass(name = "DType", module = "vortex", subclass)] pub struct PyDType { @@ -32,7 +33,10 @@ impl PyDType { #[pyo3(from_py_with = "import_arrow_dtype")] arrow_dtype: DataType, nullable: bool, ) -> PyResult> { - PyDType::wrap(cls.py(), (&Field::new("_", arrow_dtype, nullable)).into()) + PyDType::wrap( + cls.py(), + DType::from_arrow_type(&Field::new("_", arrow_dtype, nullable)), + ) } } diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index 53d8168b26..1c272d59c5 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -6,14 +6,13 @@ use arrow::record_batch::RecordBatchReader; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; -use vortex::array::chunked::ChunkedArray; -use vortex::array::{Array, ArrayRef}; -use vortex::dtype::DType; -use vortex::encode::FromArrow; - use crate::array::PyArray; -use crate::error::PyVortexError; use crate::vortex_arrow::map_arrow_err; +use vortex::array::chunked::ChunkedArray; +use vortex::array::{Array, ArrayRef}; +use vortex::arrow::FromArrowType; +use vortex::encode::FromArrowArray; +use vortex_schema::DType; /// The main entry point for creating enc arrays from other Python objects. /// @@ -26,7 +25,7 @@ pub fn encode(obj: &PyAny) -> PyResult> { if obj.is_instance(pa_array)? { let arrow_array = ArrayData::from_pyarrow(obj).map(make_array)?; - let enc_array = ArrayRef::from_arrow(arrow_array, false); + let enc_array = ArrayRef::from_arrow_array(arrow_array, false); PyArray::wrap(obj.py(), enc_array) } else if obj.is_instance(chunked_array)? { let chunks: Vec<&PyAny> = obj.getattr("chunks")?.extract()?; @@ -35,17 +34,17 @@ pub fn encode(obj: &PyAny) -> PyResult> { .map(|a| { ArrayData::from_pyarrow(a) .map(make_array) - .map(|a| ArrayRef::from_arrow(a, false)) + .map(|a| ArrayRef::from_arrow_array(a, false)) }) .collect::>>()?; let dtype: DType = obj .getattr("type") .and_then(DataType::from_pyarrow) - .map(|dt| (&Field::new("_", dt, false)).into())?; + .map(|dt| DType::from_arrow_type(&Field::new("_", dt, false)))?; PyArray::wrap(obj.py(), ChunkedArray::new(encoded_chunks, dtype).boxed()) } else if obj.is_instance(table)? { let array_stream = ArrowArrayStreamReader::from_pyarrow(obj)?; - let dtype = DType::try_from(array_stream.schema()).map_err(PyVortexError::map_err)?; + let dtype = DType::from_arrow_type(array_stream.schema()); let chunks = array_stream .into_iter() .map(|b| b.map(ArrayRef::from).map_err(map_arrow_err)) diff --git a/pyvortex/src/lib.rs b/pyvortex/src/lib.rs index 3c1a314ce7..c455c51273 100644 --- a/pyvortex/src/lib.rs +++ b/pyvortex/src/lib.rs @@ -2,7 +2,7 @@ use log::debug; use pyo3::prelude::*; use dtype::PyDType; -use vortex::dtype::DType; +use vortex_schema::DType; use crate::array::*; use crate::compress::PyCompressConfig; diff --git a/vortex-alp/Cargo.toml b/vortex-alp/Cargo.toml index 64911557b3..39137563c9 100644 --- a/vortex-alp/Cargo.toml +++ b/vortex-alp/Cargo.toml @@ -16,6 +16,7 @@ workspace = true [dependencies] vortex-array = { path = "../vortex-array" } +vortex-schema = { path = "../vortex-schema" } linkme = "0.3.22" itertools = "0.12.1" num-traits = "0.2.18" diff --git a/vortex-alp/src/array.rs b/vortex-alp/src/array.rs index e80703a7e9..7d43eea610 100644 --- a/vortex-alp/src/array.rs +++ b/vortex-alp/src/array.rs @@ -4,11 +4,11 @@ use std::sync::{Arc, RwLock}; use crate::alp::Exponents; use vortex::array::{Array, ArrayKind, ArrayRef, Encoding, EncodingId, EncodingRef}; use vortex::compress::EncodingCompression; -use vortex::dtype::{DType, IntWidth, Signedness}; use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stats, StatsSet}; +use vortex_schema::{DType, IntWidth, Signedness}; use crate::compress::alp_encode; diff --git a/vortex-alp/src/serde.rs b/vortex-alp/src/serde.rs index 5d2836bb44..e416aeb721 100644 --- a/vortex-alp/src/serde.rs +++ b/vortex-alp/src/serde.rs @@ -1,8 +1,8 @@ use crate::alp::Exponents; use vortex::array::{Array, ArrayRef}; -use vortex::dtype::{DType, FloatWidth, Signedness}; use vortex::error::{VortexError, VortexResult}; use vortex::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx}; +use vortex_schema::{DType, FloatWidth, Signedness}; use crate::ALPArray; use crate::ALPEncoding; diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 34258b0b0e..08a6d4707b 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -20,6 +20,7 @@ workspace = true [dependencies] allocator-api2 = "0.2.16" +vortex-schema = { path = "../vortex-schema" } arrow-array = { version = "50.0.0" } arrow-buffer = { version = "50.0.0" } arrow-schema = { version = "50.0.0" } diff --git a/vortex-array/src/array/bool/mod.rs b/vortex-array/src/array/bool/mod.rs index 43c33a72ac..4560804529 100644 --- a/vortex-array/src/array/bool/mod.rs +++ b/vortex-array/src/array/bool/mod.rs @@ -4,8 +4,9 @@ use std::sync::{Arc, RwLock}; use arrow_buffer::buffer::BooleanBuffer; use linkme::distributed_slice; +use vortex_schema::{DType, Nullability}; + use crate::compute::scalar_at::scalar_at; -use crate::dtype::{DType, Nullability}; use crate::error::VortexResult; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::serde::{ArraySerde, EncodingSerde}; diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index 4d40a1aa7b..2b1bc5b1ad 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -3,11 +3,11 @@ use std::sync::{Arc, RwLock}; use itertools::Itertools; use linkme::distributed_slice; +use vortex_schema::DType; use crate::array::{ check_slice_bounds, Array, ArrayRef, Encoding, EncodingId, EncodingRef, ENCODINGS, }; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::serde::{ArraySerde, EncodingSerde}; @@ -214,10 +214,10 @@ impl Encoding for ChunkedEncoding { #[cfg(test)] mod test { use crate::array::{Array, ArrayRef}; + use vortex_schema::{DType, IntWidth, Nullability, Signedness}; use crate::array::chunked::ChunkedArray; use crate::compute::flatten::{flatten, flatten_primitive, FlattenedArray}; - use crate::dtype::{DType, IntWidth, Nullability, Signedness}; use crate::ptype::NativePType; fn chunked_array() -> ChunkedArray { diff --git a/vortex-array/src/array/chunked/serde.rs b/vortex-array/src/array/chunked/serde.rs index ad1bad2ccd..4a186dfeb3 100644 --- a/vortex-array/src/array/chunked/serde.rs +++ b/vortex-array/src/array/chunked/serde.rs @@ -31,8 +31,8 @@ mod test { use crate::array::downcast::DowncastArrayBuiltin; use crate::array::primitive::PrimitiveArray; use crate::array::Array; - use crate::dtype::{DType, IntWidth, Nullability, Signedness}; use crate::serde::test::roundtrip_array; + use vortex_schema::{DType, IntWidth, Nullability, Signedness}; #[test] fn roundtrip() { diff --git a/vortex-array/src/array/composite/array.rs b/vortex-array/src/array/composite/array.rs index 0ab03c624b..7531c42fc7 100644 --- a/vortex-array/src/array/composite/array.rs +++ b/vortex-array/src/array/composite/array.rs @@ -3,14 +3,12 @@ use std::fmt::{Debug, Display}; use std::sync::{Arc, RwLock}; use linkme::distributed_slice; +use vortex_schema::{CompositeID, DType}; -use crate::array::composite::{ - find_extension, CompositeExtensionRef, CompositeID, TypedCompositeArray, -}; +use crate::array::composite::{find_extension, CompositeExtensionRef, TypedCompositeArray}; use crate::array::{Array, ArrayRef, Encoding, EncodingId, EncodingRef, ENCODINGS}; use crate::compress::EncodingCompression; use crate::compute::ArrayCompute; -use crate::dtype::DType; use crate::error::VortexResult; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::serde::{ArraySerde, BytesSerde, EncodingSerde}; diff --git a/vortex-array/src/array/composite/mod.rs b/vortex-array/src/array/composite/mod.rs index 02c2c0369b..030303decb 100644 --- a/vortex-array/src/array/composite/mod.rs +++ b/vortex-array/src/array/composite/mod.rs @@ -1,9 +1,8 @@ -use std::fmt::{Display, Formatter}; - use linkme::distributed_slice; pub use array::*; pub use typed::*; +use vortex_schema::CompositeID; mod array; mod compress; @@ -11,15 +10,6 @@ mod compute; mod serde; mod typed; -#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] -pub struct CompositeID(pub &'static str); - -impl Display for CompositeID { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - #[distributed_slice] pub static COMPOSITE_EXTENSIONS: [&'static dyn CompositeExtension] = [..]; @@ -29,3 +19,7 @@ pub fn find_extension(id: &str) -> Option<&'static dyn CompositeExtension> { .find(|ext| ext.id().0 == id) .copied() } + +pub fn find_extension_id(id: &str) -> Option { + find_extension(id).map(|e| e.id()) +} diff --git a/vortex-array/src/array/composite/serde.rs b/vortex-array/src/array/composite/serde.rs index 03a3ed86dd..72e62f599a 100644 --- a/vortex-array/src/array/composite/serde.rs +++ b/vortex-array/src/array/composite/serde.rs @@ -1,9 +1,11 @@ +use std::sync::Arc; + +use vortex_schema::DType; + use crate::array::composite::{CompositeArray, CompositeEncoding}; use crate::array::{Array, ArrayRef}; -use crate::dtype::DType; use crate::error::VortexResult; use crate::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx}; -use std::sync::Arc; impl ArraySerde for CompositeArray { fn write(&self, ctx: &mut WriteCtx) -> VortexResult<()> { diff --git a/vortex-array/src/array/composite/typed.rs b/vortex-array/src/array/composite/typed.rs index 084c5c064a..a35ad58d1f 100644 --- a/vortex-array/src/array/composite/typed.rs +++ b/vortex-array/src/array/composite/typed.rs @@ -2,7 +2,7 @@ use std::fmt::Debug; use std::sync::Arc; use crate::array::composite::array::CompositeArray; -use crate::array::composite::{CompositeID, CompositeMetadata}; +use crate::array::composite::CompositeMetadata; use crate::array::{Array, ArrayRef}; use crate::compute::ArrayCompute; @@ -53,9 +53,9 @@ macro_rules! composite_impl { CompositeArray, CompositeExtension, CompositeMetadata, COMPOSITE_EXTENSIONS, }; use crate::compute::ArrayCompute; - use crate::dtype::{DType, Nullability}; use linkme::distributed_slice; use paste::paste; + use vortex_schema::{DType, Nullability}; paste! { #[derive(Debug)] @@ -95,3 +95,4 @@ macro_rules! composite_impl { } pub(crate) use composite_impl; +use vortex_schema::CompositeID; diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index d3065f5531..3373a2052b 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -2,11 +2,11 @@ use std::any::Any; use std::sync::{Arc, RwLock}; use linkme::distributed_slice; +use vortex_schema::DType; use crate::array::{ check_slice_bounds, Array, ArrayRef, Encoding, EncodingId, EncodingRef, ENCODINGS, }; -use crate::dtype::DType; use crate::error::VortexResult; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::scalar::Scalar; diff --git a/vortex-array/src/array/constant/stats.rs b/vortex-array/src/array/constant/stats.rs index a4130ff801..0da0a976d7 100644 --- a/vortex-array/src/array/constant/stats.rs +++ b/vortex-array/src/array/constant/stats.rs @@ -1,9 +1,9 @@ use crate::array::constant::ConstantArray; use crate::array::Array; -use crate::dtype::DType; use crate::error::VortexResult; use crate::scalar::Scalar; use crate::stats::{Stat, StatsCompute, StatsSet}; +use vortex_schema::DType; impl StatsCompute for ConstantArray { fn compute(&self, _stat: &Stat) -> VortexResult { diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 83892653c0..f5574d4c0e 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -2,6 +2,7 @@ use std::any::Any; use std::fmt::{Debug, Display, Formatter}; use linkme::distributed_slice; +use vortex_schema::{DType, Nullability}; use crate::array::bool::{BoolArray, BoolEncoding}; use crate::array::chunked::{ChunkedArray, ChunkedEncoding}; @@ -15,7 +16,6 @@ use crate::array::varbin::{VarBinArray, VarBinEncoding}; use crate::array::varbinview::{VarBinViewArray, VarBinViewEncoding}; use crate::compress::EncodingCompression; use crate::compute::ArrayCompute; -use crate::dtype::{DType, Nullability}; use crate::error::{VortexError, VortexResult}; use crate::formatter::ArrayDisplay; use crate::serde::{ArraySerde, EncodingSerde}; diff --git a/vortex-array/src/array/primitive/compute/cast.rs b/vortex-array/src/array/primitive/compute/cast.rs index be7ca6c147..efd5c5ffbc 100644 --- a/vortex-array/src/array/primitive/compute/cast.rs +++ b/vortex-array/src/array/primitive/compute/cast.rs @@ -2,10 +2,10 @@ use crate::array::primitive::PrimitiveArray; use crate::array::CloneOptionalArray; use crate::array::{Array, ArrayRef}; use crate::compute::cast::CastFn; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::match_each_native_ptype; use crate::ptype::{NativePType, PType}; +use vortex_schema::DType; impl CastFn for PrimitiveArray { fn cast(&self, dtype: &DType) -> VortexResult { diff --git a/vortex-array/src/array/primitive/mod.rs b/vortex-array/src/array/primitive/mod.rs index 166def627f..2f9ebfc2ef 100644 --- a/vortex-array/src/array/primitive/mod.rs +++ b/vortex-array/src/array/primitive/mod.rs @@ -10,6 +10,7 @@ use crate::accessor::ArrayAccessor; use allocator_api2::alloc::Allocator; use arrow_buffer::buffer::{Buffer, ScalarBuffer}; use linkme::distributed_slice; +use vortex_schema::DType; use crate::array::bool::BoolArray; use crate::array::{ @@ -17,7 +18,6 @@ use crate::array::{ ENCODINGS, }; use crate::compute::scalar_at::scalar_at; -use crate::dtype::DType; use crate::error::VortexResult; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::iterator::ArrayIter; @@ -310,7 +310,7 @@ impl ArrayDisplay for PrimitiveArray { #[cfg(test)] mod test { - use crate::dtype::{IntWidth, Nullability, Signedness}; + use vortex_schema::{IntWidth, Nullability, Signedness}; use super::*; diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index 9410a4abce..83129d3c56 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -3,6 +3,7 @@ use std::sync::{Arc, RwLock}; use itertools::Itertools; use linkme::distributed_slice; +use vortex_schema::DType; use crate::array::ENCODINGS; use crate::array::{check_slice_bounds, Array, ArrayRef, Encoding, EncodingId, EncodingRef}; @@ -10,7 +11,6 @@ use crate::compress::EncodingCompression; use crate::compute::cast::cast; use crate::compute::flatten::flatten_primitive; use crate::compute::search_sorted::{search_sorted, SearchSortedSide}; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::ptype::PType; diff --git a/vortex-array/src/array/sparse/serde.rs b/vortex-array/src/array/sparse/serde.rs index 7275d25d81..3a0ccd861b 100644 --- a/vortex-array/src/array/sparse/serde.rs +++ b/vortex-array/src/array/sparse/serde.rs @@ -1,9 +1,9 @@ use std::io; use std::io::ErrorKind; +use vortex_schema::DType; use crate::array::sparse::{SparseArray, SparseEncoding}; use crate::array::{Array, ArrayRef}; -use crate::dtype::DType; use crate::error::VortexResult; use crate::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx}; diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index a68bea1e51..4b0e168025 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -3,9 +3,9 @@ use std::sync::{Arc, RwLock}; use itertools::Itertools; use linkme::distributed_slice; +use vortex_schema::{DType, FieldNames}; use crate::compress::EncodingCompression; -use crate::dtype::{DType, FieldNames}; use crate::error::VortexResult; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::serde::{ArraySerde, EncodingSerde}; diff --git a/vortex-array/src/array/struct_/serde.rs b/vortex-array/src/array/struct_/serde.rs index 79737d5264..a3991cad7b 100644 --- a/vortex-array/src/array/struct_/serde.rs +++ b/vortex-array/src/array/struct_/serde.rs @@ -1,8 +1,8 @@ use crate::array::struct_::{StructArray, StructEncoding}; use crate::array::{Array, ArrayRef}; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx}; +use vortex_schema::DType; impl ArraySerde for StructArray { fn write(&self, ctx: &mut WriteCtx) -> VortexResult<()> { diff --git a/vortex-array/src/array/varbin/compute.rs b/vortex-array/src/array/varbin/compute.rs index b00bbf8542..cdd621b05b 100644 --- a/vortex-array/src/array/varbin/compute.rs +++ b/vortex-array/src/array/varbin/compute.rs @@ -4,6 +4,7 @@ use arrow_array::{ ArrayRef as ArrowArrayRef, BinaryArray, LargeBinaryArray, LargeStringArray, StringArray, }; use itertools::Itertools; +use vortex_schema::DType; use crate::array::bool::BoolArray; use crate::array::downcast::DowncastArrayBuiltin; @@ -17,7 +18,6 @@ use crate::compute::cast::cast; use crate::compute::flatten::{flatten, flatten_primitive, FlattenFn, FlattenedArray}; use crate::compute::scalar_at::ScalarAtFn; use crate::compute::ArrayCompute; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::ptype::PType; use crate::scalar::{BinaryScalar, Scalar, Utf8Scalar}; diff --git a/vortex-array/src/array/varbin/mod.rs b/vortex-array/src/array/varbin/mod.rs index 35d543eb3e..7d174e60c5 100644 --- a/vortex-array/src/array/varbin/mod.rs +++ b/vortex-array/src/array/varbin/mod.rs @@ -3,6 +3,7 @@ use std::sync::{Arc, RwLock}; use linkme::distributed_slice; use num_traits::{FromPrimitive, Unsigned}; +use vortex_schema::{DType, IntWidth, Nullability, Signedness}; use crate::array::bool::BoolArray; use crate::array::downcast::DowncastArrayBuiltin; @@ -15,7 +16,6 @@ use crate::array::{ use crate::compress::EncodingCompression; use crate::compute::flatten::flatten_primitive; use crate::compute::scalar_at::scalar_at; -use crate::dtype::{DType, IntWidth, Nullability, Signedness}; use crate::error::{VortexError, VortexResult}; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::ptype::NativePType; @@ -370,7 +370,7 @@ mod test { use crate::array::varbin::VarBinArray; use crate::array::Array; use crate::compute::scalar_at::scalar_at; - use crate::dtype::{DType, Nullability}; + use vortex_schema::{DType, Nullability}; fn binary_array() -> VarBinArray { let values = PrimitiveArray::from( diff --git a/vortex-array/src/array/varbin/serde.rs b/vortex-array/src/array/varbin/serde.rs index f5f24adc66..81435a4afc 100644 --- a/vortex-array/src/array/varbin/serde.rs +++ b/vortex-array/src/array/varbin/serde.rs @@ -31,9 +31,10 @@ impl EncodingSerde for VarBinEncoding { #[cfg(test)] mod test { + use vortex_schema::{DType, Nullability}; + use crate::array::downcast::DowncastArrayBuiltin; use crate::array::varbin::VarBinArray; - use crate::dtype::{DType, Nullability}; use crate::serde::test::roundtrip_array; #[test] diff --git a/vortex-array/src/array/varbin/stats.rs b/vortex-array/src/array/varbin/stats.rs index 619d076ed6..20086c76a7 100644 --- a/vortex-array/src/array/varbin/stats.rs +++ b/vortex-array/src/array/varbin/stats.rs @@ -1,10 +1,10 @@ use std::cmp::Ordering; use std::collections::HashMap; +use vortex_schema::DType; use crate::array::varbin::VarBinArray; use crate::array::varbinview::VarBinViewArray; use crate::array::Array; -use crate::dtype::DType; use crate::error::VortexResult; use crate::stats::{Stat, StatsCompute, StatsSet}; @@ -82,8 +82,8 @@ mod test { use crate::array::primitive::PrimitiveArray; use crate::array::varbin::VarBinArray; use crate::array::Array; - use crate::dtype::{DType, Nullability}; use crate::stats::Stat; + use vortex_schema::{DType, Nullability}; fn array(dtype: DType) -> VarBinArray { let values = PrimitiveArray::from( diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs index a6f440b111..1855779234 100644 --- a/vortex-array/src/array/varbinview/compute.rs +++ b/vortex-array/src/array/varbinview/compute.rs @@ -2,9 +2,9 @@ use crate::array::varbinview::VarBinViewArray; use crate::array::Array; use crate::compute::scalar_at::ScalarAtFn; use crate::compute::ArrayCompute; -use crate::dtype::DType; use crate::error::VortexResult; use crate::scalar::Scalar; +use vortex_schema::DType; impl ArrayCompute for VarBinViewArray { fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { diff --git a/vortex-array/src/array/varbinview/mod.rs b/vortex-array/src/array/varbinview/mod.rs index 4cdac25357..f06ce6695c 100644 --- a/vortex-array/src/array/varbinview/mod.rs +++ b/vortex-array/src/array/varbinview/mod.rs @@ -3,6 +3,7 @@ use std::mem; use std::sync::{Arc, RwLock}; use linkme::distributed_slice; +use vortex_schema::{DType, IntWidth, Nullability, Signedness}; use crate::array::{ check_slice_bounds, check_validity_buffer, Array, ArrayRef, Encoding, EncodingId, EncodingRef, @@ -10,7 +11,6 @@ use crate::array::{ }; use crate::compute::flatten::flatten_primitive; use crate::compute::scalar_at::scalar_at; -use crate::dtype::{DType, IntWidth, Nullability, Signedness}; use crate::error::{VortexError, VortexResult}; use crate::formatter::{ArrayDisplay, ArrayFormatter}; use crate::serde::{ArraySerde, EncodingSerde}; diff --git a/vortex-array/src/array/varbinview/serde.rs b/vortex-array/src/array/varbinview/serde.rs index a476077a78..7b1773f710 100644 --- a/vortex-array/src/array/varbinview/serde.rs +++ b/vortex-array/src/array/varbinview/serde.rs @@ -40,8 +40,8 @@ mod test { use crate::array::primitive::PrimitiveArray; use crate::array::varbinview::{BinaryView, Inlined, Ref, VarBinViewArray}; use crate::array::Array; - use crate::dtype::{DType, Nullability}; use crate::serde::test::roundtrip_array; + use vortex_schema::{DType, Nullability}; fn binary_array() -> VarBinViewArray { let values = PrimitiveArray::from("hello world this is a long string".as_bytes().to_vec()); diff --git a/vortex-array/src/arrow/dtypes.rs b/vortex-array/src/arrow/dtypes.rs index c00c036b2b..770ca4d6b1 100644 --- a/vortex-array/src/arrow/dtypes.rs +++ b/vortex-array/src/arrow/dtypes.rs @@ -1,16 +1,17 @@ use std::sync::Arc; use arrow_array::RecordBatch; -use arrow_schema::{DataType, Field, SchemaRef, TimeUnit as ArrowTimeUnit}; +use arrow_schema::TimeUnit as ArrowTimeUnit; +use arrow_schema::{DataType, Field, SchemaRef}; use itertools::Itertools; +use vortex_schema::{DType, FloatWidth, IntWidth, Nullability}; use crate::array::struct_::StructArray; use crate::array::{Array, ArrayRef}; +use crate::arrow::FromArrowType; use crate::compute::cast::cast; use crate::datetime::{LocalDateTimeExtension, TimeUnit}; -use crate::dtype::DType::*; -use crate::dtype::{DType, FloatWidth, IntWidth, Nullability}; -use crate::encode::FromArrow; +use crate::encode::FromArrowArray; use crate::error::{VortexError, VortexResult}; use crate::ptype::PType; @@ -32,8 +33,13 @@ impl From for ArrayRef { .map(|(array, field)| { // The dtype of the child arrays infer their nullability from the array itself. // In case the schema says something different, we cast into the schema's dtype. - let vortex_array = ArrayRef::from_arrow(array.clone(), field.is_nullable()); - cast(vortex_array.as_ref(), &field.as_ref().into()).unwrap() + let vortex_array = + ArrayRef::from_arrow_array(array.clone(), field.is_nullable()); + cast( + vortex_array.as_ref(), + &DType::from_arrow_type(field.as_ref()), + ) + .unwrap() }) .collect(), ) @@ -41,25 +47,6 @@ impl From for ArrayRef { } } -impl TryFrom for DType { - type Error = VortexError; - - fn try_from(value: SchemaRef) -> VortexResult { - Ok(Struct( - value - .fields() - .iter() - .map(|f| Arc::new(f.name().clone())) - .collect(), - value - .fields() - .iter() - .map(|f| f.as_ref().into()) - .collect_vec(), - )) - } -} - impl TryFrom<&DataType> for PType { type Error = VortexError; @@ -87,9 +74,27 @@ impl TryFrom<&DataType> for PType { } } -impl From<&Field> for DType { - fn from(field: &Field) -> Self { - use crate::dtype::Signedness::*; +impl FromArrowType for DType { + fn from_arrow_type(value: SchemaRef) -> Self { + DType::Struct( + value + .fields() + .iter() + .map(|f| Arc::new(f.name().clone())) + .collect(), + value + .fields() + .iter() + .map(|f| DType::from_arrow_type(f.as_ref())) + .collect_vec(), + ) + } +} + +impl FromArrowType<&Field> for DType { + fn from_arrow_type(field: &Field) -> Self { + use vortex_schema::DType::*; + use vortex_schema::Signedness::*; let nullability: Nullability = field.is_nullable().into(); @@ -109,7 +114,6 @@ impl From<&Field> for DType { DataType::Float64 => Float(FloatWidth::_64, nullability), DataType::Utf8 | DataType::LargeUtf8 => Utf8(nullability), DataType::Binary | DataType::LargeBinary => Binary(nullability), - // TODO(robert): what to do about this timezone? DataType::Timestamp(_u, tz) => match tz { None => LocalDateTimeExtension::dtype(nullability), Some(_) => unimplemented!("Timezone not yet supported"), @@ -119,11 +123,13 @@ impl From<&Field> for DType { // DataType::Time32(u) => localtime(u.into(), IntWidth::_32, nullability), // DataType::Time64(u) => localtime(u.into(), IntWidth::_64, nullability), DataType::List(e) | DataType::LargeList(e) => { - List(Box::new(e.as_ref().into()), nullability) + List(Box::new(DType::from_arrow_type(e.as_ref())), nullability) } DataType::Struct(f) => Struct( f.iter().map(|f| Arc::new(f.name().clone())).collect(), - f.iter().map(|f| f.as_ref().into()).collect_vec(), + f.iter() + .map(|f| DType::from_arrow_type(f.as_ref())) + .collect_vec(), ), DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => Decimal(*p, *s, nullability), _ => unimplemented!("Arrow data type not yet supported: {:?}", field.data_type()), diff --git a/vortex-array/src/arrow/mod.rs b/vortex-array/src/arrow/mod.rs index c877433917..2f99f3fcb5 100644 --- a/vortex-array/src/arrow/mod.rs +++ b/vortex-array/src/arrow/mod.rs @@ -1,2 +1,6 @@ pub mod dtypes; pub mod wrappers; + +pub trait FromArrowType: Sized { + fn from_arrow_type(value: T) -> Self; +} diff --git a/vortex-array/src/compute/cast.rs b/vortex-array/src/compute/cast.rs index f8759ed3bd..2c5fda96c6 100644 --- a/vortex-array/src/compute/cast.rs +++ b/vortex-array/src/compute/cast.rs @@ -1,6 +1,6 @@ use crate::array::{Array, ArrayRef}; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; +use vortex_schema::DType; pub trait CastFn { fn cast(&self, dtype: &DType) -> VortexResult; diff --git a/vortex-array/src/datetime/localdatetime.rs b/vortex-array/src/datetime/localdatetime.rs index d297100c90..8cd6280ab4 100644 --- a/vortex-array/src/datetime/localdatetime.rs +++ b/vortex-array/src/datetime/localdatetime.rs @@ -6,9 +6,9 @@ use arrow_array::{ TimestampNanosecondArray, TimestampSecondArray, }; -use crate::array::composite::CompositeID; -use crate::array::composite::{composite_impl, TypedCompositeArray}; +use vortex_schema::CompositeID; +use crate::array::composite::{composite_impl, TypedCompositeArray}; use crate::arrow::wrappers::as_nulls; use crate::compute::as_arrow::AsArrowArray; use crate::compute::cast::cast; diff --git a/vortex-array/src/encode.rs b/vortex-array/src/encode.rs index 570f0da1e9..205c7125d6 100644 --- a/vortex-array/src/encode.rs +++ b/vortex-array/src/encode.rs @@ -21,6 +21,8 @@ use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; use arrow_buffer::Buffer; use arrow_schema::{DataType, TimeUnit}; +use vortex_schema::DType; + use crate::array::bool::BoolArray; use crate::array::constant::ConstantArray; use crate::array::primitive::PrimitiveArray; @@ -28,12 +30,11 @@ use crate::array::struct_::StructArray; use crate::array::varbin::VarBinArray; use crate::array::{Array, ArrayRef}; use crate::datetime::{LocalDateTime, LocalDateTimeArray}; -use crate::dtype::DType; use crate::ptype::PType; use crate::scalar::NullScalar; -pub trait FromArrow { - fn from_arrow(array: A, nullable: bool) -> Self; +pub trait FromArrowArray { + fn from_arrow_array(array: A, nullable: bool) -> Self; } impl From<&Buffer> for ArrayRef { @@ -55,8 +56,8 @@ impl From<&OffsetBuffer> for ArrayRef { } } -impl FromArrow<&ArrowPrimitiveArray> for ArrayRef { - fn from_arrow(value: &ArrowPrimitiveArray, nullable: bool) -> Self { +impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { + fn from_arrow_array(value: &ArrowPrimitiveArray, nullable: bool) -> Self { let ptype: PType = (&T::DATA_TYPE).try_into().unwrap(); let arr = PrimitiveArray::new( ptype, @@ -89,8 +90,8 @@ impl FromArrow<&ArrowPrimitiveArray> for ArrayRef { } } -impl FromArrow<&GenericByteArray> for ArrayRef { - fn from_arrow(value: &GenericByteArray, nullable: bool) -> Self { +impl FromArrowArray<&GenericByteArray> for ArrayRef { + fn from_arrow_array(value: &GenericByteArray, nullable: bool) -> Self { let dtype = match T::DATA_TYPE { DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()), DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()), @@ -106,8 +107,8 @@ impl FromArrow<&GenericByteArray> for ArrayRef { } } -impl FromArrow<&ArrowBooleanArray> for ArrayRef { - fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { +impl FromArrowArray<&ArrowBooleanArray> for ArrayRef { + fn from_arrow_array(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::new( value.values().to_owned(), nulls(value.nulls(), nullable, value.len()), @@ -116,8 +117,8 @@ impl FromArrow<&ArrowBooleanArray> for ArrayRef { } } -impl FromArrow<&ArrowStructArray> for ArrayRef { - fn from_arrow(value: &ArrowStructArray, nullable: bool) -> Self { +impl FromArrowArray<&ArrowStructArray> for ArrayRef { + fn from_arrow_array(value: &ArrowStructArray, nullable: bool) -> Self { // TODO(ngates): how should we deal with Arrow "logical nulls"? assert!(!nullable); StructArray::new( @@ -131,15 +132,15 @@ impl FromArrow<&ArrowStructArray> for ArrayRef { .columns() .iter() .zip(value.fields()) - .map(|(c, field)| ArrayRef::from_arrow(c.clone(), field.is_nullable())) + .map(|(c, field)| ArrayRef::from_arrow_array(c.clone(), field.is_nullable())) .collect(), ) .boxed() } } -impl FromArrow<&ArrowNullArray> for ArrayRef { - fn from_arrow(value: &ArrowNullArray, nullable: bool) -> Self { +impl FromArrowArray<&ArrowNullArray> for ArrayRef { + fn from_arrow_array(value: &ArrowNullArray, nullable: bool) -> Self { assert!(nullable); ConstantArray::new(NullScalar::new().into(), value.len()).boxed() } @@ -158,80 +159,110 @@ fn nulls(nulls: Option<&NullBuffer>, nullable: bool, len: usize) -> Option for ArrayRef { - fn from_arrow(array: ArrowArrayRef, nullable: bool) -> Self { +impl FromArrowArray for ArrayRef { + fn from_arrow_array(array: ArrowArrayRef, nullable: bool) -> Self { match array.data_type() { - DataType::Boolean => ArrayRef::from_arrow(array.as_boolean(), nullable), - DataType::UInt8 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::UInt16 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::UInt32 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::UInt64 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::Int8 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::Int16 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::Int32 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::Int64 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Boolean => ArrayRef::from_arrow_array(array.as_boolean(), nullable), + DataType::UInt8 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::UInt16 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::UInt32 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::UInt64 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::Int8 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::Int16 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::Int32 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::Int64 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } DataType::Float16 => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) } DataType::Float32 => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) } DataType::Float64 => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) } - DataType::Utf8 => ArrayRef::from_arrow(array.as_string::(), nullable), - DataType::LargeUtf8 => ArrayRef::from_arrow(array.as_string::(), nullable), - DataType::Binary => ArrayRef::from_arrow(array.as_binary::(), nullable), - DataType::LargeBinary => ArrayRef::from_arrow(array.as_binary::(), nullable), - DataType::Struct(_) => ArrayRef::from_arrow(array.as_struct(), nullable), - DataType::Null => ArrayRef::from_arrow(as_null_array(array.as_ref()), nullable), + DataType::Utf8 => ArrayRef::from_arrow_array(array.as_string::(), nullable), + DataType::LargeUtf8 => ArrayRef::from_arrow_array(array.as_string::(), nullable), + DataType::Binary => ArrayRef::from_arrow_array(array.as_binary::(), nullable), + DataType::LargeBinary => ArrayRef::from_arrow_array(array.as_binary::(), nullable), + DataType::Struct(_) => ArrayRef::from_arrow_array(array.as_struct(), nullable), + DataType::Null => ArrayRef::from_arrow_array(as_null_array(array.as_ref()), nullable), DataType::Timestamp(u, _) => match u { - TimeUnit::Second => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Millisecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Microsecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Nanosecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } + TimeUnit::Second => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), + TimeUnit::Millisecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), + TimeUnit::Microsecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), + TimeUnit::Nanosecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), }, - DataType::Date32 => ArrayRef::from_arrow(array.as_primitive::(), nullable), - DataType::Date64 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Date32 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } + DataType::Date64 => { + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + } DataType::Time32(u) => match u { TimeUnit::Second => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Millisecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) } + TimeUnit::Millisecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), _ => unreachable!(), }, DataType::Time64(u) => match u { - TimeUnit::Microsecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Nanosecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } + TimeUnit::Microsecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), + TimeUnit::Nanosecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), _ => unreachable!(), }, DataType::Duration(u) => match u { TimeUnit::Second => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Millisecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Microsecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) - } - TimeUnit::Nanosecond => { - ArrayRef::from_arrow(array.as_primitive::(), nullable) + ArrayRef::from_arrow_array(array.as_primitive::(), nullable) } + TimeUnit::Millisecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), + TimeUnit::Microsecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), + TimeUnit::Nanosecond => ArrayRef::from_arrow_array( + array.as_primitive::(), + nullable, + ), }, _ => panic!( "TODO(robert): Missing array encoding for dtype {}", diff --git a/vortex-array/src/error.rs b/vortex-array/src/error.rs index 391a7b35ff..b5109b5e81 100644 --- a/vortex-array/src/error.rs +++ b/vortex-array/src/error.rs @@ -1,48 +1,10 @@ -use std::borrow::Cow; -use std::fmt::{self, Display, Formatter}; -use std::ops::Deref; -use std::{env, io}; +use std::io; + +use vortex_schema::{DType, ErrString}; use crate::array::EncodingId; -use crate::dtype::DType; use crate::ptype::PType; -#[derive(Debug, PartialEq)] -pub struct ErrString(Cow<'static, str>); - -impl From for ErrString -where - T: Into>, -{ - fn from(msg: T) -> Self { - if env::var("ENC_PANIC_ON_ERR").as_deref().unwrap_or("") == "1" { - panic!("{}", msg.into()) - } else { - ErrString(msg.into()) - } - } -} - -impl AsRef for ErrString { - fn as_ref(&self) -> &str { - &self.0 - } -} - -impl Deref for ErrString { - type Target = str; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl Display for ErrString { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - #[derive(Debug, thiserror::Error, PartialEq)] pub enum VortexError { #[error("index {0} out of bounds from {1} to {2}")] diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index c97210cdce..7ae22ad33e 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -6,7 +6,6 @@ pub mod accessor; pub mod compress; pub mod compute; pub mod datetime; -pub mod dtype; pub mod encode; pub mod error; pub mod formatter; diff --git a/vortex-array/src/ptype.rs b/vortex-array/src/ptype.rs index 122d59b86d..b13dd680a2 100644 --- a/vortex-array/src/ptype.rs +++ b/vortex-array/src/ptype.rs @@ -1,11 +1,13 @@ -use arrow_buffer::ArrowNativeType; use std::fmt::{Debug, Display, Formatter}; use std::panic::RefUnwindSafe; +use arrow_buffer::ArrowNativeType; use half::f16; use num_traits::{Num, NumCast}; -use crate::dtype::{DType, FloatWidth, IntWidth, Signedness}; +use vortex_schema::DType::*; +use vortex_schema::{DType, FloatWidth, IntWidth}; + use crate::error::{VortexError, VortexResult}; use crate::scalar::{PScalar, Scalar}; @@ -155,7 +157,7 @@ impl TryFrom<&DType> for PType { type Error = VortexError; fn try_from(value: &DType) -> VortexResult { - use Signedness::*; + use vortex_schema::Signedness::*; match value { DType::Int(w, s, _) => match w { IntWidth::Unknown => match s { @@ -196,3 +198,45 @@ impl TryFrom<&DType> for PType { } } } + +impl From for &DType { + fn from(item: PType) -> Self { + use vortex_schema::Nullability::*; + use vortex_schema::Signedness::*; + + match item { + PType::I8 => &Int(IntWidth::_8, Signed, NonNullable), + PType::I16 => &Int(IntWidth::_16, Signed, NonNullable), + PType::I32 => &Int(IntWidth::_32, Signed, NonNullable), + PType::I64 => &Int(IntWidth::_64, Signed, NonNullable), + PType::U8 => &Int(IntWidth::_8, Unsigned, NonNullable), + PType::U16 => &Int(IntWidth::_16, Unsigned, NonNullable), + PType::U32 => &Int(IntWidth::_32, Unsigned, NonNullable), + PType::U64 => &Int(IntWidth::_64, Unsigned, NonNullable), + PType::F16 => &Float(FloatWidth::_16, NonNullable), + PType::F32 => &Float(FloatWidth::_32, NonNullable), + PType::F64 => &Float(FloatWidth::_64, NonNullable), + } + } +} + +impl From for DType { + fn from(item: PType) -> Self { + use vortex_schema::Nullability::*; + use vortex_schema::Signedness::*; + + match item { + PType::I8 => Int(IntWidth::_8, Signed, NonNullable), + PType::I16 => Int(IntWidth::_16, Signed, NonNullable), + PType::I32 => Int(IntWidth::_32, Signed, NonNullable), + PType::I64 => Int(IntWidth::_64, Signed, NonNullable), + PType::U8 => Int(IntWidth::_8, Unsigned, NonNullable), + PType::U16 => Int(IntWidth::_16, Unsigned, NonNullable), + PType::U32 => Int(IntWidth::_32, Unsigned, NonNullable), + PType::U64 => Int(IntWidth::_64, Unsigned, NonNullable), + PType::F16 => Float(FloatWidth::_16, NonNullable), + PType::F32 => Float(FloatWidth::_32, NonNullable), + PType::F64 => Float(FloatWidth::_64, NonNullable), + } + } +} diff --git a/vortex-array/src/scalar/binary.rs b/vortex-array/src/scalar/binary.rs index 3975ee1edd..fa0a9f29cd 100644 --- a/vortex-array/src/scalar/binary.rs +++ b/vortex-array/src/scalar/binary.rs @@ -1,6 +1,6 @@ use std::fmt::{Display, Formatter}; +use vortex_schema::{DType, Nullability}; -use crate::dtype::{DType, Nullability}; use crate::error::{VortexError, VortexResult}; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/bool.rs b/vortex-array/src/scalar/bool.rs index fb4093e727..670ea02678 100644 --- a/vortex-array/src/scalar/bool.rs +++ b/vortex-array/src/scalar/bool.rs @@ -1,6 +1,6 @@ use std::fmt::{Display, Formatter}; +use vortex_schema::{DType, Nullability}; -use crate::dtype::{DType, Nullability}; use crate::error::{VortexError, VortexResult}; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/composite.rs b/vortex-array/src/scalar/composite.rs index 7563897f5f..edd5734627 100644 --- a/vortex-array/src/scalar/composite.rs +++ b/vortex-array/src/scalar/composite.rs @@ -1,6 +1,6 @@ use std::fmt::{Display, Formatter}; +use vortex_schema::DType; -use crate::dtype::DType; use crate::error::VortexResult; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/list.rs b/vortex-array/src/scalar/list.rs index 012d401e82..1fcb508561 100644 --- a/vortex-array/src/scalar/list.rs +++ b/vortex-array/src/scalar/list.rs @@ -1,8 +1,8 @@ use std::fmt::{Display, Formatter}; use itertools::Itertools; +use vortex_schema::DType; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/mod.rs b/vortex-array/src/scalar/mod.rs index d39799be0b..82191cddcb 100644 --- a/vortex-array/src/scalar/mod.rs +++ b/vortex-array/src/scalar/mod.rs @@ -9,8 +9,8 @@ pub use primitive::*; pub use serde::*; pub use struct_::*; pub use utf8::*; +use vortex_schema::{DType, FloatWidth, IntWidth, Signedness}; -use crate::dtype::{DType, FloatWidth, IntWidth, Signedness}; use crate::error::VortexResult; use crate::ptype::{NativePType, PType}; diff --git a/vortex-array/src/scalar/null.rs b/vortex-array/src/scalar/null.rs index 817dbecccd..97b5547dfd 100644 --- a/vortex-array/src/scalar/null.rs +++ b/vortex-array/src/scalar/null.rs @@ -1,6 +1,6 @@ use std::fmt::{Display, Formatter}; +use vortex_schema::DType; -use crate::dtype::DType; use crate::error::VortexResult; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/primitive.rs b/vortex-array/src/scalar/primitive.rs index bf526eb169..09fff7ae85 100644 --- a/vortex-array/src/scalar/primitive.rs +++ b/vortex-array/src/scalar/primitive.rs @@ -2,8 +2,8 @@ use std::fmt::{Display, Formatter}; use std::mem::size_of; use half::f16; +use vortex_schema::DType; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::ptype::{NativePType, PType}; use crate::scalar::composite::CompositeScalar; @@ -349,10 +349,10 @@ impl Display for PScalar { #[cfg(test)] mod test { - use crate::dtype::{DType, IntWidth, Nullability, Signedness}; use crate::error::VortexError; use crate::ptype::PType; use crate::scalar::Scalar; + use vortex_schema::{DType, IntWidth, Nullability, Signedness}; #[test] fn into_from() { diff --git a/vortex-array/src/scalar/serde.rs b/vortex-array/src/scalar/serde.rs index 8b14a36cee..3b07f9be66 100644 --- a/vortex-array/src/scalar/serde.rs +++ b/vortex-array/src/scalar/serde.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use half::f16; use num_enum::{IntoPrimitive, TryFromPrimitive}; +use vortex_schema::DType; -use crate::dtype::DType; use crate::error::VortexResult; use crate::ptype::PType; use crate::scalar::composite::CompositeScalar; diff --git a/vortex-array/src/scalar/struct_.rs b/vortex-array/src/scalar/struct_.rs index f097304523..ae8c5d1236 100644 --- a/vortex-array/src/scalar/struct_.rs +++ b/vortex-array/src/scalar/struct_.rs @@ -4,7 +4,8 @@ use std::sync::Arc; use itertools::Itertools; -use crate::dtype::DType; +use vortex_schema::DType; + use crate::error::{VortexError, VortexResult}; use crate::scalar::Scalar; diff --git a/vortex-array/src/scalar/utf8.rs b/vortex-array/src/scalar/utf8.rs index 5153ab5511..090d9660f1 100644 --- a/vortex-array/src/scalar/utf8.rs +++ b/vortex-array/src/scalar/utf8.rs @@ -1,6 +1,6 @@ use std::fmt::{Display, Formatter}; +use vortex_schema::{DType, Nullability}; -use crate::dtype::{DType, Nullability}; use crate::error::{VortexError, VortexResult}; use crate::scalar::Scalar; diff --git a/vortex-array/src/serde/mod.rs b/vortex-array/src/serde/mod.rs index 331e862c4a..68c811ff4f 100644 --- a/vortex-array/src/serde/mod.rs +++ b/vortex-array/src/serde/mod.rs @@ -3,15 +3,17 @@ use std::io::{ErrorKind, Read, Write}; use arrow_buffer::buffer::{Buffer, MutableBuffer}; +use vortex_schema::{ + DType, DTypeReader, DTypeWriter, IntWidth, Nullability, SchemaError, Signedness, +}; + +use crate::array::composite::find_extension_id; use crate::array::{Array, ArrayRef, EncodingId, ENCODINGS}; -use crate::dtype::{DType, IntWidth, Nullability, Signedness}; use crate::error::{VortexError, VortexResult}; use crate::ptype::PType; use crate::scalar::{Scalar, ScalarReader, ScalarWriter}; -pub use crate::serde::dtype::{DTypeReader, DTypeWriter}; use crate::serde::ptype::PTypeTag; -mod dtype; mod ptype; pub trait ArraySerde { @@ -80,7 +82,12 @@ impl<'a> ReadCtx<'a> { #[inline] pub fn dtype(&mut self) -> VortexResult { - DTypeReader::new(self.r).read() + DTypeReader::new(self.r) + .read(find_extension_id) + .map_err(|e| match e { + SchemaError::InvalidArgument(s) => VortexError::InvalidArgument(s), + SchemaError::IOError(io_err) => io_err.0.into(), + }) } pub fn ptype(&mut self) -> VortexResult { @@ -175,7 +182,10 @@ impl<'a> WriteCtx<'a> { } pub fn dtype(&mut self, dtype: &DType) -> VortexResult<()> { - DTypeWriter::new(self).write(dtype) + DTypeWriter::new(self.w).write(dtype).map_err(|e| match e { + SchemaError::InvalidArgument(s) => VortexError::InvalidArgument(s), + SchemaError::IOError(io_err) => io_err.0.into(), + }) } pub fn ptype(&mut self, ptype: PType) -> VortexResult<()> { diff --git a/vortex-array/src/stats.rs b/vortex-array/src/stats.rs index 6c94bbb1c3..b1e9a260a2 100644 --- a/vortex-array/src/stats.rs +++ b/vortex-array/src/stats.rs @@ -4,8 +4,8 @@ use std::collections::HashMap; use std::sync::RwLock; use itertools::Itertools; +use vortex_schema::DType; -use crate::dtype::DType; use crate::error::{VortexError, VortexResult}; use crate::ptype::NativePType; use crate::scalar::{ListScalarVec, Scalar}; diff --git a/vortex-dict/Cargo.toml b/vortex-dict/Cargo.toml index 9dec75e82e..2cd21ffc33 100644 --- a/vortex-dict/Cargo.toml +++ b/vortex-dict/Cargo.toml @@ -14,6 +14,7 @@ rust-version = { workspace = true } [dependencies] ahash = "0.8.7" vortex-array = { path = "../vortex-array" } +vortex-schema = { path = "../vortex-schema" } half = { version = "2.3.1", features = ["std", "num-traits"] } hashbrown = "0.14.3" linkme = "0.3.22" diff --git a/vortex-dict/src/compress.rs b/vortex-dict/src/compress.rs index 6595adfcd5..604c5c1d18 100644 --- a/vortex-dict/src/compress.rs +++ b/vortex-dict/src/compress.rs @@ -10,12 +10,12 @@ use vortex::array::varbin::{VarBinArray, VarBinEncoding}; use vortex::array::{Array, ArrayKind, ArrayRef}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::dtype::DType; use vortex::error::VortexResult; use vortex::match_each_native_ptype; use vortex::ptype::NativePType; use vortex::scalar::AsBytes; use vortex::stats::Stat; +use vortex_schema::DType; use crate::dict::{DictArray, DictEncoding}; use crate::downcast::DowncastDict; diff --git a/vortex-dict/src/dict.rs b/vortex-dict/src/dict.rs index ff09dab4d8..d6999f33d3 100644 --- a/vortex-dict/src/dict.rs +++ b/vortex-dict/src/dict.rs @@ -3,11 +3,11 @@ use std::sync::{Arc, RwLock}; use vortex::array::{check_slice_bounds, Array, ArrayRef, Encoding, EncodingId}; use vortex::compress::EncodingCompression; -use vortex::dtype::{DType, Signedness}; use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stats, StatsSet}; +use vortex_schema::{DType, Signedness}; #[derive(Debug, Clone)] pub struct DictArray { diff --git a/vortex-fastlanes/Cargo.toml b/vortex-fastlanes/Cargo.toml index 7a1903a5fd..3007e588e7 100644 --- a/vortex-fastlanes/Cargo.toml +++ b/vortex-fastlanes/Cargo.toml @@ -16,8 +16,9 @@ workspace = true [dependencies] arrayref = "0.3.7" -vortex-array = { path = "../vortex-array" } -linkme = "0.3.22" +fastlanez-sys = { path = "../fastlanez-sys" } itertools = "0.12.1" +linkme = "0.3.22" num-traits = "0.2.18" -fastlanez-sys = { path = "../fastlanez-sys" } +vortex-array = { path = "../vortex-array" } +vortex-schema = { path = "../vortex-schema" } diff --git a/vortex-fastlanes/src/bitpacking/mod.rs b/vortex-fastlanes/src/bitpacking/mod.rs index 63b5cef05f..fdcb1a6dda 100644 --- a/vortex-fastlanes/src/bitpacking/mod.rs +++ b/vortex-fastlanes/src/bitpacking/mod.rs @@ -4,11 +4,11 @@ use std::sync::{Arc, RwLock}; use vortex::array::{check_validity_buffer, Array, ArrayRef, Encoding, EncodingId, EncodingRef}; use vortex::compress::EncodingCompression; use vortex::compute::scalar_at::scalar_at; -use vortex::dtype::DType; use vortex::error::VortexResult; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stat, Stats, StatsCompute, StatsSet}; +use vortex_schema::DType; mod compress; mod compute; diff --git a/vortex-fastlanes/src/for/mod.rs b/vortex-fastlanes/src/for/mod.rs index 47b90e48a7..3988f43fc8 100644 --- a/vortex-fastlanes/src/for/mod.rs +++ b/vortex-fastlanes/src/for/mod.rs @@ -3,12 +3,12 @@ use std::sync::{Arc, RwLock}; use vortex::array::{Array, ArrayRef, Encoding, EncodingId, EncodingRef}; use vortex::compress::EncodingCompression; -use vortex::dtype::DType; use vortex::error::VortexResult; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::scalar::Scalar; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stat, Stats, StatsCompute, StatsSet}; +use vortex_schema::DType; mod compress; mod compute; diff --git a/vortex-ree/Cargo.toml b/vortex-ree/Cargo.toml index 99109d847b..966c9d38b2 100644 --- a/vortex-ree/Cargo.toml +++ b/vortex-ree/Cargo.toml @@ -13,6 +13,7 @@ rust-version = { workspace = true } [dependencies] vortex-array = { path = "../vortex-array" } +vortex-schema = { path = "../vortex-schema" } arrow-array = "50.0.0" arrow-buffer = "50.0.0" linkme = "0.3.22" diff --git a/vortex-ree/src/ree.rs b/vortex-ree/src/ree.rs index f2fe6a9dcb..2b95fb8366 100644 --- a/vortex-ree/src/ree.rs +++ b/vortex-ree/src/ree.rs @@ -8,11 +8,11 @@ use vortex::array::{ use vortex::compress::EncodingCompression; use vortex::compute; use vortex::compute::search_sorted::SearchSortedSide; -use vortex::dtype::DType; use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stat, Stats, StatsCompute, StatsSet}; +use vortex_schema::DType; use crate::compress::ree_encode; @@ -220,7 +220,7 @@ mod test { use vortex::array::Array; use vortex::compute::flatten::flatten_primitive; use vortex::compute::scalar_at::scalar_at; - use vortex::dtype::{DType, IntWidth, Nullability, Signedness}; + use vortex_schema::{DType, IntWidth, Nullability, Signedness}; use crate::REEArray; diff --git a/vortex-roaring/Cargo.toml b/vortex-roaring/Cargo.toml index 089635e27f..67a36cdf8f 100644 --- a/vortex-roaring/Cargo.toml +++ b/vortex-roaring/Cargo.toml @@ -13,6 +13,7 @@ rust-version = { workspace = true } [dependencies] vortex-array = { path = "../vortex-array" } +vortex-schema = { path = "../vortex-schema" } linkme = "0.3.22" croaring = "1.0.1" num-traits = "0.2.17" diff --git a/vortex-roaring/src/boolean/compress.rs b/vortex-roaring/src/boolean/compress.rs index cb6ad9e244..37a2a1bd65 100644 --- a/vortex-roaring/src/boolean/compress.rs +++ b/vortex-roaring/src/boolean/compress.rs @@ -4,9 +4,9 @@ use vortex::array::bool::{BoolArray, BoolEncoding}; use vortex::array::downcast::DowncastArrayBuiltin; use vortex::array::{Array, ArrayRef}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::dtype::DType; -use vortex::dtype::Nullability::NonNullable; use vortex::error::VortexResult; +use vortex_schema::DType; +use vortex_schema::Nullability::NonNullable; use crate::boolean::{RoaringBoolArray, RoaringBoolEncoding}; diff --git a/vortex-roaring/src/boolean/mod.rs b/vortex-roaring/src/boolean/mod.rs index 9923057103..e6865f7c45 100644 --- a/vortex-roaring/src/boolean/mod.rs +++ b/vortex-roaring/src/boolean/mod.rs @@ -8,12 +8,12 @@ use vortex::array::{ check_slice_bounds, Array, ArrayKind, ArrayRef, Encoding, EncodingId, EncodingRef, }; use vortex::compress::EncodingCompression; -use vortex::dtype::DType; -use vortex::dtype::Nullability::NonNullable; use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stats, StatsSet}; +use vortex_schema::DType; +use vortex_schema::Nullability::NonNullable; mod compress; mod compute; diff --git a/vortex-roaring/src/integer/compress.rs b/vortex-roaring/src/integer/compress.rs index d6075ccc24..83a262c29a 100644 --- a/vortex-roaring/src/integer/compress.rs +++ b/vortex-roaring/src/integer/compress.rs @@ -6,12 +6,12 @@ use vortex::array::downcast::DowncastArrayBuiltin; use vortex::array::primitive::{PrimitiveArray, PrimitiveEncoding}; use vortex::array::{Array, ArrayRef}; use vortex::compress::{CompressConfig, CompressCtx, EncodingCompression}; -use vortex::dtype::DType; -use vortex::dtype::Nullability::NonNullable; -use vortex::dtype::Signedness::Unsigned; use vortex::error::VortexResult; use vortex::ptype::{NativePType, PType}; use vortex::stats::Stat; +use vortex_schema::DType; +use vortex_schema::Nullability::NonNullable; +use vortex_schema::Signedness::Unsigned; use crate::{RoaringIntArray, RoaringIntEncoding}; diff --git a/vortex-roaring/src/integer/mod.rs b/vortex-roaring/src/integer/mod.rs index 1a6b71e36a..ada2d48e58 100644 --- a/vortex-roaring/src/integer/mod.rs +++ b/vortex-roaring/src/integer/mod.rs @@ -8,12 +8,12 @@ use vortex::array::{ check_slice_bounds, Array, ArrayKind, ArrayRef, Encoding, EncodingId, EncodingRef, }; use vortex::compress::EncodingCompression; -use vortex::dtype::DType; use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::ptype::PType; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stats, StatsSet}; +use vortex_schema::DType; mod compress; mod compute; diff --git a/vortex-schema/Cargo.toml b/vortex-schema/Cargo.toml new file mode 100644 index 0000000000..61cfdb1ab4 --- /dev/null +++ b/vortex-schema/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "vortex-schema" +version = { workspace = true } +description = "Vortex schema" +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +include = { workspace = true } +edition = { workspace = true } +rust-version = { workspace = true } + +[lib] +name = "vortex_schema" +path = "src/lib.rs" + +[dependencies] +arrow-schema = "50.0.0" +itertools = "0.12.1" +leb128 = "0.2.5" +num_enum = "0.7.2" +thiserror = "1.0.58" + +[lints] +workspace = true diff --git a/vortex-array/src/dtype.rs b/vortex-schema/src/dtype.rs similarity index 77% rename from vortex-array/src/dtype.rs rename to vortex-schema/src/dtype.rs index 1e8aa2b585..6f7de6c8d8 100644 --- a/vortex-array/src/dtype.rs +++ b/vortex-schema/src/dtype.rs @@ -4,10 +4,9 @@ use std::sync::Arc; use itertools::Itertools; -use crate::array::composite::CompositeID; use DType::*; -use crate::ptype::PType; +use crate::CompositeID; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] pub enum Nullability { @@ -227,48 +226,6 @@ impl Display for DType { } } -impl From for &DType { - fn from(item: PType) -> Self { - use Nullability::*; - use Signedness::*; - - match item { - PType::I8 => &Int(IntWidth::_8, Signed, NonNullable), - PType::I16 => &Int(IntWidth::_16, Signed, NonNullable), - PType::I32 => &Int(IntWidth::_32, Signed, NonNullable), - PType::I64 => &Int(IntWidth::_64, Signed, NonNullable), - PType::U8 => &Int(IntWidth::_8, Unsigned, NonNullable), - PType::U16 => &Int(IntWidth::_16, Unsigned, NonNullable), - PType::U32 => &Int(IntWidth::_32, Unsigned, NonNullable), - PType::U64 => &Int(IntWidth::_64, Unsigned, NonNullable), - PType::F16 => &Float(FloatWidth::_16, NonNullable), - PType::F32 => &Float(FloatWidth::_32, NonNullable), - PType::F64 => &Float(FloatWidth::_64, NonNullable), - } - } -} - -impl From for DType { - fn from(item: PType) -> Self { - use Nullability::*; - use Signedness::*; - - match item { - PType::I8 => Int(IntWidth::_8, Signed, NonNullable), - PType::I16 => Int(IntWidth::_16, Signed, NonNullable), - PType::I32 => Int(IntWidth::_32, Signed, NonNullable), - PType::I64 => Int(IntWidth::_64, Signed, NonNullable), - PType::U8 => Int(IntWidth::_8, Unsigned, NonNullable), - PType::U16 => Int(IntWidth::_16, Unsigned, NonNullable), - PType::U32 => Int(IntWidth::_32, Unsigned, NonNullable), - PType::U64 => Int(IntWidth::_64, Unsigned, NonNullable), - PType::F16 => Float(FloatWidth::_16, NonNullable), - PType::F32 => Float(FloatWidth::_32, NonNullable), - PType::F64 => Float(FloatWidth::_64, NonNullable), - } - } -} - #[cfg(test)] mod test { use std::mem; diff --git a/vortex-schema/src/error.rs b/vortex-schema/src/error.rs new file mode 100644 index 0000000000..d404e5dc3b --- /dev/null +++ b/vortex-schema/src/error.rs @@ -0,0 +1,71 @@ +use std::borrow::Cow; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; +use std::{env, fmt, io}; + +#[derive(Debug, PartialEq)] +pub struct ErrString(Cow<'static, str>); + +impl From for ErrString +where + T: Into>, +{ + fn from(msg: T) -> Self { + if env::var("VORTEX_PANIC_ON_ERR").as_deref().unwrap_or("") == "1" { + panic!("{}", msg.into()) + } else { + ErrString(msg.into()) + } + } +} + +impl AsRef for ErrString { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl Deref for ErrString { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Display for ErrString { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) + } +} + +#[derive(Debug, thiserror::Error, PartialEq)] +pub enum SchemaError { + #[error("{0}")] + InvalidArgument(ErrString), + #[error("io error: {0:?}")] + IOError(IOError), +} + +pub type SchemaResult = Result; + +macro_rules! wrapped_error { + ($E:ty, $e:ident) => { + #[derive(Debug)] + pub struct $e(pub $E); + + impl PartialEq for $e { + fn eq(&self, _other: &Self) -> bool { + false + } + } + + impl From<$E> for SchemaError { + fn from(err: $E) -> Self { + SchemaError::$e($e(err)) + } + } + }; +} + +wrapped_error!(io::Error, IOError); diff --git a/vortex-schema/src/lib.rs b/vortex-schema/src/lib.rs new file mode 100644 index 0000000000..7a3494f440 --- /dev/null +++ b/vortex-schema/src/lib.rs @@ -0,0 +1,20 @@ +pub use dtype::*; +pub use error::ErrString; +pub use error::SchemaError; +pub use error::SchemaResult; +pub use serde::DTypeReader; +pub use serde::DTypeWriter; +use std::fmt::{Display, Formatter}; + +mod dtype; +mod error; +mod serde; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] +pub struct CompositeID(pub &'static str); + +impl Display for CompositeID { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/vortex-array/src/serde/dtype.rs b/vortex-schema/src/serde.rs similarity index 73% rename from vortex-array/src/serde/dtype.rs rename to vortex-schema/src/serde.rs index 2fd9ecee1c..ea083d8b72 100644 --- a/vortex-array/src/serde/dtype.rs +++ b/vortex-schema/src/serde.rs @@ -1,14 +1,13 @@ -use leb128::read::Error; -use std::io::Read; +use std::io::{Read, Write}; use std::sync::Arc; -use crate::array::composite::find_extension; +use leb128::read::Error; use num_enum::{IntoPrimitive, TryFromPrimitive}; use crate::dtype::DType::*; use crate::dtype::{DType, FloatWidth, IntWidth, Nullability, Signedness}; -use crate::error::{VortexError, VortexResult}; -use crate::serde::WriteCtx; +use crate::error::{SchemaError, SchemaResult}; +use crate::CompositeID; pub struct DTypeReader<'a> { reader: &'a mut dyn Read, @@ -19,34 +18,36 @@ impl<'a> DTypeReader<'a> { Self { reader } } - fn read_nbytes(&mut self) -> VortexResult<[u8; N]> { + fn read_nbytes(&mut self) -> SchemaResult<[u8; N]> { let mut bytes: [u8; N] = [0; N]; - self.reader.read_exact(&mut bytes)?; + self.reader + .read_exact(&mut bytes) + .map_err(SchemaError::from)?; Ok(bytes) } - fn read_usize(&mut self) -> VortexResult { + fn read_usize(&mut self) -> SchemaResult { leb128::read::unsigned(self.reader) .map_err(|e| match e { Error::IoError(io_err) => io_err.into(), - Error::Overflow => VortexError::InvalidArgument("overflow".into()), + Error::Overflow => SchemaError::InvalidArgument("overflow".into()), }) .map(|u| u as usize) } - fn read_slice(&mut self) -> VortexResult> { + fn read_slice(&mut self) -> SchemaResult> { let len = self.read_usize()?; let mut slice = Vec::with_capacity(len); self.reader .take(len as u64) .read_to_end(&mut slice) - .map_err(VortexError::from)?; + .map_err(SchemaError::from)?; Ok(slice) } - pub fn read(&mut self) -> VortexResult { + pub fn read(&mut self, find_extension: fn(&str) -> Option) -> SchemaResult { let dtype = DTypeTag::try_from(self.read_nbytes::<1>()?[0]) - .map_err(|_| VortexError::InvalidArgument("Failed to parse dtype tag".into()))?; + .map_err(|_| SchemaError::InvalidArgument("Failed to parse dtype tag".into()))?; match dtype { DTypeTag::Null => Ok(Null), DTypeTag::Bool => Ok(Bool(self.read_nullability()?)), @@ -75,7 +76,7 @@ impl<'a> DTypeReader<'a> { } DTypeTag::List => { let nullability = self.read_nullability()?; - Ok(List(Box::new(self.read()?), nullability)) + Ok(List(Box::new(self.read(find_extension)?), nullability)) } DTypeTag::Struct => { let field_num = self.read_usize()?; @@ -87,58 +88,57 @@ impl<'a> DTypeReader<'a> { let mut fields = Vec::with_capacity(field_num); for _ in 0..field_num { - fields.push(self.read()?); + fields.push(self.read(find_extension)?); } Ok(Struct(names, fields)) } DTypeTag::Composite => { let nullability = self.read_nullability()?; let id = unsafe { String::from_utf8_unchecked(self.read_slice()?) }; - let extension = find_extension(id.as_str()).ok_or(VortexError::InvalidArgument( - "Failed to find extension".into(), - ))?; - Ok(Composite(extension.id(), nullability)) + let extension_id = find_extension(id.as_str()).ok_or( + SchemaError::InvalidArgument("Failed to find extension".into()), + )?; + Ok(Composite(extension_id, nullability)) } } } - fn read_signedness(&mut self) -> VortexResult { + fn read_signedness(&mut self) -> SchemaResult { SignednessTag::try_from(self.read_nbytes::<1>()?[0]) - .map_err(|_| VortexError::InvalidArgument("Failed to parse signedness tag".into())) + .map_err(|_| SchemaError::InvalidArgument("Failed to parse signedness tag".into())) .map(Signedness::from) } - fn read_nullability(&mut self) -> VortexResult { + fn read_nullability(&mut self) -> SchemaResult { NullabilityTag::try_from(self.read_nbytes::<1>()?[0]) - .map_err(|_| VortexError::InvalidArgument("Failed to parse nullability tag".into())) + .map_err(|_| SchemaError::InvalidArgument("Failed to parse nullability tag".into())) .map(Nullability::from) } - fn read_int_width(&mut self) -> VortexResult { + fn read_int_width(&mut self) -> SchemaResult { IntWidthTag::try_from(self.read_nbytes::<1>()?[0]) - .map_err(|_| VortexError::InvalidArgument("Failed to parse int width tag".into())) + .map_err(|_| SchemaError::InvalidArgument("Failed to parse int width tag".into())) .map(IntWidth::from) } - fn read_float_width(&mut self) -> VortexResult { + fn read_float_width(&mut self) -> SchemaResult { FloatWidthTag::try_from(self.read_nbytes::<1>()?[0]) - .map_err(|_| VortexError::InvalidArgument("Failed to parse float width tag".into())) + .map_err(|_| SchemaError::InvalidArgument("Failed to parse float width tag".into())) .map(FloatWidth::from) } } -pub struct DTypeWriter<'a, 'b> { - writer: &'b mut WriteCtx<'a>, +pub struct DTypeWriter<'a> { + writer: &'a mut dyn Write, } -impl<'a, 'b> DTypeWriter<'a, 'b> { - pub fn new(writer: &'b mut WriteCtx<'a>) -> Self { +impl<'a> DTypeWriter<'a> { + pub fn new(writer: &'a mut dyn Write) -> Self { Self { writer } } - pub fn write(&mut self, dtype: &DType) -> VortexResult<()> { - self.writer - .write_fixed_slice([DTypeTag::from(dtype).into()])?; + pub fn write(&mut self, dtype: &DType) -> SchemaResult<()> { + self.write_fixed_slice([DTypeTag::from(dtype).into()])?; match dtype { Null => {} Bool(n) => self.write_nullability(*n)?, @@ -149,7 +149,7 @@ impl<'a, 'b> DTypeWriter<'a, 'b> { } Decimal(p, w, n) => { self.write_nullability(*n)?; - self.writer.write_fixed_slice([*p, *w as u8])? + self.write_fixed_slice([*p, *w as u8])? } Float(w, n) => { self.write_nullability(*n)?; @@ -158,9 +158,9 @@ impl<'a, 'b> DTypeWriter<'a, 'b> { Utf8(n) => self.write_nullability(*n)?, Binary(n) => self.write_nullability(*n)?, Struct(ns, fs) => { - self.writer.write_usize(ns.len())?; + self.write_usize(ns.len())?; for name in ns { - self.writer.write_slice(name.as_bytes())?; + self.write_slice(name.as_bytes())?; } for field in fs { self.write(field)? @@ -172,31 +172,42 @@ impl<'a, 'b> DTypeWriter<'a, 'b> { } Composite(id, n) => { self.write_nullability(*n)?; - self.writer.write_slice(id.0.as_bytes())?; + self.write_slice(id.0.as_bytes())?; } } Ok(()) } - fn write_signedness(&mut self, signedness: Signedness) -> VortexResult<()> { - self.writer - .write_fixed_slice([SignednessTag::from(signedness).into()]) + fn write_usize(&mut self, u: usize) -> SchemaResult<()> { + leb128::write::unsigned(self.writer, u as u64) + .map_err(|_| SchemaError::InvalidArgument("Failed to write leb128 usize".into())) + .map(|_| ()) + } + + fn write_fixed_slice(&mut self, slice: [u8; N]) -> SchemaResult<()> { + self.writer.write_all(&slice).map_err(|e| e.into()) + } + + fn write_slice(&mut self, slice: &[u8]) -> SchemaResult<()> { + self.write_usize(slice.len())?; + self.writer.write_all(slice).map_err(|e| e.into()) + } + + fn write_signedness(&mut self, signedness: Signedness) -> SchemaResult<()> { + self.write_fixed_slice([SignednessTag::from(signedness).into()]) } - fn write_nullability(&mut self, nullability: Nullability) -> VortexResult<()> { - self.writer - .write_fixed_slice([NullabilityTag::from(nullability).into()]) + fn write_nullability(&mut self, nullability: Nullability) -> SchemaResult<()> { + self.write_fixed_slice([NullabilityTag::from(nullability).into()]) } - fn write_int_width(&mut self, int_width: IntWidth) -> VortexResult<()> { - self.writer - .write_fixed_slice([IntWidthTag::from(int_width).into()]) + fn write_int_width(&mut self, int_width: IntWidth) -> SchemaResult<()> { + self.write_fixed_slice([IntWidthTag::from(int_width).into()]) } - fn write_float_width(&mut self, float_width: FloatWidth) -> VortexResult<()> { - self.writer - .write_fixed_slice([FloatWidthTag::from(float_width).into()]) + fn write_float_width(&mut self, float_width: FloatWidth) -> SchemaResult<()> { + self.write_fixed_slice([FloatWidthTag::from(float_width).into()]) } } @@ -366,16 +377,17 @@ mod test { use crate::dtype::IntWidth::_64; use crate::dtype::Nullability::NonNullable; use crate::dtype::Signedness::Unsigned; - use crate::serde::{DTypeReader, WriteCtx}; + use crate::serde::{DTypeReader, DTypeWriter}; #[test] fn roundtrip() { let mut buffer: Vec = Vec::new(); let dtype = Int(_64, Unsigned, NonNullable); - let mut ctx = WriteCtx::new(&mut buffer); - ctx.dtype(&dtype).unwrap(); + DTypeWriter::new(&mut buffer).write(&dtype).unwrap(); assert_eq!(buffer, [0x02, 0x01, 0x04, 0x01]); - let read_dtype = DTypeReader::new(&mut buffer.as_slice()).read().unwrap(); + let read_dtype = DTypeReader::new(&mut buffer.as_slice()) + .read(|_| panic!("no composite types")) + .unwrap(); assert_eq!(dtype, read_dtype); } } diff --git a/vortex-zigzag/Cargo.toml b/vortex-zigzag/Cargo.toml index b3eca76ce0..6ced0fa47a 100644 --- a/vortex-zigzag/Cargo.toml +++ b/vortex-zigzag/Cargo.toml @@ -12,9 +12,10 @@ edition = { workspace = true } rust-version = { workspace = true } [dependencies] -vortex-array = { "path" = "../vortex-array" } linkme = "0.3.22" vortex-alloc = { path = "../vortex-alloc" } +vortex-array = { path = "../vortex-array" } +vortex-schema = { path = "../vortex-schema" } zigzag = "0.1.0" [lints] diff --git a/vortex-zigzag/src/serde.rs b/vortex-zigzag/src/serde.rs index dce75cb996..62d68e79c0 100644 --- a/vortex-zigzag/src/serde.rs +++ b/vortex-zigzag/src/serde.rs @@ -1,7 +1,7 @@ use vortex::array::{Array, ArrayRef}; -use vortex::dtype::{DType, Signedness}; use vortex::error::{VortexError, VortexResult}; use vortex::serde::{ArraySerde, EncodingSerde, ReadCtx, WriteCtx}; +use vortex_schema::{DType, Signedness}; use crate::{ZigZagArray, ZigZagEncoding}; diff --git a/vortex-zigzag/src/zigzag.rs b/vortex-zigzag/src/zigzag.rs index 2e04657908..4000d55c8d 100644 --- a/vortex-zigzag/src/zigzag.rs +++ b/vortex-zigzag/src/zigzag.rs @@ -3,11 +3,11 @@ use std::sync::{Arc, RwLock}; use vortex::array::{Array, ArrayKind, ArrayRef, Encoding, EncodingId, EncodingRef}; use vortex::compress::EncodingCompression; -use vortex::dtype::{DType, Signedness}; use vortex::error::{VortexError, VortexResult}; use vortex::formatter::{ArrayDisplay, ArrayFormatter}; use vortex::serde::{ArraySerde, EncodingSerde}; use vortex::stats::{Stats, StatsSet}; +use vortex_schema::{DType, Signedness}; use crate::compress::zigzag_encode; From 4c871c5ce55916df8ba8f2b8736d9624022aa7ce Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Wed, 20 Mar 2024 14:50:27 +0000 Subject: [PATCH 2/2] rename --- bench-vortex/src/lib.rs | 29 +++-- pyvortex/src/dtype.rs | 2 +- pyvortex/src/encode.rs | 8 +- vortex-array/src/array/composite/typed.rs | 3 +- vortex-array/src/arrow/dtypes.rs | 19 +-- vortex-array/src/arrow/mod.rs | 2 +- vortex-array/src/encode.rs | 150 +++++++++------------- 7 files changed, 91 insertions(+), 122 deletions(-) diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index ae35cc0972..8898ede02e 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -1,17 +1,18 @@ +use std::collections::HashSet; +use std::fs::{create_dir_all, File}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + use arrow_array::RecordBatchReader; use itertools::Itertools; use log::info; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::ProjectionMask; -use std::collections::HashSet; -use std::fs::{create_dir_all, File}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; + use vortex::array::bool::BoolEncoding; use vortex::array::chunked::{ChunkedArray, ChunkedEncoding}; -use vortex::array::constant::ConstantEncoding; - use vortex::array::composite::CompositeEncoding; +use vortex::array::constant::ConstantEncoding; use vortex::array::downcast::DowncastArrayBuiltin; use vortex::array::primitive::PrimitiveEncoding; use vortex::array::sparse::SparseEncoding; @@ -117,7 +118,7 @@ pub fn compress_taxi_data() -> ArrayRef { }) .collect_vec(); - let dtype = DType::from_arrow_type(schema.clone()); + let dtype = DType::from_arrow(schema.clone()); let compressed = ChunkedArray::new(chunks.clone(), dtype).boxed(); info!("Compressed array {}", display_tree(compressed.as_ref())); @@ -143,13 +144,15 @@ pub fn compress_taxi_data() -> ArrayRef { #[cfg(test)] mod test { + use std::fs::File; + use std::ops::Deref; + use std::sync::Arc; + use arrow_array::{ArrayRef as ArrowArrayRef, StructArray as ArrowStructArray}; use log::LevelFilter; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use simplelog::{ColorChoice, Config, TermLogger, TerminalMode}; - use std::fs::File; - use std::ops::Deref; - use std::sync::Arc; + use vortex::array::ArrayRef; use vortex::compute::as_arrow::as_arrow; use vortex::encode::FromArrowArray; @@ -185,7 +188,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false); + let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false); let mut buf = Vec::::new(); let mut write_ctx = WriteCtx::new(&mut buf); @@ -207,7 +210,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false); + let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false); let vortex_as_arrow = as_arrow(vortex_array.as_ref()).unwrap(); assert_eq!(vortex_as_arrow.deref(), arrow_array.deref()); } @@ -226,7 +229,7 @@ mod test { for record_batch in reader.map(|batch_result| batch_result.unwrap()) { let struct_arrow: ArrowStructArray = record_batch.into(); let arrow_array: ArrowArrayRef = Arc::new(struct_arrow); - let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false); + let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false); let compressed = ctx.clone().compress(vortex_array.as_ref(), None).unwrap(); let compressed_as_arrow = as_arrow(compressed.as_ref()).unwrap(); diff --git a/pyvortex/src/dtype.rs b/pyvortex/src/dtype.rs index 271baac352..3aa0f35100 100644 --- a/pyvortex/src/dtype.rs +++ b/pyvortex/src/dtype.rs @@ -35,7 +35,7 @@ impl PyDType { ) -> PyResult> { PyDType::wrap( cls.py(), - DType::from_arrow_type(&Field::new("_", arrow_dtype, nullable)), + DType::from_arrow(&Field::new("_", arrow_dtype, nullable)), ) } } diff --git a/pyvortex/src/encode.rs b/pyvortex/src/encode.rs index 1c272d59c5..c98a8a0448 100644 --- a/pyvortex/src/encode.rs +++ b/pyvortex/src/encode.rs @@ -25,7 +25,7 @@ pub fn encode(obj: &PyAny) -> PyResult> { if obj.is_instance(pa_array)? { let arrow_array = ArrayData::from_pyarrow(obj).map(make_array)?; - let enc_array = ArrayRef::from_arrow_array(arrow_array, false); + let enc_array = ArrayRef::from_arrow(arrow_array, false); PyArray::wrap(obj.py(), enc_array) } else if obj.is_instance(chunked_array)? { let chunks: Vec<&PyAny> = obj.getattr("chunks")?.extract()?; @@ -34,17 +34,17 @@ pub fn encode(obj: &PyAny) -> PyResult> { .map(|a| { ArrayData::from_pyarrow(a) .map(make_array) - .map(|a| ArrayRef::from_arrow_array(a, false)) + .map(|a| ArrayRef::from_arrow(a, false)) }) .collect::>>()?; let dtype: DType = obj .getattr("type") .and_then(DataType::from_pyarrow) - .map(|dt| DType::from_arrow_type(&Field::new("_", dt, false)))?; + .map(|dt| DType::from_arrow(&Field::new("_", dt, false)))?; PyArray::wrap(obj.py(), ChunkedArray::new(encoded_chunks, dtype).boxed()) } else if obj.is_instance(table)? { let array_stream = ArrowArrayStreamReader::from_pyarrow(obj)?; - let dtype = DType::from_arrow_type(array_stream.schema()); + let dtype = DType::from_arrow(array_stream.schema()); let chunks = array_stream .into_iter() .map(|b| b.map(ArrayRef::from).map_err(map_arrow_err)) diff --git a/vortex-array/src/array/composite/typed.rs b/vortex-array/src/array/composite/typed.rs index a35ad58d1f..8ed79ee44b 100644 --- a/vortex-array/src/array/composite/typed.rs +++ b/vortex-array/src/array/composite/typed.rs @@ -1,6 +1,8 @@ use std::fmt::Debug; use std::sync::Arc; +use vortex_schema::CompositeID; + use crate::array::composite::array::CompositeArray; use crate::array::composite::CompositeMetadata; use crate::array::{Array, ArrayRef}; @@ -95,4 +97,3 @@ macro_rules! composite_impl { } pub(crate) use composite_impl; -use vortex_schema::CompositeID; diff --git a/vortex-array/src/arrow/dtypes.rs b/vortex-array/src/arrow/dtypes.rs index 770ca4d6b1..e0d4f00789 100644 --- a/vortex-array/src/arrow/dtypes.rs +++ b/vortex-array/src/arrow/dtypes.rs @@ -33,13 +33,8 @@ impl From for ArrayRef { .map(|(array, field)| { // The dtype of the child arrays infer their nullability from the array itself. // In case the schema says something different, we cast into the schema's dtype. - let vortex_array = - ArrayRef::from_arrow_array(array.clone(), field.is_nullable()); - cast( - vortex_array.as_ref(), - &DType::from_arrow_type(field.as_ref()), - ) - .unwrap() + let vortex_array = ArrayRef::from_arrow(array.clone(), field.is_nullable()); + cast(vortex_array.as_ref(), &DType::from_arrow(field.as_ref())).unwrap() }) .collect(), ) @@ -75,7 +70,7 @@ impl TryFrom<&DataType> for PType { } impl FromArrowType for DType { - fn from_arrow_type(value: SchemaRef) -> Self { + fn from_arrow(value: SchemaRef) -> Self { DType::Struct( value .fields() @@ -85,14 +80,14 @@ impl FromArrowType for DType { value .fields() .iter() - .map(|f| DType::from_arrow_type(f.as_ref())) + .map(|f| DType::from_arrow(f.as_ref())) .collect_vec(), ) } } impl FromArrowType<&Field> for DType { - fn from_arrow_type(field: &Field) -> Self { + fn from_arrow(field: &Field) -> Self { use vortex_schema::DType::*; use vortex_schema::Signedness::*; @@ -123,12 +118,12 @@ impl FromArrowType<&Field> for DType { // DataType::Time32(u) => localtime(u.into(), IntWidth::_32, nullability), // DataType::Time64(u) => localtime(u.into(), IntWidth::_64, nullability), DataType::List(e) | DataType::LargeList(e) => { - List(Box::new(DType::from_arrow_type(e.as_ref())), nullability) + List(Box::new(DType::from_arrow(e.as_ref())), nullability) } DataType::Struct(f) => Struct( f.iter().map(|f| Arc::new(f.name().clone())).collect(), f.iter() - .map(|f| DType::from_arrow_type(f.as_ref())) + .map(|f| DType::from_arrow(f.as_ref())) .collect_vec(), ), DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => Decimal(*p, *s, nullability), diff --git a/vortex-array/src/arrow/mod.rs b/vortex-array/src/arrow/mod.rs index 2f99f3fcb5..bf9a621991 100644 --- a/vortex-array/src/arrow/mod.rs +++ b/vortex-array/src/arrow/mod.rs @@ -2,5 +2,5 @@ pub mod dtypes; pub mod wrappers; pub trait FromArrowType: Sized { - fn from_arrow_type(value: T) -> Self; + fn from_arrow(value: T) -> Self; } diff --git a/vortex-array/src/encode.rs b/vortex-array/src/encode.rs index 205c7125d6..b71807a830 100644 --- a/vortex-array/src/encode.rs +++ b/vortex-array/src/encode.rs @@ -34,7 +34,7 @@ use crate::ptype::PType; use crate::scalar::NullScalar; pub trait FromArrowArray { - fn from_arrow_array(array: A, nullable: bool) -> Self; + fn from_arrow(array: A, nullable: bool) -> Self; } impl From<&Buffer> for ArrayRef { @@ -57,7 +57,7 @@ impl From<&OffsetBuffer> for ArrayRef { } impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef { - fn from_arrow_array(value: &ArrowPrimitiveArray, nullable: bool) -> Self { + fn from_arrow(value: &ArrowPrimitiveArray, nullable: bool) -> Self { let ptype: PType = (&T::DATA_TYPE).try_into().unwrap(); let arr = PrimitiveArray::new( ptype, @@ -91,7 +91,7 @@ impl FromArrowArray<&ArrowPrimitiveArray> for ArrayRef } impl FromArrowArray<&GenericByteArray> for ArrayRef { - fn from_arrow_array(value: &GenericByteArray, nullable: bool) -> Self { + fn from_arrow(value: &GenericByteArray, nullable: bool) -> Self { let dtype = match T::DATA_TYPE { DataType::Binary | DataType::LargeBinary => DType::Binary(nullable.into()), DataType::Utf8 | DataType::LargeUtf8 => DType::Utf8(nullable.into()), @@ -108,7 +108,7 @@ impl FromArrowArray<&GenericByteArray> for ArrayRef { } impl FromArrowArray<&ArrowBooleanArray> for ArrayRef { - fn from_arrow_array(value: &ArrowBooleanArray, nullable: bool) -> Self { + fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::new( value.values().to_owned(), nulls(value.nulls(), nullable, value.len()), @@ -118,7 +118,7 @@ impl FromArrowArray<&ArrowBooleanArray> for ArrayRef { } impl FromArrowArray<&ArrowStructArray> for ArrayRef { - fn from_arrow_array(value: &ArrowStructArray, nullable: bool) -> Self { + fn from_arrow(value: &ArrowStructArray, nullable: bool) -> Self { // TODO(ngates): how should we deal with Arrow "logical nulls"? assert!(!nullable); StructArray::new( @@ -132,7 +132,7 @@ impl FromArrowArray<&ArrowStructArray> for ArrayRef { .columns() .iter() .zip(value.fields()) - .map(|(c, field)| ArrayRef::from_arrow_array(c.clone(), field.is_nullable())) + .map(|(c, field)| ArrayRef::from_arrow(c.clone(), field.is_nullable())) .collect(), ) .boxed() @@ -140,7 +140,7 @@ impl FromArrowArray<&ArrowStructArray> for ArrayRef { } impl FromArrowArray<&ArrowNullArray> for ArrayRef { - fn from_arrow_array(value: &ArrowNullArray, nullable: bool) -> Self { + fn from_arrow(value: &ArrowNullArray, nullable: bool) -> Self { assert!(nullable); ConstantArray::new(NullScalar::new().into(), value.len()).boxed() } @@ -160,109 +160,79 @@ fn nulls(nulls: Option<&NullBuffer>, nullable: bool, len: usize) -> Option for ArrayRef { - fn from_arrow_array(array: ArrowArrayRef, nullable: bool) -> Self { + fn from_arrow(array: ArrowArrayRef, nullable: bool) -> Self { match array.data_type() { - DataType::Boolean => ArrayRef::from_arrow_array(array.as_boolean(), nullable), - DataType::UInt8 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::UInt16 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::UInt32 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::UInt64 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::Int8 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::Int16 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::Int32 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::Int64 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } + DataType::Boolean => ArrayRef::from_arrow(array.as_boolean(), nullable), + DataType::UInt8 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::UInt16 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::UInt32 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::UInt64 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Int8 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Int16 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Int32 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Int64 => ArrayRef::from_arrow(array.as_primitive::(), nullable), DataType::Float16 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + ArrayRef::from_arrow(array.as_primitive::(), nullable) } DataType::Float32 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + ArrayRef::from_arrow(array.as_primitive::(), nullable) } DataType::Float64 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + ArrayRef::from_arrow(array.as_primitive::(), nullable) } - DataType::Utf8 => ArrayRef::from_arrow_array(array.as_string::(), nullable), - DataType::LargeUtf8 => ArrayRef::from_arrow_array(array.as_string::(), nullable), - DataType::Binary => ArrayRef::from_arrow_array(array.as_binary::(), nullable), - DataType::LargeBinary => ArrayRef::from_arrow_array(array.as_binary::(), nullable), - DataType::Struct(_) => ArrayRef::from_arrow_array(array.as_struct(), nullable), - DataType::Null => ArrayRef::from_arrow_array(as_null_array(array.as_ref()), nullable), + DataType::Utf8 => ArrayRef::from_arrow(array.as_string::(), nullable), + DataType::LargeUtf8 => ArrayRef::from_arrow(array.as_string::(), nullable), + DataType::Binary => ArrayRef::from_arrow(array.as_binary::(), nullable), + DataType::LargeBinary => ArrayRef::from_arrow(array.as_binary::(), nullable), + DataType::Struct(_) => ArrayRef::from_arrow(array.as_struct(), nullable), + DataType::Null => ArrayRef::from_arrow(as_null_array(array.as_ref()), nullable), DataType::Timestamp(u, _) => match u { - TimeUnit::Second => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), - TimeUnit::Millisecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), - TimeUnit::Microsecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), - TimeUnit::Nanosecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), + TimeUnit::Second => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Millisecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Microsecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Nanosecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } }, - DataType::Date32 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } - DataType::Date64 => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) - } + DataType::Date32 => ArrayRef::from_arrow(array.as_primitive::(), nullable), + DataType::Date64 => ArrayRef::from_arrow(array.as_primitive::(), nullable), DataType::Time32(u) => match u { TimeUnit::Second => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Millisecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) } - TimeUnit::Millisecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), _ => unreachable!(), }, DataType::Time64(u) => match u { - TimeUnit::Microsecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), - TimeUnit::Nanosecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), + TimeUnit::Microsecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Nanosecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } _ => unreachable!(), }, DataType::Duration(u) => match u { TimeUnit::Second => { - ArrayRef::from_arrow_array(array.as_primitive::(), nullable) + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Millisecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Microsecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) + } + TimeUnit::Nanosecond => { + ArrayRef::from_arrow(array.as_primitive::(), nullable) } - TimeUnit::Millisecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), - TimeUnit::Microsecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), - TimeUnit::Nanosecond => ArrayRef::from_arrow_array( - array.as_primitive::(), - nullable, - ), }, _ => panic!( "TODO(robert): Missing array encoding for dtype {}",