Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade arrow-rs to 51.0.0 and extract common dependencies to top level #127

Merged
merged 8 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 165 additions & 86 deletions Cargo.lock

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,42 @@ include = [
edition = "2021"
rust-version = "1.76"

[workspace.dependencies]
ahash = "0.8.11"
allocator-api2 = "0.2.16"
arrayref = "0.3.7"
arrow = { version = "51.0.0", features = ["pyarrow"] }
arrow-array = "51.0.0"
arrow-buffer = "51.0.0"
arrow-schema = "51.0.0"
bindgen = "0.69.4"
criterion = { version = "0.5.1", features = ["html_reports"] }
croaring = "1.0.1"
divan = "0.1.14"
flatbuffers = "23.5.26"
flatc = "0.2.2"
half = { version = "2.4.0", features = ["std", "num-traits"] }
hashbrown = "0.14.3"
humansize = "2.1.3"
itertools = "0.12.1"
leb128 = "0.2.5"
linkme = "0.3.25"
log = "0.4.21"
num-traits = "0.2.18"
num_enum = "0.7.2"
parquet = "51.0.0"
paste = "1.0.14"
pyo3 = { version = "0.20.2", features = ["extension-module", "abi3-py311"] }
pyo3-log = "0.9.0"
rand = "0.8.5"
reqwest = { version = "0.12.0", features = ["blocking"] }
seq-macro = "0.3.5"
simplelog = { version = "0.12.2", features = ["paris"] }
thiserror = "1.0.58"
uninit = "0.6.2"
walkdir = "2.5.0"
zigzag = "0.1.0"

[workspace.lints.rust]
warnings = "deny"

Expand Down
16 changes: 8 additions & 8 deletions bench-vortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,24 @@ rust-version = { workspace = true }
workspace = true

[dependencies]
arrow-array = "50.0.0"
vortex-alp = { path = "../vortex-alp" }
arrow-array = { workspace = true }
vortex-array = { path = "../vortex-array" }
vortex-datetime = { path = "../vortex-datetime" }
vortex-alp = { path = "../vortex-alp" }
vortex-dict = { path = "../vortex-dict" }
vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-ree = { path = "../vortex-ree" }
vortex-roaring = { path = "../vortex-roaring" }
vortex-schema = { path = "../vortex-schema" }
vortex-zigzag = { path = "../vortex-zigzag" }
itertools = "0.12.1"
reqwest = { version = "0.11.24", features = ["blocking"] }
parquet = "50.0.0"
log = "0.4.20"
itertools = { workspace = true }
reqwest = { workspace = true }
parquet = { workspace = true }
log = { workspace = true }

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
simplelog = { version = "0.12.1", features = ["paris"] }
criterion = { workspace = true }
simplelog = { workspace = true }

[[bench]]
name = "compress_benchmark"
Expand Down
12 changes: 6 additions & 6 deletions fastlanez-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ links = "fastlanez"
workspace = true

[dependencies]
arrayref = "0.3.7"
paste = "1.0.14"
seq-macro = "0.3.5"
uninit = "0.6.2"
arrayref = { workspace = true }
paste = { workspace = true }
seq-macro = { workspace = true }
uninit = { workspace = true }

[build-dependencies]
bindgen = "0.69.1"
walkdir = "2.4.0"
bindgen = { workspace = true }
walkdir = { workspace = true }
14 changes: 7 additions & 7 deletions pyvortex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ name = "pyvortex"
crate-type = ["rlib", "cdylib"]

[dependencies]
arrow = { version = "50.0.0", features = ["pyarrow"] }
vortex-alp = { path = "../vortex-alp" }
arrow = { workspace = true }
vortex-array = { path = "../vortex-array" }
vortex-alp = { path = "../vortex-alp" }
vortex-dict = { path = "../vortex-dict" }
vortex-fastlanes = { path = "../vortex-fastlanes" }
vortex-ree = { path = "../vortex-ree" }
vortex-roaring = { path = "../vortex-roaring" }
vortex-schema = { path = "../vortex-schema" }
vortex-zigzag = { path = "../vortex-zigzag" }
itertools = "0.12.1"
log = "0.4.20"
paste = "1.0.14"
pyo3 = { version = "0.20.2", features = ["extension-module", "abi3-py311"] }
pyo3-log = "0.9.0"
itertools = { workspace = true }
log = { workspace = true }
paste = { workspace = true }
pyo3 = { workspace = true }
pyo3-log = { workspace = true }

# We may need this workaround?
# https://pyo3.rs/v0.20.2/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror
2 changes: 1 addition & 1 deletion pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ fn dtype_bool(py: Python<'_>, nullable: bool) -> PyResult<Py<PyDType>> {
#[pyo3(signature = (width = None, signed = true, nullable = false))]
fn dtype_int(
py: Python<'_>,
width: Option<i8>,
width: Option<u16>,
signed: bool,
nullable: bool,
) -> PyResult<Py<PyDType>> {
Expand Down
2 changes: 1 addition & 1 deletion vortex-alloc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ edition = { workspace = true }
rust-version = { workspace = true }

[dependencies]
allocator-api2 = "0.2.16"
allocator-api2 = { workspace = true }

[lints]
workspace = true
10 changes: 5 additions & 5 deletions vortex-alp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ workspace = true
[dependencies]
vortex-array = { path = "../vortex-array" }
vortex-schema = { path = "../vortex-schema" }
linkme = "0.3.22"
itertools = "0.12.1"
num-traits = "0.2.18"
log = { version = "0.4.20", features = [] }
linkme = { workspace = true }
itertools = { workspace = true }
num-traits = { workspace = true }
log = { workspace = true }

[dev-dependencies]
divan = "0.1.14"
divan = { workspace = true }

[[bench]]
name = "alp_compress"
Expand Down
33 changes: 15 additions & 18 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,20 @@ path = "src/lib.rs"
workspace = true

[dependencies]
allocator-api2 = "0.2.16"
allocator-api2 = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
half = { workspace = true }
humansize = { workspace = true }
itertools = { workspace = true }
leb128 = { workspace = true }
linkme = { workspace = true }
log = { workspace = true }
num-traits = { workspace = true }
num_enum = { workspace = true }
paste = { workspace = true }
rand = { workspace = true }
thiserror = { workspace = true }
vortex-schema = { path = "../vortex-schema" }
arrow-array = { version = "50.0.0" }
arrow-buffer = { version = "50.0.0" }
arrow-schema = { version = "50.0.0" }
dyn-clone = "1.0.16"
half = "2.3.1"
humansize = "2.1.3"
itertools = "0.12.1"
leb128 = "0.2.5"
linkme = "0.3.23"
log = "0.4.20"
num-traits = "0.2.18"
num_enum = "0.7.2"
paste = "1.0.14"
rand = { version = "0.8.5", features = [] }
rayon = "1.8.1"
roaring = "0.10.3"
vortex-alloc = { path = "../vortex-alloc" }
thiserror = "1.0.57"
2 changes: 1 addition & 1 deletion vortex-array/src/array/varbin/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ impl AsArrowArray for VarBinArray {
}
_ => flatten_primitive(cast(&offsets.to_array(), &PType::I32.into())?.as_ref())?,
};
let nulls = as_nulls(offsets.validity())?;
let nulls = as_nulls(self.validity())?;

let data = flatten_primitive(self.bytes())?;
assert_eq!(data.ptype(), &PType::U8);
Expand Down
83 changes: 81 additions & 2 deletions vortex-array/src/array/varbinview/compute.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
use std::sync::Arc;

use arrow_array::{ArrayRef as ArrowArrayRef, BinaryViewArray, StringViewArray};
use arrow_buffer::ScalarBuffer;
use itertools::Itertools;

use vortex_schema::DType;

use crate::array::varbinview::VarBinViewArray;
use crate::array::Array;
use crate::arrow::wrappers::as_nulls;
use crate::compute::as_arrow::AsArrowArray;
use crate::compute::flatten::{flatten, flatten_primitive, FlattenFn, FlattenedArray};
use crate::compute::scalar_at::ScalarAtFn;
use crate::compute::ArrayCompute;
use crate::error::VortexResult;
use crate::error::{VortexError, VortexResult};
use crate::ptype::PType;
use crate::scalar::Scalar;
use vortex_schema::DType;

impl ArrayCompute for VarBinViewArray {
/// Returns this array viewed as its own `AsArrowArray` implementation.
fn as_arrow(&self) -> Option<&dyn AsArrowArray> {
    let kernel: &dyn AsArrowArray = self;
    Some(kernel)
}

/// Returns this array viewed as its own `FlattenFn` implementation.
fn flatten(&self) -> Option<&dyn FlattenFn> {
    let kernel: &dyn FlattenFn = self;
    Some(kernel)
}

/// Returns this array viewed as its own `ScalarAtFn` implementation.
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
    let kernel: &dyn ScalarAtFn = self;
    Some(kernel)
}
Expand All @@ -27,3 +46,63 @@ impl ScalarAtFn for VarBinViewArray {
}
}
}

impl FlattenFn for VarBinViewArray {
    /// Flattens the views, every data buffer and the optional validity array, then
    /// rebuilds a `VarBinViewArray` from the flattened children (keeping the same
    /// dtype) and wraps it in `FlattenedArray::VarBinView`.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by `flatten` for the views, any data
    /// buffer, or the validity array.
    fn flatten(&self) -> VortexResult<FlattenedArray> {
        let views = flatten(self.views())?.into_array();
        // Collect into a `VortexResult` so a data buffer that fails to flatten is
        // reported to the caller instead of panicking inside the closure.
        let data = self
            .data()
            .iter()
            .map(|d| flatten(d.as_ref()).map(FlattenedArray::into_array))
            .collect::<VortexResult<Vec<_>>>()?;
        let validity = self
            .validity()
            .map(|v| flatten(v).map(FlattenedArray::into_array))
            .transpose()?;
        Ok(FlattenedArray::VarBinView(VarBinViewArray::new(
            views,
            data,
            self.dtype.clone(),
            validity,
        )))
    }
}

impl AsArrowArray for VarBinViewArray {
fn as_arrow(&self) -> VortexResult<ArrowArrayRef> {
// Views should be buffer of u8
let views = flatten_primitive(self.views())?;
robert3005 marked this conversation as resolved.
Show resolved Hide resolved
assert_eq!(views.ptype(), &PType::U8);
let nulls = as_nulls(self.validity())?;

let data = self
.data()
.iter()
.map(|d| flatten_primitive(d.as_ref()).unwrap())
.collect::<Vec<_>>();
if !data.is_empty() {
assert_eq!(data[0].ptype(), &PType::U8);
assert!(data.iter().map(|d| d.ptype()).all_equal());
}

let data = data
.iter()
.map(|p| p.buffer().to_owned())
.collect::<Vec<_>>();

// Switch on Arrow DType.
Ok(match self.dtype() {
DType::Binary(_) => Arc::new(BinaryViewArray::new(
ScalarBuffer::<u128>::from(views.buffer().clone()),
data,
nulls,
)),
DType::Utf8(_) => Arc::new(StringViewArray::new(
ScalarBuffer::<u128>::from(views.buffer().clone()),
data,
nulls,
)),
_ => return Err(VortexError::InvalidDType(self.dtype().clone())),
})
}
}
8 changes: 4 additions & 4 deletions vortex-array/src/arrow/wrappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ pub fn as_nulls(validity: Option<&ArrayRef>) -> VortexResult<Option<NullBuffer>>
.get_as::<bool>(&Stat::IsConstant)
.unwrap_or_default()
{
if scalar_at(validity, 0)?.try_into().unwrap() {
return Ok(None);
return if scalar_at(validity, 0)?.try_into().unwrap() {
Ok(None)
} else {
return Ok(Some(NullBuffer::new_null(validity.len())));
}
Ok(Some(NullBuffer::new_null(validity.len())))
};
}

Ok(Some(NullBuffer::new(
Expand Down
3 changes: 3 additions & 0 deletions vortex-array/src/compute/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::array::composite::CompositeArray;
use crate::array::primitive::PrimitiveArray;
use crate::array::struct_::StructArray;
use crate::array::varbin::VarBinArray;
use crate::array::varbinview::VarBinViewArray;
use crate::array::{Array, ArrayRef};
use crate::error::{VortexError, VortexResult};

Expand All @@ -19,6 +20,7 @@ pub enum FlattenedArray {
Primitive(PrimitiveArray),
Struct(StructArray),
VarBin(VarBinArray),
VarBinView(VarBinViewArray),
}

impl FlattenedArray {
Expand All @@ -30,6 +32,7 @@ impl FlattenedArray {
FlattenedArray::Primitive(array) => array.into_array(),
FlattenedArray::Struct(array) => array.into_array(),
FlattenedArray::VarBin(array) => array.into_array(),
FlattenedArray::VarBinView(array) => array.into_array(),
}
}
}
Expand Down
Loading