diff --git a/Cargo.lock b/Cargo.lock index 137d10e169..fd5481c3cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,9 +102,9 @@ checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" [[package]] name = "arrow" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" dependencies = [ "arrow-arith", "arrow-array", @@ -124,9 +124,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" dependencies = [ "arrow-array", "arrow-buffer", @@ -139,9 +139,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" dependencies = [ "ahash", "arrow-buffer", @@ -155,9 +155,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" dependencies = [ "bytes", "half", @@ -166,27 +166,29 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", - "base64", + "atoi", + "base64 0.22.0", "chrono", "half", "lexical-core", "num", + "ryu", ] [[package]] name = "arrow-csv" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" +checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" dependencies = [ "arrow-array", "arrow-buffer", @@ -203,9 +205,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" dependencies = [ "arrow-buffer", "arrow-schema", @@ -215,9 +217,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" dependencies = [ "arrow-array", "arrow-buffer", @@ -229,9 +231,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" +checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" dependencies = [ "arrow-array", "arrow-buffer", @@ -249,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +266,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" dependencies = [ "ahash", "arrow-array", @@ -279,18 +281,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" dependencies = [ "bitflags 2.5.0", ] [[package]] name = "arrow-select" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" dependencies = [ "ahash", "arrow-array", @@ -302,20 +304,30 @@ dependencies = [ [[package]] name = "arrow-string" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "memchr", "num", "regex", "regex-syntax", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -324,9 +336,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "95d8e92cac0961e91dbd517496b00f7e9b92363dbe6d42c3198268323798860c" dependencies = [ "addr2line", "cc", @@ -343,6 +355,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + [[package]] name = "bench-vortex" version = "0.1.0" @@ -427,12 +445,6 @@ version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" -[[package]] -name = "bytemuck" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" - [[package]] name = "byteorder" version = "1.5.0" @@ -753,12 +765,6 @@ dependencies = [ "syn", ] -[[package]] -name = "dyn-clone" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" - [[package]] name = "either" version = "1.10.0" @@ -874,6 +880,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -908,6 +915,7 @@ checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", "futures-io", + "futures-sink", "futures-task", "memchr", "pin-project-lite", @@ -940,9 +948,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.25" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" +checksum = "51ee2dd2e4f378392eeff5d51618cd9a63166a2513846bbc55f21cfacd9199d4" dependencies = [ "bytes", "fnv", @@ -1001,9 +1009,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.12" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", @@ -1012,12 +1020,24 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" dependencies = [ "bytes", + "futures-core", "http", + "http-body", "pin-project-lite", ] @@ -1027,12 +1047,6 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - [[package]] name = "humansize" version = "2.1.3" @@ -1044,39 +1058,58 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.28" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +checksum = "186548d73ac615b32a73aafe38fb4f56c0d340e110e5a200bcadbaf2e199263a" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", "h2", "http", "http-body", "httparse", - "httpdate", "itoa", "pin-project-lite", - "socket2", + "smallvec", "tokio", - "tower-service", - "tracing", "want", ] [[package]] name = "hyper-tls" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", + "http-body-util", "hyper", + "hyper-util", "native-tls", "tokio", "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -1505,6 +1538,16 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "num_enum" version = "0.7.2" @@ -1640,9 +1683,9 @@ dependencies = [ [[package]] name = "parquet" -version = "50.0.0" +version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" +checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" dependencies = [ "ahash", "arrow-array", @@ -1652,7 +1695,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.22.0", "brotli", "bytes", "chrono", @@ -1682,6 +1725,26 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pin-project" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -1973,20 +2036,23 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" -version = "0.11.27" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +checksum = "58b48d98d932f4ee75e541614d32a7f44c889b72bd9c2e04d95edd135989df88" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2", "http", "http-body", + "http-body-util", "hyper", "hyper-tls", + "hyper-util", "ipnet", "js-sys", "log", @@ -2011,16 +2077,6 @@ dependencies = [ "winreg", ] -[[package]] -name = "roaring" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" -dependencies = [ - "bytemuck", - "byteorder", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -2061,7 +2117,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", ] [[package]] @@ -2201,9 +2257,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snap" @@ -2410,6 +2466,7 @@ dependencies = [ "bytes", "libc", "mio", + "num_cpus", "pin-project-lite", "socket2", "windows-sys 0.48.0", @@ -2456,6 +2513,28 @@ dependencies = [ "winnow", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -2468,6 +2547,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ + "log", "pin-project-lite", "tracing-core", ] @@ -2581,7 +2661,6 @@ dependencies = [ "arrow-array", "arrow-buffer", "arrow-schema", - "dyn-clone", "half", "humansize", "itertools 0.12.1", @@ -2592,8 +2671,6 @@ dependencies = [ "num_enum", "paste", "rand", - "rayon", - "roaring", "thiserror", "vortex-alloc", "vortex-schema", @@ -2634,7 +2711,9 @@ dependencies = [ "fastlanez-sys", "itertools 0.12.1", "linkme", + "log", "num-traits", + "simplelog", "vortex-array", "vortex-schema", ] diff --git a/Cargo.toml b/Cargo.toml index babf1a3270..72cde148eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,42 @@ include = [ edition = "2021" rust-version = "1.76" +[workspace.dependencies] +ahash = "0.8.11" +allocator-api2 = "0.2.16" +arrayref = "0.3.7" +arrow = { version = "51.0.0", features = ["pyarrow"] } +arrow-array = "51.0.0" +arrow-buffer = "51.0.0" +arrow-schema = "51.0.0" +bindgen = "0.69.4" +criterion = { version = "0.5.1", features = ["html_reports"] } +croaring = "1.0.1" +divan = "0.1.14" +flatbuffers = "23.5.26" +flatc = "0.2.2" +half = { version = "2.4.0", features = ["std", "num-traits"] } +hashbrown = "0.14.3" +humansize = "2.1.3" +itertools = "0.12.1" +leb128 = "0.2.5" +linkme = "0.3.25" +log = "0.4.21" +num-traits = "0.2.18" +num_enum = "0.7.2" +parquet = "51.0.0" +paste = "1.0.14" +pyo3 = { version = "0.20.2", features = ["extension-module", "abi3-py311"] } +pyo3-log = "0.9.0" +rand = "0.8.5" +reqwest = { version = "0.12.0", features = ["blocking"] } +seq-macro = "0.3.5" +simplelog = { version = "0.12.2", features = ["paris"] } +thiserror = "1.0.58" +uninit = "0.6.2" +walkdir = "2.5.0" +zigzag = "0.1.0" + [workspace.lints.rust] warnings = "deny" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index f7fa967ed1..c18127e996 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -15,24 +15,24 @@ rust-version = { workspace = true } workspace = true [dependencies] -arrow-array = "50.0.0" -vortex-alp = { path = "../vortex-alp" } +arrow-array = { workspace = true } vortex-array = { path = "../vortex-array" } vortex-datetime = { path = "../vortex-datetime" } +vortex-alp = { path = "../vortex-alp" } vortex-dict = { path = "../vortex-dict" } vortex-fastlanes = { path = "../vortex-fastlanes" } vortex-ree = { path = "../vortex-ree" } vortex-roaring = { path = "../vortex-roaring" } vortex-schema = { path = "../vortex-schema" } vortex-zigzag = { path = "../vortex-zigzag" } -itertools = "0.12.1" -reqwest = { version = "0.11.24", features = ["blocking"] } -parquet = "50.0.0" -log = "0.4.20" +itertools = { workspace = true } +reqwest = { workspace = true } +parquet = { workspace = true } +log = { workspace = true } [dev-dependencies] -criterion = { version = "0.5.1", features = ["html_reports"] } -simplelog = { version = "0.12.1", features = ["paris"] } +criterion = { workspace = true } +simplelog = { workspace = true } [[bench]] name = "compress_benchmark" diff --git a/fastlanez-sys/Cargo.toml b/fastlanez-sys/Cargo.toml index 3561e542e2..80784d2cd0 100644 --- a/fastlanez-sys/Cargo.toml +++ b/fastlanez-sys/Cargo.toml @@ -16,11 +16,11 @@ links = "fastlanez" workspace = true [dependencies] -arrayref = "0.3.7" -paste = "1.0.14" -seq-macro = "0.3.5" -uninit = "0.6.2" +arrayref = { workspace = true } +paste = { workspace = true } +seq-macro = { workspace = true } +uninit = { workspace = true } [build-dependencies] -bindgen = "0.69.1" -walkdir = "2.4.0" \ No newline at end of file +bindgen = { workspace = true } +walkdir = { workspace = true } diff --git a/pyvortex/Cargo.toml b/pyvortex/Cargo.toml index 5f8dc1d3bb..21e93e1ef0 100644 --- a/pyvortex/Cargo.toml +++ b/pyvortex/Cargo.toml @@ -19,20 +19,20 @@ name = "pyvortex" crate-type = ["rlib", "cdylib"] [dependencies] -arrow = { version = "50.0.0", features = ["pyarrow"] } -vortex-alp = { path = "../vortex-alp" } +arrow = { workspace = true } vortex-array = { path = "../vortex-array" } +vortex-alp = { path = "../vortex-alp" } vortex-dict = { path = "../vortex-dict" } vortex-fastlanes = { path = "../vortex-fastlanes" } vortex-ree = { path = "../vortex-ree" } vortex-roaring = { path = "../vortex-roaring" } vortex-schema = { path = "../vortex-schema" } vortex-zigzag = { path = "../vortex-zigzag" } -itertools = "0.12.1" -log = "0.4.20" -paste = "1.0.14" -pyo3 = { version = "0.20.2", features = ["extension-module", "abi3-py311"] } -pyo3-log = "0.9.0" +itertools = { workspace = true } +log = { workspace = true } +paste = { workspace = true } +pyo3 = { workspace = true } +pyo3-log = { workspace = true } # We may need this workaround? # https://pyo3.rs/v0.20.2/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror diff --git a/pyvortex/src/lib.rs b/pyvortex/src/lib.rs index c455c51273..b4d16a207d 100644 --- a/pyvortex/src/lib.rs +++ b/pyvortex/src/lib.rs @@ -74,7 +74,7 @@ fn dtype_bool(py: Python<'_>, nullable: bool) -> PyResult> { #[pyo3(signature = (width = None, signed = true, nullable = false))] fn dtype_int( py: Python<'_>, - width: Option, + width: Option, signed: bool, nullable: bool, ) -> PyResult> { diff --git a/vortex-alloc/Cargo.toml b/vortex-alloc/Cargo.toml index 6b45ab27e4..c419058834 100644 --- a/vortex-alloc/Cargo.toml +++ b/vortex-alloc/Cargo.toml @@ -12,7 +12,7 @@ edition = { workspace = true } rust-version = { workspace = true } [dependencies] -allocator-api2 = "0.2.16" +allocator-api2 = { workspace = true } [lints] workspace = true diff --git a/vortex-alp/Cargo.toml b/vortex-alp/Cargo.toml index 39137563c9..e499be70e8 100644 --- a/vortex-alp/Cargo.toml +++ b/vortex-alp/Cargo.toml @@ -17,13 +17,13 @@ workspace = true [dependencies] vortex-array = { path = "../vortex-array" } vortex-schema = { path = "../vortex-schema" } -linkme = "0.3.22" -itertools = "0.12.1" -num-traits = "0.2.18" -log = { version = "0.4.20", features = [] } +linkme = { workspace = true } +itertools = { workspace = true } +num-traits = { workspace = true } +log = { workspace = true } [dev-dependencies] -divan = "0.1.14" +divan = { workspace = true } [[bench]] name = "alp_compress" diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 08a6d4707b..72e80edcab 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -19,23 +19,20 @@ path = "src/lib.rs" workspace = true [dependencies] -allocator-api2 = "0.2.16" +allocator-api2 = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } +half = { workspace = true } +humansize = { workspace = true } +itertools = { workspace = true } +leb128 = { workspace = true } +linkme = { workspace = true } +log = { workspace = true } +num-traits = { workspace = true } +num_enum = { workspace = true } +paste = { workspace = true } +rand = { workspace = true } +thiserror = { workspace = true } vortex-schema = { path = "../vortex-schema" } -arrow-array = { version = "50.0.0" } -arrow-buffer = { version = "50.0.0" } -arrow-schema = { version = "50.0.0" } -dyn-clone = "1.0.16" -half = "2.3.1" -humansize = "2.1.3" -itertools = "0.12.1" -leb128 = "0.2.5" -linkme = "0.3.23" -log = "0.4.20" -num-traits = "0.2.18" -num_enum = "0.7.2" -paste = "1.0.14" -rand = { version = "0.8.5", features = [] } -rayon = "1.8.1" -roaring = "0.10.3" vortex-alloc = { path = "../vortex-alloc" } -thiserror = "1.0.57" diff --git a/vortex-array/src/array/varbin/compute.rs b/vortex-array/src/array/varbin/compute.rs index 6f8a3f39c9..841fe26bfc 100644 --- a/vortex-array/src/array/varbin/compute.rs +++ b/vortex-array/src/array/varbin/compute.rs @@ -95,7 +95,7 @@ impl AsArrowArray for VarBinArray { } _ => flatten_primitive(cast(&offsets.to_array(), &PType::I32.into())?.as_ref())?, }; - let nulls = as_nulls(offsets.validity())?; + let nulls = as_nulls(self.validity())?; let data = flatten_primitive(self.bytes())?; assert_eq!(data.ptype(), &PType::U8); diff --git a/vortex-array/src/array/varbinview/compute.rs b/vortex-array/src/array/varbinview/compute.rs index 1855779234..67c4b3897e 100644 --- a/vortex-array/src/array/varbinview/compute.rs +++ b/vortex-array/src/array/varbinview/compute.rs @@ -1,12 +1,31 @@ +use std::sync::Arc; + +use arrow_array::{ArrayRef as ArrowArrayRef, BinaryViewArray, StringViewArray}; +use arrow_buffer::ScalarBuffer; +use itertools::Itertools; + +use vortex_schema::DType; + use crate::array::varbinview::VarBinViewArray; use crate::array::Array; +use crate::arrow::wrappers::as_nulls; +use crate::compute::as_arrow::AsArrowArray; +use crate::compute::flatten::{flatten, flatten_primitive, FlattenFn, FlattenedArray}; use crate::compute::scalar_at::ScalarAtFn; use crate::compute::ArrayCompute; -use crate::error::VortexResult; +use crate::error::{VortexError, VortexResult}; +use crate::ptype::PType; use crate::scalar::Scalar; -use vortex_schema::DType; impl ArrayCompute for VarBinViewArray { + fn as_arrow(&self) -> Option<&dyn AsArrowArray> { + Some(self) + } + + fn flatten(&self) -> Option<&dyn FlattenFn> { + Some(self) + } + fn scalar_at(&self) -> Option<&dyn ScalarAtFn> { Some(self) } @@ -27,3 +46,63 @@ impl ScalarAtFn for VarBinViewArray { } } } + +impl FlattenFn for VarBinViewArray { + fn flatten(&self) -> VortexResult { + let views = flatten(self.views())?.into_array(); + let data = self + .data() + .iter() + .map(|d| flatten(d.as_ref()).unwrap().into_array()) + .collect::>(); + let validity = self + .validity() + .map(|v| flatten(v).map(FlattenedArray::into_array)) + .transpose()?; + Ok(FlattenedArray::VarBinView(VarBinViewArray::new( + views, + data, + self.dtype.clone(), + validity, + ))) + } +} + +impl AsArrowArray for VarBinViewArray { + fn as_arrow(&self) -> VortexResult { + // Views should be buffer of u8 + let views = flatten_primitive(self.views())?; + assert_eq!(views.ptype(), &PType::U8); + let nulls = as_nulls(self.validity())?; + + let data = self + .data() + .iter() + .map(|d| flatten_primitive(d.as_ref()).unwrap()) + .collect::>(); + if !data.is_empty() { + assert_eq!(data[0].ptype(), &PType::U8); + assert!(data.iter().map(|d| d.ptype()).all_equal()); + } + + let data = data + .iter() + .map(|p| p.buffer().to_owned()) + .collect::>(); + + // Switch on Arrow DType. + Ok(match self.dtype() { + DType::Binary(_) => Arc::new(BinaryViewArray::new( + ScalarBuffer::::from(views.buffer().clone()), + data, + nulls, + )), + DType::Utf8(_) => Arc::new(StringViewArray::new( + ScalarBuffer::::from(views.buffer().clone()), + data, + nulls, + )), + _ => return Err(VortexError::InvalidDType(self.dtype().clone())), + }) + } +} diff --git a/vortex-array/src/arrow/wrappers.rs b/vortex-array/src/arrow/wrappers.rs index 1e3cbe3111..14b844a70f 100644 --- a/vortex-array/src/arrow/wrappers.rs +++ b/vortex-array/src/arrow/wrappers.rs @@ -32,11 +32,11 @@ pub fn as_nulls(validity: Option<&ArrayRef>) -> VortexResult> .get_as::(&Stat::IsConstant) .unwrap_or_default() { - if scalar_at(validity, 0)?.try_into().unwrap() { - return Ok(None); + return if scalar_at(validity, 0)?.try_into().unwrap() { + Ok(None) } else { - return Ok(Some(NullBuffer::new_null(validity.len()))); - } + Ok(Some(NullBuffer::new_null(validity.len()))) + }; } Ok(Some(NullBuffer::new( diff --git a/vortex-array/src/compute/flatten.rs b/vortex-array/src/compute/flatten.rs index 39120efd1e..1885dd2c83 100644 --- a/vortex-array/src/compute/flatten.rs +++ b/vortex-array/src/compute/flatten.rs @@ -4,6 +4,7 @@ use crate::array::composite::CompositeArray; use crate::array::primitive::PrimitiveArray; use crate::array::struct_::StructArray; use crate::array::varbin::VarBinArray; +use crate::array::varbinview::VarBinViewArray; use crate::array::{Array, ArrayRef}; use crate::error::{VortexError, VortexResult}; @@ -19,6 +20,7 @@ pub enum FlattenedArray { Primitive(PrimitiveArray), Struct(StructArray), VarBin(VarBinArray), + VarBinView(VarBinViewArray), } impl FlattenedArray { @@ -30,6 +32,7 @@ impl FlattenedArray { FlattenedArray::Primitive(array) => array.into_array(), FlattenedArray::Struct(array) => array.into_array(), FlattenedArray::VarBin(array) => array.into_array(), + FlattenedArray::VarBinView(array) => array.into_array(), } } } diff --git a/vortex-array/src/encode.rs b/vortex-array/src/encode.rs index 8067c94954..cbc61a51d5 100644 --- a/vortex-array/src/encode.rs +++ b/vortex-array/src/encode.rs @@ -8,17 +8,18 @@ use arrow_array::array::{ use arrow_array::array::{ArrowPrimitiveType, OffsetSizeTrait}; use arrow_array::cast::{as_null_array, AsArray}; use arrow_array::types::{ - ByteArrayType, Date32Type, Date64Type, DurationMicrosecondType, DurationMillisecondType, - DurationNanosecondType, DurationSecondType, Time32MillisecondType, Time32SecondType, - Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType, + ByteArrayType, ByteViewType, Date32Type, Date64Type, DurationMicrosecondType, + DurationMillisecondType, DurationNanosecondType, DurationSecondType, Time32MillisecondType, + Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; use arrow_array::types::{ Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; +use arrow_array::{BinaryViewArray, GenericByteViewArray, StringViewArray}; use arrow_buffer::buffer::{NullBuffer, OffsetBuffer}; -use arrow_buffer::Buffer; +use arrow_buffer::{ArrowNativeType, Buffer, ScalarBuffer}; use arrow_schema::{DataType, TimeUnit}; use vortex_schema::DType; @@ -28,10 +29,11 @@ use crate::array::constant::ConstantArray; use crate::array::primitive::PrimitiveArray; use crate::array::struct_::StructArray; use crate::array::varbin::VarBinArray; +use crate::array::varbinview::VarBinViewArray; use crate::array::IntoArray; use crate::array::{Array, ArrayRef}; use crate::datetime::{LocalDateTime, LocalDateTimeArray}; -use crate::ptype::PType; +use crate::ptype::{NativePType, PType}; use crate::scalar::NullScalar; use crate::stats::Stat; @@ -51,6 +53,12 @@ impl IntoArray for NullBuffer { } } +impl IntoArray for ScalarBuffer { + fn into_array(self) -> ArrayRef { + PrimitiveArray::new(T::PTYPE, self.into_inner(), None).into_array() + } +} + impl IntoArray for OffsetBuffer { fn into_array(self) -> ArrayRef { let ptype = if O::IS_LARGE { PType::I64 } else { PType::I32 }; @@ -112,6 +120,28 @@ impl FromArrowArray<&GenericByteArray> for ArrayRef { } } +impl FromArrowArray<&GenericByteViewArray> for ArrayRef { + fn from_arrow(value: &GenericByteViewArray, nullable: bool) -> Self { + let dtype = match T::DATA_TYPE { + DataType::BinaryView => DType::Binary(nullable.into()), + DataType::Utf8View => DType::Utf8(nullable.into()), + _ => panic!("Invalid data type for ByteViewArray"), + }; + + VarBinViewArray::new( + value.views().inner().clone().into_array(), + value + .data_buffers() + .iter() + .map(|b| b.clone().into_array()) + .collect::>(), + dtype, + nulls(value.nulls(), nullable, value.len()), + ) + .into_array() + } +} + impl FromArrowArray<&ArrowBooleanArray> for ArrayRef { fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self { BoolArray::new( @@ -189,6 +219,14 @@ impl FromArrowArray for ArrayRef { DataType::LargeUtf8 => ArrayRef::from_arrow(array.as_string::(), nullable), DataType::Binary => ArrayRef::from_arrow(array.as_binary::(), nullable), DataType::LargeBinary => ArrayRef::from_arrow(array.as_binary::(), nullable), + DataType::BinaryView => ArrayRef::from_arrow( + array.as_any().downcast_ref::().unwrap(), + nullable, + ), + DataType::Utf8View => ArrayRef::from_arrow( + array.as_any().downcast_ref::().unwrap(), + nullable, + ), DataType::Struct(_) => ArrayRef::from_arrow(array.as_struct(), nullable), DataType::Null => ArrayRef::from_arrow(as_null_array(&array), nullable), DataType::Timestamp(u, _) => match u { diff --git a/vortex-array/src/ptype.rs b/vortex-array/src/ptype.rs index b13dd680a2..2b0d8e0eec 100644 --- a/vortex-array/src/ptype.rs +++ b/vortex-array/src/ptype.rs @@ -159,34 +159,19 @@ impl TryFrom<&DType> for PType { fn try_from(value: &DType) -> VortexResult { use vortex_schema::Signedness::*; match value { - DType::Int(w, s, _) => match w { - IntWidth::Unknown => match s { - Unknown => Ok(PType::I64), - Unsigned => Ok(PType::U64), - Signed => Ok(PType::I64), - }, - IntWidth::_8 => match s { - Unknown => Ok(PType::I8), - Unsigned => Ok(PType::U8), - Signed => Ok(PType::I8), - }, - IntWidth::_16 => match s { - Unknown => Ok(PType::I16), - Unsigned => Ok(PType::U16), - Signed => Ok(PType::I16), - }, - IntWidth::_32 => match s { - Unknown => Ok(PType::I32), - Unsigned => Ok(PType::U32), - Signed => Ok(PType::I32), - }, - IntWidth::_64 => match s { - Unknown => Ok(PType::I64), - Unsigned => Ok(PType::U64), - Signed => Ok(PType::I64), - }, + Int(w, s, _) => match (w, s) { + (IntWidth::Unknown, Unknown | Signed) => Ok(PType::I64), + (IntWidth::_8, Unknown | Signed) => Ok(PType::I8), + (IntWidth::_16, Unknown | Signed) => Ok(PType::I16), + (IntWidth::_32, Unknown | Signed) => Ok(PType::I32), + (IntWidth::_64, Unknown | Signed) => Ok(PType::I64), + (IntWidth::Unknown, Unsigned) => Ok(PType::U64), + (IntWidth::_8, Unsigned) => Ok(PType::U8), + (IntWidth::_16, Unsigned) => Ok(PType::U16), + (IntWidth::_32, Unsigned) => Ok(PType::U32), + (IntWidth::_64, Unsigned) => Ok(PType::U64), }, - DType::Float(f, _) => match f { + Float(f, _) => match f { FloatWidth::Unknown => Ok(PType::F64), FloatWidth::_16 => Ok(PType::F16), FloatWidth::_32 => Ok(PType::F32), diff --git a/vortex-datetime/Cargo.toml b/vortex-datetime/Cargo.toml index 3d048a24e5..0ed534f11a 100644 --- a/vortex-datetime/Cargo.toml +++ b/vortex-datetime/Cargo.toml @@ -9,5 +9,5 @@ workspace = true [dependencies] vortex-array = { "path" = "../vortex-array" } vortex-schema = { "path" = "../vortex-schema" } -linkme = "0.3.22" -log = "0.4.20" +linkme = { workspace = true } +log = { workspace = true } diff --git a/vortex-dict/Cargo.toml b/vortex-dict/Cargo.toml index 2cd21ffc33..b1e0fed6e1 100644 --- a/vortex-dict/Cargo.toml +++ b/vortex-dict/Cargo.toml @@ -12,23 +12,23 @@ edition = { workspace = true } rust-version = { workspace = true } [dependencies] -ahash = "0.8.7" +ahash = { workspace = true } +half = { workspace = true } +hashbrown = { workspace = true } +linkme = { workspace = true } +log = { workspace = true } +num-traits = { workspace = true } vortex-array = { path = "../vortex-array" } vortex-schema = { path = "../vortex-schema" } -half = { version = "2.3.1", features = ["std", "num-traits"] } -hashbrown = "0.14.3" -linkme = "0.3.22" -log = "0.4.20" -num-traits = "0.2.17" [lints] workspace = true [dev-dependencies] -criterion = { version = "0.5.1", features = ["html_reports"] } -log = "0.4.20" -rand = "0.8.5" -simplelog = { version = "0.12.1", features = ["paris"] } +criterion = { workspace = true } +log = { workspace = true } +rand = { workspace = true } +simplelog = { workspace = true } [[bench]] name = "dict_compress" diff --git a/vortex-fastlanes/Cargo.toml b/vortex-fastlanes/Cargo.toml index 3007e588e7..81382fdf34 100644 --- a/vortex-fastlanes/Cargo.toml +++ b/vortex-fastlanes/Cargo.toml @@ -15,10 +15,14 @@ rust-version = { workspace = true } workspace = true [dependencies] -arrayref = "0.3.7" -fastlanez-sys = { path = "../fastlanez-sys" } -itertools = "0.12.1" -linkme = "0.3.22" -num-traits = "0.2.18" -vortex-array = { path = "../vortex-array" } +arrayref = { workspace = true } vortex-schema = { path = "../vortex-schema" } +vortex-array = { path = "../vortex-array" } +linkme = { workspace = true } +itertools = { workspace = true } +num-traits = { workspace = true } +fastlanez-sys = { path = "../fastlanez-sys" } +log = { workspace = true } + +[dev-dependencies] +simplelog = { workspace = true } diff --git a/vortex-ree/Cargo.toml b/vortex-ree/Cargo.toml index 966c9d38b2..011ff2df5f 100644 --- a/vortex-ree/Cargo.toml +++ b/vortex-ree/Cargo.toml @@ -14,12 +14,12 @@ rust-version = { workspace = true } [dependencies] vortex-array = { path = "../vortex-array" } vortex-schema = { path = "../vortex-schema" } -arrow-array = "50.0.0" -arrow-buffer = "50.0.0" -linkme = "0.3.22" -half = "2.3.1" -num-traits = "0.2.17" -itertools = "0.12.1" +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +linkme = { workspace = true } +half = { workspace = true } +num-traits = { workspace = true } +itertools = { workspace = true } [lints] workspace = true diff --git a/vortex-roaring/Cargo.toml b/vortex-roaring/Cargo.toml index 67a36cdf8f..57306c234b 100644 --- a/vortex-roaring/Cargo.toml +++ b/vortex-roaring/Cargo.toml @@ -14,10 +14,10 @@ rust-version = { workspace = true } [dependencies] vortex-array = { path = "../vortex-array" } vortex-schema = { path = "../vortex-schema" } -linkme = "0.3.22" -croaring = "1.0.1" -num-traits = "0.2.17" -log = "0.4.20" +linkme = { workspace = true } +croaring = {workspace = true} +num-traits = { workspace = true } +log = { workspace = true } [lints] workspace = true diff --git a/vortex-schema/Cargo.toml b/vortex-schema/Cargo.toml index 5387537be5..8766018277 100644 --- a/vortex-schema/Cargo.toml +++ b/vortex-schema/Cargo.toml @@ -16,14 +16,14 @@ name = "vortex_schema" path = "src/lib.rs" [dependencies] -arrow-schema = "50.0.0" -itertools = "0.12.1" -thiserror = "1.0.58" -flatbuffers = "23.5.26" +arrow-schema = { workspace = true } +flatbuffers = { workspace = true } +itertools = { workspace = true } +thiserror = { workspace = true } [build-dependencies] -flatc = "0.2.2" -walkdir = "2.4.0" +flatc = { workspace = true } +walkdir = { workspace = true } [lints] workspace = true diff --git a/vortex-schema/src/dtype.rs b/vortex-schema/src/dtype.rs index 6f7de6c8d8..43e65a7ccd 100644 --- a/vortex-schema/src/dtype.rs +++ b/vortex-schema/src/dtype.rs @@ -69,8 +69,8 @@ pub enum IntWidth { _64, } -impl From for IntWidth { - fn from(item: i8) -> Self { +impl From for IntWidth { + fn from(item: u16) -> Self { match item { 8 => IntWidth::_8, 16 => IntWidth::_16, diff --git a/vortex-zigzag/Cargo.toml b/vortex-zigzag/Cargo.toml index 6ced0fa47a..70f57ed091 100644 --- a/vortex-zigzag/Cargo.toml +++ b/vortex-zigzag/Cargo.toml @@ -12,11 +12,11 @@ edition = { workspace = true } rust-version = { workspace = true } [dependencies] -linkme = "0.3.22" +linkme = { workspace = true } vortex-alloc = { path = "../vortex-alloc" } vortex-array = { path = "../vortex-array" } vortex-schema = { path = "../vortex-schema" } -zigzag = "0.1.0" +zigzag = { workspace = true } [lints] workspace = true