diff --git a/Cargo.lock b/Cargo.lock index dc4c914..584fdf6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,19 +4,13 @@ version = 3 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "adler2" version = "2.0.0" @@ -139,9 +133,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" [[package]] name = "apache-avro" @@ -185,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +checksum = "45aef0d9cf9a039bf6cd1acc451b137aca819977b0928dece52bd92811b640ba" dependencies = [ "arrow-arith", "arrow-array", @@ -207,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +checksum = "03675e42d1560790f3524800e41403b40d0da1c793fe9528929fde06d8c7649a" dependencies = [ "arrow-array", "arrow-buffer", @@ -222,9 +216,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +checksum = "cd2bf348cf9f02a5975c5962c7fa6dee107a2009a7b41ac5fb1a027e12dc033f" dependencies = [ "ahash", "arrow-buffer", @@ -239,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +checksum = "3092e37715f168976012ce52273c3989b5793b0db5f06cbaa246be25e5f0924d" dependencies = [ "bytes", "half", @@ -250,9 +244,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" +checksum = "7ce1018bb710d502f9db06af026ed3561552e493e989a79d0d0f5d9cf267a785" dependencies = [ "arrow-array", "arrow-buffer", @@ -271,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +checksum = "fd178575f45624d045e4ebee714e246a05d9652e41363ee3f57ec18cca97f740" dependencies = [ "arrow-array", "arrow-buffer", @@ -290,9 +284,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" +checksum = "4e4ac0c4ee79150afe067dc4857154b3ee9c1cd52b5f40d59a77306d0ed18d65" dependencies = [ "arrow-buffer", "arrow-schema", @@ -302,9 +296,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" +checksum = "bb307482348a1267f91b0912e962cd53440e5de0f7fb24c5f7b10da70b38c94a" dependencies = [ "arrow-array", "arrow-buffer", @@ -317,9 +311,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +checksum = "d24805ba326758effdd6f2cbdd482fcfab749544f21b134701add25b33f474e6" dependencies = [ "arrow-array", "arrow-buffer", @@ -337,9 +331,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +checksum = "644046c479d80ae8ed02a7f1e1399072ea344ca6a7b0e293ab2d5d9ed924aa3b" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,9 +346,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +checksum = "a29791f8eb13b340ce35525b723f5f0df17ecb955599e11f65c2a94ab34e2efb" dependencies = [ "ahash", "arrow-array", @@ -366,18 +360,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +checksum = "c85320a3a2facf2b2822b57aa9d6d9d55edb8aee0b6b5d3b8df158e503d10858" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "arrow-select" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" +checksum = "9cc7e6b582e23855fd1625ce46e51647aa440c20ea2e71b1d748e0839dd73cba" dependencies = [ "ahash", "arrow-array", @@ -389,9 +383,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +checksum = "0775b6567c66e56ded19b87a954b6b1beffbdd784ef95a3a2b03f59570c1d230" dependencies = [ "arrow-array", "arrow-buffer", @@ -450,17 +444,17 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide 0.7.4", + "miniz_oxide", "object", "rustc-demangle", + "windows-targets", ] [[package]] @@ -603,9 +597,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.15" +version = "1.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" +checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" dependencies = [ "jobserver", "libc", @@ -732,9 +726,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -849,9 +843,9 @@ checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" [[package]] name = "dashmap" -version = "6.0.1" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", @@ -864,7 +858,7 @@ dependencies = [ [[package]] name = "datafusion" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "apache-avro", @@ -890,7 +884,6 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", @@ -900,7 +893,7 @@ dependencies = [ "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "num-traits", "num_cpus", @@ -923,7 +916,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow-schema", "async-trait", @@ -936,7 +929,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "apache-avro", @@ -955,20 +948,22 @@ dependencies = [ "paste", "pyo3", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ + "log", "tokio", ] [[package]] name = "datafusion-execution" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow", "chrono", @@ -988,7 +983,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "arrow", @@ -1009,7 +1004,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow", "datafusion-common", @@ -1019,7 +1014,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow", "arrow-buffer", @@ -1032,7 +1027,7 @@ dependencies = [ "datafusion-expr", "hashbrown", "hex", - "itertools 0.12.1", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1045,7 +1040,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "arrow", @@ -1065,7 +1060,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "arrow", @@ -1078,7 +1073,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow", "arrow-array", @@ -1090,7 +1085,8 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", - "itertools 0.12.1", + "datafusion-physical-expr-common", + "itertools 0.13.0", "log", "paste", "rand", @@ -1099,7 +1095,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1110,7 +1106,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow", "async-trait", @@ -1120,7 +1116,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "regex-syntax", @@ -1129,7 +1125,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "arrow", @@ -1150,7 +1146,7 @@ dependencies = [ "hashbrown", "hex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1160,7 +1156,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "arrow", @@ -1170,37 +1166,23 @@ dependencies = [ "rand", ] -[[package]] -name = "datafusion-physical-expr-functions-aggregate" -version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "rand", -] - [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-physical-expr", "datafusion-physical-plan", - "itertools 0.12.1", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-plan" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "ahash", "arrow", @@ -1218,12 +1200,11 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "futures", "half", "hashbrown", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1235,7 +1216,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "41.0.0" -source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#2e125836d4e75c1d07adf30d3f13e70c3afc1416" +source = "git+https://github.com/probably-nothing-labs/arrow-datafusion?branch=main#e99c259407c014e1606908320f037ede72f6689d" dependencies = [ "arrow", "arrow-array", @@ -1356,12 +1337,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "either" version = "1.13.0" @@ -1436,7 +1411,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", - "miniz_oxide 0.8.0", + "miniz_oxide", ] [[package]] @@ -1560,9 +1535,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" [[package]] name = "glob" @@ -1969,15 +1944,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" -[[package]] -name = "miniz_oxide" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" -dependencies = [ - "adler", -] - [[package]] name = "miniz_oxide" version = "0.8.0" @@ -2123,9 +2089,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" +checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" dependencies = [ "async-trait", "bytes", @@ -2188,9 +2154,9 @@ dependencies = [ [[package]] name = "parquet" -version = "52.2.0" +version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" +checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8" dependencies = [ "ahash", "arrow-array", @@ -2345,15 +2311,15 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", - "parking_lot", + "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -2363,9 +2329,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8" dependencies = [ "once_cell", "target-lexicon", @@ -2373,9 +2339,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6" dependencies = [ "libc", "pyo3-build-config", @@ -2383,9 +2349,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2395,11 +2361,11 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.21.2" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", @@ -2408,9 +2374,9 @@ dependencies = [ [[package]] name = "quad-rand" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" +checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" [[package]] name = "quote" @@ -2564,9 +2530,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.35" +version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f" +checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ "bitflags 2.6.0", "errno", @@ -2616,18 +2582,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", @@ -2636,9 +2602,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", "memchr", @@ -2695,24 +2661,23 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snafu" -version = "0.7.5" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" dependencies = [ - "doc-comment", "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.7.5" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.77", ] [[package]] @@ -2927,9 +2892,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -3413,9 +3378,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 774c472..6deccca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,15 +22,15 @@ denormalized-orchestrator = { path = "crates/orchestrator" } datafusion = { git = "https://github.com/probably-nothing-labs/arrow-datafusion", branch = "main" } -arrow = { version = "52.0.0", features = ["prettyprint"] } -arrow-array = { version = "52.0.0", default-features = false, features = [ +arrow = { version = "53.0.0", features = ["prettyprint"] } +arrow-array = { version = "53.0.0", default-features = false, features = [ "chrono-tz", ] } -arrow-schema = { version = "52.0.0", default-features = false } -arrow-ipc = { version = "52.0.0", default-features = false, features = ["lz4"] } -arrow-json = { version = "52.0.0" } -arrow-string = { version = "52.0.0", default-features = false } -arrow-ord = { version = "52.0.0", default-features = false } +arrow-schema = { version = "53.0.0", default-features = false } +arrow-ipc = { version = "53.0.0", default-features = false, features = ["lz4"] } +arrow-json = { version = "53.0.0" } +arrow-string = { version = "53.0.0", default-features = false } +arrow-ord = { version = "53.0.0", default-features = false } apache-avro = { version = "0.16", default-features = false, features = [ "bzip", @@ -56,4 +56,4 @@ serde_json = "1" base64 = "0.22.1" chrono = { version = "0.4.38", default-features = false } itertools = "0.13" -pyo3 = { version = "0.21.2", features = ["experimental-async"] } +pyo3 = { version = "0.22.2", features = ["experimental-async"] } diff --git a/crates/core/src/context.rs b/crates/core/src/context.rs index e069def..dbf917b 100644 --- a/crates/core/src/context.rs +++ b/crates/core/src/context.rs @@ -24,13 +24,13 @@ impl Context { let config = SessionConfig::new() .set( "datafusion.execution.batch_size", - datafusion::common::ScalarValue::UInt64(Some(32)), + &datafusion::common::ScalarValue::UInt64(Some(32)), ) // coalesce_batches slows down the pipeline and increases latency as it tries to concat // small batches together so we disable it. .set( "datafusion.execution.coalesce_batches", - datafusion::common::ScalarValue::Boolean(Some(false)), + &datafusion::common::ScalarValue::Boolean(Some(false)), ); let runtime = Arc::new(RuntimeEnv::default()); diff --git a/crates/core/src/datasource/kafka/kafka_config.rs b/crates/core/src/datasource/kafka/kafka_config.rs index 3117812..c96a3bb 100644 --- a/crates/core/src/datasource/kafka/kafka_config.rs +++ b/crates/core/src/datasource/kafka/kafka_config.rs @@ -4,7 +4,7 @@ use std::{sync::Arc, time::Duration}; use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef, TimeUnit}; -use datafusion::logical_expr::Expr; +use datafusion::logical_expr::SortExpr; use crate::physical_plan::utils::time::TimestampUnit; use crate::utils::arrow_helpers::infer_arrow_schema_from_json_value; @@ -29,7 +29,7 @@ pub struct KafkaReadConfig { pub schema: SchemaRef, pub encoding: StreamEncoding, - pub order: Vec>, + pub order: Vec>, pub partition_count: i32, pub timestamp_column: String, pub timestamp_unit: TimestampUnit, diff --git a/crates/core/src/datasource/kafka/topic_reader.rs b/crates/core/src/datasource/kafka/topic_reader.rs index 66d72fe..9e91174 100644 --- a/crates/core/src/datasource/kafka/topic_reader.rs +++ b/crates/core/src/datasource/kafka/topic_reader.rs @@ -5,7 +5,7 @@ use arrow_schema::{Schema, SchemaRef, SortOptions}; use datafusion::catalog::Session; use datafusion::common::{not_impl_err, plan_err, Result}; use datafusion::datasource::TableProvider; -use datafusion::logical_expr::{Expr, TableType}; +use datafusion::logical_expr::{Expr, SortExpr, TableType}; use datafusion::physical_expr::{expressions, LexOrdering, PhysicalSortExpr}; use datafusion::physical_plan::ExecutionPlan; @@ -89,36 +89,33 @@ impl TableProvider for TopicReader { } } -fn create_ordering(schema: &Schema, sort_order: &[Vec]) -> Result> { +fn create_ordering(schema: &Schema, sort_order: &[Vec]) -> Result> { let mut all_sort_orders = vec![]; for exprs in sort_order { // Construct PhysicalSortExpr objects from Expr objects: let mut sort_exprs = vec![]; - for expr in exprs { - match expr { - Expr::Sort(sort) => match sort.expr.as_ref() { - Expr::Column(col) => match expressions::col(&col.name, schema) { - Ok(expr) => { - sort_exprs.push(PhysicalSortExpr { - expr, - options: SortOptions { - descending: !sort.asc, - nulls_first: sort.nulls_first, - }, - }); - } - // Cannot find expression in the projected_schema, stop iterating - // since rest of the orderings are violated - Err(_) => break, - }, - expr => { - return plan_err!( - "Expected single column references in output_ordering, got {expr}" - ) + for sort in exprs { + match &sort.expr { + Expr::Column(col) => match expressions::col(&col.name, schema) { + Ok(expr) => { + sort_exprs.push(PhysicalSortExpr { + expr, + options: SortOptions { + descending: !sort.asc, + nulls_first: sort.nulls_first, + }, + }); } + // Cannot find expression in the projected_schema, stop iterating + // since rest of the orderings are violated + Err(_) => break, }, - expr => return plan_err!("Expected Expr::Sort in output_ordering, but got {expr}"), + expr => { + return plan_err!( + "Expected single column references in output_ordering, got {expr}" + ) + } } } if !sort_exprs.is_empty() { diff --git a/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs b/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs index c0eceb3..39ba88b 100644 --- a/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs +++ b/crates/core/src/physical_plan/continuous/grouped_window_agg_stream.rs @@ -15,6 +15,7 @@ use arrow::{ use arrow_array::{ArrayRef, PrimitiveArray, RecordBatch, StructArray, TimestampMillisecondArray}; use arrow_ord::cmp; use arrow_schema::{Schema, SchemaRef}; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; use datafusion::{ common::{utils::proxy::VecAllocExt, DataFusionError, Result}, execution::memory_pool::{MemoryConsumer, MemoryReservation}, @@ -31,9 +32,9 @@ use datafusion::{ AggregateMode, }, metrics::BaselineMetrics, - AggregateExpr, }, }; + use futures::{Stream, StreamExt}; use crate::physical_plan::utils::time::RecordBatchWatermark; @@ -51,7 +52,7 @@ pub struct GroupedWindowAggStream { pub schema: SchemaRef, input: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, - exec_aggregate_expressions: Vec>, + exec_aggregate_expressions: Vec>, aggregate_expressions: Vec>>, filter_expressions: Vec>>, latest_watermark: Arc>>, diff --git a/crates/core/src/physical_plan/continuous/mod.rs b/crates/core/src/physical_plan/continuous/mod.rs index 8edcf05..664aada 100644 --- a/crates/core/src/physical_plan/continuous/mod.rs +++ b/crates/core/src/physical_plan/continuous/mod.rs @@ -17,13 +17,13 @@ use datafusion::{ pub mod grouped_window_agg_stream; pub mod streaming_window; -use datafusion::physical_expr::AggregateExpr; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; use log::debug; pub(crate) type GroupsAccumulatorItem = Box; pub(crate) fn create_group_accumulator( - agg_expr: &Arc, + agg_expr: &Arc, ) -> Result> { if agg_expr.groups_accumulator_supported() { agg_expr.create_groups_accumulator() diff --git a/crates/core/src/physical_plan/continuous/streaming_window.rs b/crates/core/src/physical_plan/continuous/streaming_window.rs index 093596a..c846887 100644 --- a/crates/core/src/physical_plan/continuous/streaming_window.rs +++ b/crates/core/src/physical_plan/continuous/streaming_window.rs @@ -17,10 +17,11 @@ use arrow_ord::cmp; use arrow_schema::{Field, Schema, SchemaRef}; use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; use datafusion::physical_expr::{ equivalence::{collapse_lex_req, ProjectionMapping}, expressions::UnKnownColumn, - AggregateExpr, Partitioning, PhysicalExpr, PhysicalSortRequirement, + Partitioning, PhysicalExpr, PhysicalSortRequirement, }; use datafusion::physical_plan::{ aggregates::{ @@ -32,6 +33,7 @@ use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PlanProperties, }; + use datafusion::{ common::{internal_err, stats::Precision, DataFusionError, Statistics}, physical_plan::Distribution, @@ -193,7 +195,7 @@ pub enum PhysicalStreamingWindowType { #[derive(Debug)] pub struct StreamingWindowExec { pub(crate) input: Arc, - pub aggregate_expressions: Vec>, + pub aggregate_expressions: Vec>, pub filter_expressions: Vec>>, /// Schema after the window is run pub group_by: PhysicalGroupBy, @@ -214,7 +216,7 @@ impl StreamingWindowExec { pub fn try_new( mode: AggregateMode, group_by: PhysicalGroupBy, - aggr_expr: Vec>, + aggr_expr: Vec>, filter_expr: Vec>>, input: Arc, input_schema: SchemaRef, @@ -225,7 +227,7 @@ impl StreamingWindowExec { &input.schema(), group_by.expr(), &aggr_expr, - group_by.contains_null(), + false, //group_by.contains_null(), mode, )?; @@ -246,7 +248,7 @@ impl StreamingWindowExec { pub fn try_new_with_schema( mode: AggregateMode, group_by: PhysicalGroupBy, - mut aggr_expr: Vec>, + mut aggr_expr: Vec>, filter_expr: Vec>>, input: Arc, input_schema: SchemaRef, @@ -363,7 +365,7 @@ impl StreamingWindowExec { PlanProperties::new(eq_properties, output_partitioning, ExecutionMode::Unbounded) } /// Aggregate expressions - pub fn aggr_expr(&self) -> &[Arc] { + pub fn aggr_expr(&self) -> &[Arc] { &self.aggregate_expressions } @@ -608,7 +610,7 @@ pub struct WindowAggStream { pub schema: SchemaRef, input: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, - exec_aggregate_expressions: Vec>, + exec_aggregate_expressions: Vec>, aggregate_expressions: Vec>>, filter_expressions: Vec>>, latest_watermark: Arc>>, @@ -809,7 +811,7 @@ impl FullWindowAggFrame { pub fn new( start_time: SystemTime, end_time: SystemTime, - exec_aggregate_expressions: &[Arc], + exec_aggregate_expressions: &[Arc], aggregate_expressions: Vec>>, filter_expressions: Vec>>, @@ -850,7 +852,7 @@ struct FullWindowAggStream { pub schema: SchemaRef, input: SendableRecordBatchStream, baseline_metrics: BaselineMetrics, - exec_aggregate_expressions: Vec>, + exec_aggregate_expressions: Vec>, aggregate_expressions: Vec>>, filter_expressions: Vec>>, cached_frames: BTreeMap, @@ -1056,7 +1058,7 @@ fn snap_to_window_start(timestamp: SystemTime, window_length: Duration) -> Syste fn create_schema( input_schema: &Schema, group_expr: &[(Arc, String)], - aggr_expr: &[Arc], + aggr_expr: &[Arc], contains_null_expr: bool, mode: AggregateMode, ) -> Result { @@ -1085,7 +1087,7 @@ fn create_schema( | AggregateMode::SinglePartitioned => { // in final mode, the field with the final result of the accumulator for expr in aggr_expr { - fields.push(expr.field()?) + fields.push(expr.field()) } } } diff --git a/crates/core/src/physical_plan/utils/accumulators.rs b/crates/core/src/physical_plan/utils/accumulators.rs index caf938c..87f0978 100644 --- a/crates/core/src/physical_plan/utils/accumulators.rs +++ b/crates/core/src/physical_plan/utils/accumulators.rs @@ -2,12 +2,12 @@ use std::sync::Arc; use datafusion::common::Result; use datafusion::logical_expr::Accumulator; -use datafusion::physical_expr::AggregateExpr; +use datafusion::physical_expr::aggregate::AggregateFunctionExpr; pub(crate) type AccumulatorItem = Box; pub(crate) fn create_accumulators( - aggr_expr: &[Arc], + aggr_expr: &[Arc], ) -> Result> { aggr_expr .iter() diff --git a/py-denormalized/Cargo.toml b/py-denormalized/Cargo.toml index e4773a8..24b4855 100644 --- a/py-denormalized/Cargo.toml +++ b/py-denormalized/Cargo.toml @@ -10,7 +10,7 @@ name = "denormalized_python" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.21.2", features = ["experimental-async"] } +pyo3 = { workspace = true } denormalized = { workspace = true } datafusion = { workspace = true, features = [ "pyarrow",