diff --git a/Cargo.lock b/Cargo.lock index fb91a16698..84a0c25d69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -433,13 +433,16 @@ dependencies = [ "flexbuffers", "futures", "humansize", + "indicatif", "itertools 0.13.0", "lazy_static", "log", "mimalloc", "object_store", "parquet", + "prettytable-rs", "rand", + "rayon", "reqwest", "serde", "simplelog", @@ -735,6 +738,19 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode 0.3.6", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.52.0", +] + [[package]] name = "const-random" version = "0.1.18" @@ -1249,6 +1265,27 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "divan" version = "0.1.14" @@ -1286,6 +1323,18 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -1787,6 +1836,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + [[package]] name = "indoc" version = "2.0.5" @@ -1981,6 +2043,16 @@ dependencies = [ "libc", ] +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.5.0", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -2307,6 +2379,12 @@ dependencies = [ "libc", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.36.0" @@ -2639,6 +2717,20 @@ dependencies = [ "syn 2.0.68", ] +[[package]] +name = "prettytable-rs" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a" +dependencies = [ + "csv", + "encode_unicode 1.0.0", + "is-terminal", + "lazy_static", + "term", + "unicode-width", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -2684,8 +2776,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" dependencies = [ "bytes", - "heck 0.4.1", - "itertools 0.12.1", + "heck 0.5.0", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -2705,7 
+2797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.68", @@ -2939,6 +3031,17 @@ dependencies = [ "bitflags 2.5.0", ] +[[package]] +name = "redox_users" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" version = "1.10.5" @@ -3495,6 +3598,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -4287,6 +4401,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.8" @@ -4296,6 +4426,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 
a98d39e694..6abcb7a48b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,6 +75,7 @@ getrandom = "0.2.14" half = { version = "^2", features = ["std", "num-traits"] } hashbrown = "0.14.3" humansize = "2.1.3" +indicatif = "0.17.8" itertools = "0.13.0" lazy_static = "1.4.0" leb128 = "0.2.5" @@ -85,12 +86,14 @@ num_enum = "0.7.2" parquet = "52.0.0" paste = "1.0.14" pin-project = "1.1.5" +prettytable-rs = "0.10.0" prost = "0.13.0" prost-build = "0.13.0" prost-types = "0.13.0" pyo3 = { version = "0.21.2", features = ["extension-module", "abi3-py311"] } pyo3-log = "0.11.0" rand = "0.8.5" +rayon = "1.10.0" reqwest = { version = "0.12.0", features = ["blocking"] } seq-macro = "0.3.5" serde = "1.0.197" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index 89565d3172..ea3baf45bb 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -27,13 +27,16 @@ enum-iterator = { workspace = true } flexbuffers = { workspace = true } futures = { workspace = true, features = ["executor"] } humansize = { workspace = true } +indicatif = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } log = { workspace = true } mimalloc = { workspace = true } object_store = { workspace = true, features = ["aws"] } parquet = { workspace = true, features = [] } +prettytable-rs = { workspace = true } rand = { workspace = true } +rayon = { workspace = true } reqwest = { workspace = true } serde = { workspace = true } simplelog = { workspace = true } diff --git a/bench-vortex/benches/tpch_benchmark.rs b/bench-vortex/benches/tpch_benchmark.rs index 2f3f5d451c..d49e76504a 100644 --- a/bench-vortex/benches/tpch_benchmark.rs +++ b/bench-vortex/benches/tpch_benchmark.rs @@ -33,6 +33,10 @@ fn benchmark(c: &mut Criterion) { .unwrap(); for q in 1..=22 { + if q == 15 { + // DataFusion does not support query 15 since it has multiple SQL statements. 
+ continue; + } let query = bench_vortex::tpch::tpch_query(q); let mut group = c.benchmark_group(format!("tpch_q{q}")); diff --git a/bench-vortex/src/bin/tpch_benchmark.rs b/bench-vortex/src/bin/tpch_benchmark.rs index c80c65c1fb..65ce2f41de 100644 --- a/bench-vortex/src/bin/tpch_benchmark.rs +++ b/bench-vortex/src/bin/tpch_benchmark.rs @@ -1,61 +1,11 @@ -#![allow(dead_code)] -use std::path::PathBuf; -use std::time::SystemTime; +use std::time::Instant; use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions}; use bench_vortex::tpch::{load_datasets, tpch_query, Format}; +use indicatif::ProgressBar; +use prettytable::{Cell, Row, Table}; -async fn q1_csv(base_dir: &PathBuf) -> anyhow::Result<()> { - let ctx = load_datasets(base_dir, Format::Csv).await?; - let q1 = tpch_query(1); - - println!("BEGIN: Q1(CSV)"); - - let start = SystemTime::now(); - ctx.sql(&q1).await?.show().await?; - let elapsed = start.elapsed()?.as_millis(); - println!("END CSV: {elapsed}ms"); - - Ok(()) -} - -async fn q1_arrow(base_dir: &PathBuf) -> anyhow::Result<()> { - let ctx = load_datasets(base_dir, Format::Arrow).await?; - let q1 = tpch_query(1); - - println!("BEGIN: Q1(ARROW)"); - let start = SystemTime::now(); - - ctx.sql(&q1).await?.show().await?; - let elapsed = start.elapsed()?.as_millis(); - - println!("END ARROW: {elapsed}ms"); - - Ok(()) -} - -async fn q1_vortex(base_dir: &PathBuf) -> anyhow::Result<()> { - let ctx = load_datasets( - base_dir, - Format::Vortex { - disable_pushdown: true, - }, - ) - .await?; - let q1 = tpch_query(1); - - println!("BEGIN: Q1(VORTEX)"); - let start = SystemTime::now(); - - ctx.sql(&q1).await?.show().await?; - - let elapsed = start.elapsed()?.as_millis(); - println!("END VORTEX: {elapsed}ms"); - - Ok(()) -} - -#[tokio::main(flavor = "current_thread")] +#[tokio::main(flavor = "multi_thread", worker_threads = 8)] async fn main() { // uncomment the below to enable trace logging of datafusion execution // setup_logger(LevelFilter::Trace); @@ -63,7 +13,52 @@ async fn main() { // Run TPC-H data gen. 
let data_dir = DBGen::new(DBGenOptions::default()).generate().unwrap(); - q1_csv(&data_dir).await.unwrap(); - q1_arrow(&data_dir).await.unwrap(); - q1_vortex(&data_dir).await.unwrap(); + let formats = [ + Format::Csv, + Format::Arrow, + Format::Vortex { + disable_pushdown: false, + }, + Format::Vortex { + disable_pushdown: true, + }, + ]; + + // Set up a results table + let mut table = Table::new(); + let mut cells = vec![Cell::new("Query")]; + cells.extend(formats.iter().map(|f| Cell::new(&format!("{:?}", f)))); + table.add_row(Row::new(cells)); + + // Set up a progress bar sized for the 21 queries that actually run (query 15 is skipped) + let progress = ProgressBar::new(21 * formats.len() as u64); + + for i in 1..=22 { + // Skip query 15 as it is not supported by DataFusion + if i == 15 { + continue; + } + + let query = tpch_query(i); + let mut cells = Vec::with_capacity(formats.len() + 1); + cells.push(Cell::new(&format!("Q{}", i))); + for format in formats.iter() { + let ctx = load_datasets(&data_dir, *format).await.unwrap(); + let start = Instant::now(); + ctx.sql(&query) + .await + .map_err(|e| println!("Failed to run {} {:?}: {}", i, format, e)) + .unwrap() + .collect() + .await + .map_err(|e| println!("Failed to collect {} {:?}: {}", i, format, e)) + .unwrap(); + let elapsed = start.elapsed(); + progress.inc(1); + cells.push(Cell::new(&format!("{} us", elapsed.as_micros()))); + } + table.add_row(Row::new(cells)); + } + progress.finish(); + table.printstd(); } diff --git a/bench-vortex/src/tpch/mod.rs b/bench-vortex/src/tpch/mod.rs index 0d5fe3d93d..2ad1ba3691 100644 --- a/bench-vortex/src/tpch/mod.rs +++ b/bench-vortex/src/tpch/mod.rs @@ -14,6 +14,7 @@ use vortex_datafusion::{SessionContextExt, VortexMemTableOptions}; pub mod dbgen; pub mod schema; +#[derive(Clone, Copy, Debug)] pub enum Format { Csv, Arrow,