Skip to content

Commit

Permalink
Rest of TPCH
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Jul 15, 2024
1 parent 1fc4a25 commit 851a6d3
Show file tree
Hide file tree
Showing 26 changed files with 697 additions and 57 deletions.
79 changes: 52 additions & 27 deletions bench-vortex/benches/tpch_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::tpch::query::Q1;
use bench_vortex::tpch::{load_datasets, Format};
use criterion::{criterion_group, criterion_main, Criterion};
use tokio::runtime::Builder;
Expand All @@ -10,50 +9,76 @@ fn benchmark(c: &mut Criterion) {
// Run TPC-H data gen.
let data_dir = DBGen::new(DBGenOptions::default()).generate().unwrap();

let mut group = c.benchmark_group("tpch q1");
group.sample_size(10);

let ctx = runtime
// Context with pushdown DISABLED. It backs the "vortex-nopushdown" benchmark,
// so `disable_pushdown` must be `true` to agree with both the variable name
// and the benchmark label.
let vortex_no_pushdown_ctx = runtime
    .block_on(load_datasets(
        &data_dir,
        Format::Vortex {
            disable_pushdown: true,
        },
    ))
    .unwrap();
group.bench_function("vortex-pushdown", |b| {
b.to_async(&runtime)
.iter(|| async { ctx.sql(Q1).await.unwrap().collect().await.unwrap() })
});

let ctx = runtime
// Context with pushdown ENABLED. It backs the "vortex-pushdown" benchmark,
// so `disable_pushdown` must be `false` to agree with the benchmark label.
let vortex_ctx = runtime
    .block_on(load_datasets(
        &data_dir,
        Format::Vortex {
            disable_pushdown: false,
        },
    ))
    .unwrap();
group.bench_function("vortex-nopushdown", |b| {
b.to_async(&runtime)
.iter(|| async { ctx.sql(Q1).await.unwrap().collect().await.unwrap() })
});

let ctx = runtime
let csv_ctx = runtime
.block_on(load_datasets(&data_dir, Format::Csv))
.unwrap();
group.bench_function("csv", |b| {
b.to_async(&runtime)
.iter(|| async { ctx.sql(Q1).await.unwrap().collect().await.unwrap() })
});

let ctx = runtime
let arrow_ctx = runtime
.block_on(load_datasets(&data_dir, Format::Arrow))
.unwrap();
group.bench_function("arrow", |b| {
b.to_async(&runtime)
.iter(|| async { ctx.sql(Q1).await.unwrap().collect().await.unwrap() })
});

// One Criterion group per TPC-H query (1 through 22); within each group the
// same query text is run against every pre-loaded context.
for q in 1..=22 {
    let query = bench_vortex::tpch::tpch_query(q);

    let mut group = c.benchmark_group(format!("tpch_q{q}"));
    group.sample_size(10);

    // (benchmark label, context) pairs, in the order they are registered.
    let targets = [
        ("vortex-pushdown", &vortex_ctx),
        ("vortex-nopushdown", &vortex_no_pushdown_ctx),
        ("csv", &csv_ctx),
        ("arrow", &arrow_ctx),
    ];

    for (label, ctx) in targets {
        group.bench_function(label, |b| {
            b.to_async(&runtime).iter(|| async {
                // Plan the query, then execute it and collect the results.
                let frame = ctx.sql(&query).await.unwrap();
                frame.collect().await.unwrap()
            })
        });
    }
}
}

criterion_group!(benches, benchmark);
Expand Down
12 changes: 7 additions & 5 deletions bench-vortex/src/bin/tpch_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
use std::path::PathBuf;
use std::time::SystemTime;

use bench_vortex::tpch;
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::tpch::{load_datasets, Format};
use bench_vortex::tpch::{load_datasets, tpch_query, Format};

async fn q1_csv(base_dir: &PathBuf) -> anyhow::Result<()> {
let ctx = load_datasets(base_dir, Format::Csv).await?;
let q1 = tpch_query(1);

println!("BEGIN: Q1(CSV)");

let start = SystemTime::now();
ctx.sql(tpch::query::Q1).await?.show().await?;
ctx.sql(&q1).await?.show().await?;
let elapsed = start.elapsed()?.as_millis();
println!("END CSV: {elapsed}ms");

Expand All @@ -21,11 +21,12 @@ async fn q1_csv(base_dir: &PathBuf) -> anyhow::Result<()> {

async fn q1_arrow(base_dir: &PathBuf) -> anyhow::Result<()> {
let ctx = load_datasets(base_dir, Format::Arrow).await?;
let q1 = tpch_query(1);

println!("BEGIN: Q1(ARROW)");
let start = SystemTime::now();

ctx.sql(tpch::query::Q1).await?.show().await?;
ctx.sql(&q1).await?.show().await?;
let elapsed = start.elapsed()?.as_millis();

println!("END ARROW: {elapsed}ms");
Expand All @@ -41,11 +42,12 @@ async fn q1_vortex(base_dir: &PathBuf) -> anyhow::Result<()> {
},
)
.await?;
let q1 = tpch_query(1);

println!("BEGIN: Q1(VORTEX)");
let start = SystemTime::now();

ctx.sql(tpch::query::Q1).await?.show().await?;
ctx.sql(&q1).await?.show().await?;

let elapsed = start.elapsed()?.as_millis();
println!("END VORTEX: {elapsed}ms");
Expand Down
9 changes: 8 additions & 1 deletion bench-vortex/src/tpch/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fs;
use std::path::Path;
use std::sync::Arc;

Expand All @@ -11,7 +12,6 @@ use vortex::{Array, ArrayDType, ArrayData, IntoArray};
use vortex_datafusion::{SessionContextExt, VortexMemTableOptions};

pub mod dbgen;
pub mod query;
pub mod schema;

pub enum Format {
Expand Down Expand Up @@ -156,3 +156,10 @@ async fn register_vortex(

Ok(())
}

/// Returns the SQL text of TPC-H query `query_idx`.
///
/// The text is read from `tpch/q{query_idx}.sql` under this crate's manifest
/// directory (`CARGO_MANIFEST_DIR`, resolved at compile time).
///
/// # Panics
///
/// Panics if the query file cannot be read, e.g. when no file exists for
/// `query_idx`. The panic message names the offending path.
pub fn tpch_query(query_idx: usize) -> String {
    let query_path = Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tpch")
        .join(format!("q{query_idx}.sql"));
    fs::read_to_string(&query_path).unwrap_or_else(|err| {
        panic!(
            "failed to read TPC-H query file {}: {err}",
            query_path.display()
        )
    })
}
24 changes: 0 additions & 24 deletions bench-vortex/src/tpch/query.rs

This file was deleted.

21 changes: 21 additions & 0 deletions bench-vortex/tpch/q1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- Q1: per (l_returnflag, l_linestatus) group, report quantity/price sums,
-- discounted and taxed price sums, averages, and the row count for lineitem
-- rows with l_shipdate on or before 1998-09-02. Ordered by the group keys.
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-09-02'
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
31 changes: 31 additions & 0 deletions bench-vortex/tpch/q10.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Q10: discounted revenue per customer from line items flagged 'R'
-- (l_returnflag) on orders dated in [1993-10-01, 1994-01-01), with the
-- customer's nation name and contact columns. Highest revenue first.
select
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
c_phone,
c_comment
from
customer,
orders,
lineitem,
nation
where
c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate >= date '1993-10-01'
and o_orderdate < date '1994-01-01'
and l_returnflag = 'R'
and c_nationkey = n_nationkey
group by
c_custkey,
c_name,
c_acctbal,
c_phone,
n_name,
c_address,
c_comment
order by
revenue desc;
27 changes: 27 additions & 0 deletions bench-vortex/tpch/q11.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Q11: per-part stock value (ps_supplycost * ps_availqty) over suppliers whose
-- nation is 'GERMANY', keeping only parts whose value exceeds 0.0001 of the
-- total value computed over the same join. Highest value first.
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by
ps_partkey having
sum(ps_supplycost * ps_availqty) > (
select
sum(ps_supplycost * ps_availqty) * 0.0001
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
)
order by
value desc;
30 changes: 30 additions & 0 deletions bench-vortex/tpch/q12.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Q12: per ship mode ('MAIL', 'SHIP'), count line items on orders with
-- priority '1-URGENT'/'2-HIGH' versus all other priorities, restricted to
-- items received in [1994-01-01, 1995-01-01) where
-- l_shipdate < l_commitdate < l_receiptdate. Ordered by ship mode.
select
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
or o_orderpriority = '2-HIGH'
then 1
else 0
end) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT'
and o_orderpriority <> '2-HIGH'
then 1
else 0
end) as low_line_count
from
lineitem
join
orders
on
l_orderkey = o_orderkey
where
l_shipmode in ('MAIL', 'SHIP')
and l_commitdate < l_receiptdate
and l_shipdate < l_commitdate
and l_receiptdate >= date '1994-01-01'
and l_receiptdate < date '1995-01-01'
group by
l_shipmode
order by
l_shipmode;
20 changes: 20 additions & 0 deletions bench-vortex/tpch/q13.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Q13: distribution of customers by number of orders. The inner query counts
-- orders per customer via a left outer join (customers without matching orders
-- count as 0), ignoring orders whose comment matches '%special%requests%'.
-- The outer query counts customers per order count.
select
c_count,
count(*) as custdist
from
(
select
c_custkey,
count(o_orderkey)
from
customer left outer join orders on
c_custkey = o_custkey
and o_comment not like '%special%requests%'
group by
c_custkey
) as c_orders (c_custkey, c_count)
group by
c_count
order by
custdist desc,
c_count desc;
13 changes: 13 additions & 0 deletions bench-vortex/tpch/q14.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Q14: percentage of discounted revenue that comes from parts whose p_type
-- starts with 'PROMO', over line items shipped in [1995-09-01, 1995-10-01).
select
100.00 * sum(case
when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
lineitem,
part
where
l_partkey = p_partkey
and l_shipdate >= date '1995-09-01'
and l_shipdate < date '1995-10-01';
34 changes: 34 additions & 0 deletions bench-vortex/tpch/q15.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Q15: supplier(s) with the maximum discounted revenue from line items shipped
-- in the three months starting 1996-01-01, ordered by supplier key.
--
-- Written as ONE statement: the per-supplier revenue is a CTE rather than a
-- `create view` / `drop view` pair. The benchmark reads this whole file and
-- hands it to a single `ctx.sql(...)` call, which is expected to plan exactly
-- one statement, so a three-statement script would not run there.
with revenue0 as (
    select
        l_suppkey as supplier_no,
        sum(l_extendedprice * (1 - l_discount)) as total_revenue
    from
        lineitem
    where
        l_shipdate >= date '1996-01-01'
        and l_shipdate < date '1996-01-01' + interval '3' month
    group by
        l_suppkey
)
select
    s_suppkey,
    s_name,
    s_address,
    s_phone,
    total_revenue
from
    supplier,
    revenue0
where
    s_suppkey = supplier_no
    and total_revenue = (
        select
            max(total_revenue)
        from
            revenue0
    )
order by
    s_suppkey;
Loading

0 comments on commit 851a6d3

Please sign in to comment.