Skip to content

Commit

Permalink
Duckdb updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Swoorup committed Jun 15, 2024
1 parent c56e458 commit 04543bf
Show file tree
Hide file tree
Showing 29 changed files with 3,193 additions and 1,231 deletions.
9 changes: 7 additions & 2 deletions .rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
max_width = 120
imports_granularity = "Crate"
# indent_style = "Block"
reorder_imports = true
max_width=120
fn_call_width=72
# tab_spaces = 2
# group_imports="StdExternalCrate"
# imports_granularity = "Module"
15 changes: 11 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ duckdb-loadable-macros = { version = "0.1.1", path = "crates/duckdb-loadable-mac
autocfg = "1.0"
bindgen = { version = "0.69", default-features = false }
byteorder = "1.3"
calamine = "0.22.0"
calamine = "0.24"
cast = "0.3"
cc = "1.0"
chrono = "0.4.22"
Expand All @@ -34,13 +34,14 @@ doc-comment = "0.3"
fallible-iterator = "0.3"
fallible-streaming-iterator = "0.1"
flate2 = "1.0"
hashlink = "0.8"
hashlink = "0.9"
lazy_static = "1.4"
memchr = "2.3"
num = { version = "0.4", default-features = false }
pkg-config = "0.3.24"
polars = "0.35.4"
polars-core = "0.35.4"
polars = "0.38"
polars-core = "0.38"
derive_more = "0.99"
pretty_assertions = "1.4.0"
proc-macro2 = "1.0.56"
quote = "1.0.21"
Expand All @@ -55,9 +56,15 @@ strum = "0.25"
syn = "2.0.15"
tar = "0.4.38"
tempdir = "0.3.7"
thiserror = "1.0"
tempfile = "3.1.0"
unicase = "2.6.0"
url = "2.1"
uuid = "1.0"
vcpkg = "0.2"
arrow = { version = "52", default-features = false }
rusqlite = "0.31"
arrow_convert = "0.6"
itertools = "0.13"
criterion = { version = "0.5", features = [ "html_reports"] }
include_absolute_path = "0.1"
20 changes: 17 additions & 3 deletions crates/duckdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ description = "Ergonomic wrapper for DuckDB"

[lib]
name = "duckdb"
bench = false

[features]
default = []
Expand Down Expand Up @@ -58,9 +59,12 @@ strum = { workspace = true, features = ["derive"] }
r2d2 = { workspace = true, optional = true }
calamine = { workspace = true, optional = true }
num = { workspace = true, features = ["std"], optional = true }
derive_more = { workspace = true }
duckdb-loadable-macros = { workspace = true, optional = true }
polars = { workspace = true, features = ["dtype-full"], optional = true }
num-integer = {version = "0.1.46"}
thiserror = { workspace = true }
arrow_convert = { workspace = true }
itertools = { workspace = true }

[dev-dependencies]
doc-comment = { workspace = true }
Expand All @@ -73,13 +77,14 @@ rand = { workspace = true }
tempdir = { workspace = true }
polars-core = { workspace = true }
pretty_assertions = { workspace = true }
# criterion = "0.3"
rusqlite = { workspace = true }
criterion = { workspace = true }
include_absolute_path = { workspace = true }

# [[bench]]
# name = "data_types"
# harness = false


[package.metadata.docs.rs]
features = []
all-features = false
Expand All @@ -94,3 +99,12 @@ all-features = false
name = "hello-ext"
crate-type = ["cdylib"]
required-features = ["vtab-loadable"]

[[example]]
name = "appender"
crate-type = ["cdylib"]
required-features = ["appender-arrow"]

[[bench]]
name = "issue-282"
harness = false
1 change: 1 addition & 0 deletions crates/duckdb/benches/issue-282/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
db.*
19 changes: 19 additions & 0 deletions crates/duckdb/benches/issue-282/generate-database.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# Builds the two benchmark databases, db.sqlite and db.duckdb, from output.csv
# in the current directory. Both databases get the same `income` table.
#
# The first line of output.csv is treated as a header by both loaders, matching
# the (HEADER) option already used for the DuckDB COPY below.
#
# NOTE: re-running the script appends the CSV rows again; delete db.sqlite and
# db.duckdb first when a fresh data set is wanted.

# Abort on the first failing command instead of carrying on with a half-built database.
set -euo pipefail

# SQLite
# `.import` into an already existing table loads every line as data, so the
# header line has to be skipped explicitly (`--skip` needs sqlite3 >= 3.32).
sqlite3 db.sqlite <<EOF
CREATE TABLE IF NOT EXISTS income (
id INTEGER,
created_at INTEGER,
amount REAL,
category_id INTEGER,
wallet_id INTEGER,
meta TEXT
);
.mode csv
.import --skip 1 output.csv income
EOF

# DuckDB
# IF NOT EXISTS mirrors the SQLite statement above so a second run does not fail on CREATE.
echo "CREATE TABLE IF NOT EXISTS income (id INTEGER, created_at INTEGER, amount REAL, category_id INTEGER, wallet_id INTEGER, meta TEXT);" | duckdb db.duckdb
echo "COPY income FROM 'output.csv' (HEADER);" | duckdb db.duckdb
125 changes: 125 additions & 0 deletions crates/duckdb/benches/issue-282/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//! Ensure that the databases have been generated with the `generate-database.sh` utility before running this benchmark.
use arrow_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};
use criterion::{criterion_group, criterion_main, Criterion};
use include_absolute_path::include_absolute_path;

/// Opens a connection to the SQLite benchmark database `db.sqlite`.
///
/// The path handed to `rusqlite::Connection::open` comes from
/// `include_absolute_path!("./db.sqlite")`.
///
/// # Panics
///
/// Panics if opening the connection returns an error.
pub fn sqlite_db() -> rusqlite::Connection {
    let opened = rusqlite::Connection::open(include_absolute_path!("./db.sqlite"));
    opened.unwrap()
}

/// Opens a connection to the DuckDB benchmark database `db.duckdb`.
///
/// The path handed to `duckdb::Connection::open` comes from
/// `include_absolute_path!("./db.duckdb")`.
///
/// # Panics
///
/// Panics if opening the connection returns an error.
pub fn duck_db() -> duckdb::Connection {
    let opened = duckdb::Connection::open(include_absolute_path!("./db.duckdb"));
    opened.unwrap()
}

/// One row of the `income` table as read by the benchmark queries.
///
/// The fields are listed in the same order as the columns selected by the
/// `select_*` methods (`created_at, amount, category_id, wallet_id, meta`);
/// the table's `id` column is not read.
#[derive(Debug, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct Income {
    /// Value of the `created_at` column; the queries filter on this column.
    created_at: Option<i32>,
    /// Value of the `amount` column.
    amount: Option<f32>,
    /// Value of the `category_id` column. This is the only field that is not an
    /// `Option` — NOTE(review): presumably a NULL here makes the read fail; confirm.
    category_id: i32,
    /// Value of the `wallet_id` column.
    wallet_id: Option<i32>,
    /// Value of the `meta` column.
    meta: Option<String>,
}

impl Income {
    /// Builds the SQL text shared by all `select_*` methods.
    ///
    /// The statement selects `created_at, amount, category_id, wallet_id, meta`
    /// from `income` for rows whose `created_at` lies in the inclusive range
    /// `start..=end`. Keeping it in one place guarantees that the three
    /// benchmarked code paths run exactly the same query.
    fn range_query(start: u32, end: u32) -> String {
        format!(
            "SELECT created_at, amount, category_id, wallet_id, meta \
             FROM 'income' \
             WHERE created_at >= {} AND created_at <= {}",
            start, end
        )
    }

    /// Runs the range query on DuckDB and lets
    /// `query_arrow_deserialized::<Income>` produce the resulting rows.
    ///
    /// # Errors
    ///
    /// Returns any error raised while preparing or executing the statement.
    fn select_duckdb_arrow(
        conn: &duckdb::Connection,
        start: u32,
        end: u32,
    ) -> Result<Vec<Self>, Box<dyn std::error::Error>> {
        let sql = Self::range_query(start, end);
        let mut stmt = conn.prepare_cached(&sql)?;
        let result = stmt.query_arrow_deserialized::<Income>([])?;
        Ok(result)
    }

    /// Runs the range query on DuckDB and maps every row to an `Income` by
    /// reading the selected columns by index.
    ///
    /// # Errors
    ///
    /// Returns the first error raised while preparing the statement, executing
    /// it, or converting a row.
    fn select_duckdb(conn: &duckdb::Connection, start: u32, end: u32) -> Result<Vec<Self>, Box<dyn std::error::Error>> {
        let sql = Self::range_query(start, end);
        let mut stmt = conn.prepare_cached(&sql)?;
        let rows = stmt.query_map([], |row| {
            Ok(Self {
                created_at: row.get(0)?,
                amount: row.get(1)?,
                category_id: row.get(2)?,
                wallet_id: row.get(3)?,
                meta: row.get(4)?,
            })
        })?;
        // Collecting into `Result` stops at the first failing row, exactly like
        // pushing each `result?` in a loop would.
        let incomes = rows.collect::<Result<Vec<_>, _>>()?;
        Ok(incomes)
    }

    /// Runs the range query on SQLite and maps every row to an `Income` by
    /// reading the selected columns by index.
    ///
    /// # Errors
    ///
    /// Returns the first error raised while preparing the statement, executing
    /// it, or converting a row.
    fn select_sqlite(
        conn: &rusqlite::Connection,
        start: u32,
        end: u32,
    ) -> Result<Vec<Self>, Box<dyn std::error::Error>> {
        let sql = Self::range_query(start, end);
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt.query_map([], |row| {
            Ok(Self {
                created_at: row.get(0)?,
                amount: row.get(1)?,
                category_id: row.get(2)?,
                wallet_id: row.get(3)?,
                meta: row.get(4)?,
            })
        })?;
        // Collecting into `Result` stops at the first failing row, exactly like
        // pushing each `result?` in a loop would.
        let incomes = rows.collect::<Result<Vec<_>, _>>()?;
        Ok(incomes)
    }
}

/// Registers the `sqlite_test` benchmark: each iteration runs
/// `Income::select_sqlite` over a fixed `created_at` range on one shared
/// connection and yields the number of rows returned.
fn bench_sqlite(c: &mut Criterion) {
    const RANGE_START: u32 = 1709292049;
    const RANGE_END: u32 = 1711375239;

    let conn = sqlite_db();
    c.bench_function("sqlite_test", |bencher| {
        bencher.iter(|| {
            Income::select_sqlite(&conn, RANGE_START, RANGE_END)
                .unwrap()
                .len()
        })
    });
}

/// Registers the `duckdb_test` benchmark: each iteration runs
/// `Income::select_duckdb` over a fixed `created_at` range on one shared
/// connection and yields the number of rows returned.
fn bench_duckdb(c: &mut Criterion) {
    const RANGE_START: u32 = 1709292049;
    const RANGE_END: u32 = 1711375239;

    let conn = duck_db();
    c.bench_function("duckdb_test", |bencher| {
        bencher.iter(|| {
            Income::select_duckdb(&conn, RANGE_START, RANGE_END)
                .unwrap()
                .len()
        })
    });
}

/// Registers the `duckdb_test_arrow` benchmark: each iteration runs
/// `Income::select_duckdb_arrow` over a fixed `created_at` range on one shared
/// connection and yields the number of rows returned.
fn bench_duckdb_arrow(c: &mut Criterion) {
    const RANGE_START: u32 = 1709292049;
    const RANGE_END: u32 = 1711375239;

    let conn = duck_db();
    c.bench_function("duckdb_test_arrow", |bencher| {
        bencher.iter(|| {
            Income::select_duckdb_arrow(&conn, RANGE_START, RANGE_END)
                .unwrap()
                .len()
        })
    });
}

// Only the Arrow-based DuckDB benchmark is registered in the `benches` group.
criterion_group!(benches, bench_duckdb_arrow);
// Disabled alternative that would also register `bench_sqlite` and `bench_duckdb`:
// criterion_group!(benches, bench_sqlite, bench_duckdb, bench_duckdb_arrow);
// Generates the benchmark binary's entry point for the `benches` group.
criterion_main!(benches);
Loading

0 comments on commit 04543bf

Please sign in to comment.