Better markdown benchmarks #849

Merged: 5 commits, Aug 25, 2024
2 changes: 2 additions & 0 deletions Cargo.toml
@@ -34,6 +34,8 @@ redb1 = { version = "=1.0.0", package = "redb" }
redb2 = { version = "=2.0.0", package = "redb" }
serde = { version = "1.0", features = ["derive"] }
bincode = "1.3.3"
walkdir = "2.5.0"
byte-unit = "=5.0.4"

# Just benchmarking dependencies
[target.'cfg(not(target_os = "wasi"))'.dev-dependencies]
28 changes: 14 additions & 14 deletions README.md
@@ -53,20 +53,20 @@ To run all the tests and benchmarks a few extra dependencies are required:
## Benchmarks
redb has similar performance to other top embedded key-value stores such as lmdb and rocksdb

| | redb | lmdb | rocksdb | sled | sanakirja |
|---------------------------|--------|--------|---------|--------|-----------|
| bulk load | 2792ms | 1115ms | 5610ms | 5005ms | 1161ms |
| individual writes | 462ms | 1119ms | 1097ms | 957ms | 662ms |
| batch writes | 2568ms | 2247ms | 1344ms | 1622ms | 2713ms |
| random reads | 988ms | 558ms | 3469ms | 1509ms | 678ms |
| random reads | 962ms | 556ms | 3377ms | 1425ms | 671ms |
| random range reads | 2534ms | 985ms | 6058ms | 4670ms | 1089ms |
| random range reads | 2493ms | 998ms | 5801ms | 4665ms | 1119ms |
| random reads (4 threads) | 344ms | 141ms | 1247ms | 424ms | 266ms |
| random reads (8 threads) | 192ms | 72ms | 673ms | 230ms | 620ms |
| random reads (16 threads) | 131ms | 47ms | 476ms | 148ms | 3500ms |
| random reads (32 threads) | 118ms | 44ms | 412ms | 129ms | 4313ms |
| removals | 2184ms | 784ms | 2451ms | 2047ms | 1344ms |
| | redb | lmdb | rocksdb | sled | sanakirja |
|---------------------------|------------|------------|-------------|--------|-----------|
| bulk load | 2792ms | **1115ms** | 5610ms | 5005ms | 1161ms |
| individual writes | **462ms** | 1119ms | 1097ms | 957ms | 662ms |
| batch writes | 2568ms | 2247ms | **1344ms** | 1622ms | 2713ms |
| random reads | 988ms | **558ms** | 3469ms | 1509ms | 678ms |
| random reads | 962ms | **556ms** | 3377ms | 1425ms | 671ms |
| random range reads | 2534ms | **985ms** | 6058ms | 4670ms | 1089ms |
| random range reads | 2493ms | **998ms** | 5801ms | 4665ms | 1119ms |
| random reads (4 threads) | 344ms | **141ms** | 1247ms | 424ms | 266ms |
| random reads (8 threads) | 192ms | **72ms** | 673ms | 230ms | 620ms |
| random reads (16 threads) | 131ms | **47ms** | 476ms | 148ms | 3500ms |
| random reads (32 threads) | 118ms | **44ms** | 412ms | 129ms | 4313ms |
| removals | 2184ms | **784ms** | 2451ms | 2047ms | 1344ms |

Source code for benchmark [here](./benches/lmdb_benchmark.rs). Results collected on a Ryzen 5900X with Samsung 980 PRO NVMe.

127 changes: 109 additions & 18 deletions benches/lmdb_benchmark.rs
@@ -1,5 +1,6 @@
use std::env::current_dir;
use std::mem::size_of;
use std::path::Path;
use std::sync::Arc;
use std::{fs, process, thread};
use tempfile::{NamedTempFile, TempDir};
@@ -70,7 +71,7 @@ fn make_rng_shards(shards: usize, elements: usize) -> Vec<fastrand::Rng> {
rngs
}

fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, ResultType)> {
let mut rng = make_rng();
let mut results = Vec::new();
let db = Arc::new(db);
@@ -95,7 +96,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
ELEMENTS,
duration.as_millis()
);
results.push(("bulk load".to_string(), duration));
results.push(("bulk load".to_string(), ResultType::Duration(duration)));

let start = Instant::now();
let writes = 100;
@@ -118,7 +119,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
writes,
duration.as_millis()
);
results.push(("individual writes".to_string(), duration));
results.push((
"individual writes".to_string(),
ResultType::Duration(duration),
));

let start = Instant::now();
let batch_size = 1000;
@@ -144,7 +148,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
batch_size,
duration.as_millis()
);
results.push(("batch writes".to_string(), duration));
results.push(("batch writes".to_string(), ResultType::Duration(duration)));

let txn = db.read_transaction();
{
@@ -155,7 +159,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
let end = Instant::now();
let duration = end - start;
println!("{}: len() in {}ms", T::db_type_name(), duration.as_millis());
results.push(("len()".to_string(), duration));
results.push(("len()".to_string(), ResultType::Duration(duration)));
}

for _ in 0..ITERATIONS {
@@ -179,7 +183,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
ELEMENTS,
duration.as_millis()
);
results.push(("random reads".to_string(), duration));
results.push(("random reads".to_string(), ResultType::Duration(duration)));
}

for _ in 0..ITERATIONS {
@@ -208,7 +212,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
ELEMENTS * num_scan,
duration.as_millis()
);
results.push(("random range reads".to_string(), duration));
results.push((
"random range reads".to_string(),
ResultType::Duration(duration),
));
}
}
drop(txn);
@@ -246,7 +253,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
ELEMENTS,
duration.as_millis()
);
results.push((format!("random reads ({num_threads} threads)"), duration));
results.push((
format!("random reads ({num_threads} threads)"),
ResultType::Duration(duration),
));
}

let start = Instant::now();
@@ -271,11 +281,40 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
deletes,
duration.as_millis()
);
results.push(("removals".to_string(), duration));
results.push(("removals".to_string(), ResultType::Duration(duration)));

results
}

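// Total on-disk size of the database at `path`: walks the path recursively
// and sums file lengths, so it works for both single-file and
// directory-based stores.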
fn database_size(path: &Path) -> u64 {
let mut size = 0u64;
for result in walkdir::WalkDir::new(path) {
let entry = result.unwrap();
size += entry.metadata().unwrap().len();
}
size
}

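// A benchmark result is either an elapsed time or an on-disk size in bytes.
// The derived ordering is only meaningful between results of the same
// variant, which is how it is used below to pick the best entry in each row.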
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum ResultType {
Duration(Duration),
SizeInBytes(u64),
}

impl std::fmt::Display for ResultType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use byte_unit::{Byte, UnitType};

match self {
ResultType::Duration(d) => write!(f, "{d:.2?}"),
ResultType::SizeInBytes(s) => {
let b = Byte::from_u64(*s).get_appropriate_unit(UnitType::Binary);
write!(f, "{b:.2}")
}
}
}
}
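
// Illustrative only (not part of the change): what the Display impl above
// produces for hypothetical inputs. A Duration is rendered through its Debug
// formatter with two decimal places (e.g. roughly "462.00ms"), while a byte
// count is scaled to a binary unit by byte-unit (e.g. roughly "1.50 KiB" for
// 1536 bytes).
#[allow(dead_code)]
fn print_result_type_examples() {
    println!("{}", ResultType::Duration(std::time::Duration::from_millis(462)));
    println!("{}", ResultType::SizeInBytes(1536));
}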

fn main() {
let tmpdir = current_dir().unwrap().join(".benchmark");
fs::create_dir(&tmpdir).unwrap();
@@ -294,7 +333,13 @@ fn main() {
.create(tmpfile.path())
.unwrap();
let table = RedbBenchDatabase::new(&db);
benchmark(table)
let mut results = benchmark(table);
let size = database_size(tmpfile.path());
results.push((
"size after bench".to_string(),
ResultType::SizeInBytes(size),
));
results
};
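// The same pattern repeats for each backend below: run the benchmark, then
// record the on-disk size of the temporary database it produced.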

let lmdb_results = {
@@ -306,7 +351,13 @@
.unwrap()
};
let table = HeedBenchDatabase::new(&env);
benchmark(table)
let mut results = benchmark(table);
let size = database_size(tmpfile.path());
results.push((
"size after bench".to_string(),
ResultType::SizeInBytes(size),
));
results
};

let rocksdb_results = {
@@ -321,22 +372,40 @@

let db = rocksdb::TransactionDB::open(&opts, &Default::default(), tmpfile.path()).unwrap();
let table = RocksdbBenchDatabase::new(&db);
benchmark(table)
let mut results = benchmark(table);
let size = database_size(tmpfile.path());
results.push((
"size after bench".to_string(),
ResultType::SizeInBytes(size),
));
results
};

let sled_results = {
let tmpfile: TempDir = tempfile::tempdir_in(&tmpdir).unwrap();
let db = sled::Config::new().path(tmpfile.path()).open().unwrap();
let table = SledBenchDatabase::new(&db, tmpfile.path());
benchmark(table)
let mut results = benchmark(table);
let size = database_size(tmpfile.path());
results.push((
"size after bench".to_string(),
ResultType::SizeInBytes(size),
));
results
};

let sanakirja_results = {
let tmpfile: NamedTempFile = NamedTempFile::new_in(&tmpdir).unwrap();
fs::remove_file(tmpfile.path()).unwrap();
let db = sanakirja::Env::new(tmpfile.path(), 4096 * 1024 * 1024, 2).unwrap();
let table = SanakirjaBenchDatabase::new(&db);
benchmark(table)
let mut results = benchmark(table);
let size = database_size(tmpfile.path());
results.push((
"size after bench".to_string(),
ResultType::SizeInBytes(size),
));
results
};

fs::remove_dir_all(&tmpdir).unwrap();
@@ -347,19 +416,41 @@ fn main() {
rows.push(vec![benchmark.to_string()]);
}

for results in [
let results = [
redb_latency_results,
lmdb_results,
rocksdb_results,
sled_results,
sanakirja_results,
] {
for (i, (_benchmark, duration)) in results.iter().enumerate() {
rows[i].push(format!("{}ms", duration.as_millis()));
];

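// For each benchmark row, find the column (database) with the smallest
// result so it can be highlighted in the rendered table.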
let mut identified_smallests = vec![vec![false; results.len()]; rows.len()];
for (i, identified_smallests_row) in identified_smallests.iter_mut().enumerate() {
let mut smallest = None;
for (j, _) in identified_smallests_row.iter().enumerate() {
let (_, rt) = &results[j][i];
smallest = match smallest {
Some((_, prev)) if rt < prev => Some((j, rt)),
Some((pi, prev)) => Some((pi, prev)),
None => Some((j, rt)),
};
}
let (j, _rt) = smallest.unwrap();
identified_smallests_row[j] = true;
}

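// Fill in the table rows, wrapping the best result of each row in ** so it
// renders bold in the markdown output.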
for (j, results) in results.iter().enumerate() {
for (i, (_benchmark, result_type)) in results.iter().enumerate() {
rows[i].push(if identified_smallests[i][j] {
format!("**{result_type}**")
} else {
result_type.to_string()
});
}
}

let mut table = comfy_table::Table::new();
table.load_preset(comfy_table::presets::ASCII_MARKDOWN);
table.set_width(100);
table.set_header(["", "redb", "lmdb", "rocksdb", "sled", "sanakirja"]);
for row in rows {