diff --git a/.github/workflows/build_test_linux.yml b/.github/workflows/build_test_linux.yml index 6c5df932..31f0b306 100644 --- a/.github/workflows/build_test_linux.yml +++ b/.github/workflows/build_test_linux.yml @@ -10,6 +10,30 @@ env: RUST_BACKTRACE: full jobs: + build_bench: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest] + toolchain: [stable] + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.toolchain }} + override: true + - uses: extractions/setup-just@v1 + - uses: hustcer/setup-nu@v3.8 + with: + version: '0.85' + env: + GITHUB_TOKEN: ${{ secrets.PAT_GLOBAL }} + - name: Just version + run: just --version + - name: Build + run: just bench_build build_test: runs-on: ${{ matrix.os }} strategy: diff --git a/Cargo.toml b/Cargo.toml index abd10498..4fbb62f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,8 @@ criterion = { version = "0.5.1" } doc-comment = "0.3.3" uuid = { version = "1", features = ["serde", "v4"] } chrono = { version = "0.4", features = ["serde"] } - +rand = "0.8" +once_cell = "1.19" [features] default = [] diff --git a/benches/README.md b/benches/README.md new file mode 100644 index 00000000..eac7df41 --- /dev/null +++ b/benches/README.md @@ -0,0 +1,24 @@ +# Benchmarks + +Highlight the Native DB overhead compared to direct access to the underlying database ([redb](https://github.com/cberner/redb)). + +The benchmarks ignore: +- [`native_model`](https://github.com/vincent-herlemont/native_model) overhead. +- Serialization overhead used by `native_model` like `bincode`,`postcard` etc. +- The fact that `redb` can copy the data using zero-copy. + +## Run benchmarks + +If you want to run the benchmarks, you need to install [just](https://github.com/casey/just), [nushell](https://www.nushell.sh/) and run: +```bash +just bench +``` + +## Results + +We can see that the overhead is very low. 
These results were obtained with version `0.5.3` of `native_db`. + +![](./results/insert_random.png) +![](./results/get_random.png) +![](./results/remove_random.png) +![](./results/scan_random.png) \ No newline at end of file diff --git a/benches/overhead_data_size.rs b/benches/overhead_data_size.rs index 03c27655..31f33766 100644 --- a/benches/overhead_data_size.rs +++ b/benches/overhead_data_size.rs @@ -1,8 +1,48 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use native_db::*; use native_model::{native_model, Model}; -use redb::TableDefinition; +use redb::{ReadableTable, TableDefinition}; use serde::{Deserialize, Serialize}; +use once_cell::sync::Lazy; +use rand::prelude::SliceRandom; + +// 1 byte * 10000, 10 bytes * 10000, 100 bytes * 5000, 1KB * 1000, 1MB * 100, 10MB * 10 +const ITERATIONS:&'static [(usize, usize)] = &[(1, 10000), (10, 10000), (100, 5000), (1024, 1000), (1024 * 1024, 100), (10 * 1024 * 1024, 10)]; + +static DATABASE_BUILDER: Lazy = Lazy::new(|| { + let mut builder = DatabaseBuilder::new(); + builder.define::().unwrap(); + builder +}); + +fn init_database() -> (redb::Database, Database<'static>) { + let redb_backend = redb::backends::InMemoryBackend::new(); + let redb_db = redb::Database::builder() + .create_with_backend(redb_backend) + .unwrap(); + + let native_db = DATABASE_BUILDER.create_in_memory().unwrap(); + (redb_db, native_db) +} + +fn generate_random_data(redb_db: &redb::Database, native_db: &Database,nb_bytes: &usize, nb_items: &usize) -> Vec { + let data = Data { + x: 1, + data: vec![1u8; *nb_bytes], + }; + + let mut out = vec![]; + + for _ in 0..*nb_items { + let mut data = data.clone(); + data.random_x(); + use_redb_insert(&redb_db, data.clone()); + use_native_db_insert(&native_db, data.clone()); + out.push(data); + } + + out +} #[derive(Serialize, Deserialize, Clone)] #[native_model(id = 1, version = 1)] @@ -13,51 +53,240 @@ struct Data { data: Vec, } +impl Data { + fn random_x(&mut self) { + self.x 
= rand::random(); + } +} + const TABLE_REDB: TableDefinition = TableDefinition::new("my_data"); -fn use_redb(db: &redb::Database, data: Data) { + +fn use_redb_insert(db: &redb::Database, data: Data) { let rw = db.begin_write().unwrap(); { let mut table = rw.open_table(TABLE_REDB).unwrap(); + // Because native_db use native_model to encode data, we do the same here + // to remove the overhead of the encoding. let encode = native_model::encode(&data).unwrap(); table.insert(data.x, encode.as_slice()).unwrap(); } rw.commit().unwrap(); } -fn use_native_db(db: &native_db::Database, data: Data) { +fn use_redb_get(db: &redb::Database, x: u32) -> Data { + let ro = db.begin_read().unwrap(); + let out; + { + let table = ro.open_table(TABLE_REDB).unwrap(); + out = table.get(x).unwrap().map(|v| { + native_model::decode(v.value().to_vec()).unwrap().0 + }).expect("Data not found"); + } + out +} + +fn use_redb_scan(db: &redb::Database) -> Vec { + let ro = db.begin_read().unwrap(); + let out; + { + let table = ro.open_table(TABLE_REDB).unwrap(); + out = table.iter().unwrap().map(|r| { + let (_, v) = r.unwrap(); + native_model::decode(v.value().to_vec()).unwrap().0 + }).collect::>(); + } + out +} + +fn redb_remove(db: &redb::Database, x: u32) { + let rw = db.begin_write().unwrap(); + { + let mut table = rw.open_table(TABLE_REDB).unwrap(); + table.remove(x).unwrap().expect("Data not found"); + } + rw.commit().unwrap(); +} + +fn use_native_db_insert(db: &Database, data: Data) { let rw = db.rw_transaction().unwrap(); rw.insert(data).unwrap(); rw.commit().unwrap(); } -fn criterion_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("insert"); +fn use_native_db_scan(db: &Database) -> Vec { + let r = db.r_transaction().unwrap(); + let out = r.scan().primary().unwrap().all().collect::>(); + out +} + +fn use_native_db_get(db: &Database, x: u32) -> Data { + let r = db.r_transaction().unwrap(); + let out = r.get().primary(x).unwrap().unwrap(); + out +} + +fn 
native_db_remove(db: &Database, data: Data) { + let rw = db.rw_transaction().unwrap(); + rw.remove(data).unwrap(); + rw.commit().unwrap(); +} + +// Benchmarks + +fn bench_get_random(c: &mut Criterion) { + let mut group = c.benchmark_group("get_random"); + let plot_config = criterion::PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic); + group.plot_config(plot_config.clone()); + group.sampling_mode(criterion::SamplingMode::Flat); + + for (nb_bytes,nb_items) in ITERATIONS { + group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64)); + + let (redb_db, native_db) = init_database(); + let data = generate_random_data(&redb_db, &native_db, nb_bytes, nb_items); + + group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| { + b.iter_batched( + || { + let item = data.choose(&mut rand::thread_rng()).unwrap(); + item.x + }, + |x| use_redb_get(&redb_db, x), + criterion::BatchSize::SmallInput + ); + }); + group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| { + b.iter_batched( + || { + let item = data.choose(&mut rand::thread_rng()).unwrap(); + item.x + }, + |x| use_native_db_get(&native_db, x), + criterion::BatchSize::SmallInput + ); + }); + } +} + - // 1 byte, 1KB, 1MB, 10MB, 100MB - for nb_bytes in [1, 1024, 1024 * 1024, 10 * 1024 * 1024, 100 * 1024 * 1024] { - group.throughput(criterion::Throughput::Bytes(nb_bytes as u64)); +fn bench_scan_random(c: &mut Criterion) { + let plot_config = criterion::PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic); + let mut group = c.benchmark_group("scan_random"); + group.plot_config(plot_config.clone()); + group.sampling_mode(criterion::SamplingMode::Flat); + + for (nb_bytes,nb_items) in ITERATIONS { + group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64)); + + let (redb_db, native_db) = init_database(); + generate_random_data(&redb_db, &native_db, nb_bytes, nb_items); + + group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| { + 
b.iter_with_large_drop(|| use_redb_scan(&redb_db)); + }); + + group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| { + b.iter_with_large_drop(|| use_native_db_scan(&native_db)); + }); + } +} + + +fn bench_remove_random(c: &mut Criterion) { + let mut group = c.benchmark_group("remove_random"); + let plot_config = criterion::PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic); + group.plot_config(plot_config.clone()); + group.sampling_mode(criterion::SamplingMode::Flat); + + for (nb_bytes,nb_items) in ITERATIONS { + group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64)); + + let (redb_db, native_db) = init_database(); + + group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| { + b.iter_batched( + || { + let mut data = Data { + x: 1, + data: vec![1u8; *nb_bytes as usize], + }; + data.random_x(); + use_redb_insert(&redb_db, data.clone()); + data + }, + |data| redb_remove(&redb_db, data.x), + criterion::BatchSize::SmallInput + ); + }); + + group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| { + b.iter_batched( + || { + let mut data = Data { + x: 1, + data: vec![1u8; *nb_bytes as usize], + }; + data.random_x(); + use_native_db_insert(&native_db, data.clone()); + data + }, + |data| native_db_remove(&native_db, data), + criterion::BatchSize::SmallInput + ); + }); + } +} + + +fn bench_insert_random(c: &mut Criterion) { + let mut insert_random_group = c.benchmark_group("insert_random"); + let plot_config = criterion::PlotConfiguration::default().summary_scale(criterion::AxisScale::Logarithmic); + insert_random_group.plot_config(plot_config.clone()); + insert_random_group.sampling_mode(criterion::SamplingMode::Flat); + + // 1 byte, 10 bytes, 100 bytes, 1KB, 1MB, 10MB + for (nb_bytes,_) in ITERATIONS { + insert_random_group.throughput(criterion::Throughput::Bytes(*nb_bytes as u64)); let data = Data { x: 1, - data: vec![1u8; nb_bytes as usize], + data: vec![1u8; *nb_bytes as usize], }; - let redb_backend = 
redb::backends::InMemoryBackend::new(); - let redb_db = redb::Database::builder() - .create_with_backend(redb_backend) - .unwrap(); + let (redb_db, native_db) = init_database(); - group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| { - b.iter(|| use_redb(&redb_db, data.clone())) + let batch_size = match nb_bytes { + nb_bytes if *nb_bytes < 1024 => criterion::BatchSize::SmallInput, + nb_bytes if *nb_bytes < 1024 * 1024 => criterion::BatchSize::LargeInput, + _ => criterion::BatchSize::PerIteration, + }; + + insert_random_group.bench_function(BenchmarkId::new("redb", nb_bytes), |b| { + b.iter_batched( + || { + let mut data = data.clone(); + data.random_x(); + data + }, + |data| use_redb_insert(&redb_db, data), + batch_size + ); }); - let mut native_db = native_db::Database::create_in_memory().unwrap(); - native_db.define::().unwrap(); - group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| { - b.iter(|| use_native_db(&native_db, data.clone())) + insert_random_group.bench_function(BenchmarkId::new("native_db", nb_bytes), |b| { + b.iter_batched( + || { + let mut data = data.clone(); + data.random_x(); + data + }, + |data| use_native_db_insert(&native_db, data), + batch_size + ); }); } + insert_random_group.finish(); } -criterion_group!(benches, criterion_benchmark); +criterion_group!(benches, bench_insert_random, bench_scan_random, bench_get_random, bench_remove_random); criterion_main!(benches); diff --git a/benches/results/get_random.png b/benches/results/get_random.png new file mode 100644 index 00000000..d0e623b7 Binary files /dev/null and b/benches/results/get_random.png differ diff --git a/benches/results/insert_random.png b/benches/results/insert_random.png new file mode 100644 index 00000000..3d2fe6c6 Binary files /dev/null and b/benches/results/insert_random.png differ diff --git a/benches/results/remove_random.png b/benches/results/remove_random.png new file mode 100644 index 00000000..419a3fa3 Binary files /dev/null and 
b/benches/results/remove_random.png differ diff --git a/benches/results/scan_random.png b/benches/results/scan_random.png new file mode 100644 index 00000000..9d2d58a0 Binary files /dev/null and b/benches/results/scan_random.png differ diff --git a/justfile b/justfile index deee47dd..5bc9a2ec 100644 --- a/justfile +++ b/justfile @@ -26,6 +26,13 @@ test_with_optional: test_all: test_no_default test_default test_with_optional +bench_build: + cargo bench --no-run + +bench: + CRITERION_DEBUG=1 cargo bench; \ + start ./target/criterion/report/index.html + expand test_file_name: rm -f {{test_file_name}}.expanded.rs; \ cargo expand --test {{test_file_name}} | save --raw {{test_file_name}}.expanded.rs \ No newline at end of file