Skip to content

Commit

Permalink
Bug 1931812 - Don't reingest the DB on each query benchmark
Browse files Browse the repository at this point in the history
This addresses the bug and also makes one other change: I added a new method to
`BenchmarkWithInput` called `global_input()` that returns some data that's then
passed by reference to the benchmark's iterations. That's a little better than
creating global data in benchmark constructors because it means only the global
data for the current benchmark is in memory instead of the global data for all
benchmarks in the group. In the case of query benchmarks, the global data is the
store, so we avoid having N stores in memory at once, N = the number of
benchmarks in the group. (I'm adding quite a few weather benchmarks.) It also
made sense to rename `generate_input()` to `iteration_input()` to emphasize that
that method is called on each iteration.

I'm open to reverting this other change though if you disagree.

The speedup between this and the main branch when running
`time cargo suggest-bench -- query/query` isn't dramatic but it's a little
better:

```
main branch: ~83s
this branch: ~68s (~18% improvement)
```
  • Loading branch information
0c0w3 committed Nov 18, 2024
1 parent 8e7aa17 commit 8cc7b4b
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 38 deletions.
5 changes: 3 additions & 2 deletions components/suggest/benches/benchmark_all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ fn run_benchmarks<B: BenchmarkWithInput, M: Measurement>(
benchmarks: Vec<(&'static str, B)>,
) {
for (name, benchmark) in benchmarks {
let g_input = benchmark.global_input();
group.bench_function(name.to_string(), |b| {
b.iter_batched(
|| benchmark.generate_input(),
|input| benchmark.benchmarked_code(input),
|| benchmark.iteration_input(),
|i_input| benchmark.benchmarked_code(&g_input, i_input),
// See https://docs.rs/criterion/latest/criterion/enum.BatchSize.html#variants for
// a discussion of this. PerIteration is chosen for these benchmarks because the
// input holds a database file handle
Expand Down
9 changes: 6 additions & 3 deletions components/suggest/src/benchmarks/ingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,12 @@ impl IngestBenchmark {
pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);

impl BenchmarkWithInput for IngestBenchmark {
type Input = InputType;
type GlobalInput = ();
type IterationInput = InputType;

fn generate_input(&self) -> Self::Input {
fn global_input(&self) -> Self::GlobalInput {}

fn iteration_input(&self) -> Self::IterationInput {
let data_path = self.temp_dir.path().join(unique_db_filename());
let store = SuggestStoreInner::new(data_path, vec![], self.client.clone());
store.ensure_db_initialized();
Expand All @@ -61,7 +64,7 @@ impl BenchmarkWithInput for IngestBenchmark {
InputType(store)
}

fn benchmarked_code(&self, input: Self::Input) {
fn benchmarked_code(&self, _: &Self::GlobalInput, input: Self::IterationInput) {
let InputType(store) = input;
store.ingest_records_by_type(self.record_type);
}
Expand Down
56 changes: 48 additions & 8 deletions components/suggest/src/benchmarks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,16 @@
//!
//! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput]
use std::sync::atomic::{AtomicU32, Ordering};
use std::{
path::PathBuf,
sync::{
atomic::{AtomicU32, Ordering},
OnceLock,
},
};
use tempfile::TempDir;

use crate::{SuggestIngestionConstraints, SuggestStore};

pub mod client;
pub mod ingest;
Expand All @@ -28,21 +37,52 @@ pub trait Benchmark {
/// Trait for benchmarks that require input
///
/// This will run using Criterion's `iter_batched` function. Criterion will create a batch of
/// inputs, then pass each one to benchmark.
/// inputs, then pass each one to the benchmark's iterations.
///
/// This supports simple benchmarks that don't require any input. Note: global setup can be done
/// in the `new()` method for the struct.
/// This supports simple benchmarks that don't require any input.
pub trait BenchmarkWithInput {
type Input;
/// Input that will be created once and then passed by reference to each
/// of the benchmark's iterations.
type GlobalInput;

/// Input that will be created for each of the benchmark's iterations.
type IterationInput;

/// Generate the input (this is not included in the benchmark time)
fn generate_input(&self) -> Self::Input;
/// Generate the global input (not included in the benchmark time)
fn global_input(&self) -> Self::GlobalInput;

/// Generate the per-iteration input (not included in the benchmark time)
fn iteration_input(&self) -> Self::IterationInput;

/// Perform the operations that we're benchmarking.
fn benchmarked_code(&self, input: Self::Input);
fn benchmarked_code(&self, g_input: &Self::GlobalInput, i_input: Self::IterationInput);
}

fn unique_db_filename() -> String {
static COUNTER: AtomicU32 = AtomicU32::new(0);
format!("db{}.sqlite", COUNTER.fetch_add(1, Ordering::Relaxed))
}

/// Creates a new store that will contain all provider data currently in remote
/// settings.
fn new_store() -> SuggestStore {
// Create a "starter" store that will do an initial ingest, and then
// initialize every returned store with a copy of its DB so that each one
// doesn't need to reingest.
static STARTER: OnceLock<(TempDir, PathBuf)> = OnceLock::new();
let (starter_dir, starter_db_path) = STARTER.get_or_init(|| {
let temp_dir = tempfile::tempdir().unwrap();
let db_path = temp_dir.path().join(unique_db_filename());
let store =
SuggestStore::new(&db_path.to_string_lossy(), None).expect("Error building store");
store
.ingest(SuggestIngestionConstraints::all_providers())
.expect("Error during ingestion");
store.checkpoint();
(temp_dir, db_path)
});

let db_path = starter_dir.path().join(unique_db_filename());
std::fs::copy(starter_db_path, &db_path).expect("Error copying starter DB file");
SuggestStore::new(&db_path.to_string_lossy(), None).expect("Error building store")
}
39 changes: 14 additions & 25 deletions components/suggest/src/benchmarks/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,39 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use crate::{
benchmarks::{unique_db_filename, BenchmarkWithInput},
SuggestIngestionConstraints, SuggestStore, SuggestionProvider, SuggestionQuery,
benchmarks::{new_store, BenchmarkWithInput},
SuggestStore, SuggestionProvider, SuggestionQuery,
};

pub struct QueryBenchmark {
store: SuggestStore,
provider: SuggestionProvider,
query: &'static str,
}

impl QueryBenchmark {
pub fn new(provider: SuggestionProvider, query: &'static str) -> Self {
let temp_dir = tempfile::tempdir().unwrap();
let data_path = temp_dir.path().join(unique_db_filename());
let store =
SuggestStore::new(&data_path.to_string_lossy(), None).expect("Error building store");
store
.ingest(SuggestIngestionConstraints::all_providers())
.expect("Error during ingestion");
Self {
store,
provider,
query,
}
Self { provider, query }
}
}

// The input for each benchmark a query to pass to the store
pub struct InputType(SuggestionQuery);

impl BenchmarkWithInput for QueryBenchmark {
type Input = InputType;
type GlobalInput = SuggestStore;
type IterationInput = SuggestionQuery;

fn generate_input(&self) -> Self::Input {
InputType(SuggestionQuery {
fn global_input(&self) -> Self::GlobalInput {
new_store()
}

fn iteration_input(&self) -> Self::IterationInput {
SuggestionQuery {
providers: vec![self.provider],
keyword: self.query.to_string(),
..SuggestionQuery::default()
})
}
}

fn benchmarked_code(&self, input: Self::Input) {
let InputType(query) = input;
self.store
fn benchmarked_code(&self, store: &Self::GlobalInput, query: Self::IterationInput) {
store
.query(query)
.unwrap_or_else(|e| panic!("Error querying store: {e}"));
}
Expand Down
16 changes: 16 additions & 0 deletions components/suggest/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,16 @@ impl SuggestStore {
}
}

#[cfg(feature = "benchmark_api")]
impl SuggestStore {
/// Creates a WAL checkpoint. This will cause changes in the write-ahead log
/// to be written to the DB. See:
/// https://sqlite.org/pragma.html#pragma_wal_checkpoint
pub fn checkpoint(&self) {
self.inner.checkpoint();
}
}

/// Constraints limit which suggestions to ingest from Remote Settings.
#[derive(Clone, Default, Debug, uniffi::Record)]
pub struct SuggestIngestionConstraints {
Expand Down Expand Up @@ -751,6 +761,12 @@ where
self.dbs().unwrap();
}

fn checkpoint(&self) {
let conn = self.dbs().unwrap().writer.conn.lock();
conn.pragma_update(None, "wal_checkpoint", "TRUNCATE")
.expect("Error performing checkpoint");
}

pub fn ingest_records_by_type(&self, ingest_record_type: SuggestRecordType) {
let writer = &self.dbs().unwrap().writer;
let mut context = MetricsContext::default();
Expand Down

0 comments on commit 8cc7b4b

Please sign in to comment.