From ac4b627723cf49a480408af4ab7f89b902a1d835 Mon Sep 17 00:00:00 2001
From: Evgeny Fomin
Date: Mon, 4 Nov 2024 15:05:28 +0100
Subject: [PATCH] wip

---
 grovedb/src/lib.rs        |   2 +
 grovedb/src/merk_cache.rs | 205 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 207 insertions(+)
 create mode 100644 grovedb/src/merk_cache.rs

diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs
index 42efe8a3..5b0a3e3c 100644
--- a/grovedb/src/lib.rs
+++ b/grovedb/src/lib.rs
@@ -135,6 +135,8 @@ pub mod element;
 pub mod error;
 #[cfg(feature = "estimated_costs")]
 mod estimated_costs;
+#[cfg(feature = "full")]
+mod merk_cache;
 #[cfg(any(feature = "full", feature = "verify"))]
 pub mod operations;
 #[cfg(any(feature = "full", feature = "verify"))]
diff --git a/grovedb/src/merk_cache.rs b/grovedb/src/merk_cache.rs
new file mode 100644
index 00000000..b691fbd0
--- /dev/null
+++ b/grovedb/src/merk_cache.rs
@@ -0,0 +1,205 @@
+//! Module dedicated to keep necessary Merks in memory and solve propagation
+//! after usage automatically.
+
+use std::{
+    collections::{hash_map::Entry, HashMap, HashSet},
+    mem::{self, MaybeUninit},
+    ops::Deref,
+};
+
+use grovedb_costs::{cost_return_on_error, CostResult, CostsExt};
+use grovedb_merk::Merk;
+use grovedb_path::SubtreePath;
+use grovedb_storage::{rocksdb_storage::PrefixedRocksDbTransactionContext, StorageBatch};
+use grovedb_version::version::GroveVersion;
+
+use crate::{Error, GroveDb, Transaction};
+
+type TxMerk<'db> = Merk<PrefixedRocksDbTransactionContext<'db>>;
+
+/// Merk caching structure.
+///
+/// Since we usually postpone all writes to the very end with a single RocksDB
+/// batch, all intermediate changes to subtrees might not be tracked if we
+/// reopen those Merks, so it's better to have them cached and proceed through
+/// the same structure. Eventually we'll have enough info at the same place to
+/// perform necessary propagations as well.
+pub(crate) struct MerkCache<'db, 'b, B> { + db: &'db GroveDb, + tx: &'db Transaction<'db>, + batch: &'db StorageBatch, + version: &'db GroveVersion, + inner: HashMap, TxMerk<'db>>, +} + +impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> { + pub(crate) fn new( + db: &'db GroveDb, + tx: &'db Transaction<'db>, + batch: &'db StorageBatch, + version: &'db GroveVersion, + ) -> Self { + MerkCache { + db, + tx, + batch, + version, + inner: Default::default(), + } + } + + /// Get a mutable Merk reference from the cache. + /// If it doesn't present then it will be opened. + /// Returns `None` if there is no Merk under this path. + fn get_merk_mut_internal<'s>( + &'s mut self, + path: SubtreePath<'b, B>, + ) -> CostResult<&'s mut TxMerk<'db>, Error> { + let mut cost = Default::default(); + + match self.inner.entry(path) { + Entry::Occupied(e) => Ok(e.into_mut()).wrap_with_cost(cost), + Entry::Vacant(e) => { + let merk = cost_return_on_error!( + &mut cost, + self.db.open_transactional_merk_at_path( + e.key().clone(), + self.tx, + Some(self.batch), + self.version + ) + ); + Ok(e.insert(merk)).wrap_with_cost(cost) + } + } + } + + /// Returns an array of mutable references to different Merks, where each + /// element in the array corresponds to a unique Merk based on its + /// position in the input paths array. + /// + /// # Panics + /// All input paths *must* be unique, otherwise it could provide multiple + /// mutable references to the same memory which is strictly prohibited. 
+ pub(crate) fn get_multi_mut<'s, const N: usize>( + &'s mut self, + paths: [SubtreePath<'b, B>; N], + ) -> CostResult<[MerkHandle<'db, 's>; N], Error> { + let mut result_uninit = [const { MaybeUninit::>::uninit() }; N]; + let mut cost = Default::default(); + + let unique_args: HashSet<_> = paths.iter().collect(); + if unique_args.len() != N { + panic!("`get_multi_mut` keys must be unique"); + } + + for (i, path) in paths.into_iter().enumerate() { + // SAFETY is ensured by tying the lifetime of mutable references to the + // collection itself, preventing them from outliving the collection and + // ensuring exclusive access to the collection's layout through other + // mutable references. The mandatory keys' uniqueness check above makes + // sure no overlapping memory will be referenced. + let merk_ref = unsafe { + MerkHandle( + (cost_return_on_error!(&mut cost, self.get_merk_mut_internal(path)) + as *mut TxMerk<'db>) + .as_mut::<'s>() + .expect("not a null pointer"), + ) + }; + result_uninit[i].write(merk_ref); + } + + // SAFETY: An array of `MaybeUninit` references takes the same size as an array + // of references as long as they both have the same number of elements, + // N in our case. `mem::transmute` would represent it better, however, + // due to poor support of const generics in stable Rust we bypass + // compile-time size checks with pointer casts. + let result = unsafe { (&result_uninit as *const _ as *const [MerkHandle; N]).read() }; + mem::forget(result_uninit); + + Ok(result).wrap_with_cost(cost) + } +} + +/// Handle to a cached Merk. +pub(crate) struct MerkHandle<'db, 'c>(&'c mut TxMerk<'db>); + +/// It is allowed to dereference `MerkHandle` to regular Merks but in a +/// non-mutable way since we want to track what have been done to those Merks. 
+impl<'db, 'c> Deref for MerkHandle<'db, 'c> { + type Target = TxMerk<'db>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'db, 'c> MerkHandle<'db, 'c> { + pub(crate) fn insert(&mut self) { + todo!() + } +} + +#[cfg(test)] +mod tests { + use grovedb_costs::OperationCost; + use grovedb_path::SubtreePath; + use grovedb_storage::StorageBatch; + use grovedb_version::version::GroveVersion; + + use super::MerkCache; + use crate::tests::{make_deep_tree, ANOTHER_TEST_LEAF, TEST_LEAF}; + + #[test] + fn cached_subtrees_are_free() { + let version = GroveVersion::latest(); + let db = make_deep_tree(&version); + let tx = db.start_transaction(); + let batch = StorageBatch::new(); + let mut cache = MerkCache::new(&db, &tx, &batch, version); + + let mut cost: OperationCost = Default::default(); + let [test1, test2] = cache + .get_multi_mut([ + SubtreePath::from(&[TEST_LEAF]), + SubtreePath::from(&[ANOTHER_TEST_LEAF]), + ]) + .unwrap_add_cost(&mut cost) + .expect("unable to get subtrees"); + + // Assert trees aren't empty + assert!(test1.root_hash().unwrap() != [0; 32]); + assert!(test2.root_hash().unwrap() != [0; 32]); + + // Assert some cost been paid + assert!(!cost.is_nothing()); + + let mut next_cost: OperationCost = Default::default(); + let [_test1, _test2] = cache + .get_multi_mut([ + SubtreePath::from(&[TEST_LEAF]), + SubtreePath::from(&[ANOTHER_TEST_LEAF]), + ]) + .unwrap_add_cost(&mut next_cost) + .expect("unable to get subtrees"); + + // Assert it was for free now + assert!(next_cost.is_nothing()); + } + + #[test] + #[should_panic] + fn overlapping_references_should_panic() { + let version = GroveVersion::latest(); + let db = make_deep_tree(&version); + let tx = db.start_transaction(); + let batch = StorageBatch::new(); + let mut cache = MerkCache::new(&db, &tx, &batch, version); + + let _ = cache.get_multi_mut([ + SubtreePath::from(&[TEST_LEAF]), + SubtreePath::from(&[TEST_LEAF]), + ]); + } +}