Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
fominok committed Nov 4, 2024
1 parent 4af11cf commit ac4b627
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 0 deletions.
2 changes: 2 additions & 0 deletions grovedb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ pub mod element;
pub mod error;
#[cfg(feature = "estimated_costs")]
mod estimated_costs;
#[cfg(feature = "full")]
mod merk_cache;
#[cfg(any(feature = "full", feature = "verify"))]
pub mod operations;
#[cfg(any(feature = "full", feature = "verify"))]
Expand Down
205 changes: 205 additions & 0 deletions grovedb/src/merk_cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
//! Module dedicated to keeping the necessary Merks in memory and to handling
//! propagation automatically after they have been used.
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
mem::{self, MaybeUninit},
ops::Deref,
};

use grovedb_costs::{cost_return_on_error, CostResult, CostsExt};
use grovedb_merk::Merk;
use grovedb_path::SubtreePath;
use grovedb_storage::{rocksdb_storage::PrefixedRocksDbTransactionContext, StorageBatch};
use grovedb_version::version::GroveVersion;

use crate::{Error, GroveDb, Transaction};

type TxMerk<'db> = Merk<PrefixedRocksDbTransactionContext<'db>>;

/// Merk caching structure.
///
/// Since we usually postpone all writes to the very end with a single RocksDB
/// batch all intermediate changes to subtrees might not be tracked if we reopen
/// those Merks, so it's better to have them cached and proceed through the same
/// structure. Eventually we'll have enough info at the same place to perform
/// necessary propagations as well.
pub(crate) struct MerkCache<'db, 'b, B> {

Check warning on line 27 in grovedb/src/merk_cache.rs

View workflow job for this annotation

GitHub Actions / clippy

struct `MerkCache` is never constructed

warning: struct `MerkCache` is never constructed --> grovedb/src/merk_cache.rs:27:19 | 27 | pub(crate) struct MerkCache<'db, 'b, B> { | ^^^^^^^^^
db: &'db GroveDb,
tx: &'db Transaction<'db>,
batch: &'db StorageBatch,
version: &'db GroveVersion,
inner: HashMap<SubtreePath<'b, B>, TxMerk<'db>>,
}

impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> {
pub(crate) fn new(

Check warning on line 36 in grovedb/src/merk_cache.rs

View workflow job for this annotation

GitHub Actions / clippy

associated items `new`, `get_merk_mut_internal`, and `get_multi_mut` are never used

warning: associated items `new`, `get_merk_mut_internal`, and `get_multi_mut` are never used --> grovedb/src/merk_cache.rs:36:19 | 35 | impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> { | --------------------------------------------------- associated items in this implementation 36 | pub(crate) fn new( | ^^^ ... 54 | fn get_merk_mut_internal<'s>( | ^^^^^^^^^^^^^^^^^^^^^ ... 84 | pub(crate) fn get_multi_mut<'s, const N: usize>( | ^^^^^^^^^^^^^
db: &'db GroveDb,
tx: &'db Transaction<'db>,
batch: &'db StorageBatch,
version: &'db GroveVersion,
) -> Self {
MerkCache {
db,
tx,
batch,
version,
inner: Default::default(),
}
}

/// Get a mutable Merk reference from the cache.
/// If it doesn't present then it will be opened.
/// Returns `None` if there is no Merk under this path.
fn get_merk_mut_internal<'s>(
&'s mut self,
path: SubtreePath<'b, B>,
) -> CostResult<&'s mut TxMerk<'db>, Error> {
let mut cost = Default::default();

match self.inner.entry(path) {
Entry::Occupied(e) => Ok(e.into_mut()).wrap_with_cost(cost),
Entry::Vacant(e) => {
let merk = cost_return_on_error!(
&mut cost,
self.db.open_transactional_merk_at_path(
e.key().clone(),
self.tx,
Some(self.batch),
self.version
)
);
Ok(e.insert(merk)).wrap_with_cost(cost)
}
}
}

/// Returns an array of mutable references to different Merks, where each
/// element in the array corresponds to a unique Merk based on its
/// position in the input paths array.
///
/// # Panics
/// All input paths *must* be unique, otherwise it could provide multiple
/// mutable references to the same memory which is strictly prohibited.
pub(crate) fn get_multi_mut<'s, const N: usize>(
&'s mut self,
paths: [SubtreePath<'b, B>; N],
) -> CostResult<[MerkHandle<'db, 's>; N], Error> {
let mut result_uninit = [const { MaybeUninit::<MerkHandle<'db, 's>>::uninit() }; N];
let mut cost = Default::default();

let unique_args: HashSet<_> = paths.iter().collect();
if unique_args.len() != N {
panic!("`get_multi_mut` keys must be unique");
}

for (i, path) in paths.into_iter().enumerate() {
// SAFETY is ensured by tying the lifetime of mutable references to the
// collection itself, preventing them from outliving the collection and
// ensuring exclusive access to the collection's layout through other
// mutable references. The mandatory keys' uniqueness check above makes
// sure no overlapping memory will be referenced.
let merk_ref = unsafe {
MerkHandle(
(cost_return_on_error!(&mut cost, self.get_merk_mut_internal(path))
as *mut TxMerk<'db>)
.as_mut::<'s>()
.expect("not a null pointer"),
)
};
result_uninit[i].write(merk_ref);
}

// SAFETY: An array of `MaybeUninit` references takes the same size as an array
// of references as long as they both have the same number of elements,
// N in our case. `mem::transmute` would represent it better, however,
// due to poor support of const generics in stable Rust we bypass
// compile-time size checks with pointer casts.
let result = unsafe { (&result_uninit as *const _ as *const [MerkHandle; N]).read() };
mem::forget(result_uninit);

Check warning on line 119 in grovedb/src/merk_cache.rs

View workflow job for this annotation

GitHub Actions / clippy

call to `std::mem::forget` with a value that does not implement `Drop`. Forgetting such a type is the same as dropping it

warning: call to `std::mem::forget` with a value that does not implement `Drop`. Forgetting such a type is the same as dropping it --> grovedb/src/merk_cache.rs:119:9 | 119 | mem::forget(result_uninit); | ^^^^^^^^^^^^^^^^^^^^^^^^^^ | note: argument has type `[std::mem::MaybeUninit<merk_cache::MerkHandle<'_, '_>>; N]` --> grovedb/src/merk_cache.rs:119:21 | 119 | mem::forget(result_uninit); | ^^^^^^^^^^^^^ = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#forget_non_drop = note: `#[warn(clippy::forget_non_drop)]` on by default

Ok(result).wrap_with_cost(cost)
}
}

/// Handle to a cached Merk.
///
/// Wraps an exclusive reference to a Merk that lives for `'c`. The wrapped
/// reference is private, so outside of this module the Merk is reachable only
/// through what the handle itself exposes.
pub(crate) struct MerkHandle<'db, 'c>(&'c mut TxMerk<'db>);

Check warning on line 126 in grovedb/src/merk_cache.rs

View workflow job for this annotation

GitHub Actions / clippy

struct `MerkHandle` is never constructed

warning: struct `MerkHandle` is never constructed --> grovedb/src/merk_cache.rs:126:19 | 126 | pub(crate) struct MerkHandle<'db, 'c>(&'c mut TxMerk<'db>); | ^^^^^^^^^^

/// A `MerkHandle` may be dereferenced to the regular Merk, but only immutably:
/// mutations are supposed to go through the handle so that what has been done
/// to the Merk can be tracked.
impl<'db, 'c> Deref for MerkHandle<'db, 'c> {
    type Target = TxMerk<'db>;

    fn deref(&self) -> &Self::Target {
        // The inner `&mut` is implicitly reborrowed as a shared reference;
        // an explicit `&` here would only add a needless extra borrow.
        self.0
    }
}

impl<'db, 'c> MerkHandle<'db, 'c> {
pub(crate) fn insert(&mut self) {

Check warning on line 139 in grovedb/src/merk_cache.rs

View workflow job for this annotation

GitHub Actions / clippy

method `insert` is never used

warning: method `insert` is never used --> grovedb/src/merk_cache.rs:139:19 | 138 | impl<'db, 'c> MerkHandle<'db, 'c> { | --------------------------------- method in this implementation 139 | pub(crate) fn insert(&mut self) { | ^^^^^^
todo!()
}
}

#[cfg(test)]
mod tests {
    use grovedb_costs::OperationCost;
    use grovedb_path::SubtreePath;
    use grovedb_storage::StorageBatch;
    use grovedb_version::version::GroveVersion;

    use super::MerkCache;
    use crate::tests::{make_deep_tree, ANOTHER_TEST_LEAF, TEST_LEAF};

    /// Opening subtrees for the first time costs something, while getting the
    /// same subtrees from the cache again is free.
    #[test]
    fn cached_subtrees_are_free() {
        let version = GroveVersion::latest();
        let db = make_deep_tree(&version);
        let tx = db.start_transaction();
        let batch = StorageBatch::new();
        let mut cache = MerkCache::new(&db, &tx, &batch, version);

        let mut cost: OperationCost = Default::default();
        let [test1, test2] = cache
            .get_multi_mut([
                SubtreePath::from(&[TEST_LEAF]),
                SubtreePath::from(&[ANOTHER_TEST_LEAF]),
            ])
            .unwrap_add_cost(&mut cost)
            .expect("unable to get subtrees");

        // Assert trees aren't empty
        assert_ne!(test1.root_hash().unwrap(), [0u8; 32]);
        assert_ne!(test2.root_hash().unwrap(), [0u8; 32]);

        // Assert some cost has been paid
        assert!(!cost.is_nothing());

        let mut next_cost: OperationCost = Default::default();
        let [_test1, _test2] = cache
            .get_multi_mut([
                SubtreePath::from(&[TEST_LEAF]),
                SubtreePath::from(&[ANOTHER_TEST_LEAF]),
            ])
            .unwrap_add_cost(&mut next_cost)
            .expect("unable to get subtrees");

        // Assert it was for free now
        assert!(next_cost.is_nothing());
    }

    /// Requesting the same path twice must be rejected. The expected message
    /// is pinned so that an unrelated panic (e.g. in the fixture setup) cannot
    /// make this test pass.
    #[test]
    #[should_panic(expected = "`get_multi_mut` keys must be unique")]
    fn overlapping_references_should_panic() {
        let version = GroveVersion::latest();
        let db = make_deep_tree(&version);
        let tx = db.start_transaction();
        let batch = StorageBatch::new();
        let mut cache = MerkCache::new(&db, &tx, &batch, version);

        let _ = cache.get_multi_mut([
            SubtreePath::from(&[TEST_LEAF]),
            SubtreePath::from(&[TEST_LEAF]),
        ]);
    }
}

0 comments on commit ac4b627

Please sign in to comment.