From 5ff62ec426ab1bff66b3a9fdec2e066333d69dc2 Mon Sep 17 00:00:00 2001 From: Evgeny Fomin Date: Tue, 10 Dec 2024 21:51:01 +0100 Subject: [PATCH] wip --- grovedb-version/src/lib.rs | 10 +-- grovedb/src/merk_cache.rs | 64 ++++++-------- grovedb/src/operations/insert/mod.rs | 61 ++++++------- grovedb/src/reference_path.rs | 127 ++++++++++++++++++++++++++- grovedb/src/util.rs | 83 +++++++++++++++-- path/src/subtree_path.rs | 2 +- path/src/subtree_path_builder.rs | 15 +++- path/src/util/cow_like.rs | 2 +- 8 files changed, 274 insertions(+), 90 deletions(-) diff --git a/grovedb-version/src/lib.rs b/grovedb-version/src/lib.rs index 48b80a52e..ca2bf9b9c 100644 --- a/grovedb-version/src/lib.rs +++ b/grovedb-version/src/lib.rs @@ -1,4 +1,4 @@ -use crate::version::GroveVersion; +use version::GroveVersion; pub mod error; pub mod version; @@ -8,7 +8,7 @@ macro_rules! check_grovedb_v0_with_cost { ($method:expr, $version:expr) => {{ const EXPECTED_VERSION: u16 = 0; if $version != EXPECTED_VERSION { - return Err(GroveVersionError::UnknownVersionMismatch { + return Err($crate::error::GroveVersionError::UnknownVersionMismatch { method: $method.to_string(), known_versions: vec![EXPECTED_VERSION], received: $version, @@ -24,7 +24,7 @@ macro_rules! check_grovedb_v0 { ($method:expr, $version:expr) => {{ const EXPECTED_VERSION: u16 = 0; if $version != EXPECTED_VERSION { - return Err(GroveVersionError::UnknownVersionMismatch { + return Err($crate::error::GroveVersionError::UnknownVersionMismatch { method: $method.to_string(), known_versions: vec![EXPECTED_VERSION], received: $version, @@ -39,7 +39,7 @@ macro_rules! check_merk_v0_with_cost { ($method:expr, $version:expr) => {{ const EXPECTED_VERSION: u16 = 0; if $version != EXPECTED_VERSION { - return Err(GroveVersionError::UnknownVersionMismatch { + return Err($crate::error::GroveVersionError::UnknownVersionMismatch { method: $method.to_string(), known_versions: vec![EXPECTED_VERSION], received: $version, @@ -55,7 +55,7 @@ macro_rules! check_merk_v0 { ($method:expr, $version:expr) => {{ const EXPECTED_VERSION: u16 = 0; if $version != EXPECTED_VERSION { - return Err(GroveVersionError::UnknownVersionMismatch { + return Err($crate::error::GroveVersionError::UnknownVersionMismatch { method: $method.to_string(), known_versions: vec![EXPECTED_VERSION], received: $version, diff --git a/grovedb/src/merk_cache.rs b/grovedb/src/merk_cache.rs index 0e59ec881..078c7f750 100644 --- a/grovedb/src/merk_cache.rs +++ b/grovedb/src/merk_cache.rs @@ -22,7 +22,7 @@ type TxMerk<'db> = Merk>; /// Structure to keep subtrees open in memory for repeated access. pub(crate) struct MerkCache<'db, 'b, B: AsRef<[u8]>> { db: &'db GroveDb, - version: &'db GroveVersion, + pub(crate) version: &'db GroveVersion, batch: Box, tx: &'db Transaction<'db>, merks: UnsafeCell, Box<(Cell, TxMerk<'db>)>>>, @@ -44,11 +44,7 @@ impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> { } } - /// Returns cached Merk reference or opens one if needed. - /// - /// # Panics - /// Borrowing one Merk several times will cause a panic, previous borrow - /// shall reach end of the scope or to be `drop`ped manually. + /// Gets a smart pointer to a cached Merk or opens one if needed. pub(crate) fn get_merk<'c>( &'c self, path: SubtreePathBuilder<'b, B>, @@ -82,15 +78,9 @@ impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> { self.version ) ); - e.insert(Box::new((true.into(), merk))) - } - Entry::Occupied(e) => { - if e.get().0.get() { - e.into_mut() - } else { - panic!("Double borrow of a cached Merk") - } + e.insert(Box::new((false.into(), merk))) } + Entry::Occupied(e) => e.into_mut(), }; let taken_handle_ref: *const Cell = &boxed_flag_merk.0 as *const _; @@ -158,14 +148,14 @@ impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> { ); cost_return_on_error!( &mut cost, - GroveDb::update_tree_item_preserve_flag( - &mut parent_merk, + parent_merk.for_merk(|m| GroveDb::update_tree_item_preserve_flag( + m, parent_key, root_key, root_hash, sum, self.version, - ) + )) ); } } @@ -176,27 +166,19 @@ impl<'db, 'b, B: AsRef<[u8]>> MerkCache<'db, 'b, B> { /// Wrapper over `Merk` tree to manage unqiue borrow dynamically. pub(crate) struct MerkHandle<'db, 'c> { - merk: &'c mut TxMerk<'db>, + merk: *mut TxMerk<'db>, taken_handle: &'c Cell, } -impl<'db, 'c> Deref for MerkHandle<'db, 'c> { - type Target = TxMerk<'db>; - - fn deref(&self) -> &Self::Target { - self.merk - } -} - -impl<'db, 'c> DerefMut for MerkHandle<'db, 'c> { - fn deref_mut(&mut self) -> &mut Self::Target { - self.merk - } -} - -impl<'db, 'c> Drop for MerkHandle<'db, 'c> { - fn drop(&mut self) { +impl<'db, 'c> MerkHandle<'db, 'c> { + pub(crate) fn for_merk(&mut self, f: impl FnOnce(&mut TxMerk<'db>) -> T) -> T { + if self.taken_handle.get() { + panic!("Attempt to have double &mut borrow on Merk"); + } + self.taken_handle.set(true); + let result = f(unsafe { self.merk.as_mut().expect("pointer to Box cannot be null") }); self.taken_handle.set(false); + result } } @@ -221,14 +203,20 @@ mod tests { let cache = MerkCache::new(&db, &tx, version); - cache + let mut merk1 = cache .get_merk(SubtreePath::empty().derive_owned()) .unwrap() .unwrap(); - cache + let mut merk2 = cache .get_merk(SubtreePath::empty().derive_owned()) .unwrap() .unwrap(); + + merk1.for_merk(|_m1| { + merk2.for_merk(|_m2| { + // this shouldn't happen + }) + }); } #[test] @@ -259,9 +247,7 @@ mod tests { let mut merk = cache.get_merk(path.derive_owned()).unwrap().unwrap(); - item.insert(&mut merk, b"k1", None, &version) - .unwrap() - .unwrap(); + merk.for_merk(|m| item.insert(m, b"k1", None, &version).unwrap().unwrap()); drop(merk); diff --git a/grovedb/src/operations/insert/mod.rs b/grovedb/src/operations/insert/mod.rs index 929a9a812..f6baa932e 100644 --- a/grovedb/src/operations/insert/mod.rs +++ b/grovedb/src/operations/insert/mod.rs @@ -6,15 +6,17 @@ use grovedb_costs::{ cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, }; use grovedb_merk::{tree::NULL_HASH, Merk, MerkOptions}; -use grovedb_path::SubtreePath; +use grovedb_path::{SubtreePath, SubtreePathBuilder}; use grovedb_storage::{rocksdb_storage::PrefixedRocksDbTransactionContext, Storage, StorageBatch}; use grovedb_version::{ check_grovedb_v0_with_cost, error::GroveVersionError, version::GroveVersion, }; use crate::{ - merk_cache::MerkCache, reference_path::path_from_reference_path_type, util::TxRef, Element, - Error, GroveDb, Transaction, TransactionArg, + merk_cache::{MerkCache, MerkHandle}, + reference_path::path_from_reference_path_type, + util::{self, TxRef}, + Element, Error, GroveDb, Transaction, TransactionArg, }; #[derive(Clone)] /// Insert options @@ -100,15 +102,15 @@ impl GroveDb { /// first make sure other merk exist /// if it exists, then create merk to be inserted, and get root hash /// we only care about root hash of merk to be inserted - fn add_element_on_transaction<'db, 'b, B: AsRef<[u8]>>( + fn add_element_on_transaction<'db, 'b, 'c, B: AsRef<[u8]>>( &'db self, path: SubtreePath<'b, B>, key: &[u8], element: Element, options: InsertOptions, - merk_cache: &MerkCache<'db, 'b, B>, + merk_cache: &'c MerkCache<'db, 'b, B>, grove_version: &GroveVersion, - ) -> CostResult>, Error> { + ) -> CostResult, Error> { check_grovedb_v0_with_cost!( "add_element_on_transaction", grove_version @@ -128,12 +130,12 @@ impl GroveDb { let maybe_element_bytes = cost_return_on_error!( &mut cost, subtree_to_insert_into - .get( + .for_merk(|m| m.get( key, true, Some(&Element::value_defined_cost_for_serialized_value), grove_version, - ) + )) .map_err(|e| Error::CorruptedData(e.to_string())) ); if let Some(element_bytes) = maybe_element_bytes { @@ -164,24 +166,16 @@ impl GroveDb { match element { Element::Reference(ref reference_path, ..) => { - let path = path.to_vec(); // TODO: need for support for references in path library - let reference_path = cost_return_on_error!( + let referenced_item: Element = cost_return_on_error!( &mut cost, - path_from_reference_path_type(reference_path.clone(), &path, Some(key)) - .wrap_with_cost(OperationCost::default()) + util::follow_reference( + merk_cache, + path.derive_owned(), + key, + reference_path.clone() + ) ); - let referenced_item: Element = todo!(); - // cost_return_on_error!( - // &mut cost, - // self.follow_reference( - // reference_path.as_slice().into(), - // false, - // transaction, - // grove_version - // ) - // ); - if matches!( referenced_item, Element::Tree(_, _) | Element::SumTree(_, _, _) @@ -197,13 +191,13 @@ impl GroveDb { cost_return_on_error!( &mut cost, - element.insert_reference( - &mut subtree_to_insert_into, + subtree_to_insert_into.for_merk(|m| element.insert_reference( + m, key, referenced_element_value_hash, Some(options.as_merk_options()), grove_version, - ) + )) ); } Element::Tree(ref value, _) | Element::SumTree(ref value, ..) => { @@ -215,31 +209,30 @@ impl GroveDb { } else { cost_return_on_error!( &mut cost, - element.insert_subtree( - &mut subtree_to_insert_into, + subtree_to_insert_into.for_merk(|m| element.insert_subtree( + m, key, NULL_HASH, Some(options.as_merk_options()), grove_version - ) + )) ); } } _ => { cost_return_on_error!( &mut cost, - element.insert( - &mut subtree_to_insert_into, + subtree_to_insert_into.for_merk(|m| element.insert( + m, key, Some(options.as_merk_options()), grove_version - ) + )) ); } } - // Ok(subtree_to_insert_into).wrap_with_cost(cost) - todo!() + Ok(subtree_to_insert_into).wrap_with_cost(cost) } /// Inserts an element at the specified path and key if it does not already diff --git a/grovedb/src/reference_path.rs b/grovedb/src/reference_path.rs index 4a489aa91..ddc08a11f 100644 --- a/grovedb/src/reference_path.rs +++ b/grovedb/src/reference_path.rs @@ -5,7 +5,7 @@ use std::fmt; use std::iter; use bincode::{Decode, Encode}; -use grovedb_path::SubtreePath; +use grovedb_path::{SubtreePath, SubtreePathBuilder}; #[cfg(feature = "full")] use grovedb_visualize::visualize_to_vec; #[cfg(feature = "full")] @@ -204,6 +204,131 @@ impl ReferencePathType { ) -> Result>, Error> { path_from_reference_path_type(self, current_path, current_key) } + + pub fn absolute_qualified_path<'b, B: AsRef<[u8]>>( + self, + mut current_path: SubtreePathBuilder<'b, B>, + current_key: &[u8], + ) -> Result, Error> { + match self { + ReferencePathType::AbsolutePathReference(path) => { + Ok(SubtreePathBuilder::owned_from_iter(path)) + } + + ReferencePathType::UpstreamRootHeightReference(no_of_elements_to_keep, append_path) => { + let len = current_path.len(); + if no_of_elements_to_keep as usize > len { + return Err(Error::InvalidInput( + "reference stored path cannot satisfy reference constraints", + )); + } + let n_to_remove = len - no_of_elements_to_keep as usize; + + let referenced_path = (0..n_to_remove).fold(current_path, |p, _| { + p.derive_parent_owned() + .expect("lenghts were checked above") + .0 + }); + let referenced_path = append_path.into_iter().fold(referenced_path, |mut p, s| { + p.push_segment(&s); + p + }); + + Ok(referenced_path) + } + + ReferencePathType::UpstreamRootHeightWithParentPathAdditionReference( + no_of_elements_to_keep, + append_path, + ) => { + let len = current_path.len(); + if no_of_elements_to_keep as usize > len || len < 2 { + return Err(Error::InvalidInput( + "reference stored path cannot satisfy reference constraints", + )); + } + + let parent_key = current_path + .reverse_iter() + .nth(1) + .expect("lengths were checked above") + .to_vec(); + + let n_to_remove = len - no_of_elements_to_keep as usize; + + let referenced_path = (0..n_to_remove).fold(current_path, |p, _| { + p.derive_parent_owned() + .expect("lenghts were checked above") + .0 + }); + let mut referenced_path = + append_path.into_iter().fold(referenced_path, |mut p, s| { + p.push_segment(&s); + p + }); + referenced_path.push_segment(&parent_key); + + Ok(referenced_path) + } + + // Discard the last n elements from current path, append new path to subpath + ReferencePathType::UpstreamFromElementHeightReference( + no_of_elements_to_discard_from_end, + append_path, + ) => { + let mut referenced_path = current_path; + for _ in 0..no_of_elements_to_discard_from_end { + if let Some((path, _)) = referenced_path.derive_parent_owned() { + referenced_path = path; + } else { + return Err(Error::InvalidInput( + "reference stored path cannot satisfy reference constraints", + )); + } + } + + let referenced_path = append_path.into_iter().fold(referenced_path, |mut p, s| { + p.push_segment(&s); + p + }); + + Ok(referenced_path) + } + + ReferencePathType::CousinReference(cousin_key) => { + let Some((mut referred_path, _)) = current_path.derive_parent_owned() else { + return Err(Error::InvalidInput( + "reference stored path cannot satisfy reference constraints", + )); + }; + + referred_path.push_segment(&cousin_key); + referred_path.push_segment(current_key); + + Ok(referred_path) + } + + ReferencePathType::RemovedCousinReference(cousin_path) => { + let Some((mut referred_path, _)) = current_path.derive_parent_owned() else { + return Err(Error::InvalidInput( + "reference stored path cannot satisfy reference constraints", + )); + }; + + cousin_path + .into_iter() + .for_each(|s| referred_path.push_segment(&s)); + referred_path.push_segment(current_key); + + Ok(referred_path) + } + + ReferencePathType::SiblingReference(sibling_key) => { + current_path.push_segment(&sibling_key); + Ok(current_path) + } + } + } } #[cfg(any(feature = "full", feature = "visualize"))] diff --git a/grovedb/src/util.rs b/grovedb/src/util.rs index 7a8e3081f..1ef9b605a 100644 --- a/grovedb/src/util.rs +++ b/grovedb/src/util.rs @@ -1,14 +1,21 @@ -use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; +use std::collections::HashSet; + +use grovedb_costs::{ + cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, +}; use grovedb_merk::Merk; -use grovedb_path::SubtreePath; +use grovedb_path::{SubtreePath, SubtreePathBuilder}; use grovedb_storage::{ rocksdb_storage::{PrefixedRocksDbTransactionContext, RocksDbStorage}, Storage, StorageBatch, }; -use grovedb_version::version::GroveVersion; +use grovedb_version::{check_grovedb_v0_with_cost, version::GroveVersion}; use grovedb_visualize::DebugByteVectors; -use crate::{merk_cache::MerkCache, Element, Error, Transaction, TransactionArg}; +use crate::{ + merk_cache::MerkCache, operations::MAX_REFERENCE_HOPS, reference_path::ReferencePathType, + Element, Error, Transaction, TransactionArg, +}; pub(crate) enum TxRef<'a, 'db: 'a> { Owned(Transaction<'db>), @@ -105,5 +112,69 @@ where } } -// pub(crate) fn follow_reference<'db, 'b, B>(merk_cache: &MerkCache<'db, 'b, -// B>, path: ) +pub(crate) fn follow_reference<'db, 'b, 'c, B: AsRef<[u8]>>( + merk_cache: &'c MerkCache<'db, 'b, B>, + path: SubtreePathBuilder<'b, B>, + key: &[u8], + ref_path: ReferencePathType, +) -> CostResult { + check_grovedb_v0_with_cost!( + "follow_reference", + merk_cache + .version + .grovedb_versions + .operations + .get + .follow_reference + ); + + let mut cost = OperationCost::default(); + + let mut hops_left = MAX_REFERENCE_HOPS; + let mut visited = HashSet::new(); + + let mut qualified_path = path.clone(); + qualified_path.push_segment(key); + + visited.insert(qualified_path); + + let mut current_path = path; + let mut current_key = key.to_vec(); + let mut current_ref = ref_path; + + while hops_left > 0 { + let referred_qualified_path = cost_return_on_error_no_add!( + cost, + current_ref.absolute_qualified_path(current_path, ¤t_key) + ); + + if !visited.insert(referred_qualified_path.clone()) { + return Err(Error::CyclicReference).wrap_with_cost(cost); + } + + let Some((referred_path, referred_key)) = referred_qualified_path.derive_parent_owned() + else { + return Err(Error::InvalidCodeExecution("empty reference")).wrap_with_cost(cost); + }; + + let mut referred_merk = + cost_return_on_error!(&mut cost, merk_cache.get_merk(referred_path.clone())); + let element = cost_return_on_error!( + &mut cost, + referred_merk + .for_merk(|m| { Element::get(m, &referred_key, true, merk_cache.version) }) + ); + + match element { + Element::Reference(ref_path, ..) | Element::BidirectionalReference(ref_path, ..) => { + current_path = referred_path; + current_key = referred_key; + current_ref = ref_path; + hops_left -= 1; + } + e => return Ok(e).wrap_with_cost(cost), + } + } + + Err(Error::ReferenceLimit).wrap_with_cost(cost) +} diff --git a/path/src/subtree_path.rs b/path/src/subtree_path.rs index 7864f84e3..9fd501605 100644 --- a/path/src/subtree_path.rs +++ b/path/src/subtree_path.rs @@ -248,7 +248,7 @@ impl<'b, B: AsRef<[u8]>> SubtreePath<'b, B> { } /// Get a derived path with a child path segment added. - pub fn derive_owned_with_child<'s, S>(&'b self, segment: S) -> SubtreePathBuilder<'b, B> + pub fn derive_owned_with_child<'s, S>(&self, segment: S) -> SubtreePathBuilder<'b, B> where S: Into>, 's: 'b, diff --git a/path/src/subtree_path_builder.rs b/path/src/subtree_path_builder.rs index 4428e0c2e..6ac779775 100644 --- a/path/src/subtree_path_builder.rs +++ b/path/src/subtree_path_builder.rs @@ -46,6 +46,15 @@ pub struct SubtreePathBuilder<'b, B> { pub(crate) relative: SubtreePathRelative<'b>, } +impl<'b, B> Clone for SubtreePathBuilder<'b, B> { + fn clone(&self) -> Self { + SubtreePathBuilder { + base: self.base.clone(), + relative: self.relative.clone(), + } + } +} + /// Hash order is the same as iteration order: from most deep path segment up to /// root. impl<'b, B: AsRef<[u8]>> Hash for SubtreePathBuilder<'b, B> { @@ -97,7 +106,7 @@ impl<'s, 'b, B> From<&'s SubtreePath<'b, B>> for SubtreePathBuilder<'b, B> { } /// Derived subtree path on top of base path. -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) enum SubtreePathRelative<'r> { /// Equivalent to the base path. Empty, @@ -149,7 +158,7 @@ impl Default for SubtreePathBuilder<'static, [u8; 0]> { } } -impl SubtreePathBuilder<'static, B> { +impl<'b, B> SubtreePathBuilder<'b, B> { /// Makes an owned `SubtreePathBuilder` out of iterator. pub fn owned_from_iter>(iter: impl IntoIterator) -> Self { let bytes = iter.into_iter().fold(CompactBytes::new(), |mut bytes, s| { @@ -166,7 +175,7 @@ impl SubtreePathBuilder<'static, B> { } /// Create an owned version of `SubtreePathBuilder` from `SubtreePath`. - pub fn owned_from_path<'b, S: AsRef<[u8]>>(path: SubtreePath<'b, S>) -> Self { + pub fn owned_from_path<'a, S: AsRef<[u8]>>(path: SubtreePath<'a, S>) -> Self { Self::owned_from_iter(path.to_vec()) } } diff --git a/path/src/util/cow_like.rs b/path/src/util/cow_like.rs index 78608ec89..02a535372 100644 --- a/path/src/util/cow_like.rs +++ b/path/src/util/cow_like.rs @@ -35,7 +35,7 @@ use std::{ /// A smart pointer that follows the semantics of [Cow](std::borrow::Cow) except /// provides no means for mutability and thus doesn't require [Clone]. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum CowLike<'b> { Owned(Vec), Borrowed(&'b [u8]),