diff --git a/Cargo.toml b/Cargo.toml index b0a38948..6ebd27d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,5 @@ members = [ "node-grove", "storage", "visualize", - "path", + "path" ] diff --git a/grovedb/Cargo.toml b/grovedb/Cargo.toml index dc67a761..c932b138 100644 --- a/grovedb/Cargo.toml +++ b/grovedb/Cargo.toml @@ -26,6 +26,8 @@ nohash-hasher = { version = "0.2.0", optional = true } indexmap = { version = "2.2.6", optional = true } intmap = { version = "2.0.0", optional = true } grovedb-path = { version = "1.0.0-rc.2", path = "../path" } +blake3 = "1.4.0" +bitvec = "1" [dev-dependencies] rand = "0.8.5" diff --git a/grovedb/src/batch/mod.rs b/grovedb/src/batch/mod.rs index f6a4e7ea..8674672c 100644 --- a/grovedb/src/batch/mod.rs +++ b/grovedb/src/batch/mod.rs @@ -552,7 +552,7 @@ impl GroveDbOp { } /// Verify consistency of operations - pub fn verify_consistency_of_operations(ops: &Vec) -> GroveDbOpConsistencyResults { + pub fn verify_consistency_of_operations(ops: &[GroveDbOp]) -> GroveDbOpConsistencyResults { let ops_len = ops.len(); // operations should not have any duplicates let mut repeated_ops = vec![]; @@ -2424,8 +2424,8 @@ mod tests { Element::empty_tree(), ), ]; - assert!(matches!( - db.apply_batch( + assert!(db + .apply_batch( ops, Some(BatchApplyOptions { validate_insertion_does_not_override: false, @@ -2438,9 +2438,8 @@ mod tests { }), None ) - .unwrap(), - Ok(_) - )); + .unwrap() + .is_ok()); } #[test] @@ -3481,7 +3480,7 @@ mod tests { elem.clone(), ), ]; - assert!(matches!(db.apply_batch(batch, None, None).unwrap(), Ok(_))); + assert!(db.apply_batch(batch, None, None).unwrap().is_ok()); assert_eq!( db.get([TEST_LEAF].as_ref(), b"key1", None) .unwrap() @@ -3498,7 +3497,7 @@ mod tests { .unwrap() .expect("should generate proof"); let verification_result = GroveDb::verify_query_raw(&proof, &path_query); - assert!(matches!(verification_result, Ok(_))); + assert!(verification_result.is_ok()); // Hit reference limit when you specify max reference 
hop, lower than actual hop // count diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 9ea95513..fd11f10d 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -160,7 +160,7 @@ pub mod query_result_type; #[cfg(any(feature = "full", feature = "verify"))] pub mod reference_path; #[cfg(feature = "full")] -mod replication; +pub mod replication; #[cfg(all(test, feature = "full"))] mod tests; #[cfg(feature = "full")] @@ -172,8 +172,6 @@ mod visualize; #[cfg(feature = "full")] use std::{collections::HashMap, option::Option::None, path::Path}; -#[cfg(any(feature = "full", feature = "verify"))] -use element::helpers; #[cfg(any(feature = "full", feature = "verify"))] pub use element::Element; #[cfg(feature = "full")] @@ -217,14 +215,12 @@ use grovedb_storage::{Storage, StorageContext}; use grovedb_visualize::DebugByteVectors; #[cfg(any(feature = "full", feature = "verify"))] pub use query::{PathQuery, SizedQuery}; -#[cfg(feature = "full")] -pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; +#[cfg(feature = "full")] +use crate::element::helpers::raw_decode; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; #[cfg(feature = "full")] -use crate::helpers::raw_decode; -#[cfg(feature = "full")] use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; @@ -237,6 +233,8 @@ pub struct GroveDb { db: RocksDbStorage, } +pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; + /// Transaction #[cfg(feature = "full")] pub type Transaction<'db> = >::Transaction; diff --git a/grovedb/src/operations/auxiliary.rs b/grovedb/src/operations/auxiliary.rs index 0a29c510..1b6b884d 100644 --- a/grovedb/src/operations/auxiliary.rs +++ b/grovedb/src/operations/auxiliary.rs @@ -30,15 +30,17 @@ #[cfg(feature = "full")] use grovedb_costs::{ - cost_return_on_error_no_add, storage_cost::key_value_cost::KeyValueStorageCost, CostResult, - CostsExt, OperationCost, + 
cost_return_on_error, cost_return_on_error_no_add, + storage_cost::key_value_cost::KeyValueStorageCost, CostResult, CostsExt, OperationCost, }; +use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::StorageContext; use grovedb_storage::{Storage, StorageBatch}; +use crate::util::storage_context_optional_tx; #[cfg(feature = "full")] -use crate::{util::meta_storage_context_optional_tx, Error, GroveDb, TransactionArg}; +use crate::{util::meta_storage_context_optional_tx, Element, Error, GroveDb, TransactionArg}; #[cfg(feature = "full")] impl GroveDb { @@ -118,4 +120,50 @@ impl GroveDb { Ok(value).wrap_with_cost(cost) }) } + + // TODO: dumb traversal should not be tolerated + /// Finds keys which are trees for a given subtree recursively. + /// One element means a key of a `merk`, n > 1 elements mean relative path + /// for a deeply nested subtree. + pub fn find_subtrees>( + &self, + path: &SubtreePath, + transaction: TransactionArg, + ) -> CostResult>>, Error> { + let mut cost = OperationCost::default(); + + // TODO: remove conversion to vec; + // However, it's not easy for a reason: + // new keys to enqueue are taken from raw iterator which returns Vec; + // changing that to slice is hard as cursor should be moved for next iteration + // which requires exclusive (&mut) reference, also there is no guarantee that + // slice which points into storage internals will remain valid if raw + // iterator got altered so why that reference should be exclusive; + // + // Update: there are pinned views into RocksDB to return slices of data, perhaps + // there is something for iterators + + let mut queue: Vec>> = vec![path.to_vec()]; + let mut result: Vec>> = queue.clone(); + + while let Some(q) = queue.pop() { + let subtree_path: SubtreePath> = q.as_slice().into(); + // Get the correct subtree with q_ref as path + storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, { + let storage = storage.unwrap_add_cost(&mut cost); + let mut 
raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost); + while let Some((key, value)) = + cost_return_on_error!(&mut cost, raw_iter.next_element()) + { + if value.is_tree() { + let mut sub_path = q.clone(); + sub_path.push(key.to_vec()); + queue.push(sub_path.clone()); + result.push(sub_path); + } + } + }) + } + Ok(result).wrap_with_cost(cost) + } } diff --git a/grovedb/src/operations/delete/mod.rs b/grovedb/src/operations/delete/mod.rs index a8c1c876..84d14652 100644 --- a/grovedb/src/operations/delete/mod.rs +++ b/grovedb/src/operations/delete/mod.rs @@ -59,7 +59,7 @@ use grovedb_storage::{ #[cfg(feature = "full")] use crate::{ batch::{GroveDbOp, Op}, - util::{storage_context_optional_tx, storage_context_with_parent_optional_tx}, + util::storage_context_with_parent_optional_tx, Element, ElementFlags, Error, GroveDb, Transaction, TransactionArg, }; use crate::{raw_decode, util::merk_optional_tx_path_not_empty}; @@ -879,52 +879,6 @@ impl GroveDb { Ok(true).wrap_with_cost(cost) } - - // TODO: dumb traversal should not be tolerated - /// Finds keys which are trees for a given subtree recursively. - /// One element means a key of a `merk`, n > 1 elements mean relative path - /// for a deeply nested subtree. 
- pub(crate) fn find_subtrees>( - &self, - path: &SubtreePath, - transaction: TransactionArg, - ) -> CostResult>>, Error> { - let mut cost = OperationCost::default(); - - // TODO: remove conversion to vec; - // However, it's not easy for a reason: - // new keys to enqueue are taken from raw iterator which returns Vec; - // changing that to slice is hard as cursor should be moved for next iteration - // which requires exclusive (&mut) reference, also there is no guarantee that - // slice which points into storage internals will remain valid if raw - // iterator got altered so why that reference should be exclusive; - // - // Update: there are pinned views into RocksDB to return slices of data, perhaps - // there is something for iterators - - let mut queue: Vec>> = vec![path.to_vec()]; - let mut result: Vec>> = queue.clone(); - - while let Some(q) = queue.pop() { - let subtree_path: SubtreePath> = q.as_slice().into(); - // Get the correct subtree with q_ref as path - storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, { - let storage = storage.unwrap_add_cost(&mut cost); - let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost); - while let Some((key, value)) = - cost_return_on_error!(&mut cost, raw_iter.next_element()) - { - if value.is_tree() { - let mut sub_path = q.clone(); - sub_path.push(key.to_vec()); - queue.push(sub_path.clone()); - result.push(sub_path); - } - } - }) - } - Ok(result).wrap_with_cost(cost) - } } #[cfg(feature = "full")] @@ -1029,10 +983,7 @@ mod tests { db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(), Err(Error::PathKeyNotFound(_)) )); - assert!(matches!( - db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(), - Ok(_) - )); + assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok()); } #[test] @@ -1397,10 +1348,7 @@ mod tests { db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(), Err(Error::PathKeyNotFound(_)) )); - assert!(matches!( - db.get([TEST_LEAF].as_ref(), 
b"key4", None).unwrap(), - Ok(_) - )); + assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok()); } #[test] diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 898f5ff1..0484cfa1 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -1,990 +1,470 @@ -// MIT LICENSE -// -// Copyright (c) 2021 Dash Core Group -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. - -//! 
Replication - use std::{ - collections::VecDeque, - iter::{empty, once}, + collections::{BTreeMap, BTreeSet}, + fmt, + str::Utf8Error, }; use grovedb_merk::{ - proofs::{Node, Op}, - Merk, TreeFeatureType, + merk::restore::Restorer, + proofs::Op, + tree::{hash::CryptoHash, kv::ValueDefinedCostType, value_hash}, + ChunkProducer, }; use grovedb_path::SubtreePath; -use grovedb_storage::{ - rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, - Storage, StorageContext, -}; - -use crate::{Element, Error, GroveDb, Hash, Transaction}; - -const OPS_PER_CHUNK: usize = 128; - -impl GroveDb { - /// Creates a chunk producer to replicate GroveDb. - pub fn chunks(&self) -> SubtreeChunkProducer { - SubtreeChunkProducer::new(self) - } -} - -/// Subtree chunks producer. -pub struct SubtreeChunkProducer<'db> { - grove_db: &'db GroveDb, - cache: Option>, +use grovedb_storage::rocksdb_storage::RocksDbStorage; +#[rustfmt::skip] +use grovedb_storage::rocksdb_storage::storage_context::context_immediate::PrefixedRocksDbImmediateStorageContext; + +use crate::{replication, Error, GroveDb, Transaction, TransactionArg}; + +pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; + +// Struct governing state sync +pub struct StateSyncInfo<'db> { + // Current Chunk restorer + pub restorer: Option>>, + // Set of processed prefixes (Path digests) + pub processed_prefixes: BTreeSet, + // Current processed prefix (Path digest) + pub current_prefix: Option, + // Set of global chunk ids requested to be fetched and pending for processing. For the + // description of global chunk id check fetch_chunk(). 
+ pub pending_chunks: BTreeSet>, + // Number of processed chunks in current prefix (Path digest) + pub num_processed_chunks: usize, } -struct SubtreeChunkProducerCache<'db> { - current_merk_path: Vec>, - current_merk: Merk>, - // This needed to be an `Option` because it requires a reference on Merk but it's within the - // same struct and during struct init a referenced Merk would be moved inside a struct, - // using `Option` this init happens in two steps. - current_chunk_producer: - Option>>, +// Struct containing information about current subtrees found in GroveDB +pub struct SubtreesMetadata { + // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent + // Subtree elem_value_hash) Note: Parent Subtree actual_value_hash, Parent Subtree + // elem_value_hash are needed when verifying the new constructed subtree after wards. + pub data: BTreeMap>, CryptoHash, CryptoHash)>, } -impl<'db> SubtreeChunkProducer<'db> { - fn new(storage: &'db GroveDb) -> Self { - SubtreeChunkProducer { - grove_db: storage, - cache: None, +impl SubtreesMetadata { + pub fn new() -> SubtreesMetadata { + SubtreesMetadata { + data: BTreeMap::new(), } } +} - /// Chunks in current producer - pub fn chunks_in_current_producer(&self) -> usize { - self.cache - .as_ref() - .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) - .unwrap_or(0) +impl Default for SubtreesMetadata { + fn default() -> Self { + Self::new() } +} - /// Get chunk - pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> - where - P: IntoIterator, -

::IntoIter: Clone + DoubleEndedIterator, - { - let path_iter = path.into_iter(); - - if let Some(SubtreeChunkProducerCache { - current_merk_path, .. - }) = &self.cache - { - if !itertools::equal(current_merk_path, path_iter.clone()) { - self.cache = None; - } - } - - if self.cache.is_none() { - let current_merk = self - .grove_db - .open_non_transactional_merk_at_path( - path_iter.clone().collect::>().as_slice().into(), - None, - ) - .unwrap()?; - - if current_merk.root_key().is_none() { - return Ok(Vec::new()); - } - - self.cache = Some(SubtreeChunkProducerCache { - current_merk_path: path_iter.map(|p| p.to_vec()).collect(), - current_merk, - current_chunk_producer: None, - }); - let cache = self.cache.as_mut().expect("exists at this point"); - cache.current_chunk_producer = Some( - grovedb_merk::ChunkProducer::new(&cache.current_merk) - .map_err(|e| Error::CorruptedData(e.to_string()))?, +impl fmt::Debug for SubtreesMetadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (prefix, metadata) in self.data.iter() { + let metadata_path = &metadata.0; + let metadata_path_str = util_path_to_string(metadata_path); + writeln!( + f, + " prefix:{:?} -> path:{:?}\n", + hex::encode(prefix), + metadata_path_str ); } - - self.cache - .as_mut() - .expect("must exist at this point") - .current_chunk_producer - .as_mut() - .expect("must exist at this point") - .chunk(index) - .map_err(|e| Error::CorruptedData(e.to_string())) + Ok(()) } } -// TODO: make generic over storage_cost context -type MerkRestorer<'db> = grovedb_merk::Restorer>; - -type Path = Vec>; - -/// Structure to drive GroveDb restore process. 
-pub struct Restorer<'db> { - current_merk_restorer: Option>, - current_merk_chunk_index: usize, - current_merk_path: Path, - queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, - grove_db: &'db GroveDb, - tx: &'db Transaction<'db>, +// Converts a path into a human-readable string (for debugging) +pub fn util_path_to_string(path: &[Vec]) -> Vec { + let mut subtree_path_str: Vec = vec![]; + for subtree in path { + let string = std::str::from_utf8(subtree).expect("should be able to convert path"); + subtree_path_str.push( + string + .parse() + .expect("should be able to parse path to string"), + ); + } + subtree_path_str } -/// Indicates what next piece of information `Restorer` expects or wraps a -/// successful result. -#[derive(Debug)] -pub enum RestorerResponse { - AwaitNextChunk { path: Vec>, index: usize }, - Ready, -} +// Splits the given global chunk id into [SUBTREE_PREFIX:CHUNK_ID] +pub fn util_split_global_chunk_id( + global_chunk_id: &[u8], +) -> Result<(crate::SubtreePrefix, String), Error> { + let chunk_prefix_length: usize = 32; + if global_chunk_id.len() < chunk_prefix_length { + return Err(Error::CorruptedData( + "expected global chunk id of at least 32 length".to_string(), + )); + } -#[derive(Debug)] -pub struct RestorerError(String); - -impl<'db> Restorer<'db> { - /// Create a GroveDb restorer using a backing storage_cost and root hash. 
- pub fn new( - grove_db: &'db GroveDb, - root_hash: Hash, - tx: &'db Transaction<'db>, - ) -> Result { - Ok(Restorer { - tx, - current_merk_restorer: Some(MerkRestorer::new( - Merk::open_base( - grove_db - .db - .get_immediate_storage_context(SubtreePath::empty(), tx) - .unwrap(), - false, - Some(&Element::value_defined_cost_for_serialized_value), - ) - .unwrap() - .map_err(|e| RestorerError(e.to_string()))?, - None, - root_hash, - )), - current_merk_chunk_index: 0, - current_merk_path: vec![], - queue: VecDeque::new(), - grove_db, - }) + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); + let mut array = [0u8; 32]; + array.copy_from_slice(chunk_prefix); + let chunk_prefix_key: crate::SubtreePrefix = array; + let str_chunk_id = String::from_utf8(chunk_id.to_vec()); + match str_chunk_id { + Ok(s) => Ok((chunk_prefix_key, s)), + Err(_) => Err(Error::CorruptedData( + "unable to convert chunk id to string".to_string(), + )), } +} - /// Process next chunk and receive instruction on what to do next. - pub fn process_chunk( - &mut self, - chunk_ops: impl IntoIterator, - ) -> Result { - if self.current_merk_restorer.is_none() { - // Last restorer was consumed and no more Merks to process. - return Ok(RestorerResponse::Ready); +#[cfg(feature = "full")] +impl GroveDb { + pub fn create_state_sync_info(&self) -> StateSyncInfo { + let pending_chunks = BTreeSet::new(); + let processed_prefixes = BTreeSet::new(); + StateSyncInfo { + restorer: None, + processed_prefixes, + current_prefix: None, + pending_chunks, + num_processed_chunks: 0, } - // First we decode a chunk to take out info about nested trees to add them into - // todo list. 
- let mut ops = Vec::new(); - for op in chunk_ops { - ops.push(op); - match ops.last().expect("just inserted") { - Op::Push(Node::KVValueHashFeatureType( - key, - value_bytes, - value_hash, - feature_type, - )) - | Op::PushInverted(Node::KVValueHashFeatureType( - key, - value_bytes, - value_hash, - feature_type, - )) => { - if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) = - Element::deserialize(value_bytes) - .map_err(|e| RestorerError(e.to_string()))? - { - if root_key.is_none() || self.current_merk_path.last() == Some(key) { - // We add only subtrees of the current subtree to queue, skipping - // itself; Also skipping empty Merks. - continue; + } + + // Returns the discovered subtrees found recursively along with their associated + // metadata Params: + // tx: Transaction. Function returns the data by opening merks at given tx. + // TODO: Add a SubTreePath as param and start searching from that path instead + // of root (as it is now) + pub fn get_subtrees_metadata(&self, tx: TransactionArg) -> Result { + let mut subtrees_metadata = crate::replication::SubtreesMetadata::new(); + + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), tx).value?; + for subtree in subtrees_root.into_iter() { + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); + + let current_path = SubtreePath::from(path); + + match (current_path.derive_parent(), subtree.last()) { + (Some((parent_path, _)), Some(parent_key)) => match tx { + None => { + let parent_merk = self + .open_non_transactional_merk_at_path(parent_path, None) + .value?; + if let Ok(Some((elem_value, elem_value_hash))) = parent_merk + .get_value_and_value_hash( + parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ) + .value + { + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert( + prefix, + 
(current_path.to_vec(), actual_value_hash, elem_value_hash), + ); + } + } + Some(t) => { + let parent_merk = self + .open_transactional_merk_at_path(parent_path, t, None) + .value?; + if let Ok(Some((elem_value, elem_value_hash))) = parent_merk + .get_value_and_value_hash( + parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ) + .value + { + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert( + prefix, + (current_path.to_vec(), actual_value_hash, elem_value_hash), + ); } - let mut path = self.current_merk_path.clone(); - path.push(key.clone()); - // The value hash is the root tree hash - self.queue.push_back(( - path, - value_bytes.to_owned(), - *value_hash, - *feature_type, - )); } + }, + _ => { + subtrees_metadata.data.insert( + prefix, + ( + current_path.to_vec(), + CryptoHash::default(), + CryptoHash::default(), + ), + ); } - _ => {} } } - - // Process chunk using Merk's possibilities. - let remaining = self - .current_merk_restorer - .as_mut() - .expect("restorer exists at this point") - .process_chunk(ops) - .map_err(|e| RestorerError(e.to_string()))?; - - self.current_merk_chunk_index += 1; - - if remaining == 0 { - // If no more chunks for this Merk required decide if we're done or take a next - // Merk to process. - self.current_merk_restorer - .take() - .expect("restorer exists at this point") - .finalize() - .map_err(|e| RestorerError(e.to_string()))?; - if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { - // Process next subtree. 
- let merk = self - .grove_db - .open_merk_for_replication(next_path.as_slice().into(), self.tx) - .map_err(|e| RestorerError(e.to_string()))?; - self.current_merk_restorer = Some(MerkRestorer::new( - merk, - Some(combining_value), - expected_hash, - )); - self.current_merk_chunk_index = 0; - self.current_merk_path = next_path; - - Ok(RestorerResponse::AwaitNextChunk { - path: self.current_merk_path.clone(), - index: self.current_merk_chunk_index, - }) - } else { - Ok(RestorerResponse::Ready) - } - } else { - // Request a chunk at the same path but with incremented index. - Ok(RestorerResponse::AwaitNextChunk { - path: self.current_merk_path.clone(), - index: self.current_merk_chunk_index, - }) - } - } -} - -/// Chunk producer wrapper which uses bigger messages that may include chunks of -/// requested subtree with its right siblings. -/// -/// Because `Restorer` builds GroveDb replica breadth-first way from top to -/// bottom it makes sense to send a subtree's siblings next instead of its own -/// subtrees. -pub struct SiblingsChunkProducer<'db> { - chunk_producer: SubtreeChunkProducer<'db>, -} - -#[derive(Debug)] -pub struct GroveChunk { - subtree_chunks: Vec<(usize, Vec)>, -} - -impl<'db> SiblingsChunkProducer<'db> { - /// New - pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { - SiblingsChunkProducer { chunk_producer } + Ok(subtrees_metadata) } - /// Get a collection of chunks possibly from different Merks with the first - /// one as requested. - pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> - where - P: IntoIterator, -

::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, - { - let path_iter = path.into_iter(); - let mut result = Vec::new(); - let mut ops_count = 0; - - if path_iter.len() == 0 { - // We're at the root of GroveDb, no siblings here. - self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; - return Ok(result); - }; - - // Get siblings on the right to send chunks of multiple Merks if it meets the - // limit. - - let mut siblings_keys: VecDeque> = VecDeque::new(); - - let mut parent_path = path_iter; - let requested_key = parent_path.next_back(); - - let parent_ctx = self - .chunk_producer - .grove_db - .db - .get_storage_context( - parent_path.clone().collect::>().as_slice().into(), - None, - ) - .unwrap(); - let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); - - if let Some(key) = requested_key { - siblings_iter.fast_forward(key)?; - } - - while let Some(element) = siblings_iter.next_element().unwrap()? { - if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { - siblings_keys.push_back(key); - } - } - - let mut current_index = index; - // Process each subtree - while let Some(subtree_key) = siblings_keys.pop_front() { - #[allow(clippy::map_identity)] - let subtree_path = parent_path - .clone() - .map(|x| x) - .chain(once(subtree_key.as_slice())); - - self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; - // Going to a next sibling, should start from 0. - - if ops_count >= OPS_PER_CHUNK { - break; - } - current_index = 0; + // Fetch a chunk by global chunk id (should be called by ABCI when + // LoadSnapshotChunk method is called) Params: + // global_chunk_id: Global chunk id in the following format: + // [SUBTREE_PREFIX:CHUNK_ID] SUBTREE_PREFIX: 32 bytes (mandatory) (All zeros + // = Root subtree) CHUNK_ID: 0.. bytes (optional) Traversal instructions to + // the root of the given chunk. Traversal instructions are "1" for left, and + // "0" for right. 
TODO: Compact CHUNK_ID into bitset for size optimization + // as a subtree can be big hence traversal instructions for the deepest chunks + // tx: Transaction. Function returns the data by opening merks at given tx. + // Returns the Chunk proof operators for the requested chunk + pub fn fetch_chunk( + &self, + global_chunk_id: &[u8], + tx: TransactionArg, + ) -> Result, Error> { + let chunk_prefix_length: usize = 32; + if global_chunk_id.len() < chunk_prefix_length { + return Err(Error::CorruptedData( + "expected global chunk id of at least 32 length".to_string(), + )); } - Ok(result) - } + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); - /// Process one subtree's chunks - fn process_subtree_chunks<'p, P>( - &mut self, - result: &mut Vec, - ops_count: &mut usize, - subtree_path: P, - from_index: usize, - ) -> Result<(), Error> - where - P: IntoIterator, -

::IntoIter: Clone + DoubleEndedIterator, - { - let path_iter = subtree_path.into_iter(); - - let mut current_index = from_index; - let mut subtree_chunks = Vec::new(); - - loop { - let ops = self - .chunk_producer - .get_chunk(path_iter.clone(), current_index)?; - - *ops_count += ops.len(); - subtree_chunks.push((current_index, ops)); - current_index += 1; - if current_index >= self.chunk_producer.chunks_in_current_producer() - || *ops_count >= OPS_PER_CHUNK - { - break; - } - } + let mut array = [0u8; 32]; + array.copy_from_slice(chunk_prefix); + let chunk_prefix_key: crate::SubtreePrefix = array; - result.push(GroveChunk { subtree_chunks }); + let subtrees_metadata = self.get_subtrees_metadata(tx)?; - Ok(()) - } -} + match subtrees_metadata.data.get(&chunk_prefix_key) { + Some(path_data) => { + let subtree = &path_data.0; + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; -/// `Restorer` wrapper that applies multiple chunks at once and eventually -/// returns less requests. It is named by analogy with IO types that do less -/// syscalls. -pub struct BufferedRestorer<'db> { - restorer: Restorer<'db>, -} + match tx { + None => { + let merk = self + .open_non_transactional_merk_at_path(path.into(), None) + .value?; -impl<'db> BufferedRestorer<'db> { - /// New - pub fn new(restorer: Restorer<'db>) -> Self { - BufferedRestorer { restorer } - } + if merk.is_empty_tree().unwrap() { + return Ok(vec![]); + } - /// Process next chunk and receive instruction on what to do next. 
- pub fn process_grove_chunks(&mut self, chunks: I) -> Result - where - I: IntoIterator + ExactSizeIterator, - { - let mut response = RestorerResponse::Ready; - - for c in chunks.into_iter() { - for ops in c.subtree_chunks.into_iter().map(|x| x.1) { - if !ops.is_empty() { - response = self.restorer.process_chunk(ops)?; - } - } - } + let chunk_producer_res = ChunkProducer::new(&merk); + match chunk_producer_res { + Ok(mut chunk_producer) => match std::str::from_utf8(chunk_id) { + Ok(chunk_id_str) => { + let chunk_res = chunk_producer.chunk(chunk_id_str); + match chunk_res { + Ok((chunk, _)) => Ok(chunk), + Err(_) => Err(Error::CorruptedData( + "Unable to create to load chunk".to_string(), + )), + } + } + Err(_) => Err(Error::CorruptedData( + "Unable to process chunk id".to_string(), + )), + }, + Err(_) => Err(Error::CorruptedData( + "Unable to create Chunk producer".to_string(), + )), + } + } + Some(t) => { + let merk = self + .open_transactional_merk_at_path(path.into(), &t, None) + .value?; - Ok(response) - } -} + if merk.is_empty_tree().unwrap() { + return Ok(vec![]); + } -#[cfg(test)] -mod test { - use rand::RngCore; - use tempfile::TempDir; - - use super::*; - use crate::{ - batch::GroveDbOp, - reference_path::ReferencePathType, - tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, ANOTHER_TEST_LEAF, TEST_LEAF}, - }; - - fn replicate(original_db: &GroveDb) -> TempDir { - let replica_tempdir = TempDir::new().unwrap(); - - { - let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); - let mut chunk_producer = original_db.chunks(); - let tx = replica_db.start_transaction(); - - let mut restorer = Restorer::new( - &replica_db, - original_db.root_hash(None).unwrap().unwrap(), - &tx, - ) - .expect("cannot create restorer"); - - // That means root tree chunk with index 0 - let mut next_chunk: (Vec>, usize) = (vec![], 0); - - loop { - let chunk = chunk_producer - .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) - .expect("cannot get 
next chunk"); - match restorer.process_chunk(chunk).expect("cannot process chunk") { - RestorerResponse::Ready => break, - RestorerResponse::AwaitNextChunk { path, index } => { - next_chunk = (path, index); + let chunk_producer_res = ChunkProducer::new(&merk); + match chunk_producer_res { + Ok(mut chunk_producer) => match std::str::from_utf8(chunk_id) { + Ok(chunk_id_str) => { + let chunk_res = chunk_producer.chunk(chunk_id_str); + match chunk_res { + Ok((chunk, _)) => Ok(chunk), + Err(_) => Err(Error::CorruptedData( + "Unable to create to load chunk".to_string(), + )), + } + } + Err(_) => Err(Error::CorruptedData( + "Unable to process chunk id".to_string(), + )), + }, + Err(_) => Err(Error::CorruptedData( + "Unable to create Chunk producer".to_string(), + )), + } } } } - - replica_db.commit_transaction(tx).unwrap().unwrap(); + None => Err(Error::CorruptedData("Prefix not found".to_string())), } - replica_tempdir } - fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { - let replica_tempdir = TempDir::new().unwrap(); - - { - let replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); - let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); - let tx = replica_grove_db.start_transaction(); - - let mut restorer = BufferedRestorer::new( - Restorer::new( - &replica_grove_db, - original_db.root_hash(None).unwrap().unwrap(), - &tx, - ) - .expect("cannot create restorer"), - ); - - // That means root tree chunk with index 0 - let mut next_chunk: (Vec>, usize) = (vec![], 0); - - loop { - let chunks = chunk_producer - .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) - .expect("cannot get next chunk"); - match restorer - .process_grove_chunks(chunks.into_iter()) - .expect("cannot process chunk") + // Starts a state sync process (should be called by ABCI when OfferSnapshot + // method is called) Params: + // state_sync_info: Consumed StateSyncInfo + // app_hash: Snapshot's AppHash + // tx: Transaction for the state 
sync + // Returns the first set of global chunk ids that can be fetched from sources (+ + // the StateSyncInfo transferring ownership back to the caller) + pub fn start_snapshot_syncing<'db>( + &'db self, + mut state_sync_info: StateSyncInfo<'db>, + app_hash: CryptoHash, + tx: &'db Transaction, + ) -> Result<(Vec>, StateSyncInfo), Error> { + let mut res = vec![]; + + match ( + &mut state_sync_info.restorer, + &state_sync_info.current_prefix, + ) { + (None, None) => { + if state_sync_info.pending_chunks.is_empty() + && state_sync_info.processed_prefixes.is_empty() { - RestorerResponse::Ready => break, - RestorerResponse::AwaitNextChunk { path, index } => { - next_chunk = (path, index); + let root_prefix = [0u8; 32]; + if let Ok(merk) = self.open_merk_for_replication(SubtreePath::empty(), tx) { + let restorer = Restorer::new(merk, app_hash, None); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(root_prefix); + state_sync_info.pending_chunks.insert(root_prefix.to_vec()); + + res.push(root_prefix.to_vec()); + } else { + return Err(Error::InternalError("Unable to open merk for replication")); } + } else { + return Err(Error::InternalError("Invalid internal state sync info")); } } - - replica_grove_db.commit_transaction(tx).unwrap().unwrap(); - } - - replica_tempdir - } - - fn test_replication_internal<'a, I, R, F>( - original_db: &TempGroveDb, - to_compare: I, - replicate_fn: F, - ) where - R: AsRef<[u8]> + 'a, - I: Iterator, - F: Fn(&GroveDb) -> TempDir, - { - let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); - - let replica_tempdir = replicate_fn(original_db); - - let replica = GroveDb::open(replica_tempdir.path()).unwrap(); - assert_eq!( - replica.root_hash(None).unwrap().unwrap(), - expected_root_hash - ); - - for full_path in to_compare { - let (key, path) = full_path.split_last().unwrap(); - assert_eq!( - original_db.get(path, key.as_ref(), None).unwrap().unwrap(), - replica.get(path, key.as_ref(), 
None).unwrap().unwrap() - ); - } - } - - fn test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) - where - R: AsRef<[u8]> + 'a, - I: Iterator + Clone, - { - test_replication_internal(original_db, to_compare.clone(), replicate); - test_replication_internal(original_db, to_compare, replicate_bigger_messages); - } - - #[test] - fn replicate_wrong_root_hash() { - let db = make_test_grovedb(); - let mut bad_hash = db.root_hash(None).unwrap().unwrap(); - bad_hash[0] = bad_hash[0].wrapping_add(1); - - let tmp_dir = TempDir::new().unwrap(); - let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); - let tx = restored_db.start_transaction(); - let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); - let mut chunks = db.chunks(); - assert!(restorer - .process_chunk(chunks.get_chunk([], 0).unwrap()) - .is_err()); - } - - #[test] - fn replicate_provide_wrong_tree() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key1", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let expected_hash = db.root_hash(None).unwrap().unwrap(); - - let tmp_dir = TempDir::new().unwrap(); - let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); - let tx = restored_db.start_transaction(); - let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); - let mut chunks = db.chunks(); - - let next_op = restorer - .process_chunk(chunks.get_chunk([], 0).unwrap()) - .unwrap(); - match next_op { - RestorerResponse::AwaitNextChunk { path, index } => { - // Feed restorer a wrong Merk! 
- let chunk = if path == [TEST_LEAF] { - chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() - } else { - chunks.get_chunk([TEST_LEAF], index).unwrap() - }; - assert!(restorer.process_chunk(chunk).is_err()); + _ => { + return Err(Error::InternalError( + "GroveDB has already started a snapshot syncing", + )); } - _ => {} } - } - - #[test] - fn replicate_nested_grovedb() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF], - b"key2", - Element::new_reference(ReferencePathType::SiblingReference(b"key1".to_vec())), - None, - None, - ) - .unwrap() - .expect("should insert reference"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"key3", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2", b"key3"], - b"key4", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - #[test] - fn replicate_nested_grovedb_with_sum_trees() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF], - b"key2", - Element::new_reference(ReferencePathType::SiblingReference(b"key1".to_vec())), - None, - None, - ) - .unwrap() - .expect("should insert 
reference"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::empty_sum_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"sumitem", - Element::new_sum_item(15), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"key3", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2", b"key3"], - b"key4", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); + Ok((res, state_sync_info)) } - // TODO: Highlights a bug in replication - #[test] - fn replicate_grovedb_with_sum_tree() { - let db = make_test_grovedb(); - db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF, b"key1"], - b"key2", - Element::new_item(vec![4]), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF, b"key1"], - b"key3", - Element::new_item(vec![10]), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key1", b"key2"].as_ref(), - [TEST_LEAF, b"key1", b"key3"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - #[test] - fn replicate_a_big_one() { - const HEIGHT: usize = 3; - const SUBTREES_FOR_EACH: usize = 3; - const 
SCALARS_FOR_EACH: usize = 600; - - let db = make_test_grovedb(); - let mut to_compare = Vec::new(); - - let mut rng = rand::thread_rng(); - let mut subtrees: VecDeque> = VecDeque::new(); - - // Generate root tree leafs - for _ in 0..SUBTREES_FOR_EACH { - let mut bytes = [0; 8]; - rng.fill_bytes(&mut bytes); - db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) - .unwrap() - .unwrap(); - subtrees.push_front(vec![bytes]); - to_compare.push(vec![bytes]); - } - - while let Some(path) = subtrees.pop_front() { - let mut batch = Vec::new(); - - if path.len() < HEIGHT { - for _ in 0..SUBTREES_FOR_EACH { - let mut bytes = [0; 8]; - rng.fill_bytes(&mut bytes); - - batch.push(GroveDbOp::insert_op( - path.iter().map(|x| x.to_vec()).collect(), - bytes.to_vec(), - Element::empty_tree(), + // Apply a chunk (should be called by ABCI when ApplySnapshotChunk method is + // called) Params: + // state_sync_info: Consumed StateSyncInfo + // chunk: (Global chunk id, Chunk proof operators) + // tx: Transaction for the state sync + // Returns the next set of global chunk ids that can be fetched from sources (+ + // the StateSyncInfo transferring ownership back to the caller) + pub fn apply_chunk<'db>( + &'db self, + mut state_sync_info: StateSyncInfo<'db>, + chunk: (&[u8], Vec), + tx: &'db Transaction, + ) -> Result<(Vec>, StateSyncInfo), Error> { + let mut res = vec![]; + + let (global_chunk_id, chunk_data) = chunk; + let (chunk_prefix, chunk_id) = replication::util_split_global_chunk_id(global_chunk_id)?; + + match ( + &mut state_sync_info.restorer, + &state_sync_info.current_prefix, + ) { + (Some(restorer), Some(ref current_prefix)) => { + if *current_prefix != chunk_prefix { + return Err(Error::InternalError("Invalid incoming prefix")); + } + if !state_sync_info.pending_chunks.contains(global_chunk_id) { + return Err(Error::InternalError( + "Incoming global_chunk_id not expected", )); - - let mut new_path = path.clone(); - new_path.push(bytes); - 
subtrees.push_front(new_path.clone()); - to_compare.push(new_path.clone()); + } + state_sync_info.pending_chunks.remove(global_chunk_id); + if !chunk_data.is_empty() { + match restorer.process_chunk(chunk_id.to_string(), chunk_data) { + Ok(next_chunk_ids) => { + state_sync_info.num_processed_chunks += 1; + for next_chunk_id in next_chunk_ids { + let mut next_global_chunk_id = chunk_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); + state_sync_info + .pending_chunks + .insert(next_global_chunk_id.clone()); + res.push(next_global_chunk_id); + } + } + _ => { + return Err(Error::InternalError("Unable to process incoming chunk")); + } + }; } } - - for _ in 0..SCALARS_FOR_EACH { - let mut bytes = [0; 8]; - let mut bytes_val = vec![]; - rng.fill_bytes(&mut bytes); - rng.fill_bytes(&mut bytes_val); - - batch.push(GroveDbOp::insert_op( - path.iter().map(|x| x.to_vec()).collect(), - bytes.to_vec(), - Element::new_item(bytes_val), - )); - - let mut new_path = path.clone(); - new_path.push(bytes); - to_compare.push(new_path.clone()); + _ => { + return Err(Error::InternalError("GroveDB is not in syncing mode")); } - - db.apply_batch(batch, None, None).unwrap().unwrap(); } - test_replication(&db, to_compare.iter().map(|x| x.as_slice())); - } - - #[test] - fn replicate_from_checkpoint() { - // Create a simple GroveDb first - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - - // Save its state with checkpoint - let checkpoint_dir_parent = TempDir::new().unwrap(); - let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); - db.create_checkpoint(&checkpoint_dir).unwrap(); - - // Alter the db to make difference between current state and checkpoint - db.delete(&[TEST_LEAF], b"key1", None, None) - .unwrap() - 
.unwrap(); - db.insert( - &[TEST_LEAF], - b"key3", - Element::new_item(b"ayyd".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::new_item(b"ayyc".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - - let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); - - // Ensure checkpoint differs from current state - assert_ne!( - checkpoint_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - db.get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - ); - - // Build a replica from checkpoint - let replica_dir = replicate(&checkpoint_db); - let replica_db = GroveDb::open(&replica_dir).unwrap(); - - assert_eq!( - checkpoint_db.root_hash(None).unwrap().unwrap(), - replica_db.root_hash(None).unwrap().unwrap() - ); + if res.is_empty() { + if !state_sync_info.pending_chunks.is_empty() { + return Ok((res, state_sync_info)); + } + match ( + state_sync_info.restorer.take(), + state_sync_info.current_prefix.take(), + ) { + (Some(restorer), Some(current_prefix)) => { + if (state_sync_info.num_processed_chunks > 0) && (restorer.finalize().is_err()) + { + return Err(Error::InternalError("Unable to finalize merk")); + } + state_sync_info.processed_prefixes.insert(current_prefix); + + let subtrees_metadata = self.get_subtrees_metadata(Some(tx))?; + if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { + println!( + " path:{:?} done", + replication::util_path_to_string(&value.0) + ); + } - assert_eq!( - checkpoint_db - .get(&[TEST_LEAF], b"key1", None) - .unwrap() - .unwrap(), - replica_db - .get(&[TEST_LEAF], b"key1", None) - .unwrap() - .unwrap(), - ); - assert_eq!( - checkpoint_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - replica_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - ); - assert!(matches!( - replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), - Err(Error::PathKeyNotFound(_)) - )); + for (prefix, 
prefix_metadata) in &subtrees_metadata.data { + if !state_sync_info.processed_prefixes.contains(prefix) { + let (current_path, s_actual_value_hash, s_elem_value_hash) = + &prefix_metadata; + + let subtree_path: Vec<&[u8]> = + current_path.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + if let Ok(merk) = self.open_merk_for_replication(path.into(), tx) { + let restorer = Restorer::new( + merk, + *s_elem_value_hash, + Some(*s_actual_value_hash), + ); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(*prefix); + state_sync_info.num_processed_chunks = 0; + + let root_chunk_prefix = prefix.to_vec(); + state_sync_info + .pending_chunks + .insert(root_chunk_prefix.clone()); + res.push(root_chunk_prefix); + } else { + return Err(Error::InternalError( + "Unable to open merk for replication", + )); + } + break; + } + } + } + _ => { + return Err(Error::InternalError("Unable to finalize tree")); + } + } + } - // Drop original db and checkpoint dir too to ensure there is no dependency - drop(db); - drop(checkpoint_db); - drop(checkpoint_dir); - - assert_eq!( - replica_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - Element::new_item(b"ayyb".to_vec()) - ); + Ok((res, state_sync_info)) } } diff --git a/grovedb/src/tests/mod.rs b/grovedb/src/tests/mod.rs index 09a38e6b..95e0d2b1 100644 --- a/grovedb/src/tests/mod.rs +++ b/grovedb/src/tests/mod.rs @@ -465,7 +465,7 @@ fn test_element_with_flags() { let db = make_test_grovedb(); db.insert( - [TEST_LEAF.as_ref()].as_ref(), + [TEST_LEAF].as_ref(), b"key1", Element::empty_tree(), None, @@ -2812,7 +2812,7 @@ fn test_root_hash() { #[test] fn test_get_non_existing_root_leaf() { let db = make_test_grovedb(); - assert!(matches!(db.get(EMPTY_PATH, b"ayy", None).unwrap(), Err(_))); + assert!(db.get(EMPTY_PATH, b"ayy", None).unwrap().is_err()); } #[test] @@ -2839,7 +2839,7 @@ fn test_check_subtree_exists_function() { // Empty tree path means root 
always exist assert!(db - .check_subtree_exists_invalid_path(EMPTY_PATH.into(), None) + .check_subtree_exists_invalid_path(EMPTY_PATH, None) .unwrap() .is_ok()); @@ -2952,17 +2952,14 @@ fn test_storage_wipe() { .expect("cannot insert item"); // retrieve key before wipe - let elem = db - .get(&[TEST_LEAF.as_ref()], b"key", None) - .unwrap() - .unwrap(); + let elem = db.get(&[TEST_LEAF], b"key", None).unwrap().unwrap(); assert_eq!(elem, Element::new_item(b"ayy".to_vec())); // wipe the database db.grove_db.wipe().unwrap(); // retrieve key after wipe - let elem_result = db.get(&[TEST_LEAF.as_ref()], b"key", None).unwrap(); + let elem_result = db.get(&[TEST_LEAF], b"key", None).unwrap(); assert!(elem_result.is_err()); assert!(matches!( elem_result, diff --git a/grovedb/src/tests/query_tests.rs b/grovedb/src/tests/query_tests.rs index 304042bd..579b2e42 100644 --- a/grovedb/src/tests/query_tests.rs +++ b/grovedb/src/tests/query_tests.rs @@ -46,7 +46,7 @@ use crate::{ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -70,7 +70,7 @@ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { for j in 100u32..150 { let mut j_vec = i_vec.clone(); - j_vec.append(&mut (j as u32).to_be_bytes().to_vec()); + j_vec.append(&mut j.to_be_bytes().to_vec()); db.insert( [TEST_LEAF, i_vec.as_slice(), b"\0"].as_ref(), &j_vec.clone(), @@ -87,7 +87,7 @@ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 0u32..10 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -110,7 +110,7 @@ fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { 
.expect("successful subtree insert"); for j in 25u32..50 { - let j_vec = (j as u32).to_be_bytes().to_vec(); + let j_vec = j.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, i_vec.as_slice(), b"a"].as_ref(), &j_vec, @@ -134,7 +134,7 @@ fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for k in 100u32..110 { - let k_vec = (k as u32).to_be_bytes().to_vec(); + let k_vec = k.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, i_vec.as_slice(), b"a", &j_vec, b"\0"].as_ref(), &k_vec.clone(), @@ -173,7 +173,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, b"1"].as_ref(), &i_vec, @@ -198,7 +198,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { for j in 100u32..150 { let random_key = rand::thread_rng().gen::<[u8; 32]>(); let mut j_vec = i_vec.clone(); - j_vec.append(&mut (j as u32).to_be_bytes().to_vec()); + j_vec.append(&mut j.to_be_bytes().to_vec()); // We should insert every item to the tree holding items db.insert( @@ -231,7 +231,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { fn populate_tree_for_unique_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -278,7 +278,7 @@ fn populate_tree_by_reference_for_unique_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, b"1"].as_ref(), &i_vec, @@ -333,7 +333,7 @@ fn 
populate_tree_for_unique_range_subquery_with_non_unique_null_values(db: &mut .expect("successful subtree insert"); // Insert a couple of subtrees first for i in 100u32..200 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, &[], b"\0"].as_ref(), &i_vec, diff --git a/grovedb/src/versioning.rs b/grovedb/src/versioning.rs index a041b3d8..5a724afc 100644 --- a/grovedb/src/versioning.rs +++ b/grovedb/src/versioning.rs @@ -52,7 +52,7 @@ mod tests { assert_eq!(new_data, [244, 3, 1, 2, 3]); // show that read_version doesn't consume - assert_eq!(read_proof_version(&mut new_data.as_slice()).unwrap(), 500); + assert_eq!(read_proof_version(new_data.as_slice()).unwrap(), 500); assert_eq!(new_data, [244, 3, 1, 2, 3]); // show that we consume the version number and return the remaining vector diff --git a/merk/src/error.rs b/merk/src/error.rs index 405fdeb1..83fb3bde 100644 --- a/merk/src/error.rs +++ b/merk/src/error.rs @@ -27,6 +27,8 @@ // DEALINGS IN THE SOFTWARE. //! 
Errors +#[cfg(feature = "full")] +use crate::proofs::chunk::error::ChunkError; #[cfg(any(feature = "full", feature = "verify"))] #[derive(Debug, thiserror::Error)] @@ -57,13 +59,29 @@ pub enum Error { #[error("corrupted code execution error {0}")] CorruptedCodeExecution(&'static str), + /// Corrupted state + #[error("corrupted state: {0}")] + CorruptedState(&'static str), + /// Chunking error + #[cfg(feature = "full")] #[error("chunking error {0}")] - ChunkingError(&'static str), + ChunkingError(ChunkError), + + // TODO: remove + /// Old chunking error + #[error("chunking error {0}")] + OldChunkingError(&'static str), /// Chunk restoring error + #[cfg(feature = "full")] #[error("chunk restoring error {0}")] - ChunkRestoringError(String), + ChunkRestoringError(ChunkError), + + // TODO: remove + /// Chunk restoring error + #[error("chunk restoring error {0}")] + OldChunkRestoringError(String), /// Key not found error #[error("key not found error {0}")] @@ -97,6 +115,10 @@ pub enum Error { #[error("invalid operation error {0}")] InvalidOperation(&'static str), + /// Internal error + #[error("internal error {0}")] + InternalError(&'static str), + /// Specialized costs error #[error("specialized costs error {0}")] SpecializedCostsError(&'static str), diff --git a/merk/src/lib.rs b/merk/src/lib.rs index caf3837c..18255b27 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -35,7 +35,7 @@ extern crate core; /// The top-level store API. #[cfg(feature = "full")] -mod merk; +pub mod merk; #[cfg(feature = "full")] pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions, restore::Restorer}; diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 4f6564ef..8f840f91 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -26,479 +26,1039 @@ // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. -//! Provides `ChunkProducer`, which creates chunk proofs for full replication of -//! a Merk. 
+use std::collections::VecDeque; -#[cfg(feature = "full")] -use grovedb_costs::CostsExt; -#[cfg(feature = "full")] -use grovedb_storage::{RawIterator, StorageContext}; +use ed::Encode; +use grovedb_storage::StorageContext; -#[cfg(feature = "full")] -use super::Merk; -#[cfg(feature = "full")] use crate::{ error::Error, - proofs::{chunk::get_next_chunk, Node, Op}, + proofs::{ + chunk::{ + chunk_op::ChunkOp, + error::ChunkError, + util::{ + chunk_height, chunk_id_from_traversal_instruction, + chunk_id_from_traversal_instruction_with_recovery, generate_traversal_instruction, + generate_traversal_instruction_as_string, number_of_chunks, + string_as_traversal_instruction, + }, + }, + Node, Op, + }, + Error::ChunkingError, + Merk, }; -#[cfg(feature = "full")] +/// ChunkProof for replication of a single subtree +#[derive(Debug)] +pub struct SubtreeChunk { + chunk: Vec, + next_index: Option, + remaining_limit: Option, +} + +impl SubtreeChunk { + pub fn new(chunk: Vec, next_index: Option, remaining_limit: Option) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + +/// ChunkProof for the replication of multiple subtrees. +#[derive(Debug)] +pub struct MultiChunk { + pub chunk: Vec, + pub next_index: Option, + pub remaining_limit: Option, +} + +impl MultiChunk { + pub fn new( + chunk: Vec, + next_index: Option, + remaining_limit: Option, + ) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + /// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly /// replicating entire Merk trees. Chunks can be generated on the fly in a /// random order, or iterated in order for slightly better performance. 
-pub struct ChunkProducer<'db, S: StorageContext<'db>> { - trunk: Vec, - chunk_boundaries: Vec>, - raw_iter: S::RawIterator, +pub struct ChunkProducer<'db, S> { + /// Represents the max height of the Merk tree + height: usize, + /// Represents the index of the next chunk index: usize, + merk: &'db Merk, } -#[cfg(feature = "full")] impl<'db, S> ChunkProducer<'db, S> where S: StorageContext<'db>, { - /// Creates a new `ChunkProducer` for the given `Merk` instance. In the - /// constructor, the first chunk (the "trunk") will be created. - pub fn new(merk: &Merk) -> Result { - let (trunk, has_more) = merk - .walk(|maybe_walker| match maybe_walker { - Some(mut walker) => walker.create_trunk_proof(), - None => Ok((vec![], false)).wrap_with_cost(Default::default()), - }) - .unwrap()?; - - let chunk_boundaries = if has_more { - trunk - .iter() - .filter_map(|op| match op { - Op::Push(Node::KVValueHashFeatureType(key, ..)) => Some(key.clone()), - _ => None, - }) - .collect() - } else { - vec![] - }; - - let mut raw_iter = merk.storage.raw_iter(); - raw_iter.seek_to_first().unwrap(); - - Ok(ChunkProducer { - trunk, - chunk_boundaries, - raw_iter, - index: 0, + /// Creates a new `ChunkProducer` for the given `Merk` instance + pub fn new(merk: &'db Merk) -> Result { + let tree_height = merk + .height() + .ok_or(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + )))?; + Ok(Self { + height: tree_height as usize, + index: 1, + merk, }) } /// Gets the chunk with the given index. Errors if the index is out of /// bounds or the tree is empty - the number of chunks can be checked by /// calling `producer.len()`. 
- pub fn chunk(&mut self, index: usize) -> Result, Error> { - if index >= self.len() { - return Err(Error::ChunkingError("Chunk index out-of-bounds")); + pub fn chunk_with_index( + &mut self, + chunk_index: usize, + ) -> Result<(Vec, Option), Error> { + let traversal_instructions = generate_traversal_instruction(self.height, chunk_index)?; + self.chunk_internal(chunk_index, traversal_instructions) + } + + /// Returns the chunk at a given chunk id. + pub fn chunk(&mut self, chunk_id: &str) -> Result<(Vec, Option), Error> { + let traversal_instructions = string_as_traversal_instruction(chunk_id)?; + let chunk_index = chunk_id_from_traversal_instruction_with_recovery( + traversal_instructions.as_slice(), + self.height, + )?; + let (chunk, next_index) = self.chunk_internal(chunk_index, traversal_instructions)?; + let index_string = next_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose()?; + Ok((chunk, index_string)) + } + + /// Returns the chunk at the given index + /// Assumes index and traversal_instructions represents the same information + fn chunk_internal( + &mut self, + index: usize, + traversal_instructions: Vec, + ) -> Result<(Vec, Option), Error> { + // ensure that the chunk index is within bounds + let max_chunk_index = self.len(); + if index < 1 || index > max_chunk_index { + return Err(ChunkingError(ChunkError::OutOfBounds( + "chunk index out of bounds", + ))); } - self.index = index; + self.index = index + 1; + + let chunk_height = chunk_height(self.height, index).unwrap(); - if index == 0 || index == 1 { - self.raw_iter.seek_to_first().unwrap(); + let chunk = self.merk.walk(|maybe_walker| match maybe_walker { + Some(mut walker) => { + walker.traverse_and_build_chunk(&traversal_instructions, chunk_height) + } + None => Err(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + ))), + })?; + + // now we need to return the next index + // how do we know if we should 
return some or none + if self.index > max_chunk_index { + Ok((chunk, None)) } else { - let preceding_key = self.chunk_boundaries.get(index - 2).unwrap(); - self.raw_iter.seek(preceding_key).unwrap(); - self.raw_iter.next().unwrap(); + Ok((chunk, Some(self.index))) } + } - self.next_chunk() + /// Generate multichunk with chunk id + /// Multichunks accumulate as many chunks as they can until they have all + /// chunks or hit some optional limit + pub fn multi_chunk_with_limit( + &mut self, + chunk_id: &str, + limit: Option, + ) -> Result { + // we want to convert the chunk id to the index + let chunk_index = string_as_traversal_instruction(chunk_id).and_then(|instruction| { + chunk_id_from_traversal_instruction(instruction.as_slice(), self.height) + })?; + self.multi_chunk_with_limit_and_index(chunk_index, limit) } - /// Returns the total number of chunks for the underlying Merk tree. - #[allow(clippy::len_without_is_empty)] - pub fn len(&self) -> usize { - let boundaries_len = self.chunk_boundaries.len(); - if boundaries_len == 0 { - 1 - } else { - boundaries_len + 2 + /// Generate multichunk with chunk index + /// Multichunks accumulate as many chunks as they can until they have all + /// chunks or hit some optional limit + pub fn multi_chunk_with_limit_and_index( + &mut self, + index: usize, + limit: Option, + ) -> Result { + // TODO: what happens if the vec is filled? + // we need to have some kind of hardhoc limit value if none is supplied. + // maybe we can just do something with the length to fix this? 
+ let mut chunk = vec![]; + + let mut current_index = Some(index); + let mut current_limit = limit; + + // generate as many subtree chunks as we can + // until we have exhausted all or hit a limit restriction + while current_index.is_some() { + let current_index_traversal_instruction = generate_traversal_instruction( + self.height, + current_index.expect("confirmed is Some"), + )?; + let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction); + + // factor in the ChunkId encoding length in limit calculations + let temp_limit = if let Some(limit) = current_limit { + let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|_e| { + Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) + })?; + if limit >= chunk_id_op_encoding_len { + Some(limit - chunk_id_op_encoding_len) + } else { + Some(0) + } + } else { + None + }; + + let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit( + current_index.expect("confirmed is not None"), + temp_limit, + ); + + let limit_too_small_error = matches!( + subtree_multi_chunk_result, + Err(ChunkingError(ChunkError::LimitTooSmall(..))) + ); + + if limit_too_small_error { + if chunk.is_empty() { + // no progress, return limit too small error + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); + } else { + // made progress, send accumulated chunk + break; + } + } + + let subtree_multi_chunk = subtree_multi_chunk_result?; + + chunk.push(chunk_id_op); + chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk)); + + // update loop parameters + current_index = subtree_multi_chunk.next_index; + current_limit = subtree_multi_chunk.remaining_limit; } + + let index_string = current_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose()?; + + Ok(MultiChunk::new(chunk, index_string, current_limit)) } - /// Gets the next chunk based on the `ChunkProducer`'s internal index state. 
- /// This is mostly useful for letting `ChunkIter` yield the chunks in order, - /// optimizing throughput compared to random access. - fn next_chunk(&mut self) -> Result, Error> { - if self.index == 0 { - if self.trunk.is_empty() { - return Err(Error::ChunkingError( - "Attempted to fetch chunk on empty tree", - )); + /// Packs as many chunks as it can from a starting chunk index, into a + /// vector. Stops when we have exhausted all chunks or we have reached + /// some limit. + fn subtree_multi_chunk_with_limit( + &mut self, + index: usize, + limit: Option, + ) -> Result { + let max_chunk_index = number_of_chunks(self.height); + let mut chunk_index = index; + + // we first get the chunk at the given index + // TODO: use the returned chunk index rather than tracking + let (chunk_ops, _) = self.chunk_with_index(chunk_index)?; + let mut chunk_byte_length = chunk_ops.encoding_length().map_err(|_e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + chunk_index += 1; + + let mut chunk = VecDeque::from(chunk_ops); + + // ensure the limit is not less than first chunk byte length + // if it is we can't proceed and didn't make progress so we return an error + if let Some(limit) = limit { + if chunk_byte_length > limit { + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); } - self.index += 1; - return Ok(self.trunk.clone()); } - if self.index >= self.len() { - panic!("Called next_chunk after end"); + let mut iteration_index = 0; + while iteration_index < chunk.len() { + // we only perform replacements on Hash nodes + if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) { + // TODO: use the returned chunk index rather than tracking + let (replacement_chunk, _) = self.chunk_with_index(chunk_index)?; + + // calculate the new total + let new_total = replacement_chunk.encoding_length().map_err(|_e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) 
+ })? + chunk_byte_length + - chunk[iteration_index].encoding_length().map_err(|_e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + + // verify that this chunk doesn't make use exceed the limit + if let Some(limit) = limit { + if new_total > limit { + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; + + return Ok(SubtreeChunk::new( + chunk.into(), + next_index, + Some(limit - chunk_byte_length), + )); + } + } + + chunk_byte_length = new_total; + chunk_index += 1; + + chunk.remove(iteration_index); + for op in replacement_chunk.into_iter().rev() { + chunk.insert(iteration_index, op); + } + } else { + iteration_index += 1; + } } - let end_key = self.chunk_boundaries.get(self.index - 1); - let end_key_slice = end_key.as_ref().map(|k| k.as_slice()); + let remaining_limit = limit.map(|l| l - chunk_byte_length); + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; - self.index += 1; + Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit)) + } - get_next_chunk(&mut self.raw_iter, end_key_slice).unwrap() + /// Returns the total number of chunks for the underlying Merk tree. + pub fn len(&self) -> usize { + number_of_chunks(self.height) } -} -#[cfg(feature = "full")] -impl<'db, S> IntoIterator for ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - type IntoIter = ChunkIter<'db, S>; - type Item = as Iterator>::Item; + pub fn is_empty(&self) -> bool { + number_of_chunks(self.height) == 0 + } - fn into_iter(self) -> Self::IntoIter { - ChunkIter(self) + /// Gets the next chunk based on the `ChunkProducer`'s internal index state. + /// This is mostly useful for letting `ChunkIter` yield the chunks in order, + /// optimizing throughput compared to random access. 
+ // TODO: this is not better than random access, as we are not keeping state + // that will make this more efficient, decide if this should be fixed or not + fn next_chunk(&mut self) -> Option, Option), Error>> { + let max_index = number_of_chunks(self.height); + if self.index > max_index { + return None; + } + + // get the chunk at the given index + // return the next index as a string + Some( + self.chunk_with_index(self.index) + .and_then(|(chunk, chunk_index)| { + chunk_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose() + .map(|v| (chunk, v)) + }), + ) } } -#[cfg(feature = "full")] -/// A `ChunkIter` iterates through all the chunks for the underlying `Merk` -/// instance in order (the first chunk is the "trunk" chunk). Yields `None` -/// after all chunks have been yielded. -pub struct ChunkIter<'db, S>(ChunkProducer<'db, S>) -where - S: StorageContext<'db>; - -#[cfg(feature = "full")] -impl<'db, S> Iterator for ChunkIter<'db, S> +/// Iterate over each chunk, returning `None` after last chunk +impl<'db, S> Iterator for ChunkProducer<'db, S> where S: StorageContext<'db>, { - type Item = Result, Error>; - - fn size_hint(&self) -> (usize, Option) { - (self.0.len(), Some(self.0.len())) - } + type Item = Result<(Vec, Option), Error>; fn next(&mut self) -> Option { - if self.0.index >= self.0.len() { - None - } else { - Some(self.0.next_chunk()) - } + self.next_chunk() } } -#[cfg(feature = "full")] impl<'db, S> Merk where S: StorageContext<'db>, { /// Creates a `ChunkProducer` which can return chunk proofs for replicating /// the entire Merk tree. 
- pub fn chunks(&self) -> Result, Error> { + pub fn chunks(&'db self) -> Result, Error> { ChunkProducer::new(self) } } -#[cfg(feature = "full")] #[cfg(test)] -mod tests { - use grovedb_path::SubtreePath; - use grovedb_storage::{rocksdb_storage::RocksDbStorage, Storage, StorageBatch}; - use tempfile::TempDir; - +mod test { use super::*; use crate::{ - proofs::chunk::{verify_leaf, verify_trunk}, - test_utils::*, - tree::kv::ValueDefinedCostType, + proofs::{ + chunk::{ + chunk::{ + tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, + LEFT, RIGHT, + }, + util::traversal_instruction_as_string, + }, + tree::execute, + Tree, + }, + test_utils::{make_batch_seq, TempMerk}, + tree::RefWalker, + PanicSource, }; - #[test] - fn len_small() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..256); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + #[derive(Default)] + struct NodeCounts { + hash: usize, + kv_hash: usize, + kv: usize, + kv_value_hash: usize, + kv_digest: usize, + kv_ref_value_hash: usize, + kv_value_hash_feature_type: usize, + } + + impl NodeCounts { + fn sum(&self) -> usize { + self.hash + + self.kv_hash + + self.kv + + self.kv_value_hash + + self.kv_digest + + self.kv_ref_value_hash + + self.kv_value_hash_feature_type + } + } - let chunks = merk.chunks().unwrap(); - assert_eq!(chunks.len(), 1); - assert_eq!(chunks.into_iter().size_hint().0, 1); + fn count_node_types(tree: Tree) -> NodeCounts { + let mut counts = NodeCounts::default(); + + tree.visit_nodes(&mut |node| { + match node { + Node::Hash(_) => counts.hash += 1, + Node::KVHash(_) => counts.kv_hash += 1, + Node::KV(..) => counts.kv += 1, + Node::KVValueHash(..) => counts.kv_value_hash += 1, + Node::KVDigest(..) => counts.kv_digest += 1, + Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, + Node::KVValueHashFeatureType(..) 
=> counts.kv_value_hash_feature_type += 1, + }; + }); + + counts } #[test] - fn len_big() { + fn test_merk_chunk_len() { + // Tree of height 5 - max of 31 elements, min of 16 elements + // 5 will be broken into 2 layers = [3, 2] + // exit nodes from first layer = 2^3 = 8 + // total_chunk = 1 + 8 = 9 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..10_000); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + let batch = make_batch_seq(0..20); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 9); - let chunks = merk.chunks().unwrap(); - assert_eq!(chunks.len(), 129); - assert_eq!(chunks.into_iter().size_hint().0, 129); + // Tree of height 10 - max of 1023 elements, min of 512 elements + // 4 layers -> [3,3,2,2] + // chunk_count_per_layer -> [1, 8, 64, 256] + // total = 341 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1000); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(10)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 329); } #[test] - fn generate_and_verify_chunks() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..10_000); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + fn test_chunk_producer_iter() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks - let mut chunks = merk.chunks().unwrap().into_iter().map(|x| x.unwrap()); + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + 
assert_eq!(merk.height(), Some(4)); - let chunk = chunks.next().unwrap(); - let (trunk, height) = verify_trunk(chunk.into_iter().map(Ok)).unwrap().unwrap(); - assert_eq!(height, 14); - assert_eq!(trunk.hash().unwrap(), merk.root_hash().unwrap()); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - assert_eq!(trunk.layer(7).count(), 128); + // build iterator from first chunk producer + let mut chunks = merk.chunks().expect("should return producer"); - for (ops, node) in chunks.zip(trunk.layer(height / 2)) { - verify_leaf(ops.into_iter().map(Ok), node.hash().unwrap()) - .unwrap() - .unwrap(); + // ensure that the chunks gotten from the iterator is the same + // as that from the chunk producer + for i in 1..=5 { + assert_eq!( + chunks.next().unwrap().unwrap().0, + chunk_producer.chunk_with_index(i).unwrap().0 + ); } + + // returns None after max + assert!(chunks.next().is_none()); } #[test] - fn chunks_from_reopen() { - let tmp_dir = TempDir::new().expect("cannot create tempdir"); - let original_chunks = { - let storage = RocksDbStorage::default_rocksdb_with_path(tmp_dir.path()) - .expect("cannot open rocksdb storage"); - let batch = StorageBatch::new(); - let mut merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), Some(&batch)) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let merk_batch = make_batch_seq(1..10); - merk.apply::<_, Vec<_>>(&merk_batch, &[], None) - .unwrap() - .unwrap(); + fn test_random_chunk_access() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks - storage - .commit_multi_context_batch(batch, None) - .unwrap() - .expect("cannot commit batch"); - - let merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), None) - .unwrap(), - false, - None::<&fn(&[u8]) -> 
Option>, - ) + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) .unwrap() - .unwrap(); + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - merk.chunks() - .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>() - .into_iter() - }; - let storage = RocksDbStorage::default_rocksdb_with_path(tmp_dir.path()) - .expect("cannot open rocksdb storage"); - let merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), None) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let reopen_chunks = merk.chunks().unwrap().into_iter().map(|x| x.unwrap()); + let mut inner_tree = merk.tree.take().expect("has inner tree"); + merk.tree.set(Some(inner_tree.clone())); - for (original, checkpoint) in original_chunks.zip(reopen_chunks) { - assert_eq!(original.len(), checkpoint.len()); - } - } + // TODO: should I be using panic source? + let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {}); - // #[test] - // fn chunks_from_checkpoint() { - // let mut merk = TempMerk::new(); - // let batch = make_batch_seq(1..10); - // merk.apply(batch.as_slice(), &[]).unwrap(); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + assert_eq!(chunk_producer.len(), 5); - // let path: std::path::PathBuf = - // "generate_and_verify_chunks_from_checkpoint.db".into(); if path. 
- // exists() { std::fs::remove_dir_all(&path).unwrap(); - // } - // let checkpoint = merk.checkpoint(&path).unwrap(); + // assert bounds + assert!(chunk_producer.chunk_with_index(0).is_err()); + assert!(chunk_producer.chunk_with_index(6).is_err()); - // let original_chunks = - // merk.chunks().unwrap().into_iter().map(Result::unwrap); - // let checkpoint_chunks = - // checkpoint.chunks().unwrap().into_iter().map(Result::unwrap); + // first chunk + // expected: + // 7 + // / \ + // 3 11 + // / \ / \ + // H(1) H(5) H(9) H(13) + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(1) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 13); + assert_eq!(next_chunk, Some(2)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])), + Op::Child, + Op::Child + ] + ); - // for (original, checkpoint) in original_chunks.zip(checkpoint_chunks) { - // assert_eq!(original.len(), checkpoint.len()); - // } + // second chunk + // expected: + // 1 + // / \ + // 0 2 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(2) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(3)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, RIGHT] + )), + Op::Child + ] + ); - // 
std::fs::remove_dir_all(&path).unwrap(); - // } + // third chunk + // expected: + // 5 + // / \ + // 4 6 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(3) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(4)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, RIGHT] + )), + Op::Child + ] + ); + + // third chunk + // expected: + // 9 + // / \ + // 8 10 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(4) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(5)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child + ] + ); + + // third chunk + // expected: + // 13 + // / \ + // 12 14 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(5) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, None); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child + ] + ); + } #[test] - fn random_access_chunks() { + fn test_subtree_chunk_no_limit() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..111); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = 
make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + // generate multi chunk with no limit + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, None) + .expect("should generate chunk with limit"); - let chunks = merk - .chunks() + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + let tree = execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(())) .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>(); - - let mut producer = merk.chunks().unwrap(); - for i in 0..chunks.len() * 2 { - let index = i % chunks.len(); - assert_eq!(producer.chunk(index).unwrap(), chunks[index]); - } + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + // assert that all nodes are of type kv_value_hash_feature_type + let node_counts = count_node_types(tree); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.kv_hash, 0); + assert_eq!(node_counts.kv, 0); + assert_eq!(node_counts.kv_value_hash, 0); + assert_eq!(node_counts.kv_digest, 0); + assert_eq!(node_counts.kv_ref_value_hash, 0); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); } #[test] - #[should_panic(expected = "Attempted to fetch chunk on empty tree")] - fn test_chunk_empty() { - let merk = TempMerk::new(); + fn test_subtree_chunk_with_limit() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let _chunks = merk - .chunks() + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // initial chunk is of size 453, so limit of 10 is too small + // should return an error + let 
chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10)); + assert!(chunk.is_err()); + + // get just the fist chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(453)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(2)); + + let chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 453); + assert_eq!(chunk.len(), 13); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 3); + assert_eq!(node_counts.hash, 4); + assert_eq!(node_counts.sum(), 4 + 3); + + // get up to second chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(737)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(3)); + + let chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 737); + assert_eq!(chunk.len(), 17); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>(); + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 6); + assert_eq!(node_counts.hash, 3); + assert_eq!(node_counts.sum(), 6 + 3); + + // get up to third chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1021)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(4)); + + let chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 
1021); + assert_eq!(chunk.len(), 21); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 9); + assert_eq!(node_counts.hash, 2); + assert_eq!(node_counts.sum(), 9 + 2); + + // get up to fourth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1305)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(5)); + + let chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1305); + assert_eq!(chunk.len(), 25); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 12); + assert_eq!(node_counts.hash, 1); + assert_eq!(node_counts.sum(), 12 + 1); + + // get up to fifth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1589)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, None); + + let chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); + + // limit larger than total chunk + let chunk_result = chunk_producer + 
.subtree_multi_chunk_with_limit(1, Some(usize::MAX)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); + assert_eq!(chunk_result.next_index, None); + + let chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); } #[test] - #[should_panic(expected = "Chunk index out-of-bounds")] - fn test_chunk_index_oob() { + fn test_multi_chunk_with_no_limit_trunk() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..42); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let mut producer = merk.chunks().unwrap(); - let _chunk = producer.chunk(50000).unwrap(); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 1, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(1, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // should only contain 2 items, the starting chunk id and the entire tree + assert_eq!(chunk_result.chunk.len(), 2); + + // assert items + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![])); + if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] { + let tree = 
execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + } else { + panic!("expected ChunkOp::Chunk"); + } } - // #[test] - // fn test_chunk_index_gt_1_access() { - // let mut merk = TempMerk::new(); - // let batch = make_batch_seq(1..513); - // merk.apply::<_, Vec<_>>(&batch, &[]).unwrap().unwrap(); - - // let mut producer = merk.chunks().unwrap(); - // println!("length: {}", producer.len()); - // let chunk = producer.chunk(2).unwrap(); - // assert_eq!( - // chunk, - // vec![ - // 3, 8, 0, 0, 0, 0, 0, 0, 0, 18, 0, 60, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 3, 8, 0, 0, 0, 0, 0, 0, 0, 19, 0, 60, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 20, 0, 60, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 21, 0, 60, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 
123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 22, - // 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 23, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 60, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 17, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 25, 0, - // 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 26, 0, 60, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 3, 8, 0, 0, 0, 0, 0, 0, 0, 27, 0, 60, 123, - // 123, 123, 123, 123, 123, 123, 
123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, - // 0, 0, 0, 28, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 29, 0, 60, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, - // 30, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 31, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 32, 0, 60, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 
123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 17, 17, 17 - // ] - // ); - // } + #[test] + fn test_multi_chunk_with_no_limit_not_trunk() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 2, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(2, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // chunk 2 - 5 will be considered separate subtrees + // each will have an accompanying chunk id, so 8 elements total + assert_eq!(chunk_result.chunk.len(), 8); + + // assert the chunk id's + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); + assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); + + // assert the chunks + assert_eq!( + chunk_result.chunk[1], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(2) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[3], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(3) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[5], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(4) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[7], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(5) + .expect("should generate chunk") + .0 + ) + ); + } #[test] 
- #[should_panic(expected = "Called next_chunk after end")] - fn test_next_chunk_index_oob() { + fn test_multi_chunk_with_limit() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..42); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // ensure that the remaining limit, next index and values given are correct + // if limit is smaller than first chunk, we should get an error + let chunk_result = chunk_producer.multi_chunk_with_limit("", Some(5)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 + // data size of chunk 2 is exactly 317 + // chunk op encoding for chunk 2 = 321 + // hence limit of 317 will be insufficient + let chunk_result = chunk_producer.multi_chunk_with_limit_and_index(2, Some(317)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); - let mut producer = merk.chunks().unwrap(); - let _chunk1 = producer.next_chunk(); - let _chunk2 = producer.next_chunk(); + // get chunk 2 and 3 + // chunk 2 chunk op = 331 + // chunk 3 chunk op = 321 + // padding = 5 + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(2, Some(321 + 321 + 5)) + .expect("should generate chunk"); + assert_eq!( + chunk_result.next_index, + Some(traversal_instruction_as_string( + &generate_traversal_instruction(4, 4).unwrap() + )) + ); + assert_eq!(chunk_result.remaining_limit, Some(5)); + assert_eq!(chunk_result.chunk.len(), 4); + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); } } diff --git a/merk/src/merk/mod.rs 
b/merk/src/merk/mod.rs index 93c052a4..94b99add 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -29,7 +29,6 @@ //! Merk pub mod chunks; - pub(crate) mod defaults; pub mod options; @@ -45,7 +44,7 @@ pub mod source; use std::{ cell::Cell, - collections::{BTreeSet, LinkedList}, + collections::{BTreeMap, BTreeSet, LinkedList}, fmt, }; @@ -61,11 +60,19 @@ use source::MerkSource; use crate::{ error::Error, merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, - proofs::{query::query_item::QueryItem, Query}, + proofs::{ + chunk::{ + chunk::{LEFT, RIGHT}, + util::traversal_instruction_as_string, + }, + query::query_item::QueryItem, + Query, + }, tree::{ kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, }, Error::{CostsError, EdError, StorageError}, + Link, MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, }; @@ -276,6 +283,11 @@ where }) } + /// Returns the height of the Merk tree + pub fn height(&self) -> Option { + self.use_tree(|tree| tree.map(|tree| tree.height())) + } + /// Returns the root non-prefixed key of the tree. If the tree is empty, /// None. pub fn root_key(&self) -> Option> { @@ -536,6 +548,142 @@ where Ok(()).wrap_with_cost(Default::default()) } } + + /// Verifies the correctness of a merk tree + /// hash values are computed correctly, heights are accurate and links + /// consistent with backing store. 
+ // TODO: define the return types + pub fn verify( + &self, + skip_sum_checks: bool, + ) -> (BTreeMap, BTreeMap>) { + let tree = self.tree.take(); + + let mut bad_link_map: BTreeMap = BTreeMap::new(); + let mut parent_keys: BTreeMap> = BTreeMap::new(); + let mut root_traversal_instruction = vec![]; + + // TODO: remove clone + self.verify_tree( + // TODO: handle unwrap + &tree.clone().unwrap(), + &mut root_traversal_instruction, + &mut bad_link_map, + &mut parent_keys, + skip_sum_checks, + ); + self.tree.set(tree); + + (bad_link_map, parent_keys) + } + + fn verify_tree( + &self, + tree: &TreeNode, + traversal_instruction: &mut Vec, + bad_link_map: &mut BTreeMap, + parent_keys: &mut BTreeMap>, + skip_sum_checks: bool, + ) { + if let Some(link) = tree.link(LEFT) { + traversal_instruction.push(LEFT); + self.verify_link( + link, + tree.key(), + traversal_instruction, + bad_link_map, + parent_keys, + skip_sum_checks, + ); + traversal_instruction.pop(); + } + + if let Some(link) = tree.link(RIGHT) { + traversal_instruction.push(RIGHT); + self.verify_link( + link, + tree.key(), + traversal_instruction, + bad_link_map, + parent_keys, + skip_sum_checks, + ); + traversal_instruction.pop(); + } + } + + fn verify_link( + &self, + link: &Link, + parent_key: &[u8], + traversal_instruction: &mut Vec, + bad_link_map: &mut BTreeMap, + parent_keys: &mut BTreeMap>, + skip_sum_checks: bool, + ) { + let (hash, key, sum) = match link { + Link::Reference { hash, key, sum, .. } => { + (hash.to_owned(), key.to_owned(), sum.to_owned()) + } + Link::Modified { tree, .. 
} => ( + tree.hash().unwrap(), + tree.key().to_vec(), + tree.sum().unwrap(), + ), + Link::Loaded { + hash, + child_heights: _, + sum, + tree, + } => (hash.to_owned(), tree.key().to_vec(), sum.to_owned()), + _ => todo!(), + }; + + let instruction_id = traversal_instruction_as_string(traversal_instruction); + let node = TreeNode::get( + &self.storage, + key, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap(); + + if node.is_err() { + bad_link_map.insert(instruction_id.clone(), hash); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + let node = node.unwrap(); + if node.is_none() { + bad_link_map.insert(instruction_id.clone(), hash); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + let node = node.unwrap(); + if node.hash().unwrap() != hash { + bad_link_map.insert(instruction_id.clone(), hash); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + // Need to skip this when restoring a sum tree + if !skip_sum_checks && node.sum().unwrap() != sum { + bad_link_map.insert(instruction_id.clone(), hash); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + // TODO: check child heights + // all checks passed, recurse + self.verify_tree( + &node, + traversal_instruction, + bad_link_map, + parent_keys, + skip_sum_checks, + ); + } } fn fetch_node<'db>( @@ -557,6 +705,7 @@ fn fetch_node<'db>( #[cfg(test)] mod test { + use grovedb_path::SubtreePath; use grovedb_storage::{ rocksdb_storage::{PrefixedRocksDbStorageContext, RocksDbStorage}, @@ -598,6 +747,41 @@ mod test { ); } + #[test] + fn tree_height() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(1)); + + // height 2 + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..2); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + 
assert_eq!(merk.height(), Some(2)); + + // height 5 + // 2^5 - 1 = 31 (max number of elements in tree of height 5) + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..31); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + + // should still be height 5 for 29 elements + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..29); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + } + #[test] fn insert_uncached() { let batch_size = 20; diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index e6ac22e2..e2439f5c 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -29,263 +29,284 @@ //! Provides `Restorer`, which can create a replica of a Merk instance by //! receiving chunk proofs. -#[cfg(feature = "full")] -use std::{iter::Peekable, u8}; +use std::collections::BTreeMap; -#[cfg(feature = "full")] use grovedb_storage::{Batch, StorageContext}; -#[cfg(feature = "full")] -use super::Merk; -#[cfg(feature = "full")] -use crate::merk::source::MerkSource; -use crate::tree::kv::ValueDefinedCostType; -#[cfg(feature = "full")] use crate::{ - error::Error, + merk, + merk::MerkSource, proofs::{ - chunk::{verify_leaf, verify_trunk, MIN_TRUNK_HEIGHT}, - tree::{Child, Tree as ProofTree}, + chunk::{ + chunk::{LEFT, RIGHT}, + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{string_as_traversal_instruction, traversal_instruction_as_string}, + }, + tree::{execute, Child, Tree as ProofTree}, Node, Op, }, - tree::{combine_hash, value_hash, Link, RefWalker, TreeNode}, - CryptoHash, - Error::{CostsError, EdError, StorageError}, - TreeFeatureType::BasicMerkNode, + tree::{combine_hash, kv::ValueDefinedCostType, RefWalker, TreeNode}, + CryptoHash, Error, + Error::{CostsError, StorageError}, + Link, Merk, }; -#[cfg(feature = "full")] -/// A `Restorer` handles decoding, 
verifying, and storing chunk proofs to -/// replicate an entire Merk tree. It expects the chunks to be processed in -/// order, retrying the last chunk if verification fails. +/// Restorer handles verification of chunks and replication of Merk trees. +/// Chunks can be processed randomly as long as their parent has been processed +/// already. pub struct Restorer { - leaf_hashes: Option>>, - parent_keys: Option>>>, - trunk_height: Option, merk: Merk, - expected_root_hash: CryptoHash, - combining_value: Option>, + chunk_id_to_root_hash: BTreeMap, + parent_key_value_hash: Option, + // this is used to keep track of parents whose links need to be rewritten + parent_keys: BTreeMap>, } -#[cfg(feature = "full")] impl<'db, S: StorageContext<'db>> Restorer { - /// Creates a new `Restorer`, which will initialize a new Merk at the given - /// file path. The first chunk (the "trunk") will be compared against - /// `expected_root_hash`, then each subsequent chunk will be compared - /// against the hashes stored in the trunk, so that the restore process will - /// never allow malicious peers to send more than a single invalid chunk. + /// Initializes a new chunk restorer with the expected root hash for the + /// first chunk pub fn new( merk: Merk, - combining_value: Option>, expected_root_hash: CryptoHash, + parent_key_value_hash: Option, ) -> Self { + let mut chunk_id_to_root_hash = BTreeMap::new(); + chunk_id_to_root_hash.insert(traversal_instruction_as_string(&[]), expected_root_hash); Self { - expected_root_hash, - combining_value, - trunk_height: None, merk, - leaf_hashes: None, - parent_keys: None, + chunk_id_to_root_hash, + parent_key_value_hash, + parent_keys: BTreeMap::new(), } } - /// Verifies a chunk and writes it to the working RocksDB instance. Expects - /// to be called for each chunk in order. Returns the number of remaining - /// chunks. - /// - /// Once there are no remaining chunks to be processed, `finalize` should - /// be called. 
- pub fn process_chunk(&mut self, ops: impl IntoIterator) -> Result { - match self.leaf_hashes { - None => self.process_trunk(ops), - Some(_) => self.process_leaf(ops), + // TODO: consider converting chunk id to a vec + /// Processes a chunk at some chunk id, returns the chunks id's of chunks + /// that can be requested + pub fn process_chunk( + &mut self, + chunk_id: String, + chunk: Vec, + ) -> Result, Error> { + let expected_root_hash = self + .chunk_id_to_root_hash + .get(&chunk_id) + .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; + + let mut parent_key_value_hash: Option = None; + if chunk_id.is_empty() { + parent_key_value_hash = self.parent_key_value_hash; } - } + let chunk_tree = Self::verify_chunk(chunk, expected_root_hash, &parent_key_value_hash)?; - /// Consumes the `Restorer` and returns the newly-created, fully-populated - /// Merk instance. This method will return an error if called before - /// processing all chunks (e.g. `restorer.remaining_chunks()` is not equal - /// to 0). 
- pub fn finalize(mut self) -> Result, Error> { - if self.remaining_chunks().unwrap_or(0) != 0 { - return Err(Error::ChunkRestoringError( - "Called finalize before all chunks were processed".to_string(), - )); - } + let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; - if self.trunk_height.unwrap() >= MIN_TRUNK_HEIGHT { - self.rewrite_trunk_child_heights()?; + if root_traversal_instruction.is_empty() { + let _ = self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); + } else { + // every non root chunk has some associated parent with a placeholder link + // here we update the placeholder link to represent the true data + self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?; } - self.merk - .load_base_root(None:: Option>) - .unwrap()?; + // next up, we need to write the chunk and build the map again + let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction); + if chunk_write_result.is_ok() { + // if we were able to successfully write the chunk, we can remove + // the chunk expected root hash from our chunk id map + self.chunk_id_to_root_hash.remove(&chunk_id); + } - Ok(self.merk) + chunk_write_result } - /// Returns the number of remaining chunks to be processed. If called before - /// the first chunk is processed, this method will return `None` since we do - /// not yet have enough information to know about the number of chunks. 
- pub fn remaining_chunks(&self) -> Option { - self.leaf_hashes.as_ref().map(|lh| lh.len()) + /// Process multi chunks (space optimized chunk proofs that can contain + /// multiple singular chunks) + pub fn process_multi_chunk(&mut self, multi_chunk: Vec) -> Result, Error> { + let mut expect_chunk_id = true; + let mut chunk_ids = vec![]; + let mut current_chunk_id: String = "".to_string(); + + for chunk_op in multi_chunk { + if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id) + || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id) + { + return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk( + "invalid multi chunk ordering", + ))); + } + match chunk_op { + ChunkOp::ChunkId(instructions) => { + current_chunk_id = traversal_instruction_as_string(&instructions); + } + ChunkOp::Chunk(chunk) => { + // TODO: remove clone + let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?; + chunk_ids.extend(next_chunk_ids); + } + } + expect_chunk_id = !expect_chunk_id; + } + Ok(chunk_ids) } - /// Writes the data contained in `tree` (extracted from a verified chunk - /// proof) to the RocksDB. 
- fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> { - let mut batch = self.merk.storage.new_batch(); - - tree.visit_refs(&mut |proof_node| { - if let Some((mut node, key)) = match &proof_node.node { - Node::KV(key, value) => Some(( - TreeNode::new(key.clone(), value.clone(), None, BasicMerkNode).unwrap(), - key, - )), - Node::KVValueHash(key, value, value_hash) => Some(( - TreeNode::new_with_value_hash( - key.clone(), - value.clone(), - *value_hash, - BasicMerkNode, - ) - .unwrap(), - key, - )), - Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some(( - TreeNode::new_with_value_hash( - key.clone(), - value.clone(), - *value_hash, - *feature_type, - ) - .unwrap(), - key, - )), - _ => None, - } { - // TODO: encode tree node without cloning key/value - *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); - *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); - - let bytes = node.encode(); - batch.put(key, &bytes, None, None).map_err(CostsError) - } else { + /// Verifies the structure of a chunk and ensures the chunk matches the + /// expected root hash + fn verify_chunk( + chunk: Vec, + expected_root_hash: &CryptoHash, + parent_key_value_hash_opt: &Option, + ) -> Result { + let chunk_len = chunk.len(); + let mut kv_count = 0; + let mut hash_count = 0; + + // build tree from ops + // ensure only made of KvValueFeatureType and Hash nodes and count them + let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| { + if matches!(node, Node::KVValueHashFeatureType(..)) { + kv_count += 1; Ok(()) + } else if matches!(node, Node::Hash(..)) { + hash_count += 1; + Ok(()) + } else { + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + } + }) + .unwrap()?; + + // chunk len must be exactly equal to the kv_count + hash_count + + // parent_branch_count + child_branch_count + debug_assert_eq!(chunk_len, 
((kv_count + hash_count) * 2) - 1); + + // chunk structure verified, next verify root hash + match parent_key_value_hash_opt { + Some(val_hash) => { + let combined_hash = combine_hash(val_hash, &tree.hash().unwrap()).unwrap(); + if &combined_hash != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", + ))); + } } - })?; + None => { + if &tree.hash().unwrap() != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", + ))); + } + } + }; + Ok(tree) + } + + /// Write the verified chunk to storage + fn write_chunk( + &mut self, + chunk_tree: ProofTree, + traversal_instruction: &mut Vec, + ) -> Result, Error> { + // this contains all the elements we want to write to storage + let mut batch = self.merk.storage.new_batch(); + let mut new_chunk_ids = Vec::new(); + + chunk_tree.visit_refs_track_traversal_and_parent( + traversal_instruction, + None, + &mut |proof_node, node_traversal_instruction, parent_key| { + match &proof_node.node { + Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => { + // build tree from node value + let mut tree = TreeNode::new_with_value_hash( + key.clone(), + value.clone(), + *value_hash, + *feature_type, + ) + .unwrap(); + + // update tree links + *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link); + *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link); + + // encode the node and add it to the batch + let bytes = tree.encode(); + + batch.put(key, &bytes, None, None).map_err(CostsError) + } + Node::Hash(hash) => { + // the node hash points to the root of another chunk + // we get the chunk id and add the hash to restorer state + let chunk_id = traversal_instruction_as_string(node_traversal_instruction); + new_chunk_ids.push(chunk_id.clone()); + self.chunk_id_to_root_hash.insert(chunk_id.clone(), *hash); + // TODO: handle unwrap + 
self.parent_keys + .insert(chunk_id, parent_key.unwrap().to_owned()); + Ok(()) + } + _ => { + // we do nothing for other node types + // technically verify chunk will be called before this + // as such this should not be reached + Ok(()) + } + } + }, + )?; + + // write the batch self.merk .storage .commit_batch(batch) .unwrap() - .map_err(StorageError) - } - - /// Verifies the trunk then writes its data to the RocksDB. - fn process_trunk(&mut self, ops: impl IntoIterator) -> Result { - let (trunk, height) = verify_trunk(ops.into_iter().map(Ok)).unwrap()?; - - let root_hash = if self.combining_value.is_none() { - trunk.hash().unwrap() - } else { - combine_hash( - value_hash(self.combining_value.as_ref().expect("confirmed exists")).value(), - &trunk.hash().unwrap(), - ) - .value - }; - - if root_hash != self.expected_root_hash { - return Err(Error::ChunkRestoringError(format!( - "Proof did not match expected hash\n\tExpected: {:?}\n\tActual: {:?}", - self.expected_root_hash, - trunk.hash() - ))); - } - - let root_key = trunk.key().to_vec(); - - let trunk_height = height / 2; - self.trunk_height = Some(trunk_height); - - let chunks_remaining = if trunk_height >= MIN_TRUNK_HEIGHT { - let leaf_hashes = trunk - .layer(trunk_height) - .map(|node| node.hash().unwrap()) - .collect::>() - .into_iter() - .peekable(); - self.leaf_hashes = Some(leaf_hashes); - - let parent_keys = trunk - .layer(trunk_height - 1) - .map(|node| node.key().to_vec()) - .collect::>>() - .into_iter() - .peekable(); - self.parent_keys = Some(parent_keys); - assert_eq!( - self.parent_keys.as_ref().unwrap().len(), - self.leaf_hashes.as_ref().unwrap().len() / 2 - ); - - let chunks_remaining = (2_usize).pow(trunk_height as u32); - assert_eq!(self.remaining_chunks_unchecked(), chunks_remaining); - chunks_remaining - } else { - self.leaf_hashes = Some(vec![].into_iter().peekable()); - self.parent_keys = Some(vec![].into_iter().peekable()); - 0 - }; - - // note that these writes don't happen atomically, which 
is fine here - // because if anything fails during the restore process we will just - // scrap the whole restore and start over - self.write_chunk(trunk)?; - self.merk.set_base_root_key(Some(root_key)).unwrap()?; - - Ok(chunks_remaining) - } - - /// Verifies a leaf chunk then writes it to the RocksDB. This needs to be - /// called in order, retrying the last chunk for any failed verifications. - fn process_leaf(&mut self, ops: impl IntoIterator) -> Result { - let leaf_hashes = self.leaf_hashes.as_mut().unwrap(); - let leaf_hash = leaf_hashes - .peek() - .expect("Received more chunks than expected"); - - let leaf = verify_leaf(ops.into_iter().map(Ok), *leaf_hash).unwrap()?; - self.rewrite_parent_link(&leaf)?; - self.write_chunk(leaf)?; - - let leaf_hashes = self.leaf_hashes.as_mut().unwrap(); - leaf_hashes.next(); + .map_err(StorageError)?; - Ok(self.remaining_chunks_unchecked()) + Ok(new_chunk_ids) } - /// The parent of the root node of the leaf does not know the key of its - /// children when it is first written. Now that we have verified this leaf, - /// we can write the key into the parent node's entry. Note that this does - /// not need to recalcuate hashes since it already had the child hash. - fn rewrite_parent_link(&mut self, leaf: &ProofTree) -> Result<(), Error> { - let parent_keys = self.parent_keys.as_mut().unwrap(); - let parent_key = parent_keys.peek().unwrap().clone(); - let mut parent = crate::merk::fetch_node( + /// When we process truncated chunks, the parents of Node::Hash have invalid + /// placeholder for links. + /// When we get the actual chunk associated with the Node::Hash, + /// we need to update the parent link to reflect the correct data. 
+ fn rewrite_parent_link( + &mut self, + chunk_id: &str, + traversal_instruction: &[bool], + chunk_tree: &ProofTree, + ) -> Result<(), Error> { + let parent_key = self + .parent_keys + .get(chunk_id) + .ok_or(Error::ChunkRestoringError(InternalError( + "after successful chunk verification parent key should exist", + )))?; + + let mut parent = merk::fetch_node( &self.merk.storage, parent_key.as_slice(), - None:: Option>, + None::<&fn(&[u8]) -> Option>, )? - .expect("Could not find parent of leaf chunk"); + .ok_or(Error::ChunkRestoringError(InternalError( + "cannot find expected parent in memory, most likely state corruption issue", + )))?; - let is_left_child = self.remaining_chunks_unchecked() % 2 == 0; - if let Some(Link::Reference { ref mut key, .. }) = parent.link_mut(is_left_child) { - *key = leaf.key().to_vec(); - } else { - panic!("Expected parent links to be type Link::Reference"); - }; + let is_left = traversal_instruction + .last() + .expect("rewrite is only called when traversal_instruction is not empty"); + + let updated_key = chunk_tree.key(); + let updated_sum = chunk_tree.sum(); + + if let Some(Link::Reference { key, sum, .. 
}) = parent.link_mut(*is_left) { + *key = updated_key.to_vec(); + *sum = updated_sum; + } let parent_bytes = parent.encode(); self.merk @@ -294,67 +315,66 @@ impl<'db, S: StorageContext<'db>> Restorer { .unwrap() .map_err(StorageError)?; - if !is_left_child { - let parent_keys = self.parent_keys.as_mut().unwrap(); - parent_keys.next(); - } + self.parent_keys + .remove(chunk_id) + .expect("confirmed parent key exists above"); Ok(()) } - fn rewrite_trunk_child_heights(&mut self) -> Result<(), Error> { - fn recurse<'s, 'db, S: StorageContext<'db>>( - mut node: RefWalker>, - remaining_depth: usize, + /// Each node's height is not added to state, so the producer could lie + /// about the height values. After replication we need to verify the + /// heights and, if invalid, recompute the correct values. + fn rewrite_heights(&mut self) -> Result<(), Error> { + fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>( + mut walker: RefWalker>, batch: &mut >::Batch, ) -> Result<(u8, u8), Error> { - if remaining_depth == 0 { - return Ok(node.tree().child_heights()); - } - + // TODO: remove unwrap let mut cloned_node = TreeNode::decode( - node.tree().key().to_vec(), - node.tree().encode().as_slice(), - None:: Option>, + walker.tree().key().to_vec(), + walker.tree().encode().as_slice(), + None::<&fn(&[u8]) -> Option>, ) - .map_err(EdError)?; + .unwrap(); + + let mut left_height = 0; + let mut right_height = 0; - let left_child = node - .walk(true, None::<&fn(&[u8]) -> Option>) + if let Some(left_walker) = walker + .walk(LEFT, None::<&fn(&[u8]) -> Option>) .unwrap()? 
- .unwrap(); - let left_child_heights = recurse(left_child, remaining_depth - 1, batch)?; - let left_height = left_child_heights.0.max(left_child_heights.1) + 1; - *cloned_node.link_mut(true).unwrap().child_heights_mut() = left_child_heights; + { + let left_child_heights = rewrite_child_heights(left_walker, batch)?; + left_height = left_child_heights.0.max(left_child_heights.1) + 1; + *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights; + } - let right_child = node - .walk(false, None::<&fn(&[u8]) -> Option>) + if let Some(right_walker) = walker + .walk(RIGHT, None::<&fn(&[u8]) -> Option>) .unwrap()? - .unwrap(); - let right_child_heights = recurse(right_child, remaining_depth - 1, batch)?; - let right_height = right_child_heights.0.max(right_child_heights.1) + 1; - *cloned_node.link_mut(false).unwrap().child_heights_mut() = right_child_heights; + { + let right_child_heights = rewrite_child_heights(right_walker, batch)?; + right_height = right_child_heights.0.max(right_child_heights.1) + 1; + *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights; + } let bytes = cloned_node.encode(); batch - .put(node.tree().key(), &bytes, None, None) + .put(walker.tree().key(), &bytes, None, None) .map_err(CostsError)?; Ok((left_height, right_height)) } - self.merk - .load_base_root(None:: Option>) - .unwrap()?; - let mut batch = self.merk.storage.new_batch(); + // TODO: deal with unwrap + let mut tree = self.merk.tree.take().unwrap(); + let walker = RefWalker::new(&mut tree, self.merk.source()); + + rewrite_child_heights(walker, &mut batch)?; - let depth = self.trunk_height.unwrap(); - self.merk.use_tree_mut(|maybe_tree| { - let tree = maybe_tree.unwrap(); - let walker = RefWalker::new(tree, self.merk.source()); - recurse(walker, depth, &mut batch) - })?; + self.merk.tree.set(Some(tree)); self.merk .storage @@ -363,72 +383,262 @@ impl<'db, S: StorageContext<'db>> Restorer { .map_err(StorageError) } - /// Returns the number of 
remaining chunks to be processed. This method will - /// panic if called before processing the first chunk (since that chunk - /// gives us the information to know how many chunks to expect). - pub fn remaining_chunks_unchecked(&self) -> usize { - self.leaf_hashes.as_ref().unwrap().len() - } -} + /// Rebuild restoration state from partial storage state + fn attempt_state_recovery(&mut self) -> Result<(), Error> { + // TODO: think about the return type some more + let (bad_link_map, parent_keys) = self.merk.verify(false); + if !bad_link_map.is_empty() { + self.chunk_id_to_root_hash = bad_link_map; + self.parent_keys = parent_keys; + } -#[cfg(feature = "full")] -impl<'db, S: StorageContext<'db>> Merk { - /// Creates a new `Restorer`, which can be used to verify chunk proofs to - /// replicate an entire Merk tree. A new Merk instance will be initialized - /// by creating a RocksDB at `path`. - pub fn restore(merk: Merk, expected_root_hash: CryptoHash) -> Restorer { - Restorer::new(merk, None, expected_root_hash) + Ok(()) } -} -#[cfg(feature = "full")] -impl ProofTree { - fn child_heights(&self) -> (u8, u8) { - ( - self.left.as_ref().map_or(0, |c| c.tree.height as u8), - self.right.as_ref().map_or(0, |c| c.tree.height as u8), - ) + /// Consumes the `Restorer` and returns a newly created, fully populated + /// Merk instance. This method will return an error if called before + /// processing all chunks. 
+ pub fn finalize(mut self) -> Result, Error> { + // ensure all chunks have been processed + if !self.chunk_id_to_root_hash.is_empty() || !self.parent_keys.is_empty() { + return Err(Error::ChunkRestoringError( + ChunkError::RestorationNotComplete, + )); + } + + // get the latest version of the root node + let _ = self + .merk + .load_base_root(None::<&fn(&[u8]) -> Option>); + + // if height values are wrong, rewrite height + if self.verify_height().is_err() { + let _ = self.rewrite_heights(); + // update the root node after height rewrite + let _ = self + .merk + .load_base_root(None::<&fn(&[u8]) -> Option>); + } + + if !self.merk.verify(self.merk.is_sum_tree).0.is_empty() { + return Err(Error::ChunkRestoringError(ChunkError::InternalError( + "restored tree invalid", + ))); + } + + Ok(self.merk) } -} -#[cfg(feature = "full")] -impl Child { - fn as_link(&self) -> Link { - let key = match &self.tree.node { - Node::KV(key, _) - | Node::KVValueHash(key, ..) - | Node::KVValueHashFeatureType(key, ..) => key.as_slice(), - // for the connection between the trunk and leaf chunks, we don't - // have the child key so we must first write in an empty one. 
once - // the leaf gets verified, we can write in this key to its parent - _ => &[], + /// Verify that the child heights of the merk tree links correctly represent + /// the tree + fn verify_height(&self) -> Result<(), Error> { + let tree = self.merk.tree.take(); + let height_verification_result = if let Some(tree) = &tree { + self.verify_tree_height(tree, tree.height()) + } else { + Ok(()) }; + self.merk.tree.set(tree); + height_verification_result + } - Link::Reference { - hash: self.hash, - sum: None, - child_heights: self.tree.child_heights(), - key: key.to_vec(), + fn verify_tree_height(&self, tree: &TreeNode, parent_height: u8) -> Result<(), Error> { + let (left_height, right_height) = tree.child_heights(); + + if (left_height.abs_diff(right_height)) > 1 { + return Err(Error::CorruptedState( + "invalid child heights, difference greater than 1 for AVL tree", + )); + } + + let max_child_height = left_height.max(right_height); + if parent_height <= max_child_height || parent_height - max_child_height != 1 { + return Err(Error::CorruptedState( + "invalid child heights, parent height is not 1 less than max child height", + )); } + + let left_link = tree.link(LEFT); + let right_link = tree.link(RIGHT); + + if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some()) + { + return Err(Error::CorruptedState( + "invalid child heights node has child height 0, but hash child", + )); + } + + if let Some(link) = left_link { + let left_tree = link.tree(); + if left_tree.is_none() { + let left_tree = TreeNode::get( + &self.merk.storage, + link.key(), + None::<&fn(&[u8]) -> Option>, + ) + .unwrap()? 
+ .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&left_tree, left_height)?; + } else { + self.verify_tree_height(left_tree.unwrap(), left_height)?; + } + } + + if let Some(link) = right_link { + let right_tree = link.tree(); + if right_tree.is_none() { + let right_tree = TreeNode::get( + &self.merk.storage, + link.key(), + None::<&fn(&[u8]) -> Option>, + ) + .unwrap()? + .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&right_tree, right_height)?; + } else { + self.verify_tree_height(right_tree.unwrap(), right_height)?; + } + } + + Ok(()) } } -#[cfg(feature = "full")] #[cfg(test)] mod tests { use grovedb_path::SubtreePath; use grovedb_storage::{ - rocksdb_storage::{test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext}, + rocksdb_storage::{ + test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext, + PrefixedRocksDbStorageContext, + }, RawIterator, Storage, }; use super::*; - use crate::{test_utils::*, tree::Op, MerkBatch}; + use crate::{ + merk::chunks::ChunkProducer, + proofs::chunk::{ + chunk::tests::traverse_get_node_hash, error::ChunkError::InvalidChunkProof, + }, + test_utils::{make_batch_seq, TempMerk}, + Error::ChunkRestoringError, + Merk, PanicSource, + }; + + #[test] + fn test_chunk_verification_non_avl_tree() { + let non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + assert!(Restorer::::verify_chunk( + non_avl_tree_proof, + &[0; 32], + &None + ) + .is_err()); + } + + #[test] + fn test_chunk_verification_only_kv_feature_and_hash() { + // should not accept kv + let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + 
"expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvhash + let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvdigest + let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvrefvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + } + + fn get_node_hash(node: Node) -> Result { + match node { + Node::Hash(hash) => Ok(hash), + _ => Err("expected node hash".to_string()), + } + } + + #[test] + fn test_process_chunk_correct_chunk_id_map() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + 
.unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut merk_tree = merk.tree.take().expect("should have inner tree"); + merk.tree.set(Some(merk_tree.clone())); + let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {}); - fn restore_test(batches: &[&MerkBatch>], expected_nodes: usize) { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut original = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -437,83 +647,136 @@ mod tests { ) .unwrap() .unwrap(); - for batch in batches { - original - .apply::, Vec<_>>(batch, &[], None) - .unwrap() - .unwrap(); - } - let chunks = original.chunks().unwrap(); + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - let storage = TempStorage::default(); - let _tx2 = storage.start_transaction(); - let ctx = storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(); - let merk = Merk::open_base( - ctx, - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let mut restorer = Merk::restore(merk, original.root_hash().unwrap()); - - assert_eq!(restorer.remaining_chunks(), None); + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); - let mut expected_remaining = chunks.len(); - for chunk in chunks { - let remaining = restorer.process_chunk(chunk.unwrap()).unwrap(); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); - expected_remaining -= 1; - assert_eq!(remaining, expected_remaining); - assert_eq!(restorer.remaining_chunks().unwrap(), expected_remaining); - } - assert_eq!(expected_remaining, 0); - - let restored = restorer.finalize().unwrap(); - 
assert_eq!(restored.root_hash(), original.root_hash()); - assert_raw_db_entries_eq(&restored, &original, expected_nodes); - } + // initial restorer state should contain just the root hash of the source merk + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); - #[test] - fn restore_10000() { - restore_test(&[&make_batch_seq(0..10_000)], 10_000); - } + // generate first chunk + let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap(); + // apply first chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk successfully"); + assert_eq!(new_chunk_ids.len(), 4); + + // after first chunk application + // the chunk_map should contain 4 items + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + // assert all the chunk hash values + assert_eq!( + restorer.chunk_id_to_root_hash.get("11"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("10"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("01"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("00"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap()) + .as_ref() + ); - #[test] - fn restore_3() { - restore_test(&[&make_batch_seq(0..3)], 3); - } + // generate second chunk + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + 
assert_eq!(restorer.chunk_id_to_root_hash.len(), 3); + assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None); + + // let's try to apply the second chunk again, should not work + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); + assert!(chunk_process_result.is_err()); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) + )); + + // next let's get a random but expected chunk and work with that e.g. chunk 4 + // but let's apply it to the wrong place + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); + assert!(chunk_process_result.is_err()); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + .. + ))) + )); + + // correctly apply chunk 5 + let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); + assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None); + + // correctly apply chunk 3 + let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None); + + // correctly apply chunk 4 + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + // apply second 
chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None); - #[test] - fn restore_2_left_heavy() { - restore_test( - &[ - &[(vec![0], Op::Put(vec![], BasicMerkNode))], - &[(vec![1], Op::Put(vec![], BasicMerkNode))], - ], - 2, - ); - } + // finalize merk + let restored_merk = restorer.finalize().expect("should finalized successfully"); - #[test] - fn restore_2_right_heavy() { - restore_test( - &[ - &[(vec![1], Op::Put(vec![], BasicMerkNode))], - &[(vec![0], Op::Put(vec![], BasicMerkNode))], - ], - 2, + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() ); } - #[test] - fn restore_1() { - restore_test(&[&make_batch_seq(0..1)], 1); - } - fn assert_raw_db_entries_eq( restored: &Merk, original: &Merk, @@ -528,7 +791,10 @@ mod tests { let mut i = 0; loop { - assert_eq!(restored_entries.valid(), original_entries.valid()); + assert_eq!( + restored_entries.valid().unwrap(), + original_entries.valid().unwrap() + ); if !restored_entries.valid().unwrap() { break; } @@ -544,4 +810,474 @@ mod tests { assert_eq!(i, length); } + + // Builds a source merk with batch_size number of elements + // attempts restoration on some empty merk + // verifies that restoration was performed correctly. 
+ fn test_restoration_single_chunk_strategy(batch_size: u64) { + // build the source merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut source_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap(), None); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let (chunk, next_chunk_id) = chunk_producer + .chunk(chunk_id.as_str()) + .expect("should get chunk"); + restorer + .process_chunk(chunk_id.to_string(), chunk) + .expect("should process chunk successfully"); + chunk_id_opt = next_chunk_id; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + 
source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + + assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); + } + + #[test] + fn restore_single_chunk_20() { + test_restoration_single_chunk_strategy(20); + } + + #[test] + fn restore_single_chunk_1000() { + test_restoration_single_chunk_strategy(1000); + } + + #[test] + fn test_process_multi_chunk_no_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // generate multi chunk from root with no limit + let chunk = chunk_producer + .multi_chunk_with_limit("", None) + .expect("should generate multichunk"); + + assert_eq!(chunk.chunk.len(), 2); + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + + let next_ids = restorer + .process_multi_chunk(chunk.chunk) + .expect("should process chunk"); + // should have replicated all chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + 
assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_no_limit_but_non_root() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // generate multi chunk from the 2nd chunk with no limit + let multi_chunk = chunk_producer + .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) + .unwrap(); + // tree 
of height 4 has 5 chunks + // we have restored the first leaving 4 chunks + // each chunk has an extra chunk id, since they are disjoint + // hence the size of the multi chunk should be 8 + assert_eq!(multi_chunk.chunk.len(), 8); + let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_with_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); + + // build multi chunk with with limit of 325 + let multi_chunk = chunk_producer + .multi_chunk_with_limit("", Some(600)) + .unwrap(); + // should only contain the first chunk + assert_eq!(multi_chunk.chunk.len(), 2); + // should point to chunk 2 + assert_eq!(multi_chunk.next_index, Some("11".to_string())); + let next_ids = 
restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(next_ids.len(), 4); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // subsequent chunks are of size 321 + // with limit just above 642 should get 2 chunks (2 and 3) + // disjoint, so multi chunk len should be 4 + let multi_chunk = chunk_producer + .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) + .unwrap(); + assert_eq!(multi_chunk.chunk.len(), 4); + assert_eq!(multi_chunk.next_index, Some("01".to_string())); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + // chunks 2 and 3 are leaf chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); + assert_eq!(restorer.parent_keys.len(), 2); + + // get the last 2 chunks + let multi_chunk = chunk_producer + .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) + .unwrap(); + assert_eq!(multi_chunk.chunk.len(), 4); + assert_eq!(multi_chunk.next_index, None); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + // chunks 2 and 3 are leaf chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + // finalize merk + let restored_merk = restorer.finalize().unwrap(); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + // Builds a source merk with batch_size number of elements + // attempts restoration on some empty merk, with multi chunks + // verifies that restoration was performed correctly. 
+ fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option) { + // build the source merk + let mut source_merk = TempMerk::new(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap(), None); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let multi_chunk = chunk_producer + .multi_chunk_with_limit(chunk_id.as_str(), limit) + .expect("should get chunk"); + restorer + .process_multi_chunk(multi_chunk.chunk) + .expect("should process chunk successfully"); + chunk_id_opt = multi_chunk.next_index; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + } + + #[test] + fn restore_multi_chunk_20_no_limit() { + test_restoration_multi_chunk_strategy(20, None); + } + + #[test] + 
#[should_panic] + fn restore_multi_chunk_20_tiny_limit() { + test_restoration_multi_chunk_strategy(20, Some(1)); + } + + #[test] + fn restore_multi_chunk_20_limit() { + test_restoration_multi_chunk_strategy(20, Some(1200)); + } + + #[test] + fn restore_multi_chunk_10000_limit() { + test_restoration_multi_chunk_strategy(10000, Some(1200)); + } + + #[test] + fn test_restoration_interruption() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // store old state for later reference + let old_chunk_id_to_root_hash = 
restorer.chunk_id_to_root_hash.clone(); + let old_parent_keys = restorer.parent_keys.clone(); + + // drop the restorer and the restoration merk + drop(restorer); + // open the restoration merk again and build a restorer from it + let restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); + + // assert the state of the restorer + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.parent_keys.len(), 0); + + // recover state + let recovery_attempt = restorer.attempt_state_recovery(); + assert!(recovery_attempt.is_ok()); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // assert equality to old state + assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); + assert_eq!(old_parent_keys, restorer.parent_keys); + } } diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 1e3b9fb1..22334688 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -28,614 +28,10 @@ //! Chunk proofs +mod binary_range; #[cfg(feature = "full")] -use grovedb_costs::{ - cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, -}; +pub mod chunk; +pub mod chunk_op; +pub mod error; #[cfg(feature = "full")] -use grovedb_storage::RawIterator; -#[cfg(feature = "full")] -use { - super::tree::{execute, Tree as ProofTree}, - crate::tree::CryptoHash, - crate::tree::TreeNode, -}; - -#[cfg(feature = "full")] -use super::{Node, Op}; -use crate::tree::kv::ValueDefinedCostType; -#[cfg(feature = "full")] -use crate::{ - error::Error, - tree::{Fetch, RefWalker}, - Error::EdError, - TreeFeatureType::BasicMerkNode, -}; - -/// The minimum number of layers the trunk will be guaranteed to have before -/// splitting into multiple chunks. 
If the tree's height is less than double -/// this value, the trunk should be verified as a leaf chunk. -#[cfg(feature = "full")] -pub const MIN_TRUNK_HEIGHT: usize = 5; - -#[cfg(feature = "full")] -impl<'a, S> RefWalker<'a, S> -where - S: Fetch + Sized + Clone, -{ - /// Generates a trunk proof by traversing the tree. - /// - /// Returns a tuple containing the produced proof, and a boolean indicating - /// whether or not there will be more chunks to follow. If the chunk - /// contains the entire tree, the boolean will be `false`, if the chunk - /// is abridged and will be connected to leaf chunks, it will be `true`. - pub fn create_trunk_proof(&mut self) -> CostResult<(Vec, bool), Error> { - let approx_size = 2usize.pow((self.tree().height() / 2) as u32) * 3; - let mut proof = Vec::with_capacity(approx_size); - - self.traverse_for_height_proof(&mut proof, 1) - .flat_map_ok(|trunk_height| { - if trunk_height < MIN_TRUNK_HEIGHT { - proof.clear(); - self.traverse_for_trunk(&mut proof, usize::MAX, true) - .map_ok(|_| Ok((proof, false))) - } else { - self.traverse_for_trunk(&mut proof, trunk_height, true) - .map_ok(|_| Ok((proof, true))) - } - }) - .flatten() - } - - /// Traverses down the left edge of the tree and pushes ops to the proof, to - /// act as a proof of the height of the tree. This is the first step in - /// generating a trunk proof. 
- fn traverse_for_height_proof( - &mut self, - proof: &mut Vec, - depth: usize, - ) -> CostResult { - let mut cost = OperationCost::default(); - let maybe_left = match self - .walk(true, None::<&fn(&[u8]) -> Option>) - .unwrap_add_cost(&mut cost) - { - Ok(maybe_left) => maybe_left, - Err(e) => { - return Err(e).wrap_with_cost(cost); - } - }; - let has_left_child = maybe_left.is_some(); - - let trunk_height = if let Some(mut left) = maybe_left { - match left - .traverse_for_height_proof(proof, depth + 1) - .unwrap_add_cost(&mut cost) - { - Ok(x) => x, - Err(e) => return Err(e).wrap_with_cost(cost), - } - } else { - depth / 2 - }; - - if depth > trunk_height { - proof.push(Op::Push(self.to_kvhash_node())); - - if has_left_child { - proof.push(Op::Parent); - } - - if let Some(right) = self.tree().link(false) { - proof.push(Op::Push(Node::Hash(*right.hash()))); - proof.push(Op::Child); - } - } - - Ok(trunk_height).wrap_with_cost(cost) - } - - /// Traverses down the tree and adds KV push ops for all nodes up to a - /// certain depth. This expects the proof to contain a height proof as - /// generated by `traverse_for_height_proof`. 
- fn traverse_for_trunk( - &mut self, - proof: &mut Vec, - remaining_depth: usize, - is_leftmost: bool, - ) -> CostResult<(), Error> { - let mut cost = OperationCost::default(); - - if remaining_depth == 0 { - // return early if we have reached bottom of trunk - - // for leftmost node, we already have height proof - if is_leftmost { - return Ok(()).wrap_with_cost(cost); - } - - // add this node's hash - proof.push(Op::Push(self.to_hash_node().unwrap_add_cost(&mut cost))); - - return Ok(()).wrap_with_cost(cost); - } - - // traverse left - let has_left_child = self.tree().link(true).is_some(); - if has_left_child { - let mut left = cost_return_on_error!( - &mut cost, - self.walk(true, None::<&fn(&[u8]) -> Option>) - ) - .unwrap(); - cost_return_on_error!( - &mut cost, - left.traverse_for_trunk(proof, remaining_depth - 1, is_leftmost) - ); - } - - // add this node's data - proof.push(Op::Push(self.to_kv_value_hash_feature_type_node())); - - if has_left_child { - proof.push(Op::Parent); - } - - // traverse right - if let Some(mut right) = cost_return_on_error!( - &mut cost, - self.walk(false, None::<&fn(&[u8]) -> Option>) - ) { - cost_return_on_error!( - &mut cost, - right.traverse_for_trunk(proof, remaining_depth - 1, false) - ); - proof.push(Op::Child); - } - - Ok(()).wrap_with_cost(cost) - } -} - -/// Builds a chunk proof by iterating over values in a RocksDB, ending the chunk -/// when a node with key `end_key` is encountered. -/// -/// Advances the iterator for all nodes in the chunk and the `end_key` (if any). 
-#[cfg(feature = "full")] -pub(crate) fn get_next_chunk( - iter: &mut impl RawIterator, - end_key: Option<&[u8]>, -) -> CostResult, Error> { - let mut cost = OperationCost::default(); - - let mut chunk = Vec::with_capacity(512); - let mut stack = Vec::with_capacity(32); - let mut node = TreeNode::new(vec![], vec![], None, BasicMerkNode).unwrap_add_cost(&mut cost); - - while iter.valid().unwrap_add_cost(&mut cost) { - let key = iter.key().unwrap_add_cost(&mut cost).unwrap(); - - if let Some(end_key) = end_key { - if key == end_key { - break; - } - } - - let encoded_node = iter.value().unwrap_add_cost(&mut cost).unwrap(); - cost_return_on_error_no_add!( - &cost, - TreeNode::decode_into( - &mut node, - vec![], - encoded_node, - None:: Option> - ) - .map_err(EdError) - ); - - // TODO: Only use the KVValueHash if needed, saves 32 bytes - // only needed when dealing with references and trees - let kv = Node::KVValueHashFeatureType( - key.to_vec(), - node.value_ref().to_vec(), - *node.value_hash(), - node.feature_type(), - ); - - chunk.push(Op::Push(kv)); - - if node.link(true).is_some() { - chunk.push(Op::Parent); - } - - if let Some(child) = node.link(false) { - stack.push(child.key().to_vec()); - } else { - while let Some(top_key) = stack.last() { - if key < top_key.as_slice() { - break; - } - stack.pop(); - chunk.push(Op::Child); - } - } - - iter.next().unwrap_add_cost(&mut cost); - } - - if iter.valid().unwrap_add_cost(&mut cost) { - iter.next().unwrap_add_cost(&mut cost); - } - - Ok(chunk).wrap_with_cost(cost) -} - -/// Verifies a leaf chunk proof by executing its operators. Checks that there -/// were no abridged nodes (Hash or KVHash) and the proof hashes to -/// `expected_hash`. -#[cfg(feature = "full")] -#[allow(dead_code)] // TODO: remove when proofs will be enabled -pub(crate) fn verify_leaf>>( - ops: I, - expected_hash: CryptoHash, -) -> CostResult { - execute(ops, false, |node| match node { - Node::KVValueHash(..) | Node::KV(..) 
| Node::KVValueHashFeatureType(..) => Ok(()), - _ => Err(Error::ChunkRestoringError( - "Leaf chunks must contain full subtree".to_string(), - )), - }) - .flat_map_ok(|tree| { - tree.hash().map(|hash| { - if hash != expected_hash { - Error::ChunkRestoringError(format!( - "Leaf chunk proof did not match expected hash\n\tExpected: {:?}\n\tActual: \ - {:?}", - expected_hash, - tree.hash() - )); - } - Ok(tree) - }) - }) -} - -/// Verifies a trunk chunk proof by executing its operators. Ensures the -/// resulting tree contains a valid height proof, the trunk is the correct -/// height, and all of its inner nodes are not abridged. Returns the tree and -/// the height given by the height proof. -#[cfg(feature = "full")] -pub(crate) fn verify_trunk>>( - ops: I, -) -> CostResult<(ProofTree, usize), Error> { - let mut cost = OperationCost::default(); - - fn verify_height_proof(tree: &ProofTree) -> Result { - Ok(match tree.child(true) { - Some(child) => { - if let Node::Hash(_) = child.tree.node { - return Err(Error::ChunkRestoringError( - "Expected height proof to only contain KV and KVHash nodes".to_string(), - )); - } - verify_height_proof(&child.tree)? + 1 - } - None => 1, - }) - } - - fn verify_completeness( - tree: &ProofTree, - remaining_depth: usize, - leftmost: bool, - ) -> Result<(), Error> { - let recurse = |left, leftmost| { - if let Some(child) = tree.child(left) { - verify_completeness(&child.tree, remaining_depth - 1, left && leftmost)?; - } - Ok(()) - }; - - if remaining_depth > 0 { - match tree.node { - Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) 
=> {} - _ => { - return Err(Error::ChunkRestoringError( - "Expected trunk inner nodes to contain keys and values".to_string(), - )) - } - } - recurse(true, leftmost)?; - recurse(false, false) - } else if !leftmost { - match tree.node { - Node::Hash(_) => Ok(()), - _ => Err(Error::ChunkRestoringError( - "Expected trunk leaves to contain Hash nodes".to_string(), - )), - } - } else { - match &tree.node { - Node::KVHash(_) => Ok(()), - _ => Err(Error::ChunkRestoringError( - "Expected leftmost trunk leaf to contain KVHash node".to_string(), - )), - } - } - } - - let mut kv_only = true; - let tree = cost_return_on_error!( - &mut cost, - execute(ops, false, |node| { - kv_only &= matches!(node, Node::KVValueHash(..)) - || matches!(node, Node::KV(..)) - || matches!(node, Node::KVValueHashFeatureType(..)); - Ok(()) - }) - ); - - let height = cost_return_on_error_no_add!(&cost, verify_height_proof(&tree)); - let trunk_height = height / 2; - - if trunk_height < MIN_TRUNK_HEIGHT { - if !kv_only { - return Err(Error::ChunkRestoringError( - "Leaf chunks must contain full subtree".to_string(), - )) - .wrap_with_cost(cost); - } - } else { - cost_return_on_error_no_add!(&cost, verify_completeness(&tree, trunk_height, true)); - } - - Ok((tree, height)).wrap_with_cost(cost) -} - -#[cfg(feature = "full")] -#[cfg(test)] -mod tests { - use std::usize; - - use grovedb_storage::StorageContext; - - use super::{super::tree::Tree, *}; - use crate::{ - test_utils::*, - tree::{NoopCommit, PanicSource, TreeNode as BaseTree}, - }; - - #[derive(Default)] - struct NodeCounts { - hash: usize, - kv_hash: usize, - kv: usize, - kv_value_hash: usize, - kv_digest: usize, - kv_ref_value_hash: usize, - kv_value_hash_feature_type: usize, - } - - fn count_node_types(tree: Tree) -> NodeCounts { - let mut counts = NodeCounts::default(); - - tree.visit_nodes(&mut |node| { - match node { - Node::Hash(_) => counts.hash += 1, - Node::KVHash(_) => counts.kv_hash += 1, - Node::KV(..) 
=> counts.kv += 1, - Node::KVValueHash(..) => counts.kv_value_hash += 1, - Node::KVDigest(..) => counts.kv_digest += 1, - Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, - Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1, - }; - }); - - counts - } - - #[test] - fn small_trunk_roundtrip() { - let mut tree = make_tree_seq(31); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - // println!("{:?}", &proof); - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 32); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn big_trunk_roundtrip() { - let mut tree = make_tree_seq(2u64.pow(MIN_TRUNK_HEIGHT as u32 * 2 + 1) - 1); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(has_more); - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - - let counts = count_node_types(trunk); - // are these formulas correct for all values of `MIN_TRUNK_HEIGHT`? 
🤔 - assert_eq!( - counts.hash, - 2usize.pow(MIN_TRUNK_HEIGHT as u32) + MIN_TRUNK_HEIGHT - 1 - ); - assert_eq!( - counts.kv_value_hash_feature_type, - 2usize.pow(MIN_TRUNK_HEIGHT as u32) - 1 - ); - assert_eq!(counts.kv_hash, MIN_TRUNK_HEIGHT + 1); - } - - #[test] - fn one_node_tree_trunk_roundtrip() { - let mut tree = BaseTree::new(vec![0], vec![], None, BasicMerkNode).unwrap(); - tree.commit(&mut NoopCommit {}, &|_, _| Ok(0)) - .unwrap() - .unwrap(); - - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 1); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn two_node_right_heavy_tree_trunk_roundtrip() { - // 0 - // \ - // 1 - let mut tree = BaseTree::new(vec![0], vec![], None, BasicMerkNode) - .unwrap() - .attach( - false, - Some(BaseTree::new(vec![1], vec![], None, BasicMerkNode).unwrap()), - ); - tree.commit(&mut NoopCommit {}, &|_, _| Ok(0)) - .unwrap() - .unwrap(); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 2); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn two_node_left_heavy_tree_trunk_roundtrip() { - // 1 - // / - // 0 - let mut tree = BaseTree::new(vec![1], vec![], None, BasicMerkNode) - .unwrap() - .attach( - true, - Some(BaseTree::new(vec![0], vec![], None, BasicMerkNode).unwrap()), - ); - tree.commit(&mut NoopCommit {}, &|_, _| Ok(0)) - .unwrap() - .unwrap(); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, 
has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 2); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn three_node_tree_trunk_roundtrip() { - // 1 - // / \ - // 0 2 - let mut tree = BaseTree::new(vec![1], vec![], None, BasicMerkNode) - .unwrap() - .attach( - true, - Some(BaseTree::new(vec![0], vec![], None, BasicMerkNode).unwrap()), - ) - .attach( - false, - Some(BaseTree::new(vec![2], vec![], None, BasicMerkNode).unwrap()), - ); - tree.commit(&mut NoopCommit {}, &|_, _| Ok(0)) - .unwrap() - .unwrap(); - - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 3); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn leaf_chunk_roundtrip() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..31); - merk.apply::<_, Vec<_>>(batch.as_slice(), &[], None) - .unwrap() - .unwrap(); - - merk.commit(); - - let root_node = merk.tree.take(); - let root_key = root_node.as_ref().unwrap().key().to_vec(); - merk.tree.set(root_node); - - // whole tree as 1 leaf - let mut iter = merk.storage.raw_iter(); - iter.seek_to_first().unwrap(); - let chunk = get_next_chunk(&mut iter, None).unwrap().unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf(ops, merk.root_hash().unwrap()) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 31); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - drop(iter); - - let mut iter = merk.storage.raw_iter(); - 
iter.seek_to_first().unwrap(); - - // left leaf - let chunk = get_next_chunk(&mut iter, Some(root_key.as_slice())) - .unwrap() - .unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf( - ops, - [ - 78, 230, 25, 188, 163, 2, 169, 185, 254, 174, 196, 206, 162, 187, 245, 188, 74, 70, - 220, 160, 35, 78, 120, 122, 61, 90, 241, 105, 35, 180, 133, 98, - ], - ) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 15); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - - // right leaf - let chunk = get_next_chunk(&mut iter, None).unwrap().unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf( - ops, - [ - 21, 147, 223, 29, 106, 19, 23, 38, 233, 134, 245, 44, 246, 179, 48, 19, 111, 50, - 19, 191, 134, 37, 165, 5, 35, 111, 233, 213, 212, 5, 92, 45, - ], - ) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 15); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - } -} +pub mod util; diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs new file mode 100644 index 00000000..2acaa728 --- /dev/null +++ b/merk/src/proofs/chunk/binary_range.rs @@ -0,0 +1,239 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +const LEFT: bool = true; +const RIGHT: bool = false; + +/// Utility type for range bisection and advancement +#[derive(Debug)] +pub(crate) struct BinaryRange { + start: usize, + end: usize, +} + +impl BinaryRange { + /// Returns a new BinaryRange and ensures that start <= end + /// and min start value is 1 + /// returns an error if start > end or start < 1 + pub fn new(start: usize, end: usize) -> Result { + // start should be less than or equal to end + if start > end { + return Err(String::from("start value cannot be greater than end value")); + } + + // the minimum value for start should be 1 + // that way the maximum length + // of the range is usize::MAX and not + // usize::MAX + 1 + if start < 1 { + return Err(String::from( + "minimum start value should be 1 to avoid len overflow", + )); + } + + Ok(Self { start, end }) + } + + /// Returns the len of the current range (both start and end are inclusive) + pub fn len(&self) -> usize { + self.end - self.start + 1 + } + + /// Returns true when the len of the range is odd + pub fn odd(&self) -> bool { + (self.len() % 2) != 0 + } + + /// Determines if a value belongs to the left half or right half of a range + /// returns true for left and false for right + /// returns None if value is outside the range or range len is odd + pub fn which_half(&self, value: usize) -> Option { + // return None if value is not in the range + if value < self.start || value > self.end { + return None; + } + + // can't divide the range into equal halves + // when odd, so return None + if self.odd() { + return None;
+ } + + let half_size = self.len() / 2; + let second_half_start = self.start + half_size; + + if value >= second_half_start { + return Some(RIGHT); + } + + Some(LEFT) + } + + /// Returns a new range that only contains elements on the specified half + /// returns an error if range is odd + pub fn get_half(&self, left: bool) -> Result { + if self.odd() { + return Err(String::from("cannot break odd range in half")); + } + + let half_size = self.len() / 2; + let second_half_start = self.start + half_size; + + Ok(if left { + Self { + start: self.start, + end: second_half_start - 1, + } + } else { + Self { + start: second_half_start, + end: self.end, + } + }) + } + + /// Returns a new range that increments the start value + /// also returns the previous start value + /// returns an error if the operation will cause start to be larger than end + pub fn advance_range_start(&self) -> Result<(Self, usize), String> { + // check if operation will cause start > end + if self.start == self.end { + return Err(String::from( + "can't advance start when start is equal to end", + )); + } + + Ok(( + Self { + start: self.start + 1, + end: self.end, + }, + self.start, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn cannot_create_invalid_range() { + let invalid_range = BinaryRange::new(5, 3); + assert!(invalid_range.is_err()); + } + + #[test] + fn can_get_range_len() { + let range = BinaryRange::new(2, 5).expect("should create range"); + assert_eq!(range.len(), 4); + assert!(!range.odd()); + + let range = BinaryRange::new(2, 2).expect("should create range"); + assert_eq!(range.len(), 1); + assert!(range.odd()); + } + + #[test] + fn can_determine_correct_half() { + let range = BinaryRange::new(3, 7).expect("should create range"); + assert_eq!(range.len(), 5); + assert!(range.odd()); + + // cannot determine half for value outside a range + assert!(range.which_half(1).is_none()); + assert!(range.which_half(7).is_none()); + + // cannot determine half when range is
odd + assert!(range.which_half(3).is_none()); + + let range = BinaryRange::new(3, 6).expect("should create range"); + assert_eq!(range.len(), 4); + assert!(!range.odd()); + + assert_eq!(range.which_half(3), Some(LEFT)); + assert_eq!(range.which_half(4), Some(LEFT)); + assert_eq!(range.which_half(5), Some(RIGHT)); + assert_eq!(range.which_half(6), Some(RIGHT)); + } + + #[test] + fn can_advance_start_range() { + let range = BinaryRange::new(2, 5).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.start, 2); + + // advance the range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 2); + assert_eq!(range.len(), 3); + assert_eq!(range.start, 3); + + // advance range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 3); + assert_eq!(range.len(), 2); + assert_eq!(range.start, 4); + + // advance range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 4); + assert_eq!(range.len(), 1); + assert_eq!(range.start, 5); + + // should not be allowed to advance the range anymore + let advance_result = range.advance_range_start(); + assert!(advance_result.is_err()); + } + + #[test] + fn can_break_range_into_halves() { + let range = BinaryRange::new(2, 10).expect("should create range"); + assert_eq!(range.len(), 9); + assert!(range.odd()); + assert!(range.get_half(LEFT).is_err()); + + let range = BinaryRange::new(2, 11).expect("should create range"); + assert_eq!(range.len(), 10); + assert!(!range.odd()); + + let left_range = range.get_half(LEFT).expect("should get sub range"); + assert_eq!(left_range.start, 2); + assert_eq!(left_range.end, 6); + + let right_range = range.get_half(RIGHT).expect("should get sub range"); + assert_eq!(right_range.start, 7); + assert_eq!(right_range.end, 11); + + // right_range is false, advance to make even + let (right_range, _prev) = 
right_range.advance_range_start().expect("should advance"); + let right_left_range = right_range.get_half(LEFT).expect("should get sub range"); + assert_eq!(right_left_range.len(), 2); + assert_eq!(right_left_range.start, 8); + assert_eq!(right_left_range.end, 9); + } +} diff --git a/merk/src/proofs/chunk/chunk.rs b/merk/src/proofs/chunk/chunk.rs new file mode 100644 index 00000000..95d888ec --- /dev/null +++ b/merk/src/proofs/chunk/chunk.rs @@ -0,0 +1,662 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ +use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; + +// TODO: add copyright comment +use crate::proofs::{Node, Op, Tree}; +use crate::{ + proofs::{chunk::error::ChunkError, tree::execute}, + tree::{kv::ValueDefinedCostType, Fetch, RefWalker}, + CryptoHash, Error, +}; + +pub const LEFT: bool = true; +pub const RIGHT: bool = false; + +impl<'a, S> RefWalker<'a, S> +where + S: Fetch + Sized + Clone, +{ + /// Returns a chunk of a given depth from a RefWalker + pub fn create_chunk(&mut self, depth: usize) -> Result, Error> { + // build the proof vector + let mut proof = vec![]; + + self.create_chunk_internal(&mut proof, depth)?; + + Ok(proof) + } + + fn create_chunk_internal( + &mut self, + proof: &mut Vec, + remaining_depth: usize, + ) -> Result<(), Error> { + // at some point we will reach the depth + // here we need to put the node hash + if remaining_depth == 0 { + proof.push(Op::Push(self.to_hash_node().unwrap())); + return Ok(()); + } + + // traverse left + let has_left_child = self.tree().link(true).is_some(); + if has_left_child { + let mut left = self + .walk(true, None::<&fn(&[u8]) -> Option>) + .unwrap()? + .expect("confirmed is some"); + left.create_chunk_internal(proof, remaining_depth - 1)?; + } + + // add current node's data + proof.push(Op::Push(self.to_kv_value_hash_feature_type_node())); + + if has_left_child { + proof.push(Op::Parent); + } + + // traverse right + if let Some(mut right) = self + .walk(false, None::<&fn(&[u8]) -> Option>) + .unwrap()? 
+ { + right.create_chunk_internal(proof, remaining_depth - 1)?; + + proof.push(Op::Child); + } + + Ok(()) + } + + /// Returns a chunk of a given depth after applying some traversal + /// instruction to the RefWalker + pub fn traverse_and_build_chunk( + &mut self, + instructions: &[bool], + depth: usize, + ) -> Result, Error> { + // base case + if instructions.is_empty() { + // we are at the desired node + return self.create_chunk(depth); + } + + // link must exist + let has_link = self.tree().link(instructions[0]).is_some(); + if !has_link { + return Err(Error::ChunkingError(ChunkError::BadTraversalInstruction( + "no node found at given traversal instruction", + ))); + } + + // grab child + let mut child = self + .walk( + instructions[0], + None::<&fn(&[u8]) -> Option>, + ) + .unwrap()? + .expect("confirmed link exists so cannot be none"); + + // recurse on child + child.traverse_and_build_chunk(&instructions[1..], depth) + } + + /// Returns the smallest amount of tree ops, that can convince + /// a verifier of the tree height + /// the generated subtree is of this form + /// kv_hash + /// / \ + /// kv_hash node_hash + /// / \ + /// kv_hash node_hash + /// . + /// . + /// . 
+ pub fn generate_height_proof(&mut self, proof: &mut Vec) -> CostResult<(), Error> { + // TODO: look into making height proofs more efficient + // they will always be used in the context of some + // existing chunk, we don't want to repeat nodes unnecessarily + let mut cost = OperationCost::default(); + + let maybe_left = cost_return_on_error!( + &mut cost, + self.walk(LEFT, None::<&fn(&[u8]) -> Option>) + ); + let has_left_child = maybe_left.is_some(); + + // recurse to leftmost element + if let Some(mut left) = maybe_left { + cost_return_on_error!(&mut cost, left.generate_height_proof(proof)) + } + + proof.push(Op::Push(self.to_kvhash_node())); + + if has_left_child { + proof.push(Op::Parent); + } + + if let Some(right) = self.tree().link(RIGHT) { + proof.push(Op::Push(Node::Hash(*right.hash()))); + proof.push(Op::Child); + } + + Ok(()).wrap_with_cost(cost) + } +} + +/// Verifies a height proof and returns the height it proves. +/// Executes the proof ops into a tree, checks that the hash of that tree +/// equals `expected_root_hash`, then delegates to `verify_height_tree`. +/// Returns `Error::OldChunkRestoringError` when the root hash does not match. +pub fn verify_height_proof(proof: Vec, expected_root_hash: CryptoHash) -> Result { + // todo: remove unwrap + let height_proof_tree = execute(proof.into_iter().map(Ok), false, |_| Ok(())).unwrap()?; + + // todo: deal with cost + // todo: deal with old chunk restoring error + if height_proof_tree.hash().unwrap() != expected_root_hash { + return Err(Error::OldChunkRestoringError( + "invalid height proof: root hash mismatch".to_string(), + )); + } + + verify_height_tree(&height_proof_tree) +} + +/// Returns the height of a height proof tree by following left children from +/// the given node, counting one level per node visited (a node without a left +/// child has height 1). +/// Returns `Error::OldChunkRestoringError` when a left child is not a +/// `Node::KVHash` node. +pub fn verify_height_tree(height_proof_tree: &Tree) -> Result { + return Ok(match height_proof_tree.child(LEFT) { + Some(child) => { + if !matches!(child.tree.node, Node::KVHash(..)) { + // todo deal with old chunk restoring error + return Err(Error::OldChunkRestoringError( + "Expected left nodes in height proofs to be kvhash nodes".to_string(), + )); + } + verify_height_tree(&child.tree)?
+ 1 + } + None => 1, + }); +} + +#[cfg(test)] +pub mod tests { + use ed::Encode; + + use crate::{ + proofs::{ + chunk::chunk::{verify_height_proof, LEFT, RIGHT}, + tree::execute, + Node, Op, + }, + test_utils::make_tree_seq_with_start_key, + tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, + PanicSource, TreeFeatureType, + }; + + fn build_tree_10_nodes() -> TreeNode { + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + make_tree_seq_with_start_key(10, [0; 8].to_vec()) + } + + /// Traverses a tree to a certain node and returns the node hash of that + /// node + pub fn traverse_get_node_hash( + walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_hash_node().unwrap() + }) + } + + /// Traverses a tree to a certain node and returns the kv_feature_type of + /// that node + pub fn traverse_get_kv_feature_type( + walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_kv_value_hash_feature_type_node() + }) + } + /// Traverses a tree to a certain node and returns the kv_hash of + /// that node + pub fn traverse_get_kv_hash( + walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_kvhash_node() + }) + } + + /// Traverses a tree to a certain node and returns the result of applying + /// some arbitrary function + pub fn traverse_and_apply( + walker: &mut RefWalker, + traverse_instructions: &[bool], + apply_fn: T, + ) -> Node + where + T: Fn(&mut RefWalker) -> Node, + { + if traverse_instructions.is_empty() { + return apply_fn(walker); + } + + let mut child = walker + .walk( + traverse_instructions[0], + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap() + .unwrap(); + traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn) + } + + #[test] + fn 
build_chunk_from_root_depth_0() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // should return the node hash of the root node + let chunk = tree_walker.create_chunk(0).expect("should build chunk"); + assert_eq!(chunk.len(), 1); + assert_eq!( + chunk[0], + Op::Push(traverse_get_node_hash(&mut tree_walker, &[])) + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_1() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for depth 1 + // expected: + // 3 + // / \ + // Hash(1) Hash(7) + let chunk = tree_walker.create_chunk(1).expect("should build chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT])), + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_3() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for depth 3 + // expected: + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // H(4) H(6) H(9) + let chunk = tree_walker.create_chunk(3).expect("should build chunk"); + assert_eq!(chunk.len(), 19); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + 
Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_max_depth() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for entire tree (depth 4) + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + let chunk = tree_walker.create_chunk(4).expect("should build chunk"); + assert_eq!(chunk.len(), 19); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + 
Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn chunk_greater_than_max_should_equal_max_depth() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk with depth greater than tree + // we should get the same result as building with the exact depth + let large_depth_chunk = tree_walker.create_chunk(100).expect("should build chunk"); + let exact_depth_chunk = tree_walker.create_chunk(4).expect("should build chunk"); + assert_eq!(large_depth_chunk, exact_depth_chunk); + + let tree_a = execute(large_depth_chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + let tree_b = execute(exact_depth_chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree_a.hash().unwrap(), tree_b.hash().unwrap()); + } + + #[test] + fn build_chunk_after_traversal_depth_2() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // traverse to the right first then build chunk + // expected + // 7 + // / \ + // 5 8 + // / \ \ + // H(4) H(6) H(9) + + // right traversal + let chunk = tree_walker + .traverse_and_build_chunk(&[RIGHT], 2) + .expect("should build chunk"); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + 
Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + ] + ); + + // the hash of the tree computed from the chunk + // should be the same as the node_hash of the element + // on the right + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!( + Node::Hash(computed_tree.hash().unwrap()), + traverse_get_node_hash(&mut tree_walker, &[RIGHT]) + ); + } + + #[test] + fn build_chunk_after_traversal_depth_1() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // traverse with [right, left] and then build chunk of depth 1 + // expected + // 5 + // / \ + // H(4) H(6) + + // instruction traversal + let chunk = tree_walker + .traverse_and_build_chunk(&[RIGHT, LEFT], 1) + .expect("should build chunk"); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!( + Node::Hash(computed_tree.hash().unwrap()), + traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT]) + ); + } + + #[test] + fn test_chunk_encoding() { + let chunk = vec![ + Op::Push(Node::Hash([0; 32])), + Op::Push(Node::KVValueHashFeatureType( + vec![1], + vec![2], + [0; 32], + TreeFeatureType::BasicMerkNode, + )), + ]; + let encoded_chunk = 
chunk.encode().expect("should encode"); + assert_eq!(encoded_chunk.len(), 33 + 39); + assert_eq!( + encoded_chunk.len(), + chunk.encoding_length().expect("should get encoding length") + ); + } + + #[test] + fn test_height_proof_generation() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + let mut height_proof = vec![]; + tree_walker + .generate_height_proof(&mut height_proof) + .unwrap() + .expect("should generate height proof"); + + assert_eq!(height_proof.len(), 9); + assert_eq!( + height_proof, + vec![ + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), + Op::Child, + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT])), + Op::Child, + ] + ); + } + + #[test] + fn test_height_proof_verification() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + let mut height_proof = vec![]; + tree_walker + .generate_height_proof(&mut height_proof) + .unwrap() + .expect("should generate height proof"); + + let verified_height = verify_height_proof(height_proof, tree.hash().unwrap()) + .expect("should verify height proof"); + + // doesn't represent the max height of the tree + assert_eq!(verified_height, 3); + } +} diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs new file mode 100644 index 00000000..6d0d08cd --- /dev/null +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -0,0 +1,169 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights 
to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +use std::io::{Read, Write}; + +use ed::{Decode, Encode}; +use integer_encoding::{VarInt, VarIntReader}; + +use crate::proofs::Op; + +/// Represents the chunk generated from a given starting chunk id +#[derive(PartialEq, Debug)] +pub enum ChunkOp { + ChunkId(Vec), + Chunk(Vec), +} + +impl Encode for ChunkOp { + /// Encodes as a one byte marker (0 = ChunkId, 1 = Chunk), a varint + /// length, then the payload; every write error is propagated + fn encode_into(&self, dest: &mut W) -> ed::Result<()> { + match self { + Self::ChunkId(instruction) => { + // write the marker then the len + dest.write_all(&[0_u8])?; + dest.write_all(instruction.len().encode_var_vec().as_slice())?; + let instruction_as_binary: Vec = instruction + .iter() + .map(|v| if *v { 1_u8 } else { 0_u8 }) + .collect(); + dest.write_all(&instruction_as_binary)?; + } + Self::Chunk(chunk) => { + dest.write_all(&[1_u8])?; + // chunk len represents the number of ops not the total encoding len of ops + dest.write_all(chunk.len().encode_var_vec().as_slice())?; + for op in chunk { + dest.write_all(&op.encode()?)?; + } + } + } + + Ok(()) + } + + fn encoding_length(&self) -> ed::Result { + Ok(match self { + Self::ChunkId(instruction) => { + 1 +
instruction.len().encode_var_vec().len() + instruction.len() + } + Self::Chunk(chunk) => { + 1 + chunk.len().encode_var_vec().len() + chunk.encoding_length()? + } + }) + } +} + +impl Decode for ChunkOp { + fn decode(input: R) -> ed::Result { + let mut chunk_op = ChunkOp::ChunkId(vec![]); + Self::decode_into(&mut chunk_op, input)?; + Ok(chunk_op) + } + + fn decode_into(&mut self, mut input: R) -> ed::Result<()> { + let mut marker = [0_u8; 1]; + input.read_exact(&mut marker)?; + + match marker[0] { + 0 => { + let length = input.read_varint()?; + let mut instruction_as_binary = vec![0_u8; length]; + input.read_exact(&mut instruction_as_binary)?; + + let instruction: Vec = instruction_as_binary + .into_iter() + .map(|v| v == 1_u8) + .collect(); + + *self = ChunkOp::ChunkId(instruction); + } + 1 => { + let ops_length = input.read_varint()?; + let mut chunk = Vec::with_capacity(ops_length); + + for _ in 0..ops_length { + let op = Decode::decode(&mut input)?; + chunk.push(op); + } + + *self = ChunkOp::Chunk(chunk); + } + _ => return Err(ed::Error::UnexpectedByte(marker[0])), + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use ed::{Decode, Encode}; + + use crate::proofs::{ + chunk::{ + chunk::{LEFT, RIGHT}, + chunk_op::ChunkOp, + }, + Node, Op, + }; + + #[test] + fn test_chunk_op_encoding() { + let chunk_op = ChunkOp::ChunkId(vec![LEFT, RIGHT]); + let encoded_chunk_op = chunk_op.encode().unwrap(); + assert_eq!(encoded_chunk_op, vec![0, 2, 1, 0]); + assert_eq!(encoded_chunk_op.len(), chunk_op.encoding_length().unwrap()); + + let chunk_op = ChunkOp::Chunk(vec![Op::Push(Node::Hash([0; 32])), Op::Child]); + let encoded_chunk_op = chunk_op.encode().unwrap(); + let mut expected_encoding = vec![1, 2]; + expected_encoding.extend(Op::Push(Node::Hash([0; 32])).encode().unwrap()); + expected_encoding.extend(Op::Child.encode().unwrap()); + assert_eq!(encoded_chunk_op, expected_encoding); + assert_eq!(encoded_chunk_op.len(), chunk_op.encoding_length().unwrap()); + } + + #[test] + 
fn test_chunk_op_decoding() { + let encoded_chunk_op = vec![0, 3, 1, 0, 1]; + let decoded_chunk_op = ChunkOp::decode(encoded_chunk_op.as_slice()).unwrap(); + assert_eq!(decoded_chunk_op, ChunkOp::ChunkId(vec![LEFT, RIGHT, LEFT])); + + let mut encoded_chunk_op = vec![1, 2]; + encoded_chunk_op.extend(Op::Push(Node::Hash([1; 32])).encode().unwrap()); + encoded_chunk_op.extend(Op::Push(Node::KV(vec![1], vec![2])).encode().unwrap()); + let decoded_chunk_op = ChunkOp::decode(encoded_chunk_op.as_slice()).unwrap(); + assert_eq!( + decoded_chunk_op, + ChunkOp::Chunk(vec![ + Op::Push(Node::Hash([1; 32])), + Op::Push(Node::KV(vec![1], vec![2])) + ]) + ); + } +} diff --git a/merk/src/proofs/chunk/error.rs b/merk/src/proofs/chunk/error.rs new file mode 100644 index 00000000..bd482666 --- /dev/null +++ b/merk/src/proofs/chunk/error.rs @@ -0,0 +1,79 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +#[derive(Debug, thiserror::Error)] +/// Chunk related errors +pub enum ChunkError { + /// Limit too small for first chunk, cannot make progress + #[error("overflow error {0}")] + LimitTooSmall(&'static str), + + /// Chunk index out of bounds + #[error("chunk index out of bounds: {0}")] + OutOfBounds(&'static str), + + /// Empty tree contains no chunks + #[error("chunk from empty tree: {0}")] + EmptyTree(&'static str), + + /// Invalid traversal instruction (points to no element) + #[error("traversal instruction invalid {0}")] + BadTraversalInstruction(&'static str), + + /// Expected ChunkId when parsing chunk ops + #[error("expected chunk id when parsing chunk op")] + ExpectedChunkId, + + /// Expected Chunk when parsing chunk ops + #[error("expected chunk when parsing chunk op")] + ExpectedChunk, + + // Restoration Errors + /// Chunk restoration starts from the root chunk, this lead to a set of + /// root hash values to verify other chunks .... + /// Hence before you can verify a child you need to have verified it's + /// parent. 
+ #[error("unexpected chunk: cannot verify chunk because verification hash is not in memory")] + UnexpectedChunk, + + /// Invalid chunk proof when verifying chunk + #[error("invalid chunk proof: {0}")] + InvalidChunkProof(&'static str), + + /// Invalid multi chunk + #[error("invalid multi chunk: {0}")] + InvalidMultiChunk(&'static str), + + #[error("called finalize too early still expecting chunks")] + RestorationNotComplete, + + /// Internal error, this should never surface + /// if it does, it means wrong assumption in code + #[error("internal error {0}")] + InternalError(&'static str), +} diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs new file mode 100644 index 00000000..2f64ba8d --- /dev/null +++ b/merk/src/proofs/chunk/util.rs @@ -0,0 +1,700 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//! 
Collection of state independent algorithms needed for facilitate chunk +//! production and restoration + +use std::io::Write; + +// TODO: figure out better nomenclature +use crate::{proofs::chunk::binary_range::BinaryRange, Error}; +use crate::{ + proofs::chunk::{ + chunk::{LEFT, RIGHT}, + error::{ChunkError, ChunkError::BadTraversalInstruction}, + }, + Error::InternalError, +}; + +/// Represents the height as a linear combination of 3 amd 2 +/// of the form 3x + 2y +/// this breaks the tree into layers of height 3 or 2 +/// the minimum chunk height is 2, so if tree height is less than 2 +/// we just return a single layer of height 2 +fn chunk_height_per_layer(height: usize) -> Vec { + let mut two_count = 0; + let mut three_count = height / 3; + + if height == 0 { + return vec![]; + } + + // minimum chunk height is 2, if tree height is less than 2 + // return a single layer with chunk height 2 + if height < 2 { + two_count = 1; + } else { + match height % 3 { + 0 => { /* do nothing */ } + 1 => { + // reduce the three_count by 1 + // so the remainder becomes 3 + 1 + // which is equivalent to 2 + 2 + three_count -= 1; + two_count += 2; + } + 2 => { + // remainder is a factor of 2 + // just increase the two_count + two_count += 1; + } + // this is unreachable because height is a positive number + // remainder set after diving by 3 is fixed to [0,1,2] + _ => unreachable!(""), + } + } + + let mut layer_heights = vec![3; three_count]; + layer_heights.extend(vec![2; two_count]); + + layer_heights +} + +/// Return the layer a chunk subtree belongs to +pub fn chunk_layer(height: usize, chunk_id: usize) -> Result { + // remaining depth tells us how deep in the tree the specified chunk is + let mut remaining_depth = generate_traversal_instruction(height, chunk_id)?.len() + 1; + let layer_heights = chunk_height_per_layer(height); + + let mut layer = 1; + + while remaining_depth > 1 { + // remaining depth will always larger than the next layer height + // if it is not already 1 
+ // this is because a every chunk always starts at a layer boundary + // and remaining depth points to a chunk + debug_assert!(remaining_depth > layer_heights[layer - 1]); + + remaining_depth -= layer_heights[layer - 1]; + layer += 1; + } + + Ok(layer - 1) +} + +/// Return the depth of a chunk given the height +/// and chunk id +pub fn chunk_height(height: usize, chunk_id: usize) -> Result { + let chunk_layer = chunk_layer(height, chunk_id)?; + let layer_heights = chunk_height_per_layer(height); + + Ok(layer_heights[chunk_layer]) +} + +/// Given a tree of height h, return the number of chunks needed +/// to completely represent the tree +pub fn number_of_chunks(height: usize) -> usize { + let layer_heights = chunk_height_per_layer(height); + number_of_chunks_internal(layer_heights) +} + +/// Locates the subtree represented by a chunk id and returns +/// the number of chunks under that subtree +pub fn number_of_chunks_under_chunk_id(height: usize, chunk_id: usize) -> Result { + let chunk_layer = chunk_layer(height, chunk_id)?; + let layer_heights = chunk_height_per_layer(height); + + // we only care about the layer heights after the chunk layer + // as we are getting the number of chunks under a subtree and not + // the entire tree of height h + Ok(number_of_chunks_internal( + layer_heights[chunk_layer..].to_vec(), + )) +} + +/// Given the heights of a tree per layer, return the total number of chunks in +/// that tree +fn number_of_chunks_internal(layer_heights: Vec) -> usize { + // a layer consists of 1 or more subtrees of a given height + // here we figure out number of exit nodes from a single subtree for each layer + let mut single_subtree_exits_per_layer = layer_heights + .into_iter() + .map(exit_node_count) + .collect::>(); + + // we don't care about exit nodes from the last layer + // as that points to non-existent subtrees + single_subtree_exits_per_layer.pop(); + + // now we get the total exit nodes per layer + // by multiplying the exits per subtree with 
the number of subtrees on that + // layer + let mut chunk_counts_per_layer = vec![1]; + for i in 0..single_subtree_exits_per_layer.len() { + let previous_layer_chunk_count = chunk_counts_per_layer[i]; + let current_layer_chunk_count = + previous_layer_chunk_count * single_subtree_exits_per_layer[i]; + chunk_counts_per_layer.push(current_layer_chunk_count); + } + + chunk_counts_per_layer.into_iter().sum() +} + +/// Calculates the maximum number of exit nodes for a tree of height h. +fn exit_node_count(height: usize) -> usize { + 2_usize.pow(height as u32) +} + +/// Generate instruction for traversing to a given chunk in a binary tree +pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result, Error> { + let mut instructions = vec![]; + + let total_chunk_count = number_of_chunks(height); + + // out of bounds + if chunk_id < 1 || chunk_id > total_chunk_count { + return Err(Error::ChunkingError(ChunkError::OutOfBounds( + "chunk id out of bounds", + ))); + } + + let mut chunk_range = BinaryRange::new(1, total_chunk_count).map_err(|_| { + Error::ChunkingError(ChunkError::InternalError( + "failed to initialize chunk range", + )) + })?; + + // total chunk count will always be odd because + // from the initial chunk (1) we have an even number of + // exit nodes, and they have even numbers of exit nodes ... 
+ // so total_chunk_count = 1 + some_even_number = odd + debug_assert!(chunk_range.odd()); + + // bisect and reduce the chunk range until we get to the desired chunk + // we keep track of every left right decision we make + while chunk_range.len() > 1 { + if chunk_range.odd() { + // checks if we last decision we made got us to the desired chunk id + let advance_result = chunk_range.advance_range_start().unwrap(); + chunk_range = advance_result.0; + if advance_result.1 == chunk_id { + return Ok(instructions); + } + } else { + // for even chunk range, we are at the decision point + // we can either go left or right + // we first check which half the desired chunk is + // then follow that path + let chunk_id_half = chunk_range + .which_half(chunk_id) + .expect("chunk id must exist in range"); + instructions.push(chunk_id_half); + chunk_range = chunk_range + .get_half(chunk_id_half) + .expect("confirmed range is not odd"); + } + } + + // chunk range len is exactly 1 + // this must be the desired chunk id + // return instructions that got us here + Ok(instructions) +} + +/// Determine the chunk id given the traversal instruction and the max height of +/// the tree +pub fn chunk_id_from_traversal_instruction( + traversal_instruction: &[bool], + height: usize, +) -> Result { + // empty traversal instruction points to the first chunk + if traversal_instruction.is_empty() { + return Ok(1); + } + + let mut chunk_count = number_of_chunks(height); + let mut current_chunk_id = 1; + + let mut layer_heights = chunk_height_per_layer(height); + let last_layer_height = layer_heights.pop().expect("confirmed not empty"); + + // traversal instructions should only point to the root node of chunks (chunk + // boundaries) the layer heights represent the height of each chunk layer + // the last chunk layer is at height = total_height - last_chunk_height + 1 + // traversal instructions require 1 less than height to address it + // e.g. 
height 1 is represented by [] - len of 0 + // height 2 is represented by [left] or [right] len of 1 + // therefore last chunk root node is address with total_height - + // last_chunk_height + if traversal_instruction.len() > height - last_layer_height { + return Err(Error::ChunkingError(BadTraversalInstruction( + "traversal instruction should not address nodes past the root of the last layer chunks", + ))); + } + + // verify that the traversal instruction points to a chunk boundary + let mut traversal_length = traversal_instruction.len(); + let mut relevant_layer_heights = vec![]; + for layer_height in layer_heights { + // the traversal_length should be a perfect sum of a subset of the layer_height + // if the traversal_length is not 0, it should be larger than or equal to the + // next layer height. + if traversal_length < layer_height { + return Err(Error::ChunkingError(BadTraversalInstruction( + "traversal instruction should point to a chunk boundary", + ))); + } + + traversal_length -= layer_height; + relevant_layer_heights.push(layer_height); + + if traversal_length == 0 { + break; + } + } + + // take layer_height instructions and determine the updated chunk id + let mut start_index = 0; + for layer_height in relevant_layer_heights { + let end_index = start_index + layer_height; + let subset_instructions = &traversal_instruction[start_index..end_index]; + + // offset multiplier determines what subchunk we are on based on the given + // instruction offset multiplier just converts the binary instruction to + // decimal, taking left as 0 and right as 0 i.e [left, left, left] = 0 + // means we are at subchunk 0 + let mut offset_multiplier = 0; + for (i, instruction) in subset_instructions.iter().enumerate() { + offset_multiplier += 2_usize.pow((subset_instructions.len() - i - 1) as u32) + * (1 - *instruction as usize); + } + + if chunk_count % 2 != 0 { + // remove the current chunk from the chunk count + chunk_count -= 1; + } + + chunk_count /= 
exit_node_count(layer_height); + + current_chunk_id = current_chunk_id + offset_multiplier * chunk_count + 1; + + start_index = end_index; + } + + Ok(current_chunk_id) +} + +/// Determine the chunk id given the traversal instruction and the max height of +/// the tree. This can recover from traversal instructions not pointing to a +/// chunk boundary, in such a case, it backtracks until it hits a chunk +/// boundary. +pub fn chunk_id_from_traversal_instruction_with_recovery( + traversal_instruction: &[bool], + height: usize, +) -> Result { + let chunk_id_result = chunk_id_from_traversal_instruction(traversal_instruction, height); + if chunk_id_result.is_err() { + return chunk_id_from_traversal_instruction_with_recovery( + &traversal_instruction[0..traversal_instruction.len() - 1], + height, + ); + } + chunk_id_result +} + +/// Generate instruction for traversing to a given chunk in a binary tree, +/// returns string representation +pub fn generate_traversal_instruction_as_string( + height: usize, + chunk_id: usize, +) -> Result { + let instruction = generate_traversal_instruction(height, chunk_id)?; + Ok(traversal_instruction_as_string(&instruction)) +} + +/// Convert traversal instruction to byte string +/// 1 represents left (true) +/// 0 represents right (false) +pub fn traversal_instruction_as_string(instruction: &[bool]) -> String { + instruction + .iter() + .map(|v| if *v { "1" } else { "0" }) + .collect() +} + +/// Converts a string that represents a traversal instruction +/// to a vec of bool, true = left and false = right +pub fn string_as_traversal_instruction(instruction_string: &str) -> Result, Error> { + instruction_string + .chars() + .map(|char| match char { + '1' => Ok(LEFT), + '0' => Ok(RIGHT), + _ => Err(Error::ChunkingError(ChunkError::BadTraversalInstruction( + "failed to parse instruction string", + ))), + }) + .collect() +} + +pub fn write_to_vec(dest: &mut W, value: &[u8]) -> Result<(), Error> { + dest.write_all(value) + .map_err(|_e| 
InternalError("failed to write to vector")) +} + +#[cfg(test)] +mod test { + + use super::*; + use crate::proofs::chunk::chunk::{LEFT, RIGHT}; + + #[test] + fn test_chunk_height_per_layer() { + let layer_heights = chunk_height_per_layer(10); + assert_eq!(layer_heights.iter().sum::(), 10); + assert_eq!(layer_heights, [3, 3, 2, 2]); + + let layer_heights = chunk_height_per_layer(45); + assert_eq!(layer_heights.iter().sum::(), 45); + assert_eq!(layer_heights, [3; 15]); + + let layer_heights = chunk_height_per_layer(2); + assert_eq!(layer_heights.iter().sum::(), 2); + assert_eq!(layer_heights, [2]); + + // height less than 2 + let layer_heights = chunk_height_per_layer(1); + assert_eq!(layer_heights.iter().sum::(), 2); + assert_eq!(layer_heights, [2]); + + let layer_heights = chunk_height_per_layer(0); + assert_eq!(layer_heights.iter().sum::(), 0); + assert_eq!(layer_heights, Vec::::new()); + } + + #[test] + fn test_exit_node_count() { + // tree with just one node has 2 exit nodes + assert_eq!(exit_node_count(1), 2); + + // tree with height 2 has 4 exit nodes + assert_eq!(exit_node_count(2), 4); + + // tree with height 6 has 64 exit nodes + assert_eq!(exit_node_count(6), 64); + } + + #[test] + fn test_number_of_chunks() { + // given a chunk of height less than 3 chunk count should be 1 + assert_eq!(number_of_chunks(1), 1); + assert_eq!(number_of_chunks(2), 1); + + // tree with height 4 should have 5 chunks + // we split the tree into 2 layers of chunk height 2 each + // first layer contains just one chunk (1), but has 4 exit nodes + // hence total chunk count = 1 + 4 = 5 + assert_eq!(number_of_chunks(4), 5); + + // tree with height 6 should have 9 chunks + // will be split into two layers of chunk height 3 = [3,3] + // first chunk takes 1, has 2^3 = 8 exit nodes + // total chunks = 1 + 8 = 9 + assert_eq!(number_of_chunks(6), 9); + + // tree with height 10 should have 341 chunks + // will be split into 5 layers = [3, 3, 2, 2] + // first layer has just 1 chunk, exit 
nodes = 2^3 = 8 + // second layer has 4 chunks, exit nodes = 2^3 * 8 = 64 + // third layer has 16 chunks, exit nodes = 2^2 * 64 = 256 + // fourth layer has 256 chunks + // total chunks = 1 + 8 + 64 + 256 = 329 chunks + assert_eq!(number_of_chunks(10), 329); + } + + #[test] + fn test_number_of_chunks_under_chunk_id() { + // tree with height less than 3 should have just 1 chunk + assert_eq!(number_of_chunks_under_chunk_id(1, 1).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(2, 1).unwrap(), 1); + + // asking for chunk out of bounds should return error + assert!(number_of_chunks_under_chunk_id(1, 3).is_err()); + + // tree with height 4 should have 5 chunks at chunk id 1 + // but 1 chunk at id 2 - 5 + assert_eq!(number_of_chunks_under_chunk_id(4, 1).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(4, 2).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(4, 3).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(4, 4).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(4, 5).unwrap(), 1); + + // tree with height 10 should have 329 chunks + // layer_heights = [3, 3, 2, 2] + // chunk_id 1 = 329 + // chunk_id 2 = 41 i.e (329 - 1) / 2^3 + // chunk_id 3 = 5 i.e (41 - 1) / 2^3 + // chunk_id 4 = 1 i.e (5 - 1) / 2^2 + // chunk_id 5 = 1 on the same layer as 4 + // chunk_id 43 = 41 as chunk 43 should wrap back to the same layer as chunk_id 2 + // chunk_id 44 = mirrors chunk_id 3 + // chunk_id 45 = mirrors chunk_id 4 + // chunk_id 46 = mirrors chunk_id 5 + assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 329); + assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 41); + assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 5).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 43).unwrap(), 41); + assert_eq!(number_of_chunks_under_chunk_id(10, 44).unwrap(), 5); + 
assert_eq!(number_of_chunks_under_chunk_id(10, 45).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 46).unwrap(), 1); + } + + #[test] + fn test_traversal_instruction_generation() { + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + // height: 4 + // layer_height: 3, 3 + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // ............................ + // / \ \ + // 4 6 9 + // 5 chunks + // chunk 1 entry - 3 + // chunk 2 entry - 0 + // chunk 3 entry - 2 + // chunk 4 entry - 5 + // chunk 5 entry - 8 + + // chunk 1 entry - 3 is at the top of the tree so empty instruction set + let instruction = + generate_traversal_instruction(4, 1).expect("should generate traversal instruction"); + let empty_instruction: &[bool] = &[]; + assert_eq!(instruction, empty_instruction); + + // chunk 2 entry - 0 + // go left twice from root i.e 3 left -> 1 left -> 0 + let instruction = + generate_traversal_instruction(4, 2).expect("should generate traversal instruction"); + assert_eq!(instruction, &[LEFT, LEFT]); + + // chunk 3 entry - 2 + // go left then right from root i.e 3 left -> 1 right -> 2 + let instruction = + generate_traversal_instruction(4, 3).expect("should generate traversal instruction"); + assert_eq!(instruction, &[LEFT, RIGHT]); + + // chunk 4 entry - 5 + // go right then left i.e 3 right -> 7 left -> 5 + let instruction = + generate_traversal_instruction(4, 4).expect("should generate traversal instruction"); + assert_eq!(instruction, &[RIGHT, LEFT]); + + // chunk 5 entry - 8 + // go right twice i.e 3 right -> 7 right -> 8 + let instruction = + generate_traversal_instruction(4, 5).expect("should generate traversal instruction"); + assert_eq!(instruction, &[RIGHT, RIGHT]); + + // out of bound tests + assert!(generate_traversal_instruction(4, 6).is_err()); + assert!(generate_traversal_instruction(4, 0).is_err()); + } + + #[test] + fn test_chunk_height() { + // tree of height 6 + // all chunks have the same height + // since layer height 
= [3,3] + // we have 9 chunks in a tree of this height + for i in 1..=9 { + assert_eq!(chunk_height(6, i).unwrap(), 3); + } + + // tree of height 5 + // layer_height = [3, 2] + // we have 9 chunks, just the first chunk is of height 3 + // the rest are of height 2 + assert_eq!(chunk_height(5, 1).unwrap(), 3); + for i in 2..=9 { + assert_eq!(chunk_height(5, i).unwrap(), 2); + } + + // tree of height 10 + // layer_height = [3, 3, 2, 2] + // just going to check chunk 1 - 5 + assert_eq!(chunk_height(10, 1).unwrap(), 3); + assert_eq!(chunk_height(10, 2).unwrap(), 3); + assert_eq!(chunk_height(10, 3).unwrap(), 2); + assert_eq!(chunk_height(10, 4).unwrap(), 2); + assert_eq!(chunk_height(10, 5).unwrap(), 2); + } + + #[test] + fn test_traversal_instruction_as_string() { + assert_eq!(traversal_instruction_as_string(&vec![]), ""); + assert_eq!(traversal_instruction_as_string(&vec![LEFT]), "1"); + assert_eq!(traversal_instruction_as_string(&vec![RIGHT]), "0"); + assert_eq!( + traversal_instruction_as_string(&vec![RIGHT, LEFT, LEFT, RIGHT]), + "0110" + ); + } + + #[test] + fn test_instruction_string_to_traversal_instruction() { + assert_eq!(string_as_traversal_instruction("1").unwrap(), vec![LEFT]); + assert_eq!(string_as_traversal_instruction("0").unwrap(), vec![RIGHT]); + assert_eq!( + string_as_traversal_instruction("001").unwrap(), + vec![RIGHT, RIGHT, LEFT] + ); + assert!(string_as_traversal_instruction("002").is_err()); + assert_eq!( + string_as_traversal_instruction("").unwrap(), + Vec::::new() + ); + } + + #[test] + fn test_chunk_id_from_traversal_instruction() { + // tree of height 4 + let traversal_instruction = generate_traversal_instruction(4, 1).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 1 + ); + let traversal_instruction = generate_traversal_instruction(4, 2).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 2 + ); + let 
traversal_instruction = generate_traversal_instruction(4, 3).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 3 + ); + let traversal_instruction = generate_traversal_instruction(4, 4).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 4 + ); + + // tree of height 6 + let traversal_instruction = generate_traversal_instruction(6, 1).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 1 + ); + let traversal_instruction = generate_traversal_instruction(6, 2).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 2 + ); + let traversal_instruction = generate_traversal_instruction(6, 3).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 3 + ); + let traversal_instruction = generate_traversal_instruction(6, 4).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 4 + ); + let traversal_instruction = generate_traversal_instruction(6, 5).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 5 + ); + let traversal_instruction = generate_traversal_instruction(6, 6).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 6 + ); + let traversal_instruction = generate_traversal_instruction(6, 7).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 7 + ); + let traversal_instruction = generate_traversal_instruction(6, 8).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 8 + ); + let traversal_instruction = generate_traversal_instruction(6, 9).unwrap(); + assert_eq!( + 
chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 9 + ); + } + + #[test] + fn test_chunk_id_from_traversal_instruction_with_recovery() { + // tree of height 5 + // layer heights = [3, 2] + // first chunk boundary is at instruction len 0 e.g. [] + // second chunk boundary is at instruction len 3 e.g. [left, left, left] + // anything outside of this should return an error with regular chunk_id + // function with recovery we expect this to backtrack to the last chunk + // boundary e.g. [left] should backtrack to [] + // [left, left, right, left] should backtrack to [left, left, right] + assert!(chunk_id_from_traversal_instruction(&[LEFT], 5).is_err()); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT], 5).unwrap(), + 1 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT], 5).unwrap(), + 1 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT, RIGHT], 5).unwrap(), + 3 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT, RIGHT, LEFT], 5) + .unwrap(), + 3 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT; 50], 5).unwrap(), + 2 + ); + } +} diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 819fd43b..b3bf9cf1 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -43,6 +43,12 @@ use super::{Node, Op}; use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_hash, NULL_HASH}; #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; +#[cfg(feature = "full")] +use crate::{ + proofs::chunk::chunk::{LEFT, RIGHT}, + Link, + TreeFeatureType::SummedMerkNode, +}; #[cfg(any(feature = "full", feature = "verify"))] /// Contains a tree's child node and its hash. 
The hash can always be assumed to @@ -55,6 +61,36 @@ pub struct Child { pub hash: CryptoHash, } +impl Child { + #[cfg(feature = "full")] + pub fn as_link(&self) -> Link { + let (key, sum) = match &self.tree.node { + Node::KV(key, _) | Node::KVValueHash(key, ..) => (key.as_slice(), None), + Node::KVValueHashFeatureType(key, _, _, feature_type) => { + let sum_value = match feature_type { + SummedMerkNode(sum) => Some(*sum), + _ => None, + }; + (key.as_slice(), sum_value) + } + // for the connection between the trunk and leaf chunks, we don't + // have the child key so we must first write in an empty one. once + // the leaf gets verified, we can write in this key to its parent + _ => (&[] as &[u8], None), + }; + + Link::Reference { + hash: self.hash, + sum, + child_heights: ( + self.tree.child_heights.0 as u8, + self.tree.child_heights.1 as u8, + ), + key: key.to_vec(), + } + } +} + #[cfg(any(feature = "full", feature = "verify"))] /// A binary tree data structure used to represent a select subset of a tree /// when verifying Merkle proofs. @@ -68,6 +104,8 @@ pub struct Tree { pub right: Option, /// Height pub height: usize, + /// Child Heights + pub child_heights: (usize, usize), } #[cfg(any(feature = "full", feature = "verify"))] @@ -79,6 +117,7 @@ impl From for Tree { left: None, right: None, height: 1, + child_heights: (0, 0), } } } @@ -167,6 +206,42 @@ impl Tree { Ok(()) } + #[cfg(feature = "full")] + /// Does an in-order traversal over references to all the nodes in the tree, + /// calling `visit_node` for each with the current traversal path. 
+ pub fn visit_refs_track_traversal_and_parent< + F: FnMut(&Self, &mut Vec, Option<&[u8]>) -> Result<(), Error>, + >( + &self, + base_traversal_instruction: &mut Vec, + parent_key: Option<&[u8]>, + visit_node: &mut F, + ) -> Result<(), Error> { + if let Some(child) = &self.left { + base_traversal_instruction.push(LEFT); + child.tree.visit_refs_track_traversal_and_parent( + base_traversal_instruction, + Some(self.key()), + visit_node, + )?; + base_traversal_instruction.pop(); + } + + visit_node(self, base_traversal_instruction, parent_key)?; + + if let Some(child) = &self.right { + base_traversal_instruction.push(RIGHT); + child.tree.visit_refs_track_traversal_and_parent( + base_traversal_instruction, + Some(self.key()), + visit_node, + )?; + base_traversal_instruction.pop(); + } + + Ok(()) + } + /// Returns an immutable reference to the child on the given side, if any. #[cfg(any(feature = "full", feature = "verify"))] pub const fn child(&self, left: bool) -> Option<&Child> { @@ -202,6 +277,13 @@ impl Tree { self.height = self.height.max(child.height + 1); + // update child height + if left { + self.child_heights.0 = child.height; + } else { + self.child_heights.1 = child.height; + } + let hash = child.hash().unwrap_add_cost(&mut cost); let tree = Box::new(child); *self.child_mut(left) = Some(Child { tree, hash }); @@ -238,13 +320,24 @@ impl Tree { _ => panic!("Expected node to be type KV"), } } + + #[cfg(feature = "full")] + pub(crate) fn sum(&self) -> Option { + match self.node { + Node::KVValueHashFeatureType(.., feature_type) => match feature_type { + SummedMerkNode(sum) => Some(sum), + _ => None, + }, + _ => panic!("Expected node to be type KVValueHashFeatureType"), + } + } } #[cfg(feature = "full")] /// `LayerIter` iterates over the nodes in a `Tree` at a given depth. Nodes are /// visited in order. 
pub struct LayerIter<'a> { - stack: Vec<&'a Tree>, + stack: Vec<(&'a Tree, usize)>, depth: usize, } @@ -257,25 +350,9 @@ impl<'a> LayerIter<'a> { depth, }; - iter.traverse_to_start(tree, depth); + iter.stack.push((tree, 0)); iter } - - /// Builds up the stack by traversing through left children to the desired - /// depth. - fn traverse_to_start(&mut self, tree: &'a Tree, remaining_depth: usize) { - self.stack.push(tree); - - if remaining_depth == 0 { - return; - } - - if let Some(child) = tree.child(true) { - self.traverse_to_start(&child.tree, remaining_depth - 1) - } else { - panic!("Could not traverse to given layer") - } - } } #[cfg(feature = "full")] @@ -283,32 +360,20 @@ impl<'a> Iterator for LayerIter<'a> { type Item = &'a Tree; fn next(&mut self) -> Option { - let item = self.stack.pop(); - let mut popped = item; - - loop { - if self.stack.is_empty() { - return item; - } - - let parent = self.stack.last().unwrap(); - let left_child = parent.child(true).unwrap(); - let right_child = parent.child(false).unwrap(); - - if left_child.tree.as_ref() == popped.unwrap() { - self.stack.push(&right_child.tree); - - while self.stack.len() - 1 < self.depth { - let parent = self.stack.last().unwrap(); - let left_child = parent.child(true).unwrap(); - self.stack.push(&left_child.tree); + while let Some((item, item_depth)) = self.stack.pop() { + if item_depth != self.depth { + if let Some(right_child) = item.child(false) { + self.stack.push((&right_child.tree, item_depth + 1)) + } + if let Some(left_child) = item.child(true) { + self.stack.push((&left_child.tree, item_depth + 1)) } - - return item; } else { - popped = self.stack.pop(); + return Some(item); } } + + None } } @@ -471,7 +536,19 @@ where .wrap_with_cost(cost); } - Ok(stack.pop().unwrap()).wrap_with_cost(cost) + let tree = stack.pop().unwrap(); + + if tree.child_heights.0.max(tree.child_heights.1) + - tree.child_heights.0.min(tree.child_heights.1) + > 1 + { + return Err(Error::InvalidProofError( + "Expected 
proof to result in a valid avl tree".to_string(), + )) + .wrap_with_cost(cost); + } + + Ok(tree).wrap_with_cost(cost) } #[cfg(feature = "full")] @@ -555,4 +632,104 @@ mod test { } assert!(iter.next().is_none()); } + + #[test] + fn execute_non_avl_tree() { + let non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + let execution_result = + execute(non_avl_tree_proof.into_iter().map(Ok), false, |_| Ok(())).unwrap(); + assert!(execution_result.is_err()); + } + + #[test] + fn child_to_link() { + let basic_merk_tree = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Child, + ]; + let tree = execute(basic_merk_tree.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .unwrap(); + + let left_link = tree.left.as_ref().unwrap().as_link(); + let right_link = tree.right.as_ref().unwrap().as_link(); + + assert_eq!( + left_link, + Link::Reference { + hash: tree.left.as_ref().map(|node| node.hash).unwrap(), + sum: None, + child_heights: (0, 0), + key: vec![1] + } + ); + + assert_eq!( + right_link, + Link::Reference { + hash: tree.right.as_ref().map(|node| node.hash).unwrap(), + sum: None, + child_heights: (0, 0), + key: vec![3] + } + ); + + let sum_merk_tree = vec![ + Op::Push(Node::KVValueHashFeatureType( + vec![1], + vec![1], + [0; 32], + SummedMerkNode(3), + )), + Op::Push(Node::KVValueHashFeatureType( + vec![2], + vec![2], + [0; 32], + SummedMerkNode(1), + )), + Op::Parent, + Op::Push(Node::KVValueHashFeatureType( + vec![3], + vec![3], + [0; 32], + SummedMerkNode(1), + )), + Op::Child, + ]; + let tree = execute(sum_merk_tree.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .unwrap(); + + let left_link = tree.left.as_ref().unwrap().as_link(); + let right_link = tree.right.as_ref().unwrap().as_link(); + + assert_eq!( + left_link, + Link::Reference { 
+ hash: tree.left.as_ref().map(|node| node.hash).unwrap(), + sum: Some(3), + child_heights: (0, 0), + key: vec![1] + } + ); + + assert_eq!( + right_link, + Link::Reference { + hash: tree.right.as_ref().map(|node| node.hash).unwrap(), + sum: Some(1), + child_heights: (0, 0), + key: vec![3] + } + ); + } } diff --git a/merk/src/test_utils/mod.rs b/merk/src/test_utils/mod.rs index 6abe167e..49a492e2 100644 --- a/merk/src/test_utils/mod.rs +++ b/merk/src/test_utils/mod.rs @@ -164,6 +164,7 @@ pub fn apply_to_memonly( }) .unwrap() .expect("commit failed"); + println!("{:?}", &tree); assert_tree_invariants(&tree); tree }) @@ -260,7 +261,15 @@ pub fn make_tree_rand( /// Create tree with initial fixed values and apply `node count` Put ops using /// sequential keys using memory only +/// starting tree node is [0; 20] pub fn make_tree_seq(node_count: u64) -> TreeNode { + make_tree_seq_with_start_key(node_count, [0; 20].to_vec()) +} + +/// Create tree with initial fixed values and apply `node count` Put ops using +/// sequential keys using memory only +/// requires a starting key vector +pub fn make_tree_seq_with_start_key(node_count: u64, start_key: Vec) -> TreeNode { let batch_size = if node_count >= 10_000 { assert_eq!(node_count % 10_000, 0); 10_000 @@ -269,7 +278,8 @@ pub fn make_tree_seq(node_count: u64) -> TreeNode { }; let value = vec![123; 60]; - let mut tree = TreeNode::new(vec![0; 20], value, None, BasicMerkNode).unwrap(); + + let mut tree = TreeNode::new(start_key, value, None, BasicMerkNode).unwrap(); let batch_count = node_count / batch_size; for i in 0..batch_count { @@ -279,7 +289,6 @@ pub fn make_tree_seq(node_count: u64) -> TreeNode { tree } - /// Shortcut to open a Merk with a provided storage and batch pub fn empty_path_merk<'db, S>( storage: &'db S, diff --git a/merk/src/tree/link.rs b/merk/src/tree/link.rs index ab26159b..fa0d1563 100644 --- a/merk/src/tree/link.rs +++ b/merk/src/tree/link.rs @@ -46,7 +46,7 @@ use crate::HASH_LENGTH_U32; #[cfg(feature = 
"full")] /// Represents a reference to a child tree node. Links may or may not contain /// the child's `Tree` instance (storing its key if not). -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Link { /// Represents a child tree node which has been pruned from memory, only /// retaining a reference to it (its key). The child node can always be diff --git a/merk/src/tree/mod.rs b/merk/src/tree/mod.rs index cb732b56..401b8722 100644 --- a/merk/src/tree/mod.rs +++ b/merk/src/tree/mod.rs @@ -37,7 +37,7 @@ mod encoding; #[cfg(feature = "full")] mod fuzz_tests; #[cfg(any(feature = "full", feature = "verify"))] -mod hash; +pub mod hash; #[cfg(feature = "full")] mod iter; #[cfg(feature = "full")] @@ -102,7 +102,7 @@ use crate::{error::Error, Error::Overflow}; #[cfg(feature = "full")] /// The fields of the `Tree` type, stored on the heap. -#[derive(Clone, Encode, Decode, Debug)] +#[derive(Clone, Encode, Decode, Debug, PartialEq)] pub struct TreeNodeInner { pub(crate) left: Option, pub(crate) right: Option, @@ -141,7 +141,7 @@ impl Terminated for Box {} /// Trees' inner fields are stored on the heap so that nodes can recursively /// link to each other, and so we can detach nodes from their parents, then /// reattach without allocating or freeing heap memory. 
-#[derive(Clone)] +#[derive(Clone, PartialEq)] pub struct TreeNode { pub(crate) inner: Box, pub(crate) old_value: Option>, diff --git a/merk/src/visualize.rs b/merk/src/visualize.rs index 4b3b2fb7..0235f92d 100644 --- a/merk/src/visualize.rs +++ b/merk/src/visualize.rs @@ -87,9 +87,9 @@ impl<'a, 'db, S: StorageContext<'db>, T: Visualize, F: Fn(&[u8]) -> T + Copy> Vi impl<'a, T: Visualize, F: Fn(&[u8]) -> T + Copy> Visualize for VisualizableTree<'a, F> { fn visualize(&self, mut drawer: Drawer) -> Result> { drawer.write(b"[key: ")?; - drawer = self.tree.inner.key_as_slice().visualize(drawer)?; + drawer = self.tree.inner.kv.key_as_ref().visualize(drawer)?; drawer.write(b", value: ")?; - drawer = (self.deserialize_fn)(self.tree.inner.value_as_slice()).visualize(drawer)?; + drawer = (self.deserialize_fn)(self.tree.inner.kv.value_as_slice()).visualize(drawer)?; drawer.down(); drawer.write(b"\n")?; diff --git a/storage/src/rocksdb_storage.rs b/storage/src/rocksdb_storage.rs index 90d0cc21..14c4df5a 100644 --- a/storage/src/rocksdb_storage.rs +++ b/storage/src/rocksdb_storage.rs @@ -28,7 +28,7 @@ //! GroveDB storage layer implemented over RocksDB backend. mod storage; -mod storage_context; +pub mod storage_context; pub mod test_utils; #[cfg(test)] mod tests; diff --git a/storage/src/rocksdb_storage/storage_context.rs b/storage/src/rocksdb_storage/storage_context.rs index 7481fc13..0611d51c 100644 --- a/storage/src/rocksdb_storage/storage_context.rs +++ b/storage/src/rocksdb_storage/storage_context.rs @@ -29,7 +29,7 @@ //! Implementation of prefixed storage context. 
mod batch; -mod context_immediate; +pub mod context_immediate; mod context_no_tx; mod context_tx; mod raw_iterator; diff --git a/tutorials/Cargo.toml b/tutorials/Cargo.toml index ec220b44..409a1c64 100644 --- a/tutorials/Cargo.toml +++ b/tutorials/Cargo.toml @@ -7,9 +7,14 @@ default-run = "tutorials" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -grovedb = { git = "https://github.com/dashpay/grovedb.git" } -path = { path = "../path" } +#grovedb = { git = "https://github.com/dashpay/grovedb.git" } +grovedb = { path = "../grovedb" } +grovedb-merk = { path = "../merk" } +grovedb-storage = { path = "../storage" } +grovedb-visualize = { path = "../visualize" } +grovedb-path = { path = "../path" } rand = "0.8.5" +hex = "0.4" [workspace] diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs new file mode 100644 index 00000000..fc9c058c --- /dev/null +++ b/tutorials/src/bin/replication.rs @@ -0,0 +1,244 @@ +use std::collections::VecDeque; +use std::path::Path; +use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, replication::StateSyncInfo}; +use grovedb::reference_path::ReferencePathType; +use rand::{distributions::Alphanumeric, Rng, }; +use grovedb::element::SumValue; +use grovedb_path::{SubtreePath}; + +const MAIN_ΚΕΥ: &[u8] = b"key_main"; +const MAIN_ΚΕΥ_EMPTY: &[u8] = b"key_main_empty"; + +const KEY_INT_0: &[u8] = b"key_int_0"; +const KEY_INT_REF_0: &[u8] = b"key_int_ref_0"; +const KEY_INT_A: &[u8] = b"key_sum_0"; +const ROOT_PATH: &[&[u8]] = &[]; + +// Allow insertions to overwrite trees +// This is necessary so the tutorial can be rerun easily +const INSERT_OPTIONS: Option = Some(InsertOptions { + validate_insertion_does_not_override: false, + validate_insertion_does_not_override_tree: false, + base_root_storage_is_free: true, +}); + +fn populate_db(grovedb_path: String) -> GroveDb { + let db = 
GroveDb::open(grovedb_path).unwrap(); + + insert_empty_tree_db(&db, ROOT_PATH, MAIN_ΚΕΥ); + insert_empty_tree_db(&db, ROOT_PATH, MAIN_ΚΕΥ_EMPTY); + insert_empty_tree_db(&db, &[MAIN_ΚΕΥ], KEY_INT_0); + + let tx = db.start_transaction(); + let batch_size = 100; + for i in 0..=10 { + insert_range_values_db(&db, &[MAIN_ΚΕΥ, KEY_INT_0], i * batch_size, i * batch_size + batch_size - 1, &tx); + } + let _ = db.commit_transaction(tx); + + insert_empty_tree_db(&db, &[MAIN_ΚΕΥ], KEY_INT_REF_0); + + let tx_2 = db.start_transaction(); + insert_range_ref_double_values_db(&db, &[MAIN_ΚΕΥ, KEY_INT_REF_0], KEY_INT_0, 1, 50, &tx_2); + let _ = db.commit_transaction(tx_2); + + insert_empty_sum_tree_db(&db, &[MAIN_ΚΕΥ], KEY_INT_A); + + let tx_3 = db.start_transaction(); + insert_range_values_db(&db, &[MAIN_ΚΕΥ, KEY_INT_A], 1, 100, &tx_3); + insert_sum_element_db(&db, &[MAIN_ΚΕΥ, KEY_INT_A], 101, 150, &tx_3); + let _ = db.commit_transaction(tx_3); + db +} + +fn create_empty_db(grovedb_path: String) -> GroveDb { + let db = GroveDb::open(grovedb_path).unwrap(); + db +} + +fn main() { + let path_source = generate_random_path("../tutorial-storage/", "/db_0", 24); + let db_source = populate_db(path_source.clone()); + + let checkpoint_dir = path_source + "/checkpoint"; + let path_checkpoint = Path::new(checkpoint_dir.as_str()); + + db_source.create_checkpoint(&path_checkpoint).expect("cannot create checkpoint"); + let db_checkpoint_0 = GroveDb::open(path_checkpoint).expect("cannot open groveDB from checkpoint"); + + let path_destination = generate_random_path("../tutorial-storage/", "/db_copy", 24); + let db_destination = create_empty_db(path_destination.clone()); + + println!("\n######### root_hashes:"); + let root_hash_source = db_source.root_hash(None).unwrap().unwrap(); + println!("root_hash_source: {:?}", hex::encode(root_hash_source)); + let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); + println!("root_hash_checkpoint_0: {:?}", 
hex::encode(root_hash_checkpoint_0)); + let root_hash_destination = db_destination.root_hash(None).unwrap().unwrap(); + println!("root_hash_destination: {:?}", hex::encode(root_hash_destination)); + + println!("\n######### source_subtree_metadata of db_source"); + let subtrees_metadata_source = db_source.get_subtrees_metadata(None).unwrap(); + println!("{:?}", subtrees_metadata_source); + + println!("\n######### db_checkpoint_0 -> db_destination state sync"); + let state_info = db_destination.create_state_sync_info(); + let tx = db_destination.start_transaction(); + sync_db_demo(&db_checkpoint_0, &db_destination, state_info, &tx).unwrap(); + db_destination.commit_transaction(tx).unwrap().expect("expected to commit transaction"); + + println!("\n######### verify db_destination"); + let incorrect_hashes = db_destination.verify_grovedb(None).unwrap(); + if incorrect_hashes.len() > 0 { + println!("DB verification failed!"); + } + else { + println!("DB verification success"); + } + + println!("\n######### root_hashes:"); + let root_hash_source = db_source.root_hash(None).unwrap().unwrap(); + println!("root_hash_source: {:?}", hex::encode(root_hash_source)); + let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); + println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); + let root_hash_destination = db_destination.root_hash(None).unwrap().unwrap(); + println!("root_hash_destination: {:?}", hex::encode(root_hash_destination)); + + let query_path = &[MAIN_ΚΕΥ, KEY_INT_0]; + let query_key = (20487u32).to_be_bytes().to_vec(); + println!("\n######## Query on db_checkpoint_0:"); + query_db(&db_checkpoint_0, query_path, query_key.clone()); + println!("\n######## Query on db_destination:"); + query_db(&db_destination, query_path, query_key.clone()); + + return; + +} + +fn insert_empty_tree_db(db: &GroveDb, path: &[&[u8]], key: &[u8]) +{ + db.insert(path, key, Element::empty_tree(), INSERT_OPTIONS, None) + .unwrap() + 
.expect("successfully inserted tree"); +} +fn insert_range_values_db(db: &GroveDb, path: &[&[u8]], min_i: u32, max_i: u32, transaction: &Transaction) +{ + for i in min_i..=max_i { + let i_vec = i.to_be_bytes().to_vec(); + db.insert( + path, + &i_vec, + Element::new_item(i_vec.to_vec()), + INSERT_OPTIONS, + Some(&transaction), + ) + .unwrap() + .expect("successfully inserted values"); + } +} + +fn insert_range_ref_double_values_db(db: &GroveDb, path: &[&[u8]], ref_key: &[u8], min_i: u32, max_i: u32, transaction: &Transaction) +{ + for i in min_i..=max_i { + let i_vec = i.to_be_bytes().to_vec(); + let value = i * 2; + let value_vec = value.to_be_bytes().to_vec(); + db.insert( + path, + &i_vec, + Element::new_reference(ReferencePathType::AbsolutePathReference(vec![ + MAIN_ΚΕΥ.to_vec(), + ref_key.to_vec(), + value_vec.to_vec() + ])), + INSERT_OPTIONS, + Some(&transaction), + ) + .unwrap() + .expect("successfully inserted values"); + } +} + +fn insert_empty_sum_tree_db(db: &GroveDb, path: &[&[u8]], key: &[u8]) +{ + db.insert(path, key, Element::empty_sum_tree(), INSERT_OPTIONS, None) + .unwrap() + .expect("successfully inserted tree"); +} +fn insert_sum_element_db(db: &GroveDb, path: &[&[u8]], min_i: u32, max_i: u32, transaction: &Transaction) +{ + for i in min_i..=max_i { + //let value : u32 = i; + let value = i as u64; + //let value: u64 = 1; + let i_vec = i.to_be_bytes().to_vec(); + db.insert( + path, + &i_vec, + Element::new_sum_item(value as SumValue), + INSERT_OPTIONS, + Some(&transaction), + ) + .unwrap() + .expect("successfully inserted values"); + } +} +fn generate_random_path(prefix: &str, suffix: &str, len: usize) -> String { + let random_string: String = rand::thread_rng() + .sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect(); + format!("{}{}{}", prefix, random_string, suffix) +} + +fn query_db(db: &GroveDb, path: &[&[u8]], key: Vec) { + let path_vec: Vec> = path.iter() + .map(|&slice| slice.to_vec()) + .collect(); + + let mut query = 
Query::new(); + query.insert_key(key); + + let path_query = PathQuery::new_unsized(path_vec, query.clone()); + + let (elements, _) = db + .query_item_value(&path_query, true, None) + .unwrap() + .expect("expected successful get_path_query"); + for e in elements.into_iter() { + println!(">> {:?}", e); + } + + let proof = db.prove_query(&path_query).unwrap().unwrap(); + // Get hash from query proof and print to terminal along with GroveDB root hash. + let (verify_hash, _) = GroveDb::verify_query(&proof, &path_query).unwrap(); + println!("verify_hash: {:?}", hex::encode(verify_hash)); + if verify_hash == db.root_hash(None).unwrap().unwrap() { + println!("Query verified"); + } else { println!("Verification FAILED"); }; +} + +fn sync_db_demo( + source_db: &GroveDb, + target_db: &GroveDb, + state_sync_info: StateSyncInfo, + target_tx: &Transaction, +) -> Result<(), grovedb::Error> { + let app_hash = source_db.root_hash(None).value.unwrap(); + let (chunk_ids, mut state_sync_info) = target_db.start_snapshot_syncing(state_sync_info, app_hash, target_tx)?; + + let mut chunk_queue : VecDeque> = VecDeque::new(); + + chunk_queue.extend(chunk_ids); + + while let Some(chunk_id) = chunk_queue.pop_front() { + let ops = source_db.fetch_chunk(chunk_id.as_slice(), None)?; + let (more_chunks, new_state_sync_info) = target_db.apply_chunk(state_sync_info, (chunk_id.as_slice(), ops), target_tx)?; + state_sync_info = new_state_sync_info; + chunk_queue.extend(more_chunks); + } + + Ok(()) +} +