From 87c69b3d9c8916bf304d9c0ac8b63668ef7b8584 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Fri, 25 Aug 2023 08:37:19 +0100 Subject: [PATCH 01/30] wip finalize multi chunk with limit implement chunk op test chunk op encoding convert chunk op chunk id to string implement traversal instruction to string add chunking error + devoid multi subtree chunk from encoding work multi-subtree-chunk skeleton + return encoding length in multichunk make chunks fixed size height 2 test height proof implement height proof verifier update documentation verify height proof generation add documentation test no of chunk under chunk id fn implement number of chunks under chunk id function extract chunk layer function from chunk height seperate number_of_chunk into height and layer_height functions return multi chunk result enforce limit without storage overhead add test for encoding length check implement iterator for chunk producer remove cost from chunks fix the error type implement random chunk access fixes implement chunk height function add traverse then build chunk function to ref walker update comment implement chunk producer length init chunk producer struct implement merk tree height function update traversal generation instruction add instruction traversal test fix documentation implement binary range function clean up number of chunks function given a subtree of a given height return the exit node count documentation fixes implement chunk_height_per_layer verify that chunks produce expected root hash implement and test variable depth chunk creation restart chunking v2 --- merk/src/error.rs | 20 +- merk/src/merk/chunks.rs | 26 +- merk/src/merk/chunks2.rs | 943 ++++++++++++++++++++++++++ merk/src/merk/mod.rs | 54 ++ merk/src/merk/restore.rs | 6 +- merk/src/merk/restore2.rs | 195 ++++++ merk/src/proofs/chunk.rs | 26 +- merk/src/proofs/chunk/binary_range.rs | 211 ++++++ merk/src/proofs/chunk/chunk2.rs | 620 +++++++++++++++++ merk/src/proofs/chunk/chunk_op.rs | 141 ++++ 
merk/src/proofs/chunk/error.rs | 32 + merk/src/proofs/chunk/util.rs | 432 ++++++++++++ merk/src/test_utils/mod.rs | 12 +- 13 files changed, 2692 insertions(+), 26 deletions(-) create mode 100644 merk/src/merk/chunks2.rs create mode 100644 merk/src/merk/restore2.rs create mode 100644 merk/src/proofs/chunk/binary_range.rs create mode 100644 merk/src/proofs/chunk/chunk2.rs create mode 100644 merk/src/proofs/chunk/chunk_op.rs create mode 100644 merk/src/proofs/chunk/error.rs create mode 100644 merk/src/proofs/chunk/util.rs diff --git a/merk/src/error.rs b/merk/src/error.rs index 4455ef96..96717391 100644 --- a/merk/src/error.rs +++ b/merk/src/error.rs @@ -28,6 +28,8 @@ //! Errors +use crate::proofs::chunk::error::ChunkError; + #[cfg(any(feature = "full", feature = "verify"))] #[derive(Debug, thiserror::Error)] /// Errors @@ -59,11 +61,21 @@ pub enum Error { /// Chunking error #[error("chunking error {0}")] - ChunkingError(&'static str), + ChunkingError(ChunkError), + + // TODO: remove + /// Old chunking error + #[error("chunking error {0}")] + OldChunkingError(&'static str), /// Chunk restoring error #[error("chunk restoring error {0}")] - ChunkRestoringError(String), + ChunkRestoringError(ChunkError), + + // TODO: remove + /// Chunk restoring error + #[error("chunk restoring error {0}")] + OldChunkRestoringError(String), /// Key not found error #[error("key not found error {0}")] @@ -97,6 +109,10 @@ pub enum Error { #[error("invalid operation error {0}")] InvalidOperation(&'static str), + /// Internal error + #[error("internal error {0}")] + InternalError(&'static str), + /// Specialized costs error #[error("specialized costs error {0}")] SpecializedCostsError(&'static str), diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 7e8c588e..0df9655a 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -96,7 +96,7 @@ where /// calling `producer.len()`. 
pub fn chunk(&mut self, index: usize) -> Result, Error> { if index >= self.len() { - return Err(Error::ChunkingError("Chunk index out-of-bounds")); + return Err(Error::OldChunkingError("Chunk index out-of-bounds")); } self.index = index; @@ -129,7 +129,7 @@ where fn next_chunk(&mut self) -> Result, Error> { if self.index == 0 { if self.trunk.is_empty() { - return Err(Error::ChunkingError( + return Err(Error::OldChunkingError( "Attempted to fetch chunk on empty tree", )); } @@ -198,7 +198,7 @@ where { /// Creates a `ChunkProducer` which can return chunk proofs for replicating /// the entire Merk tree. - pub fn chunks(&self) -> Result, Error> { + pub fn chunks_old(&self) -> Result, Error> { ChunkProducer::new(self) } } @@ -223,7 +223,7 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); merk.commit(); - let chunks = merk.chunks().unwrap(); + let chunks = merk.chunks_old().unwrap(); assert_eq!(chunks.len(), 1); assert_eq!(chunks.into_iter().size_hint().0, 1); } @@ -235,7 +235,7 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); merk.commit(); - let chunks = merk.chunks().unwrap(); + let chunks = merk.chunks_old().unwrap(); assert_eq!(chunks.len(), 129); assert_eq!(chunks.into_iter().size_hint().0, 129); } @@ -247,7 +247,7 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); merk.commit(); - let mut chunks = merk.chunks().unwrap().into_iter().map(|x| x.unwrap()); + let mut chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); let chunk = chunks.next().unwrap(); let (trunk, height) = verify_trunk(chunk.into_iter().map(Ok)).unwrap().unwrap(); @@ -297,7 +297,7 @@ mod tests { .unwrap() .unwrap(); - merk.chunks() + merk.chunks_old() .unwrap() .into_iter() .map(|x| x.unwrap()) @@ -314,7 +314,7 @@ mod tests { ) .unwrap() .unwrap(); - let reopen_chunks = merk.chunks().unwrap().into_iter().map(|x| x.unwrap()); + let reopen_chunks = merk.chunks_old().unwrap().into_iter().map(|x| 
x.unwrap()); for (original, checkpoint) in original_chunks.zip(reopen_chunks) { assert_eq!(original.len(), checkpoint.len()); @@ -352,13 +352,13 @@ mod tests { merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); let chunks = merk - .chunks() + .chunks_old() .unwrap() .into_iter() .map(|x| x.unwrap()) .collect::>(); - let mut producer = merk.chunks().unwrap(); + let mut producer = merk.chunks_old().unwrap(); for i in 0..chunks.len() * 2 { let index = i % chunks.len(); assert_eq!(producer.chunk(index).unwrap(), chunks[index]); @@ -371,7 +371,7 @@ mod tests { let merk = TempMerk::new(); let _chunks = merk - .chunks() + .chunks_old() .unwrap() .into_iter() .map(|x| x.unwrap()) @@ -385,7 +385,7 @@ mod tests { let batch = make_batch_seq(1..42); merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - let mut producer = merk.chunks().unwrap(); + let mut producer = merk.chunks_old().unwrap(); let _chunk = producer.chunk(50000).unwrap(); } @@ -493,7 +493,7 @@ mod tests { let batch = make_batch_seq(1..42); merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - let mut producer = merk.chunks().unwrap(); + let mut producer = merk.chunks_old().unwrap(); let _chunk1 = producer.next_chunk(); let _chunk2 = producer.next_chunk(); } diff --git a/merk/src/merk/chunks2.rs b/merk/src/merk/chunks2.rs new file mode 100644 index 00000000..d455dbde --- /dev/null +++ b/merk/src/merk/chunks2.rs @@ -0,0 +1,943 @@ +// TODO: add MIT License +// TODO: add module description +// TODO: figure out verification features + +use std::{ + cmp::max, + collections::{LinkedList, VecDeque}, + path::Iter, +}; + +use ed::Encode; +use grovedb_costs::{CostResult, CostsExt, OperationCost}; +use grovedb_storage::StorageContext; +use integer_encoding::VarInt; + +use crate::{ + error::Error, + proofs::{ + chunk::{ + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + chunk_height, generate_traversal_instruction, number_of_chunks, + 
traversal_instruction_as_string, write_to_vec, + }, + }, + Node, Op, + }, + tree::RefWalker, + Error::ChunkingError, + Merk, PanicSource, +}; + +// TODO: move types to some other file +// TODO: add documentation +#[derive(Debug)] +pub struct SubtreeChunk { + chunk: Vec, + next_index: Option, + remaining_limit: Option, +} + +impl SubtreeChunk { + pub fn new(chunk: Vec, next_index: Option, remaining_limit: Option) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + +#[derive(Debug)] +pub struct MultiChunk { + pub chunk: Vec, + pub next_index: Option, + pub remaining_limit: Option, +} + +impl MultiChunk { + pub fn new( + chunk: Vec, + next_index: Option, + remaining_limit: Option, + ) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + +/// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly +/// replicating entire Merk trees. Chunks can be generated on the fly in a +/// random order, or iterated in order for slightly better performance. +pub struct ChunkProducer<'db, S> { + /// Represents the max height of the Merk tree + height: usize, + /// Represents the index of the next chunk + index: usize, + merk: &'db Merk, +} + +impl<'db, S> ChunkProducer<'db, S> +where + S: StorageContext<'db>, +{ + /// Creates a new `ChunkProducer` for the given `Merk` instance + pub(crate) fn new(merk: &'db Merk) -> Result { + let tree_height = merk + .height() + .ok_or(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + )))?; + Ok(Self { + height: tree_height as usize, + index: 1, + merk, + }) + } + + /// Gets the chunk with the given index. Errors if the index is out of + /// bounds or the tree is empty - the number of chunks can be checked by + /// calling `producer.len()`. 
+ pub fn chunk(&mut self, index: usize) -> Result, Error> { + // ensure that the chunk index is within bounds + let max_chunk_index = self.len(); + if index < 1 || index > max_chunk_index { + return Err(ChunkingError(ChunkError::OutOfBounds( + "chunk index out of bounds", + ))); + } + + self.index = index + 1; + + let traversal_instructions = generate_traversal_instruction(self.height, index)?; + + let chunk_height = chunk_height(self.height, index).unwrap(); + + self.merk.walk(|maybe_walker| match maybe_walker { + Some(mut walker) => { + walker.traverse_and_build_chunk(&traversal_instructions, chunk_height) + } + None => Err(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + ))), + }) + } + + // TODO: add documentation + pub fn multi_chunk_with_limit( + &mut self, + index: usize, + limit: Option, + ) -> Result { + // TODO: what happens if the vec is filled? + // we need to have some kind of hardhoc limit value if none is supplied. + // maybe we can just do something with the length to fix this? 
+ let mut chunk = vec![]; + + let mut current_index = Some(index); + let mut current_limit = limit; + + // generate as many subtree chunks as we can + // until we have exhausted all or hit a limit restriction + while current_index != None { + let current_index_traversal_instruction = generate_traversal_instruction( + self.height, + current_index.expect("confirmed is Some"), + )?; + let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction); + + // factor in the ChunkId encoding length in limit calculations + let temp_limit = if let Some(limit) = current_limit { + let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) + })?; + if limit >= chunk_id_op_encoding_len { + Some(limit - chunk_id_op_encoding_len) + } else { + Some(0) + } + } else { + None + }; + + let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit( + current_index.expect("confirmed is not None"), + temp_limit, + ); + + let limit_too_small_error = matches!( + subtree_multi_chunk_result, + Err(ChunkingError(ChunkError::LimitTooSmall(..))) + ); + + if limit_too_small_error { + if chunk.is_empty() { + // no progress, return limit too small error + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); + } else { + // made progress, send accumulated chunk + break; + } + } + + let subtree_multi_chunk = subtree_multi_chunk_result?; + + chunk.push(chunk_id_op); + chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk)); + + // update loop parameters + current_index = subtree_multi_chunk.next_index; + current_limit = subtree_multi_chunk.remaining_limit; + } + + Ok(MultiChunk::new(chunk, current_index, current_limit)) + } + + /// Packs as many chunks as it can from a starting chunk index, into a + /// vector. Stops when we have exhausted all chunks or we have reached + /// some limit. 
+ pub fn subtree_multi_chunk_with_limit( + &mut self, + index: usize, + limit: Option, + ) -> Result { + let mut chunk_byte_length = 0; + + let max_chunk_index = number_of_chunks(self.height); + let mut chunk_index = index; + + // we first get the chunk at the given index + let chunk_ops = self.chunk(chunk_index)?; + chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + chunk_index += 1; + + let mut chunk = VecDeque::from(chunk_ops); + + // ensure the limit is not less than first chunk byte length + // if it is we can't proceed and didn't make progress so we return an error + if let Some(limit) = limit { + if chunk_byte_length > limit { + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); + } + } + + let mut iteration_index = 0; + while iteration_index < chunk.len() { + // we only perform replacements on Hash nodes + if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) { + let replacement_chunk = self.chunk(chunk_index)?; + + // calculate the new total + let new_total = replacement_chunk.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })? 
+ chunk_byte_length + - chunk[iteration_index].encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + + // verify that this chunk doesn't make use exceed the limit + if let Some(limit) = limit { + if new_total > limit { + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; + + return Ok(SubtreeChunk::new( + chunk.into(), + next_index, + Some(limit - chunk_byte_length), + )); + } + } + + chunk_byte_length = new_total; + chunk_index += 1; + + chunk.remove(iteration_index); + for op in replacement_chunk.into_iter().rev() { + chunk.insert(iteration_index, op); + } + } else { + iteration_index += 1; + } + } + + let remaining_limit = limit.map(|l| l - chunk_byte_length); + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; + + Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit)) + } + + /// Returns the total number of chunks for the underlying Merk tree. + pub fn len(&self) -> usize { + number_of_chunks(self.height as usize) + } + + /// Gets the next chunk based on the `ChunkProducer`'s internal index state. + /// This is mostly useful for letting `ChunkIter` yield the chunks in order, + /// optimizing throughput compared to random access. + // TODO: does this really optimize throughput, how can you make the statement + // true? 
+ fn next_chunk(&mut self) -> Option, Error>> { + // for now not better than random access + // TODO: fix + let max_index = number_of_chunks(self.height); + if self.index > max_index { + return None; + } + + let chunk = self.chunk(self.index); + + return Some(chunk); + } + + // TODO: test this logic out + fn get_chunk_encoding_length(chunk: &[Op]) -> usize { + // TODO: deal with error + chunk + .iter() + .fold(0, |sum, op| sum + op.encoding_length().unwrap()) + } +} + +/// Iterate over each chunk, returning `None` after last chunk +impl<'db, S> Iterator for ChunkProducer<'db, S> +where + S: StorageContext<'db>, +{ + type Item = Result, Error>; + + fn next(&mut self) -> Option { + self.next_chunk() + } +} + +impl<'db, S> Merk +where + S: StorageContext<'db>, +{ + /// Creates a `ChunkProducer` which can return chunk proofs for replicating + /// the entire Merk tree. + pub fn chunks(&'db self) -> Result, Error> { + ChunkProducer::new(self) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + proofs::{ + chunk::chunk2::{ + tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, + LEFT, RIGHT, + }, + tree::execute, + Tree, + }, + test_utils::{make_batch_seq, TempMerk}, + }; + + #[derive(Default)] + struct NodeCounts { + hash: usize, + kv_hash: usize, + kv: usize, + kv_value_hash: usize, + kv_digest: usize, + kv_ref_value_hash: usize, + kv_value_hash_feature_type: usize, + } + + impl NodeCounts { + fn sum(&self) -> usize { + return self.hash + + self.kv_hash + + self.kv + + self.kv_value_hash + + self.kv_digest + + self.kv_ref_value_hash + + self.kv_value_hash_feature_type; + } + } + + fn count_node_types(tree: Tree) -> NodeCounts { + let mut counts = NodeCounts::default(); + + tree.visit_nodes(&mut |node| { + match node { + Node::Hash(_) => counts.hash += 1, + Node::KVHash(_) => counts.kv_hash += 1, + Node::KV(..) => counts.kv += 1, + Node::KVValueHash(..) => counts.kv_value_hash += 1, + Node::KVDigest(..) 
=> counts.kv_digest += 1, + Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, + Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1, + }; + }); + + counts + } + + #[test] + fn test_merk_chunk_len() { + // Tree of height 5 - max of 31 elements, min of 16 elements + // 5 will be broken into 3 layers = [2, 2, 2] + // exit nodes from first layer = 2^2 = 4 + // exit nodes from the second layer = 4 ^ 2^2 = 16 + // total_chunk = 1 + 4 + 16 = 21 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..20); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 21); + + // Tree of height 10 - max of 1023 elements, min of 512 elements + // 4 layers -> [2,2,2,2,2] + // chunk_count_per_layer -> [1, 4, 16, 64, 256] + // total = 341 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1000); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(10)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 341); + } + + #[test] + fn test_chunk_producer_iter() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks + + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // build iterator from first chunk producer + let mut chunks = merk.chunks().expect("should return producer"); + + // ensure that the chunks gotten from the iterator is the same + // as that from the chunk 
producer + for i in 1..=5 { + assert_eq!( + chunks.next().unwrap().unwrap(), + chunk_producer.chunk(i).unwrap() + ); + } + + // returns None after max + assert_eq!(chunks.next().is_none(), true); + } + + #[test] + fn test_random_chunk_access() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks + + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut inner_tree = merk.tree.take().expect("has inner tree"); + merk.tree.set(Some(inner_tree.clone())); + + // TODO: should I be using panic source? + let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {}); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + assert_eq!(chunk_producer.len(), 5); + + // assert bounds + assert_eq!(chunk_producer.chunk(0).is_err(), true); + assert_eq!(chunk_producer.chunk(6).is_err(), true); + + // first chunk + // expected: + // 7 + // / \ + // 3 11 + // / \ / \ + // H(1) H(5) H(9) H(13) + let chunk = chunk_producer.chunk(1).expect("should generate chunk"); + assert_eq!(chunk.len(), 13); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])), + Op::Child, + Op::Child + ] + ); + + // second chunk + // expected: + // 1 + // / \ + // 
0 2 + let chunk = chunk_producer.chunk(2).expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, RIGHT] + )), + Op::Child + ] + ); + + // third chunk + // expected: + // 5 + // / \ + // 4 6 + let chunk = chunk_producer.chunk(3).expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, RIGHT] + )), + Op::Child + ] + ); + + // third chunk + // expected: + // 9 + // / \ + // 8 10 + let chunk = chunk_producer.chunk(4).expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child + ] + ); + + // third chunk + // expected: + // 13 + // / \ + // 12 14 + let chunk = chunk_producer.chunk(5).expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child + ] + ); + } + + #[test] + fn test_subtree_chunk_no_limit() { + // tree of height 4 + // 5 
chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + // generate multi chunk with no limit + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + let tree = execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + // assert that all nodes are of type kv_value_hash_feature_type + let node_counts = count_node_types(tree); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.kv_hash, 0); + assert_eq!(node_counts.kv, 0); + assert_eq!(node_counts.kv_value_hash, 0); + assert_eq!(node_counts.kv_digest, 0); + assert_eq!(node_counts.kv_ref_value_hash, 0); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + } + + #[test] + fn test_subtree_chunk_with_limit() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // initial chunk is of size 453, so limit of 10 is too small + // should return an error + let chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10)); + assert!(chunk.is_err()); + + // get just the fist chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(453)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, 
Some(2)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 453); + assert_eq!(chunk.len(), 13); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 3); + assert_eq!(node_counts.hash, 4); + assert_eq!(node_counts.sum(), 4 + 3); + + // get up to second chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(737)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(3)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 737); + assert_eq!(chunk.len(), 17); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 6); + assert_eq!(node_counts.hash, 3); + assert_eq!(node_counts.sum(), 6 + 3); + + // get up to third chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1021)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(4)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1021); + assert_eq!(chunk.len(), 21); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 9); + assert_eq!(node_counts.hash, 2); + 
assert_eq!(node_counts.sum(), 9 + 2); + + // get up to fourth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1305)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(5)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1305); + assert_eq!(chunk.len(), 25); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 12); + assert_eq!(node_counts.hash, 1); + assert_eq!(node_counts.sum(), 12 + 1); + + // get up to fifth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1589)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, None); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); + + // limit larger than total chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(usize::MAX)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); + assert_eq!(chunk_result.next_index, None); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = 
execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); + } + + #[test] + fn test_multi_chunk_with_no_limit_trunk() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 2, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit(1, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // should only contain 2 items, the starting chunk id and the entire tree + assert_eq!(chunk_result.chunk.len(), 2); + + // assert items + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![])); + if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] { + let tree = execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + } else { + panic!("expected ChunkOp::Chunk"); + } + } + + #[test] + fn test_multi_chunk_with_no_limit_not_trunk() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from 
index 2, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit(2, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // chunk 2 - 5 will be considered separate subtrees + // each will have an accompanying chunk id, so 8 elements total + assert_eq!(chunk_result.chunk.len(), 8); + + // assert the chunk id's + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); + assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); + + // assert the chunks + assert_eq!( + chunk_result.chunk[1], + ChunkOp::Chunk(chunk_producer.chunk(2).expect("should generate chunk")) + ); + assert_eq!( + chunk_result.chunk[3], + ChunkOp::Chunk(chunk_producer.chunk(3).expect("should generate chunk")) + ); + assert_eq!( + chunk_result.chunk[5], + ChunkOp::Chunk(chunk_producer.chunk(4).expect("should generate chunk")) + ); + assert_eq!( + chunk_result.chunk[7], + ChunkOp::Chunk(chunk_producer.chunk(5).expect("should generate chunk")) + ); + } + + #[test] + fn test_multi_chunk_with_limit() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // ensure that the remaining limit, next index and values given are correct + // if limit is smaller than first chunk, we should get an error + let chunk_result = chunk_producer.multi_chunk_with_limit(1, Some(5)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 + // data 
size of chunk 2 is exactly 317 + // chunk op encoding for chunk 2 = 321 + // hence limit of 317 will be insufficient + let chunk_result = chunk_producer.multi_chunk_with_limit(2, Some(317)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 and 3 + // chunk 2 chunk op = 321 + // chunk 3 chunk op = 321 + let chunk_result = chunk_producer + .multi_chunk_with_limit(2, Some(321 + 321 + 5)) + .expect("should generate chunk"); + assert_eq!(chunk_result.next_index, Some(4)); + assert_eq!(chunk_result.remaining_limit, Some(5)); + assert_eq!(chunk_result.chunk.len(), 4); + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + } +} diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 37276b65..0eb3f8cd 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -34,7 +34,9 @@ pub(crate) mod defaults; pub mod options; +mod chunks2; pub mod restore; +mod restore2; use std::{ cell::Cell, @@ -607,6 +609,23 @@ where }) } + /// Returns the height of the Merk tree + pub fn height(&self) -> Option { + self.use_tree(|tree| match tree { + None => None, + Some(tree) => Some(tree.height()), + }) + } + + // TODO: remove this + // /// Returns a clone of the Tree instance in Merk + // pub fn get_root_tree(&self) -> Option { + // self.use_tree(|tree| match tree { + // None => None, + // Some(tree) => Some(tree.clone()), + // }) + // } + /// Returns the root non-prefixed key of the tree. If the tree is empty, /// None. 
pub fn root_key(&self) -> Option> { @@ -1563,6 +1582,41 @@ mod test { ); } + #[test] + fn tree_height() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(1)); + + // height 2 + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..2); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(2)); + + // height 5 + // 2^5 - 1 = 31 (max number of elements in tree of height 5) + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..31); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + + // should still be height 5 for 29 elements + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..29); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + } + #[test] fn insert_uncached() { let batch_size = 20; diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 23cef703..e1a1afd4 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -106,7 +106,7 @@ impl<'db, S: StorageContext<'db>> Restorer { /// to 0). 
pub fn finalize(mut self) -> Result, Error> { if self.remaining_chunks().unwrap_or(0) != 0 { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Called finalize before all chunks were processed".to_string(), )); } @@ -188,7 +188,7 @@ impl<'db, S: StorageContext<'db>> Restorer { }; if root_hash != self.expected_root_hash { - return Err(Error::ChunkRestoringError(format!( + return Err(Error::OldChunkRestoringError(format!( "Proof did not match expected hash\n\tExpected: {:?}\n\tActual: {:?}", self.expected_root_hash, trunk.hash() @@ -419,7 +419,7 @@ mod tests { .unwrap(); } - let chunks = original.chunks().unwrap(); + let chunks = original.chunks_old().unwrap(); let storage = TempStorage::default(); let _tx2 = storage.start_transaction(); diff --git a/merk/src/merk/restore2.rs b/merk/src/merk/restore2.rs new file mode 100644 index 00000000..084f3759 --- /dev/null +++ b/merk/src/merk/restore2.rs @@ -0,0 +1,195 @@ +// TODO: add license + +//! Provides `Restorer`, which can create a replica of a Merk instance by +//! receiving chunk proofs. 
+ +use std::collections::BTreeMap; + +use grovedb_storage::{Batch, StorageContext}; + +use crate::{ + merk::MerkSource, + proofs::{ + chunk::{ + chunk_op::ChunkOp, + error::ChunkError, + util::{traversal_instruction_as_string, write_to_vec}, + }, + tree::{execute, Child, Tree as ProofTree}, + Node, + }, + tree::{RefWalker, Tree}, + CryptoHash, Error, + Error::{CostsError, EdError, StorageError}, + Link, Merk, + TreeFeatureType::BasicMerk, +}; + +// TODO: add documentation +pub struct Restorer { + merk: Merk, + chunk_id_to_root_hash: BTreeMap, +} + +impl<'db, S: StorageContext<'db>> Restorer { + // TODO: add documentation + pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { + let mut chunk_id_to_root_hash = BTreeMap::new(); + chunk_id_to_root_hash.insert(traversal_instruction_as_string(vec![]), expected_root_hash); + + Self { + merk, + chunk_id_to_root_hash, + } + } + + // TODO: add documentation + // what does the restorer process? + // it should be able to process single chunks, subtree chunks and multi chunks + // right? or just one of them? 
+ // I think it should process just multi chunk at least for now + pub fn process_multi_chunk( + &mut self, + chunk: impl IntoIterator, + ) -> Result<(), Error> { + // chunk id, chunk + // we use the chunk id to know what to verify against + let mut chunks = chunk.into_iter(); + + // TODO: clean this up, make external function that peeks and asserts + let chunk_id_string = if let Some(ChunkOp::ChunkId(chunk_id)) = chunks.next() { + traversal_instruction_as_string(chunk_id) + } else { + return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunkId)); + }; + + // TODO: deal with unwrap + let expected_root_hash = self.chunk_id_to_root_hash.get(&chunk_id_string).unwrap(); + dbg!(expected_root_hash); + + if let Some(ChunkOp::Chunk(chunk)) = chunks.next() { + // todo: deal with error + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .unwrap(); + debug_assert!(tree.hash().unwrap() == *expected_root_hash); + dbg!("yayy"); + self.write_chunk(tree); + } else { + return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunk)); + } + + Ok(()) + } + + /// Writes the data contained in `tree` (extracted from a verified chunk + /// proof) to the RocksDB. 
+ fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> { + let mut batch = self.merk.storage.new_batch(); + + tree.visit_refs(&mut |proof_node| { + if let Some((mut node, key)) = match &proof_node.node { + Node::KV(key, value) => Some(( + Tree::new(key.clone(), value.clone(), None, BasicMerk).unwrap(), + key, + )), + Node::KVValueHash(key, value, value_hash) => Some(( + Tree::new_with_value_hash(key.clone(), value.clone(), *value_hash, BasicMerk) + .unwrap(), + key, + )), + Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some(( + Tree::new_with_value_hash( + key.clone(), + value.clone(), + *value_hash, + *feature_type, + ) + .unwrap(), + key, + )), + _ => None, + } { + // TODO: encode tree node without cloning key/value + // *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); + // *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); + + let bytes = node.encode(); + batch.put(key, &bytes, None, None).map_err(CostsError) + } else { + Ok(()) + } + })?; + + self.merk + .storage + .commit_batch(batch) + .unwrap() + .map_err(StorageError) + } +} + +#[cfg(test)] +mod tests { + use grovedb_path::SubtreePath; + use grovedb_storage::{rocksdb_storage::test_utils::TempStorage, Storage}; + + use super::*; + use crate::{merk::chunks2::ChunkProducer, test_utils::make_batch_seq, Merk}; + + #[test] + fn restoration_test() { + // Create source merk and populate + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut original = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..15); + original + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(original.height(), Some(4)); + + // Create to be restored merk + let storage = TempStorage::new(); + let tx2 = storage.start_transaction(); + let restored_merk = Merk::open_base( + storage 
+ .get_immediate_storage_context(SubtreePath::empty(), &tx2) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + assert_eq!(restored_merk.height(), None); + + // assert initial conditions + assert_ne!( + original.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + + // Perform Restoration + let mut chunk_producer = + ChunkProducer::new(&original).expect("should create chunk producer"); + + let mut restorer = Restorer::new(restored_merk, original.root_hash().unwrap()); + + let chunk = chunk_producer + .multi_chunk_with_limit(1, None) + .expect("should generate chunk"); + + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + assert_eq!(chunk.chunk.len(), 2); + + restorer.process_multi_chunk(chunk.chunk).unwrap(); + } +} diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 48afe8f3..d5ef376c 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -51,6 +51,18 @@ use crate::{ TreeFeatureType::BasicMerk, }; +mod binary_range; +#[cfg(feature = "full")] +// TODO: remove from here +pub mod chunk2; +#[cfg(feature = "full")] +// TODO: remove from here +pub mod util; +// TODO: remove from here +pub mod error; +// TODO: remove from here +pub mod chunk_op; + /// The minimum number of layers the trunk will be guaranteed to have before /// splitting into multiple chunks. If the tree's height is less than double /// this value, the trunk should be verified as a leaf chunk. @@ -264,14 +276,14 @@ pub(crate) fn verify_leaf>>( ) -> CostResult { execute(ops, false, |node| match node { Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) 
=> Ok(()), - _ => Err(Error::ChunkRestoringError( + _ => Err(Error::OldChunkRestoringError( "Leaf chunks must contain full subtree".to_string(), )), }) .flat_map_ok(|tree| { tree.hash().map(|hash| { if hash != expected_hash { - Error::ChunkRestoringError(format!( + Error::OldChunkRestoringError(format!( "Leaf chunk proof did not match expected hash\n\tExpected: {:?}\n\tActual: \ {:?}", expected_hash, @@ -297,7 +309,7 @@ pub(crate) fn verify_trunk>>( Ok(match tree.child(true) { Some(child) => { if let Node::Hash(_) = child.tree.node { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Expected height proof to only contain KV and KVHash nodes".to_string(), )); } @@ -323,7 +335,7 @@ pub(crate) fn verify_trunk>>( match tree.node { Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) => {} _ => { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Expected trunk inner nodes to contain keys and values".to_string(), )) } @@ -333,14 +345,14 @@ pub(crate) fn verify_trunk>>( } else if !leftmost { match tree.node { Node::Hash(_) => Ok(()), - _ => Err(Error::ChunkRestoringError( + _ => Err(Error::OldChunkRestoringError( "Expected trunk leaves to contain Hash nodes".to_string(), )), } } else { match &tree.node { Node::KVHash(_) => Ok(()), - _ => Err(Error::ChunkRestoringError( + _ => Err(Error::OldChunkRestoringError( "Expected leftmost trunk leaf to contain KVHash node".to_string(), )), } @@ -363,7 +375,7 @@ pub(crate) fn verify_trunk>>( if trunk_height < MIN_TRUNK_HEIGHT { if !kv_only { - return Err(Error::ChunkRestoringError( + return Err(Error::OldChunkRestoringError( "Leaf chunks must contain full subtree".to_string(), )) .wrap_with_cost(cost); diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs new file mode 100644 index 00000000..350c9718 --- /dev/null +++ b/merk/src/proofs/chunk/binary_range.rs @@ -0,0 +1,211 @@ +const LEFT: bool = true; +const 
RIGHT: bool = false; + +/// Utility type for range bisection and advancement +#[derive(Debug)] +pub(crate) struct BinaryRange { + start: usize, + end: usize, +} + +impl BinaryRange { + /// Returns a new BinaryRange and ensures that start <= end + /// and min start value is 1 + pub fn new(start: usize, end: usize) -> Result { + // start should be less than or equal to end + if start > end { + return Err(String::from("start value cannot be greater than end value")); + } + + // the minimum value for start should be 1 + // that way the maximum length + // of the range is usize::MAX and not + // usize::MAX + 1 + if start < 1 { + return Err(String::from( + "minimum start value should be 1 to avoid len overflow", + )); + } + + return Ok(Self { start, end }); + } + + /// Returns the len of the current range + pub fn len(&self) -> usize { + self.end - self.start + 1 + } + + /// Returns true when the len of the range is odd + pub fn odd(&self) -> bool { + (self.len() % 2) != 0 + } + + /// Determines if a value belongs to the left half or right half of a range + /// returns true for left and false for right + /// returns None if value is outside the range or range len is odd + pub fn which_half(&self, value: usize) -> Option { + // return None if value is not in the range + if value < self.start || value > self.end { + return None; + } + + // can't divide the range into equal halves + // when odd, so return None + if self.odd() { + return None; + } + + let half_size = self.len() / 2; + let second_half_start = self.start + half_size; + + if value >= second_half_start { + return Some(RIGHT); + } + + Some(LEFT) + } + + /// Returns a new range that only contains elements on the specified half + /// returns an error if range is odd + pub fn get_half(&self, left: bool) -> Result { + if self.odd() { + return Err(String::from("cannot break odd range in half")); + } + + let half_size = self.len() / 2; + let second_half_start = self.start + half_size; + + return Ok(if 
left { + Self { + start: self.start, + end: second_half_start - 1, + } + } else { + Self { + start: second_half_start, + end: self.end, + } + }); + } + + /// Returns a new range that increments the start value + /// also return the previous start value + /// returns an error if the operation will cause start to be larger than end + pub fn advance_range_start(&self) -> Result<(Self, usize), String> { + // check if operation will cause start > end + if self.start == self.end { + return Err(String::from( + "can't advance start when start is equal to end", + )); + } + + Ok(( + Self { + start: self.start + 1, + end: self.end, + }, + self.start, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn cannot_create_invalid_range() { + let invalid_range = BinaryRange::new(5, 3); + assert_eq!(invalid_range.is_err(), true); + } + + #[test] + fn can_get_range_len() { + let range = BinaryRange::new(2, 5).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.odd(), false); + + let range = BinaryRange::new(2, 2).expect("should create range"); + assert_eq!(range.len(), 1); + assert_eq!(range.odd(), true); + } + + #[test] + fn can_determine_correct_half() { + let range = BinaryRange::new(3, 7).expect("should create range"); + assert_eq!(range.len(), 5); + assert_eq!(range.odd(), true); + + // cannot determine half for value outside a range + assert_eq!(range.which_half(1).is_none(), true); + assert_eq!(range.which_half(7).is_none(), true); + + // cannot determine half when range is odd + assert_eq!(range.which_half(3).is_none(), true); + + let range = BinaryRange::new(3, 6).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.odd(), false); + + assert_eq!(range.which_half(3), Some(LEFT)); + assert_eq!(range.which_half(4), Some(LEFT)); + assert_eq!(range.which_half(5), Some(RIGHT)); + assert_eq!(range.which_half(6), Some(RIGHT)); + } + + #[test] + fn can_advance_start_range() { + let range = BinaryRange::new(2, 
5).expect("should create range"); + assert_eq!(range.len(), 4); + assert_eq!(range.start, 2); + + // advance the range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 2); + assert_eq!(range.len(), 3); + assert_eq!(range.start, 3); + + // advance range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 3); + assert_eq!(range.len(), 2); + assert_eq!(range.start, 4); + + // advance range + let (range, prev_start) = range.advance_range_start().expect("should advance range"); + assert_eq!(prev_start, 4); + assert_eq!(range.len(), 1); + assert_eq!(range.start, 5); + + // should not be allowed to advance the range anymore + let advance_result = range.advance_range_start(); + assert_eq!(advance_result.is_err(), true); + } + + #[test] + fn can_break_range_into_halves() { + let range = BinaryRange::new(2, 10).expect("should create range"); + assert_eq!(range.len(), 9); + assert_eq!(range.odd(), true); + assert_eq!(range.get_half(LEFT).is_err(), true); + + let range = BinaryRange::new(2, 11).expect("should create range"); + assert_eq!(range.len(), 10); + assert_eq!(range.odd(), false); + + let left_range = range.get_half(LEFT).expect("should get sub range"); + assert_eq!(left_range.start, 2); + assert_eq!(left_range.end, 6); + + let right_range = range.get_half(RIGHT).expect("should get sub range"); + assert_eq!(right_range.start, 7); + assert_eq!(right_range.end, 11); + + // right_range is odd, advance to make even + let (right_range, prev) = right_range.advance_range_start().expect("should advance"); + let right_left_range = right_range.get_half(LEFT).expect("should get sub range"); + assert_eq!(right_left_range.len(), 2); + assert_eq!(right_left_range.start, 8); + assert_eq!(right_left_range.end, 9); + } +} diff --git a/merk/src/proofs/chunk/chunk2.rs b/merk/src/proofs/chunk/chunk2.rs new file mode 100644 index 00000000..a6072871 --- /dev/null 
+++ b/merk/src/proofs/chunk/chunk2.rs @@ -0,0 +1,620 @@ +use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; + +// TODO: add copyright comment +use crate::proofs::{Node, Op, Tree}; +use crate::{ + proofs::{chunk::error::ChunkError, tree::execute}, + tree::{Fetch, RefWalker}, + CryptoHash, Error, +}; + +pub const LEFT: bool = true; +pub const RIGHT: bool = false; + +impl<'a, S> RefWalker<'a, S> +where + S: Fetch + Sized + Clone, +{ + /// Returns a chunk of a given depth from a RefWalker + pub fn create_chunk(&mut self, depth: usize) -> Result, Error> { + // build the proof vector + let mut proof = vec![]; + + self.create_chunk_internal(&mut proof, depth)?; + + Ok(proof) + } + + fn create_chunk_internal( + &mut self, + proof: &mut Vec, + remaining_depth: usize, + ) -> Result<(), Error> { + // at some point we will reach the depth + // here we need to put the node hash + if remaining_depth == 0 { + proof.push(Op::Push(self.to_hash_node().unwrap())); + return Ok(()); + } + + // traverse left + let has_left_child = self.tree().link(true).is_some(); + if has_left_child { + let mut left = self.walk(true).unwrap()?.expect("confirmed is some"); + left.create_chunk_internal(proof, remaining_depth - 1)?; + } + + // add current node's data + proof.push(Op::Push(self.to_kv_value_hash_feature_type_node())); + + if has_left_child { + proof.push(Op::Parent); + } + + // traverse right + if let Some(mut right) = self.walk(false).unwrap()? 
{ + right.create_chunk_internal(proof, remaining_depth - 1)?; + + proof.push(Op::Child); + } + + Ok(()) + } + + /// Returns a chunk of a given depth after applying some traversal + /// instruction to the RefWalker + pub fn traverse_and_build_chunk( + &mut self, + instructions: &[bool], + depth: usize, + ) -> Result, Error> { + // base case + if instructions.len() == 0 { + // we are at the desired node + return self.create_chunk(depth); + } + + // link must exist + let has_link = self.tree().link(instructions[0]).is_some(); + if !has_link { + return Err(Error::ChunkingError(ChunkError::BadTraversalInstruction( + "no node found at given traversal instruction", + ))); + } + + // grab child + let mut child = self + .walk(instructions[0]) + .unwrap()? + .expect("confirmed link exists so cannot be none"); + + // recurse on child + child.traverse_and_build_chunk(&instructions[1..], depth) + } + + /// Returns the smallest amount of tree ops, that can convince + /// a verifier of the tree height + /// the generated subtree is of this form + /// kv_hash + /// / \ + /// kv_hash node_hash + /// / \ + /// kv_hash node_hash + /// . + /// . + /// . 
+ pub fn generate_height_proof(&mut self, proof: &mut Vec) -> CostResult<(), Error> { + // TODO: look into making height proofs more efficient + // they will always be used in the context of some + // existing chunk, we don't want to repeat nodes unnecessarily + let mut cost = OperationCost::default(); + + let maybe_left = cost_return_on_error!(&mut cost, self.walk(LEFT)); + let has_left_child = maybe_left.is_some(); + + // recurse to leftmost element + if let Some(mut left) = maybe_left { + cost_return_on_error!(&mut cost, left.generate_height_proof(proof)) + } + + proof.push(Op::Push(self.to_kvhash_node())); + + if has_left_child { + proof.push(Op::Parent); + } + + if let Some(right) = self.tree().link(RIGHT) { + proof.push(Op::Push(Node::Hash(*right.hash()))); + proof.push(Op::Child); + } + + Ok(()).wrap_with_cost(cost) + } +} + +// TODO: add documentation +pub fn verify_height_proof(proof: Vec, expected_root_hash: CryptoHash) -> Result { + // todo: remove unwrap + let height_proof_tree = execute(proof.into_iter().map(Ok), false, |_| Ok(())).unwrap()?; + + // todo: deal with cost + // todo: deal with old chunk restoring error + if height_proof_tree.hash().unwrap() != expected_root_hash { + return Err(Error::OldChunkRestoringError( + "invalid height proof: root hash mismatch".to_string(), + )); + } + + verify_height_tree(&height_proof_tree) +} + +// TODO: add documentation +pub fn verify_height_tree(height_proof_tree: &Tree) -> Result { + return Ok(match height_proof_tree.child(LEFT) { + Some(child) => { + if !matches!(child.tree.node, Node::KVHash(..)) { + // todo deal with old chunk restoring error + return Err(Error::OldChunkRestoringError( + "Expected left nodes in height proofs to be kvhash nodes".to_string(), + )); + } + verify_height_tree(&child.tree)? 
+ 1 + } + None => 1, + }); +} + +#[cfg(test)] +pub mod tests { + use ed::Encode; + + use crate::{ + proofs::{ + chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, + tree::execute, + Node, Op, + Op::Parent, + }, + test_utils::{make_tree_seq, make_tree_seq_with_start_key}, + tree::{RefWalker, Tree}, + CryptoHash, PanicSource, TreeFeatureType, + }; + + fn build_tree_10_nodes() -> Tree { + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + make_tree_seq_with_start_key(10, [0; 8].to_vec()) + } + + /// Traverses a tree to a certain node and returns the node hash of that + /// node + pub fn traverse_get_node_hash( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + return traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_hash_node().unwrap() + }); + } + + /// Traverses a tree to a certain node and returns the kv_feature_type of + /// that node + pub fn traverse_get_kv_feature_type( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + return traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_kv_value_hash_feature_type_node() + }); + } + /// Traverses a tree to a certain node and returns the kv_hash of + /// that node + pub fn traverse_get_kv_hash( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + ) -> Node { + return traverse_and_apply(walker, traverse_instructions, |walker| { + walker.to_kvhash_node() + }); + } + + /// Traverses a tree to a certain node and returns the result of applying + /// some arbitrary function + pub fn traverse_and_apply( + mut walker: &mut RefWalker, + traverse_instructions: &[bool], + apply_fn: T, + ) -> Node + where + T: Fn(&mut RefWalker) -> Node, + { + if traverse_instructions.is_empty() { + return apply_fn(walker); + } + + let mut child = walker + .walk(traverse_instructions[0]) + .unwrap() + .unwrap() + .unwrap(); + return traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn); + } + + 
#[test] + fn build_chunk_from_root_depth_0() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // should return the node hash of the root node + let chunk = tree_walker.create_chunk(0).expect("should build chunk"); + assert_eq!(chunk.len(), 1); + assert_eq!( + chunk[0], + Op::Push(traverse_get_node_hash(&mut tree_walker, &[])) + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_1() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for depth 1 + // expected: + // 3 + // / \ + // Hash(1) Hash(7) + let chunk = tree_walker.create_chunk(1).expect("should build chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT])), + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_3() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for depth 3 + // expected: + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // H(4) H(6) H(9) + let chunk = tree_walker.create_chunk(3).expect("should build chunk"); + assert_eq!(chunk.len(), 19); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + 
Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn build_chunk_from_root_depth_max_depth() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk for entire tree (depth 4) + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + let chunk = tree_walker.create_chunk(4).expect("should build chunk"); + assert_eq!(chunk.len(), 19); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + 
Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + Op::Child + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(computed_tree.hash().unwrap(), tree.hash().unwrap()); + } + + #[test] + fn chunk_greater_than_max_should_equal_max_depth() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // build chunk with depth greater than tree + // we should get the same result as building with the exact depth + let large_depth_chunk = tree_walker.create_chunk(100).expect("should build chunk"); + let exact_depth_chunk = tree_walker.create_chunk(4).expect("should build chunk"); + assert_eq!(large_depth_chunk, exact_depth_chunk); + + let tree_a = execute(large_depth_chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + let tree_b = execute(exact_depth_chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree_a.hash().unwrap(), tree_b.hash().unwrap()); + } + + #[test] + fn build_chunk_after_traversal_depth_2() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // traverse to the right first then build chunk + // expected + // 7 + // / \ + // 5 8 + // / \ \ + // H(4) H(6) H(9) + + // right traversal + let chunk = tree_walker + .traverse_and_build_chunk(&[RIGHT], 2) + .expect("should build chunk"); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + 
Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child, + Op::Child, + ] + ); + + // the hash of the tree computed from the chunk + // should be the same as the node_hash of the element + // on the right + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!( + Node::Hash(computed_tree.hash().unwrap()), + traverse_get_node_hash(&mut tree_walker, &[RIGHT]) + ); + } + + #[test] + fn build_chunk_after_traversal_depth_1() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + // traverse with [right, left] and then build chunk of depth 1 + // expected + // 5 + // / \ + // H(4) H(6) + + // instruction traversal + let chunk = tree_walker + .traverse_and_build_chunk(&[RIGHT, LEFT], 1) + .expect("should build chunk"); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_node_hash( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child, + ] + ); + + let computed_tree = execute(chunk.into_iter().map(Ok), true, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!( + Node::Hash(computed_tree.hash().unwrap()), + traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT]) + ); + } + + #[test] + fn test_chunk_encoding() { + let chunk = vec![ + Op::Push(Node::Hash([0; 32])), + Op::Push(Node::KVValueHashFeatureType( + vec![1], + vec![2], + [0; 32], + TreeFeatureType::BasicMerk, + )), + ]; + let encoded_chunk = 
chunk.encode().expect("should encode"); + assert_eq!(encoded_chunk.len(), 33 + 39); + assert_eq!( + encoded_chunk.len(), + chunk.encoding_length().expect("should get encoding length") + ); + } + + #[test] + fn test_height_proof_generation() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + let mut height_proof = vec![]; + tree_walker + .generate_height_proof(&mut height_proof) + .unwrap() + .expect("should generate height proof"); + + assert_eq!(height_proof.len(), 9); + assert_eq!( + height_proof, + vec![ + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), + Op::Child, + Op::Push(traverse_get_kv_hash(&mut tree_walker, &[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT])), + Op::Child, + ] + ); + } + + #[test] + fn test_height_proof_verification() { + let mut tree = build_tree_10_nodes(); + let mut tree_walker = RefWalker::new(&mut tree, PanicSource {}); + + let mut height_proof = vec![]; + tree_walker + .generate_height_proof(&mut height_proof) + .unwrap() + .expect("should generate height proof"); + + let verified_height = verify_height_proof(height_proof, tree.hash().unwrap()) + .expect("should verify height proof"); + + // doesn't represent the max height of the tree + assert_eq!(verified_height, 3); + } +} diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs new file mode 100644 index 00000000..29687932 --- /dev/null +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -0,0 +1,141 @@ +use std::io::{Read, Write}; + +use ed::{Decode, Encode}; +use integer_encoding::{VarInt, VarIntReader}; + +use crate::proofs::{chunk::chunk_op::ChunkOp::Chunk, Op}; + +/// Represents the chunk generated from a given starting chunk id +#[derive(PartialEq, Debug)] +pub enum ChunkOp { + ChunkId(Vec), + Chunk(Vec), 
+}
+
+impl Encode for ChunkOp {
+    /// Serializes the op as a one-byte marker, a varint length and the payload.
+    ///
+    /// * `ChunkId` - marker `0`, varint number of instructions, then one byte
+    ///   per instruction (`1` for `true`, `0` for `false`)
+    /// * `Chunk` - marker `1`, varint number of ops, then the encoding of
+    ///   every op back to back
+    ///
+    /// Every write is propagated with `?`, so a failing writer surfaces as an
+    /// error instead of leaving a silently truncated encoding behind.
+    fn encode_into<W: Write>(&self, dest: &mut W) -> ed::Result<()> {
+        match self {
+            Self::ChunkId(instruction) => {
+                // write the marker then the len
+                dest.write_all(&[0_u8])?;
+                dest.write_all(instruction.len().encode_var_vec().as_slice())?;
+                let instruction_as_binary: Vec<u8> =
+                    instruction.iter().map(|v| u8::from(*v)).collect();
+                dest.write_all(&instruction_as_binary)?;
+            }
+            Self::Chunk(chunk) => {
+                dest.write_all(&[1_u8])?;
+                // chunk len represents the number of ops not the total encoding len of ops
+                dest.write_all(chunk.len().encode_var_vec().as_slice())?;
+                for op in chunk {
+                    dest.write_all(&op.encode()?)?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns the number of bytes `encode_into` writes for this op:
+    /// marker byte + varint length prefix + payload.
+    fn encoding_length(&self) -> ed::Result<usize> {
+        Ok(match self {
+            Self::ChunkId(instruction) => {
+                // one payload byte per instruction
+                1 + instruction.len().encode_var_vec().len() + instruction.len()
+            }
+            Self::Chunk(chunk) => {
+                1 + chunk.len().encode_var_vec().len() + chunk.encoding_length()?
+            }
+        })
+    }
+}
+
+impl Decode for ChunkOp {
+    /// Decodes a single `ChunkOp` from `input`, see `decode_into` for the
+    /// accepted format.
+    fn decode<R: Read>(input: R) -> ed::Result<Self> {
+        // placeholder value, always overwritten by decode_into on success
+        let mut chunk_op = ChunkOp::ChunkId(vec![]);
+        Self::decode_into(&mut chunk_op, input)?;
+        Ok(chunk_op)
+    }
+
+    /// Reads the marker byte and replaces `self` with the decoded op.
+    ///
+    /// Marker `0` decodes a `ChunkId`, marker `1` decodes a `Chunk`; any other
+    /// marker fails with `ed::Error::UnexpectedByte`. Read failures (including
+    /// input that ends early) are propagated.
+    fn decode_into<R: Read>(&mut self, mut input: R) -> ed::Result<()> {
+        let mut marker = [0_u8; 1];
+        input.read_exact(&mut marker)?;
+
+        match marker[0] {
+            0 => {
+                // NOTE(review): length comes straight from the input and sizes this
+                // buffer, consider bounding it before decoding untrusted data
+                let length: usize = input.read_varint()?;
+                let mut instruction_as_binary = vec![0_u8; length];
+                input.read_exact(&mut instruction_as_binary)?;
+
+                // byte 1 is true, every other byte value decodes as false
+                let instruction: Vec<bool> = instruction_as_binary
+                    .into_iter()
+                    .map(|v| v == 1_u8)
+                    .collect();
+
+                *self = ChunkOp::ChunkId(instruction);
+            }
+            1 => {
+                let ops_length: usize = input.read_varint()?;
+                // the op count is read from the input, so it is not used to
+                // pre-allocate: a bogus count must not trigger a huge allocation,
+                // the vector only grows as ops are actually decoded
+                let mut chunk = Vec::new();
+
+                for _ in 0..ops_length {
+                    let op = Decode::decode(&mut input)?;
+                    chunk.push(op);
+                }
+
+                *self = ChunkOp::Chunk(chunk);
+            }
+            _ => return Err(ed::Error::UnexpectedByte(marker[0])),
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use ed::{Decode, Encode};
+
+    use crate::proofs::{
+
chunk::{ + chunk2::{LEFT, RIGHT}, + chunk_op::ChunkOp, + }, + Node, Op, + }; + + #[test] + fn test_chunk_op_encoding() { + let chunk_op = ChunkOp::ChunkId(vec![LEFT, RIGHT]); + let encoded_chunk_op = chunk_op.encode().unwrap(); + assert_eq!(encoded_chunk_op, vec![0, 2, 1, 0]); + assert_eq!(encoded_chunk_op.len(), chunk_op.encoding_length().unwrap()); + + let chunk_op = ChunkOp::Chunk(vec![Op::Push(Node::Hash([0; 32])), Op::Child]); + let encoded_chunk_op = chunk_op.encode().unwrap(); + let mut expected_encoding = vec![1, 2]; + expected_encoding.extend(Op::Push(Node::Hash([0; 32])).encode().unwrap()); + expected_encoding.extend(Op::Child.encode().unwrap()); + assert_eq!(encoded_chunk_op, expected_encoding); + assert_eq!(encoded_chunk_op.len(), chunk_op.encoding_length().unwrap()); + } + + #[test] + fn test_chunk_op_decoding() { + let encoded_chunk_op = vec![0, 3, 1, 0, 1]; + let decoded_chunk_op = ChunkOp::decode(encoded_chunk_op.as_slice()).unwrap(); + assert_eq!(decoded_chunk_op, ChunkOp::ChunkId(vec![LEFT, RIGHT, LEFT])); + + let mut encoded_chunk_op = vec![1, 2]; + encoded_chunk_op.extend(Op::Push(Node::Hash([1; 32])).encode().unwrap()); + encoded_chunk_op.extend(Op::Push(Node::KV(vec![1], vec![2])).encode().unwrap()); + let decoded_chunk_op = ChunkOp::decode(encoded_chunk_op.as_slice()).unwrap(); + assert_eq!( + decoded_chunk_op, + ChunkOp::Chunk(vec![ + Op::Push(Node::Hash([1; 32])), + Op::Push(Node::KV(vec![1], vec![2])) + ]) + ); + } +} diff --git a/merk/src/proofs/chunk/error.rs b/merk/src/proofs/chunk/error.rs new file mode 100644 index 00000000..0c926203 --- /dev/null +++ b/merk/src/proofs/chunk/error.rs @@ -0,0 +1,32 @@ +#[derive(Debug, thiserror::Error)] +/// Chunk related errors +pub enum ChunkError { + /// Limit too small for first chunk, cannot make progress + #[error("overflow error {0}")] + LimitTooSmall(&'static str), + + /// Chunk index out of bounds + #[error("chunk index out of bounds: {0}")] + OutOfBounds(&'static str), + + /// Empty tree 
contains no chunks
+    #[error("chunk from empty tree: {0}")]
+    EmptyTree(&'static str),
+
+    /// Invalid traversal instruction (points to no element)
+    #[error("traversal instruction invalid {0}")]
+    BadTraversalInstruction(&'static str),
+
+    /// Expected ChunkId when parsing chunk ops
+    #[error("expected chunk id when parsing chunk op")]
+    ExpectedChunkId,
+
+    /// Expected Chunk when parsing chunk ops
+    #[error("expected chunk when parsing chunk op")]
+    ExpectedChunk,
+
+    /// Internal error, this should never surface
+    /// if it does, it means wrong assumption in code
+    #[error("internal error {0}")]
+    InternalError(&'static str),
+}
diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs
new file mode 100644
index 00000000..1094e50f
--- /dev/null
+++ b/merk/src/proofs/chunk/util.rs
@@ -0,0 +1,432 @@
+// TODO: add MIT License
+// TODO: add module description
+
+use std::io::Write;
+
+// TODO: figure out better nomenclature
+use crate::{proofs::chunk::binary_range::BinaryRange, Error};
+use crate::{proofs::chunk::error::ChunkError, Error::InternalError};
+
+/// Splits a tree of the given height into chunk layers and returns the chunk
+/// height used on each layer (one entry per layer).
+///
+/// Every layer uses a fixed chunk height of 2. An odd tree height is rounded
+/// up to the next layer, so it is possible for a chunk to not reach full
+/// capacity. A tree of height 0 has no layers and yields an empty vector.
+fn chunk_height_per_layer(height: usize) -> Vec<usize> {
+    // number of layers = ceil(height / 2), written so that it cannot overflow
+    let layer_count = height / 2 + height % 2;
+
+    // every chunk has a fixed height of 2
+    vec![2; layer_count]
+}
+
+/// Represents the height as a linear combination of 3 and 2
+/// of the form 3x + 2y
+/// this breaks the tree into layers of height 3 or 2
+/// the minimum chunk height is 2, so if tree height is less than 2
+/// we just return a single layer of height 2
+fn chunk_height_per_layer_lin_comb(height: usize) -> Vec<usize> {
+    let mut two_count = 0;
+    let mut three_count = height / 3;
+
+    // minimum chunk height is 2, if tree height is less than 2
+    // return a single layer with chunk height 2
+    if height < 2 {
+        two_count = 1;
+    } else {
+        match height % 3 {
+            0 => { /* do nothing */ }
+            1 => {
+                // reduce the
three_count by 1
+                // so the remainder becomes 3 + 1
+                // which is equivalent to 2 + 2
+                three_count -= 1;
+                two_count += 2;
+            }
+            2 => {
+                // remainder is a factor of 2
+                // just increase the two_count
+                two_count += 1;
+            }
+            // this is unreachable because height is a positive number
+            // remainder set after dividing by 3 is fixed to [0,1,2]
+            _ => unreachable!(""),
+        }
+    }
+
+    let mut layer_heights = vec![3; three_count];
+    layer_heights.extend(vec![2; two_count]);
+
+    layer_heights
+}
+
+/// Return the layer a chunk subtree belongs to
+///
+/// Layers are counted from 0, the layer that holds the root chunk.
+///
+/// # Errors
+/// Propagates the error of `generate_traversal_instruction` (e.g. chunk id
+/// out of bounds) and returns an internal chunking error if the chunk does
+/// not start at a layer boundary.
+pub fn chunk_layer(height: usize, chunk_id: usize) -> Result<usize, Error> {
+    // remaining depth tells us how deep in the tree the specified chunk is
+    let mut remaining_depth = generate_traversal_instruction(height, chunk_id)?.len() + 1;
+    let layer_heights = chunk_height_per_layer(height);
+
+    let mut layer = 0;
+
+    while remaining_depth > 1 {
+        // remaining depth will always be larger than the next layer height
+        // if it is not already 1
+        // this is because every chunk always starts at a layer boundary
+        // and remaining depth points to a chunk
+        // a violation is reported as an internal error instead of indexing out
+        // of bounds or underflowing the subtraction in release builds
+        remaining_depth = layer_heights
+            .get(layer)
+            .and_then(|layer_height| remaining_depth.checked_sub(*layer_height))
+            .filter(|depth| *depth >= 1)
+            .ok_or(Error::ChunkingError(ChunkError::InternalError(
+                "chunk does not start at a layer boundary",
+            )))?;
+        layer += 1;
+    }
+
+    Ok(layer)
+}
+
+/// Return the depth of a chunk given the height
+/// and chunk id
+///
+/// # Errors
+/// Propagates the error of `chunk_layer` and returns an empty tree chunking
+/// error when the tree has no chunk layers (height 0).
+pub fn chunk_height(height: usize, chunk_id: usize) -> Result<usize, Error> {
+    let layer = chunk_layer(height, chunk_id)?;
+
+    // a tree of height 0 has no layers at all, report that instead of
+    // panicking on an out of bounds index
+    chunk_height_per_layer(height)
+        .get(layer)
+        .copied()
+        .ok_or(Error::ChunkingError(ChunkError::EmptyTree(
+            "tree has no chunk layers",
+        )))
+}
+
+/// Given a tree of height h, return the number of chunks needed
+/// to completely represent the tree
+pub fn number_of_chunks(height: usize) -> usize {
+    let layer_heights = chunk_height_per_layer(height);
+    number_of_chunks_internal(layer_heights)
+}
+
+/// Locates the subtree represented by a chunk id and returns
+/// the number of chunks under that subtree
+pub fn number_of_chunks_under_chunk_id(height: usize, chunk_id: usize) -> Result<usize, Error>
{
+    let chunk_layer = chunk_layer(height, chunk_id)?;
+    let layer_heights = chunk_height_per_layer(height);
+
+    // we only care about the layer heights after the chunk layer
+    // as we are getting the number of chunks under a subtree and not
+    // the entire tree of height h
+    Ok(number_of_chunks_internal(
+        layer_heights[chunk_layer..].to_vec(),
+    ))
+}
+
+/// Given the heights of a tree per layer, return the total number of chunks in
+/// that tree
+///
+/// The first layer always holds exactly one chunk; an empty list of layers
+/// therefore still counts as 1.
+fn number_of_chunks_internal(layer_heights: Vec<usize>) -> usize {
+    // we don't care about exit nodes from the last layer
+    // as that points to non-existent subtrees
+    let layers_with_children = layer_heights.len().saturating_sub(1);
+
+    // the first layer consists of a single chunk
+    let mut chunks_in_layer = 1;
+    let mut total_chunks = 1;
+
+    // a layer consists of 1 or more subtrees of a given height
+    // every exit node of such a subtree is the root of a chunk on the next
+    // layer, so the chunk count of the next layer is the chunk count of this
+    // layer multiplied by the exits of a single subtree
+    for layer_height in layer_heights.into_iter().take(layers_with_children) {
+        chunks_in_layer *= exit_node_count(layer_height);
+        total_chunks += chunks_in_layer;
+    }
+
+    total_chunks
+}
+
+/// Calculates the maximum number of exit nodes for a tree of height h.
+fn exit_node_count(height: usize) -> usize {
+    // 2^height
+    // NOTE(review): `height as u32` truncates and `pow` overflows for large
+    // heights; callers in this file only pass layer heights
+    2_usize.pow(height as u32)
+}
+
+/// Generate instruction for traversing to a given chunk in a binary tree
+///
+/// Chunk ids start at 1 (the root chunk). The result holds one bool per
+/// left / right decision taken from the root, so the root chunk yields an
+/// empty instruction.
+///
+/// # Errors
+/// Returns an out of bounds chunking error when `chunk_id` is 0 or larger
+/// than `number_of_chunks(height)`, and an internal chunking error when the
+/// chunk range cannot be initialized.
+///
+/// # Panics
+/// Panics if the chunk range breaks one of its invariants while bisecting
+/// (see the `expect` messages below).
+pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result<Vec<bool>, Error> {
+    let mut instructions = vec![];
+
+    let total_chunk_count = number_of_chunks(height);
+
+    // out of bounds
+    if chunk_id < 1 || chunk_id > total_chunk_count {
+        return Err(Error::ChunkingError(ChunkError::OutOfBounds(
+            "chunk id out of bounds",
+        )));
+    }
+
+    let mut chunk_range = BinaryRange::new(1, total_chunk_count).map_err(|_| {
+        Error::ChunkingError(ChunkError::InternalError(
+            "failed to initialize chunk range",
+        ))
+    })?;
+
+    // total chunk count will always be odd because
+    // from the initial chunk (1) we have an even number of
+    // exit nodes, and they have even numbers of exit nodes ...
+    // so total_chunk_count = 1 + some_even_number = odd
+    debug_assert!(chunk_range.odd());
+
+    // bisect and reduce the chunk range until we get to the desired chunk
+    // we keep track of every left right decision we make
+    while chunk_range.len() > 1 {
+        if chunk_range.odd() {
+            // checks if the last decision we made got us to the desired chunk id
+            let advance_result = chunk_range
+                .advance_range_start()
+                .expect("range with more than one element can be advanced");
+            chunk_range = advance_result.0;
+            if advance_result.1 == chunk_id {
+                return Ok(instructions);
+            }
+        } else {
+            // for even chunk range, we are at the decision point
+            // we can either go left or right
+            // we first check which half the desired chunk is
+            // then follow that path
+            let chunk_id_half = chunk_range
+                .which_half(chunk_id)
+                .expect("chunk id must exist in range");
+            instructions.push(chunk_id_half);
+            chunk_range = chunk_range
+                .get_half(chunk_id_half)
+                .expect("confirmed range is not odd");
+        }
+    }
+
+    // chunk range len is exactly 1
+    // this must be the desired chunk id
+    // return instructions that got us here
+    Ok(instructions)
+}
+
+/// Convert traversal instruction to byte
string
+/// 1 represents left
+/// 0 represents right
+///
+/// One character is emitted per instruction, in order; an empty instruction
+/// gives an empty string.
+pub fn traversal_instruction_as_string(instruction: Vec<bool>) -> String {
+    instruction
+        .iter()
+        .map(|v| if *v { "1" } else { "0" })
+        .collect()
+}
+
+// TODO: move this to a better file
+/// Writes all of `value` to `dest`.
+///
+/// Any write failure is reported as `InternalError`; the underlying I/O error
+/// is discarded because that variant only carries a static message.
+pub fn write_to_vec<W: Write>(dest: &mut W, value: &[u8]) -> Result<(), Error> {
+    dest.write_all(value)
+        .map_err(|_e| InternalError("failed to write to vector"))
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::proofs::chunk::chunk2::{LEFT, RIGHT};
+
+    #[test]
+    fn test_chunk_height_per_layer() {
+        let layer_heights = chunk_height_per_layer(10);
+        assert_eq!(layer_heights.iter().sum::<usize>(), 10);
+        assert_eq!(layer_heights, [2, 2, 2, 2, 2]);
+
+        let layer_heights = chunk_height_per_layer(45);
+        assert_eq!(layer_heights.iter().sum::<usize>(), 46);
+        assert_eq!(layer_heights, [2; 23]);
+
+        let layer_heights = chunk_height_per_layer(2);
+        assert_eq!(layer_heights.iter().sum::<usize>(), 2);
+        assert_eq!(layer_heights, [2]);
+
+        // height less than 2
+        let layer_heights = chunk_height_per_layer(1);
+        assert_eq!(layer_heights.iter().sum::<usize>(), 2);
+        assert_eq!(layer_heights, [2]);
+
+        let layer_heights = chunk_height_per_layer(0);
+        assert_eq!(layer_heights.iter().sum::<usize>(), 0);
+        assert_eq!(layer_heights, Vec::<usize>::new());
+    }
+
+    #[test]
+    fn test_exit_node_count() {
+        // tree with just one node has 2 exit nodes
+        assert_eq!(exit_node_count(1), 2);
+
+        // tree with height 2 has 4 exit nodes
+        assert_eq!(exit_node_count(2), 4);
+
+        // tree with height 6 has 64 exit nodes
+        assert_eq!(exit_node_count(6), 64);
+    }
+
+    #[test]
+    fn test_number_of_chunks() {
+        // given a tree of height less than 3 chunk count should be 1
+        assert_eq!(number_of_chunks(1), 1);
+        assert_eq!(number_of_chunks(2), 1);
+
+        // tree with height 4 should have 5 chunks
+        // we split the tree into 2 layers of chunk height 2 each
+        // first layer contains just one chunk (1), but has 4 exit nodes
+        // hence total chunk count = 1 + 4 = 5
+
assert_eq!(number_of_chunks(4), 5); + + // tree with height 6 should have 21 chunks + // will be split into three layers of chunk height 2 = [2,2,2] + // first chunk takes 1, has 2^2 = 4 exit nodes + // second chunk takes 4 with each having 2^2 exit nodes + // total exit from second chunk = 4 * 4 = 16 + // total chunks = 1 + 4 + 16 = 21 + assert_eq!(number_of_chunks(6), 21); + + // tree with height 10 should have 341 chunks + // will be split into 5 layers = [2,2,2,2,2] + // first layer has just 1 chunk, exit nodes = 2^2 = 4 + // second layer has 4 chunks, exit nodes = 2^2 * 4 = 16 + // third layer has 16 chunks, exit nodes = 2^2 * 16 = 64 + // fourth layer has 64 chunks, exit nodes = 2^2 * 64 = 256 + // fifth layer has 256 chunks + // total chunks = 1 + 4 + 16 + 64 + 256 = 341 chunks + assert_eq!(number_of_chunks(10), 341); + } + + #[test] + fn test_number_of_chunks_under_chunk_id() { + // tree with height less than 3 should have just 1 chunk + assert_eq!(number_of_chunks_under_chunk_id(1, 1).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(2, 1).unwrap(), 1); + + // asking for chunk out of bounds should return error + assert!(number_of_chunks_under_chunk_id(1, 3).is_err()); + + // tree with height 4 should have 5 chunks at chunk id 1 + // but 1 chunk at id 2 - 5 + assert_eq!(number_of_chunks_under_chunk_id(4, 1).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(4, 2).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(4, 3).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(4, 4).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(4, 5).unwrap(), 1); + + // tree with height 10 should have 341 chunks + // layer_heights = [2, 2, 2, 2, 2] + // chunk_id 1 = 341 + // chunk_id 2 = 85 i.e (341 - 1) / 2^2 + // chunk_id 3 = 21 i.e (85 - 1) / 2^2 + // chunk_id 4 = 5 i.e (21 - 1) / 2^2 + // chunk_id 5 = 1 i.e (5 - 1) / 2^2 + // chunk_id 6 = 1 on the same layer as 5 + // chunk_id 87 = 85 as chunk 87 should wrap back to the same layer 
as chunk_id 2 + // chunk_id 88 = mirrors chunk_id 3 + // chunk_id 89 = mirrors chunk_id 4 + // chunk_id 90 = mirrors chunk_id 5 + assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 341); + assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 85); + assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 21); + assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 5).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 6).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 87).unwrap(), 85); + assert_eq!(number_of_chunks_under_chunk_id(10, 88).unwrap(), 21); + assert_eq!(number_of_chunks_under_chunk_id(10, 89).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 90).unwrap(), 1); + } + + #[test] + fn test_traversal_instruction_generation() { + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // / \ \ + // 4 6 9 + // height: 4 + // layer_height: 3, 3 + // 3 + // / \ + // 1 7 + // / \ / \ + // 0 2 5 8 + // ............................ 
+ // / \ \ + // 4 6 9 + // 5 chunks + // chunk 1 entry - 3 + // chunk 2 entry - 0 + // chunk 3 entry - 2 + // chunk 4 entry - 5 + // chunk 5 entry - 8 + + // chunk 1 entry - 3 is at the top of the tree so empty instruction set + let instruction = + generate_traversal_instruction(4, 1).expect("should generate traversal instruction"); + let empty_instruction: &[bool] = &[]; + assert_eq!(instruction, empty_instruction); + + // chunk 2 entry - 0 + // go left twice from root i.e 3 left -> 1 left -> 0 + let instruction = + generate_traversal_instruction(4, 2).expect("should generate traversal instruction"); + assert_eq!(instruction, &[LEFT, LEFT]); + + // chunk 3 entry - 2 + // go left then right from root i.e 3 left -> 1 right -> 2 + let instruction = + generate_traversal_instruction(4, 3).expect("should generate traversal instruction"); + assert_eq!(instruction, &[LEFT, RIGHT]); + + // chunk 4 entry - 5 + // go right then left i.e 3 right -> 7 left -> 5 + let instruction = + generate_traversal_instruction(4, 4).expect("should generate traversal instruction"); + assert_eq!(instruction, &[RIGHT, LEFT]); + + // chunk 5 entry - 8 + // go right twice i.e 3 right -> 7 right -> 8 + let instruction = + generate_traversal_instruction(4, 5).expect("should generate traversal instruction"); + assert_eq!(instruction, &[RIGHT, RIGHT]); + + // out of bound tests + assert_eq!(generate_traversal_instruction(4, 6).is_err(), true); + assert_eq!(generate_traversal_instruction(4, 0).is_err(), true); + } + + #[test] + fn test_chunk_height() { + // tree of height 6 + // all chunks have the same height + // since layer height = [2,2,2] + // we have 21 chunks in a tree of this height + for i in 1..=21 { + assert_eq!(chunk_height(6, i).unwrap(), 2); + } + + // tree of height 5 + // layer_height = [2, 2] + // we also have 21 chunks here + for i in 1..=21 { + assert_eq!(chunk_height(5, i).unwrap(), 2); + } + + // tree of height 10 + // layer_height = [3, 3, 3, 3] + // just going to check chunk 1 
- 5 + assert_eq!(chunk_height(10, 1).unwrap(), 2); + assert_eq!(chunk_height(10, 2).unwrap(), 2); + assert_eq!(chunk_height(10, 3).unwrap(), 2); + assert_eq!(chunk_height(10, 4).unwrap(), 2); + assert_eq!(chunk_height(10, 5).unwrap(), 2); + } + + #[test] + fn test_traversal_instruction_as_string() { + assert_eq!(traversal_instruction_as_string(vec![]), ""); + assert_eq!(traversal_instruction_as_string(vec![LEFT]), "1"); + assert_eq!(traversal_instruction_as_string(vec![RIGHT]), "0"); + assert_eq!( + traversal_instruction_as_string(vec![RIGHT, LEFT, LEFT, RIGHT]), + "0110" + ); + } +} diff --git a/merk/src/test_utils/mod.rs b/merk/src/test_utils/mod.rs index b7518158..0611fa7a 100644 --- a/merk/src/test_utils/mod.rs +++ b/merk/src/test_utils/mod.rs @@ -190,6 +190,7 @@ pub const fn seq_key(n: u64) -> [u8; 8] { /// Create batch entry with Put op using key n and a fixed value pub fn put_entry(n: u64) -> BatchEntry> { + let key = seq_key(n).to_vec(); (seq_key(n).to_vec(), Op::Put(vec![123; 60], BasicMerk)) } @@ -274,7 +275,15 @@ pub fn make_tree_rand( /// Create tree with initial fixed values and apply `node count` Put ops using /// sequential keys using memory only +/// starting tree node is [0; 20] pub fn make_tree_seq(node_count: u64) -> Tree { + make_tree_seq_with_start_key(node_count, [0; 20].to_vec()) +} + +/// Create tree with initial fixed values and apply `node count` Put ops using +/// sequential keys using memory only +/// requires a starting key vector +pub fn make_tree_seq_with_start_key(node_count: u64, start_key: Vec) -> Tree { let batch_size = if node_count >= 10_000 { assert_eq!(node_count % 10_000, 0); 10_000 @@ -283,7 +292,8 @@ pub fn make_tree_seq(node_count: u64) -> Tree { }; let value = vec![123; 60]; - let mut tree = Tree::new(vec![0; 20], value, None, BasicMerk).unwrap(); + + let mut tree = Tree::new(start_key, value, None, BasicMerk).unwrap(); let batch_count = node_count / batch_size; for i in 0..batch_count { From 
03b42c9600b81647012670cf88505d014c823668 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Mon, 2 Oct 2023 08:11:04 +0100 Subject: [PATCH 02/30] Squashed commit of the following: remove bad test rename files update documentation wip wip implement merk verifier + state building implement replication from multichunk fix chunk verification fixed implementation of chunkid from traversal instructions fix some tests make chunk_id from traversal instruction test resistant to changes in underlying chunking scheme add restoration logic test function returning the next chunk id when you call chunk use strings as communication interface between producer and restorer implement chunk id from traversal instruction add traversal instruction generation to direct string chunk producer returns next index as string for multi chunk clean up rewrite parent links restoration done successfully rough implementation of rewrite parent implement function to extract sum from node type wip chunk write logic + restorer finalization + parent key tracking new visit ref function that keeps track of traversal path implement instruction string to traversal instruction test child to link functionality for basic and sum merks implement node to link include sum wip implement and test chunk verification Fix layer iter function Previous implementation made a key assumption that nodes are unique including hash nodes, this made the layer iteration functionality depend on the contents of the tree, which shouldn't be the case. This adds a simpler implementation of the layer iter logic using breadth first search. 
add test to ensure chunks only contain hash and kvfeaturetype test for avl tree during proof op execution remove chunk_height_per_layer_lin_comb every chunk now has fixed height of 2 --- grovedb/src/lib.rs | 4 +- grovedb/src/replication.rs | 1978 ++++++++--------- merk/src/error.rs | 4 + merk/src/lib.rs | 2 +- merk/src/merk/chunks.rs | 1278 ++++++++--- merk/src/merk/chunks2.rs | 943 -------- merk/src/merk/mod.rs | 138 +- merk/src/merk/restore.rs | 1384 +++++++++--- merk/src/merk/restore2.rs | 195 -- merk/src/proofs/chunk.rs | 603 +---- merk/src/proofs/chunk/binary_range.rs | 28 + merk/src/proofs/chunk/{chunk2.rs => chunk.rs} | 30 +- merk/src/proofs/chunk/chunk_op.rs | 30 +- merk/src/proofs/chunk/error.rs | 47 + merk/src/proofs/chunk/util.rs | 417 +++- merk/src/proofs/tree.rs | 258 ++- merk/src/tree/link.rs | 2 +- merk/src/tree/mod.rs | 4 +- 18 files changed, 3795 insertions(+), 3550 deletions(-) delete mode 100644 merk/src/merk/chunks2.rs delete mode 100644 merk/src/merk/restore2.rs rename merk/src/proofs/chunk/{chunk2.rs => chunk.rs} (94%) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 87ca2443..23840455 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -215,8 +215,8 @@ use grovedb_storage::{Storage, StorageContext}; use grovedb_visualize::DebugByteVectors; #[cfg(any(feature = "full", feature = "verify"))] pub use query::{PathQuery, SizedQuery}; -#[cfg(feature = "full")] -pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; +// #[cfg(feature = "full")] +// pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 86c1c3f0..e97f7820 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -1,989 +1,989 @@ -// MIT LICENSE -// -// Copyright (c) 2021 Dash Core Group -// -// Permission is hereby 
granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. - -//! Replication - -use std::{ - collections::VecDeque, - iter::{empty, once}, -}; - -use grovedb_merk::{ - proofs::{Node, Op}, - Merk, TreeFeatureType, -}; -use grovedb_path::SubtreePath; -use grovedb_storage::{ - rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, - Storage, StorageContext, -}; - -use crate::{Element, Error, GroveDb, Hash, Transaction}; - -const OPS_PER_CHUNK: usize = 128; - -impl GroveDb { - /// Creates a chunk producer to replicate GroveDb. - pub fn chunks(&self) -> SubtreeChunkProducer { - SubtreeChunkProducer::new(self) - } -} - -/// Subtree chunks producer. 
-pub struct SubtreeChunkProducer<'db> { - grove_db: &'db GroveDb, - cache: Option>, -} - -struct SubtreeChunkProducerCache<'db> { - current_merk_path: Vec>, - current_merk: Merk>, - // This needed to be an `Option` because it requires a reference on Merk but it's within the - // same struct and during struct init a referenced Merk would be moved inside a struct, - // using `Option` this init happens in two steps. - current_chunk_producer: - Option>>, -} - -impl<'db> SubtreeChunkProducer<'db> { - fn new(storage: &'db GroveDb) -> Self { - SubtreeChunkProducer { - grove_db: storage, - cache: None, - } - } - - /// Chunks in current producer - pub fn chunks_in_current_producer(&self) -> usize { - self.cache - .as_ref() - .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) - .unwrap_or(0) - } - - /// Get chunk - pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> - where - P: IntoIterator, -

::IntoIter: Clone + DoubleEndedIterator, - { - let path_iter = path.into_iter(); - - if let Some(SubtreeChunkProducerCache { - current_merk_path, .. - }) = &self.cache - { - if !itertools::equal(current_merk_path, path_iter.clone()) { - self.cache = None; - } - } - - if self.cache.is_none() { - let current_merk = self - .grove_db - .open_non_transactional_merk_at_path( - path_iter.clone().collect::>().as_slice().into(), - None, - ) - .unwrap()?; - - if current_merk.root_key().is_none() { - return Ok(Vec::new()); - } - - self.cache = Some(SubtreeChunkProducerCache { - current_merk_path: path_iter.map(|p| p.to_vec()).collect(), - current_merk, - current_chunk_producer: None, - }); - let cache = self.cache.as_mut().expect("exists at this point"); - cache.current_chunk_producer = Some( - grovedb_merk::ChunkProducer::new(&cache.current_merk) - .map_err(|e| Error::CorruptedData(e.to_string()))?, - ); - } - - self.cache - .as_mut() - .expect("must exist at this point") - .current_chunk_producer - .as_mut() - .expect("must exist at this point") - .chunk(index) - .map_err(|e| Error::CorruptedData(e.to_string())) - } -} - -// TODO: make generic over storage_cost context -type MerkRestorer<'db> = grovedb_merk::Restorer>; - -type Path = Vec>; - -/// Structure to drive GroveDb restore process. -pub struct Restorer<'db> { - current_merk_restorer: Option>, - current_merk_chunk_index: usize, - current_merk_path: Path, - queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, - grove_db: &'db GroveDb, - tx: &'db Transaction<'db>, -} - -/// Indicates what next piece of information `Restorer` expects or wraps a -/// successful result. -#[derive(Debug)] -pub enum RestorerResponse { - AwaitNextChunk { path: Vec>, index: usize }, - Ready, -} - -#[derive(Debug)] -pub struct RestorerError(String); - -impl<'db> Restorer<'db> { - /// Create a GroveDb restorer using a backing storage_cost and root hash. 
- pub fn new( - grove_db: &'db GroveDb, - root_hash: Hash, - tx: &'db Transaction<'db>, - ) -> Result { - Ok(Restorer { - tx, - current_merk_restorer: Some(MerkRestorer::new( - Merk::open_base( - grove_db - .db - .get_immediate_storage_context(SubtreePath::empty(), tx) - .unwrap(), - false, - ) - .unwrap() - .map_err(|e| RestorerError(e.to_string()))?, - None, - root_hash, - )), - current_merk_chunk_index: 0, - current_merk_path: vec![], - queue: VecDeque::new(), - grove_db, - }) - } - - /// Process next chunk and receive instruction on what to do next. - pub fn process_chunk( - &mut self, - chunk_ops: impl IntoIterator, - ) -> Result { - if self.current_merk_restorer.is_none() { - // Last restorer was consumed and no more Merks to process. - return Ok(RestorerResponse::Ready); - } - // First we decode a chunk to take out info about nested trees to add them into - // todo list. - let mut ops = Vec::new(); - for op in chunk_ops { - ops.push(op); - match ops.last().expect("just inserted") { - Op::Push(Node::KVValueHashFeatureType( - key, - value_bytes, - value_hash, - feature_type, - )) - | Op::PushInverted(Node::KVValueHashFeatureType( - key, - value_bytes, - value_hash, - feature_type, - )) => { - if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) = - Element::deserialize(value_bytes) - .map_err(|e| RestorerError(e.to_string()))? - { - if root_key.is_none() || self.current_merk_path.last() == Some(key) { - // We add only subtrees of the current subtree to queue, skipping - // itself; Also skipping empty Merks. - continue; - } - let mut path = self.current_merk_path.clone(); - path.push(key.clone()); - // The value hash is the root tree hash - self.queue.push_back(( - path, - value_bytes.to_owned(), - *value_hash, - *feature_type, - )); - } - } - _ => {} - } - } - - // Process chunk using Merk's possibilities. 
- let remaining = self - .current_merk_restorer - .as_mut() - .expect("restorer exists at this point") - .process_chunk(ops) - .map_err(|e| RestorerError(e.to_string()))?; - - self.current_merk_chunk_index += 1; - - if remaining == 0 { - // If no more chunks for this Merk required decide if we're done or take a next - // Merk to process. - self.current_merk_restorer - .take() - .expect("restorer exists at this point") - .finalize() - .map_err(|e| RestorerError(e.to_string()))?; - if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { - // Process next subtree. - let merk = self - .grove_db - .open_merk_for_replication(next_path.as_slice().into(), self.tx) - .map_err(|e| RestorerError(e.to_string()))?; - self.current_merk_restorer = Some(MerkRestorer::new( - merk, - Some(combining_value), - expected_hash, - )); - self.current_merk_chunk_index = 0; - self.current_merk_path = next_path; - - Ok(RestorerResponse::AwaitNextChunk { - path: self.current_merk_path.clone(), - index: self.current_merk_chunk_index, - }) - } else { - Ok(RestorerResponse::Ready) - } - } else { - // Request a chunk at the same path but with incremented index. - Ok(RestorerResponse::AwaitNextChunk { - path: self.current_merk_path.clone(), - index: self.current_merk_chunk_index, - }) - } - } -} - -/// Chunk producer wrapper which uses bigger messages that may include chunks of -/// requested subtree with its right siblings. -/// -/// Because `Restorer` builds GroveDb replica breadth-first way from top to -/// bottom it makes sense to send a subtree's siblings next instead of its own -/// subtrees. 
-pub struct SiblingsChunkProducer<'db> { - chunk_producer: SubtreeChunkProducer<'db>, -} - -#[derive(Debug)] -pub struct GroveChunk { - subtree_chunks: Vec<(usize, Vec)>, -} - -impl<'db> SiblingsChunkProducer<'db> { - /// New - pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { - SiblingsChunkProducer { chunk_producer } - } - - /// Get a collection of chunks possibly from different Merks with the first - /// one as requested. - pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> - where - P: IntoIterator, -

::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, - { - let path_iter = path.into_iter(); - let mut result = Vec::new(); - let mut ops_count = 0; - - if path_iter.len() == 0 { - // We're at the root of GroveDb, no siblings here. - self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; - return Ok(result); - }; - - // Get siblings on the right to send chunks of multiple Merks if it meets the - // limit. - - let mut siblings_keys: VecDeque> = VecDeque::new(); - - let mut parent_path = path_iter; - let requested_key = parent_path.next_back(); - - let parent_ctx = self - .chunk_producer - .grove_db - .db - .get_storage_context( - parent_path.clone().collect::>().as_slice().into(), - None, - ) - .unwrap(); - let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); - - if let Some(key) = requested_key { - siblings_iter.fast_forward(key)?; - } - - while let Some(element) = siblings_iter.next_element().unwrap()? { - if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { - siblings_keys.push_back(key); - } - } - - let mut current_index = index; - // Process each subtree - while let Some(subtree_key) = siblings_keys.pop_front() { - #[allow(clippy::map_identity)] - let subtree_path = parent_path - .clone() - .map(|x| x) - .chain(once(subtree_key.as_slice())); - - self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; - // Going to a next sibling, should start from 0. - - if ops_count >= OPS_PER_CHUNK { - break; - } - current_index = 0; - } - - Ok(result) - } - - /// Process one subtree's chunks - fn process_subtree_chunks<'p, P>( - &mut self, - result: &mut Vec, - ops_count: &mut usize, - subtree_path: P, - from_index: usize, - ) -> Result<(), Error> - where - P: IntoIterator, -

::IntoIter: Clone + DoubleEndedIterator, - { - let path_iter = subtree_path.into_iter(); - - let mut current_index = from_index; - let mut subtree_chunks = Vec::new(); - - loop { - let ops = self - .chunk_producer - .get_chunk(path_iter.clone(), current_index)?; - - *ops_count += ops.len(); - subtree_chunks.push((current_index, ops)); - current_index += 1; - if current_index >= self.chunk_producer.chunks_in_current_producer() - || *ops_count >= OPS_PER_CHUNK - { - break; - } - } - - result.push(GroveChunk { subtree_chunks }); - - Ok(()) - } -} - -/// `Restorer` wrapper that applies multiple chunks at once and eventually -/// returns less requests. It is named by analogy with IO types that do less -/// syscalls. -pub struct BufferedRestorer<'db> { - restorer: Restorer<'db>, -} - -impl<'db> BufferedRestorer<'db> { - /// New - pub fn new(restorer: Restorer<'db>) -> Self { - BufferedRestorer { restorer } - } - - /// Process next chunk and receive instruction on what to do next. - pub fn process_grove_chunks(&mut self, chunks: I) -> Result - where - I: IntoIterator + ExactSizeIterator, - { - let mut response = RestorerResponse::Ready; - - for c in chunks.into_iter() { - for ops in c.subtree_chunks.into_iter().map(|x| x.1) { - if !ops.is_empty() { - response = self.restorer.process_chunk(ops)?; - } - } - } - - Ok(response) - } -} - -#[cfg(test)] -mod test { - use rand::RngCore; - use tempfile::TempDir; - - use super::*; - use crate::{ - batch::GroveDbOp, - reference_path::ReferencePathType, - tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, ANOTHER_TEST_LEAF, TEST_LEAF}, - }; - - fn replicate(original_db: &GroveDb) -> TempDir { - let replica_tempdir = TempDir::new().unwrap(); - - { - let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); - let mut chunk_producer = original_db.chunks(); - let tx = replica_db.start_transaction(); - - let mut restorer = Restorer::new( - &replica_db, - original_db.root_hash(None).unwrap().unwrap(), - &tx, - ) - 
.expect("cannot create restorer"); - - // That means root tree chunk with index 0 - let mut next_chunk: (Vec>, usize) = (vec![], 0); - - loop { - let chunk = chunk_producer - .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) - .expect("cannot get next chunk"); - match restorer.process_chunk(chunk).expect("cannot process chunk") { - RestorerResponse::Ready => break, - RestorerResponse::AwaitNextChunk { path, index } => { - next_chunk = (path, index); - } - } - } - - replica_db.commit_transaction(tx).unwrap().unwrap(); - } - replica_tempdir - } - - fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { - let replica_tempdir = TempDir::new().unwrap(); - - { - let replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); - let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); - let tx = replica_grove_db.start_transaction(); - - let mut restorer = BufferedRestorer::new( - Restorer::new( - &replica_grove_db, - original_db.root_hash(None).unwrap().unwrap(), - &tx, - ) - .expect("cannot create restorer"), - ); - - // That means root tree chunk with index 0 - let mut next_chunk: (Vec>, usize) = (vec![], 0); - - loop { - let chunks = chunk_producer - .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) - .expect("cannot get next chunk"); - match restorer - .process_grove_chunks(chunks.into_iter()) - .expect("cannot process chunk") - { - RestorerResponse::Ready => break, - RestorerResponse::AwaitNextChunk { path, index } => { - next_chunk = (path, index); - } - } - } - - replica_grove_db.commit_transaction(tx).unwrap().unwrap(); - } - - replica_tempdir - } - - fn test_replication_internal<'a, I, R, F>( - original_db: &TempGroveDb, - to_compare: I, - replicate_fn: F, - ) where - R: AsRef<[u8]> + 'a, - I: Iterator, - F: Fn(&GroveDb) -> TempDir, - { - let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); - - let replica_tempdir = replicate_fn(original_db); - - let replica = 
GroveDb::open(replica_tempdir.path()).unwrap(); - assert_eq!( - replica.root_hash(None).unwrap().unwrap(), - expected_root_hash - ); - - for full_path in to_compare { - let (key, path) = full_path.split_last().unwrap(); - assert_eq!( - original_db.get(path, key.as_ref(), None).unwrap().unwrap(), - replica.get(path, key.as_ref(), None).unwrap().unwrap() - ); - } - } - - fn test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) - where - R: AsRef<[u8]> + 'a, - I: Iterator + Clone, - { - test_replication_internal(original_db, to_compare.clone(), replicate); - test_replication_internal(original_db, to_compare, replicate_bigger_messages); - } - - #[test] - fn replicate_wrong_root_hash() { - let db = make_test_grovedb(); - let mut bad_hash = db.root_hash(None).unwrap().unwrap(); - bad_hash[0] = bad_hash[0].wrapping_add(1); - - let tmp_dir = TempDir::new().unwrap(); - let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); - let tx = restored_db.start_transaction(); - let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); - let mut chunks = db.chunks(); - assert!(restorer - .process_chunk(chunks.get_chunk([], 0).unwrap()) - .is_err()); - } - - #[test] - fn replicate_provide_wrong_tree() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key1", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let expected_hash = db.root_hash(None).unwrap().unwrap(); - - let tmp_dir = TempDir::new().unwrap(); - let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); - let tx = restored_db.start_transaction(); - let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); - let mut chunks = db.chunks(); - - let next_op = restorer - .process_chunk(chunks.get_chunk([], 0).unwrap()) - .unwrap(); - match 
next_op { - RestorerResponse::AwaitNextChunk { path, index } => { - // Feed restorer a wrong Merk! - let chunk = if path == [TEST_LEAF] { - chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() - } else { - chunks.get_chunk([TEST_LEAF], index).unwrap() - }; - assert!(restorer.process_chunk(chunk).is_err()); - } - _ => {} - } - } - - #[test] - fn replicate_nested_grovedb() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF], - b"key2", - Element::new_reference(ReferencePathType::SiblingReference(b"key1".to_vec())), - None, - None, - ) - .unwrap() - .expect("should insert reference"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"key3", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2", b"key3"], - b"key4", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - #[test] - fn replicate_nested_grovedb_with_sum_trees() { - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF], - b"key2", - Element::new_reference(ReferencePathType::SiblingReference(b"key1".to_vec())), - None, - None, - ) - .unwrap() - .expect("should insert 
reference"); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::empty_sum_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"sumitem", - Element::new_sum_item(15), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2"], - b"key3", - Element::empty_tree(), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[ANOTHER_TEST_LEAF, b"key2", b"key3"], - b"key4", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF, b"key2"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), - [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - // TODO: Highlights a bug in replication - #[test] - fn replicate_grovedb_with_sum_tree() { - let db = make_test_grovedb(); - db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF, b"key1"], - b"key2", - Element::new_item(vec![4]), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - db.insert( - &[TEST_LEAF, b"key1"], - b"key3", - Element::new_item(vec![10]), - None, - None, - ) - .unwrap() - .expect("cannot insert an element"); - - let to_compare = [ - [TEST_LEAF].as_ref(), - [ANOTHER_TEST_LEAF].as_ref(), - [TEST_LEAF, b"key1"].as_ref(), - [TEST_LEAF, b"key1", b"key2"].as_ref(), - [TEST_LEAF, b"key1", b"key3"].as_ref(), - ]; - test_replication(&db, to_compare.into_iter()); - } - - #[test] - fn replicate_a_big_one() { - const HEIGHT: usize = 3; - const SUBTREES_FOR_EACH: usize = 3; - const SCALARS_FOR_EACH: usize = 
600; - - let db = make_test_grovedb(); - let mut to_compare = Vec::new(); - - let mut rng = rand::thread_rng(); - let mut subtrees: VecDeque> = VecDeque::new(); - - // Generate root tree leafs - for _ in 0..SUBTREES_FOR_EACH { - let mut bytes = [0; 8]; - rng.fill_bytes(&mut bytes); - db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) - .unwrap() - .unwrap(); - subtrees.push_front(vec![bytes]); - to_compare.push(vec![bytes]); - } - - while let Some(path) = subtrees.pop_front() { - let mut batch = Vec::new(); - - if path.len() < HEIGHT { - for _ in 0..SUBTREES_FOR_EACH { - let mut bytes = [0; 8]; - rng.fill_bytes(&mut bytes); - - batch.push(GroveDbOp::insert_op( - path.iter().map(|x| x.to_vec()).collect(), - bytes.to_vec(), - Element::empty_tree(), - )); - - let mut new_path = path.clone(); - new_path.push(bytes); - subtrees.push_front(new_path.clone()); - to_compare.push(new_path.clone()); - } - } - - for _ in 0..SCALARS_FOR_EACH { - let mut bytes = [0; 8]; - let mut bytes_val = vec![]; - rng.fill_bytes(&mut bytes); - rng.fill_bytes(&mut bytes_val); - - batch.push(GroveDbOp::insert_op( - path.iter().map(|x| x.to_vec()).collect(), - bytes.to_vec(), - Element::new_item(bytes_val), - )); - - let mut new_path = path.clone(); - new_path.push(bytes); - to_compare.push(new_path.clone()); - } - - db.apply_batch(batch, None, None).unwrap().unwrap(); - } - - test_replication(&db, to_compare.iter().map(|x| x.as_slice())); - } - - #[test] - fn replicate_from_checkpoint() { - // Create a simple GroveDb first - let db = make_test_grovedb(); - db.insert( - &[TEST_LEAF], - b"key1", - Element::new_item(b"ayya".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::new_item(b"ayyb".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - - // Save its state with checkpoint - let checkpoint_dir_parent = TempDir::new().unwrap(); - let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); - 
db.create_checkpoint(&checkpoint_dir).unwrap(); - - // Alter the db to make difference between current state and checkpoint - db.delete(&[TEST_LEAF], b"key1", None, None) - .unwrap() - .unwrap(); - db.insert( - &[TEST_LEAF], - b"key3", - Element::new_item(b"ayyd".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - db.insert( - &[ANOTHER_TEST_LEAF], - b"key2", - Element::new_item(b"ayyc".to_vec()), - None, - None, - ) - .unwrap() - .unwrap(); - - let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); - - // Ensure checkpoint differs from current state - assert_ne!( - checkpoint_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - db.get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - ); - - // Build a replica from checkpoint - let replica_dir = replicate(&checkpoint_db); - let replica_db = GroveDb::open(&replica_dir).unwrap(); - - assert_eq!( - checkpoint_db.root_hash(None).unwrap().unwrap(), - replica_db.root_hash(None).unwrap().unwrap() - ); - - assert_eq!( - checkpoint_db - .get(&[TEST_LEAF], b"key1", None) - .unwrap() - .unwrap(), - replica_db - .get(&[TEST_LEAF], b"key1", None) - .unwrap() - .unwrap(), - ); - assert_eq!( - checkpoint_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - replica_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - ); - assert!(matches!( - replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), - Err(Error::PathKeyNotFound(_)) - )); - - // Drop original db and checkpoint dir too to ensure there is no dependency - drop(db); - drop(checkpoint_db); - drop(checkpoint_dir); - - assert_eq!( - replica_db - .get(&[ANOTHER_TEST_LEAF], b"key2", None) - .unwrap() - .unwrap(), - Element::new_item(b"ayyb".to_vec()) - ); - } -} +// // MIT LICENSE +// // +// // Copyright (c) 2021 Dash Core Group +// // +// // Permission is hereby granted, free of charge, to any +// // person obtaining a copy of this software and associated +// // documentation files (the 
"Software"), to deal in the +// // Software without restriction, including without +// // limitation the rights to use, copy, modify, merge, +// // publish, distribute, sublicense, and/or sell copies of +// // the Software, and to permit persons to whom the Software +// // is furnished to do so, subject to the following +// // conditions: +// // +// // The above copyright notice and this permission notice +// // shall be included in all copies or substantial portions +// // of the Software. +// // +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// // DEALINGS IN THE SOFTWARE. +// +// //! Replication +// +// use std::{ +// collections::VecDeque, +// iter::{empty, once}, +// }; +// +// use grovedb_merk::{ +// proofs::{Node, Op}, +// Merk, TreeFeatureType, +// }; +// use grovedb_path::SubtreePath; +// use grovedb_storage::{ +// rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, +// Storage, StorageContext, +// }; +// +// use crate::{Element, Error, GroveDb, Hash, Transaction}; +// +// const OPS_PER_CHUNK: usize = 128; +// +// impl GroveDb { +// /// Creates a chunk producer to replicate GroveDb. +// pub fn chunks(&self) -> SubtreeChunkProducer { +// SubtreeChunkProducer::new(self) +// } +// } +// +// /// Subtree chunks producer. 
+// pub struct SubtreeChunkProducer<'db> { +// grove_db: &'db GroveDb, +// cache: Option>, +// } +// +// struct SubtreeChunkProducerCache<'db> { +// current_merk_path: Vec>, +// current_merk: Merk>, +// // This needed to be an `Option` because it requires a reference on Merk but it's within the +// // same struct and during struct init a referenced Merk would be moved inside a struct, +// // using `Option` this init happens in two steps. +// current_chunk_producer: +// Option>>, +// } +// +// impl<'db> SubtreeChunkProducer<'db> { +// fn new(storage: &'db GroveDb) -> Self { +// SubtreeChunkProducer { +// grove_db: storage, +// cache: None, +// } +// } +// +// /// Chunks in current producer +// pub fn chunks_in_current_producer(&self) -> usize { +// self.cache +// .as_ref() +// .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) +// .unwrap_or(0) +// } +// +// /// Get chunk +// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> +// where +// P: IntoIterator, +//

::IntoIter: Clone + DoubleEndedIterator, +// { +// let path_iter = path.into_iter(); +// +// if let Some(SubtreeChunkProducerCache { +// current_merk_path, .. +// }) = &self.cache +// { +// if !itertools::equal(current_merk_path, path_iter.clone()) { +// self.cache = None; +// } +// } +// +// if self.cache.is_none() { +// let current_merk = self +// .grove_db +// .open_non_transactional_merk_at_path( +// path_iter.clone().collect::>().as_slice().into(), +// None, +// ) +// .unwrap()?; +// +// if current_merk.root_key().is_none() { +// return Ok(Vec::new()); +// } +// +// self.cache = Some(SubtreeChunkProducerCache { +// current_merk_path: path_iter.map(|p| p.to_vec()).collect(), +// current_merk, +// current_chunk_producer: None, +// }); +// let cache = self.cache.as_mut().expect("exists at this point"); +// cache.current_chunk_producer = Some( +// grovedb_merk::ChunkProducer::new(&cache.current_merk) +// .map_err(|e| Error::CorruptedData(e.to_string()))?, +// ); +// } +// +// self.cache +// .as_mut() +// .expect("must exist at this point") +// .current_chunk_producer +// .as_mut() +// .expect("must exist at this point") +// .chunk(index) +// .map_err(|e| Error::CorruptedData(e.to_string())) +// } +// } +// +// // TODO: make generic over storage_cost context +// type MerkRestorer<'db> = grovedb_merk::Restorer>; +// +// type Path = Vec>; +// +// /// Structure to drive GroveDb restore process. +// pub struct Restorer<'db> { +// current_merk_restorer: Option>, +// current_merk_chunk_index: usize, +// current_merk_path: Path, +// queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, +// grove_db: &'db GroveDb, +// tx: &'db Transaction<'db>, +// } +// +// /// Indicates what next piece of information `Restorer` expects or wraps a +// /// successful result. 
+// #[derive(Debug)] +// pub enum RestorerResponse { +// AwaitNextChunk { path: Vec>, index: usize }, +// Ready, +// } +// +// #[derive(Debug)] +// pub struct RestorerError(String); +// +// impl<'db> Restorer<'db> { +// /// Create a GroveDb restorer using a backing storage_cost and root hash. +// pub fn new( +// grove_db: &'db GroveDb, +// root_hash: Hash, +// tx: &'db Transaction<'db>, +// ) -> Result { +// Ok(Restorer { +// tx, +// current_merk_restorer: Some(MerkRestorer::new( +// Merk::open_base( +// grove_db +// .db +// .get_immediate_storage_context(SubtreePath::empty(), tx) +// .unwrap(), +// false, +// ) +// .unwrap() +// .map_err(|e| RestorerError(e.to_string()))?, +// None, +// root_hash, +// )), +// current_merk_chunk_index: 0, +// current_merk_path: vec![], +// queue: VecDeque::new(), +// grove_db, +// }) +// } +// +// /// Process next chunk and receive instruction on what to do next. +// pub fn process_chunk( +// &mut self, +// chunk_ops: impl IntoIterator, +// ) -> Result { +// if self.current_merk_restorer.is_none() { +// // Last restorer was consumed and no more Merks to process. +// return Ok(RestorerResponse::Ready); +// } +// // First we decode a chunk to take out info about nested trees to add them into +// // todo list. +// let mut ops = Vec::new(); +// for op in chunk_ops { +// ops.push(op); +// match ops.last().expect("just inserted") { +// Op::Push(Node::KVValueHashFeatureType( +// key, +// value_bytes, +// value_hash, +// feature_type, +// )) +// | Op::PushInverted(Node::KVValueHashFeatureType( +// key, +// value_bytes, +// value_hash, +// feature_type, +// )) => { +// if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) = +// Element::deserialize(value_bytes) +// .map_err(|e| RestorerError(e.to_string()))? +// { +// if root_key.is_none() || self.current_merk_path.last() == Some(key) { +// // We add only subtrees of the current subtree to queue, skipping +// // itself; Also skipping empty Merks. 
+// continue; +// } +// let mut path = self.current_merk_path.clone(); +// path.push(key.clone()); +// // The value hash is the root tree hash +// self.queue.push_back(( +// path, +// value_bytes.to_owned(), +// *value_hash, +// *feature_type, +// )); +// } +// } +// _ => {} +// } +// } +// +// // Process chunk using Merk's possibilities. +// let remaining = self +// .current_merk_restorer +// .as_mut() +// .expect("restorer exists at this point") +// .process_chunk(ops) +// .map_err(|e| RestorerError(e.to_string()))?; +// +// self.current_merk_chunk_index += 1; +// +// if remaining == 0 { +// // If no more chunks for this Merk required decide if we're done or take a next +// // Merk to process. +// self.current_merk_restorer +// .take() +// .expect("restorer exists at this point") +// .finalize() +// .map_err(|e| RestorerError(e.to_string()))?; +// if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { +// // Process next subtree. +// let merk = self +// .grove_db +// .open_merk_for_replication(next_path.as_slice().into(), self.tx) +// .map_err(|e| RestorerError(e.to_string()))?; +// self.current_merk_restorer = Some(MerkRestorer::new( +// merk, +// Some(combining_value), +// expected_hash, +// )); +// self.current_merk_chunk_index = 0; +// self.current_merk_path = next_path; +// +// Ok(RestorerResponse::AwaitNextChunk { +// path: self.current_merk_path.clone(), +// index: self.current_merk_chunk_index, +// }) +// } else { +// Ok(RestorerResponse::Ready) +// } +// } else { +// // Request a chunk at the same path but with incremented index. +// Ok(RestorerResponse::AwaitNextChunk { +// path: self.current_merk_path.clone(), +// index: self.current_merk_chunk_index, +// }) +// } +// } +// } +// +// /// Chunk producer wrapper which uses bigger messages that may include chunks of +// /// requested subtree with its right siblings. 
+// /// +// /// Because `Restorer` builds GroveDb replica breadth-first way from top to +// /// bottom it makes sense to send a subtree's siblings next instead of its own +// /// subtrees. +// pub struct SiblingsChunkProducer<'db> { +// chunk_producer: SubtreeChunkProducer<'db>, +// } +// +// #[derive(Debug)] +// pub struct GroveChunk { +// subtree_chunks: Vec<(usize, Vec)>, +// } +// +// impl<'db> SiblingsChunkProducer<'db> { +// /// New +// pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { +// SiblingsChunkProducer { chunk_producer } +// } +// +// /// Get a collection of chunks possibly from different Merks with the first +// /// one as requested. +// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> +// where +// P: IntoIterator, +//

::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, +// { +// let path_iter = path.into_iter(); +// let mut result = Vec::new(); +// let mut ops_count = 0; +// +// if path_iter.len() == 0 { +// // We're at the root of GroveDb, no siblings here. +// self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; +// return Ok(result); +// }; +// +// // Get siblings on the right to send chunks of multiple Merks if it meets the +// // limit. +// +// let mut siblings_keys: VecDeque> = VecDeque::new(); +// +// let mut parent_path = path_iter; +// let requested_key = parent_path.next_back(); +// +// let parent_ctx = self +// .chunk_producer +// .grove_db +// .db +// .get_storage_context( +// parent_path.clone().collect::>().as_slice().into(), +// None, +// ) +// .unwrap(); +// let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); +// +// if let Some(key) = requested_key { +// siblings_iter.fast_forward(key)?; +// } +// +// while let Some(element) = siblings_iter.next_element().unwrap()? { +// if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { +// siblings_keys.push_back(key); +// } +// } +// +// let mut current_index = index; +// // Process each subtree +// while let Some(subtree_key) = siblings_keys.pop_front() { +// #[allow(clippy::map_identity)] +// let subtree_path = parent_path +// .clone() +// .map(|x| x) +// .chain(once(subtree_key.as_slice())); +// +// self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; +// // Going to a next sibling, should start from 0. +// +// if ops_count >= OPS_PER_CHUNK { +// break; +// } +// current_index = 0; +// } +// +// Ok(result) +// } +// +// /// Process one subtree's chunks +// fn process_subtree_chunks<'p, P>( +// &mut self, +// result: &mut Vec, +// ops_count: &mut usize, +// subtree_path: P, +// from_index: usize, +// ) -> Result<(), Error> +// where +// P: IntoIterator, +//

::IntoIter: Clone + DoubleEndedIterator, +// { +// let path_iter = subtree_path.into_iter(); +// +// let mut current_index = from_index; +// let mut subtree_chunks = Vec::new(); +// +// loop { +// let ops = self +// .chunk_producer +// .get_chunk(path_iter.clone(), current_index)?; +// +// *ops_count += ops.len(); +// subtree_chunks.push((current_index, ops)); +// current_index += 1; +// if current_index >= self.chunk_producer.chunks_in_current_producer() +// || *ops_count >= OPS_PER_CHUNK +// { +// break; +// } +// } +// +// result.push(GroveChunk { subtree_chunks }); +// +// Ok(()) +// } +// } +// +// /// `Restorer` wrapper that applies multiple chunks at once and eventually +// /// returns less requests. It is named by analogy with IO types that do less +// /// syscalls. +// pub struct BufferedRestorer<'db> { +// restorer: Restorer<'db>, +// } +// +// impl<'db> BufferedRestorer<'db> { +// /// New +// pub fn new(restorer: Restorer<'db>) -> Self { +// BufferedRestorer { restorer } +// } +// +// /// Process next chunk and receive instruction on what to do next. 
+// pub fn process_grove_chunks(&mut self, chunks: I) -> Result +// where +// I: IntoIterator + ExactSizeIterator, +// { +// let mut response = RestorerResponse::Ready; +// +// for c in chunks.into_iter() { +// for ops in c.subtree_chunks.into_iter().map(|x| x.1) { +// if !ops.is_empty() { +// response = self.restorer.process_chunk(ops)?; +// } +// } +// } +// +// Ok(response) +// } +// } +// +// // #[cfg(test)] +// // mod test { +// // use rand::RngCore; +// // use tempfile::TempDir; +// // +// // use super::*; +// // use crate::{ +// // batch::GroveDbOp, +// // reference_path::ReferencePathType, +// // tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, +// // ANOTHER_TEST_LEAF, TEST_LEAF}, }; +// // +// // fn replicate(original_db: &GroveDb) -> TempDir { +// // let replica_tempdir = TempDir::new().unwrap(); +// // +// // { +// // let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); +// // let mut chunk_producer = original_db.chunks(); +// // let tx = replica_db.start_transaction(); +// // +// // let mut restorer = Restorer::new( +// // &replica_db, +// // original_db.root_hash(None).unwrap().unwrap(), +// // &tx, +// // ) +// // .expect("cannot create restorer"); +// // +// // That means root tree chunk with index 0 +// // let mut next_chunk: (Vec>, usize) = (vec![], 0); +// // +// // loop { +// // let chunk = chunk_producer +// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) +// // .expect("cannot get next chunk"); +// // match restorer.process_chunk(chunk).expect("cannot process chunk") { +// // RestorerResponse::Ready => break, +// // RestorerResponse::AwaitNextChunk { path, index } => { +// // next_chunk = (path, index); +// // } +// // } +// // } +// // +// // replica_db.commit_transaction(tx).unwrap().unwrap(); +// // } +// // replica_tempdir +// // } +// // +// // fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { +// // let replica_tempdir = TempDir::new().unwrap(); +// // +// // { +// // let 
replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); +// // let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); +// // let tx = replica_grove_db.start_transaction(); +// // +// // let mut restorer = BufferedRestorer::new( +// // Restorer::new( +// // &replica_grove_db, +// // original_db.root_hash(None).unwrap().unwrap(), +// // &tx, +// // ) +// // .expect("cannot create restorer"), +// // ); +// // +// // That means root tree chunk with index 0 +// // let mut next_chunk: (Vec>, usize) = (vec![], 0); +// // +// // loop { +// // let chunks = chunk_producer +// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) +// // .expect("cannot get next chunk"); +// // match restorer +// // .process_grove_chunks(chunks.into_iter()) +// // .expect("cannot process chunk") +// // { +// // RestorerResponse::Ready => break, +// // RestorerResponse::AwaitNextChunk { path, index } => { +// // next_chunk = (path, index); +// // } +// // } +// // } +// // +// // replica_grove_db.commit_transaction(tx).unwrap().unwrap(); +// // } +// // +// // replica_tempdir +// // } +// // +// // fn test_replication_internal<'a, I, R, F>( +// // original_db: &TempGroveDb, +// // to_compare: I, +// // replicate_fn: F, +// // ) where +// // R: AsRef<[u8]> + 'a, +// // I: Iterator, +// // F: Fn(&GroveDb) -> TempDir, +// // { +// // let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); +// // +// // let replica_tempdir = replicate_fn(original_db); +// // +// // let replica = GroveDb::open(replica_tempdir.path()).unwrap(); +// // assert_eq!( +// // replica.root_hash(None).unwrap().unwrap(), +// // expected_root_hash +// // ); +// // +// // for full_path in to_compare { +// // let (key, path) = full_path.split_last().unwrap(); +// // assert_eq!( +// // original_db.get(path, key.as_ref(), None).unwrap().unwrap(), +// // replica.get(path, key.as_ref(), None).unwrap().unwrap() +// // ); +// // } +// // } +// // +// // fn 
test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) +// // where +// // R: AsRef<[u8]> + 'a, +// // I: Iterator + Clone, +// // { +// // test_replication_internal(original_db, to_compare.clone(), replicate); +// // test_replication_internal(original_db, to_compare, +// // replicate_bigger_messages); } +// // +// // #[test] +// // fn replicate_wrong_root_hash() { +// // let db = make_test_grovedb(); +// // let mut bad_hash = db.root_hash(None).unwrap().unwrap(); +// // bad_hash[0] = bad_hash[0].wrapping_add(1); +// // +// // let tmp_dir = TempDir::new().unwrap(); +// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); +// // let tx = restored_db.start_transaction(); +// // let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); +// // let mut chunks = db.chunks(); +// // assert!(restorer +// // .process_chunk(chunks.get_chunk([], 0).unwrap()) +// // .is_err()); +// // } +// // +// // #[test] +// // fn replicate_provide_wrong_tree() { +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let expected_hash = db.root_hash(None).unwrap().unwrap(); +// // +// // let tmp_dir = TempDir::new().unwrap(); +// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); +// // let tx = restored_db.start_transaction(); +// // let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); +// // let mut chunks = db.chunks(); +// // +// // let next_op = restorer +// // .process_chunk(chunks.get_chunk([], 0).unwrap()) +// // .unwrap(); +// // match next_op { +// // RestorerResponse::AwaitNextChunk { path, index } => { +// // Feed restorer 
a wrong Merk! +// // let chunk = if path == [TEST_LEAF] { +// // chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() +// // } else { +// // chunks.get_chunk([TEST_LEAF], index).unwrap() +// // }; +// // assert!(restorer.process_chunk(chunk).is_err()); +// // } +// // _ => {} +// // } +// // } +// // +// // #[test] +// // fn replicate_nested_grovedb() { +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF], +// // b"key2", +// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". +// // to_vec())), None, +// // None, +// // ) +// // .unwrap() +// // .expect("should insert reference"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::empty_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2"], +// // b"key3", +// // Element::empty_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], +// // b"key4", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let to_compare = [ +// // [TEST_LEAF].as_ref(), +// // [TEST_LEAF, b"key1"].as_ref(), +// // [TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), +// // ]; +// // test_replication(&db, to_compare.into_iter()); +// // } +// // +// // #[test] +// // fn replicate_nested_grovedb_with_sum_trees() { +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], 
+// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF], +// // b"key2", +// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". +// // to_vec())), None, +// // None, +// // ) +// // .unwrap() +// // .expect("should insert reference"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::empty_sum_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2"], +// // b"sumitem", +// // Element::new_sum_item(15), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2"], +// // b"key3", +// // Element::empty_tree(), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], +// // b"key4", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let to_compare = [ +// // [TEST_LEAF].as_ref(), +// // [TEST_LEAF, b"key1"].as_ref(), +// // [TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), +// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), +// // ]; +// // test_replication(&db, to_compare.into_iter()); +// // } +// // +// // TODO: Highlights a bug in replication +// // #[test] +// // fn replicate_grovedb_with_sum_tree() { +// // let db = make_test_grovedb(); +// // db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // 
&[TEST_LEAF, b"key1"], +// // b"key2", +// // Element::new_item(vec![4]), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // db.insert( +// // &[TEST_LEAF, b"key1"], +// // b"key3", +// // Element::new_item(vec![10]), +// // None, +// // None, +// // ) +// // .unwrap() +// // .expect("cannot insert an element"); +// // +// // let to_compare = [ +// // [TEST_LEAF].as_ref(), +// // [ANOTHER_TEST_LEAF].as_ref(), +// // [TEST_LEAF, b"key1"].as_ref(), +// // [TEST_LEAF, b"key1", b"key2"].as_ref(), +// // [TEST_LEAF, b"key1", b"key3"].as_ref(), +// // ]; +// // test_replication(&db, to_compare.into_iter()); +// // } +// // +// // #[test] +// // fn replicate_a_big_one() { +// // const HEIGHT: usize = 3; +// // const SUBTREES_FOR_EACH: usize = 3; +// // const SCALARS_FOR_EACH: usize = 600; +// // +// // let db = make_test_grovedb(); +// // let mut to_compare = Vec::new(); +// // +// // let mut rng = rand::thread_rng(); +// // let mut subtrees: VecDeque> = VecDeque::new(); +// // +// // Generate root tree leafs +// // for _ in 0..SUBTREES_FOR_EACH { +// // let mut bytes = [0; 8]; +// // rng.fill_bytes(&mut bytes); +// // db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) +// // .unwrap() +// // .unwrap(); +// // subtrees.push_front(vec![bytes]); +// // to_compare.push(vec![bytes]); +// // } +// // +// // while let Some(path) = subtrees.pop_front() { +// // let mut batch = Vec::new(); +// // +// // if path.len() < HEIGHT { +// // for _ in 0..SUBTREES_FOR_EACH { +// // let mut bytes = [0; 8]; +// // rng.fill_bytes(&mut bytes); +// // +// // batch.push(GroveDbOp::insert_op( +// // path.iter().map(|x| x.to_vec()).collect(), +// // bytes.to_vec(), +// // Element::empty_tree(), +// // )); +// // +// // let mut new_path = path.clone(); +// // new_path.push(bytes); +// // subtrees.push_front(new_path.clone()); +// // to_compare.push(new_path.clone()); +// // } +// // } +// // +// // for _ in 0..SCALARS_FOR_EACH { +// 
// let mut bytes = [0; 8]; +// // let mut bytes_val = vec![]; +// // rng.fill_bytes(&mut bytes); +// // rng.fill_bytes(&mut bytes_val); +// // +// // batch.push(GroveDbOp::insert_op( +// // path.iter().map(|x| x.to_vec()).collect(), +// // bytes.to_vec(), +// // Element::new_item(bytes_val), +// // )); +// // +// // let mut new_path = path.clone(); +// // new_path.push(bytes); +// // to_compare.push(new_path.clone()); +// // } +// // +// // db.apply_batch(batch, None, None).unwrap().unwrap(); +// // } +// // +// // test_replication(&db, to_compare.iter().map(|x| x.as_slice())); +// // } +// // +// // #[test] +// // fn replicate_from_checkpoint() { +// // Create a simple GroveDb first +// // let db = make_test_grovedb(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key1", +// // Element::new_item(b"ayya".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::new_item(b"ayyb".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // +// // Save its state with checkpoint +// // let checkpoint_dir_parent = TempDir::new().unwrap(); +// // let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); +// // db.create_checkpoint(&checkpoint_dir).unwrap(); +// // +// // Alter the db to make difference between current state and checkpoint +// // db.delete(&[TEST_LEAF], b"key1", None, None) +// // .unwrap() +// // .unwrap(); +// // db.insert( +// // &[TEST_LEAF], +// // b"key3", +// // Element::new_item(b"ayyd".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // db.insert( +// // &[ANOTHER_TEST_LEAF], +// // b"key2", +// // Element::new_item(b"ayyc".to_vec()), +// // None, +// // None, +// // ) +// // .unwrap() +// // .unwrap(); +// // +// // let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); +// // +// // Ensure checkpoint differs from current state +// // assert_ne!( +// // checkpoint_db +// 
// .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // db.get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // ); +// // +// // Build a replica from checkpoint +// // let replica_dir = replicate(&checkpoint_db); +// // let replica_db = GroveDb::open(&replica_dir).unwrap(); +// // +// // assert_eq!( +// // checkpoint_db.root_hash(None).unwrap().unwrap(), +// // replica_db.root_hash(None).unwrap().unwrap() +// // ); +// // +// // assert_eq!( +// // checkpoint_db +// // .get(&[TEST_LEAF], b"key1", None) +// // .unwrap() +// // .unwrap(), +// // replica_db +// // .get(&[TEST_LEAF], b"key1", None) +// // .unwrap() +// // .unwrap(), +// // ); +// // assert_eq!( +// // checkpoint_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // replica_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // ); +// // assert!(matches!( +// // replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), +// // Err(Error::PathKeyNotFound(_)) +// // )); +// // +// // Drop original db and checkpoint dir too to ensure there is no dependency +// // drop(db); +// // drop(checkpoint_db); +// // drop(checkpoint_dir); +// // +// // assert_eq!( +// // replica_db +// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) +// // .unwrap() +// // .unwrap(), +// // Element::new_item(b"ayyb".to_vec()) +// // ); +// // } +// // } diff --git a/merk/src/error.rs b/merk/src/error.rs index 96717391..7581ba6f 100644 --- a/merk/src/error.rs +++ b/merk/src/error.rs @@ -59,6 +59,10 @@ pub enum Error { #[error("corrupted code execution error {0}")] CorruptedCodeExecution(&'static str), + /// Corrupted state + #[error("corrupted state: {0}")] + CorruptedState(&'static str), + /// Chunking error #[error("chunking error {0}")] ChunkingError(ChunkError), diff --git a/merk/src/lib.rs b/merk/src/lib.rs index b780b6f4..5a858dfc 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ 
extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions, restore::Restorer}; +pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; /// Provides a container type that allows temporarily taking ownership of a /// value. diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 0df9655a..51521ced 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -26,475 +26,1047 @@ // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. -//! Provides `ChunkProducer`, which creates chunk proofs for full replication of -//! a Merk. +use std::{ + cmp::max, + collections::{LinkedList, VecDeque}, + path::Iter, +}; -#[cfg(feature = "full")] -use grovedb_costs::CostsExt; -#[cfg(feature = "full")] -use grovedb_storage::{RawIterator, StorageContext}; +use ed::Encode; +use grovedb_costs::{CostResult, CostsExt, OperationCost}; +use grovedb_storage::StorageContext; +use integer_encoding::VarInt; -#[cfg(feature = "full")] -use super::Merk; -#[cfg(feature = "full")] use crate::{ error::Error, - proofs::{chunk::get_next_chunk, Node, Op}, + proofs::{ + chunk::{ + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + chunk_height, chunk_id_from_traversal_instruction, + chunk_id_from_traversal_instruction_with_recovery, generate_traversal_instruction, + generate_traversal_instruction_as_string, number_of_chunks, + string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + }, + }, + Node, Op, + }, + tree::RefWalker, + Error::ChunkingError, + Merk, PanicSource, }; -#[cfg(feature = "full")] +/// ChunkProof for replication of a single subtree +#[derive(Debug)] +pub struct SubtreeChunk { + chunk: Vec, + next_index: Option, + remaining_limit: Option, +} + +impl SubtreeChunk { + pub fn new(chunk: Vec, next_index: Option, remaining_limit: Option) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + +/// 
ChunkProof for the replication of multiple subtrees. +#[derive(Debug)] +pub struct MultiChunk { + pub chunk: Vec, + pub next_index: Option, + pub remaining_limit: Option, +} + +impl MultiChunk { + pub fn new( + chunk: Vec, + next_index: Option, + remaining_limit: Option, + ) -> Self { + Self { + chunk, + next_index, + remaining_limit, + } + } +} + /// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly /// replicating entire Merk trees. Chunks can be generated on the fly in a /// random order, or iterated in order for slightly better performance. -pub struct ChunkProducer<'db, S: StorageContext<'db>> { - trunk: Vec, - chunk_boundaries: Vec>, - raw_iter: S::RawIterator, +pub struct ChunkProducer<'db, S> { + /// Represents the max height of the Merk tree + height: usize, + /// Represents the index of the next chunk index: usize, + merk: &'db Merk, } -#[cfg(feature = "full")] impl<'db, S> ChunkProducer<'db, S> where S: StorageContext<'db>, { - /// Creates a new `ChunkProducer` for the given `Merk` instance. In the - /// constructor, the first chunk (the "trunk") will be created. 
- pub fn new(merk: &Merk) -> Result { - let (trunk, has_more) = merk - .walk(|maybe_walker| match maybe_walker { - Some(mut walker) => walker.create_trunk_proof(), - None => Ok((vec![], false)).wrap_with_cost(Default::default()), - }) - .unwrap()?; - - let chunk_boundaries = if has_more { - trunk - .iter() - .filter_map(|op| match op { - Op::Push(Node::KVValueHashFeatureType(key, ..)) => Some(key.clone()), - _ => None, - }) - .collect() - } else { - vec![] - }; - - let mut raw_iter = merk.storage.raw_iter(); - raw_iter.seek_to_first().unwrap(); - - Ok(ChunkProducer { - trunk, - chunk_boundaries, - raw_iter, - index: 0, + /// Creates a new `ChunkProducer` for the given `Merk` instance + pub(crate) fn new(merk: &'db Merk) -> Result { + let tree_height = merk + .height() + .ok_or(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + )))?; + Ok(Self { + height: tree_height as usize, + index: 1, + merk, }) } /// Gets the chunk with the given index. Errors if the index is out of /// bounds or the tree is empty - the number of chunks can be checked by /// calling `producer.len()`. - pub fn chunk(&mut self, index: usize) -> Result, Error> { - if index >= self.len() { - return Err(Error::OldChunkingError("Chunk index out-of-bounds")); + pub fn chunk_with_index( + &mut self, + chunk_index: usize, + ) -> Result<(Vec, Option), Error> { + let traversal_instructions = generate_traversal_instruction(self.height, chunk_index)?; + self.chunk_internal(chunk_index, traversal_instructions) + } + + /// Returns the chunk at a given chunk id. 
+ pub fn chunk(&mut self, chunk_id: &str) -> Result<(Vec, Option), Error> { + let traversal_instructions = string_as_traversal_instruction(chunk_id)?; + let chunk_index = chunk_id_from_traversal_instruction_with_recovery( + traversal_instructions.as_slice(), + self.height, + )?; + let (chunk, next_index) = self.chunk_internal(chunk_index, traversal_instructions)?; + let index_string = next_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose()?; + Ok((chunk, index_string)) + } + + /// Returns the chunk at the given index + /// Assumes index and traversal_instructions represents the same information + fn chunk_internal( + &mut self, + index: usize, + traversal_instructions: Vec, + ) -> Result<(Vec, Option), Error> { + // ensure that the chunk index is within bounds + let max_chunk_index = self.len(); + if index < 1 || index > max_chunk_index { + return Err(ChunkingError(ChunkError::OutOfBounds( + "chunk index out of bounds", + ))); } - self.index = index; + self.index = index + 1; - if index == 0 || index == 1 { - self.raw_iter.seek_to_first().unwrap(); + let chunk_height = chunk_height(self.height, index).unwrap(); + + let chunk = self.merk.walk(|maybe_walker| match maybe_walker { + Some(mut walker) => { + walker.traverse_and_build_chunk(&traversal_instructions, chunk_height) + } + None => Err(Error::ChunkingError(ChunkError::EmptyTree( + "cannot create chunk producer for empty Merk", + ))), + })?; + + // now we need to return the next index + // how do we know if we should return some or none + if self.index > max_chunk_index { + Ok((chunk, None)) } else { - let preceding_key = self.chunk_boundaries.get(index - 2).unwrap(); - self.raw_iter.seek(preceding_key).unwrap(); - self.raw_iter.next().unwrap(); + Ok((chunk, Some(self.index))) } + } - self.next_chunk() + /// Generate multichunk with chunk id + /// Multichunks accumulate as many chunks as they can until they have all + /// chunks or hit some optional limit + pub fn 
multi_chunk_with_limit( + &mut self, + chunk_id: &str, + limit: Option, + ) -> Result { + // we want to convert the chunk id to the index + let chunk_index = string_as_traversal_instruction(chunk_id).and_then(|instruction| { + chunk_id_from_traversal_instruction(instruction.as_slice(), self.height) + })?; + self.multi_chunk_with_limit_and_index(chunk_index, limit) } - /// Returns the total number of chunks for the underlying Merk tree. - #[allow(clippy::len_without_is_empty)] - pub fn len(&self) -> usize { - let boundaries_len = self.chunk_boundaries.len(); - if boundaries_len == 0 { - 1 - } else { - boundaries_len + 2 + /// Generate multichunk with chunk index + /// Multichunks accumulate as many chunks as they can until they have all + /// chunks or hit some optional limit + pub fn multi_chunk_with_limit_and_index( + &mut self, + index: usize, + limit: Option, + ) -> Result { + // TODO: what happens if the vec is filled? + // we need to have some kind of hardhoc limit value if none is supplied. + // maybe we can just do something with the length to fix this? 
+ let mut chunk = vec![]; + + let mut current_index = Some(index); + let mut current_limit = limit; + + // generate as many subtree chunks as we can + // until we have exhausted all or hit a limit restriction + while current_index != None { + let current_index_traversal_instruction = generate_traversal_instruction( + self.height, + current_index.expect("confirmed is Some"), + )?; + let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction); + + // factor in the ChunkId encoding length in limit calculations + let temp_limit = if let Some(limit) = current_limit { + let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) + })?; + if limit >= chunk_id_op_encoding_len { + Some(limit - chunk_id_op_encoding_len) + } else { + Some(0) + } + } else { + None + }; + + let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit( + current_index.expect("confirmed is not None"), + temp_limit, + ); + + let limit_too_small_error = matches!( + subtree_multi_chunk_result, + Err(ChunkingError(ChunkError::LimitTooSmall(..))) + ); + + if limit_too_small_error { + if chunk.is_empty() { + // no progress, return limit too small error + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); + } else { + // made progress, send accumulated chunk + break; + } + } + + let subtree_multi_chunk = subtree_multi_chunk_result?; + + chunk.push(chunk_id_op); + chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk)); + + // update loop parameters + current_index = subtree_multi_chunk.next_index; + current_limit = subtree_multi_chunk.remaining_limit; } + + let index_string = current_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose()?; + + Ok(MultiChunk::new(chunk, index_string, current_limit)) } - /// Gets the next chunk based on the `ChunkProducer`'s internal index state. 
- /// This is mostly useful for letting `ChunkIter` yield the chunks in order, - /// optimizing throughput compared to random access. - fn next_chunk(&mut self) -> Result, Error> { - if self.index == 0 { - if self.trunk.is_empty() { - return Err(Error::OldChunkingError( - "Attempted to fetch chunk on empty tree", - )); + /// Packs as many chunks as it can from a starting chunk index, into a + /// vector. Stops when we have exhausted all chunks or we have reached + /// some limit. + fn subtree_multi_chunk_with_limit( + &mut self, + index: usize, + limit: Option, + ) -> Result { + let mut chunk_byte_length = 0; + + let max_chunk_index = number_of_chunks(self.height); + let mut chunk_index = index; + + // we first get the chunk at the given index + // TODO: use the returned chunk index rather than tracking + let (chunk_ops, _) = self.chunk_with_index(chunk_index)?; + chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + chunk_index += 1; + + let mut chunk = VecDeque::from(chunk_ops); + + // ensure the limit is not less than first chunk byte length + // if it is we can't proceed and didn't make progress so we return an error + if let Some(limit) = limit { + if chunk_byte_length > limit { + return Err(Error::ChunkingError(ChunkError::LimitTooSmall( + "limit too small for initial chunk", + ))); } - self.index += 1; - return Ok(self.trunk.clone()); } - if self.index >= self.len() { - panic!("Called next_chunk after end"); + let mut iteration_index = 0; + while iteration_index < chunk.len() { + // we only perform replacements on Hash nodes + if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) { + // TODO: use the returned chunk index rather than tracking + let (replacement_chunk, _) = self.chunk_with_index(chunk_index)?; + + // calculate the new total + let new_total = replacement_chunk.encoding_length().map_err(|e| { + 
Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })? + chunk_byte_length + - chunk[iteration_index].encoding_length().map_err(|e| { + Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) + })?; + + // verify that this chunk doesn't make use exceed the limit + if let Some(limit) = limit { + if new_total > limit { + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; + + return Ok(SubtreeChunk::new( + chunk.into(), + next_index, + Some(limit - chunk_byte_length), + )); + } + } + + chunk_byte_length = new_total; + chunk_index += 1; + + chunk.remove(iteration_index); + for op in replacement_chunk.into_iter().rev() { + chunk.insert(iteration_index, op); + } + } else { + iteration_index += 1; + } } - let end_key = self.chunk_boundaries.get(self.index - 1); - let end_key_slice = end_key.as_ref().map(|k| k.as_slice()); + let remaining_limit = limit.map(|l| l - chunk_byte_length); + let next_index = match chunk_index > max_chunk_index { + true => None, + _ => Some(chunk_index), + }; - self.index += 1; + Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit)) + } - get_next_chunk(&mut self.raw_iter, end_key_slice).unwrap() + /// Returns the total number of chunks for the underlying Merk tree. + pub fn len(&self) -> usize { + number_of_chunks(self.height as usize) } -} -#[cfg(feature = "full")] -impl<'db, S> IntoIterator for ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - type IntoIter = ChunkIter<'db, S>; - type Item = as Iterator>::Item; + /// Gets the next chunk based on the `ChunkProducer`'s internal index state. + /// This is mostly useful for letting `ChunkIter` yield the chunks in order, + /// optimizing throughput compared to random access. 
+ // TODO: this is not better than random access, as we are not keeping state + // that will make this more efficient, decide if this should be fixed or not + fn next_chunk(&mut self) -> Option, Option), Error>> { + let max_index = number_of_chunks(self.height); + if self.index > max_index { + return None; + } + + // get the chunk at the given index + // return the next index as a string + Some( + self.chunk_with_index(self.index) + .and_then(|(chunk, chunk_index)| { + chunk_index + .map(|index| generate_traversal_instruction_as_string(self.height, index)) + .transpose() + .and_then(|v| Ok((chunk, v))) + }), + ) + } - fn into_iter(self) -> Self::IntoIter { - ChunkIter(self) + // TODO: test this logic out + fn get_chunk_encoding_length(chunk: &[Op]) -> usize { + // TODO: deal with error + chunk + .iter() + .fold(0, |sum, op| sum + op.encoding_length().unwrap()) } } -#[cfg(feature = "full")] -/// A `ChunkIter` iterates through all the chunks for the underlying `Merk` -/// instance in order (the first chunk is the "trunk" chunk). Yields `None` -/// after all chunks have been yielded. -pub struct ChunkIter<'db, S>(ChunkProducer<'db, S>) -where - S: StorageContext<'db>; - -#[cfg(feature = "full")] -impl<'db, S> Iterator for ChunkIter<'db, S> +/// Iterate over each chunk, returning `None` after last chunk +impl<'db, S> Iterator for ChunkProducer<'db, S> where S: StorageContext<'db>, { - type Item = Result, Error>; - - fn size_hint(&self) -> (usize, Option) { - (self.0.len(), Some(self.0.len())) - } + type Item = Result<(Vec, Option), Error>; fn next(&mut self) -> Option { - if self.0.index >= self.0.len() { - None - } else { - Some(self.0.next_chunk()) - } + self.next_chunk() } } -#[cfg(feature = "full")] impl<'db, S> Merk where S: StorageContext<'db>, { /// Creates a `ChunkProducer` which can return chunk proofs for replicating /// the entire Merk tree. 
- pub fn chunks_old(&self) -> Result, Error> { + pub fn chunks(&'db self) -> Result, Error> { ChunkProducer::new(self) } } -#[cfg(feature = "full")] #[cfg(test)] -mod tests { - use grovedb_path::SubtreePath; - use grovedb_storage::{rocksdb_storage::RocksDbStorage, Storage, StorageBatch}; - use tempfile::TempDir; - +mod test { use super::*; use crate::{ - proofs::chunk::{verify_leaf, verify_trunk}, - test_utils::*, + proofs::{ + chunk::chunk::{ + tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, + LEFT, RIGHT, + }, + tree::execute, + Tree, + }, + test_utils::{make_batch_seq, TempMerk}, }; - #[test] - fn len_small() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..256); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + #[derive(Default)] + struct NodeCounts { + hash: usize, + kv_hash: usize, + kv: usize, + kv_value_hash: usize, + kv_digest: usize, + kv_ref_value_hash: usize, + kv_value_hash_feature_type: usize, + } - let chunks = merk.chunks_old().unwrap(); - assert_eq!(chunks.len(), 1); - assert_eq!(chunks.into_iter().size_hint().0, 1); + impl NodeCounts { + fn sum(&self) -> usize { + return self.hash + + self.kv_hash + + self.kv + + self.kv_value_hash + + self.kv_digest + + self.kv_ref_value_hash + + self.kv_value_hash_feature_type; + } + } + + fn count_node_types(tree: Tree) -> NodeCounts { + let mut counts = NodeCounts::default(); + + tree.visit_nodes(&mut |node| { + match node { + Node::Hash(_) => counts.hash += 1, + Node::KVHash(_) => counts.kv_hash += 1, + Node::KV(..) => counts.kv += 1, + Node::KVValueHash(..) => counts.kv_value_hash += 1, + Node::KVDigest(..) => counts.kv_digest += 1, + Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, + Node::KVValueHashFeatureType(..) 
=> counts.kv_value_hash_feature_type += 1, + }; + }); + + counts } #[test] - fn len_big() { + fn test_merk_chunk_len() { + // Tree of height 5 - max of 31 elements, min of 16 elements + // 5 will be broken into 2 layers = [3, 2] + // exit nodes from first layer = 2^3 = 8 + // total_chunk = 1 + 8 = 9 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..10_000); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + let batch = make_batch_seq(0..20); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(5)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 9); - let chunks = merk.chunks_old().unwrap(); - assert_eq!(chunks.len(), 129); - assert_eq!(chunks.into_iter().size_hint().0, 129); + // Tree of height 10 - max of 1023 elements, min of 512 elements + // 4 layers -> [3,3,2,2] + // chunk_count_per_layer -> [1, 8, 64, 256] + // total = 341 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..1000); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(10)); + let chunk_producer = ChunkProducer::new(&merk).unwrap(); + assert_eq!(chunk_producer.len(), 329); } #[test] - fn generate_and_verify_chunks() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..10_000); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); - merk.commit(); + fn test_chunk_producer_iter() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks - let mut chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + 
assert_eq!(merk.height(), Some(4)); - let chunk = chunks.next().unwrap(); - let (trunk, height) = verify_trunk(chunk.into_iter().map(Ok)).unwrap().unwrap(); - assert_eq!(height, 14); - assert_eq!(trunk.hash().unwrap(), merk.root_hash().unwrap()); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - assert_eq!(trunk.layer(7).count(), 128); + // build iterator from first chunk producer + let mut chunks = merk.chunks().expect("should return producer"); - for (ops, node) in chunks.zip(trunk.layer(height / 2)) { - verify_leaf(ops.into_iter().map(Ok), node.hash().unwrap()) - .unwrap() - .unwrap(); + // ensure that the chunks gotten from the iterator is the same + // as that from the chunk producer + for i in 1..=5 { + assert_eq!( + chunks.next().unwrap().unwrap().0, + chunk_producer.chunk_with_index(i).unwrap().0 + ); } + + // returns None after max + assert_eq!(chunks.next().is_none(), true); } #[test] - fn chunks_from_reopen() { - let tmp_dir = TempDir::new().expect("cannot create tempdir"); - let original_chunks = { - let storage = RocksDbStorage::default_rocksdb_with_path(tmp_dir.path()) - .expect("cannot open rocksdb storage"); - let batch = StorageBatch::new(); - let mut merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), Some(&batch)) - .unwrap(), - false, - ) + fn test_random_chunk_access() { + // tree with height 4 + // full tree + // 7 + // / \ + // 3 11 + // / \ / \ + // 1 5 9 13 + // / \ / \ / \ / \ + // 0 2 4 6 8 10 12 14 + // going to be broken into [2, 2] + // that's a total of 5 chunks + + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) .unwrap() - .unwrap(); - let merk_batch = make_batch_seq(1..10); - merk.apply::<_, Vec<_>>(&merk_batch, &[], None) - .unwrap() - .unwrap(); + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - storage - .commit_multi_context_batch(batch, None) - .unwrap() - .expect("cannot 
commit batch"); + let mut inner_tree = merk.tree.take().expect("has inner tree"); + merk.tree.set(Some(inner_tree.clone())); - let merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), None) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); + // TODO: should I be using panic source? + let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {}); - merk.chunks_old() - .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>() - .into_iter() - }; - let storage = RocksDbStorage::default_rocksdb_with_path(tmp_dir.path()) - .expect("cannot open rocksdb storage"); - let merk = Merk::open_base( - storage - .get_storage_context(SubtreePath::empty(), None) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); - let reopen_chunks = merk.chunks_old().unwrap().into_iter().map(|x| x.unwrap()); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + assert_eq!(chunk_producer.len(), 5); - for (original, checkpoint) in original_chunks.zip(reopen_chunks) { - assert_eq!(original.len(), checkpoint.len()); - } - } + // assert bounds + assert_eq!(chunk_producer.chunk_with_index(0).is_err(), true); + assert_eq!(chunk_producer.chunk_with_index(6).is_err(), true); - // #[test] - // fn chunks_from_checkpoint() { - // let mut merk = TempMerk::new(); - // let batch = make_batch_seq(1..10); - // merk.apply(batch.as_slice(), &[]).unwrap(); + // first chunk + // expected: + // 7 + // / \ + // 3 11 + // / \ / \ + // H(1) H(5) H(9) H(13) + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(1) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 13); + assert_eq!(next_chunk, Some(2)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), + Op::Child, + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, 
&[])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])), + Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), + Op::Parent, + Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])), + Op::Child, + Op::Child + ] + ); - // let path: std::path::PathBuf = - // "generate_and_verify_chunks_from_checkpoint.db".into(); if path. - // exists() { std::fs::remove_dir_all(&path).unwrap(); - // } - // let checkpoint = merk.checkpoint(&path).unwrap(); + // second chunk + // expected: + // 1 + // / \ + // 0 2 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(2) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(3)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, LEFT, RIGHT] + )), + Op::Child + ] + ); - // let original_chunks = - // merk.chunks().unwrap().into_iter().map(Result::unwrap); - // let checkpoint_chunks = - // checkpoint.chunks().unwrap().into_iter().map(Result::unwrap); + // third chunk + // expected: + // 5 + // / \ + // 4 6 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(3) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(4)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[LEFT, RIGHT, RIGHT] + )), + Op::Child + ] + ); - // for (original, checkpoint) in original_chunks.zip(checkpoint_chunks) { - // assert_eq!(original.len(), checkpoint.len()); - // } + // fourth chunk + // expected: + // 9 + // / \ + // 8 10 + let
(chunk, next_chunk) = chunk_producer + .chunk_with_index(4) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, Some(5)); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, LEFT, RIGHT] + )), + Op::Child + ] + ); - // std::fs::remove_dir_all(&path).unwrap(); - // } + // fifth chunk + // expected: + // 13 + // / \ + // 12 14 + let (chunk, next_chunk) = chunk_producer + .chunk_with_index(5) + .expect("should generate chunk"); + assert_eq!(chunk.len(), 5); + assert_eq!(next_chunk, None); + assert_eq!( + chunk, + vec![ + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, LEFT] + )), + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT] + )), + Op::Parent, + Op::Push(traverse_get_kv_feature_type( + &mut tree_walker, + &[RIGHT, RIGHT, RIGHT] + )), + Op::Child + ] + ); + } #[test] - fn random_access_chunks() { + fn test_subtree_chunk_no_limit() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..111); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + // generate multi chunk with no limit + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, None) + .expect("should generate chunk with limit"); - let chunks = merk - .chunks_old() + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + let tree = execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(())) .unwrap()
- .into_iter() - .map(|x| x.unwrap()) - .collect::>(); - - let mut producer = merk.chunks_old().unwrap(); - for i in 0..chunks.len() * 2 { - let index = i % chunks.len(); - assert_eq!(producer.chunk(index).unwrap(), chunks[index]); - } + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + // assert that all nodes are of type kv_value_hash_feature_type + let node_counts = count_node_types(tree); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.kv_hash, 0); + assert_eq!(node_counts.kv, 0); + assert_eq!(node_counts.kv_value_hash, 0); + assert_eq!(node_counts.kv_digest, 0); + assert_eq!(node_counts.kv_ref_value_hash, 0); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); } #[test] - #[should_panic(expected = "Attempted to fetch chunk on empty tree")] - fn test_chunk_empty() { - let merk = TempMerk::new(); + fn test_subtree_chunk_with_limit() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let _chunks = merk - .chunks_old() + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // initial chunk is of size 453, so limit of 10 is too small + // should return an error + let chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10)); + assert!(chunk.is_err()); + + // get just the first chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(453)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(2)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 453); + assert_eq!(chunk.len(), 13); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); +
assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 3); + assert_eq!(node_counts.hash, 4); + assert_eq!(node_counts.sum(), 4 + 3); + + // get up to second chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(737)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(3)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 737); + assert_eq!(chunk.len(), 17); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) .unwrap() - .into_iter() - .map(|x| x.unwrap()) - .collect::>(); + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 6); + assert_eq!(node_counts.hash, 3); + assert_eq!(node_counts.sum(), 6 + 3); + + // get up to third chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1021)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(4)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1021); + assert_eq!(chunk.len(), 21); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 9); + assert_eq!(node_counts.hash, 2); + assert_eq!(node_counts.sum(), 9 + 2); + + // get up to fourth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1305)) + .expect("should generate chunk with limit"); + 
assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, Some(5)); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1305); + assert_eq!(chunk.len(), 25); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 12); + assert_eq!(node_counts.hash, 1); + assert_eq!(node_counts.sum(), 12 + 1); + + // get up to fifth chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(1589)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(0)); + assert_eq!(chunk_result.next_index, None); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); + + // limit larger than total chunk + let chunk_result = chunk_producer + .subtree_multi_chunk_with_limit(1, Some(usize::MAX)) + .expect("should generate chunk with limit"); + assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); + assert_eq!(chunk_result.next_index, None); + + let mut chunk = chunk_result.chunk; + assert_eq!(chunk.encoding_length().unwrap(), 1589); + assert_eq!(chunk.len(), 29); // op count + let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + + let node_counts = 
count_node_types(tree); + assert_eq!(node_counts.kv_value_hash_feature_type, 15); + assert_eq!(node_counts.hash, 0); + assert_eq!(node_counts.sum(), 15); } #[test] - #[should_panic(expected = "Chunk index out-of-bounds")] - fn test_chunk_index_oob() { + fn test_multi_chunk_with_no_limit_trunk() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..42); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); - let mut producer = merk.chunks_old().unwrap(); - let _chunk = producer.chunk(50000).unwrap(); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk starting from index 1, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(1, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // should only contain 2 items, the starting chunk id and the entire tree + assert_eq!(chunk_result.chunk.len(), 2); + + // assert items + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![])); + if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] { + let tree = execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .expect("should reconstruct tree"); + assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); + } else { + panic!("expected ChunkOp::Chunk"); + } } - // #[test] - // fn test_chunk_index_gt_1_access() { - // let mut merk = TempMerk::new(); - // let batch = make_batch_seq(1..513); - // merk.apply::<_, Vec<_>>(&batch, &[]).unwrap().unwrap(); - - // let mut producer = merk.chunks().unwrap(); - // println!("length: {}", producer.len()); - // let chunk = producer.chunk(2).unwrap(); - // 
assert_eq!( - // chunk, - // vec![ - // 3, 8, 0, 0, 0, 0, 0, 0, 0, 18, 0, 60, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 3, 8, 0, 0, 0, 0, 0, 0, 0, 19, 0, 60, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 20, 0, 60, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 21, 0, 60, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 22, - // 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 23, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 
123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 24, 0, 60, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 17, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 25, 0, - // 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 26, 0, 60, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 3, 8, 0, 0, 0, 0, 0, 0, 0, 27, 0, 60, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, - // 0, 0, 0, 28, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 
123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 17, 3, 8, 0, 0, 0, 0, 0, 0, 0, 29, 0, 60, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, - // 30, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 3, 8, 0, 0, - // 0, 0, 0, 0, 0, 31, 0, 60, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 16, 3, 8, 0, 0, 0, 0, 0, 0, 0, 32, 0, 60, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, - // 123, 123, 123, 123, 123, 123, 17, 17, 17 - // ] - // ); - // } + #[test] + fn test_multi_chunk_with_no_limit_not_trunk() { + // tree of height 4 + // 5 chunks + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // we generate the chunk 
starting from index 2, this has no hash nodes + // so no multi chunk will be generated + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(2, None) + .expect("should generate chunk with limit"); + + assert_eq!(chunk_result.remaining_limit, None); + assert_eq!(chunk_result.next_index, None); + + // chunk 2 - 5 will be considered separate subtrees + // each will have an accompanying chunk id, so 8 elements total + assert_eq!(chunk_result.chunk.len(), 8); + + // assert the chunk id's + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); + assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); + assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); + + // assert the chunks + assert_eq!( + chunk_result.chunk[1], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(2) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[3], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(3) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[5], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(4) + .expect("should generate chunk") + .0 + ) + ); + assert_eq!( + chunk_result.chunk[7], + ChunkOp::Chunk( + chunk_producer + .chunk_with_index(5) + .expect("should generate chunk") + .0 + ) + ); + } #[test] - #[should_panic(expected = "Called next_chunk after end")] - fn test_next_chunk_index_oob() { + fn test_multi_chunk_with_limit() { + // tree of height 4 + // 5 chunks let mut merk = TempMerk::new(); - let batch = make_batch_seq(1..42); - merk.apply::<_, Vec<_>>(&batch, &[], None).unwrap().unwrap(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + + // ensure that 
the remaining limit, next index and values given are correct + // if limit is smaller than first chunk, we should get an error + let chunk_result = chunk_producer.multi_chunk_with_limit("", Some(5)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); + + // get chunk 2 + // data size of chunk 2 is exactly 317 + // chunk op encoding for chunk 2 = 321 + // hence limit of 317 will be insufficient + let chunk_result = chunk_producer.multi_chunk_with_limit_and_index(2, Some(317)); + assert!(matches!( + chunk_result, + Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) + )); - let mut producer = merk.chunks_old().unwrap(); - let _chunk1 = producer.next_chunk(); - let _chunk2 = producer.next_chunk(); + // get chunk 2 and 3 + // chunk 2 chunk op = 321 + // chunk 3 chunk op = 321 + // padding = 5 + let chunk_result = chunk_producer + .multi_chunk_with_limit_and_index(2, Some(321 + 321 + 5)) + .expect("should generate chunk"); + assert_eq!( + chunk_result.next_index, + Some(traversal_instruction_as_string( + &generate_traversal_instruction(4, 4).unwrap() + )) + ); + assert_eq!(chunk_result.remaining_limit, Some(5)); + assert_eq!(chunk_result.chunk.len(), 4); + assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); + assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); } } diff --git a/merk/src/merk/chunks2.rs b/merk/src/merk/chunks2.rs deleted file mode 100644 index d455dbde..00000000 --- a/merk/src/merk/chunks2.rs +++ /dev/null @@ -1,943 +0,0 @@ -// TODO: add MIT License -// TODO: add module description -// TODO: figure out verification features - -use std::{ - cmp::max, - collections::{LinkedList, VecDeque}, - path::Iter, -}; - -use ed::Encode; -use grovedb_costs::{CostResult, CostsExt, OperationCost}; -use grovedb_storage::StorageContext; -use integer_encoding::VarInt; - -use crate::{ - error::Error, - proofs::{ - chunk::{ - chunk_op::ChunkOp, - error::{ChunkError,
ChunkError::InternalError}, - util::{ - chunk_height, generate_traversal_instruction, number_of_chunks, - traversal_instruction_as_string, write_to_vec, - }, - }, - Node, Op, - }, - tree::RefWalker, - Error::ChunkingError, - Merk, PanicSource, -}; - -// TODO: move types to some other file -// TODO: add documentation -#[derive(Debug)] -pub struct SubtreeChunk { - chunk: Vec, - next_index: Option, - remaining_limit: Option, -} - -impl SubtreeChunk { - pub fn new(chunk: Vec, next_index: Option, remaining_limit: Option) -> Self { - Self { - chunk, - next_index, - remaining_limit, - } - } -} - -#[derive(Debug)] -pub struct MultiChunk { - pub chunk: Vec, - pub next_index: Option, - pub remaining_limit: Option, -} - -impl MultiChunk { - pub fn new( - chunk: Vec, - next_index: Option, - remaining_limit: Option, - ) -> Self { - Self { - chunk, - next_index, - remaining_limit, - } - } -} - -/// A `ChunkProducer` allows the creation of chunk proofs, used for trustlessly -/// replicating entire Merk trees. Chunks can be generated on the fly in a -/// random order, or iterated in order for slightly better performance. -pub struct ChunkProducer<'db, S> { - /// Represents the max height of the Merk tree - height: usize, - /// Represents the index of the next chunk - index: usize, - merk: &'db Merk, -} - -impl<'db, S> ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - /// Creates a new `ChunkProducer` for the given `Merk` instance - pub(crate) fn new(merk: &'db Merk) -> Result { - let tree_height = merk - .height() - .ok_or(Error::ChunkingError(ChunkError::EmptyTree( - "cannot create chunk producer for empty Merk", - )))?; - Ok(Self { - height: tree_height as usize, - index: 1, - merk, - }) - } - - /// Gets the chunk with the given index. Errors if the index is out of - /// bounds or the tree is empty - the number of chunks can be checked by - /// calling `producer.len()`. 
- pub fn chunk(&mut self, index: usize) -> Result, Error> { - // ensure that the chunk index is within bounds - let max_chunk_index = self.len(); - if index < 1 || index > max_chunk_index { - return Err(ChunkingError(ChunkError::OutOfBounds( - "chunk index out of bounds", - ))); - } - - self.index = index + 1; - - let traversal_instructions = generate_traversal_instruction(self.height, index)?; - - let chunk_height = chunk_height(self.height, index).unwrap(); - - self.merk.walk(|maybe_walker| match maybe_walker { - Some(mut walker) => { - walker.traverse_and_build_chunk(&traversal_instructions, chunk_height) - } - None => Err(Error::ChunkingError(ChunkError::EmptyTree( - "cannot create chunk producer for empty Merk", - ))), - }) - } - - // TODO: add documentation - pub fn multi_chunk_with_limit( - &mut self, - index: usize, - limit: Option, - ) -> Result { - // TODO: what happens if the vec is filled? - // we need to have some kind of hardhoc limit value if none is supplied. - // maybe we can just do something with the length to fix this? 
- let mut chunk = vec![]; - - let mut current_index = Some(index); - let mut current_limit = limit; - - // generate as many subtree chunks as we can - // until we have exhausted all or hit a limit restriction - while current_index != None { - let current_index_traversal_instruction = generate_traversal_instruction( - self.height, - current_index.expect("confirmed is Some"), - )?; - let chunk_id_op = ChunkOp::ChunkId(current_index_traversal_instruction); - - // factor in the ChunkId encoding length in limit calculations - let temp_limit = if let Some(limit) = current_limit { - let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) - })?; - if limit >= chunk_id_op_encoding_len { - Some(limit - chunk_id_op_encoding_len) - } else { - Some(0) - } - } else { - None - }; - - let subtree_multi_chunk_result = self.subtree_multi_chunk_with_limit( - current_index.expect("confirmed is not None"), - temp_limit, - ); - - let limit_too_small_error = matches!( - subtree_multi_chunk_result, - Err(ChunkingError(ChunkError::LimitTooSmall(..))) - ); - - if limit_too_small_error { - if chunk.is_empty() { - // no progress, return limit too small error - return Err(Error::ChunkingError(ChunkError::LimitTooSmall( - "limit too small for initial chunk", - ))); - } else { - // made progress, send accumulated chunk - break; - } - } - - let subtree_multi_chunk = subtree_multi_chunk_result?; - - chunk.push(chunk_id_op); - chunk.push(ChunkOp::Chunk(subtree_multi_chunk.chunk)); - - // update loop parameters - current_index = subtree_multi_chunk.next_index; - current_limit = subtree_multi_chunk.remaining_limit; - } - - Ok(MultiChunk::new(chunk, current_index, current_limit)) - } - - /// Packs as many chunks as it can from a starting chunk index, into a - /// vector. Stops when we have exhausted all chunks or we have reached - /// some limit. 
- pub fn subtree_multi_chunk_with_limit( - &mut self, - index: usize, - limit: Option, - ) -> Result { - let mut chunk_byte_length = 0; - - let max_chunk_index = number_of_chunks(self.height); - let mut chunk_index = index; - - // we first get the chunk at the given index - let chunk_ops = self.chunk(chunk_index)?; - chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) - })?; - chunk_index += 1; - - let mut chunk = VecDeque::from(chunk_ops); - - // ensure the limit is not less than first chunk byte length - // if it is we can't proceed and didn't make progress so we return an error - if let Some(limit) = limit { - if chunk_byte_length > limit { - return Err(Error::ChunkingError(ChunkError::LimitTooSmall( - "limit too small for initial chunk", - ))); - } - } - - let mut iteration_index = 0; - while iteration_index < chunk.len() { - // we only perform replacements on Hash nodes - if matches!(chunk[iteration_index], Op::Push(Node::Hash(..))) { - let replacement_chunk = self.chunk(chunk_index)?; - - // calculate the new total - let new_total = replacement_chunk.encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) - })? 
+ chunk_byte_length - - chunk[iteration_index].encoding_length().map_err(|e| { - Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) - })?; - - // verify that this chunk doesn't make use exceed the limit - if let Some(limit) = limit { - if new_total > limit { - let next_index = match chunk_index > max_chunk_index { - true => None, - _ => Some(chunk_index), - }; - - return Ok(SubtreeChunk::new( - chunk.into(), - next_index, - Some(limit - chunk_byte_length), - )); - } - } - - chunk_byte_length = new_total; - chunk_index += 1; - - chunk.remove(iteration_index); - for op in replacement_chunk.into_iter().rev() { - chunk.insert(iteration_index, op); - } - } else { - iteration_index += 1; - } - } - - let remaining_limit = limit.map(|l| l - chunk_byte_length); - let next_index = match chunk_index > max_chunk_index { - true => None, - _ => Some(chunk_index), - }; - - Ok(SubtreeChunk::new(chunk.into(), next_index, remaining_limit)) - } - - /// Returns the total number of chunks for the underlying Merk tree. - pub fn len(&self) -> usize { - number_of_chunks(self.height as usize) - } - - /// Gets the next chunk based on the `ChunkProducer`'s internal index state. - /// This is mostly useful for letting `ChunkIter` yield the chunks in order, - /// optimizing throughput compared to random access. - // TODO: does this really optimize throughput, how can you make the statement - // true? 
- fn next_chunk(&mut self) -> Option, Error>> { - // for now not better than random access - // TODO: fix - let max_index = number_of_chunks(self.height); - if self.index > max_index { - return None; - } - - let chunk = self.chunk(self.index); - - return Some(chunk); - } - - // TODO: test this logic out - fn get_chunk_encoding_length(chunk: &[Op]) -> usize { - // TODO: deal with error - chunk - .iter() - .fold(0, |sum, op| sum + op.encoding_length().unwrap()) - } -} - -/// Iterate over each chunk, returning `None` after last chunk -impl<'db, S> Iterator for ChunkProducer<'db, S> -where - S: StorageContext<'db>, -{ - type Item = Result, Error>; - - fn next(&mut self) -> Option { - self.next_chunk() - } -} - -impl<'db, S> Merk -where - S: StorageContext<'db>, -{ - /// Creates a `ChunkProducer` which can return chunk proofs for replicating - /// the entire Merk tree. - pub fn chunks(&'db self) -> Result, Error> { - ChunkProducer::new(self) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::{ - proofs::{ - chunk::chunk2::{ - tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, - LEFT, RIGHT, - }, - tree::execute, - Tree, - }, - test_utils::{make_batch_seq, TempMerk}, - }; - - #[derive(Default)] - struct NodeCounts { - hash: usize, - kv_hash: usize, - kv: usize, - kv_value_hash: usize, - kv_digest: usize, - kv_ref_value_hash: usize, - kv_value_hash_feature_type: usize, - } - - impl NodeCounts { - fn sum(&self) -> usize { - return self.hash - + self.kv_hash - + self.kv - + self.kv_value_hash - + self.kv_digest - + self.kv_ref_value_hash - + self.kv_value_hash_feature_type; - } - } - - fn count_node_types(tree: Tree) -> NodeCounts { - let mut counts = NodeCounts::default(); - - tree.visit_nodes(&mut |node| { - match node { - Node::Hash(_) => counts.hash += 1, - Node::KVHash(_) => counts.kv_hash += 1, - Node::KV(..) => counts.kv += 1, - Node::KVValueHash(..) => counts.kv_value_hash += 1, - Node::KVDigest(..) 
=> counts.kv_digest += 1, - Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, - Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1, - }; - }); - - counts - } - - #[test] - fn test_merk_chunk_len() { - // Tree of height 5 - max of 31 elements, min of 16 elements - // 5 will be broken into 3 layers = [2, 2, 2] - // exit nodes from first layer = 2^2 = 4 - // exit nodes from the second layer = 4 ^ 2^2 = 16 - // total_chunk = 1 + 4 + 16 = 21 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..20); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(5)); - let chunk_producer = ChunkProducer::new(&merk).unwrap(); - assert_eq!(chunk_producer.len(), 21); - - // Tree of height 10 - max of 1023 elements, min of 512 elements - // 4 layers -> [2,2,2,2,2] - // chunk_count_per_layer -> [1, 4, 16, 64, 256] - // total = 341 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..1000); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(10)); - let chunk_producer = ChunkProducer::new(&merk).unwrap(); - assert_eq!(chunk_producer.len(), 341); - } - - #[test] - fn test_chunk_producer_iter() { - // tree with height 4 - // full tree - // 7 - // / \ - // 3 11 - // / \ / \ - // 1 5 9 13 - // / \ / \ / \ / \ - // 0 2 4 6 8 10 12 14 - // going to be broken into [2, 2] - // that's a total of 5 chunks - - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // build iterator from first chunk producer - let mut chunks = merk.chunks().expect("should return producer"); - - // ensure that the chunks gotten from the iterator is the same - // as that from the chunk 
producer - for i in 1..=5 { - assert_eq!( - chunks.next().unwrap().unwrap(), - chunk_producer.chunk(i).unwrap() - ); - } - - // returns None after max - assert_eq!(chunks.next().is_none(), true); - } - - #[test] - fn test_random_chunk_access() { - // tree with height 4 - // full tree - // 7 - // / \ - // 3 11 - // / \ / \ - // 1 5 9 13 - // / \ / \ / \ / \ - // 0 2 4 6 8 10 12 14 - // going to be broken into [2, 2] - // that's a total of 5 chunks - - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut inner_tree = merk.tree.take().expect("has inner tree"); - merk.tree.set(Some(inner_tree.clone())); - - // TODO: should I be using panic source? - let mut tree_walker = RefWalker::new(&mut inner_tree, PanicSource {}); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - assert_eq!(chunk_producer.len(), 5); - - // assert bounds - assert_eq!(chunk_producer.chunk(0).is_err(), true); - assert_eq!(chunk_producer.chunk(6).is_err(), true); - - // first chunk - // expected: - // 7 - // / \ - // 3 11 - // / \ / \ - // H(1) H(5) H(9) H(13) - let chunk = chunk_producer.chunk(1).expect("should generate chunk"); - assert_eq!(chunk.len(), 13); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])), - Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[LEFT])), - Op::Parent, - Op::Push(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])), - Op::Child, - Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[])), - Op::Parent, - Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])), - Op::Push(traverse_get_kv_feature_type(&mut tree_walker, &[RIGHT])), - Op::Parent, - Op::Push(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])), - Op::Child, - Op::Child - ] - ); - - // second chunk - // expected: - // 1 - // / \ - // 
0 2 - let chunk = chunk_producer.chunk(2).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, LEFT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, LEFT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, LEFT, RIGHT] - )), - Op::Child - ] - ); - - // third chunk - // expected: - // 5 - // / \ - // 4 6 - let chunk = chunk_producer.chunk(3).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, RIGHT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, RIGHT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[LEFT, RIGHT, RIGHT] - )), - Op::Child - ] - ); - - // third chunk - // expected: - // 9 - // / \ - // 8 10 - let chunk = chunk_producer.chunk(4).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, LEFT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, LEFT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, LEFT, RIGHT] - )), - Op::Child - ] - ); - - // third chunk - // expected: - // 13 - // / \ - // 12 14 - let chunk = chunk_producer.chunk(5).expect("should generate chunk"); - assert_eq!(chunk.len(), 5); - assert_eq!( - chunk, - vec![ - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, RIGHT, LEFT] - )), - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, RIGHT] - )), - Op::Parent, - Op::Push(traverse_get_kv_feature_type( - &mut tree_walker, - &[RIGHT, RIGHT, RIGHT] - )), - Op::Child - ] - ); - } - - #[test] - fn test_subtree_chunk_no_limit() { - // tree of height 4 - // 5 
chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - // generate multi chunk with no limit - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, None) - .expect("should generate chunk with limit"); - - assert_eq!(chunk_result.remaining_limit, None); - assert_eq!(chunk_result.next_index, None); - - let tree = execute(chunk_result.chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - // assert that all nodes are of type kv_value_hash_feature_type - let node_counts = count_node_types(tree); - assert_eq!(node_counts.hash, 0); - assert_eq!(node_counts.kv_hash, 0); - assert_eq!(node_counts.kv, 0); - assert_eq!(node_counts.kv_value_hash, 0); - assert_eq!(node_counts.kv_digest, 0); - assert_eq!(node_counts.kv_ref_value_hash, 0); - assert_eq!(node_counts.kv_value_hash_feature_type, 15); - } - - #[test] - fn test_subtree_chunk_with_limit() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // initial chunk is of size 453, so limit of 10 is too small - // should return an error - let chunk = chunk_producer.subtree_multi_chunk_with_limit(1, Some(10)); - assert!(chunk.is_err()); - - // get just the fist chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(453)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, 
Some(2)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 453); - assert_eq!(chunk.len(), 13); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 3); - assert_eq!(node_counts.hash, 4); - assert_eq!(node_counts.sum(), 4 + 3); - - // get up to second chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(737)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(3)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 737); - assert_eq!(chunk.len(), 17); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 6); - assert_eq!(node_counts.hash, 3); - assert_eq!(node_counts.sum(), 6 + 3); - - // get up to third chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(1021)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(4)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1021); - assert_eq!(chunk.len(), 21); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 9); - assert_eq!(node_counts.hash, 2); - 
assert_eq!(node_counts.sum(), 9 + 2); - - // get up to fourth chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(1305)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, Some(5)); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1305); - assert_eq!(chunk.len(), 25); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 12); - assert_eq!(node_counts.hash, 1); - assert_eq!(node_counts.sum(), 12 + 1); - - // get up to fifth chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(1589)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(0)); - assert_eq!(chunk_result.next_index, None); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1589); - assert_eq!(chunk.len(), 29); // op count - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 15); - assert_eq!(node_counts.hash, 0); - assert_eq!(node_counts.sum(), 15); - - // limit larger than total chunk - let chunk_result = chunk_producer - .subtree_multi_chunk_with_limit(1, Some(usize::MAX)) - .expect("should generate chunk with limit"); - assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); - assert_eq!(chunk_result.next_index, None); - - let mut chunk = chunk_result.chunk; - assert_eq!(chunk.encoding_length().unwrap(), 1589); - assert_eq!(chunk.len(), 29); // op count - let tree = 
execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - - let node_counts = count_node_types(tree); - assert_eq!(node_counts.kv_value_hash_feature_type, 15); - assert_eq!(node_counts.hash, 0); - assert_eq!(node_counts.sum(), 15); - } - - #[test] - fn test_multi_chunk_with_no_limit_trunk() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // we generate the chunk starting from index 2, this has no hash nodes - // so no multi chunk will be generated - let chunk_result = chunk_producer - .multi_chunk_with_limit(1, None) - .expect("should generate chunk with limit"); - - assert_eq!(chunk_result.remaining_limit, None); - assert_eq!(chunk_result.next_index, None); - - // should only contain 2 items, the starting chunk id and the entire tree - assert_eq!(chunk_result.chunk.len(), 2); - - // assert items - assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![])); - if let ChunkOp::Chunk(chunk) = &chunk_result.chunk[1] { - let tree = execute(chunk.clone().into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .expect("should reconstruct tree"); - assert_eq!(tree.hash().unwrap(), merk.root_hash().unwrap()); - } else { - panic!("expected ChunkOp::Chunk"); - } - } - - #[test] - fn test_multi_chunk_with_no_limit_not_trunk() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // we generate the chunk starting from 
index 2, this has no hash nodes - // so no multi chunk will be generated - let chunk_result = chunk_producer - .multi_chunk_with_limit(2, None) - .expect("should generate chunk with limit"); - - assert_eq!(chunk_result.remaining_limit, None); - assert_eq!(chunk_result.next_index, None); - - // chunk 2 - 5 will be considered separate subtrees - // each will have an accompanying chunk id, so 8 elements total - assert_eq!(chunk_result.chunk.len(), 8); - - // assert the chunk id's - assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); - assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); - assert_eq!(chunk_result.chunk[4], ChunkOp::ChunkId(vec![RIGHT, LEFT])); - assert_eq!(chunk_result.chunk[6], ChunkOp::ChunkId(vec![RIGHT, RIGHT])); - - // assert the chunks - assert_eq!( - chunk_result.chunk[1], - ChunkOp::Chunk(chunk_producer.chunk(2).expect("should generate chunk")) - ); - assert_eq!( - chunk_result.chunk[3], - ChunkOp::Chunk(chunk_producer.chunk(3).expect("should generate chunk")) - ); - assert_eq!( - chunk_result.chunk[5], - ChunkOp::Chunk(chunk_producer.chunk(4).expect("should generate chunk")) - ); - assert_eq!( - chunk_result.chunk[7], - ChunkOp::Chunk(chunk_producer.chunk(5).expect("should generate chunk")) - ); - } - - #[test] - fn test_multi_chunk_with_limit() { - // tree of height 4 - // 5 chunks - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - - // ensure that the remaining limit, next index and values given are correct - // if limit is smaller than first chunk, we should get an error - let chunk_result = chunk_producer.multi_chunk_with_limit(1, Some(5)); - assert!(matches!( - chunk_result, - Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) - )); - - // get chunk 2 - // data 
size of chunk 2 is exactly 317 - // chunk op encoding for chunk 2 = 321 - // hence limit of 317 will be insufficient - let chunk_result = chunk_producer.multi_chunk_with_limit(2, Some(317)); - assert!(matches!( - chunk_result, - Err(Error::ChunkingError(ChunkError::LimitTooSmall(..))) - )); - - // get chunk 2 and 3 - // chunk 2 chunk op = 331 - // chunk 3 chunk op = 321 - let chunk_result = chunk_producer - .multi_chunk_with_limit(2, Some(321 + 321 + 5)) - .expect("should generate chunk"); - assert_eq!(chunk_result.next_index, Some(4)); - assert_eq!(chunk_result.remaining_limit, Some(5)); - assert_eq!(chunk_result.chunk.len(), 4); - assert_eq!(chunk_result.chunk[0], ChunkOp::ChunkId(vec![LEFT, LEFT])); - assert_eq!(chunk_result.chunk[2], ChunkOp::ChunkId(vec![LEFT, RIGHT])); - } -} diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 0eb3f8cd..56308a64 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -29,19 +29,14 @@ //! Merk pub mod chunks; - pub(crate) mod defaults; - pub mod options; - -mod chunks2; pub mod restore; -mod restore2; use std::{ cell::Cell, cmp::Ordering, - collections::{BTreeSet, LinkedList}, + collections::{BTreeMap, BTreeSet, LinkedList}, fmt, }; @@ -62,12 +57,21 @@ use crate::{ defaults::{MAX_UPDATE_VALUE_BASED_ON_COSTS_TIMES, ROOT_KEY_KEY}, options::MerkOptions, }, - proofs::{encode_into, query::query_item::QueryItem, Op as ProofOp, Query}, + proofs::{ + chunk::{ + chunk::{LEFT, RIGHT}, + util::traversal_instruction_as_string, + }, + encode_into, + query::query_item::QueryItem, + Op as ProofOp, Query, + }, tree::{ kv::{ValueDefinedCostType, KV}, AuxMerkBatch, Commit, CryptoHash, Fetch, Link, MerkBatch, Op, RefWalker, Tree, Walker, NULL_HASH, }, + verify_query, Error::{CostsError, EdError, StorageError}, MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, TreeFeatureType, @@ -1284,6 +1288,126 @@ where Ok(()).wrap_with_cost(Default::default()) } } + + /// Verifies the correctness of a merk tree + /// hash values are 
computed correctly, heights are accurate and links + /// consistent with backing store. + // TODO: define the return types + pub fn verify(&self) -> (BTreeMap, BTreeMap>) { + let tree = self.tree.take(); + + let mut bad_link_map: BTreeMap = BTreeMap::new(); + let mut parent_keys: BTreeMap> = BTreeMap::new(); + let mut root_traversal_instruction = vec![]; + + // TODO: remove clone + self.verify_tree( + // TODO: handle unwrap + &tree.clone().unwrap(), + &mut root_traversal_instruction, + &mut bad_link_map, + &mut parent_keys, + ); + self.tree.set(tree); + + return (bad_link_map, parent_keys); + } + + fn verify_tree( + &self, + tree: &Tree, + traversal_instruction: &mut Vec, + bad_link_map: &mut BTreeMap, + parent_keys: &mut BTreeMap>, + ) { + if let Some(link) = tree.link(LEFT) { + traversal_instruction.push(LEFT); + self.verify_link( + link, + tree.key(), + traversal_instruction, + bad_link_map, + parent_keys, + ); + traversal_instruction.pop(); + } + + if let Some(link) = tree.link(RIGHT) { + traversal_instruction.push(RIGHT); + self.verify_link( + link, + tree.key(), + traversal_instruction, + bad_link_map, + parent_keys, + ); + traversal_instruction.pop(); + } + } + + fn verify_link( + &self, + link: &Link, + parent_key: &[u8], + traversal_instruction: &mut Vec, + bad_link_map: &mut BTreeMap, + parent_keys: &mut BTreeMap>, + ) { + let (hash, key, sum) = match link { + Link::Reference { hash, key, sum, .. } => { + (hash.to_owned(), key.to_owned(), sum.to_owned()) + } + Link::Modified { + tree, + child_heights, + .. 
+ } => ( + tree.hash().unwrap(), + tree.key().to_vec(), + tree.sum().unwrap(), + ), + Link::Loaded { + hash, + child_heights, + sum, + tree, + } => (hash.to_owned(), tree.key().to_vec(), sum.to_owned()), + _ => todo!(), + }; + + let instruction_id = traversal_instruction_as_string(&traversal_instruction); + let node = Tree::get(&self.storage, key).unwrap(); + + if node.is_err() { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + let node = node.unwrap(); + if node.is_none() { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + let node = node.unwrap(); + if &node.hash().unwrap() != &hash { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + if node.sum().unwrap() != sum { + bad_link_map.insert(instruction_id.clone(), hash.clone()); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } + + // TODO: check child heights + // all checks passed, recurse + self.verify_tree(&node, traversal_instruction, bad_link_map, parent_keys); + } } fn fetch_node<'db>(db: &impl StorageContext<'db>, key: &[u8]) -> Result, Error> { diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index e1a1afd4..0b866cac 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -29,250 +29,260 @@ //! Provides `Restorer`, which can create a replica of a Merk instance by //! receiving chunk proofs. 
-#[cfg(feature = "full")] -use std::{iter::Peekable, u8}; +use std::collections::BTreeMap; -#[cfg(feature = "full")] use grovedb_storage::{Batch, StorageContext}; -#[cfg(feature = "full")] -use super::Merk; -#[cfg(feature = "full")] use crate::{ - error::Error, + merk, merk::MerkSource, proofs::{ - chunk::{verify_leaf, verify_trunk, MIN_TRUNK_HEIGHT}, - tree::{Child, Tree as ProofTree}, + chunk::{ + chunk::{LEFT, RIGHT}, + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + }, + }, + tree::{execute, Child, Tree as ProofTree}, Node, Op, }, - tree::{combine_hash, value_hash, Link, RefWalker, Tree}, - CryptoHash, + tree::{RefWalker, Tree}, + CryptoHash, Error, Error::{CostsError, EdError, StorageError}, - TreeFeatureType::BasicMerk, + Link, Merk, + TreeFeatureType::{BasicMerk, SummedMerk}, }; -#[cfg(feature = "full")] -/// A `Restorer` handles decoding, verifying, and storing chunk proofs to -/// replicate an entire Merk tree. It expects the chunks to be processed in -/// order, retrying the last chunk if verification fails. +/// Restorer handles verification of chunks and replication of Merk trees. +/// Chunks can be processed randomly as long as their parent has been processed +/// already. pub struct Restorer { - leaf_hashes: Option>>, - parent_keys: Option>>>, - trunk_height: Option, merk: Merk, - expected_root_hash: CryptoHash, - combining_value: Option>, + chunk_id_to_root_hash: BTreeMap, + // this is used to keep track of parents whose links need to be rewritten + parent_keys: BTreeMap>, } -#[cfg(feature = "full")] impl<'db, S: StorageContext<'db>> Restorer { - /// Creates a new `Restorer`, which will initialize a new Merk at the given - /// file path. 
The first chunk (the "trunk") will be compared against - /// `expected_root_hash`, then each subsequent chunk will be compared - /// against the hashes stored in the trunk, so that the restore process will - /// never allow malicious peers to send more than a single invalid chunk. - pub fn new( - merk: Merk, - combining_value: Option>, - expected_root_hash: CryptoHash, - ) -> Self { + /// Initializes a new chunk restorer with the expected root hash for the + /// first chunk + pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { + let mut chunk_id_to_root_hash = BTreeMap::new(); + chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); + Self { - expected_root_hash, - combining_value, - trunk_height: None, merk, - leaf_hashes: None, - parent_keys: None, + chunk_id_to_root_hash, + parent_keys: BTreeMap::new(), } } - /// Verifies a chunk and writes it to the working RocksDB instance. Expects - /// to be called for each chunk in order. Returns the number of remaining - /// chunks. - /// - /// Once there are no remaining chunks to be processed, `finalize` should - /// be called. - pub fn process_chunk(&mut self, ops: impl IntoIterator) -> Result { - match self.leaf_hashes { - None => self.process_trunk(ops), - Some(_) => self.process_leaf(ops), - } - } - - /// Consumes the `Restorer` and returns the newly-created, fully-populated - /// Merk instance. This method will return an error if called before - /// processing all chunks (e.g. `restorer.remaining_chunks()` is not equal - /// to 0). 
- pub fn finalize(mut self) -> Result, Error> { - if self.remaining_chunks().unwrap_or(0) != 0 { - return Err(Error::OldChunkRestoringError( - "Called finalize before all chunks were processed".to_string(), - )); + // TODO: consider converting chunk id to a vec + /// Processes a chunk at some chunk id, returns the chunks id's of chunks + /// that can be requested + pub fn process_chunk( + &mut self, + chunk_id: String, + chunk: Vec, + ) -> Result, Error> { + let expected_root_hash = self + .chunk_id_to_root_hash + .get(&chunk_id) + .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; + + let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?; + + let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; + + if root_traversal_instruction.is_empty() { + self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); + } else { + // every non root chunk has some associated parent with an placeholder link + // here we update the placeholder link to represent the true data + self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?; } - if self.trunk_height.unwrap() >= MIN_TRUNK_HEIGHT { - self.rewrite_trunk_child_heights()?; + // next up, we need to write the chunk and build the map again + let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction); + if chunk_write_result.is_ok() { + // if we were able to successfully write the chunk, we can remove + // the chunk expected root hash from our chunk id map + self.chunk_id_to_root_hash.remove(&chunk_id); } - self.merk.load_base_root().unwrap()?; - - Ok(self.merk) + chunk_write_result } - /// Returns the number of remaining chunks to be processed. If called before - /// the first chunk is processed, this method will return `None` since we do - /// not yet have enough information to know about the number of chunks. 
- pub fn remaining_chunks(&self) -> Option { - self.leaf_hashes.as_ref().map(|lh| lh.len()) + /// Process multi chunks (space optimized chunk proofs that can contain + /// multiple singluar chunks) + pub fn process_multi_chunk(&mut self, multi_chunk: Vec) -> Result, Error> { + let mut expect_chunk_id = true; + let mut chunk_ids = vec![]; + let mut current_chunk_id: String = "".to_string(); + + for chunk_op in multi_chunk { + if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id) + || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id) + { + return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk( + "invalid multi chunk ordering", + ))); + } + match chunk_op { + ChunkOp::ChunkId(instructions) => { + current_chunk_id = traversal_instruction_as_string(&instructions); + } + ChunkOp::Chunk(chunk) => { + // TODO: remove clone + let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?; + chunk_ids.extend(next_chunk_ids); + } + } + expect_chunk_id = !expect_chunk_id; + } + Ok(chunk_ids) } - /// Writes the data contained in `tree` (extracted from a verified chunk - /// proof) to the RocksDB. 
- fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> { - let mut batch = self.merk.storage.new_batch(); - - tree.visit_refs(&mut |proof_node| { - if let Some((mut node, key)) = match &proof_node.node { - Node::KV(key, value) => Some(( - Tree::new(key.clone(), value.clone(), None, BasicMerk).unwrap(), - key, - )), - Node::KVValueHash(key, value, value_hash) => Some(( - Tree::new_with_value_hash(key.clone(), value.clone(), *value_hash, BasicMerk) - .unwrap(), - key, - )), - Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some(( - Tree::new_with_value_hash( - key.clone(), - value.clone(), - *value_hash, - *feature_type, - ) - .unwrap(), - key, - )), - _ => None, - } { - // TODO: encode tree node without cloning key/value - *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); - *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); - - let bytes = node.encode(); - batch.put(key, &bytes, None, None).map_err(CostsError) - } else { + /// Verifies the structure of a chunk and ensures the chunk matches the + /// expected root hash + fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash) -> Result { + let chunk_len = chunk.len(); + let mut kv_count = 0; + let mut hash_count = 0; + + // build tree from ops + // ensure only made of KvValueFeatureType and Hash nodes and count them + let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| { + if matches!(node, Node::KVValueHashFeatureType(..)) { + kv_count += 1; + Ok(()) + } else if matches!(node, Node::Hash(..)) { + hash_count += 1; Ok(()) + } else { + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) } - })?; + }) + .unwrap()?; - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError) - } - - /// Verifies the trunk then writes its data to the RocksDB. 
- fn process_trunk(&mut self, ops: impl IntoIterator) -> Result { - let (trunk, height) = verify_trunk(ops.into_iter().map(Ok)).unwrap()?; + // chunk len must be exactly equal to the kv_count + hash_count + + // parent_branch_count + child_branch_count + debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); - let root_hash = if self.combining_value.is_none() { - trunk.hash().unwrap() - } else { - combine_hash( - value_hash(self.combining_value.as_ref().expect("confirmed exists")).value(), - &trunk.hash().unwrap(), - ) - .value - }; - - if root_hash != self.expected_root_hash { - return Err(Error::OldChunkRestoringError(format!( - "Proof did not match expected hash\n\tExpected: {:?}\n\tActual: {:?}", - self.expected_root_hash, - trunk.hash() + // chunk structure verified, next verify root hash + if &tree.hash().unwrap() != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", ))); } - let root_key = trunk.key().to_vec(); - - let trunk_height = height / 2; - self.trunk_height = Some(trunk_height); - - let chunks_remaining = if trunk_height >= MIN_TRUNK_HEIGHT { - let leaf_hashes = trunk - .layer(trunk_height) - .map(|node| node.hash().unwrap()) - .collect::>() - .into_iter() - .peekable(); - self.leaf_hashes = Some(leaf_hashes); - - let parent_keys = trunk - .layer(trunk_height - 1) - .map(|node| node.key().to_vec()) - .collect::>>() - .into_iter() - .peekable(); - self.parent_keys = Some(parent_keys); - assert_eq!( - self.parent_keys.as_ref().unwrap().len(), - self.leaf_hashes.as_ref().unwrap().len() / 2 - ); - - let chunks_remaining = (2_usize).pow(trunk_height as u32); - assert_eq!(self.remaining_chunks_unchecked(), chunks_remaining); - chunks_remaining - } else { - self.leaf_hashes = Some(vec![].into_iter().peekable()); - self.parent_keys = Some(vec![].into_iter().peekable()); - 0 - }; - - // note that these writes don't happen atomically, which is fine here - // because 
if anything fails during the restore process we will just - // scrap the whole restore and start over - self.write_chunk(trunk)?; - self.merk.set_base_root_key(Some(root_key)).unwrap()?; - - Ok(chunks_remaining) + Ok(tree) } - /// Verifies a leaf chunk then writes it to the RocksDB. This needs to be - /// called in order, retrying the last chunk for any failed verifications. - fn process_leaf(&mut self, ops: impl IntoIterator) -> Result { - let leaf_hashes = self.leaf_hashes.as_mut().unwrap(); - let leaf_hash = leaf_hashes - .peek() - .expect("Received more chunks than expected"); - - let leaf = verify_leaf(ops.into_iter().map(Ok), *leaf_hash).unwrap()?; - self.rewrite_parent_link(&leaf)?; - self.write_chunk(leaf)?; - - let leaf_hashes = self.leaf_hashes.as_mut().unwrap(); - leaf_hashes.next(); + /// Write the verified chunk to storage + fn write_chunk( + &mut self, + chunk_tree: ProofTree, + traversal_instruction: &mut Vec, + ) -> Result, Error> { + // this contains all the elements we want to write to storage + let mut batch = self.merk.storage.new_batch(); + let mut new_chunk_ids = Vec::new(); + + chunk_tree.visit_refs_track_traversal_and_parent( + traversal_instruction, + None, + &mut |proof_node, node_traversal_instruction, parent_key| { + match &proof_node.node { + Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => { + // build tree from node value + let mut tree = Tree::new_with_value_hash( + key.clone(), + value.clone(), + value_hash.clone(), + *feature_type, + ) + .unwrap(); + + // update tree links + *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link); + *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link); + + // encode the node and add it to the batch + let bytes = tree.encode(); + + batch.put(key, &bytes, None, None).map_err(CostsError) + } + Node::Hash(hash) => { + // the node hash points to the root of another chunk + // we get the chunk id and add the hash to restorer state + let chunk_id = 
traversal_instruction_as_string(node_traversal_instruction); + new_chunk_ids.push(chunk_id.clone()); + self.chunk_id_to_root_hash + .insert(chunk_id.clone(), hash.clone()); + // TODO: handle unwrap + self.parent_keys + .insert(chunk_id, parent_key.unwrap().to_owned()); + Ok(()) + } + _ => { + // we do nothing for other node types + // technically verify chunk will be called before this + // as such this should not be reached + Ok(()) + } + } + }, + )?; + + // write the batch + self.merk + .storage + .commit_batch(batch) + .unwrap() + .map_err(StorageError)?; - Ok(self.remaining_chunks_unchecked()) + Ok(new_chunk_ids) } - /// The parent of the root node of the leaf does not know the key of its - /// children when it is first written. Now that we have verified this leaf, - /// we can write the key into the parent node's entry. Note that this does - /// not need to recalcuate hashes since it already had the child hash. - fn rewrite_parent_link(&mut self, leaf: &ProofTree) -> Result<(), Error> { - let parent_keys = self.parent_keys.as_mut().unwrap(); - let parent_key = parent_keys.peek().unwrap().clone(); - let mut parent = crate::merk::fetch_node(&self.merk.storage, parent_key.as_slice())? - .expect("Could not find parent of leaf chunk"); - - let is_left_child = self.remaining_chunks_unchecked() % 2 == 0; - if let Some(Link::Reference { ref mut key, .. }) = parent.link_mut(is_left_child) { - *key = leaf.key().to_vec(); - } else { - panic!("Expected parent links to be type Link::Reference"); - }; + /// When we process truncated chunks, the parents of Node::Hash have invalid + /// placeholder for links. + /// When we get the actual chunk associated with the Node::Hash, + /// we need to update the parent link to reflect the correct data. 
+ fn rewrite_parent_link( + &mut self, + chunk_id: &str, + traversal_instruction: &[bool], + chunk_tree: &ProofTree, + ) -> Result<(), Error> { + let parent_key = self + .parent_keys + .get(chunk_id) + .ok_or(Error::ChunkRestoringError(InternalError( + "after successful chunk verification parent key should exist", + )))?; + + let mut parent = merk::fetch_node(&self.merk.storage, parent_key.as_slice())?.ok_or( + Error::ChunkRestoringError(InternalError( + "cannot find expected parent in memory, most likely state corruption issue", + )), + )?; + + let is_left = traversal_instruction + .last() + .expect("rewrite is only called when traversal_instruction is not empty"); + + let updated_key = chunk_tree.key(); + let updated_sum = chunk_tree.sum(); + + if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) { + *key = updated_key.to_vec(); + *sum = updated_sum; + } let parent_bytes = parent.encode(); self.merk @@ -281,56 +291,59 @@ impl<'db, S: StorageContext<'db>> Restorer { .unwrap() .map_err(StorageError)?; - if !is_left_child { - let parent_keys = self.parent_keys.as_mut().unwrap(); - parent_keys.next(); - } + self.parent_keys + .remove(chunk_id) + .expect("confirmed parent key exists above"); Ok(()) } - fn rewrite_trunk_child_heights(&mut self) -> Result<(), Error> { - fn recurse<'s, 'db, S: StorageContext<'db>>( - mut node: RefWalker>, - remaining_depth: usize, + /// Each nodes height is not added to state as such the producer could lie + /// about the height values after replication we need to verify the + /// heights and if invalid recompute the correct values + fn rewrite_heights(&mut self) -> Result<(), Error> { + fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>( + mut walker: RefWalker>, batch: &mut >::Batch, ) -> Result<(u8, u8), Error> { - if remaining_depth == 0 { - return Ok(node.tree().child_heights()); - } + // TODO: remove unwrap + let mut cloned_node = Tree::decode( + walker.tree().key().to_vec(), + 
walker.tree().encode().as_slice(), + ) + .unwrap(); - let mut cloned_node = - Tree::decode(node.tree().key().to_vec(), node.tree().encode().as_slice()) - .map_err(EdError)?; + let mut left_height = 0; + let mut right_height = 0; - let left_child = node.walk(true).unwrap()?.unwrap(); - let left_child_heights = recurse(left_child, remaining_depth - 1, batch)?; - let left_height = left_child_heights.0.max(left_child_heights.1) + 1; - *cloned_node.link_mut(true).unwrap().child_heights_mut() = left_child_heights; + if let Some(left_walker) = walker.walk(LEFT).unwrap()? { + let left_child_heights = rewrite_child_heights(left_walker, batch)?; + left_height = left_child_heights.0.max(left_child_heights.1) + 1; + *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights; + } - let right_child = node.walk(false).unwrap()?.unwrap(); - let right_child_heights = recurse(right_child, remaining_depth - 1, batch)?; - let right_height = right_child_heights.0.max(right_child_heights.1) + 1; - *cloned_node.link_mut(false).unwrap().child_heights_mut() = right_child_heights; + if let Some(right_walker) = walker.walk(RIGHT).unwrap()? 
{ + let right_child_heights = rewrite_child_heights(right_walker, batch)?; + right_height = right_child_heights.0.max(right_child_heights.1) + 1; + *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights; + } let bytes = cloned_node.encode(); batch - .put(node.tree().key(), &bytes, None, None) + .put(walker.tree().key(), &bytes, None, None) .map_err(CostsError)?; - Ok((left_height, right_height)) + return Ok((left_height, right_height)); } - self.merk.load_base_root().unwrap()?; - let mut batch = self.merk.storage.new_batch(); + // TODO: deal with unwrap + let mut tree = self.merk.tree.take().unwrap(); + let mut walker = RefWalker::new(&mut tree, self.merk.source()); - let depth = self.trunk_height.unwrap(); - self.merk.use_tree_mut(|maybe_tree| { - let tree = maybe_tree.unwrap(); - let walker = RefWalker::new(tree, self.merk.source()); - recurse(walker, depth, &mut batch) - })?; + rewrite_child_heights(walker, &mut batch)?; + + self.merk.tree.set(Some(tree)); self.merk .storage @@ -339,72 +352,239 @@ impl<'db, S: StorageContext<'db>> Restorer { .map_err(StorageError) } - /// Returns the number of remaining chunks to be processed. This method will - /// panic if called before processing the first chunk (since that chunk - /// gives us the information to know how many chunks to expect). - pub fn remaining_chunks_unchecked(&self) -> usize { - self.leaf_hashes.as_ref().unwrap().len() - } -} + /// Rebuild restoration state from partial storage state + fn attempt_state_recovery(&mut self) -> Result<(), Error> { + // TODO: think about the return type some more + let (bad_link_map, parent_keys) = self.merk.verify(); + if !bad_link_map.is_empty() { + self.chunk_id_to_root_hash = bad_link_map; + self.parent_keys = parent_keys; + } -#[cfg(feature = "full")] -impl<'db, S: StorageContext<'db>> Merk { - /// Creates a new `Restorer`, which can be used to verify chunk proofs to - /// replicate an entire Merk tree. 
A new Merk instance will be initialized - /// by creating a RocksDB at `path`. - pub fn restore(merk: Merk, expected_root_hash: CryptoHash) -> Restorer { - Restorer::new(merk, None, expected_root_hash) + Ok(()) } -} -#[cfg(feature = "full")] -impl ProofTree { - fn child_heights(&self) -> (u8, u8) { - ( - self.left.as_ref().map_or(0, |c| c.tree.height as u8), - self.right.as_ref().map_or(0, |c| c.tree.height as u8), - ) + /// Consumes the `Restorer` and returns a newly created, fully populated + /// Merk instance. This method will return an error if called before + /// processing all chunks. + pub fn finalize(mut self) -> Result, Error> { + // ensure all chunks have been processed + if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 { + return Err(Error::ChunkRestoringError( + ChunkError::RestorationNotComplete, + )); + } + + // get the latest version of the root node + self.merk.load_base_root(); + + // if height values are wrong, rewrite height + if self.verify_height().is_err() { + self.rewrite_heights(); + // update the root node after height rewrite + self.merk.load_base_root(); + } + + if self.merk.verify().0.len() != 0 { + return Err(Error::ChunkRestoringError(ChunkError::InternalError( + "restored tree invalid", + ))); + } + + Ok(self.merk) } -} -#[cfg(feature = "full")] -impl Child { - fn as_link(&self) -> Link { - let key = match &self.tree.node { - Node::KV(key, _) - | Node::KVValueHash(key, ..) - | Node::KVValueHashFeatureType(key, ..) => key.as_slice(), - // for the connection between the trunk and leaf chunks, we don't - // have the child key so we must first write in an empty one. 
once - // the leaf gets verified, we can write in this key to its parent - _ => &[], + /// Verify that the child heights of the merk tree links correctly represent + /// the tree + fn verify_height(&self) -> Result<(), Error> { + let tree = self.merk.tree.take(); + let height_verification_result = if let Some(tree) = &tree { + self.verify_tree_height(&tree, tree.height()) + } else { + Ok(()) }; + self.merk.tree.set(tree); + height_verification_result + } + + fn verify_tree_height(&self, tree: &Tree, parent_height: u8) -> Result<(), Error> { + let (left_height, right_height) = tree.child_heights(); - Link::Reference { - hash: self.hash, - sum: None, - child_heights: self.tree.child_heights(), - key: key.to_vec(), + if (left_height.abs_diff(right_height)) > 1 { + return Err(Error::CorruptedState( + "invalid child heights, difference greater than 1 for AVL tree", + )); + } + + let max_child_height = left_height.max(right_height); + if parent_height <= max_child_height || parent_height - max_child_height != 1 { + return Err(Error::CorruptedState( + "invalid child heights, parent height is not 1 less than max child height", + )); + } + + let left_link = tree.link(LEFT); + let right_link = tree.link(RIGHT); + + if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some()) + { + return Err(Error::CorruptedState( + "invalid child heights node has child height 0, but hash child", + )); } + + if let Some(link) = left_link { + let left_tree = link.tree(); + if left_tree.is_none() { + let left_tree = Tree::get(&self.merk.storage, link.key().to_vec()) + .unwrap()? + .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&left_tree, left_height)?; + } else { + self.verify_tree_height(left_tree.unwrap(), left_height)?; + } + } + + if let Some(link) = right_link { + let right_tree = link.tree(); + if right_tree.is_none() { + let right_tree = Tree::get(&self.merk.storage, link.key().to_vec()) + .unwrap()? 
+ .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&right_tree, right_height)?; + } else { + self.verify_tree_height(right_tree.unwrap(), right_height)?; + } + } + + Ok(()) } } -#[cfg(feature = "full")] #[cfg(test)] mod tests { use grovedb_path::SubtreePath; use grovedb_storage::{ - rocksdb_storage::{test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext}, + rocksdb_storage::{ + test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext, + PrefixedRocksDbStorageContext, + }, RawIterator, Storage, }; use super::*; - use crate::{test_utils::*, tree::Op, MerkBatch}; + use crate::{ + execute_proof, + merk::chunks::ChunkProducer, + proofs::{ + chunk::{ + chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, + error::ChunkError::InvalidChunkProof, + }, + Query, + }, + test_utils::{make_batch_seq, TempMerk}, + Error::ChunkRestoringError, + KVIterator, Merk, PanicSource, + }; + + #[test] + fn test_chunk_verification_non_avl_tree() { + let non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + assert!(Restorer::::verify_chunk( + non_avl_tree_proof, + &[0; 32] + ) + .is_err()); + } + + #[test] + fn test_chunk_verification_only_kv_feature_and_hash() { + // should not accept kv + let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvhash + let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + 
"expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvdigest + let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvrefvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + } + + fn get_node_hash(node: Node) -> Result { + match node { + Node::Hash(hash) => Ok(hash), + _ => Err("expected node hash".to_string()), + } + } + + #[test] + fn test_process_chunk_correct_chunk_id_map() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut merk_tree = merk.tree.take().expect("should have inner tree"); + merk.tree.set(Some(merk_tree.clone())); + let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {}); - fn restore_test(batches: &[&MerkBatch>], expected_nodes: usize) { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut original = 
Merk::open_base( + let mut restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -412,77 +592,136 @@ mod tests { ) .unwrap() .unwrap(); - for batch in batches { - original - .apply::, Vec<_>>(batch, &[], None) - .unwrap() - .unwrap(); - } - let chunks = original.chunks_old().unwrap(); + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - let storage = TempStorage::default(); - let _tx2 = storage.start_transaction(); - let ctx = storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(); - let merk = Merk::open_base(ctx, false).unwrap().unwrap(); - let mut restorer = Merk::restore(merk, original.root_hash().unwrap()); - - assert_eq!(restorer.remaining_chunks(), None); - - let mut expected_remaining = chunks.len(); - for chunk in chunks { - let remaining = restorer.process_chunk(chunk.unwrap()).unwrap(); + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); - expected_remaining -= 1; - assert_eq!(remaining, expected_remaining); - assert_eq!(restorer.remaining_chunks().unwrap(), expected_remaining); - } - assert_eq!(expected_remaining, 0); + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - let restored = restorer.finalize().unwrap(); - assert_eq!(restored.root_hash(), original.root_hash()); - assert_raw_db_entries_eq(&restored, &original, expected_nodes); - } + // initial restorer state should contain just the root hash of the source merk + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); - #[test] - fn restore_10000() { - restore_test(&[&make_batch_seq(0..10_000)], 10_000); - } + // generate first chunk + 
let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap(); + // apply first chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk successfully"); + assert_eq!(new_chunk_ids.len(), 4); + + // after first chunk application + // the chunk_map should contain 4 items + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + // assert all the chunk hash values + assert_eq!( + restorer.chunk_id_to_root_hash.get("11"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("10"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("01"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("00"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap()) + .as_ref() + ); - #[test] - fn restore_3() { - restore_test(&[&make_batch_seq(0..3)], 3); - } + // generate second chunk + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 3); + assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None); + + // let's try to apply the second chunk again, should not work + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); + assert_eq!(chunk_process_result.is_err(), true); + assert!(matches!( + chunk_process_result, + 
Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) + )); + + // next let's get a random but expected chunk and work with that e.g. chunk 4 + // but let's apply it to the wrong place + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); + assert_eq!(chunk_process_result.is_err(), true); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + .. + ))) + )); + + // correctly apply chunk 5 + let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); + assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None); + + // correctly apply chunk 3 + let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None); + + // correctly apply chunk 4 + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None); - #[test] - fn restore_2_left_heavy() { - restore_test( - &[ - &[(vec![0], Op::Put(vec![], BasicMerk))], - &[(vec![1], Op::Put(vec![], 
BasicMerk))], - ], - 2, - ); - } + // finalize merk + let mut restored_merk = restorer.finalize().expect("should finalized successfully"); - #[test] - fn restore_2_right_heavy() { - restore_test( - &[ - &[(vec![1], Op::Put(vec![], BasicMerk))], - &[(vec![0], Op::Put(vec![], BasicMerk))], - ], - 2, + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() ); } - #[test] - fn restore_1() { - restore_test(&[&make_batch_seq(0..1)], 1); - } - fn assert_raw_db_entries_eq( restored: &Merk, original: &Merk, @@ -497,7 +736,10 @@ mod tests { let mut i = 0; loop { - assert_eq!(restored_entries.valid(), original_entries.valid()); + assert_eq!( + restored_entries.valid().unwrap(), + original_entries.valid().unwrap() + ); if !restored_entries.valid().unwrap() { break; } @@ -513,4 +755,466 @@ mod tests { assert_eq!(i, length); } + + // Builds a source merk with batch_size number of elements + // attempts restoration on some empty merk + // verifies that restoration was performed correctly. 
+ fn test_restoration_single_chunk_strategy(batch_size: u64) { + // build the source merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut source_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let (chunk, next_chunk_id) = chunk_producer + .chunk(chunk_id.as_str()) + .expect("should get chunk"); + restorer + .process_chunk(chunk_id.to_string(), chunk) + .expect("should process chunk successfully"); + chunk_id_opt = next_chunk_id; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); 
+ + assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); + } + + #[test] + fn restore_single_chunk_20() { + test_restoration_single_chunk_strategy(20); + } + + #[test] + fn restore_single_chunk_1000() { + test_restoration_single_chunk_strategy(1000); + } + + #[test] + fn test_process_multi_chunk_no_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // generate multi chunk from root with no limit + let chunk = chunk_producer + .multi_chunk_with_limit("", None) + .expect("should generate multichunk"); + + assert_eq!(chunk.chunk.len(), 2); + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + + let next_ids = restorer + .process_multi_chunk(chunk.chunk) + .expect("should process chunk"); + // should have replicated all chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to 
finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_no_limit_but_non_root() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // generate multi chunk from the 2nd chunk with no limit + let multi_chunk = chunk_producer + .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) + .unwrap(); + // tree of height 4 has 5 chunks + // we have restored the first leaving 4 chunks + // each chunk has an extra chunk id, since they are disjoint + // 
hence the size of the multi chunk should be 8 + assert_eq!(multi_chunk.chunk.len(), 8); + let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_with_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // build multi chunk with a limit of 600 + let multi_chunk = chunk_producer + .multi_chunk_with_limit("", Some(600)) + .unwrap(); + // should only contain the first chunk + assert_eq!(multi_chunk.chunk.len(), 2); + // should point to chunk 2 + assert_eq!(multi_chunk.next_index, Some("11".to_string())); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(next_ids.len(), 4); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // subsequent chunks are of size 
321 + // with limit just above 642 should get 2 chunks (2 and 3) + // disjoint, so multi chunk len should be 4 + let multi_chunk = chunk_producer + .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) + .unwrap(); + assert_eq!(multi_chunk.chunk.len(), 4); + assert_eq!(multi_chunk.next_index, Some("01".to_string())); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + // chunks 2 and 3 are leaf chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); + assert_eq!(restorer.parent_keys.len(), 2); + + // get the last 2 chunks + let multi_chunk = chunk_producer + .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) + .unwrap(); + assert_eq!(multi_chunk.chunk.len(), 4); + assert_eq!(multi_chunk.next_index, None); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + // chunks 2 and 3 are leaf chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + // finalize merk + let restored_merk = restorer.finalize().unwrap(); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + // Builds a source merk with batch_size number of elements + // attempts restoration on some empty merk, with multi chunks + // verifies that restoration was performed correctly. 
+ fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option) { + // build the source merk + let mut source_merk = TempMerk::new(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let multi_chunk = chunk_producer + .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) + .expect("should get chunk"); + restorer + .process_multi_chunk(multi_chunk.chunk) + .expect("should process chunk successfully"); + chunk_id_opt = multi_chunk.next_index; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + } + + #[test] + fn restore_multi_chunk_20_no_limit() { + test_restoration_multi_chunk_strategy(20, None); + } + + #[test] + #[should_panic] + fn 
restore_multi_chunk_20_tiny_limit() { + test_restoration_multi_chunk_strategy(20, Some(1)); + } + + #[test] + fn restore_multi_chunk_20_limit() { + test_restoration_multi_chunk_strategy(20, Some(1200)); + } + + #[test] + fn restore_multi_chunk_10000_limit() { + test_restoration_multi_chunk_strategy(10000, Some(1200)); + } + + #[test] + fn test_restoration_interruption() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // store old state for later reference + let old_chunk_id_to_root_hash = restorer.chunk_id_to_root_hash.clone(); + let old_parent_keys = 
restorer.parent_keys.clone(); + + // drop the restorer and the restoration merk + drop(restorer); + // open the restoration merk again and build a restorer from it + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + ) + .unwrap() + .unwrap(); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // assert the state of the restorer + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.parent_keys.len(), 0); + + // recover state + let recovery_attempt = restorer.attempt_state_recovery(); + assert_eq!(recovery_attempt.is_ok(), true); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // assert equality to old state + assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); + assert_eq!(old_parent_keys, restorer.parent_keys); + } } diff --git a/merk/src/merk/restore2.rs b/merk/src/merk/restore2.rs deleted file mode 100644 index 084f3759..00000000 --- a/merk/src/merk/restore2.rs +++ /dev/null @@ -1,195 +0,0 @@ -// TODO: add license - -//! Provides `Restorer`, which can create a replica of a Merk instance by -//! receiving chunk proofs. 
- -use std::collections::BTreeMap; - -use grovedb_storage::{Batch, StorageContext}; - -use crate::{ - merk::MerkSource, - proofs::{ - chunk::{ - chunk_op::ChunkOp, - error::ChunkError, - util::{traversal_instruction_as_string, write_to_vec}, - }, - tree::{execute, Child, Tree as ProofTree}, - Node, - }, - tree::{RefWalker, Tree}, - CryptoHash, Error, - Error::{CostsError, EdError, StorageError}, - Link, Merk, - TreeFeatureType::BasicMerk, -}; - -// TODO: add documentation -pub struct Restorer { - merk: Merk, - chunk_id_to_root_hash: BTreeMap, -} - -impl<'db, S: StorageContext<'db>> Restorer { - // TODO: add documenation - pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { - let mut chunk_id_to_root_hash = BTreeMap::new(); - chunk_id_to_root_hash.insert(traversal_instruction_as_string(vec![]), expected_root_hash); - - Self { - merk, - chunk_id_to_root_hash, - } - } - - // TODO: add documentation - // what does the restorer process? - // it should be able to process single chunks, subtree chunks and multi chunks - // right? or just one of them? 
- // I think it should process just multi chunk at least for now - pub fn process_multi_chunk( - &mut self, - chunk: impl IntoIterator, - ) -> Result<(), Error> { - // chunk id, chunk - // we use the chunk id to know what to verify against - let mut chunks = chunk.into_iter(); - - // TODO: clean this up, make external function that peeks and asserts - let chunk_id_string = if let Some(ChunkOp::ChunkId(chunk_id)) = chunks.next() { - traversal_instruction_as_string(chunk_id) - } else { - return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunkId)); - }; - - // TODO: deal with unwrap - let expected_root_hash = self.chunk_id_to_root_hash.get(&chunk_id_string).unwrap(); - dbg!(expected_root_hash); - - if let Some(ChunkOp::Chunk(chunk)) = chunks.next() { - // todo: deal with error - let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) - .unwrap() - .unwrap(); - debug_assert!(tree.hash().unwrap() == *expected_root_hash); - dbg!("yayy"); - self.write_chunk(tree); - } else { - return Err(Error::ChunkRestoringError(ChunkError::ExpectedChunk)); - } - - Ok(()) - } - - /// Writes the data contained in `tree` (extracted from a verified chunk - /// proof) to the RocksDB. 
- fn write_chunk(&mut self, tree: ProofTree) -> Result<(), Error> { - let mut batch = self.merk.storage.new_batch(); - - tree.visit_refs(&mut |proof_node| { - if let Some((mut node, key)) = match &proof_node.node { - Node::KV(key, value) => Some(( - Tree::new(key.clone(), value.clone(), None, BasicMerk).unwrap(), - key, - )), - Node::KVValueHash(key, value, value_hash) => Some(( - Tree::new_with_value_hash(key.clone(), value.clone(), *value_hash, BasicMerk) - .unwrap(), - key, - )), - Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => Some(( - Tree::new_with_value_hash( - key.clone(), - value.clone(), - *value_hash, - *feature_type, - ) - .unwrap(), - key, - )), - _ => None, - } { - // TODO: encode tree node without cloning key/value - // *node.slot_mut(true) = proof_node.left.as_ref().map(Child::as_link); - // *node.slot_mut(false) = proof_node.right.as_ref().map(Child::as_link); - - let bytes = node.encode(); - batch.put(key, &bytes, None, None).map_err(CostsError) - } else { - Ok(()) - } - })?; - - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError) - } -} - -#[cfg(test)] -mod tests { - use grovedb_path::SubtreePath; - use grovedb_storage::{rocksdb_storage::test_utils::TempStorage, Storage}; - - use super::*; - use crate::{merk::chunks2::ChunkProducer, test_utils::make_batch_seq, Merk}; - - #[test] - fn restoration_test() { - // Create source merk and populate - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut original = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); - let batch = make_batch_seq(0..15); - original - .apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(original.height(), Some(4)); - - // Create to be restored merk - let storage = TempStorage::new(); - let tx2 = storage.start_transaction(); - let restored_merk = Merk::open_base( - storage 
- .get_immediate_storage_context(SubtreePath::empty(), &tx2) - .unwrap(), - false, - ) - .unwrap() - .unwrap(); - assert_eq!(restored_merk.height(), None); - - // assert initial conditions - assert_ne!( - original.root_hash().unwrap(), - restored_merk.root_hash().unwrap() - ); - - // Perform Restoration - let mut chunk_producer = - ChunkProducer::new(&original).expect("should create chunk producer"); - - let mut restorer = Restorer::new(restored_merk, original.root_hash().unwrap()); - - let chunk = chunk_producer - .multi_chunk_with_limit(1, None) - .expect("should generate chunk"); - - assert_eq!(chunk.next_index, None); - assert_eq!(chunk.remaining_limit, None); - assert_eq!(chunk.chunk.len(), 2); - - restorer.process_multi_chunk(chunk.chunk).unwrap(); - } -} diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index d5ef376c..28114876 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -53,605 +53,8 @@ use crate::{ mod binary_range; #[cfg(feature = "full")] -// TODO: remove from here -pub mod chunk2; -#[cfg(feature = "full")] -// TODO: remove from here -pub mod util; -// TODO: remove from here -pub mod error; -// TODO: remove from here +pub mod chunk; pub mod chunk_op; - -/// The minimum number of layers the trunk will be guaranteed to have before -/// splitting into multiple chunks. If the tree's height is less than double -/// this value, the trunk should be verified as a leaf chunk. -#[cfg(feature = "full")] -pub const MIN_TRUNK_HEIGHT: usize = 5; - -#[cfg(feature = "full")] -impl<'a, S> RefWalker<'a, S> -where - S: Fetch + Sized + Clone, -{ - /// Generates a trunk proof by traversing the tree. - /// - /// Returns a tuple containing the produced proof, and a boolean indicating - /// whether or not there will be more chunks to follow. If the chunk - /// contains the entire tree, the boolean will be `false`, if the chunk - /// is abridged and will be connected to leaf chunks, it will be `true`. 
- pub fn create_trunk_proof(&mut self) -> CostResult<(Vec, bool), Error> { - let approx_size = 2usize.pow((self.tree().height() / 2) as u32) * 3; - let mut proof = Vec::with_capacity(approx_size); - - self.traverse_for_height_proof(&mut proof, 1) - .flat_map_ok(|trunk_height| { - if trunk_height < MIN_TRUNK_HEIGHT { - proof.clear(); - self.traverse_for_trunk(&mut proof, usize::MAX, true) - .map_ok(|_| Ok((proof, false))) - } else { - self.traverse_for_trunk(&mut proof, trunk_height, true) - .map_ok(|_| Ok((proof, true))) - } - }) - .flatten() - } - - /// Traverses down the left edge of the tree and pushes ops to the proof, to - /// act as a proof of the height of the tree. This is the first step in - /// generating a trunk proof. - fn traverse_for_height_proof( - &mut self, - proof: &mut Vec, - depth: usize, - ) -> CostResult { - let mut cost = OperationCost::default(); - let maybe_left = match self.walk(true).unwrap_add_cost(&mut cost) { - Ok(maybe_left) => maybe_left, - Err(e) => { - return Err(e).wrap_with_cost(cost); - } - }; - let has_left_child = maybe_left.is_some(); - - let trunk_height = if let Some(mut left) = maybe_left { - match left - .traverse_for_height_proof(proof, depth + 1) - .unwrap_add_cost(&mut cost) - { - Ok(x) => x, - Err(e) => return Err(e).wrap_with_cost(cost), - } - } else { - depth / 2 - }; - - if depth > trunk_height { - proof.push(Op::Push(self.to_kvhash_node())); - - if has_left_child { - proof.push(Op::Parent); - } - - if let Some(right) = self.tree().link(false) { - proof.push(Op::Push(Node::Hash(*right.hash()))); - proof.push(Op::Child); - } - } - - Ok(trunk_height).wrap_with_cost(cost) - } - - /// Traverses down the tree and adds KV push ops for all nodes up to a - /// certain depth. This expects the proof to contain a height proof as - /// generated by `traverse_for_height_proof`. 
- fn traverse_for_trunk( - &mut self, - proof: &mut Vec, - remaining_depth: usize, - is_leftmost: bool, - ) -> CostResult<(), Error> { - let mut cost = OperationCost::default(); - - if remaining_depth == 0 { - // return early if we have reached bottom of trunk - - // for leftmost node, we already have height proof - if is_leftmost { - return Ok(()).wrap_with_cost(cost); - } - - // add this node's hash - proof.push(Op::Push(self.to_hash_node().unwrap_add_cost(&mut cost))); - - return Ok(()).wrap_with_cost(cost); - } - - // traverse left - let has_left_child = self.tree().link(true).is_some(); - if has_left_child { - let mut left = cost_return_on_error!(&mut cost, self.walk(true)).unwrap(); - cost_return_on_error!( - &mut cost, - left.traverse_for_trunk(proof, remaining_depth - 1, is_leftmost) - ); - } - - // add this node's data - proof.push(Op::Push(self.to_kv_value_hash_feature_type_node())); - - if has_left_child { - proof.push(Op::Parent); - } - - // traverse right - if let Some(mut right) = cost_return_on_error!(&mut cost, self.walk(false)) { - cost_return_on_error!( - &mut cost, - right.traverse_for_trunk(proof, remaining_depth - 1, false) - ); - proof.push(Op::Child); - } - - Ok(()).wrap_with_cost(cost) - } -} - -/// Builds a chunk proof by iterating over values in a RocksDB, ending the chunk -/// when a node with key `end_key` is encountered. -/// -/// Advances the iterator for all nodes in the chunk and the `end_key` (if any). 
-#[cfg(feature = "full")] -pub(crate) fn get_next_chunk( - iter: &mut impl RawIterator, - end_key: Option<&[u8]>, -) -> CostResult, Error> { - let mut cost = OperationCost::default(); - - let mut chunk = Vec::with_capacity(512); - let mut stack = Vec::with_capacity(32); - let mut node = Tree::new(vec![], vec![], None, BasicMerk).unwrap_add_cost(&mut cost); - - while iter.valid().unwrap_add_cost(&mut cost) { - let key = iter.key().unwrap_add_cost(&mut cost).unwrap(); - - if let Some(end_key) = end_key { - if key == end_key { - break; - } - } - - let encoded_node = iter.value().unwrap_add_cost(&mut cost).unwrap(); - cost_return_on_error_no_add!( - &cost, - Tree::decode_into(&mut node, vec![], encoded_node).map_err(EdError) - ); - - // TODO: Only use the KVValueHash if needed, saves 32 bytes - // only needed when dealing with references and trees - let kv = Node::KVValueHashFeatureType( - key.to_vec(), - node.value_ref().to_vec(), - *node.value_hash(), - node.feature_type(), - ); - - chunk.push(Op::Push(kv)); - - if node.link(true).is_some() { - chunk.push(Op::Parent); - } - - if let Some(child) = node.link(false) { - stack.push(child.key().to_vec()); - } else { - while let Some(top_key) = stack.last() { - if key < top_key.as_slice() { - break; - } - stack.pop(); - chunk.push(Op::Child); - } - } - - iter.next().unwrap_add_cost(&mut cost); - } - - if iter.valid().unwrap_add_cost(&mut cost) { - iter.next().unwrap_add_cost(&mut cost); - } - - Ok(chunk).wrap_with_cost(cost) -} - -/// Verifies a leaf chunk proof by executing its operators. Checks that there -/// were no abridged nodes (Hash or KVHash) and the proof hashes to -/// `expected_hash`. -#[cfg(feature = "full")] -#[allow(dead_code)] // TODO: remove when proofs will be enabled -pub(crate) fn verify_leaf>>( - ops: I, - expected_hash: CryptoHash, -) -> CostResult { - execute(ops, false, |node| match node { - Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) 
=> Ok(()), - _ => Err(Error::OldChunkRestoringError( - "Leaf chunks must contain full subtree".to_string(), - )), - }) - .flat_map_ok(|tree| { - tree.hash().map(|hash| { - if hash != expected_hash { - Error::OldChunkRestoringError(format!( - "Leaf chunk proof did not match expected hash\n\tExpected: {:?}\n\tActual: \ - {:?}", - expected_hash, - tree.hash() - )); - } - Ok(tree) - }) - }) -} - -/// Verifies a trunk chunk proof by executing its operators. Ensures the -/// resulting tree contains a valid height proof, the trunk is the correct -/// height, and all of its inner nodes are not abridged. Returns the tree and -/// the height given by the height proof. -#[cfg(feature = "full")] -pub(crate) fn verify_trunk>>( - ops: I, -) -> CostResult<(ProofTree, usize), Error> { - let mut cost = OperationCost::default(); - - fn verify_height_proof(tree: &ProofTree) -> Result { - Ok(match tree.child(true) { - Some(child) => { - if let Node::Hash(_) = child.tree.node { - return Err(Error::OldChunkRestoringError( - "Expected height proof to only contain KV and KVHash nodes".to_string(), - )); - } - verify_height_proof(&child.tree)? + 1 - } - None => 1, - }) - } - - fn verify_completeness( - tree: &ProofTree, - remaining_depth: usize, - leftmost: bool, - ) -> Result<(), Error> { - let recurse = |left, leftmost| { - if let Some(child) = tree.child(left) { - verify_completeness(&child.tree, remaining_depth - 1, left && leftmost)?; - } - Ok(()) - }; - - if remaining_depth > 0 { - match tree.node { - Node::KVValueHash(..) | Node::KV(..) | Node::KVValueHashFeatureType(..) 
=> {} - _ => { - return Err(Error::OldChunkRestoringError( - "Expected trunk inner nodes to contain keys and values".to_string(), - )) - } - } - recurse(true, leftmost)?; - recurse(false, false) - } else if !leftmost { - match tree.node { - Node::Hash(_) => Ok(()), - _ => Err(Error::OldChunkRestoringError( - "Expected trunk leaves to contain Hash nodes".to_string(), - )), - } - } else { - match &tree.node { - Node::KVHash(_) => Ok(()), - _ => Err(Error::OldChunkRestoringError( - "Expected leftmost trunk leaf to contain KVHash node".to_string(), - )), - } - } - } - - let mut kv_only = true; - let tree = cost_return_on_error!( - &mut cost, - execute(ops, false, |node| { - kv_only &= matches!(node, Node::KVValueHash(..)) - || matches!(node, Node::KV(..)) - || matches!(node, Node::KVValueHashFeatureType(..)); - Ok(()) - }) - ); - - let height = cost_return_on_error_no_add!(&cost, verify_height_proof(&tree)); - let trunk_height = height / 2; - - if trunk_height < MIN_TRUNK_HEIGHT { - if !kv_only { - return Err(Error::OldChunkRestoringError( - "Leaf chunks must contain full subtree".to_string(), - )) - .wrap_with_cost(cost); - } - } else { - cost_return_on_error_no_add!(&cost, verify_completeness(&tree, trunk_height, true)); - } - - Ok((tree, height)).wrap_with_cost(cost) -} - +pub mod error; #[cfg(feature = "full")] -#[cfg(test)] -mod tests { - use std::usize; - - use grovedb_costs::storage_cost::removal::StorageRemovedBytes::NoStorageRemoval; - use grovedb_storage::StorageContext; - - use super::{super::tree::Tree, *}; - use crate::{ - test_utils::*, - tree::{NoopCommit, PanicSource, Tree as BaseTree}, - }; - - #[derive(Default)] - struct NodeCounts { - hash: usize, - kv_hash: usize, - kv: usize, - kv_value_hash: usize, - kv_digest: usize, - kv_ref_value_hash: usize, - kv_value_hash_feature_type: usize, - } - - fn count_node_types(tree: Tree) -> NodeCounts { - let mut counts = NodeCounts::default(); - - tree.visit_nodes(&mut |node| { - match node { - Node::Hash(_) => 
counts.hash += 1, - Node::KVHash(_) => counts.kv_hash += 1, - Node::KV(..) => counts.kv += 1, - Node::KVValueHash(..) => counts.kv_value_hash += 1, - Node::KVDigest(..) => counts.kv_digest += 1, - Node::KVRefValueHash(..) => counts.kv_ref_value_hash += 1, - Node::KVValueHashFeatureType(..) => counts.kv_value_hash_feature_type += 1, - }; - }); - - counts - } - - #[test] - fn small_trunk_roundtrip() { - let mut tree = make_tree_seq(31); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - // println!("{:?}", &proof); - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 32); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn big_trunk_roundtrip() { - let mut tree = make_tree_seq(2u64.pow(MIN_TRUNK_HEIGHT as u32 * 2 + 1) - 1); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(has_more); - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - - let counts = count_node_types(trunk); - // are these formulas correct for all values of `MIN_TRUNK_HEIGHT`? 
🤔 - assert_eq!( - counts.hash, - 2usize.pow(MIN_TRUNK_HEIGHT as u32) + MIN_TRUNK_HEIGHT - 1 - ); - assert_eq!( - counts.kv_value_hash_feature_type, - 2usize.pow(MIN_TRUNK_HEIGHT as u32) - 1 - ); - assert_eq!(counts.kv_hash, MIN_TRUNK_HEIGHT + 1); - } - - #[test] - fn one_node_tree_trunk_roundtrip() { - let mut tree = BaseTree::new(vec![0], vec![], None, BasicMerk).unwrap(); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 1); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn two_node_right_heavy_tree_trunk_roundtrip() { - // 0 - // \ - // 1 - let mut tree = BaseTree::new(vec![0], vec![], None, BasicMerk) - .unwrap() - .attach( - false, - Some(BaseTree::new(vec![1], vec![], None, BasicMerk).unwrap()), - ); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 2); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn two_node_left_heavy_tree_trunk_roundtrip() { - // 1 - // / - // 0 - let mut tree = BaseTree::new(vec![1], vec![], None, BasicMerk) - .unwrap() - .attach( - true, - Some(BaseTree::new(vec![0], vec![], 
None, BasicMerk).unwrap()), - ); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 2); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn three_node_tree_trunk_roundtrip() { - // 1 - // / \ - // 0 2 - let mut tree = BaseTree::new(vec![1], vec![], None, BasicMerk) - .unwrap() - .attach( - true, - Some(BaseTree::new(vec![0], vec![], None, BasicMerk).unwrap()), - ) - .attach( - false, - Some(BaseTree::new(vec![2], vec![], None, BasicMerk).unwrap()), - ); - tree.commit( - &mut NoopCommit {}, - &|_, _| Ok(0), - &mut |_, _, _| Ok((false, None)), - &mut |_, _, _| Ok((NoStorageRemoval, NoStorageRemoval)), - ) - .unwrap() - .unwrap(); - - let mut walker = RefWalker::new(&mut tree, PanicSource {}); - let (proof, has_more) = walker.create_trunk_proof().unwrap().unwrap(); - assert!(!has_more); - - let (trunk, _) = verify_trunk(proof.into_iter().map(Ok)).unwrap().unwrap(); - let counts = count_node_types(trunk); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_value_hash_feature_type, 3); - assert_eq!(counts.kv_hash, 0); - } - - #[test] - fn leaf_chunk_roundtrip() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..31); - merk.apply::<_, Vec<_>>(batch.as_slice(), &[], None) - .unwrap() - .unwrap(); - - merk.commit(); - - let root_node = merk.tree.take(); - let root_key = root_node.as_ref().unwrap().key().to_vec(); - merk.tree.set(root_node); - - // whole tree as 1 leaf - let mut iter = merk.storage.raw_iter(); - iter.seek_to_first().unwrap(); - let chunk = get_next_chunk(&mut iter, 
None).unwrap().unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf(ops, merk.root_hash().unwrap()) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 31); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - drop(iter); - - let mut iter = merk.storage.raw_iter(); - iter.seek_to_first().unwrap(); - - // left leaf - let chunk = get_next_chunk(&mut iter, Some(root_key.as_slice())) - .unwrap() - .unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf( - ops, - [ - 78, 230, 25, 188, 163, 2, 169, 185, 254, 174, 196, 206, 162, 187, 245, 188, 74, 70, - 220, 160, 35, 78, 120, 122, 61, 90, 241, 105, 35, 180, 133, 98, - ], - ) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 15); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - - // right leaf - let chunk = get_next_chunk(&mut iter, None).unwrap().unwrap(); - let ops = chunk.into_iter().map(Ok); - let chunk = verify_leaf( - ops, - [ - 21, 147, 223, 29, 106, 19, 23, 38, 233, 134, 245, 44, 246, 179, 48, 19, 111, 50, - 19, 191, 134, 37, 165, 5, 35, 111, 233, 213, 212, 5, 92, 45, - ], - ) - .unwrap() - .unwrap(); - let counts = count_node_types(chunk); - assert_eq!(counts.kv_value_hash_feature_type, 15); - assert_eq!(counts.hash, 0); - assert_eq!(counts.kv_hash, 0); - } -} +pub mod util; diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs index 350c9718..01a20531 100644 --- a/merk/src/proofs/chunk/binary_range.rs +++ b/merk/src/proofs/chunk/binary_range.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, 
modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + const LEFT: bool = true; const RIGHT: bool = false; diff --git a/merk/src/proofs/chunk/chunk2.rs b/merk/src/proofs/chunk/chunk.rs similarity index 94% rename from merk/src/proofs/chunk/chunk2.rs rename to merk/src/proofs/chunk/chunk.rs index a6072871..95c686b7 100644 --- a/merk/src/proofs/chunk/chunk2.rs +++ b/merk/src/proofs/chunk/chunk.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + use grovedb_costs::{cost_return_on_error, CostResult, CostsExt, OperationCost}; // TODO: add copyright comment @@ -170,7 +198,7 @@ pub mod tests { use crate::{ proofs::{ - chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, + chunk::chunk::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, Op::Parent, diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 29687932..9402d3d5 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + use std::io::{Read, Write}; use ed::{Decode, Encode}; @@ -98,7 +126,7 @@ mod test { use crate::proofs::{ chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, chunk_op::ChunkOp, }, Node, Op, diff --git a/merk/src/proofs/chunk/error.rs b/merk/src/proofs/chunk/error.rs index 0c926203..bd482666 100644 --- a/merk/src/proofs/chunk/error.rs +++ b/merk/src/proofs/chunk/error.rs @@ -1,3 +1,31 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ #[derive(Debug, thiserror::Error)] /// Chunk related errors pub enum ChunkError { @@ -25,6 +53,25 @@ pub enum ChunkError { #[error("expected chunk when parsing chunk op")] ExpectedChunk, + // Restoration Errors + /// Chunk restoration starts from the root chunk, this lead to a set of + /// root hash values to verify other chunks .... + /// Hence before you can verify a child you need to have verified it's + /// parent. + #[error("unexpected chunk: cannot verify chunk because verification hash is not in memory")] + UnexpectedChunk, + + /// Invalid chunk proof when verifying chunk + #[error("invalid chunk proof: {0}")] + InvalidChunkProof(&'static str), + + /// Invalid multi chunk + #[error("invalid multi chunk: {0}")] + InvalidMultiChunk(&'static str), + + #[error("called finalize too early still expecting chunks")] + RestorationNotComplete, + /// Internal error, this should never surface /// if it does, it means wrong assumption in code #[error("internal error {0}")] diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index 1094e50f..3e430acf 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -1,33 +1,59 @@ -// TODO: add MIT License -// TODO: add module description +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//! Collection of state independent algorithms needed for facilitate chunk +//! production and restoration use std::io::Write; // TODO: figure out better nomenclature use crate::{proofs::chunk::binary_range::BinaryRange, Error}; -use crate::{proofs::chunk::error::ChunkError, Error::InternalError}; - -// TODO: add documentation -fn chunk_height_per_layer(height: usize) -> Vec { - // every chunk has a fixed height of 2 - // it is possible for a chunk to not reach full capacity - let mut two_count = height / 2; - if height % 2 != 0 { - two_count += 1; - } - - return vec![2; two_count]; -} +use crate::{ + proofs::chunk::{ + chunk::{LEFT, RIGHT}, + error::{ChunkError, ChunkError::BadTraversalInstruction}, + }, + Error::InternalError, +}; /// Represents the height as a linear combination of 3 amd 2 /// of the form 3x + 2y /// this breaks the tree into layers of height 3 or 2 /// the minimum chunk height is 2, so if tree height is less than 2 /// we just return a single layer of height 2 -fn chunk_height_per_layer_lin_comb(height: usize) -> Vec { +fn chunk_height_per_layer(height: usize) -> Vec { let mut two_count = 0; let mut three_count = height / 3; + if height == 0 { + return vec![]; + } + // minimum chunk height is 2, if tree height is less than 2 // return a single layer with chunk height 2 if height < 2 { @@ -200,17 +226,142 @@ pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result< return Ok(instructions); } +/// Determine the chunk id given the 
traversal instruction and the max height of +/// the tree +pub fn chunk_id_from_traversal_instruction( + traversal_instruction: &[bool], + height: usize, +) -> Result { + // empty traversal instruction points to the first chunk + if traversal_instruction.is_empty() { + return Ok(1); + } + + let mut chunk_count = number_of_chunks(height); + let mut current_chunk_id = 1; + + let mut layer_heights = chunk_height_per_layer(height); + let last_layer_height = layer_heights.pop().expect("confirmed not empty"); + + // traversal instructions should only point to the root node of chunks (chunk + // boundaries) the layer heights represent the height of each chunk layer + // the last chunk layer is at height = total_height - last_chunk_height + 1 + // traversal instructions require 1 less than height to address it + // e.g. height 1 is represented by [] - len of 0 + // height 2 is represented by [left] or [right] len of 1 + // therefore last chunk root node is address with total_height - + // last_chunk_height + if traversal_instruction.len() > height - last_layer_height { + return Err(Error::ChunkingError(BadTraversalInstruction( + "traversal instruction should not address nodes past the root of the last layer chunks", + ))); + } + + // verify that the traversal instruction points to a chunk boundary + let mut traversal_length = traversal_instruction.len(); + let mut relevant_layer_heights = vec![]; + for layer_height in layer_heights { + // the traversal_length should be a perfect sum of a subset of the layer_height + // if the traversal_length is not 0, it should be larger than or equal to the + // next layer height. 
+ if traversal_length < layer_height { + return Err(Error::ChunkingError(BadTraversalInstruction( + "traversal instruction should point to a chunk boundary", + ))); + } + + traversal_length -= layer_height; + relevant_layer_heights.push(layer_height); + + if traversal_length == 0 { + break; + } + } + + // take layer_height instructions and determine the updated chunk id + let mut start_index = 0; + for layer_height in relevant_layer_heights { + let end_index = start_index + layer_height; + let subset_instructions = &traversal_instruction[start_index..end_index]; + + // offset multiplier determines what subchunk we are on based on the given + // instruction offset multiplier just converts the binary instruction to + // decimal, taking left as 0 and right as 0 i.e [left, left, left] = 0 + // means we are at subchunk 0 + let mut offset_multiplier = 0; + for (i, instruction) in subset_instructions.iter().enumerate() { + offset_multiplier += 2_usize.pow((subset_instructions.len() - i - 1) as u32) + * (1 - *instruction as usize); + } + + if chunk_count % 2 != 0 { + // remove the current chunk from the chunk count + chunk_count = chunk_count - 1; + } + + chunk_count = chunk_count / exit_node_count(layer_height); + + current_chunk_id = current_chunk_id + offset_multiplier as usize * chunk_count + 1; + + start_index = end_index; + } + + Ok(current_chunk_id) +} + +/// Determine the chunk id given the traversal instruction and the max height of +/// the tree. This can recover from traversal instructions not pointing to a +/// chunk boundary, in such a case, it backtracks until it hits a chunk +/// boundary. 
+pub fn chunk_id_from_traversal_instruction_with_recovery( + traversal_instruction: &[bool], + height: usize, +) -> Result { + let chunk_id_result = chunk_id_from_traversal_instruction(traversal_instruction, height); + if chunk_id_result.is_err() { + return chunk_id_from_traversal_instruction_with_recovery( + &traversal_instruction[0..traversal_instruction.len() - 1], + height, + ); + } + return chunk_id_result; +} + +/// Generate instruction for traversing to a given chunk in a binary tree, +/// returns string representation +pub fn generate_traversal_instruction_as_string( + height: usize, + chunk_id: usize, +) -> Result { + let instruction = generate_traversal_instruction(height, chunk_id)?; + Ok(traversal_instruction_as_string(&instruction)) +} + /// Convert traversal instruction to byte string -/// 1 represents left -/// 0 represents right -pub fn traversal_instruction_as_string(instruction: Vec) -> String { +/// 1 represents left (true) +/// 0 represents right (false) +pub fn traversal_instruction_as_string(instruction: &Vec) -> String { instruction .iter() .map(|v| if *v { "1" } else { "0" }) .collect() } -// TODO: move this to a better file +/// Converts a string that represents a traversal instruction +/// to a vec of bool, true = left and false = right +pub fn string_as_traversal_instruction(instruction_string: &str) -> Result, Error> { + instruction_string + .chars() + .map(|char| match char { + '1' => Ok(LEFT), + '0' => Ok(RIGHT), + _ => Err(Error::ChunkingError(ChunkError::BadTraversalInstruction( + "failed to parse instruction string", + ))), + }) + .collect() +} + pub fn write_to_vec(dest: &mut W, value: &[u8]) -> Result<(), Error> { dest.write_all(value) .map_err(|_e| InternalError("failed to write to vector")) @@ -221,17 +372,17 @@ mod test { use byteorder::LE; use super::*; - use crate::proofs::chunk::chunk2::{LEFT, RIGHT}; + use crate::proofs::chunk::chunk::{LEFT, RIGHT}; #[test] fn test_chunk_height_per_layer() { let layer_heights = 
chunk_height_per_layer(10); assert_eq!(layer_heights.iter().sum::(), 10); - assert_eq!(layer_heights, [2, 2, 2, 2, 2]); + assert_eq!(layer_heights, [3, 3, 2, 2]); let layer_heights = chunk_height_per_layer(45); - assert_eq!(layer_heights.iter().sum::(), 46); - assert_eq!(layer_heights, [2; 23]); + assert_eq!(layer_heights.iter().sum::(), 45); + assert_eq!(layer_heights, [3; 15]); let layer_heights = chunk_height_per_layer(2); assert_eq!(layer_heights.iter().sum::(), 2); @@ -271,23 +422,20 @@ mod test { // hence total chunk count = 1 + 4 = 5 assert_eq!(number_of_chunks(4), 5); - // tree with height 6 should have 21 chunks - // will be split into three layers of chunk height 2 = [2,2,2] - // first chunk takes 1, has 2^2 = 4 exit nodes - // second chunk takes 4 with each having 2^2 exit nodes - // total exit from second chunk = 4 * 4 = 16 - // total chunks = 1 + 4 + 16 = 21 - assert_eq!(number_of_chunks(6), 21); + // tree with height 6 should have 9 chunks + // will be split into two layers of chunk height 3 = [3,3] + // first chunk takes 1, has 2^3 = 8 exit nodes + // total chunks = 1 + 8 = 9 + assert_eq!(number_of_chunks(6), 9); // tree with height 10 should have 341 chunks - // will be split into 5 layers = [2,2,2,2,2] - // first layer has just 1 chunk, exit nodes = 2^2 = 4 - // second layer has 4 chunks, exit nodes = 2^2 * 4 = 16 - // third layer has 16 chunks, exit nodes = 2^2 * 16 = 64 - // fourth layer has 64 chunks, exit nodes = 2^2 * 64 = 256 - // fifth layer has 256 chunks - // total chunks = 1 + 4 + 16 + 64 + 256 = 341 chunks - assert_eq!(number_of_chunks(10), 341); + // will be split into 5 layers = [3, 3, 2, 2] + // first layer has just 1 chunk, exit nodes = 2^3 = 8 + // second layer has 4 chunks, exit nodes = 2^3 * 8 = 64 + // third layer has 16 chunks, exit nodes = 2^2 * 64 = 256 + // fourth layer has 256 chunks + // total chunks = 1 + 8 + 64 + 256 = 329 chunks + assert_eq!(number_of_chunks(10), 329); } #[test] @@ -307,28 +455,26 @@ mod test { 
assert_eq!(number_of_chunks_under_chunk_id(4, 4).unwrap(), 1); assert_eq!(number_of_chunks_under_chunk_id(4, 5).unwrap(), 1); - // tree with height 10 should have 341 chunks - // layer_heights = [2, 2, 2, 2, 2] - // chunk_id 1 = 341 - // chunk_id 2 = 85 i.e (341 - 1) / 2^2 - // chunk_id 3 = 21 i.e (85 - 1) / 2^2 - // chunk_id 4 = 5 i.e (21 - 1) / 2^2 - // chunk_id 5 = 1 i.e (5 - 1) / 2^2 - // chunk_id 6 = 1 on the same layer as 5 - // chunk_id 87 = 85 as chunk 87 should wrap back to the same layer as chunk_id 2 - // chunk_id 88 = mirrors chunk_id 3 - // chunk_id 89 = mirrors chunk_id 4 - // chunk_id 90 = mirrors chunk_id 5 - assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 341); - assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 85); - assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 21); - assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 5); + // tree with height 10 should have 329 chunks + // layer_heights = [3, 3, 2, 2] + // chunk_id 1 = 329 + // chunk_id 2 = 41 i.e (329 - 1) / 2^3 + // chunk_id 3 = 5 i.e (41 - 1) / 2^3 + // chunk_id 4 = 1 i.e (5 - 1) / 2^2 + // chunk_id 5 = 1 on the same layer as 4 + // chunk_id 43 = 41 as chunk 43 should wrap back to the same layer as chunk_id 2 + // chunk_id 44 = mirrors chunk_id 3 + // chunk_id 45 = mirrors chunk_id 4 + // chunk_id 46 = mirrors chunk_id 5 + assert_eq!(number_of_chunks_under_chunk_id(10, 1).unwrap(), 329); + assert_eq!(number_of_chunks_under_chunk_id(10, 2).unwrap(), 41); + assert_eq!(number_of_chunks_under_chunk_id(10, 3).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 4).unwrap(), 1); assert_eq!(number_of_chunks_under_chunk_id(10, 5).unwrap(), 1); - assert_eq!(number_of_chunks_under_chunk_id(10, 6).unwrap(), 1); - assert_eq!(number_of_chunks_under_chunk_id(10, 87).unwrap(), 85); - assert_eq!(number_of_chunks_under_chunk_id(10, 88).unwrap(), 21); - assert_eq!(number_of_chunks_under_chunk_id(10, 89).unwrap(), 5); - 
assert_eq!(number_of_chunks_under_chunk_id(10, 90).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 43).unwrap(), 41); + assert_eq!(number_of_chunks_under_chunk_id(10, 44).unwrap(), 5); + assert_eq!(number_of_chunks_under_chunk_id(10, 45).unwrap(), 1); + assert_eq!(number_of_chunks_under_chunk_id(10, 46).unwrap(), 1); } #[test] @@ -396,24 +542,26 @@ mod test { fn test_chunk_height() { // tree of height 6 // all chunks have the same height - // since layer height = [2,2,2] - // we have 21 chunks in a tree of this height - for i in 1..=21 { - assert_eq!(chunk_height(6, i).unwrap(), 2); + // since layer height = [3,3] + // we have 9 chunks in a tree of this height + for i in 1..=9 { + assert_eq!(chunk_height(6, i).unwrap(), 3); } // tree of height 5 - // layer_height = [2, 2] - // we also have 21 chunks here - for i in 1..=21 { + // layer_height = [3, 2] + // we have 9 chunks, just the first chunk is of height 3 + // the rest are of height 2 + assert_eq!(chunk_height(5, 1).unwrap(), 3); + for i in 2..=9 { assert_eq!(chunk_height(5, i).unwrap(), 2); } // tree of height 10 - // layer_height = [3, 3, 3, 3] + // layer_height = [3, 3, 2, 2] // just going to check chunk 1 - 5 - assert_eq!(chunk_height(10, 1).unwrap(), 2); - assert_eq!(chunk_height(10, 2).unwrap(), 2); + assert_eq!(chunk_height(10, 1).unwrap(), 3); + assert_eq!(chunk_height(10, 2).unwrap(), 3); assert_eq!(chunk_height(10, 3).unwrap(), 2); assert_eq!(chunk_height(10, 4).unwrap(), 2); assert_eq!(chunk_height(10, 5).unwrap(), 2); @@ -421,12 +569,133 @@ mod test { #[test] fn test_traversal_instruction_as_string() { - assert_eq!(traversal_instruction_as_string(vec![]), ""); - assert_eq!(traversal_instruction_as_string(vec![LEFT]), "1"); - assert_eq!(traversal_instruction_as_string(vec![RIGHT]), "0"); + assert_eq!(traversal_instruction_as_string(&vec![]), ""); + assert_eq!(traversal_instruction_as_string(&vec![LEFT]), "1"); + assert_eq!(traversal_instruction_as_string(&vec![RIGHT]), "0"); assert_eq!( 
- traversal_instruction_as_string(vec![RIGHT, LEFT, LEFT, RIGHT]), + traversal_instruction_as_string(&vec![RIGHT, LEFT, LEFT, RIGHT]), "0110" ); } + + #[test] + fn test_instruction_string_to_traversal_instruction() { + assert_eq!(string_as_traversal_instruction("1").unwrap(), vec![LEFT]); + assert_eq!(string_as_traversal_instruction("0").unwrap(), vec![RIGHT]); + assert_eq!( + string_as_traversal_instruction("001").unwrap(), + vec![RIGHT, RIGHT, LEFT] + ); + assert_eq!(string_as_traversal_instruction("002").is_err(), true); + assert_eq!(string_as_traversal_instruction("").unwrap(), vec![]); + } + + #[test] + fn test_chunk_id_from_traversal_instruction() { + // tree of height 4 + let traversal_instruction = generate_traversal_instruction(4, 1).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 1 + ); + let traversal_instruction = generate_traversal_instruction(4, 2).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 2 + ); + let traversal_instruction = generate_traversal_instruction(4, 3).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 3 + ); + let traversal_instruction = generate_traversal_instruction(4, 4).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 4).unwrap(), + 4 + ); + + // tree of height 6 + let traversal_instruction = generate_traversal_instruction(6, 1).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 1 + ); + let traversal_instruction = generate_traversal_instruction(6, 2).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 2 + ); + let traversal_instruction = generate_traversal_instruction(6, 3).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 3 
+ ); + let traversal_instruction = generate_traversal_instruction(6, 4).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 4 + ); + let traversal_instruction = generate_traversal_instruction(6, 5).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 5 + ); + let traversal_instruction = generate_traversal_instruction(6, 6).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 6 + ); + let traversal_instruction = generate_traversal_instruction(6, 7).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 7 + ); + let traversal_instruction = generate_traversal_instruction(6, 8).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 8 + ); + let traversal_instruction = generate_traversal_instruction(6, 9).unwrap(); + assert_eq!( + chunk_id_from_traversal_instruction(traversal_instruction.as_slice(), 6).unwrap(), + 9 + ); + } + + #[test] + fn test_chunk_id_from_traversal_instruction_with_recovery() { + // tree of height 5 + // layer heights = [3, 2] + // first chunk boundary is at instruction len 0 e.g. [] + // second chunk boundary is at instruction len 3 e.g. [left, left, left] + // anything outside of this should return an error with regular chunk_id + // function with recovery we expect this to backtrack to the last chunk + // boundary e.g. 
[left] should backtrack to [] + // [left, left, right, left] should backtrack to [left, left, right] + assert_eq!( + chunk_id_from_traversal_instruction(&[LEFT], 5).is_err(), + true + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT], 5).unwrap(), + 1 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT], 5).unwrap(), + 1 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT, RIGHT], 5).unwrap(), + 3 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT, LEFT, RIGHT, LEFT], 5) + .unwrap(), + 3 + ); + assert_eq!( + chunk_id_from_traversal_instruction_with_recovery(&[LEFT; 50], 5).unwrap(), + 2 + ); + } } diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 819fd43b..99e84827 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -43,6 +43,11 @@ use super::{Node, Op}; use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_hash, NULL_HASH}; #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; +use crate::{ + proofs::chunk::chunk::{LEFT, RIGHT}, + Link, + TreeFeatureType::SummedMerk, +}; #[cfg(any(feature = "full", feature = "verify"))] /// Contains a tree's child node and its hash. The hash can always be assumed to @@ -55,6 +60,35 @@ pub struct Child { pub hash: CryptoHash, } +impl Child { + pub fn as_link(&self) -> Link { + let (key, sum) = match &self.tree.node { + Node::KV(key, _) | Node::KVValueHash(key, ..) => (key.as_slice(), None), + Node::KVValueHashFeatureType(key, _, _, feature_type) => { + let sum_value = match feature_type { + SummedMerk(sum) => Some(sum.clone()), + _ => None, + }; + (key.as_slice(), sum_value) + } + // for the connection between the trunk and leaf chunks, we don't + // have the child key so we must first write in an empty one. 
once + // the leaf gets verified, we can write in this key to its parent + _ => (&[] as &[u8], None), + }; + + Link::Reference { + hash: self.hash, + sum, + child_heights: ( + self.tree.child_heights.0 as u8, + self.tree.child_heights.1 as u8, + ), + key: key.to_vec(), + } + } +} + #[cfg(any(feature = "full", feature = "verify"))] /// A binary tree data structure used to represent a select subset of a tree /// when verifying Merkle proofs. @@ -68,6 +102,8 @@ pub struct Tree { pub right: Option, /// Height pub height: usize, + /// Child Heights + pub child_heights: (usize, usize), } #[cfg(any(feature = "full", feature = "verify"))] @@ -79,6 +115,7 @@ impl From for Tree { left: None, right: None, height: 1, + child_heights: (0, 0), } } } @@ -167,6 +204,42 @@ impl Tree { Ok(()) } + #[cfg(feature = "full")] + /// Does an in-order traversal over references to all the nodes in the tree, + /// calling `visit_node` for each with the current traversal path. + pub fn visit_refs_track_traversal_and_parent< + F: FnMut(&Self, &mut Vec, Option<&[u8]>) -> Result<(), Error>, + >( + &self, + base_traversal_instruction: &mut Vec, + parent_key: Option<&[u8]>, + visit_node: &mut F, + ) -> Result<(), Error> { + if let Some(child) = &self.left { + base_traversal_instruction.push(LEFT); + child.tree.visit_refs_track_traversal_and_parent( + base_traversal_instruction, + Some(self.key()), + visit_node, + )?; + base_traversal_instruction.pop(); + } + + visit_node(self, base_traversal_instruction, parent_key)?; + + if let Some(child) = &self.right { + base_traversal_instruction.push(RIGHT); + child.tree.visit_refs_track_traversal_and_parent( + base_traversal_instruction, + Some(self.key()), + visit_node, + )?; + base_traversal_instruction.pop(); + } + + Ok(()) + } + /// Returns an immutable reference to the child on the given side, if any. 
#[cfg(any(feature = "full", feature = "verify"))] pub const fn child(&self, left: bool) -> Option<&Child> { @@ -202,6 +275,13 @@ impl Tree { self.height = self.height.max(child.height + 1); + // update child height + if left { + self.child_heights.0 = child.height; + } else { + self.child_heights.1 = child.height; + } + let hash = child.hash().unwrap_add_cost(&mut cost); let tree = Box::new(child); *self.child_mut(left) = Some(Child { tree, hash }); @@ -238,13 +318,24 @@ impl Tree { _ => panic!("Expected node to be type KV"), } } + + #[cfg(feature = "full")] + pub(crate) fn sum(&self) -> Option { + match self.node { + Node::KVValueHashFeatureType(.., feature_type) => match feature_type { + SummedMerk(sum) => Some(sum), + _ => None, + }, + _ => panic!("Expected node to be type KVValueHashFeatureType"), + } + } } #[cfg(feature = "full")] /// `LayerIter` iterates over the nodes in a `Tree` at a given depth. Nodes are /// visited in order. pub struct LayerIter<'a> { - stack: Vec<&'a Tree>, + stack: Vec<(&'a Tree, usize)>, depth: usize, } @@ -257,25 +348,9 @@ impl<'a> LayerIter<'a> { depth, }; - iter.traverse_to_start(tree, depth); + iter.stack.push((tree, 0)); iter } - - /// Builds up the stack by traversing through left children to the desired - /// depth. 
- fn traverse_to_start(&mut self, tree: &'a Tree, remaining_depth: usize) { - self.stack.push(tree); - - if remaining_depth == 0 { - return; - } - - if let Some(child) = tree.child(true) { - self.traverse_to_start(&child.tree, remaining_depth - 1) - } else { - panic!("Could not traverse to given layer") - } - } } #[cfg(feature = "full")] @@ -283,32 +358,21 @@ impl<'a> Iterator for LayerIter<'a> { type Item = &'a Tree; fn next(&mut self) -> Option { - let item = self.stack.pop(); - let mut popped = item; - - loop { - if self.stack.is_empty() { - return item; - } - - let parent = self.stack.last().unwrap(); - let left_child = parent.child(true).unwrap(); - let right_child = parent.child(false).unwrap(); - - if left_child.tree.as_ref() == popped.unwrap() { - self.stack.push(&right_child.tree); - - while self.stack.len() - 1 < self.depth { - let parent = self.stack.last().unwrap(); - let left_child = parent.child(true).unwrap(); - self.stack.push(&left_child.tree); + while !self.stack.is_empty() { + let (item, item_depth) = self.stack.pop().expect("confirmed not None"); + if item_depth != self.depth { + if let Some(right_child) = item.child(false) { + self.stack.push((&right_child.tree, item_depth + 1)) + } + if let Some(left_child) = item.child(true) { + self.stack.push((&left_child.tree, item_depth + 1)) } - - return item; } else { - popped = self.stack.pop(); + return Some(item); } } + + return None; } } @@ -471,7 +535,19 @@ where .wrap_with_cost(cost); } - Ok(stack.pop().unwrap()).wrap_with_cost(cost) + let tree = stack.pop().unwrap(); + + if tree.child_heights.0.max(tree.child_heights.1) + - tree.child_heights.0.min(tree.child_heights.1) + > 1 + { + return Err(Error::InvalidProofError( + "Expected proof to result in a valid avl tree".to_string(), + )) + .wrap_with_cost(cost); + } + + Ok(tree).wrap_with_cost(cost) } #[cfg(feature = "full")] @@ -555,4 +631,104 @@ mod test { } assert!(iter.next().is_none()); } + + #[test] + fn execute_non_avl_tree() { + let 
non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + let execution_result = + execute(non_avl_tree_proof.into_iter().map(Ok), false, |_| Ok(())).unwrap(); + assert!(execution_result.is_err()); + } + + #[test] + fn child_to_link() { + let basic_merk_tree = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Child, + ]; + let tree = execute(basic_merk_tree.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .unwrap(); + + let left_link = tree.left.as_ref().unwrap().as_link(); + let right_link = tree.right.as_ref().unwrap().as_link(); + + assert_eq!( + left_link, + Link::Reference { + hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + sum: None, + child_heights: (0, 0), + key: vec![1] + } + ); + + assert_eq!( + right_link, + Link::Reference { + hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + sum: None, + child_heights: (0, 0), + key: vec![3] + } + ); + + let sum_merk_tree = vec![ + Op::Push(Node::KVValueHashFeatureType( + vec![1], + vec![1], + [0; 32], + SummedMerk(3), + )), + Op::Push(Node::KVValueHashFeatureType( + vec![2], + vec![2], + [0; 32], + SummedMerk(1), + )), + Op::Parent, + Op::Push(Node::KVValueHashFeatureType( + vec![3], + vec![3], + [0; 32], + SummedMerk(1), + )), + Op::Child, + ]; + let tree = execute(sum_merk_tree.into_iter().map(Ok), false, |_| Ok(())) + .unwrap() + .unwrap(); + + let left_link = tree.left.as_ref().unwrap().as_link(); + let right_link = tree.right.as_ref().unwrap().as_link(); + + assert_eq!( + left_link, + Link::Reference { + hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + sum: Some(3), + child_heights: (0, 0), + key: vec![1] + } + ); + + assert_eq!( + right_link, + Link::Reference { + hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + sum: 
Some(1), + child_heights: (0, 0), + key: vec![3] + } + ); + } } diff --git a/merk/src/tree/link.rs b/merk/src/tree/link.rs index 56d9f1b0..f1b4b9bd 100644 --- a/merk/src/tree/link.rs +++ b/merk/src/tree/link.rs @@ -46,7 +46,7 @@ use crate::HASH_LENGTH_U32; #[cfg(feature = "full")] /// Represents a reference to a child tree node. Links may or may not contain /// the child's `Tree` instance (storing its key if not). -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Link { /// Represents a child tree node which has been pruned from memory, only /// retaining a reference to it (its key). The child node can always be diff --git a/merk/src/tree/mod.rs b/merk/src/tree/mod.rs index c497b893..e9ac2799 100644 --- a/merk/src/tree/mod.rs +++ b/merk/src/tree/mod.rs @@ -98,7 +98,7 @@ use crate::{error::Error, Error::Overflow}; #[cfg(feature = "full")] /// The fields of the `Tree` type, stored on the heap. -#[derive(Clone, Encode, Decode, Debug)] +#[derive(Clone, Encode, Decode, Debug, PartialEq)] pub struct TreeInner { pub(crate) left: Option, pub(crate) right: Option, @@ -137,7 +137,7 @@ impl Terminated for Box {} /// Trees' inner fields are stored on the heap so that nodes can recursively /// link to each other, and so we can detach nodes from their parents, then /// reattach without allocating or freeing heap memory. 
-#[derive(Clone)] +#[derive(Clone, PartialEq)] pub struct Tree { pub(crate) inner: Box, pub(crate) old_size_with_parent_to_child_hook: u32, From fcfe5ae5ff8e7d49c996b1d3c06a8acf237e88c9 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Mon, 2 Oct 2023 08:34:57 +0100 Subject: [PATCH 03/30] wip --- grovedb/src/lib.rs | 2 - grovedb/src/replication.rs | 989 ------------------------------------- merk/src/merk/mod.rs | 9 - 3 files changed, 1000 deletions(-) delete mode 100644 grovedb/src/replication.rs diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 23840455..0c3c7413 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -215,8 +215,6 @@ use grovedb_storage::{Storage, StorageContext}; use grovedb_visualize::DebugByteVectors; #[cfg(any(feature = "full", feature = "verify"))] pub use query::{PathQuery, SizedQuery}; -// #[cfg(feature = "full")] -// pub use replication::{BufferedRestorer, Restorer, SiblingsChunkProducer, SubtreeChunkProducer}; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs deleted file mode 100644 index e97f7820..00000000 --- a/grovedb/src/replication.rs +++ /dev/null @@ -1,989 +0,0 @@ -// // MIT LICENSE -// // -// // Copyright (c) 2021 Dash Core Group -// // -// // Permission is hereby granted, free of charge, to any -// // person obtaining a copy of this software and associated -// // documentation files (the "Software"), to deal in the -// // Software without restriction, including without -// // limitation the rights to use, copy, modify, merge, -// // publish, distribute, sublicense, and/or sell copies of -// // the Software, and to permit persons to whom the Software -// // is furnished to do so, subject to the following -// // conditions: -// // -// // The above copyright notice and this permission notice -// // shall be included in all copies or substantial portions -// // of the Software. 
-// // -// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// // DEALINGS IN THE SOFTWARE. -// -// //! Replication -// -// use std::{ -// collections::VecDeque, -// iter::{empty, once}, -// }; -// -// use grovedb_merk::{ -// proofs::{Node, Op}, -// Merk, TreeFeatureType, -// }; -// use grovedb_path::SubtreePath; -// use grovedb_storage::{ -// rocksdb_storage::{PrefixedRocksDbImmediateStorageContext, PrefixedRocksDbStorageContext}, -// Storage, StorageContext, -// }; -// -// use crate::{Element, Error, GroveDb, Hash, Transaction}; -// -// const OPS_PER_CHUNK: usize = 128; -// -// impl GroveDb { -// /// Creates a chunk producer to replicate GroveDb. -// pub fn chunks(&self) -> SubtreeChunkProducer { -// SubtreeChunkProducer::new(self) -// } -// } -// -// /// Subtree chunks producer. -// pub struct SubtreeChunkProducer<'db> { -// grove_db: &'db GroveDb, -// cache: Option>, -// } -// -// struct SubtreeChunkProducerCache<'db> { -// current_merk_path: Vec>, -// current_merk: Merk>, -// // This needed to be an `Option` because it requires a reference on Merk but it's within the -// // same struct and during struct init a referenced Merk would be moved inside a struct, -// // using `Option` this init happens in two steps. 
-// current_chunk_producer: -// Option>>, -// } -// -// impl<'db> SubtreeChunkProducer<'db> { -// fn new(storage: &'db GroveDb) -> Self { -// SubtreeChunkProducer { -// grove_db: storage, -// cache: None, -// } -// } -// -// /// Chunks in current producer -// pub fn chunks_in_current_producer(&self) -> usize { -// self.cache -// .as_ref() -// .and_then(|c| c.current_chunk_producer.as_ref().map(|p| p.len())) -// .unwrap_or(0) -// } -// -// /// Get chunk -// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> -// where -// P: IntoIterator, -//

::IntoIter: Clone + DoubleEndedIterator, -// { -// let path_iter = path.into_iter(); -// -// if let Some(SubtreeChunkProducerCache { -// current_merk_path, .. -// }) = &self.cache -// { -// if !itertools::equal(current_merk_path, path_iter.clone()) { -// self.cache = None; -// } -// } -// -// if self.cache.is_none() { -// let current_merk = self -// .grove_db -// .open_non_transactional_merk_at_path( -// path_iter.clone().collect::>().as_slice().into(), -// None, -// ) -// .unwrap()?; -// -// if current_merk.root_key().is_none() { -// return Ok(Vec::new()); -// } -// -// self.cache = Some(SubtreeChunkProducerCache { -// current_merk_path: path_iter.map(|p| p.to_vec()).collect(), -// current_merk, -// current_chunk_producer: None, -// }); -// let cache = self.cache.as_mut().expect("exists at this point"); -// cache.current_chunk_producer = Some( -// grovedb_merk::ChunkProducer::new(&cache.current_merk) -// .map_err(|e| Error::CorruptedData(e.to_string()))?, -// ); -// } -// -// self.cache -// .as_mut() -// .expect("must exist at this point") -// .current_chunk_producer -// .as_mut() -// .expect("must exist at this point") -// .chunk(index) -// .map_err(|e| Error::CorruptedData(e.to_string())) -// } -// } -// -// // TODO: make generic over storage_cost context -// type MerkRestorer<'db> = grovedb_merk::Restorer>; -// -// type Path = Vec>; -// -// /// Structure to drive GroveDb restore process. -// pub struct Restorer<'db> { -// current_merk_restorer: Option>, -// current_merk_chunk_index: usize, -// current_merk_path: Path, -// queue: VecDeque<(Path, Vec, Hash, TreeFeatureType)>, -// grove_db: &'db GroveDb, -// tx: &'db Transaction<'db>, -// } -// -// /// Indicates what next piece of information `Restorer` expects or wraps a -// /// successful result. 
-// #[derive(Debug)] -// pub enum RestorerResponse { -// AwaitNextChunk { path: Vec>, index: usize }, -// Ready, -// } -// -// #[derive(Debug)] -// pub struct RestorerError(String); -// -// impl<'db> Restorer<'db> { -// /// Create a GroveDb restorer using a backing storage_cost and root hash. -// pub fn new( -// grove_db: &'db GroveDb, -// root_hash: Hash, -// tx: &'db Transaction<'db>, -// ) -> Result { -// Ok(Restorer { -// tx, -// current_merk_restorer: Some(MerkRestorer::new( -// Merk::open_base( -// grove_db -// .db -// .get_immediate_storage_context(SubtreePath::empty(), tx) -// .unwrap(), -// false, -// ) -// .unwrap() -// .map_err(|e| RestorerError(e.to_string()))?, -// None, -// root_hash, -// )), -// current_merk_chunk_index: 0, -// current_merk_path: vec![], -// queue: VecDeque::new(), -// grove_db, -// }) -// } -// -// /// Process next chunk and receive instruction on what to do next. -// pub fn process_chunk( -// &mut self, -// chunk_ops: impl IntoIterator, -// ) -> Result { -// if self.current_merk_restorer.is_none() { -// // Last restorer was consumed and no more Merks to process. -// return Ok(RestorerResponse::Ready); -// } -// // First we decode a chunk to take out info about nested trees to add them into -// // todo list. -// let mut ops = Vec::new(); -// for op in chunk_ops { -// ops.push(op); -// match ops.last().expect("just inserted") { -// Op::Push(Node::KVValueHashFeatureType( -// key, -// value_bytes, -// value_hash, -// feature_type, -// )) -// | Op::PushInverted(Node::KVValueHashFeatureType( -// key, -// value_bytes, -// value_hash, -// feature_type, -// )) => { -// if let Element::Tree(root_key, _) | Element::SumTree(root_key, ..) = -// Element::deserialize(value_bytes) -// .map_err(|e| RestorerError(e.to_string()))? -// { -// if root_key.is_none() || self.current_merk_path.last() == Some(key) { -// // We add only subtrees of the current subtree to queue, skipping -// // itself; Also skipping empty Merks. 
-// continue; -// } -// let mut path = self.current_merk_path.clone(); -// path.push(key.clone()); -// // The value hash is the root tree hash -// self.queue.push_back(( -// path, -// value_bytes.to_owned(), -// *value_hash, -// *feature_type, -// )); -// } -// } -// _ => {} -// } -// } -// -// // Process chunk using Merk's possibilities. -// let remaining = self -// .current_merk_restorer -// .as_mut() -// .expect("restorer exists at this point") -// .process_chunk(ops) -// .map_err(|e| RestorerError(e.to_string()))?; -// -// self.current_merk_chunk_index += 1; -// -// if remaining == 0 { -// // If no more chunks for this Merk required decide if we're done or take a next -// // Merk to process. -// self.current_merk_restorer -// .take() -// .expect("restorer exists at this point") -// .finalize() -// .map_err(|e| RestorerError(e.to_string()))?; -// if let Some((next_path, combining_value, expected_hash, _)) = self.queue.pop_front() { -// // Process next subtree. -// let merk = self -// .grove_db -// .open_merk_for_replication(next_path.as_slice().into(), self.tx) -// .map_err(|e| RestorerError(e.to_string()))?; -// self.current_merk_restorer = Some(MerkRestorer::new( -// merk, -// Some(combining_value), -// expected_hash, -// )); -// self.current_merk_chunk_index = 0; -// self.current_merk_path = next_path; -// -// Ok(RestorerResponse::AwaitNextChunk { -// path: self.current_merk_path.clone(), -// index: self.current_merk_chunk_index, -// }) -// } else { -// Ok(RestorerResponse::Ready) -// } -// } else { -// // Request a chunk at the same path but with incremented index. -// Ok(RestorerResponse::AwaitNextChunk { -// path: self.current_merk_path.clone(), -// index: self.current_merk_chunk_index, -// }) -// } -// } -// } -// -// /// Chunk producer wrapper which uses bigger messages that may include chunks of -// /// requested subtree with its right siblings. 
-// /// -// /// Because `Restorer` builds GroveDb replica breadth-first way from top to -// /// bottom it makes sense to send a subtree's siblings next instead of its own -// /// subtrees. -// pub struct SiblingsChunkProducer<'db> { -// chunk_producer: SubtreeChunkProducer<'db>, -// } -// -// #[derive(Debug)] -// pub struct GroveChunk { -// subtree_chunks: Vec<(usize, Vec)>, -// } -// -// impl<'db> SiblingsChunkProducer<'db> { -// /// New -// pub fn new(chunk_producer: SubtreeChunkProducer<'db>) -> Self { -// SiblingsChunkProducer { chunk_producer } -// } -// -// /// Get a collection of chunks possibly from different Merks with the first -// /// one as requested. -// pub fn get_chunk<'p, P>(&mut self, path: P, index: usize) -> Result, Error> -// where -// P: IntoIterator, -//

::IntoIter: Clone + DoubleEndedIterator + ExactSizeIterator, -// { -// let path_iter = path.into_iter(); -// let mut result = Vec::new(); -// let mut ops_count = 0; -// -// if path_iter.len() == 0 { -// // We're at the root of GroveDb, no siblings here. -// self.process_subtree_chunks(&mut result, &mut ops_count, empty(), index)?; -// return Ok(result); -// }; -// -// // Get siblings on the right to send chunks of multiple Merks if it meets the -// // limit. -// -// let mut siblings_keys: VecDeque> = VecDeque::new(); -// -// let mut parent_path = path_iter; -// let requested_key = parent_path.next_back(); -// -// let parent_ctx = self -// .chunk_producer -// .grove_db -// .db -// .get_storage_context( -// parent_path.clone().collect::>().as_slice().into(), -// None, -// ) -// .unwrap(); -// let mut siblings_iter = Element::iterator(parent_ctx.raw_iter()).unwrap(); -// -// if let Some(key) = requested_key { -// siblings_iter.fast_forward(key)?; -// } -// -// while let Some(element) = siblings_iter.next_element().unwrap()? { -// if let (key, Element::Tree(..)) | (key, Element::SumTree(..)) = element { -// siblings_keys.push_back(key); -// } -// } -// -// let mut current_index = index; -// // Process each subtree -// while let Some(subtree_key) = siblings_keys.pop_front() { -// #[allow(clippy::map_identity)] -// let subtree_path = parent_path -// .clone() -// .map(|x| x) -// .chain(once(subtree_key.as_slice())); -// -// self.process_subtree_chunks(&mut result, &mut ops_count, subtree_path, current_index)?; -// // Going to a next sibling, should start from 0. -// -// if ops_count >= OPS_PER_CHUNK { -// break; -// } -// current_index = 0; -// } -// -// Ok(result) -// } -// -// /// Process one subtree's chunks -// fn process_subtree_chunks<'p, P>( -// &mut self, -// result: &mut Vec, -// ops_count: &mut usize, -// subtree_path: P, -// from_index: usize, -// ) -> Result<(), Error> -// where -// P: IntoIterator, -//

::IntoIter: Clone + DoubleEndedIterator, -// { -// let path_iter = subtree_path.into_iter(); -// -// let mut current_index = from_index; -// let mut subtree_chunks = Vec::new(); -// -// loop { -// let ops = self -// .chunk_producer -// .get_chunk(path_iter.clone(), current_index)?; -// -// *ops_count += ops.len(); -// subtree_chunks.push((current_index, ops)); -// current_index += 1; -// if current_index >= self.chunk_producer.chunks_in_current_producer() -// || *ops_count >= OPS_PER_CHUNK -// { -// break; -// } -// } -// -// result.push(GroveChunk { subtree_chunks }); -// -// Ok(()) -// } -// } -// -// /// `Restorer` wrapper that applies multiple chunks at once and eventually -// /// returns less requests. It is named by analogy with IO types that do less -// /// syscalls. -// pub struct BufferedRestorer<'db> { -// restorer: Restorer<'db>, -// } -// -// impl<'db> BufferedRestorer<'db> { -// /// New -// pub fn new(restorer: Restorer<'db>) -> Self { -// BufferedRestorer { restorer } -// } -// -// /// Process next chunk and receive instruction on what to do next. 
-// pub fn process_grove_chunks(&mut self, chunks: I) -> Result -// where -// I: IntoIterator + ExactSizeIterator, -// { -// let mut response = RestorerResponse::Ready; -// -// for c in chunks.into_iter() { -// for ops in c.subtree_chunks.into_iter().map(|x| x.1) { -// if !ops.is_empty() { -// response = self.restorer.process_chunk(ops)?; -// } -// } -// } -// -// Ok(response) -// } -// } -// -// // #[cfg(test)] -// // mod test { -// // use rand::RngCore; -// // use tempfile::TempDir; -// // -// // use super::*; -// // use crate::{ -// // batch::GroveDbOp, -// // reference_path::ReferencePathType, -// // tests::{common::EMPTY_PATH, make_test_grovedb, TempGroveDb, -// // ANOTHER_TEST_LEAF, TEST_LEAF}, }; -// // -// // fn replicate(original_db: &GroveDb) -> TempDir { -// // let replica_tempdir = TempDir::new().unwrap(); -// // -// // { -// // let replica_db = GroveDb::open(replica_tempdir.path()).unwrap(); -// // let mut chunk_producer = original_db.chunks(); -// // let tx = replica_db.start_transaction(); -// // -// // let mut restorer = Restorer::new( -// // &replica_db, -// // original_db.root_hash(None).unwrap().unwrap(), -// // &tx, -// // ) -// // .expect("cannot create restorer"); -// // -// // That means root tree chunk with index 0 -// // let mut next_chunk: (Vec>, usize) = (vec![], 0); -// // -// // loop { -// // let chunk = chunk_producer -// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) -// // .expect("cannot get next chunk"); -// // match restorer.process_chunk(chunk).expect("cannot process chunk") { -// // RestorerResponse::Ready => break, -// // RestorerResponse::AwaitNextChunk { path, index } => { -// // next_chunk = (path, index); -// // } -// // } -// // } -// // -// // replica_db.commit_transaction(tx).unwrap().unwrap(); -// // } -// // replica_tempdir -// // } -// // -// // fn replicate_bigger_messages(original_db: &GroveDb) -> TempDir { -// // let replica_tempdir = TempDir::new().unwrap(); -// // -// // { -// // let 
replica_grove_db = GroveDb::open(replica_tempdir.path()).unwrap(); -// // let mut chunk_producer = SiblingsChunkProducer::new(original_db.chunks()); -// // let tx = replica_grove_db.start_transaction(); -// // -// // let mut restorer = BufferedRestorer::new( -// // Restorer::new( -// // &replica_grove_db, -// // original_db.root_hash(None).unwrap().unwrap(), -// // &tx, -// // ) -// // .expect("cannot create restorer"), -// // ); -// // -// // That means root tree chunk with index 0 -// // let mut next_chunk: (Vec>, usize) = (vec![], 0); -// // -// // loop { -// // let chunks = chunk_producer -// // .get_chunk(next_chunk.0.iter().map(|x| x.as_slice()), next_chunk.1) -// // .expect("cannot get next chunk"); -// // match restorer -// // .process_grove_chunks(chunks.into_iter()) -// // .expect("cannot process chunk") -// // { -// // RestorerResponse::Ready => break, -// // RestorerResponse::AwaitNextChunk { path, index } => { -// // next_chunk = (path, index); -// // } -// // } -// // } -// // -// // replica_grove_db.commit_transaction(tx).unwrap().unwrap(); -// // } -// // -// // replica_tempdir -// // } -// // -// // fn test_replication_internal<'a, I, R, F>( -// // original_db: &TempGroveDb, -// // to_compare: I, -// // replicate_fn: F, -// // ) where -// // R: AsRef<[u8]> + 'a, -// // I: Iterator, -// // F: Fn(&GroveDb) -> TempDir, -// // { -// // let expected_root_hash = original_db.root_hash(None).unwrap().unwrap(); -// // -// // let replica_tempdir = replicate_fn(original_db); -// // -// // let replica = GroveDb::open(replica_tempdir.path()).unwrap(); -// // assert_eq!( -// // replica.root_hash(None).unwrap().unwrap(), -// // expected_root_hash -// // ); -// // -// // for full_path in to_compare { -// // let (key, path) = full_path.split_last().unwrap(); -// // assert_eq!( -// // original_db.get(path, key.as_ref(), None).unwrap().unwrap(), -// // replica.get(path, key.as_ref(), None).unwrap().unwrap() -// // ); -// // } -// // } -// // -// // fn 
test_replication<'a, I, R>(original_db: &TempGroveDb, to_compare: I) -// // where -// // R: AsRef<[u8]> + 'a, -// // I: Iterator + Clone, -// // { -// // test_replication_internal(original_db, to_compare.clone(), replicate); -// // test_replication_internal(original_db, to_compare, -// // replicate_bigger_messages); } -// // -// // #[test] -// // fn replicate_wrong_root_hash() { -// // let db = make_test_grovedb(); -// // let mut bad_hash = db.root_hash(None).unwrap().unwrap(); -// // bad_hash[0] = bad_hash[0].wrapping_add(1); -// // -// // let tmp_dir = TempDir::new().unwrap(); -// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); -// // let tx = restored_db.start_transaction(); -// // let mut restorer = Restorer::new(&restored_db, bad_hash, &tx).unwrap(); -// // let mut chunks = db.chunks(); -// // assert!(restorer -// // .process_chunk(chunks.get_chunk([], 0).unwrap()) -// // .is_err()); -// // } -// // -// // #[test] -// // fn replicate_provide_wrong_tree() { -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let expected_hash = db.root_hash(None).unwrap().unwrap(); -// // -// // let tmp_dir = TempDir::new().unwrap(); -// // let restored_db = GroveDb::open(tmp_dir.path()).unwrap(); -// // let tx = restored_db.start_transaction(); -// // let mut restorer = Restorer::new(&restored_db, expected_hash, &tx).unwrap(); -// // let mut chunks = db.chunks(); -// // -// // let next_op = restorer -// // .process_chunk(chunks.get_chunk([], 0).unwrap()) -// // .unwrap(); -// // match next_op { -// // RestorerResponse::AwaitNextChunk { path, index } => { -// // Feed restorer 
a wrong Merk! -// // let chunk = if path == [TEST_LEAF] { -// // chunks.get_chunk([ANOTHER_TEST_LEAF], index).unwrap() -// // } else { -// // chunks.get_chunk([TEST_LEAF], index).unwrap() -// // }; -// // assert!(restorer.process_chunk(chunk).is_err()); -// // } -// // _ => {} -// // } -// // } -// // -// // #[test] -// // fn replicate_nested_grovedb() { -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF], -// // b"key2", -// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". -// // to_vec())), None, -// // None, -// // ) -// // .unwrap() -// // .expect("should insert reference"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::empty_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2"], -// // b"key3", -// // Element::empty_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], -// // b"key4", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let to_compare = [ -// // [TEST_LEAF].as_ref(), -// // [TEST_LEAF, b"key1"].as_ref(), -// // [TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), -// // ]; -// // test_replication(&db, to_compare.into_iter()); -// // } -// // -// // #[test] -// // fn replicate_nested_grovedb_with_sum_trees() { -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], 
-// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF], -// // b"key2", -// // Element::new_reference(ReferencePathType::SiblingReference(b"key1". -// // to_vec())), None, -// // None, -// // ) -// // .unwrap() -// // .expect("should insert reference"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::empty_sum_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2"], -// // b"sumitem", -// // Element::new_sum_item(15), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2"], -// // b"key3", -// // Element::empty_tree(), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[ANOTHER_TEST_LEAF, b"key2", b"key3"], -// // b"key4", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let to_compare = [ -// // [TEST_LEAF].as_ref(), -// // [TEST_LEAF, b"key1"].as_ref(), -// // [TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"sumitem"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3"].as_ref(), -// // [ANOTHER_TEST_LEAF, b"key2", b"key3", b"key4"].as_ref(), -// // ]; -// // test_replication(&db, to_compare.into_iter()); -// // } -// // -// // TODO: Highlights a bug in replication -// // #[test] -// // fn replicate_grovedb_with_sum_tree() { -// // let db = make_test_grovedb(); -// // db.insert(&[TEST_LEAF], b"key1", Element::empty_tree(), None, None) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // 
&[TEST_LEAF, b"key1"], -// // b"key2", -// // Element::new_item(vec![4]), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // db.insert( -// // &[TEST_LEAF, b"key1"], -// // b"key3", -// // Element::new_item(vec![10]), -// // None, -// // None, -// // ) -// // .unwrap() -// // .expect("cannot insert an element"); -// // -// // let to_compare = [ -// // [TEST_LEAF].as_ref(), -// // [ANOTHER_TEST_LEAF].as_ref(), -// // [TEST_LEAF, b"key1"].as_ref(), -// // [TEST_LEAF, b"key1", b"key2"].as_ref(), -// // [TEST_LEAF, b"key1", b"key3"].as_ref(), -// // ]; -// // test_replication(&db, to_compare.into_iter()); -// // } -// // -// // #[test] -// // fn replicate_a_big_one() { -// // const HEIGHT: usize = 3; -// // const SUBTREES_FOR_EACH: usize = 3; -// // const SCALARS_FOR_EACH: usize = 600; -// // -// // let db = make_test_grovedb(); -// // let mut to_compare = Vec::new(); -// // -// // let mut rng = rand::thread_rng(); -// // let mut subtrees: VecDeque> = VecDeque::new(); -// // -// // Generate root tree leafs -// // for _ in 0..SUBTREES_FOR_EACH { -// // let mut bytes = [0; 8]; -// // rng.fill_bytes(&mut bytes); -// // db.insert(EMPTY_PATH, &bytes, Element::empty_tree(), None, None) -// // .unwrap() -// // .unwrap(); -// // subtrees.push_front(vec![bytes]); -// // to_compare.push(vec![bytes]); -// // } -// // -// // while let Some(path) = subtrees.pop_front() { -// // let mut batch = Vec::new(); -// // -// // if path.len() < HEIGHT { -// // for _ in 0..SUBTREES_FOR_EACH { -// // let mut bytes = [0; 8]; -// // rng.fill_bytes(&mut bytes); -// // -// // batch.push(GroveDbOp::insert_op( -// // path.iter().map(|x| x.to_vec()).collect(), -// // bytes.to_vec(), -// // Element::empty_tree(), -// // )); -// // -// // let mut new_path = path.clone(); -// // new_path.push(bytes); -// // subtrees.push_front(new_path.clone()); -// // to_compare.push(new_path.clone()); -// // } -// // } -// // -// // for _ in 0..SCALARS_FOR_EACH { -// 
// let mut bytes = [0; 8]; -// // let mut bytes_val = vec![]; -// // rng.fill_bytes(&mut bytes); -// // rng.fill_bytes(&mut bytes_val); -// // -// // batch.push(GroveDbOp::insert_op( -// // path.iter().map(|x| x.to_vec()).collect(), -// // bytes.to_vec(), -// // Element::new_item(bytes_val), -// // )); -// // -// // let mut new_path = path.clone(); -// // new_path.push(bytes); -// // to_compare.push(new_path.clone()); -// // } -// // -// // db.apply_batch(batch, None, None).unwrap().unwrap(); -// // } -// // -// // test_replication(&db, to_compare.iter().map(|x| x.as_slice())); -// // } -// // -// // #[test] -// // fn replicate_from_checkpoint() { -// // Create a simple GroveDb first -// // let db = make_test_grovedb(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key1", -// // Element::new_item(b"ayya".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::new_item(b"ayyb".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // -// // Save its state with checkpoint -// // let checkpoint_dir_parent = TempDir::new().unwrap(); -// // let checkpoint_dir = checkpoint_dir_parent.path().join("cp"); -// // db.create_checkpoint(&checkpoint_dir).unwrap(); -// // -// // Alter the db to make difference between current state and checkpoint -// // db.delete(&[TEST_LEAF], b"key1", None, None) -// // .unwrap() -// // .unwrap(); -// // db.insert( -// // &[TEST_LEAF], -// // b"key3", -// // Element::new_item(b"ayyd".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // db.insert( -// // &[ANOTHER_TEST_LEAF], -// // b"key2", -// // Element::new_item(b"ayyc".to_vec()), -// // None, -// // None, -// // ) -// // .unwrap() -// // .unwrap(); -// // -// // let checkpoint_db = GroveDb::open(&checkpoint_dir).unwrap(); -// // -// // Ensure checkpoint differs from current state -// // assert_ne!( -// // checkpoint_db -// 
// .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // db.get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // ); -// // -// // Build a replica from checkpoint -// // let replica_dir = replicate(&checkpoint_db); -// // let replica_db = GroveDb::open(&replica_dir).unwrap(); -// // -// // assert_eq!( -// // checkpoint_db.root_hash(None).unwrap().unwrap(), -// // replica_db.root_hash(None).unwrap().unwrap() -// // ); -// // -// // assert_eq!( -// // checkpoint_db -// // .get(&[TEST_LEAF], b"key1", None) -// // .unwrap() -// // .unwrap(), -// // replica_db -// // .get(&[TEST_LEAF], b"key1", None) -// // .unwrap() -// // .unwrap(), -// // ); -// // assert_eq!( -// // checkpoint_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // replica_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // ); -// // assert!(matches!( -// // replica_db.get(&[TEST_LEAF], b"key3", None).unwrap(), -// // Err(Error::PathKeyNotFound(_)) -// // )); -// // -// // Drop original db and checkpoint dir too to ensure there is no dependency -// // drop(db); -// // drop(checkpoint_db); -// // drop(checkpoint_dir); -// // -// // assert_eq!( -// // replica_db -// // .get(&[ANOTHER_TEST_LEAF], b"key2", None) -// // .unwrap() -// // .unwrap(), -// // Element::new_item(b"ayyb".to_vec()) -// // ); -// // } -// // } diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 56308a64..4ddf64a2 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -621,15 +621,6 @@ where }) } - // TODO: remove this - // /// Returns a clone of the Tree instance in Merk - // pub fn get_root_tree(&self) -> Option { - // self.use_tree(|tree| match tree { - // None => None, - // Some(tree) => Some(tree.clone()), - // }) - // } - /// Returns the root non-prefixed key of the tree. If the tree is empty, /// None. 
pub fn root_key(&self) -> Option> { From cf7963f5b03ff27df54e0d5deb15ef952d6d9668 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 08:25:18 +0100 Subject: [PATCH 04/30] wip --- grovedb/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 0c3c7413..6ca3bf80 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -159,8 +159,6 @@ mod query; pub mod query_result_type; #[cfg(any(feature = "full", feature = "verify"))] pub mod reference_path; -#[cfg(feature = "full")] -mod replication; #[cfg(all(test, feature = "full"))] mod tests; #[cfg(feature = "full")] From 467e1215c6f23c17f58d9aad5f442d25bf50dbdc Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 08:27:26 +0100 Subject: [PATCH 05/30] wip --- merk/src/lib.rs | 2 +- merk/src/merk/{chunks.rs => chunks2.rs} | 2 +- merk/src/merk/mod.rs | 6 +++--- merk/src/merk/{restore.rs => restore2.rs} | 6 +++--- merk/src/proofs/chunk.rs | 2 +- merk/src/proofs/chunk/{chunk.rs => chunk2.rs} | 2 +- merk/src/proofs/chunk/chunk_op.rs | 2 +- merk/src/proofs/chunk/util.rs | 4 ++-- merk/src/proofs/tree.rs | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) rename merk/src/merk/{chunks.rs => chunks2.rs} (99%) rename merk/src/merk/{restore.rs => restore2.rs} (99%) rename merk/src/proofs/chunk/{chunk.rs => chunk2.rs} (99%) diff --git a/merk/src/lib.rs b/merk/src/lib.rs index 5a858dfc..5b82876f 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; +pub use crate::merk::{chunks2::ChunkProducer, options::MerkOptions}; /// Provides a container type that allows temporarily taking ownership of a /// value. 
diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks2.rs similarity index 99% rename from merk/src/merk/chunks.rs rename to merk/src/merk/chunks2.rs index 51521ced..84d01e3e 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks2.rs @@ -432,7 +432,7 @@ mod test { use super::*; use crate::{ proofs::{ - chunk::chunk::{ + chunk::chunk2::{ tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, LEFT, RIGHT, }, diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 4ddf64a2..a92bc2a8 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -28,10 +28,10 @@ //! Merk -pub mod chunks; +pub mod chunks2; pub(crate) mod defaults; pub mod options; -pub mod restore; +pub mod restore2; use std::{ cell::Cell, @@ -59,7 +59,7 @@ use crate::{ }, proofs::{ chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, util::traversal_instruction_as_string, }, encode_into, diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore2.rs similarity index 99% rename from merk/src/merk/restore.rs rename to merk/src/merk/restore2.rs index 0b866cac..d82d3e40 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore2.rs @@ -38,7 +38,7 @@ use crate::{ merk::MerkSource, proofs::{ chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, chunk_op::ChunkOp, error::{ChunkError, ChunkError::InternalError}, util::{ @@ -475,10 +475,10 @@ mod tests { use super::*; use crate::{ execute_proof, - merk::chunks::ChunkProducer, + merk::chunks2::ChunkProducer, proofs::{ chunk::{ - chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, + chunk2::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, error::ChunkError::InvalidChunkProof, }, Query, diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 28114876..3768559d 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -53,7 +53,7 @@ use crate::{ mod binary_range; #[cfg(feature = "full")] -pub mod chunk; +pub mod chunk2; pub mod chunk_op; pub mod error; #[cfg(feature = 
"full")] diff --git a/merk/src/proofs/chunk/chunk.rs b/merk/src/proofs/chunk/chunk2.rs similarity index 99% rename from merk/src/proofs/chunk/chunk.rs rename to merk/src/proofs/chunk/chunk2.rs index 95c686b7..4b8a0548 100644 --- a/merk/src/proofs/chunk/chunk.rs +++ b/merk/src/proofs/chunk/chunk2.rs @@ -198,7 +198,7 @@ pub mod tests { use crate::{ proofs::{ - chunk::chunk::{verify_height_proof, LEFT, RIGHT}, + chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, Op::Parent, diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 9402d3d5..535af055 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -126,7 +126,7 @@ mod test { use crate::proofs::{ chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, chunk_op::ChunkOp, }, Node, Op, diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index 3e430acf..2ba21ee0 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -35,7 +35,7 @@ use std::io::Write; use crate::{proofs::chunk::binary_range::BinaryRange, Error}; use crate::{ proofs::chunk::{ - chunk::{LEFT, RIGHT}, + chunk2::{LEFT, RIGHT}, error::{ChunkError, ChunkError::BadTraversalInstruction}, }, Error::InternalError, @@ -372,7 +372,7 @@ mod test { use byteorder::LE; use super::*; - use crate::proofs::chunk::chunk::{LEFT, RIGHT}; + use crate::proofs::chunk::chunk2::{LEFT, RIGHT}; #[test] fn test_chunk_height_per_layer() { diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 99e84827..528288d5 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -44,7 +44,7 @@ use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_ #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; use crate::{ - proofs::chunk::chunk::{LEFT, RIGHT}, + proofs::chunk::chunk2::{LEFT, RIGHT}, Link, TreeFeatureType::SummedMerk, }; From 
b59e63aad69c8cda445af55da4014d2d06845122 Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 09:27:57 +0100 Subject: [PATCH 06/30] rename job --- merk/src/lib.rs | 2 +- merk/src/merk/{chunks2.rs => chunks.rs} | 2 +- merk/src/merk/mod.rs | 5 +- merk/src/merk/restore.rs | 1248 ++++++++++++++++ merk/src/merk/restore2.rs | 1249 ----------------- merk/src/proofs/chunk.rs | 2 +- merk/src/proofs/chunk/{chunk2.rs => chunk.rs} | 2 +- merk/src/proofs/chunk/chunk_op.rs | 2 +- merk/src/proofs/chunk/util.rs | 4 +- merk/src/proofs/tree.rs | 2 +- 10 files changed, 1258 insertions(+), 1260 deletions(-) rename merk/src/merk/{chunks2.rs => chunks.rs} (99%) delete mode 100644 merk/src/merk/restore2.rs rename merk/src/proofs/chunk/{chunk2.rs => chunk.rs} (99%) diff --git a/merk/src/lib.rs b/merk/src/lib.rs index e7e8bc23..adfde559 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks2::ChunkProducer, options::MerkOptions}; +pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; /// Provides a container type that allows temporarily taking ownership of a /// value. diff --git a/merk/src/merk/chunks2.rs b/merk/src/merk/chunks.rs similarity index 99% rename from merk/src/merk/chunks2.rs rename to merk/src/merk/chunks.rs index 84d01e3e..51521ced 100644 --- a/merk/src/merk/chunks2.rs +++ b/merk/src/merk/chunks.rs @@ -432,7 +432,7 @@ mod test { use super::*; use crate::{ proofs::{ - chunk::chunk2::{ + chunk::chunk::{ tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, LEFT, RIGHT, }, diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index e8a2a073..52e28ba0 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -28,11 +28,10 @@ //! 
Merk -pub mod chunks2; +pub mod chunks; pub(crate) mod defaults; pub mod options; -pub mod restore2; pub mod apply; pub mod clear; @@ -67,7 +66,7 @@ use crate::{ }, proofs::{ chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, util::traversal_instruction_as_string, }, encode_into, diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 8b137891..a4c747d1 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -1 +1,1249 @@ +// MIT LICENSE +// +// Copyright (c) 2021 Dash Core Group +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +//! Provides `Restorer`, which can create a replica of a Merk instance by +//! receiving chunk proofs. 
+ +use std::collections::BTreeMap; + +use grovedb_storage::{Batch, StorageContext}; + +use crate::{ + merk, + merk::MerkSource, + proofs::{ + chunk::{ + chunk::{LEFT, RIGHT}, + chunk_op::ChunkOp, + error::{ChunkError, ChunkError::InternalError}, + util::{ + string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + }, + }, + tree::{execute, Child, Tree as ProofTree}, + Node, Op, + }, + tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, + CryptoHash, Error, + Error::{CostsError, EdError, StorageError}, + Link, Merk, + TreeFeatureType::{BasicMerkNode, SummedMerkNode}, +}; + +/// Restorer handles verification of chunks and replication of Merk trees. +/// Chunks can be processed randomly as long as their parent has been processed +/// already. +pub struct Restorer { + merk: Merk, + chunk_id_to_root_hash: BTreeMap, + // this is used to keep track of parents whose links need to be rewritten + parent_keys: BTreeMap>, +} + +impl<'db, S: StorageContext<'db>> Restorer { + /// Initializes a new chunk restorer with the expected root hash for the + /// first chunk + pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { + let mut chunk_id_to_root_hash = BTreeMap::new(); + chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); + + Self { + merk, + chunk_id_to_root_hash, + parent_keys: BTreeMap::new(), + } + } + + // TODO: consider converting chunk id to a vec + /// Processes a chunk at some chunk id, returns the chunks id's of chunks + /// that can be requested + pub fn process_chunk( + &mut self, + chunk_id: String, + chunk: Vec, + ) -> Result, Error> { + let expected_root_hash = self + .chunk_id_to_root_hash + .get(&chunk_id) + .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; + + let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?; + + let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; + + if root_traversal_instruction.is_empty() { + 
self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); + } else { + // every non root chunk has some associated parent with an placeholder link + // here we update the placeholder link to represent the true data + self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?; + } + + // next up, we need to write the chunk and build the map again + let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction); + if chunk_write_result.is_ok() { + // if we were able to successfully write the chunk, we can remove + // the chunk expected root hash from our chunk id map + self.chunk_id_to_root_hash.remove(&chunk_id); + } + + chunk_write_result + } + + /// Process multi chunks (space optimized chunk proofs that can contain + /// multiple singluar chunks) + pub fn process_multi_chunk(&mut self, multi_chunk: Vec) -> Result, Error> { + let mut expect_chunk_id = true; + let mut chunk_ids = vec![]; + let mut current_chunk_id: String = "".to_string(); + + for chunk_op in multi_chunk { + if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id) + || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id) + { + return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk( + "invalid multi chunk ordering", + ))); + } + match chunk_op { + ChunkOp::ChunkId(instructions) => { + current_chunk_id = traversal_instruction_as_string(&instructions); + } + ChunkOp::Chunk(chunk) => { + // TODO: remove clone + let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?; + chunk_ids.extend(next_chunk_ids); + } + } + expect_chunk_id = !expect_chunk_id; + } + Ok(chunk_ids) + } + + /// Verifies the structure of a chunk and ensures the chunk matches the + /// expected root hash + fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash) -> Result { + let chunk_len = chunk.len(); + let mut kv_count = 0; + let mut hash_count = 0; + + // build tree from ops + // ensure only made of KvValueFeatureType and Hash nodes 
and count them + let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| { + if matches!(node, Node::KVValueHashFeatureType(..)) { + kv_count += 1; + Ok(()) + } else if matches!(node, Node::Hash(..)) { + hash_count += 1; + Ok(()) + } else { + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + } + }) + .unwrap()?; + + // chunk len must be exactly equal to the kv_count + hash_count + + // parent_branch_count + child_branch_count + debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); + + // chunk structure verified, next verify root hash + if &tree.hash().unwrap() != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", + ))); + } + + Ok(tree) + } + + /// Write the verified chunk to storage + fn write_chunk( + &mut self, + chunk_tree: ProofTree, + traversal_instruction: &mut Vec, + ) -> Result, Error> { + // this contains all the elements we want to write to storage + let mut batch = self.merk.storage.new_batch(); + let mut new_chunk_ids = Vec::new(); + + chunk_tree.visit_refs_track_traversal_and_parent( + traversal_instruction, + None, + &mut |proof_node, node_traversal_instruction, parent_key| { + match &proof_node.node { + Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => { + // build tree from node value + let mut tree = TreeNode::new_with_value_hash( + key.clone(), + value.clone(), + value_hash.clone(), + *feature_type, + ) + .unwrap(); + + // update tree links + *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link); + *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link); + + // encode the node and add it to the batch + let bytes = tree.encode(); + + batch.put(key, &bytes, None, None).map_err(CostsError) + } + Node::Hash(hash) => { + // the node hash points to the root of another chunk + // we get the chunk id and add 
the hash to restorer state + let chunk_id = traversal_instruction_as_string(node_traversal_instruction); + new_chunk_ids.push(chunk_id.clone()); + self.chunk_id_to_root_hash + .insert(chunk_id.clone(), hash.clone()); + // TODO: handle unwrap + self.parent_keys + .insert(chunk_id, parent_key.unwrap().to_owned()); + Ok(()) + } + _ => { + // we do nothing for other node types + // technically verify chunk will be called before this + // as such this should be be reached + Ok(()) + } + } + }, + )?; + + // write the batch + self.merk + .storage + .commit_batch(batch) + .unwrap() + .map_err(StorageError)?; + + Ok(new_chunk_ids) + } + + /// When we process truncated chunks, the parents of Node::Hash have invalid + /// placeholder for links. + /// When we get the actual chunk associated with the Node::Hash, + /// we need to update the parent link to reflect the correct data. + fn rewrite_parent_link( + &mut self, + chunk_id: &str, + traversal_instruction: &[bool], + chunk_tree: &ProofTree, + ) -> Result<(), Error> { + let parent_key = self + .parent_keys + .get(chunk_id) + .ok_or(Error::ChunkRestoringError(InternalError( + "after successful chunk verification parent key should exist", + )))?; + + let mut parent = merk::fetch_node( + &self.merk.storage, + parent_key.as_slice(), + None::<&fn(&[u8]) -> Option>, + )? + .ok_or(Error::ChunkRestoringError(InternalError( + "cannot find expected parent in memory, most likely state corruption issue", + )))?; + + let is_left = traversal_instruction + .last() + .expect("rewrite is only called when traversal_instruction is not empty"); + + let updated_key = chunk_tree.key(); + let updated_sum = chunk_tree.sum(); + + if let Some(Link::Reference { key, sum, .. 
}) = parent.link_mut(is_left.clone()) { + *key = updated_key.to_vec(); + *sum = updated_sum; + } + + let parent_bytes = parent.encode(); + self.merk + .storage + .put(parent_key, &parent_bytes, None, None) + .unwrap() + .map_err(StorageError)?; + + self.parent_keys + .remove(chunk_id) + .expect("confirmed parent key exists above"); + + Ok(()) + } + + /// Each nodes height is not added to state as such the producer could lie + /// about the height values after replication we need to verify the + /// heights and if invalid recompute the correct values + fn rewrite_heights(&mut self) -> Result<(), Error> { + fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>( + mut walker: RefWalker>, + batch: &mut >::Batch, + ) -> Result<(u8, u8), Error> { + // TODO: remove unwrap + let mut cloned_node = TreeNode::decode( + walker.tree().key().to_vec(), + walker.tree().encode().as_slice(), + None::<&fn(&[u8]) -> Option>, + ) + .unwrap(); + + let mut left_height = 0; + let mut right_height = 0; + + if let Some(left_walker) = walker + .walk(LEFT, None::<&fn(&[u8]) -> Option>) + .unwrap()? + { + let left_child_heights = rewrite_child_heights(left_walker, batch)?; + left_height = left_child_heights.0.max(left_child_heights.1) + 1; + *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights; + } + + if let Some(right_walker) = walker + .walk(RIGHT, None::<&fn(&[u8]) -> Option>) + .unwrap()? 
+ { + let right_child_heights = rewrite_child_heights(right_walker, batch)?; + right_height = right_child_heights.0.max(right_child_heights.1) + 1; + *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights; + } + + let bytes = cloned_node.encode(); + batch + .put(walker.tree().key(), &bytes, None, None) + .map_err(CostsError)?; + + return Ok((left_height, right_height)); + } + + let mut batch = self.merk.storage.new_batch(); + // TODO: deal with unwrap + let mut tree = self.merk.tree.take().unwrap(); + let mut walker = RefWalker::new(&mut tree, self.merk.source()); + + rewrite_child_heights(walker, &mut batch)?; + + self.merk.tree.set(Some(tree)); + + self.merk + .storage + .commit_batch(batch) + .unwrap() + .map_err(StorageError) + } + + /// Rebuild restoration state from partial storage state + fn attempt_state_recovery(&mut self) -> Result<(), Error> { + // TODO: think about the return type some more + let (bad_link_map, parent_keys) = self.merk.verify(); + if !bad_link_map.is_empty() { + self.chunk_id_to_root_hash = bad_link_map; + self.parent_keys = parent_keys; + } + + Ok(()) + } + + /// Consumes the `Restorer` and returns a newly created, fully populated + /// Merk instance. This method will return an error if called before + /// processing all chunks. 
+ pub fn finalize(mut self) -> Result, Error> { + // ensure all chunks have been processed + if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 { + return Err(Error::ChunkRestoringError( + ChunkError::RestorationNotComplete, + )); + } + + // get the latest version of the root node + self.merk + .load_base_root(None::<&fn(&[u8]) -> Option>); + + // if height values are wrong, rewrite height + if self.verify_height().is_err() { + self.rewrite_heights(); + // update the root node after height rewrite + self.merk + .load_base_root(None::<&fn(&[u8]) -> Option>); + } + + if self.merk.verify().0.len() != 0 { + return Err(Error::ChunkRestoringError(ChunkError::InternalError( + "restored tree invalid", + ))); + } + + Ok(self.merk) + } + + /// Verify that the child heights of the merk tree links correctly represent + /// the tree + fn verify_height(&self) -> Result<(), Error> { + let tree = self.merk.tree.take(); + let height_verification_result = if let Some(tree) = &tree { + self.verify_tree_height(&tree, tree.height()) + } else { + Ok(()) + }; + self.merk.tree.set(tree); + height_verification_result + } + + fn verify_tree_height(&self, tree: &TreeNode, parent_height: u8) -> Result<(), Error> { + let (left_height, right_height) = tree.child_heights(); + + if (left_height.abs_diff(right_height)) > 1 { + return Err(Error::CorruptedState( + "invalid child heights, difference greater than 1 for AVL tree", + )); + } + + let max_child_height = left_height.max(right_height); + if parent_height <= max_child_height || parent_height - max_child_height != 1 { + return Err(Error::CorruptedState( + "invalid child heights, parent height is not 1 less than max child height", + )); + } + + let left_link = tree.link(LEFT); + let right_link = tree.link(RIGHT); + + if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some()) + { + return Err(Error::CorruptedState( + "invalid child heights node has child height 0, but hash child", + )); + } 
+ + if let Some(link) = left_link { + let left_tree = link.tree(); + if left_tree.is_none() { + let left_tree = TreeNode::get( + &self.merk.storage, + link.key().to_vec(), + None::<&fn(&[u8]) -> Option>, + ) + .unwrap()? + .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&left_tree, left_height)?; + } else { + self.verify_tree_height(left_tree.unwrap(), left_height)?; + } + } + + if let Some(link) = right_link { + let right_tree = link.tree(); + if right_tree.is_none() { + let right_tree = TreeNode::get( + &self.merk.storage, + link.key().to_vec(), + None::<&fn(&[u8]) -> Option>, + ) + .unwrap()? + .ok_or(Error::CorruptedState("link points to non-existent node"))?; + self.verify_tree_height(&right_tree, right_height)?; + } else { + self.verify_tree_height(right_tree.unwrap(), right_height)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use grovedb_path::SubtreePath; + use grovedb_storage::{ + rocksdb_storage::{ + test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext, + PrefixedRocksDbStorageContext, + }, + RawIterator, Storage, + }; + + use super::*; + use crate::{ + execute_proof, + merk::chunks::ChunkProducer, + proofs::{ + chunk::{ + chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, + error::ChunkError::InvalidChunkProof, + }, + Query, + }, + test_utils::{make_batch_seq, TempMerk}, + Error::ChunkRestoringError, + KVIterator, Merk, PanicSource, + }; + + #[test] + fn test_chunk_verification_non_avl_tree() { + let non_avl_tree_proof = vec![ + Op::Push(Node::KV(vec![1], vec![1])), + Op::Push(Node::KV(vec![2], vec![2])), + Op::Parent, + Op::Push(Node::KV(vec![3], vec![3])), + Op::Parent, + ]; + assert!(Restorer::::verify_chunk( + non_avl_tree_proof, + &[0; 32] + ) + .is_err()); + } + + #[test] + fn test_chunk_verification_only_kv_feature_and_hash() { + // should not accept kv + let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; + let verification_result = + 
Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvhash + let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvdigest + let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + + // should not accept kvrefvaluehash + let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; + let verification_result = + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + assert!(matches!( + verification_result, + Err(ChunkRestoringError(InvalidChunkProof( + "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", + ))) + )); + } + + fn get_node_hash(node: Node) -> Result { + match node { + Node::Hash(hash) => Ok(hash), + _ => Err("expected node hash".to_string()), + } + } + + #[test] + fn test_process_chunk_correct_chunk_id_map() { + let mut merk = TempMerk::new(); + 
let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let mut merk_tree = merk.tree.take().expect("should have inner tree"); + merk.tree.set(Some(merk_tree.clone())); + let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {}); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // initial restorer state should contain just the root hash of the source merk + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // generate first chunk + let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap(); + // apply first chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk successfully"); + assert_eq!(new_chunk_ids.len(), 4); + + // after first chunk application + // the chunk_map should contain 4 items + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + // assert all the chunk hash values + assert_eq!( + restorer.chunk_id_to_root_hash.get("11"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("10"), + 
Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("01"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap()) + .as_ref() + ); + assert_eq!( + restorer.chunk_id_to_root_hash.get("00"), + Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap()) + .as_ref() + ); + + // generate second chunk + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 3); + assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None); + + // let's try to apply the second chunk again, should not work + let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); + // apply second chunk + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); + assert_eq!(chunk_process_result.is_err(), true); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) + )); + + // next let's get a random but expected chunk and work with that e.g. chunk 4 + // but let's apply it to the wrong place + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + let chunk_process_result = + restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); + assert_eq!(chunk_process_result.is_err(), true); + assert!(matches!( + chunk_process_result, + Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + .. 
+ ))) + )); + + // correctly apply chunk 5 + let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); + assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None); + + // correctly apply chunk 3 + let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None); + + // correctly apply chunk 4 + let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); + // apply second chunk + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk) + .unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + // chunk_map should have 1 less element + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None); + + // finalize merk + let mut restored_merk = restorer.finalize().expect("should finalized successfully"); + + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + fn assert_raw_db_entries_eq( + restored: &Merk, + original: &Merk, + length: usize, + ) { + assert_eq!(restored.root_hash().unwrap(), original.root_hash().unwrap()); + + let mut original_entries = original.storage.raw_iter(); + let mut restored_entries = restored.storage.raw_iter(); + original_entries.seek_to_first().unwrap(); + restored_entries.seek_to_first().unwrap(); + + let mut i = 0; + loop { + assert_eq!( + restored_entries.valid().unwrap(), + 
original_entries.valid().unwrap() + ); + if !restored_entries.valid().unwrap() { + break; + } + + assert_eq!(restored_entries.key(), original_entries.key()); + assert_eq!(restored_entries.value(), original_entries.value()); + + restored_entries.next().unwrap(); + original_entries.next().unwrap(); + + i += 1; + } + + assert_eq!(i, length); + } + + // Builds a source merk with batch_size number of elements + // attempts restoration on some empty merk + // verifies that restoration was performed correctly. + fn test_restoration_single_chunk_strategy(batch_size: u64) { + // build the source merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut source_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let (chunk, 
next_chunk_id) = chunk_producer + .chunk(chunk_id.as_str()) + .expect("should get chunk"); + restorer + .process_chunk(chunk_id.to_string(), chunk) + .expect("should process chunk successfully"); + chunk_id_opt = next_chunk_id; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + restored_merk.root_hash().unwrap() + ); + + assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); + } + + #[test] + fn restore_single_chunk_20() { + test_restoration_single_chunk_strategy(20); + } + + #[test] + fn restore_single_chunk_1000() { + test_restoration_single_chunk_strategy(1000); + } + + #[test] + fn test_process_multi_chunk_no_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // 
generate multi chunk from root with no limit + let chunk = chunk_producer + .multi_chunk_with_limit("", None) + .expect("should generate multichunk"); + + assert_eq!(chunk.chunk.len(), 2); + assert_eq!(chunk.next_index, None); + assert_eq!(chunk.remaining_limit, None); + + let next_ids = restorer + .process_multi_chunk(chunk.chunk) + .expect("should process chunk"); + // should have replicated all chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_no_limit_but_non_root() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + 
let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // generate multi chunk from the 2nd chunk with no limit + let multi_chunk = chunk_producer + .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) + .unwrap(); + // tree of height 4 has 5 chunks + // we have restored the first leaving 4 chunks + // each chunk has an extra chunk id, since they are disjoint + // hence the size of the multi chunk should be 8 + assert_eq!(multi_chunk.chunk.len(), 8); + let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(new_chunk_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + let restored_merk = restorer.finalize().expect("should be able to finalize"); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + #[test] + fn test_process_multi_chunk_with_limit() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = 
ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // build multi chunk with with limit of 325 + let multi_chunk = chunk_producer + .multi_chunk_with_limit("", Some(600)) + .unwrap(); + // should only contain the first chunk + assert_eq!(multi_chunk.chunk.len(), 2); + // should point to chunk 2 + assert_eq!(multi_chunk.next_index, Some("11".to_string())); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + assert_eq!(next_ids.len(), 4); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // subsequent chunks are of size 321 + // with limit just above 642 should get 2 chunks (2 and 3) + // disjoint, so multi chunk len should be 4 + let multi_chunk = chunk_producer + .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) + .unwrap(); + assert_eq!(multi_chunk.chunk.len(), 4); + assert_eq!(multi_chunk.next_index, Some("01".to_string())); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + // chunks 2 and 3 are leaf chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); + assert_eq!(restorer.parent_keys.len(), 2); + + // get the last 2 chunks + let multi_chunk = chunk_producer + .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) + .unwrap(); + assert_eq!(multi_chunk.chunk.len(), 4); + assert_eq!(multi_chunk.next_index, None); + let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); + // chunks 2 and 3 are leaf chunks + assert_eq!(next_ids.len(), 0); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + + // finalize merk + let restored_merk = restorer.finalize().unwrap(); + + // compare root hash values + assert_eq!( + restored_merk.root_hash().unwrap(), + merk.root_hash().unwrap() + ); + } + + // Builds a source merk 
with batch_size number of elements + // attempts restoration on some empty merk, with multi chunks + // verifies that restoration was performed correctly. + fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option) { + // build the source merk + let mut source_merk = TempMerk::new(); + let batch = make_batch_seq(0..batch_size); + source_merk + .apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + + // build the restoration merk + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // at the start + // restoration merk should have empty root hash + // and source merk should have a different root hash + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + assert_ne!( + source_merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + // instantiate chunk producer and restorer + let mut chunk_producer = + ChunkProducer::new(&source_merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + + // perform chunk production and processing + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let multi_chunk = chunk_producer + .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) + .expect("should get chunk"); + restorer + .process_multi_chunk(multi_chunk.chunk) + .expect("should process chunk successfully"); + chunk_id_opt = multi_chunk.next_index; + } + + // after chunk processing we should be able to finalize + assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); + assert_eq!(restorer.parent_keys.len(), 0); + let restored_merk = restorer.finalize().expect("should finalize"); + + // compare root hash values + assert_eq!( + source_merk.root_hash().unwrap(), + 
restored_merk.root_hash().unwrap() + ); + } + + #[test] + fn restore_multi_chunk_20_no_limit() { + test_restoration_multi_chunk_strategy(20, None); + } + + #[test] + #[should_panic] + fn restore_multi_chunk_20_tiny_limit() { + test_restoration_multi_chunk_strategy(20, Some(1)); + } + + #[test] + fn restore_multi_chunk_20_limit() { + test_restoration_multi_chunk_strategy(20, Some(1200)); + } + + #[test] + fn restore_multi_chunk_10000_limit() { + test_restoration_multi_chunk_strategy(10000, Some(1200)); + } + + #[test] + fn test_restoration_interruption() { + let mut merk = TempMerk::new(); + let batch = make_batch_seq(0..15); + merk.apply::<_, Vec<_>>(&batch, &[], None) + .unwrap() + .expect("apply failed"); + assert_eq!(merk.height(), Some(4)); + + let storage = TempStorage::new(); + let tx = storage.start_transaction(); + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + + // restorer root hash should be empty + assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); + + // at the start both merks should have different root hash values + assert_ne!( + merk.root_hash().unwrap(), + restoration_merk.root_hash().unwrap() + ); + + let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!( + restorer.chunk_id_to_root_hash.get(""), + Some(merk.root_hash().unwrap()).as_ref() + ); + + // first restore the first chunk + let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); + let new_chunk_ids = restorer + .process_chunk(traversal_instruction_as_string(&vec![]), chunk) + .expect("should process chunk"); + assert_eq!(new_chunk_ids.len(), 4); + assert_eq!(next_chunk_index, Some(2)); + 
assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // store old state for later reference + let old_chunk_id_to_root_hash = restorer.chunk_id_to_root_hash.clone(); + let old_parent_keys = restorer.parent_keys.clone(); + + // drop the restorer and the restoration merk + drop(restorer); + // open the restoration merk again and build a restorer from it + let mut restoration_merk = Merk::open_base( + storage + .get_immediate_storage_context(SubtreePath::empty(), &tx) + .unwrap(), + false, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .unwrap(); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + + // assert the state of the restorer + assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); + assert_eq!(restorer.parent_keys.len(), 0); + + // recover state + let recovery_attempt = restorer.attempt_state_recovery(); + assert_eq!(recovery_attempt.is_ok(), true); + assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); + assert_eq!(restorer.parent_keys.len(), 4); + + // assert equality to old state + assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); + assert_eq!(old_parent_keys, restorer.parent_keys); + } +} diff --git a/merk/src/merk/restore2.rs b/merk/src/merk/restore2.rs deleted file mode 100644 index b8dfe813..00000000 --- a/merk/src/merk/restore2.rs +++ /dev/null @@ -1,1249 +0,0 @@ -// MIT LICENSE -// -// Copyright (c) 2021 Dash Core Group -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// 
shall be included in all copies or substantial portions -// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. - -//! Provides `Restorer`, which can create a replica of a Merk instance by -//! receiving chunk proofs. - -use std::collections::BTreeMap; - -use grovedb_storage::{Batch, StorageContext}; - -use crate::{ - merk, - merk::MerkSource, - proofs::{ - chunk::{ - chunk2::{LEFT, RIGHT}, - chunk_op::ChunkOp, - error::{ChunkError, ChunkError::InternalError}, - util::{ - string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, - }, - }, - tree::{execute, Child, Tree as ProofTree}, - Node, Op, - }, - tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, - CryptoHash, Error, - Error::{CostsError, EdError, StorageError}, - Link, Merk, - TreeFeatureType::{BasicMerkNode, SummedMerkNode}, -}; - -/// Restorer handles verification of chunks and replication of Merk trees. -/// Chunks can be processed randomly as long as their parent has been processed -/// already. 
-pub struct Restorer { - merk: Merk, - chunk_id_to_root_hash: BTreeMap, - // this is used to keep track of parents whose links need to be rewritten - parent_keys: BTreeMap>, -} - -impl<'db, S: StorageContext<'db>> Restorer { - /// Initializes a new chunk restorer with the expected root hash for the - /// first chunk - pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { - let mut chunk_id_to_root_hash = BTreeMap::new(); - chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); - - Self { - merk, - chunk_id_to_root_hash, - parent_keys: BTreeMap::new(), - } - } - - // TODO: consider converting chunk id to a vec - /// Processes a chunk at some chunk id, returns the chunks id's of chunks - /// that can be requested - pub fn process_chunk( - &mut self, - chunk_id: String, - chunk: Vec, - ) -> Result, Error> { - let expected_root_hash = self - .chunk_id_to_root_hash - .get(&chunk_id) - .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; - - let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?; - - let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; - - if root_traversal_instruction.is_empty() { - self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); - } else { - // every non root chunk has some associated parent with an placeholder link - // here we update the placeholder link to represent the true data - self.rewrite_parent_link(&chunk_id, &root_traversal_instruction, &chunk_tree)?; - } - - // next up, we need to write the chunk and build the map again - let chunk_write_result = self.write_chunk(chunk_tree, &mut root_traversal_instruction); - if chunk_write_result.is_ok() { - // if we were able to successfully write the chunk, we can remove - // the chunk expected root hash from our chunk id map - self.chunk_id_to_root_hash.remove(&chunk_id); - } - - chunk_write_result - } - - /// Process multi chunks (space optimized chunk proofs that can contain - /// 
multiple singluar chunks) - pub fn process_multi_chunk(&mut self, multi_chunk: Vec) -> Result, Error> { - let mut expect_chunk_id = true; - let mut chunk_ids = vec![]; - let mut current_chunk_id: String = "".to_string(); - - for chunk_op in multi_chunk { - if (matches!(chunk_op, ChunkOp::ChunkId(..)) && !expect_chunk_id) - || (matches!(chunk_op, ChunkOp::Chunk(..)) && expect_chunk_id) - { - return Err(Error::ChunkRestoringError(ChunkError::InvalidMultiChunk( - "invalid multi chunk ordering", - ))); - } - match chunk_op { - ChunkOp::ChunkId(instructions) => { - current_chunk_id = traversal_instruction_as_string(&instructions); - } - ChunkOp::Chunk(chunk) => { - // TODO: remove clone - let next_chunk_ids = self.process_chunk(current_chunk_id.clone(), chunk)?; - chunk_ids.extend(next_chunk_ids); - } - } - expect_chunk_id = !expect_chunk_id; - } - Ok(chunk_ids) - } - - /// Verifies the structure of a chunk and ensures the chunk matches the - /// expected root hash - fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash) -> Result { - let chunk_len = chunk.len(); - let mut kv_count = 0; - let mut hash_count = 0; - - // build tree from ops - // ensure only made of KvValueFeatureType and Hash nodes and count them - let tree = execute(chunk.clone().into_iter().map(Ok), false, |node| { - if matches!(node, Node::KVValueHashFeatureType(..)) { - kv_count += 1; - Ok(()) - } else if matches!(node, Node::Hash(..)) { - hash_count += 1; - Ok(()) - } else { - Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - } - }) - .unwrap()?; - - // chunk len must be exactly equal to the kv_count + hash_count + - // parent_branch_count + child_branch_count - debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); - - // chunk structure verified, next verify root hash - if &tree.hash().unwrap() != expected_root_hash { - return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( - 
"chunk doesn't match expected root hash", - ))); - } - - Ok(tree) - } - - /// Write the verified chunk to storage - fn write_chunk( - &mut self, - chunk_tree: ProofTree, - traversal_instruction: &mut Vec, - ) -> Result, Error> { - // this contains all the elements we want to write to storage - let mut batch = self.merk.storage.new_batch(); - let mut new_chunk_ids = Vec::new(); - - chunk_tree.visit_refs_track_traversal_and_parent( - traversal_instruction, - None, - &mut |proof_node, node_traversal_instruction, parent_key| { - match &proof_node.node { - Node::KVValueHashFeatureType(key, value, value_hash, feature_type) => { - // build tree from node value - let mut tree = TreeNode::new_with_value_hash( - key.clone(), - value.clone(), - value_hash.clone(), - *feature_type, - ) - .unwrap(); - - // update tree links - *tree.slot_mut(LEFT) = proof_node.left.as_ref().map(Child::as_link); - *tree.slot_mut(RIGHT) = proof_node.right.as_ref().map(Child::as_link); - - // encode the node and add it to the batch - let bytes = tree.encode(); - - batch.put(key, &bytes, None, None).map_err(CostsError) - } - Node::Hash(hash) => { - // the node hash points to the root of another chunk - // we get the chunk id and add the hash to restorer state - let chunk_id = traversal_instruction_as_string(node_traversal_instruction); - new_chunk_ids.push(chunk_id.clone()); - self.chunk_id_to_root_hash - .insert(chunk_id.clone(), hash.clone()); - // TODO: handle unwrap - self.parent_keys - .insert(chunk_id, parent_key.unwrap().to_owned()); - Ok(()) - } - _ => { - // we do nothing for other node types - // technically verify chunk will be called before this - // as such this should be be reached - Ok(()) - } - } - }, - )?; - - // write the batch - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError)?; - - Ok(new_chunk_ids) - } - - /// When we process truncated chunks, the parents of Node::Hash have invalid - /// placeholder for links. 
- /// When we get the actual chunk associated with the Node::Hash, - /// we need to update the parent link to reflect the correct data. - fn rewrite_parent_link( - &mut self, - chunk_id: &str, - traversal_instruction: &[bool], - chunk_tree: &ProofTree, - ) -> Result<(), Error> { - let parent_key = self - .parent_keys - .get(chunk_id) - .ok_or(Error::ChunkRestoringError(InternalError( - "after successful chunk verification parent key should exist", - )))?; - - let mut parent = merk::fetch_node( - &self.merk.storage, - parent_key.as_slice(), - None::<&fn(&[u8]) -> Option>, - )? - .ok_or(Error::ChunkRestoringError(InternalError( - "cannot find expected parent in memory, most likely state corruption issue", - )))?; - - let is_left = traversal_instruction - .last() - .expect("rewrite is only called when traversal_instruction is not empty"); - - let updated_key = chunk_tree.key(); - let updated_sum = chunk_tree.sum(); - - if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) { - *key = updated_key.to_vec(); - *sum = updated_sum; - } - - let parent_bytes = parent.encode(); - self.merk - .storage - .put(parent_key, &parent_bytes, None, None) - .unwrap() - .map_err(StorageError)?; - - self.parent_keys - .remove(chunk_id) - .expect("confirmed parent key exists above"); - - Ok(()) - } - - /// Each nodes height is not added to state as such the producer could lie - /// about the height values after replication we need to verify the - /// heights and if invalid recompute the correct values - fn rewrite_heights(&mut self) -> Result<(), Error> { - fn rewrite_child_heights<'s, 'db, S: StorageContext<'db>>( - mut walker: RefWalker>, - batch: &mut >::Batch, - ) -> Result<(u8, u8), Error> { - // TODO: remove unwrap - let mut cloned_node = TreeNode::decode( - walker.tree().key().to_vec(), - walker.tree().encode().as_slice(), - None::<&fn(&[u8]) -> Option>, - ) - .unwrap(); - - let mut left_height = 0; - let mut right_height = 0; - - if let Some(left_walker) 
= walker - .walk(LEFT, None::<&fn(&[u8]) -> Option>) - .unwrap()? - { - let left_child_heights = rewrite_child_heights(left_walker, batch)?; - left_height = left_child_heights.0.max(left_child_heights.1) + 1; - *cloned_node.link_mut(LEFT).unwrap().child_heights_mut() = left_child_heights; - } - - if let Some(right_walker) = walker - .walk(RIGHT, None::<&fn(&[u8]) -> Option>) - .unwrap()? - { - let right_child_heights = rewrite_child_heights(right_walker, batch)?; - right_height = right_child_heights.0.max(right_child_heights.1) + 1; - *cloned_node.link_mut(RIGHT).unwrap().child_heights_mut() = right_child_heights; - } - - let bytes = cloned_node.encode(); - batch - .put(walker.tree().key(), &bytes, None, None) - .map_err(CostsError)?; - - return Ok((left_height, right_height)); - } - - let mut batch = self.merk.storage.new_batch(); - // TODO: deal with unwrap - let mut tree = self.merk.tree.take().unwrap(); - let mut walker = RefWalker::new(&mut tree, self.merk.source()); - - rewrite_child_heights(walker, &mut batch)?; - - self.merk.tree.set(Some(tree)); - - self.merk - .storage - .commit_batch(batch) - .unwrap() - .map_err(StorageError) - } - - /// Rebuild restoration state from partial storage state - fn attempt_state_recovery(&mut self) -> Result<(), Error> { - // TODO: think about the return type some more - let (bad_link_map, parent_keys) = self.merk.verify(); - if !bad_link_map.is_empty() { - self.chunk_id_to_root_hash = bad_link_map; - self.parent_keys = parent_keys; - } - - Ok(()) - } - - /// Consumes the `Restorer` and returns a newly created, fully populated - /// Merk instance. This method will return an error if called before - /// processing all chunks. 
- pub fn finalize(mut self) -> Result, Error> { - // ensure all chunks have been processed - if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 { - return Err(Error::ChunkRestoringError( - ChunkError::RestorationNotComplete, - )); - } - - // get the latest version of the root node - self.merk - .load_base_root(None::<&fn(&[u8]) -> Option>); - - // if height values are wrong, rewrite height - if self.verify_height().is_err() { - self.rewrite_heights(); - // update the root node after height rewrite - self.merk - .load_base_root(None::<&fn(&[u8]) -> Option>); - } - - if self.merk.verify().0.len() != 0 { - return Err(Error::ChunkRestoringError(ChunkError::InternalError( - "restored tree invalid", - ))); - } - - Ok(self.merk) - } - - /// Verify that the child heights of the merk tree links correctly represent - /// the tree - fn verify_height(&self) -> Result<(), Error> { - let tree = self.merk.tree.take(); - let height_verification_result = if let Some(tree) = &tree { - self.verify_tree_height(&tree, tree.height()) - } else { - Ok(()) - }; - self.merk.tree.set(tree); - height_verification_result - } - - fn verify_tree_height(&self, tree: &TreeNode, parent_height: u8) -> Result<(), Error> { - let (left_height, right_height) = tree.child_heights(); - - if (left_height.abs_diff(right_height)) > 1 { - return Err(Error::CorruptedState( - "invalid child heights, difference greater than 1 for AVL tree", - )); - } - - let max_child_height = left_height.max(right_height); - if parent_height <= max_child_height || parent_height - max_child_height != 1 { - return Err(Error::CorruptedState( - "invalid child heights, parent height is not 1 less than max child height", - )); - } - - let left_link = tree.link(LEFT); - let right_link = tree.link(RIGHT); - - if (left_height == 0 && left_link.is_some()) || (right_height == 0 && right_link.is_some()) - { - return Err(Error::CorruptedState( - "invalid child heights node has child height 0, but hash child", - )); - } 
- - if let Some(link) = left_link { - let left_tree = link.tree(); - if left_tree.is_none() { - let left_tree = TreeNode::get( - &self.merk.storage, - link.key().to_vec(), - None::<&fn(&[u8]) -> Option>, - ) - .unwrap()? - .ok_or(Error::CorruptedState("link points to non-existent node"))?; - self.verify_tree_height(&left_tree, left_height)?; - } else { - self.verify_tree_height(left_tree.unwrap(), left_height)?; - } - } - - if let Some(link) = right_link { - let right_tree = link.tree(); - if right_tree.is_none() { - let right_tree = TreeNode::get( - &self.merk.storage, - link.key().to_vec(), - None::<&fn(&[u8]) -> Option>, - ) - .unwrap()? - .ok_or(Error::CorruptedState("link points to non-existent node"))?; - self.verify_tree_height(&right_tree, right_height)?; - } else { - self.verify_tree_height(right_tree.unwrap(), right_height)?; - } - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use grovedb_path::SubtreePath; - use grovedb_storage::{ - rocksdb_storage::{ - test_utils::TempStorage, PrefixedRocksDbImmediateStorageContext, - PrefixedRocksDbStorageContext, - }, - RawIterator, Storage, - }; - - use super::*; - use crate::{ - execute_proof, - merk::chunks2::ChunkProducer, - proofs::{ - chunk::{ - chunk2::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, - error::ChunkError::InvalidChunkProof, - }, - Query, - }, - test_utils::{make_batch_seq, TempMerk}, - Error::ChunkRestoringError, - KVIterator, Merk, PanicSource, - }; - - #[test] - fn test_chunk_verification_non_avl_tree() { - let non_avl_tree_proof = vec![ - Op::Push(Node::KV(vec![1], vec![1])), - Op::Push(Node::KV(vec![2], vec![2])), - Op::Parent, - Op::Push(Node::KV(vec![3], vec![3])), - Op::Parent, - ]; - assert!(Restorer::::verify_chunk( - non_avl_tree_proof, - &[0; 32] - ) - .is_err()); - } - - #[test] - fn test_chunk_verification_only_kv_feature_and_hash() { - // should not accept kv - let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; - let verification_result = - 
Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); - assert!(matches!( - verification_result, - Err(ChunkRestoringError(InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - )); - - // should not accept kvhash - let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); - assert!(matches!( - verification_result, - Err(ChunkRestoringError(InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - )); - - // should not accept kvdigest - let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); - assert!(matches!( - verification_result, - Err(ChunkRestoringError(InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - )); - - // should not accept kvvaluehash - let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); - assert!(matches!( - verification_result, - Err(ChunkRestoringError(InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - )); - - // should not accept kvrefvaluehash - let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); - assert!(matches!( - verification_result, - Err(ChunkRestoringError(InvalidChunkProof( - "expected chunk proof to contain only kvvaluefeaturetype or hash nodes", - ))) - )); - } - - fn get_node_hash(node: Node) -> Result { - match node { - Node::Hash(hash) => Ok(hash), - _ => Err("expected node hash".to_string()), - } - } - - #[test] - fn test_process_chunk_correct_chunk_id_map() { - let mut merk = TempMerk::new(); - 
let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let mut merk_tree = merk.tree.take().expect("should have inner tree"); - merk.tree.set(Some(merk_tree.clone())); - let mut tree_walker = RefWalker::new(&mut merk_tree, PanicSource {}); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - // initial restorer state should contain just the root hash of the source merk - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // generate first chunk - let (chunk, _) = chunk_producer.chunk_with_index(1).unwrap(); - // apply first chunk - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![]), chunk) - .expect("should process chunk successfully"); - assert_eq!(new_chunk_ids.len(), 4); - - // after first chunk application - // the chunk_map should contain 4 items - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - // assert all the chunk hash values - assert_eq!( - restorer.chunk_id_to_root_hash.get("11"), - Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, LEFT])).unwrap()) - .as_ref() - ); - assert_eq!( - restorer.chunk_id_to_root_hash.get("10"), - 
Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[LEFT, RIGHT])).unwrap()) - .as_ref() - ); - assert_eq!( - restorer.chunk_id_to_root_hash.get("01"), - Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, LEFT])).unwrap()) - .as_ref() - ); - assert_eq!( - restorer.chunk_id_to_root_hash.get("00"), - Some(get_node_hash(traverse_get_node_hash(&mut tree_walker, &[RIGHT, RIGHT])).unwrap()) - .as_ref() - ); - - // generate second chunk - let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); - // apply second chunk - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk) - .unwrap(); - assert_eq!(new_chunk_ids.len(), 0); - // chunk_map should have 1 less element - assert_eq!(restorer.chunk_id_to_root_hash.len(), 3); - assert_eq!(restorer.chunk_id_to_root_hash.get("11"), None); - - // let's try to apply the second chunk again, should not work - let (chunk, _) = chunk_producer.chunk_with_index(2).unwrap(); - // apply second chunk - let chunk_process_result = - restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); - assert_eq!(chunk_process_result.is_err(), true); - assert!(matches!( - chunk_process_result, - Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) - )); - - // next let's get a random but expected chunk and work with that e.g. chunk 4 - // but let's apply it to the wrong place - let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); - let chunk_process_result = - restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); - assert_eq!(chunk_process_result.is_err(), true); - assert!(matches!( - chunk_process_result, - Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( - .. 
- ))) - )); - - // correctly apply chunk 5 - let (chunk, _) = chunk_producer.chunk_with_index(5).unwrap(); - // apply second chunk - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![RIGHT, RIGHT]), chunk) - .unwrap(); - assert_eq!(new_chunk_ids.len(), 0); - // chunk_map should have 1 less element - assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); - assert_eq!(restorer.chunk_id_to_root_hash.get("00"), None); - - // correctly apply chunk 3 - let (chunk, _) = chunk_producer.chunk_with_index(3).unwrap(); - // apply second chunk - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk) - .unwrap(); - assert_eq!(new_chunk_ids.len(), 0); - // chunk_map should have 1 less element - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!(restorer.chunk_id_to_root_hash.get("10"), None); - - // correctly apply chunk 4 - let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); - // apply second chunk - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![RIGHT, LEFT]), chunk) - .unwrap(); - assert_eq!(new_chunk_ids.len(), 0); - // chunk_map should have 1 less element - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None); - - // finalize merk - let mut restored_merk = restorer.finalize().expect("should finalized successfully"); - - assert_eq!( - restored_merk.root_hash().unwrap(), - merk.root_hash().unwrap() - ); - } - - fn assert_raw_db_entries_eq( - restored: &Merk, - original: &Merk, - length: usize, - ) { - assert_eq!(restored.root_hash().unwrap(), original.root_hash().unwrap()); - - let mut original_entries = original.storage.raw_iter(); - let mut restored_entries = restored.storage.raw_iter(); - original_entries.seek_to_first().unwrap(); - restored_entries.seek_to_first().unwrap(); - - let mut i = 0; - loop { - assert_eq!( - restored_entries.valid().unwrap(), - 
original_entries.valid().unwrap() - ); - if !restored_entries.valid().unwrap() { - break; - } - - assert_eq!(restored_entries.key(), original_entries.key()); - assert_eq!(restored_entries.value(), original_entries.value()); - - restored_entries.next().unwrap(); - original_entries.next().unwrap(); - - i += 1; - } - - assert_eq!(i, length); - } - - // Builds a source merk with batch_size number of elements - // attempts restoration on some empty merk - // verifies that restoration was performed correctly. - fn test_restoration_single_chunk_strategy(batch_size: u64) { - // build the source merk - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut source_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let batch = make_batch_seq(0..batch_size); - source_merk - .apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - - // build the restoration merk - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // at the start - // restoration merk should have empty root hash - // and source merk should have a different root hash - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - assert_ne!( - source_merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - // instantiate chunk producer and restorer - let mut chunk_producer = - ChunkProducer::new(&source_merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); - - // perform chunk production and processing - let mut chunk_id_opt = Some("".to_string()); - while let Some(chunk_id) = chunk_id_opt { - let (chunk, 
next_chunk_id) = chunk_producer - .chunk(chunk_id.as_str()) - .expect("should get chunk"); - restorer - .process_chunk(chunk_id.to_string(), chunk) - .expect("should process chunk successfully"); - chunk_id_opt = next_chunk_id; - } - - // after chunk processing we should be able to finalize - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - let restored_merk = restorer.finalize().expect("should finalize"); - - // compare root hash values - assert_eq!( - source_merk.root_hash().unwrap(), - restored_merk.root_hash().unwrap() - ); - - assert_raw_db_entries_eq(&restored_merk, &source_merk, batch_size as usize); - } - - #[test] - fn restore_single_chunk_20() { - test_restoration_single_chunk_strategy(20); - } - - #[test] - fn restore_single_chunk_1000() { - test_restoration_single_chunk_strategy(1000); - } - - #[test] - fn test_process_multi_chunk_no_limit() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // 
generate multi chunk from root with no limit - let chunk = chunk_producer - .multi_chunk_with_limit("", None) - .expect("should generate multichunk"); - - assert_eq!(chunk.chunk.len(), 2); - assert_eq!(chunk.next_index, None); - assert_eq!(chunk.remaining_limit, None); - - let next_ids = restorer - .process_multi_chunk(chunk.chunk) - .expect("should process chunk"); - // should have replicated all chunks - assert_eq!(next_ids.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - - let restored_merk = restorer.finalize().expect("should be able to finalize"); - - // compare root hash values - assert_eq!( - restored_merk.root_hash().unwrap(), - merk.root_hash().unwrap() - ); - } - - #[test] - fn test_process_multi_chunk_no_limit_but_non_root() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // first restore the first chunk - let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); - 
let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![]), chunk) - .expect("should process chunk"); - assert_eq!(new_chunk_ids.len(), 4); - assert_eq!(next_chunk_index, Some(2)); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // generate multi chunk from the 2nd chunk with no limit - let multi_chunk = chunk_producer - .multi_chunk_with_limit_and_index(next_chunk_index.unwrap(), None) - .unwrap(); - // tree of height 4 has 5 chunks - // we have restored the first leaving 4 chunks - // each chunk has an extra chunk id, since they are disjoint - // hence the size of the multi chunk should be 8 - assert_eq!(multi_chunk.chunk.len(), 8); - let new_chunk_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); - assert_eq!(new_chunk_ids.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - - let restored_merk = restorer.finalize().expect("should be able to finalize"); - - // compare root hash values - assert_eq!( - restored_merk.root_hash().unwrap(), - merk.root_hash().unwrap() - ); - } - - #[test] - fn test_process_multi_chunk_with_limit() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = 
ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - // build multi chunk with with limit of 325 - let multi_chunk = chunk_producer - .multi_chunk_with_limit("", Some(600)) - .unwrap(); - // should only contain the first chunk - assert_eq!(multi_chunk.chunk.len(), 2); - // should point to chunk 2 - assert_eq!(multi_chunk.next_index, Some("11".to_string())); - let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); - assert_eq!(next_ids.len(), 4); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // subsequent chunks are of size 321 - // with limit just above 642 should get 2 chunks (2 and 3) - // disjoint, so multi chunk len should be 4 - let multi_chunk = chunk_producer - .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) - .unwrap(); - assert_eq!(multi_chunk.chunk.len(), 4); - assert_eq!(multi_chunk.next_index, Some("01".to_string())); - let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); - // chunks 2 and 3 are leaf chunks - assert_eq!(next_ids.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 2); - assert_eq!(restorer.parent_keys.len(), 2); - - // get the last 2 chunks - let multi_chunk = chunk_producer - .multi_chunk_with_limit(multi_chunk.next_index.unwrap().as_str(), Some(645)) - .unwrap(); - assert_eq!(multi_chunk.chunk.len(), 4); - assert_eq!(multi_chunk.next_index, None); - let next_ids = restorer.process_multi_chunk(multi_chunk.chunk).unwrap(); - // chunks 2 and 3 are leaf chunks - assert_eq!(next_ids.len(), 0); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - - // finalize merk - let restored_merk = restorer.finalize().unwrap(); - - // compare root hash values - assert_eq!( - restored_merk.root_hash().unwrap(), - merk.root_hash().unwrap() - ); - } - - // Builds a source merk 
with batch_size number of elements - // attempts restoration on some empty merk, with multi chunks - // verifies that restoration was performed correctly. - fn test_restoration_multi_chunk_strategy(batch_size: u64, limit: Option) { - // build the source merk - let mut source_merk = TempMerk::new(); - let batch = make_batch_seq(0..batch_size); - source_merk - .apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - - // build the restoration merk - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // at the start - // restoration merk should have empty root hash - // and source merk should have a different root hash - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - assert_ne!( - source_merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - // instantiate chunk producer and restorer - let mut chunk_producer = - ChunkProducer::new(&source_merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); - - // perform chunk production and processing - let mut chunk_id_opt = Some("".to_string()); - while let Some(chunk_id) = chunk_id_opt { - let multi_chunk = chunk_producer - .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) - .expect("should get chunk"); - restorer - .process_multi_chunk(multi_chunk.chunk) - .expect("should process chunk successfully"); - chunk_id_opt = multi_chunk.next_index; - } - - // after chunk processing we should be able to finalize - assert_eq!(restorer.chunk_id_to_root_hash.len(), 0); - assert_eq!(restorer.parent_keys.len(), 0); - let restored_merk = restorer.finalize().expect("should finalize"); - - // compare root hash values - assert_eq!( - source_merk.root_hash().unwrap(), - 
restored_merk.root_hash().unwrap() - ); - } - - #[test] - fn restore_multi_chunk_20_no_limit() { - test_restoration_multi_chunk_strategy(20, None); - } - - #[test] - #[should_panic] - fn restore_multi_chunk_20_tiny_limit() { - test_restoration_multi_chunk_strategy(20, Some(1)); - } - - #[test] - fn restore_multi_chunk_20_limit() { - test_restoration_multi_chunk_strategy(20, Some(1200)); - } - - #[test] - fn restore_multi_chunk_10000_limit() { - test_restoration_multi_chunk_strategy(10000, Some(1200)); - } - - #[test] - fn test_restoration_interruption() { - let mut merk = TempMerk::new(); - let batch = make_batch_seq(0..15); - merk.apply::<_, Vec<_>>(&batch, &[], None) - .unwrap() - .expect("apply failed"); - assert_eq!(merk.height(), Some(4)); - - let storage = TempStorage::new(); - let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - - // restorer root hash should be empty - assert_eq!(restoration_merk.root_hash().unwrap(), [0; 32]); - - // at the start both merks should have different root hash values - assert_ne!( - merk.root_hash().unwrap(), - restoration_merk.root_hash().unwrap() - ); - - let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!( - restorer.chunk_id_to_root_hash.get(""), - Some(merk.root_hash().unwrap()).as_ref() - ); - - // first restore the first chunk - let (chunk, next_chunk_index) = chunk_producer.chunk_with_index(1).unwrap(); - let new_chunk_ids = restorer - .process_chunk(traversal_instruction_as_string(&vec![]), chunk) - .expect("should process chunk"); - assert_eq!(new_chunk_ids.len(), 4); - assert_eq!(next_chunk_index, Some(2)); - 
assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // store old state for later reference - let old_chunk_id_to_root_hash = restorer.chunk_id_to_root_hash.clone(); - let old_parent_keys = restorer.parent_keys.clone(); - - // drop the restorer and the restoration merk - drop(restorer); - // open the restoration merk again and build a restorer from it - let mut restoration_merk = Merk::open_base( - storage - .get_immediate_storage_context(SubtreePath::empty(), &tx) - .unwrap(), - false, - None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .unwrap(); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); - - // assert the state of the restorer - assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); - assert_eq!(restorer.parent_keys.len(), 0); - - // recover state - let recovery_attempt = restorer.attempt_state_recovery(); - assert_eq!(recovery_attempt.is_ok(), true); - assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); - assert_eq!(restorer.parent_keys.len(), 4); - - // assert equality to old state - assert_eq!(old_chunk_id_to_root_hash, restorer.chunk_id_to_root_hash); - assert_eq!(old_parent_keys, restorer.parent_keys); - } -} diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 20fa7c05..2ea820ef 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -53,7 +53,7 @@ use crate::{ mod binary_range; #[cfg(feature = "full")] -pub mod chunk2; +pub mod chunk; pub mod chunk_op; pub mod error; #[cfg(feature = "full")] diff --git a/merk/src/proofs/chunk/chunk2.rs b/merk/src/proofs/chunk/chunk.rs similarity index 99% rename from merk/src/proofs/chunk/chunk2.rs rename to merk/src/proofs/chunk/chunk.rs index e556ee1f..5258dffa 100644 --- a/merk/src/proofs/chunk/chunk2.rs +++ b/merk/src/proofs/chunk/chunk.rs @@ -210,7 +210,7 @@ pub mod tests { use crate::{ proofs::{ - chunk::chunk2::{verify_height_proof, LEFT, RIGHT}, + chunk::chunk::{verify_height_proof, LEFT, RIGHT}, 
tree::execute, Node, Op, Op::Parent, diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 535af055..9402d3d5 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -126,7 +126,7 @@ mod test { use crate::proofs::{ chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, chunk_op::ChunkOp, }, Node, Op, diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index a00041fc..986b24c7 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -35,7 +35,7 @@ use std::io::Write; use crate::{proofs::chunk::binary_range::BinaryRange, Error}; use crate::{ proofs::chunk::{ - chunk2::{LEFT, RIGHT}, + chunk::{LEFT, RIGHT}, error::{ChunkError, ChunkError::BadTraversalInstruction}, }, Error::InternalError, @@ -372,7 +372,7 @@ mod test { use byteorder::LE; use super::*; - use crate::proofs::chunk::chunk2::{LEFT, RIGHT}; + use crate::proofs::chunk::chunk::{LEFT, RIGHT}; #[test] fn test_chunk_height_per_layer() { diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index 47f96d2b..b91bd68f 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -44,7 +44,7 @@ use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_ #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; use crate::{ - proofs::chunk::chunk2::{LEFT, RIGHT}, + proofs::chunk::chunk::{LEFT, RIGHT}, Link, TreeFeatureType::SummedMerkNode, }; From cd57d8b21909e35bc755ecff8b2f31efe1a4a59f Mon Sep 17 00:00:00 2001 From: Wisdom Ogwu Date: Tue, 3 Oct 2023 10:07:09 +0100 Subject: [PATCH 07/30] clippy fixes --- grovedb/src/batch/mod.rs | 13 +++--- grovedb/src/operations/delete/mod.rs | 10 +---- grovedb/src/tests/mod.rs | 13 +++--- grovedb/src/tests/query_tests.rs | 20 ++++----- grovedb/src/versioning.rs | 2 +- merk/src/merk/chunks.rs | 64 +++++++++++++------------- merk/src/merk/mod.rs | 36 +++++---------- merk/src/merk/restore.rs | 
65 ++++++++++++--------------- merk/src/proofs/chunk.rs | 23 ---------- merk/src/proofs/chunk/binary_range.rs | 32 ++++++------- merk/src/proofs/chunk/chunk.rs | 29 ++++++------ merk/src/proofs/chunk/chunk_op.rs | 2 +- merk/src/proofs/chunk/util.rs | 32 ++++++------- merk/src/proofs/tree.rs | 15 +++---- 14 files changed, 147 insertions(+), 209 deletions(-) diff --git a/grovedb/src/batch/mod.rs b/grovedb/src/batch/mod.rs index a3b2d502..70c47619 100644 --- a/grovedb/src/batch/mod.rs +++ b/grovedb/src/batch/mod.rs @@ -2432,8 +2432,8 @@ mod tests { Element::empty_tree(), ), ]; - assert!(matches!( - db.apply_batch( + assert!(db + .apply_batch( ops, Some(BatchApplyOptions { validate_insertion_does_not_override: false, @@ -2446,9 +2446,8 @@ mod tests { }), None ) - .unwrap(), - Ok(_) - )); + .unwrap() + .is_ok()); } #[test] @@ -3489,7 +3488,7 @@ mod tests { elem.clone(), ), ]; - assert!(matches!(db.apply_batch(batch, None, None).unwrap(), Ok(_))); + assert!(db.apply_batch(batch, None, None).unwrap().is_ok()); assert_eq!( db.get([TEST_LEAF].as_ref(), b"key1", None) .unwrap() @@ -3506,7 +3505,7 @@ mod tests { .unwrap() .expect("should generate proof"); let verification_result = GroveDb::verify_query_raw(&proof, &path_query); - assert!(matches!(verification_result, Ok(_))); + assert!(verification_result.is_ok()); // Hit reference limit when you specify max reference hop, lower than actual hop // count diff --git a/grovedb/src/operations/delete/mod.rs b/grovedb/src/operations/delete/mod.rs index 6d7a34d0..350dd2df 100644 --- a/grovedb/src/operations/delete/mod.rs +++ b/grovedb/src/operations/delete/mod.rs @@ -1029,10 +1029,7 @@ mod tests { db.get([TEST_LEAF].as_ref(), b"key1", None).unwrap(), Err(Error::PathKeyNotFound(_)) )); - assert!(matches!( - db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(), - Ok(_) - )); + assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok()); } #[test] @@ -1397,10 +1394,7 @@ mod tests { db.get([TEST_LEAF].as_ref(), b"key1", 
None).unwrap(), Err(Error::PathKeyNotFound(_)) )); - assert!(matches!( - db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap(), - Ok(_) - )); + assert!(db.get([TEST_LEAF].as_ref(), b"key4", None).unwrap().is_ok()); } #[test] diff --git a/grovedb/src/tests/mod.rs b/grovedb/src/tests/mod.rs index 451b2307..cbb0d195 100644 --- a/grovedb/src/tests/mod.rs +++ b/grovedb/src/tests/mod.rs @@ -465,7 +465,7 @@ fn test_element_with_flags() { let db = make_test_grovedb(); db.insert( - [TEST_LEAF.as_ref()].as_ref(), + [TEST_LEAF].as_ref(), b"key1", Element::empty_tree(), None, @@ -2803,7 +2803,7 @@ fn test_root_hash() { #[test] fn test_get_non_existing_root_leaf() { let db = make_test_grovedb(); - assert!(matches!(db.get(EMPTY_PATH, b"ayy", None).unwrap(), Err(_))); + assert!(db.get(EMPTY_PATH, b"ayy", None).unwrap().is_err()); } #[test] @@ -2830,7 +2830,7 @@ fn test_check_subtree_exists_function() { // Empty tree path means root always exist assert!(db - .check_subtree_exists_invalid_path(EMPTY_PATH.into(), None) + .check_subtree_exists_invalid_path(EMPTY_PATH, None) .unwrap() .is_ok()); @@ -2943,17 +2943,14 @@ fn test_storage_wipe() { .expect("cannot insert item"); // retrieve key before wipe - let elem = db - .get(&[TEST_LEAF.as_ref()], b"key", None) - .unwrap() - .unwrap(); + let elem = db.get(&[TEST_LEAF], b"key", None).unwrap().unwrap(); assert_eq!(elem, Element::new_item(b"ayy".to_vec())); // wipe the database db.grove_db.wipe().unwrap(); // retrieve key after wipe - let elem_result = db.get(&[TEST_LEAF.as_ref()], b"key", None).unwrap(); + let elem_result = db.get(&[TEST_LEAF], b"key", None).unwrap(); assert!(elem_result.is_err()); assert!(matches!( elem_result, diff --git a/grovedb/src/tests/query_tests.rs b/grovedb/src/tests/query_tests.rs index 0bb6a1f0..0092b444 100644 --- a/grovedb/src/tests/query_tests.rs +++ b/grovedb/src/tests/query_tests.rs @@ -46,7 +46,7 @@ use crate::{ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { // Insert a couple of 
subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -70,7 +70,7 @@ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { for j in 100u32..150 { let mut j_vec = i_vec.clone(); - j_vec.append(&mut (j as u32).to_be_bytes().to_vec()); + j_vec.append(&mut j.to_be_bytes().to_vec()); db.insert( [TEST_LEAF, i_vec.as_slice(), b"\0"].as_ref(), &j_vec.clone(), @@ -87,7 +87,7 @@ fn populate_tree_for_non_unique_range_subquery(db: &TempGroveDb) { fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 0u32..10 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -110,7 +110,7 @@ fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for j in 25u32..50 { - let j_vec = (j as u32).to_be_bytes().to_vec(); + let j_vec = j.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, i_vec.as_slice(), b"a"].as_ref(), &j_vec, @@ -134,7 +134,7 @@ fn populate_tree_for_non_unique_double_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for k in 100u32..110 { - let k_vec = (k as u32).to_be_bytes().to_vec(); + let k_vec = k.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, i_vec.as_slice(), b"a", &j_vec, b"\0"].as_ref(), &k_vec.clone(), @@ -173,7 +173,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, b"1"].as_ref(), &i_vec, @@ -198,7 +198,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { for j in 100u32..150 { let random_key = rand::thread_rng().gen::<[u8; 32]>(); let mut j_vec 
= i_vec.clone(); - j_vec.append(&mut (j as u32).to_be_bytes().to_vec()); + j_vec.append(&mut j.to_be_bytes().to_vec()); // We should insert every item to the tree holding items db.insert( @@ -231,7 +231,7 @@ fn populate_tree_by_reference_for_non_unique_range_subquery(db: &TempGroveDb) { fn populate_tree_for_unique_range_subquery(db: &TempGroveDb) { // Insert a couple of subtrees first for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF].as_ref(), &i_vec, @@ -278,7 +278,7 @@ fn populate_tree_by_reference_for_unique_range_subquery(db: &TempGroveDb) { .expect("successful subtree insert"); for i in 1985u32..2000 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, b"1"].as_ref(), &i_vec, @@ -333,7 +333,7 @@ fn populate_tree_for_unique_range_subquery_with_non_unique_null_values(db: &mut .expect("successful subtree insert"); // Insert a couple of subtrees first for i in 100u32..200 { - let i_vec = (i as u32).to_be_bytes().to_vec(); + let i_vec = i.to_be_bytes().to_vec(); db.insert( [TEST_LEAF, &[], b"\0"].as_ref(), &i_vec, diff --git a/grovedb/src/versioning.rs b/grovedb/src/versioning.rs index a041b3d8..5a724afc 100644 --- a/grovedb/src/versioning.rs +++ b/grovedb/src/versioning.rs @@ -52,7 +52,7 @@ mod tests { assert_eq!(new_data, [244, 3, 1, 2, 3]); // show that read_version doesn't consume - assert_eq!(read_proof_version(&mut new_data.as_slice()).unwrap(), 500); + assert_eq!(read_proof_version(new_data.as_slice()).unwrap(), 500); assert_eq!(new_data, [244, 3, 1, 2, 3]); // show that we consume the version number and return the remaining vector diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 51521ced..1b014365 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -26,35 +26,28 @@ // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. 
-use std::{ - cmp::max, - collections::{LinkedList, VecDeque}, - path::Iter, -}; +use std::collections::VecDeque; use ed::Encode; -use grovedb_costs::{CostResult, CostsExt, OperationCost}; use grovedb_storage::StorageContext; -use integer_encoding::VarInt; use crate::{ error::Error, proofs::{ chunk::{ chunk_op::ChunkOp, - error::{ChunkError, ChunkError::InternalError}, + error::ChunkError, util::{ chunk_height, chunk_id_from_traversal_instruction, chunk_id_from_traversal_instruction_with_recovery, generate_traversal_instruction, generate_traversal_instruction_as_string, number_of_chunks, - string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, + string_as_traversal_instruction, }, }, Node, Op, }, - tree::RefWalker, Error::ChunkingError, - Merk, PanicSource, + Merk, }; /// ChunkProof for replication of a single subtree @@ -221,7 +214,7 @@ where // generate as many subtree chunks as we can // until we have exhausted all or hit a limit restriction - while current_index != None { + while current_index.is_some() { let current_index_traversal_instruction = generate_traversal_instruction( self.height, current_index.expect("confirmed is Some"), @@ -230,7 +223,7 @@ where // factor in the ChunkId encoding length in limit calculations let temp_limit = if let Some(limit) = current_limit { - let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|e| { + let chunk_id_op_encoding_len = chunk_id_op.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("cannot get encoding length")) })?; if limit >= chunk_id_op_encoding_len { @@ -297,7 +290,7 @@ where // we first get the chunk at the given index // TODO: use the returned chunk index rather than tracking let (chunk_ops, _) = self.chunk_with_index(chunk_index)?; - chunk_byte_length = chunk_ops.encoding_length().map_err(|e| { + chunk_byte_length = chunk_ops.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })?; 
chunk_index += 1; @@ -322,10 +315,10 @@ where let (replacement_chunk, _) = self.chunk_with_index(chunk_index)?; // calculate the new total - let new_total = replacement_chunk.encoding_length().map_err(|e| { + let new_total = replacement_chunk.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })? + chunk_byte_length - - chunk[iteration_index].encoding_length().map_err(|e| { + - chunk[iteration_index].encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })?; @@ -368,7 +361,7 @@ where /// Returns the total number of chunks for the underlying Merk tree. pub fn len(&self) -> usize { - number_of_chunks(self.height as usize) + number_of_chunks(self.height) } /// Gets the next chunk based on the `ChunkProducer`'s internal index state. @@ -390,7 +383,7 @@ where chunk_index .map(|index| generate_traversal_instruction_as_string(self.height, index)) .transpose() - .and_then(|v| Ok((chunk, v))) + .map(|v| (chunk, v)) }), ) } @@ -432,14 +425,19 @@ mod test { use super::*; use crate::{ proofs::{ - chunk::chunk::{ - tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, - LEFT, RIGHT, + chunk::{ + chunk::{ + tests::{traverse_get_kv_feature_type, traverse_get_node_hash}, + LEFT, RIGHT, + }, + util::traversal_instruction_as_string, }, tree::execute, Tree, }, test_utils::{make_batch_seq, TempMerk}, + tree::RefWalker, + PanicSource, }; #[derive(Default)] @@ -455,13 +453,13 @@ mod test { impl NodeCounts { fn sum(&self) -> usize { - return self.hash + self.hash + self.kv_hash + self.kv + self.kv_value_hash + self.kv_digest + self.kv_ref_value_hash - + self.kv_value_hash_feature_type; + + self.kv_value_hash_feature_type } } @@ -548,7 +546,7 @@ mod test { } // returns None after max - assert_eq!(chunks.next().is_none(), true); + assert!(chunks.next().is_none()); } #[test] @@ -582,8 +580,8 @@ mod test { assert_eq!(chunk_producer.len(), 5); // assert bounds - 
assert_eq!(chunk_producer.chunk_with_index(0).is_err(), true); - assert_eq!(chunk_producer.chunk_with_index(6).is_err(), true); + assert!(chunk_producer.chunk_with_index(0).is_err()); + assert!(chunk_producer.chunk_with_index(6).is_err()); // first chunk // expected: @@ -750,7 +748,7 @@ mod test { // generate multi chunk with no limit let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut chunk_result = chunk_producer + let chunk_result = chunk_producer .subtree_multi_chunk_with_limit(1, None) .expect("should generate chunk with limit"); @@ -798,7 +796,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(2)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 453); assert_eq!(chunk.len(), 13); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -818,7 +816,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(3)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 737); assert_eq!(chunk.len(), 17); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -838,7 +836,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(4)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1021); assert_eq!(chunk.len(), 21); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -858,7 +856,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, Some(5)); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1305); assert_eq!(chunk.len(), 25); // op count let tree = 
execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -878,7 +876,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(0)); assert_eq!(chunk_result.next_index, None); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1589); assert_eq!(chunk.len(), 29); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) @@ -898,7 +896,7 @@ mod test { assert_eq!(chunk_result.remaining_limit, Some(18446744073709550026)); assert_eq!(chunk_result.next_index, None); - let mut chunk = chunk_result.chunk; + let chunk = chunk_result.chunk; assert_eq!(chunk.encoding_length().unwrap(), 1589); assert_eq!(chunk.len(), 29); // op count let tree = execute(chunk.into_iter().map(Ok), false, |_| Ok(())) diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 52e28ba0..cea9b2b6 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -44,7 +44,6 @@ pub mod source; use std::{ cell::Cell, - cmp::Ordering, collections::{BTreeMap, BTreeSet, LinkedList}, fmt, }; @@ -60,18 +59,14 @@ use source::MerkSource; use crate::{ error::Error, - merk::{ - defaults::{MAX_UPDATE_VALUE_BASED_ON_COSTS_TIMES, ROOT_KEY_KEY}, - options::MerkOptions, - }, + merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, proofs::{ chunk::{ chunk::{LEFT, RIGHT}, util::traversal_instruction_as_string, }, - encode_into, query::query_item::QueryItem, - Op as ProofOp, Query, + Query, }, tree::{ kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, @@ -290,10 +285,7 @@ where /// Returns the height of the Merk tree pub fn height(&self) -> Option { - self.use_tree(|tree| match tree { - None => None, - Some(tree) => Some(tree.height()), - }) + self.use_tree(|tree| tree.map(|tree| tree.height())) } /// Returns the root non-prefixed key of the tree. 
If the tree is empty, @@ -578,7 +570,7 @@ where ); self.tree.set(tree); - return (bad_link_map, parent_keys); + (bad_link_map, parent_keys) } fn verify_tree( @@ -625,25 +617,21 @@ where Link::Reference { hash, key, sum, .. } => { (hash.to_owned(), key.to_owned(), sum.to_owned()) } - Link::Modified { - tree, - child_heights, - .. - } => ( + Link::Modified { tree, .. } => ( tree.hash().unwrap(), tree.key().to_vec(), tree.sum().unwrap(), ), Link::Loaded { hash, - child_heights, + child_heights: _, sum, tree, } => (hash.to_owned(), tree.key().to_vec(), sum.to_owned()), _ => todo!(), }; - let instruction_id = traversal_instruction_as_string(&traversal_instruction); + let instruction_id = traversal_instruction_as_string(traversal_instruction); let node = TreeNode::get( &self.storage, key, @@ -652,27 +640,27 @@ where .unwrap(); if node.is_err() { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } let node = node.unwrap(); if node.is_none() { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } let node = node.unwrap(); if &node.hash().unwrap() != &hash { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } if node.sum().unwrap() != sum { - bad_link_map.insert(instruction_id.clone(), hash.clone()); + bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } @@ -702,7 +690,7 @@ fn fetch_node<'db>( #[cfg(test)] mod test { - use grovedb_costs::OperationCost; + use grovedb_path::SubtreePath; use grovedb_storage::{ rocksdb_storage::{PrefixedRocksDbStorageContext, RocksDbStorage}, diff --git a/merk/src/merk/restore.rs 
b/merk/src/merk/restore.rs index a4c747d1..98e70672 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -41,18 +41,15 @@ use crate::{ chunk::{LEFT, RIGHT}, chunk_op::ChunkOp, error::{ChunkError, ChunkError::InternalError}, - util::{ - string_as_traversal_instruction, traversal_instruction_as_string, write_to_vec, - }, + util::{string_as_traversal_instruction, traversal_instruction_as_string}, }, tree::{execute, Child, Tree as ProofTree}, Node, Op, }, tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, CryptoHash, Error, - Error::{CostsError, EdError, StorageError}, + Error::{CostsError, StorageError}, Link, Merk, - TreeFeatureType::{BasicMerkNode, SummedMerkNode}, }; /// Restorer handles verification of chunks and replication of Merk trees. @@ -203,7 +200,7 @@ impl<'db, S: StorageContext<'db>> Restorer { let mut tree = TreeNode::new_with_value_hash( key.clone(), value.clone(), - value_hash.clone(), + *value_hash, *feature_type, ) .unwrap(); @@ -222,8 +219,7 @@ impl<'db, S: StorageContext<'db>> Restorer { // we get the chunk id and add the hash to restorer state let chunk_id = traversal_instruction_as_string(node_traversal_instruction); new_chunk_ids.push(chunk_id.clone()); - self.chunk_id_to_root_hash - .insert(chunk_id.clone(), hash.clone()); + self.chunk_id_to_root_hash.insert(chunk_id.clone(), *hash); // TODO: handle unwrap self.parent_keys .insert(chunk_id, parent_key.unwrap().to_owned()); @@ -282,7 +278,7 @@ impl<'db, S: StorageContext<'db>> Restorer { let updated_key = chunk_tree.key(); let updated_sum = chunk_tree.sum(); - if let Some(Link::Reference { key, sum, .. }) = parent.link_mut(is_left.clone()) { + if let Some(Link::Reference { key, sum, .. 
}) = parent.link_mut(*is_left) { *key = updated_key.to_vec(); *sum = updated_sum; } @@ -343,13 +339,13 @@ impl<'db, S: StorageContext<'db>> Restorer { .put(walker.tree().key(), &bytes, None, None) .map_err(CostsError)?; - return Ok((left_height, right_height)); + Ok((left_height, right_height)) } let mut batch = self.merk.storage.new_batch(); // TODO: deal with unwrap let mut tree = self.merk.tree.take().unwrap(); - let mut walker = RefWalker::new(&mut tree, self.merk.source()); + let walker = RefWalker::new(&mut tree, self.merk.source()); rewrite_child_heights(walker, &mut batch)?; @@ -379,7 +375,7 @@ impl<'db, S: StorageContext<'db>> Restorer { /// processing all chunks. pub fn finalize(mut self) -> Result, Error> { // ensure all chunks have been processed - if self.chunk_id_to_root_hash.len() != 0 || self.parent_keys.len() != 0 { + if !self.chunk_id_to_root_hash.is_empty() || !self.parent_keys.is_empty() { return Err(Error::ChunkRestoringError( ChunkError::RestorationNotComplete, )); @@ -397,7 +393,7 @@ impl<'db, S: StorageContext<'db>> Restorer { .load_base_root(None::<&fn(&[u8]) -> Option>); } - if self.merk.verify().0.len() != 0 { + if !self.merk.verify().0.is_empty() { return Err(Error::ChunkRestoringError(ChunkError::InternalError( "restored tree invalid", ))); @@ -411,7 +407,7 @@ impl<'db, S: StorageContext<'db>> Restorer { fn verify_height(&self) -> Result<(), Error> { let tree = self.merk.tree.take(); let height_verification_result = if let Some(tree) = &tree { - self.verify_tree_height(&tree, tree.height()) + self.verify_tree_height(tree, tree.height()) } else { Ok(()) }; @@ -450,7 +446,7 @@ impl<'db, S: StorageContext<'db>> Restorer { if left_tree.is_none() { let left_tree = TreeNode::get( &self.merk.storage, - link.key().to_vec(), + link.key(), None::<&fn(&[u8]) -> Option>, ) .unwrap()? 
@@ -466,7 +462,7 @@ impl<'db, S: StorageContext<'db>> Restorer { if right_tree.is_none() { let right_tree = TreeNode::get( &self.merk.storage, - link.key().to_vec(), + link.key(), None::<&fn(&[u8]) -> Option>, ) .unwrap()? @@ -494,18 +490,13 @@ mod tests { use super::*; use crate::{ - execute_proof, merk::chunks::ChunkProducer, - proofs::{ - chunk::{ - chunk::tests::traverse_get_node_hash, chunk_op::ChunkOp::Chunk, - error::ChunkError::InvalidChunkProof, - }, - Query, + proofs::chunk::{ + chunk::tests::traverse_get_node_hash, error::ChunkError::InvalidChunkProof, }, test_utils::{make_batch_seq, TempMerk}, Error::ChunkRestoringError, - KVIterator, Merk, PanicSource, + Merk, PanicSource, }; #[test] @@ -604,7 +595,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -682,7 +673,7 @@ mod tests { // apply second chunk let chunk_process_result = restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, LEFT]), chunk); - assert_eq!(chunk_process_result.is_err(), true); + assert!(chunk_process_result.is_err()); assert!(matches!( chunk_process_result, Err(Error::ChunkRestoringError(ChunkError::UnexpectedChunk)) @@ -693,7 +684,7 @@ mod tests { let (chunk, _) = chunk_producer.chunk_with_index(4).unwrap(); let chunk_process_result = restorer.process_chunk(traversal_instruction_as_string(&vec![LEFT, RIGHT]), chunk); - assert_eq!(chunk_process_result.is_err(), true); + assert!(chunk_process_result.is_err()); assert!(matches!( chunk_process_result, Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( @@ -735,7 +726,7 @@ mod tests { assert_eq!(restorer.chunk_id_to_root_hash.get("01"), None); // finalize merk - let mut restored_merk = restorer.finalize().expect("should finalized successfully"); + let restored_merk = restorer.finalize().expect("should finalized 
successfully"); assert_eq!( restored_merk.root_hash().unwrap(), @@ -802,7 +793,7 @@ mod tests { // build the restoration merk let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -873,7 +864,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -938,7 +929,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1010,7 +1001,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1096,7 +1087,7 @@ mod tests { // build the restoration merk let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1124,7 +1115,7 @@ mod tests { let mut chunk_id_opt = Some("".to_string()); while let Some(chunk_id) = chunk_id_opt { let multi_chunk = chunk_producer - .multi_chunk_with_limit(chunk_id.as_str(), limit.clone()) + .multi_chunk_with_limit(chunk_id.as_str(), limit) .expect("should get chunk"); restorer .process_multi_chunk(multi_chunk.chunk) @@ -1176,7 +1167,7 @@ mod tests { let storage = TempStorage::new(); let tx = storage.start_transaction(); - let mut restoration_merk = Merk::open_base( + let restoration_merk = 
Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1221,7 +1212,7 @@ mod tests { // drop the restorer and the restoration merk drop(restorer); // open the restoration merk again and build a restorer from it - let mut restoration_merk = Merk::open_base( + let restoration_merk = Merk::open_base( storage .get_immediate_storage_context(SubtreePath::empty(), &tx) .unwrap(), @@ -1238,7 +1229,7 @@ mod tests { // recover state let recovery_attempt = restorer.attempt_state_recovery(); - assert_eq!(recovery_attempt.is_ok(), true); + assert!(recovery_attempt.is_ok()); assert_eq!(restorer.chunk_id_to_root_hash.len(), 4); assert_eq!(restorer.parent_keys.len(), 4); diff --git a/merk/src/proofs/chunk.rs b/merk/src/proofs/chunk.rs index 2ea820ef..22334688 100644 --- a/merk/src/proofs/chunk.rs +++ b/merk/src/proofs/chunk.rs @@ -28,29 +28,6 @@ //! Chunk proofs -#[cfg(feature = "full")] -use grovedb_costs::{ - cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, -}; -#[cfg(feature = "full")] -use grovedb_storage::RawIterator; -#[cfg(feature = "full")] -use { - super::tree::{execute, Tree as ProofTree}, - crate::tree::CryptoHash, - crate::tree::TreeNode, -}; - -#[cfg(feature = "full")] -use super::{Node, Op}; -#[cfg(feature = "full")] -use crate::{ - error::Error, - tree::{Fetch, RefWalker}, - Error::EdError, - TreeFeatureType::BasicMerkNode, -}; - mod binary_range; #[cfg(feature = "full")] pub mod chunk; diff --git a/merk/src/proofs/chunk/binary_range.rs b/merk/src/proofs/chunk/binary_range.rs index 01a20531..2acaa728 100644 --- a/merk/src/proofs/chunk/binary_range.rs +++ b/merk/src/proofs/chunk/binary_range.rs @@ -55,7 +55,7 @@ impl BinaryRange { )); } - return Ok(Self { start, end }); + Ok(Self { start, end }) } /// Returns the len of the current range @@ -103,7 +103,7 @@ impl BinaryRange { let half_size = self.len() / 2; let second_half_start = self.start + half_size; - return Ok(if left { + Ok(if 
left { Self { start: self.start, end: second_half_start - 1, @@ -113,7 +113,7 @@ impl BinaryRange { start: second_half_start, end: self.end, } - }); + }) } /// Returns a new range that increments the start value @@ -144,36 +144,36 @@ mod test { #[test] fn cannot_create_invalid_range() { let invalid_range = BinaryRange::new(5, 3); - assert_eq!(invalid_range.is_err(), true); + assert!(invalid_range.is_err()); } #[test] fn can_get_range_len() { let range = BinaryRange::new(2, 5).expect("should create range"); assert_eq!(range.len(), 4); - assert_eq!(range.odd(), false); + assert!(!range.odd()); let range = BinaryRange::new(2, 2).expect("should create range"); assert_eq!(range.len(), 1); - assert_eq!(range.odd(), true); + assert!(range.odd()); } #[test] fn can_determine_correct_half() { let range = BinaryRange::new(3, 7).expect("should create range"); assert_eq!(range.len(), 5); - assert_eq!(range.odd(), true); + assert!(range.odd()); // cannot determine half for value outside a range - assert_eq!(range.which_half(1).is_none(), true); - assert_eq!(range.which_half(7).is_none(), true); + assert!(range.which_half(1).is_none()); + assert!(range.which_half(7).is_none()); // cannot determine half when range is odd - assert_eq!(range.which_half(3).is_none(), true); + assert!(range.which_half(3).is_none()); let range = BinaryRange::new(3, 6).expect("should create range"); assert_eq!(range.len(), 4); - assert_eq!(range.odd(), false); + assert!(!range.odd()); assert_eq!(range.which_half(3), Some(LEFT)); assert_eq!(range.which_half(4), Some(LEFT)); @@ -207,19 +207,19 @@ mod test { // should not be allowed to advance the range anymore let advance_result = range.advance_range_start(); - assert_eq!(advance_result.is_err(), true); + assert!(advance_result.is_err()); } #[test] fn can_break_range_into_halves() { let range = BinaryRange::new(2, 10).expect("should create range"); assert_eq!(range.len(), 9); - assert_eq!(range.odd(), true); - assert_eq!(range.get_half(LEFT).is_err(), 
true); + assert!(range.odd()); + assert!(range.get_half(LEFT).is_err()); let range = BinaryRange::new(2, 11).expect("should create range"); assert_eq!(range.len(), 10); - assert_eq!(range.odd(), false); + assert!(!range.odd()); let left_range = range.get_half(LEFT).expect("should get sub range"); assert_eq!(left_range.start, 2); @@ -230,7 +230,7 @@ mod test { assert_eq!(right_range.end, 11); // right_range is false, advance to make even - let (right_range, prev) = right_range.advance_range_start().expect("should advance"); + let (right_range, _prev) = right_range.advance_range_start().expect("should advance"); let right_left_range = right_range.get_half(LEFT).expect("should get sub range"); assert_eq!(right_left_range.len(), 2); assert_eq!(right_left_range.start, 8); diff --git a/merk/src/proofs/chunk/chunk.rs b/merk/src/proofs/chunk/chunk.rs index 5258dffa..95d888ec 100644 --- a/merk/src/proofs/chunk/chunk.rs +++ b/merk/src/proofs/chunk/chunk.rs @@ -103,7 +103,7 @@ where depth: usize, ) -> Result, Error> { // base case - if instructions.len() == 0 { + if instructions.is_empty() { // we are at the desired node return self.create_chunk(depth); } @@ -213,11 +213,10 @@ pub mod tests { chunk::chunk::{verify_height_proof, LEFT, RIGHT}, tree::execute, Node, Op, - Op::Parent, }, - test_utils::{make_tree_seq, make_tree_seq_with_start_key}, + test_utils::make_tree_seq_with_start_key, tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, - CryptoHash, PanicSource, TreeFeatureType, + PanicSource, TreeFeatureType, }; fn build_tree_10_nodes() -> TreeNode { @@ -234,39 +233,39 @@ pub mod tests { /// Traverses a tree to a certain node and returns the node hash of that /// node pub fn traverse_get_node_hash( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], ) -> Node { - return traverse_and_apply(walker, traverse_instructions, |walker| { + traverse_and_apply(walker, traverse_instructions, |walker| { walker.to_hash_node().unwrap() - }); + }) } 
/// Traverses a tree to a certain node and returns the kv_feature_type of /// that node pub fn traverse_get_kv_feature_type( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], ) -> Node { - return traverse_and_apply(walker, traverse_instructions, |walker| { + traverse_and_apply(walker, traverse_instructions, |walker| { walker.to_kv_value_hash_feature_type_node() - }); + }) } /// Traverses a tree to a certain node and returns the kv_hash of /// that node pub fn traverse_get_kv_hash( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], ) -> Node { - return traverse_and_apply(walker, traverse_instructions, |walker| { + traverse_and_apply(walker, traverse_instructions, |walker| { walker.to_kvhash_node() - }); + }) } /// Traverses a tree to a certain node and returns the result of applying /// some arbitrary function pub fn traverse_and_apply( - mut walker: &mut RefWalker, + walker: &mut RefWalker, traverse_instructions: &[bool], apply_fn: T, ) -> Node @@ -285,7 +284,7 @@ pub mod tests { .unwrap() .unwrap() .unwrap(); - return traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn); + traverse_and_apply(&mut child, &traverse_instructions[1..], apply_fn) } #[test] diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 9402d3d5..55f5751b 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -31,7 +31,7 @@ use std::io::{Read, Write}; use ed::{Decode, Encode}; use integer_encoding::{VarInt, VarIntReader}; -use crate::proofs::{chunk::chunk_op::ChunkOp::Chunk, Op}; +use crate::proofs::Op; /// Represents the chunk generated from a given starting chunk id #[derive(PartialEq, Debug)] diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index 986b24c7..530e00e7 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -65,7 +65,7 @@ fn chunk_height_per_layer(height: usize) -> Vec { 
// reduce the three_count by 1 // so the remainder becomes 3 + 1 // which is equivalent to 2 + 2 - three_count = three_count - 1; + three_count -= 1; two_count += 2; } 2 => { @@ -100,8 +100,8 @@ pub fn chunk_layer(height: usize, chunk_id: usize) -> Result { // and remaining depth points to a chunk debug_assert!(remaining_depth > layer_heights[layer - 1]); - remaining_depth = remaining_depth - layer_heights[layer - 1]; - layer = layer + 1; + remaining_depth -= layer_heights[layer - 1]; + layer += 1; } Ok(layer - 1) @@ -162,7 +162,7 @@ fn number_of_chunks_internal(layer_heights: Vec) -> usize { chunk_counts_per_layer.push(current_layer_chunk_count); } - return chunk_counts_per_layer.into_iter().sum(); + chunk_counts_per_layer.into_iter().sum() } /// Calculates the maximum number of exit nodes for a tree of height h. @@ -193,7 +193,7 @@ pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result< // from the initial chunk (1) we have an even number of // exit nodes, and they have even numbers of exit nodes ... 
// so total_chunk_count = 1 + some_even_number = odd - debug_assert_eq!(chunk_range.odd(), true); + debug_assert!(chunk_range.odd()); // bisect and reduce the chunk range until we get to the desired chunk // we keep track of every left right decision we make @@ -223,7 +223,7 @@ pub fn generate_traversal_instruction(height: usize, chunk_id: usize) -> Result< // chunk range len is exactly 1 // this must be the desired chunk id // return instructions that got us here - return Ok(instructions); + Ok(instructions) } /// Determine the chunk id given the traversal instruction and the max height of @@ -296,12 +296,12 @@ pub fn chunk_id_from_traversal_instruction( if chunk_count % 2 != 0 { // remove the current chunk from the chunk count - chunk_count = chunk_count - 1; + chunk_count -= 1; } - chunk_count = chunk_count / exit_node_count(layer_height); + chunk_count /= exit_node_count(layer_height); - current_chunk_id = current_chunk_id + offset_multiplier as usize * chunk_count + 1; + current_chunk_id = current_chunk_id + offset_multiplier * chunk_count + 1; start_index = end_index; } @@ -324,7 +324,7 @@ pub fn chunk_id_from_traversal_instruction_with_recovery( height, ); } - return chunk_id_result; + chunk_id_result } /// Generate instruction for traversing to a given chunk in a binary tree, @@ -369,7 +369,6 @@ pub fn write_to_vec(dest: &mut W, value: &[u8]) -> Result<(), Error> { #[cfg(test)] mod test { - use byteorder::LE; use super::*; use crate::proofs::chunk::chunk::{LEFT, RIGHT}; @@ -534,8 +533,8 @@ mod test { assert_eq!(instruction, &[RIGHT, RIGHT]); // out of bound tests - assert_eq!(generate_traversal_instruction(4, 6).is_err(), true); - assert_eq!(generate_traversal_instruction(4, 0).is_err(), true); + assert!(generate_traversal_instruction(4, 6).is_err()); + assert!(generate_traversal_instruction(4, 0).is_err()); } #[test] @@ -586,7 +585,7 @@ mod test { string_as_traversal_instruction("001").unwrap(), vec![RIGHT, RIGHT, LEFT] ); - 
assert_eq!(string_as_traversal_instruction("002").is_err(), true); + assert!(string_as_traversal_instruction("002").is_err()); assert_eq!( string_as_traversal_instruction("").unwrap(), Vec::::new() @@ -675,10 +674,7 @@ mod test { // function with recovery we expect this to backtrack to the last chunk // boundary e.g. [left] should backtrack to [] // [left, left, right, left] should backtrack to [left, left, right] - assert_eq!( - chunk_id_from_traversal_instruction(&[LEFT], 5).is_err(), - true - ); + assert!(chunk_id_from_traversal_instruction(&[LEFT], 5).is_err()); assert_eq!( chunk_id_from_traversal_instruction_with_recovery(&[LEFT], 5).unwrap(), 1 diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index b91bd68f..b3db0d77 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -66,7 +66,7 @@ impl Child { Node::KV(key, _) | Node::KVValueHash(key, ..) => (key.as_slice(), None), Node::KVValueHashFeatureType(key, _, _, feature_type) => { let sum_value = match feature_type { - SummedMerkNode(sum) => Some(sum.clone()), + SummedMerkNode(sum) => Some(*sum), _ => None, }; (key.as_slice(), sum_value) @@ -358,8 +358,7 @@ impl<'a> Iterator for LayerIter<'a> { type Item = &'a Tree; fn next(&mut self) -> Option { - while !self.stack.is_empty() { - let (item, item_depth) = self.stack.pop().expect("confirmed not None"); + while let Some((item, item_depth)) = self.stack.pop() { if item_depth != self.depth { if let Some(right_child) = item.child(false) { self.stack.push((&right_child.tree, item_depth + 1)) @@ -372,7 +371,7 @@ impl<'a> Iterator for LayerIter<'a> { } } - return None; + None } } @@ -665,7 +664,7 @@ mod test { assert_eq!( left_link, Link::Reference { - hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.left.as_ref().map(|node| node.hash).unwrap(), sum: None, child_heights: (0, 0), key: vec![1] @@ -675,7 +674,7 @@ mod test { assert_eq!( right_link, Link::Reference { - hash: tree.right.as_ref().map(|node| 
node.hash).clone().unwrap(), + hash: tree.right.as_ref().map(|node| node.hash).unwrap(), sum: None, child_heights: (0, 0), key: vec![3] @@ -714,7 +713,7 @@ mod test { assert_eq!( left_link, Link::Reference { - hash: tree.left.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.left.as_ref().map(|node| node.hash).unwrap(), sum: Some(3), child_heights: (0, 0), key: vec![1] @@ -724,7 +723,7 @@ mod test { assert_eq!( right_link, Link::Reference { - hash: tree.right.as_ref().map(|node| node.hash).clone().unwrap(), + hash: tree.right.as_ref().map(|node| node.hash).unwrap(), sum: Some(1), child_heights: (0, 0), key: vec![3] From bde2deaecfb4a39d74bf3582a22af3db74d03b5a Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 16 Apr 2024 11:44:11 +0300 Subject: [PATCH 08/30] feat: base state sync --- grovedb/src/lib.rs | 344 ++++++++++++++++++++++++++- grovedb/src/operations/auxiliary.rs | 53 ++++- grovedb/src/operations/delete/mod.rs | 46 ---- merk/src/lib.rs | 2 +- merk/src/merk/chunks.rs | 2 +- merk/src/merk/mod.rs | 46 ++-- merk/src/merk/restore.rs | 43 +++- tutorials/Cargo.toml | 9 +- tutorials/src/bin/replication.rs | 221 +++++++++++++++++ 9 files changed, 679 insertions(+), 87 deletions(-) create mode 100644 tutorials/src/bin/replication.rs diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index aebbf25e..177706e2 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -168,7 +168,10 @@ mod versioning; mod visualize; #[cfg(feature = "full")] -use std::{collections::HashMap, option::Option::None, path::Path}; +use std::{collections::HashMap, option::Option::None, path::Path, fmt}; +use std::collections::{BTreeMap, BTreeSet, LinkedList, VecDeque}; +use std::marker::PhantomData; +use itertools::Chunk; #[cfg(any(feature = "full", feature = "verify"))] use element::helpers; @@ -180,6 +183,7 @@ pub use element::ElementFlags; use grovedb_costs::{ cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, }; +use 
grovedb_costs::storage_cost::key_value_cost::KeyValueStorageCost; #[cfg(feature = "estimated_costs")] pub use grovedb_merk::estimated_costs::{ average_case_costs::{ @@ -199,6 +203,12 @@ use grovedb_merk::{ tree::{combine_hash, value_hash}, BatchEntry, CryptoHash, KVIterator, Merk, }; +use grovedb_merk::{ChunkProducer, Restorer, TreeFeatureType}; +use grovedb_merk::Error::ChunkingError; +use grovedb_merk::proofs::{Node, Op}; +use grovedb_merk::proofs::chunk::error::ChunkError; +use grovedb_merk::proofs::chunk::util::{generate_traversal_instruction_as_string, number_of_chunks}; +use grovedb_merk::tree::kv_digest_to_kv_hash; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -233,6 +243,82 @@ pub struct GroveDb { db: RocksDbStorage, } +pub struct s_db_snapshot { + pub root_hash: CryptoHash, + pub data: Vec<(String, Vec)> +} + +impl s_db_snapshot { + pub fn new() -> s_db_snapshot { + s_db_snapshot { + root_hash: CryptoHash::default(), + data: Vec::new(), + } + } +} + +impl fmt::Debug for s_db_snapshot { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "root_hash:{:?}\n", hex::encode(self.root_hash)); + for (global_chunk_id, _) in self.data.iter() { + write!(f, " global_chunk_id:{:?}\n", global_chunk_id); + } + Ok(()) + } +} + +pub struct s_db_snapshot_sorted { + pub root_hash: CryptoHash, + pub data: BTreeMap)>> +} + +impl s_db_snapshot_sorted { + pub fn new() -> s_db_snapshot_sorted { + s_db_snapshot_sorted { + root_hash: CryptoHash::default(), + data: BTreeMap::new(), + } + } +} + +impl fmt::Debug for s_db_snapshot_sorted { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "root_hash:{:?}\n", hex::encode(self.root_hash)); + for (prefix, chunk_vec) in self.data.iter() { + write!(f, " prefix:{:?}\n", prefix); + for (chunk_id, _) in chunk_vec.iter() { + write!(f, " chunk_id:{:?}\n", chunk_id); + } + } + Ok(()) + } +} + +pub struct 
s_subtrees_metadata { + pub data: BTreeMap>, CryptoHash, CryptoHash)> +} + +impl s_subtrees_metadata { + pub fn new() -> s_subtrees_metadata { + s_subtrees_metadata { + data: BTreeMap::new(), + } + } +} + +impl fmt::Debug for s_subtrees_metadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (prefix, metadata) in self.data.iter() { + let metadata_path = &metadata.0; + let metadata_path_str = s_util_path_to_string(&metadata_path); + let metadata_hash_0 = &metadata.1; + let metadata_hash_1 = &metadata.2; + write!(f, " prefix:{:?} -> path:{:?} ({:?}:{:?})\n", prefix, metadata_path_str, hex::encode(metadata_hash_0), hex::encode(metadata_hash_1)); + } + Ok(()) + } +} + /// Transaction #[cfg(feature = "full")] pub type Transaction<'db> = >::Transaction; @@ -1004,4 +1090,260 @@ impl GroveDb { } Ok(issues) } + + pub fn s_create_db_snapshot( + &self, + list_only_chunk_ids: bool, + ) -> Result { + let mut db_snapsot = s_db_snapshot::new(); + + db_snapsot.root_hash = self.root_hash(None).unwrap().unwrap(); + + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), None).unwrap()?; + for subtree in subtrees_root.into_iter() { + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + let continue_storage_batch = StorageBatch::new(); + + let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); + let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value.unwrap(); + + if (merk.is_empty_tree().unwrap()) { + continue; + } + + let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); + + let mut chunk_id_opt = Some("".to_string()); + while let Some(chunk_id) = chunk_id_opt { + let (chunk, next_chunk_id) = chunk_producer.chunk(chunk_id.as_str()).unwrap(); + + let global_chunk_id = hex::encode(prefix) + &chunk_id; + if (list_only_chunk_ids) { + db_snapsot.data.push((global_chunk_id, vec![])); + } + else { + 
db_snapsot.data.push((global_chunk_id, chunk)); + } + + chunk_id_opt = next_chunk_id; + } + } + + Ok(db_snapsot) + } + + fn s_sort_db_snapshot( + &self, + snapshot: s_db_snapshot, + ) -> Result { + let mut db_snapsot_sorted = s_db_snapshot_sorted::new(); + db_snapsot_sorted.root_hash = snapshot.root_hash; + + let CHUNK_PREFIX_LENGTH: usize = 64; + + for chunk_entry in snapshot.data { + let global_chunk_id = chunk_entry.0; + let chunk_data = chunk_entry.1; + + if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { + return Err(Error::CorruptedData( + "expected global chunk id of at least 64 length".to_string(), + )); + } + + let chunk_prefix = global_chunk_id.chars().take(CHUNK_PREFIX_LENGTH).collect::(); + let chunk_id = global_chunk_id.chars().skip(CHUNK_PREFIX_LENGTH).collect::(); + + db_snapsot_sorted.data.entry(chunk_prefix).or_insert(Vec::new()).push((chunk_id, chunk_data.to_vec())); + } + + for (_key, vec) in db_snapsot_sorted.data.iter_mut() { + vec.sort_by(|a, b| a.0.len().cmp(&b.0.len())); + } + + Ok(db_snapsot_sorted) + } + + fn s_get_subtrees_metadata>( + &self, + path: &SubtreePath, + ) -> Result { + let mut subtrees_metadata = crate::s_subtrees_metadata::new(); + + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), None).unwrap().unwrap(); + for subtree in subtrees_root.into_iter() { + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); + + let current_path = SubtreePath::from(path); + + let parent_path_opt = current_path.derive_parent(); + if (parent_path_opt.is_some()) { + let parent_path = parent_path_opt.unwrap().0; + let continue_storage_batch = StorageBatch::new(); + let parent_merk = self.open_batch_merk_at_path(&continue_storage_batch, parent_path, false).value.unwrap(); + let parent_key = subtree.last().unwrap(); + let (elem_value, elem_value_hash) = parent_merk + .get_value_and_value_hash( + 
parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ) + .unwrap() + .expect("should get value hash") + .expect("value hash should be some"); + + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert(hex::encode(prefix), (current_path.to_vec(), actual_value_hash, elem_value_hash)); + } + else { + subtrees_metadata.data.insert(hex::encode(prefix), (current_path.to_vec(), CryptoHash::default(), CryptoHash::default())); + } + } + Ok(subtrees_metadata) + } + + pub fn s_fetch_chunk( + &self, + global_chunk_id: String + ) -> Result, Error> { + let CHUNK_PREFIX_LENGTH: usize = 64; + if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { + return Err(Error::CorruptedData( + "expected global chunk id of at least 64 length".to_string(), + )); + } + + let chunk_prefix = global_chunk_id.chars().take(CHUNK_PREFIX_LENGTH).collect::(); + let chunk_id = global_chunk_id.chars().skip(CHUNK_PREFIX_LENGTH).collect::(); + + let subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + + match subtrees_metadata.data.get(&chunk_prefix) { + Some(path_data) => { + let subtree = &path_data.0; + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + let continue_storage_batch = StorageBatch::new(); + let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value?; + + if (merk.is_empty_tree().unwrap()) { + return Err(Error::CorruptedData( + "Empty merk".to_string(), + )); + } + + let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); + let (chunk, _) = chunk_producer.chunk(chunk_id.as_str()).unwrap(); + Ok(chunk) + }, + None => { + return Err(Error::CorruptedData( + "Prefix not found".to_string(), + )); + } + } + } + + pub fn s_reconstruct_db( + &self, + snapshot: s_db_snapshot + ) -> Result<(), Error> { + let mut sorted_snapshot = self.s_sort_db_snapshot(snapshot)?; + + //Always start by empty prefix = root + if let 
Some(chunk_vec) = sorted_snapshot.data.remove(&hex::encode(CryptoHash::default())) { + let tx = self.start_transaction(); + let merk = self.open_merk_for_replication(SubtreePath::empty(), &tx).unwrap(); + let mut restorer = Restorer::new(merk, sorted_snapshot.root_hash, None); + for (chunk_id, chunk) in chunk_vec { + restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); + } + restorer.finalize().expect("should finalize"); + self.commit_transaction(tx); + } else { + return Err(Error::CorruptedData( + "No root prefix chunks found".to_string(), + )); + } + + let mut processed_prefixes :BTreeSet = BTreeSet::new(); + processed_prefixes.insert(hex::encode(CryptoHash::default())); + + let mut queue_prefixes_to_be_processed : VecDeque = VecDeque::new(); + + let mut subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + for prefix in subtrees_metadata.data.keys() { + if !processed_prefixes.contains(prefix) { + //println!("prefix:{:?} pending for processing", prefix); + queue_prefixes_to_be_processed.push_back(prefix.to_string()); + } + } + + while (!queue_prefixes_to_be_processed.is_empty()) { + while let Some(current_prefix) = queue_prefixes_to_be_processed.pop_front() { + let prefix_metadata = &subtrees_metadata.data[¤t_prefix]; + let s_path = &prefix_metadata.0; + let s_actual_value_hash = &prefix_metadata.1; + let s_elem_value_hash = &prefix_metadata.2; + println!(" about to process{:?} with ({:?}:{:?})", s_util_path_to_string(&s_path), hex::encode(s_actual_value_hash), hex::encode(s_elem_value_hash)); + + let subtree_path: Vec<&[u8]> = s_path.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + if let Some(chunk_vec) = sorted_snapshot.data.remove(¤t_prefix) { + let tx = self.start_transaction(); + if (chunk_vec.is_empty()) {println!("empty"); } + let merk = self.open_merk_for_replication(path.into(), &tx).unwrap(); + let mut restorer = Restorer::new(merk, *s_elem_value_hash, 
Some(*s_actual_value_hash)); + for (chunk_id, chunk) in chunk_vec { + restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); + } + restorer.finalize().expect("should finalize"); + self.commit_transaction(tx); + } else { + println!(" skipping empty {:?}", s_util_path_to_string(&s_path)); + } + + processed_prefixes.insert(current_prefix); + } + + subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + for prefix in subtrees_metadata.data.keys() { + if !processed_prefixes.contains(prefix) { + queue_prefixes_to_be_processed.push_back(prefix.to_string()); + } + } + } + + if (sorted_snapshot.data.len() > 0) { + return Err(Error::CorruptedData( + "Remaining chunks not processed".to_string(), + )); + } + + let incorrect_hashes = self.verify_grovedb(None)?; + if (incorrect_hashes.len() > 0) { + return Err(Error::CorruptedData( + "DB verification failed".to_string(), + )); + } + + Ok(()) + } +} + +pub fn s_util_path_to_string( + path: &Vec>, +) -> Vec { + let mut subtree_path_str: Vec = vec![]; + for subtree in path.to_vec() { + let string = std::str::from_utf8(&subtree).unwrap(); + subtree_path_str.push(string.parse().unwrap()); + } + subtree_path_str } diff --git a/grovedb/src/operations/auxiliary.rs b/grovedb/src/operations/auxiliary.rs index 0a29c510..6f3ec40a 100644 --- a/grovedb/src/operations/auxiliary.rs +++ b/grovedb/src/operations/auxiliary.rs @@ -30,15 +30,18 @@ #[cfg(feature = "full")] use grovedb_costs::{ - cost_return_on_error_no_add, storage_cost::key_value_cost::KeyValueStorageCost, CostResult, + cost_return_on_error_no_add, cost_return_on_error, storage_cost::key_value_cost::KeyValueStorageCost, CostResult, CostsExt, OperationCost, }; +use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::StorageContext; use grovedb_storage::{Storage, StorageBatch}; +use grovedb_merk::{proofs::Query, KVIterator}; #[cfg(feature = "full")] -use crate::{util::meta_storage_context_optional_tx, 
Error, GroveDb, TransactionArg}; +use crate::{util::meta_storage_context_optional_tx, Error, GroveDb, TransactionArg, Element}; +use crate::util::storage_context_optional_tx; #[cfg(feature = "full")] impl GroveDb { @@ -118,4 +121,50 @@ impl GroveDb { Ok(value).wrap_with_cost(cost) }) } + + // TODO: dumb traversal should not be tolerated + /// Finds keys which are trees for a given subtree recursively. + /// One element means a key of a `merk`, n > 1 elements mean relative path + /// for a deeply nested subtree. + pub fn find_subtrees>( + &self, + path: &SubtreePath, + transaction: TransactionArg, + ) -> CostResult>>, Error> { + let mut cost = OperationCost::default(); + + // TODO: remove conversion to vec; + // However, it's not easy for a reason: + // new keys to enqueue are taken from raw iterator which returns Vec; + // changing that to slice is hard as cursor should be moved for next iteration + // which requires exclusive (&mut) reference, also there is no guarantee that + // slice which points into storage internals will remain valid if raw + // iterator got altered so why that reference should be exclusive; + // + // Update: there are pinned views into RocksDB to return slices of data, perhaps + // there is something for iterators + + let mut queue: Vec>> = vec![path.to_vec()]; + let mut result: Vec>> = queue.clone(); + + while let Some(q) = queue.pop() { + let subtree_path: SubtreePath> = q.as_slice().into(); + // Get the correct subtree with q_ref as path + storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, { + let storage = storage.unwrap_add_cost(&mut cost); + let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost); + while let Some((key, value)) = + cost_return_on_error!(&mut cost, raw_iter.next_element()) + { + if value.is_tree() { + let mut sub_path = q.clone(); + sub_path.push(key.to_vec()); + queue.push(sub_path.clone()); + result.push(sub_path); + } + } + }) + } + 
Ok(result).wrap_with_cost(cost) + } } diff --git a/grovedb/src/operations/delete/mod.rs b/grovedb/src/operations/delete/mod.rs index 350dd2df..f3dcc6cd 100644 --- a/grovedb/src/operations/delete/mod.rs +++ b/grovedb/src/operations/delete/mod.rs @@ -879,52 +879,6 @@ impl GroveDb { Ok(true).wrap_with_cost(cost) } - - // TODO: dumb traversal should not be tolerated - /// Finds keys which are trees for a given subtree recursively. - /// One element means a key of a `merk`, n > 1 elements mean relative path - /// for a deeply nested subtree. - pub(crate) fn find_subtrees>( - &self, - path: &SubtreePath, - transaction: TransactionArg, - ) -> CostResult>>, Error> { - let mut cost = OperationCost::default(); - - // TODO: remove conversion to vec; - // However, it's not easy for a reason: - // new keys to enqueue are taken from raw iterator which returns Vec; - // changing that to slice is hard as cursor should be moved for next iteration - // which requires exclusive (&mut) reference, also there is no guarantee that - // slice which points into storage internals will remain valid if raw - // iterator got altered so why that reference should be exclusive; - // - // Update: there are pinned views into RocksDB to return slices of data, perhaps - // there is something for iterators - - let mut queue: Vec>> = vec![path.to_vec()]; - let mut result: Vec>> = queue.clone(); - - while let Some(q) = queue.pop() { - let subtree_path: SubtreePath> = q.as_slice().into(); - // Get the correct subtree with q_ref as path - storage_context_optional_tx!(self.db, subtree_path, None, transaction, storage, { - let storage = storage.unwrap_add_cost(&mut cost); - let mut raw_iter = Element::iterator(storage.raw_iter()).unwrap_add_cost(&mut cost); - while let Some((key, value)) = - cost_return_on_error!(&mut cost, raw_iter.next_element()) - { - if value.is_tree() { - let mut sub_path = q.clone(); - sub_path.push(key.to_vec()); - queue.push(sub_path.clone()); - result.push(sub_path); - } - } - }) - 
} - Ok(result).wrap_with_cost(cost) - } } #[cfg(feature = "full")] diff --git a/merk/src/lib.rs b/merk/src/lib.rs index adfde559..caf3837c 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -38,7 +38,7 @@ extern crate core; mod merk; #[cfg(feature = "full")] -pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions}; +pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions, restore::Restorer}; /// Provides a container type that allows temporarily taking ownership of a /// value. diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 1b014365..37c485cc 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -106,7 +106,7 @@ where S: StorageContext<'db>, { /// Creates a new `ChunkProducer` for the given `Merk` instance - pub(crate) fn new(merk: &'db Merk) -> Result { + pub fn new(merk: &'db Merk) -> Result { let tree_height = merk .height() .ok_or(Error::ChunkingError(ChunkError::EmptyTree( diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index cea9b2b6..3ccb787c 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -57,24 +57,16 @@ use grovedb_costs::{ use grovedb_storage::{self, Batch, RawIterator, StorageContext}; use source::MerkSource; -use crate::{ - error::Error, - merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, - proofs::{ - chunk::{ - chunk::{LEFT, RIGHT}, - util::traversal_instruction_as_string, - }, - query::query_item::QueryItem, - Query, +use crate::{error::Error, merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, proofs::{ + chunk::{ + chunk::{LEFT, RIGHT}, + util::traversal_instruction_as_string, }, - tree::{ - kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, - }, - Error::{CostsError, EdError, StorageError}, - Link, - MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, -}; + query::query_item::QueryItem, + Query, +}, tree::{ + kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, +}, Error::{CostsError, 
EdError, StorageError}, Link, MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, BatchEntry}; /// Key update types pub struct KeyUpdates { @@ -553,7 +545,7 @@ where /// hash values are computed correctly, heights are accurate and links /// consistent with backing store. // TODO: define the return types - pub fn verify(&self) -> (BTreeMap, BTreeMap>) { + pub fn verify(&self, skip_sum_checks: bool) -> (BTreeMap, BTreeMap>) { let tree = self.tree.take(); let mut bad_link_map: BTreeMap = BTreeMap::new(); @@ -567,6 +559,7 @@ where &mut root_traversal_instruction, &mut bad_link_map, &mut parent_keys, + skip_sum_checks, ); self.tree.set(tree); @@ -579,6 +572,7 @@ where traversal_instruction: &mut Vec, bad_link_map: &mut BTreeMap, parent_keys: &mut BTreeMap>, + skip_sum_checks: bool, ) { if let Some(link) = tree.link(LEFT) { traversal_instruction.push(LEFT); @@ -588,6 +582,7 @@ where traversal_instruction, bad_link_map, parent_keys, + skip_sum_checks ); traversal_instruction.pop(); } @@ -600,6 +595,7 @@ where traversal_instruction, bad_link_map, parent_keys, + skip_sum_checks ); traversal_instruction.pop(); } @@ -612,6 +608,7 @@ where traversal_instruction: &mut Vec, bad_link_map: &mut BTreeMap, parent_keys: &mut BTreeMap>, + skip_sum_checks: bool, ) { let (hash, key, sum) = match link { Link::Reference { hash, key, sum, .. 
} => { @@ -659,15 +656,18 @@ where return; } - if node.sum().unwrap() != sum { - bad_link_map.insert(instruction_id.clone(), hash); - parent_keys.insert(instruction_id, parent_key.to_vec()); - return; + // Need to skip this when restoring a sum tree + if !skip_sum_checks { + if node.sum().unwrap() != sum { + bad_link_map.insert(instruction_id.clone(), hash); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; + } } // TODO: check child heights // all checks passed, recurse - self.verify_tree(&node, traversal_instruction, bad_link_map, parent_keys); + self.verify_tree(&node, traversal_instruction, bad_link_map, parent_keys, skip_sum_checks); } } diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 98e70672..9d5365ca 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -30,6 +30,7 @@ //! receiving chunk proofs. use std::collections::BTreeMap; +use grovedb_costs::cost_return_on_error; use grovedb_storage::{Batch, StorageContext}; @@ -51,6 +52,8 @@ use crate::{ Error::{CostsError, StorageError}, Link, Merk, }; +use crate::merk::committer::MerkCommitter; +use crate::tree::{combine_hash, NoopCommit}; /// Restorer handles verification of chunks and replication of Merk trees. 
/// Chunks can be processed randomly as long as their parent has been processed @@ -58,6 +61,7 @@ use crate::{ pub struct Restorer { merk: Merk, chunk_id_to_root_hash: BTreeMap, + parent_key_value_hash: Option, // this is used to keep track of parents whose links need to be rewritten parent_keys: BTreeMap>, } @@ -65,13 +69,13 @@ pub struct Restorer { impl<'db, S: StorageContext<'db>> Restorer { /// Initializes a new chunk restorer with the expected root hash for the /// first chunk - pub fn new(merk: Merk, expected_root_hash: CryptoHash) -> Self { + pub fn new(merk: Merk, expected_root_hash: CryptoHash, parent_key_value_hash: Option) -> Self { let mut chunk_id_to_root_hash = BTreeMap::new(); chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); - Self { merk, chunk_id_to_root_hash, + parent_key_value_hash, parent_keys: BTreeMap::new(), } } @@ -89,7 +93,11 @@ impl<'db, S: StorageContext<'db>> Restorer { .get(&chunk_id) .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; - let chunk_tree = Self::verify_chunk(chunk, expected_root_hash)?; + let mut parent_key_value_hash: Option = None; + if (chunk_id.len() == 0) { + parent_key_value_hash = self.parent_key_value_hash.clone(); + } + let chunk_tree = Self::verify_chunk(chunk, expected_root_hash, &parent_key_value_hash)?; let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; @@ -144,7 +152,7 @@ impl<'db, S: StorageContext<'db>> Restorer { /// Verifies the structure of a chunk and ensures the chunk matches the /// expected root hash - fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash) -> Result { + fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash, parent_key_value_hash_opt: &Option) -> Result { let chunk_len = chunk.len(); let mut kv_count = 0; let mut hash_count = 0; @@ -171,11 +179,23 @@ impl<'db, S: StorageContext<'db>> Restorer { debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); // chunk structure verified, 
next verify root hash - if &tree.hash().unwrap() != expected_root_hash { - return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( - "chunk doesn't match expected root hash", - ))); - } + let parent_key_value_hash = match parent_key_value_hash_opt { + Some(val_hash) => { + let combined_hash = combine_hash(&val_hash, &tree.hash().unwrap()).unwrap(); + if &combined_hash != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", + ))); + } + }, + None => { + if &tree.hash().unwrap() != expected_root_hash { + return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( + "chunk doesn't match expected root hash", + ))); + } + } + }; Ok(tree) } @@ -361,7 +381,7 @@ impl<'db, S: StorageContext<'db>> Restorer { /// Rebuild restoration state from partial storage state fn attempt_state_recovery(&mut self) -> Result<(), Error> { // TODO: think about the return type some more - let (bad_link_map, parent_keys) = self.merk.verify(); + let (bad_link_map, parent_keys) = self.merk.verify(false); if !bad_link_map.is_empty() { self.chunk_id_to_root_hash = bad_link_map; self.parent_keys = parent_keys; @@ -393,7 +413,7 @@ impl<'db, S: StorageContext<'db>> Restorer { .load_base_root(None::<&fn(&[u8]) -> Option>); } - if !self.merk.verify().0.is_empty() { + if !self.merk.verify(self.merk.is_sum_tree).0.is_empty() { return Err(Error::ChunkRestoringError(ChunkError::InternalError( "restored tree invalid", ))); @@ -498,6 +518,7 @@ mod tests { Error::ChunkRestoringError, Merk, PanicSource, }; + use crate::test_utils::{make_batch_seq_with_same_value, make_batch_seq_with_value}; #[test] fn test_chunk_verification_non_avl_tree() { diff --git a/tutorials/Cargo.toml b/tutorials/Cargo.toml index ec220b44..409a1c64 100644 --- a/tutorials/Cargo.toml +++ b/tutorials/Cargo.toml @@ -7,9 +7,14 @@ default-run = "tutorials" # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -grovedb = { git = "https://github.com/dashpay/grovedb.git" } -path = { path = "../path" } +#grovedb = { git = "https://github.com/dashpay/grovedb.git" } +grovedb = { path = "../grovedb" } +grovedb-merk = { path = "../merk" } +grovedb-storage = { path = "../storage" } +grovedb-visualize = { path = "../visualize" } +grovedb-path = { path = "../path" } rand = "0.8.5" +hex = "0.4" [workspace] diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs new file mode 100644 index 00000000..cfe417d7 --- /dev/null +++ b/tutorials/src/bin/replication.rs @@ -0,0 +1,221 @@ +use std::collections::VecDeque; +use std::ops::Range; +use std::path::Path; +use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction}; +use grovedb::reference_path::ReferencePathType; +use rand::{distributions::Alphanumeric, Rng, thread_rng}; +use rand::prelude::SliceRandom; +use grovedb::element::SumValue; +use grovedb::query_result_type::QueryResultType; +use grovedb_merk::{BatchEntry, ChunkProducer, CryptoHash, Error, Op}; +use grovedb_merk::Error::{EdError, StorageError}; +use grovedb_merk::proofs::chunk::error::ChunkError; +use grovedb_merk::Restorer; +use grovedb_merk::tree::kv::ValueDefinedCostType; +use grovedb_merk::tree::{RefWalker, TreeNode}; +use grovedb_merk::TreeFeatureType::BasicMerkNode; +use grovedb_path::{SubtreePath, SubtreePathBuilder}; +use grovedb_storage::{StorageBatch, StorageContext}; +use grovedb_storage::rocksdb_storage::PrefixedRocksDbStorageContext; +use grovedb_visualize::Visualize; + +const MAIN_ΚΕΥ: &[u8] = b"key_main"; +const MAIN_ΚΕΥ_EMPTY: &[u8] = b"key_main_empty"; + +const KEY_INT_0: &[u8] = b"key_int_0"; +const KEY_INT_REF_0: &[u8] = b"key_int_ref_0"; +const KEY_INT_A: &[u8] = b"key_sum_0"; +const ROOT_PATH: &[&[u8]] = &[]; + +// Allow insertions to overwrite trees +// This is necessary so the tutorial can be rerun easily +const 
INSERT_OPTIONS: Option = Some(InsertOptions { + validate_insertion_does_not_override: false, + validate_insertion_does_not_override_tree: false, + base_root_storage_is_free: true, +}); + +fn populate_db(grovedb_path: String) -> GroveDb { + let db = GroveDb::open(grovedb_path).unwrap(); + + insert_empty_tree_db(&db, ROOT_PATH, MAIN_ΚΕΥ); + insert_empty_tree_db(&db, ROOT_PATH, MAIN_ΚΕΥ_EMPTY); + insert_empty_tree_db(&db, &[MAIN_ΚΕΥ], KEY_INT_0); + + let tx = db.start_transaction(); + let batch_size = 100; + for i in 0..=10 { + insert_range_values_db(&db, &[MAIN_ΚΕΥ, KEY_INT_0], i * batch_size, i * batch_size + batch_size - 1, &tx); + } + let _ = db.commit_transaction(tx); + + insert_empty_tree_db(&db, &[MAIN_ΚΕΥ], KEY_INT_REF_0); + + let tx_2 = db.start_transaction(); + insert_range_ref_double_values_db(&db, &[MAIN_ΚΕΥ, KEY_INT_REF_0], KEY_INT_0, 1, 50, &tx_2); + let _ = db.commit_transaction(tx_2); + + insert_empty_sum_tree_db(&db, &[MAIN_ΚΕΥ], KEY_INT_A); + + let tx_3 = db.start_transaction(); + insert_range_values_db(&db, &[MAIN_ΚΕΥ, KEY_INT_A], 1, 100, &tx_3); + insert_sum_element_db(&db, &[MAIN_ΚΕΥ, KEY_INT_A], 101, 150, &tx_3); + let _ = db.commit_transaction(tx_3); + db +} + +fn create_empty_db(grovedb_path: String) -> GroveDb { + let db = GroveDb::open(grovedb_path).unwrap(); + db +} + +fn main() { + let path_0 = generate_random_path("../tutorial-storage/", "/db_0", 24); + let db_0 = populate_db(path_0.clone()); + let checkpoint_dir = path_0 + "/checkpoint"; + let path_checkpoint = Path::new(checkpoint_dir.as_str()); + db_0.create_checkpoint(&path_checkpoint).expect("cannot create checkpoint"); + let db_checkpoint_0 = GroveDb::open(path_checkpoint).expect("cannot open grovedb from checkpoint"); + + let path_copy = generate_random_path("../tutorial-storage/", "/db_copy", 24); + let db_copy = create_empty_db(path_copy.clone()); + + println!("\n######### root_hashes:"); + let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); + println!("root_hash_0: {:?}", 
hex::encode(root_hash_0)); + let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); + println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); + let root_hash_copy = db_copy.root_hash(None).unwrap().unwrap(); + println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); + + let mut snapshot_checkpoint_0 = db_checkpoint_0.s_create_db_snapshot(true).unwrap(); + let mut rng = thread_rng(); // Create a random number generator + snapshot_checkpoint_0.data.shuffle(&mut rng); // Shuffle the vector in place + + println!("\n######## list of available chunks_ids"); + println!("{:?}", snapshot_checkpoint_0); + + println!("\n######## fetching chunks..."); + for (global_chunk_id, chunk_data) in snapshot_checkpoint_0.data.iter_mut() { + *chunk_data = db_checkpoint_0.s_fetch_chunk(global_chunk_id.to_string()).unwrap(); + } + + println!("\n######### db_checkpoint_0 -> db_copy state sync"); + db_copy.s_reconstruct_db(snapshot_checkpoint_0).expect("should be able to reconstruct db"); + + let query_path = &[MAIN_ΚΕΥ, KEY_INT_0]; + let query_key = (20487u32).to_be_bytes().to_vec(); + println!("\n######## Query on db_checkpoint_0:"); + query_db(&db_checkpoint_0, query_path, query_key.clone()); + println!("\n######## Query on db_copy:"); + query_db(&db_copy, query_path, query_key.clone()); + + return; + +} + +fn insert_empty_tree_db(db: &GroveDb, path: &[&[u8]], key: &[u8]) +{ + db.insert(path, key, Element::empty_tree(), INSERT_OPTIONS, None) + .unwrap() + .expect("successfully inserted tree"); +} +fn insert_range_values_db(db: &GroveDb, path: &[&[u8]], min_i: u32, max_i: u32, transaction: &Transaction) +{ + for i in min_i..=max_i { + let i_vec = i.to_be_bytes().to_vec(); + db.insert( + path, + &i_vec, + Element::new_item(i_vec.to_vec()), + INSERT_OPTIONS, + Some(&transaction), + ) + .unwrap() + .expect("successfully inserted values"); + } +} + +fn insert_range_ref_double_values_db(db: &GroveDb, path: &[&[u8]], ref_key: &[u8], min_i: 
u32, max_i: u32, transaction: &Transaction) +{ + for i in min_i..=max_i { + let i_vec = i.to_be_bytes().to_vec(); + let value = i * 2; + let value_vec = value.to_be_bytes().to_vec(); + db.insert( + path, + &i_vec, + Element::new_reference(ReferencePathType::AbsolutePathReference(vec![ + MAIN_ΚΕΥ.to_vec(), + ref_key.to_vec(), + value_vec.to_vec() + ])), + INSERT_OPTIONS, + Some(&transaction), + ) + .unwrap() + .expect("successfully inserted values"); + } +} + +fn insert_empty_sum_tree_db(db: &GroveDb, path: &[&[u8]], key: &[u8]) +{ + db.insert(path, key, Element::empty_sum_tree(), INSERT_OPTIONS, None) + .unwrap() + .expect("successfully inserted tree"); +} +fn insert_sum_element_db(db: &GroveDb, path: &[&[u8]], min_i: u32, max_i: u32, transaction: &Transaction) +{ + for i in min_i..=max_i { + //let value : u32 = i; + let value = i as u64; + //let value: u64 = 1; + let i_vec = i.to_be_bytes().to_vec(); + db.insert( + path, + &i_vec, + Element::new_sum_item(value as SumValue), + INSERT_OPTIONS, + Some(&transaction), + ) + .unwrap() + .expect("successfully inserted values"); + } +} +fn generate_random_path(prefix: &str, suffix: &str, len: usize) -> String { + let random_string: String = rand::thread_rng() + .sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect(); + format!("{}{}{}", prefix, random_string, suffix) +} + +fn query_db(db: &GroveDb, path: &[&[u8]], key: Vec) { + let path_vec: Vec> = path.iter() + .map(|&slice| slice.to_vec()) + .collect(); + + let mut query = Query::new(); + query.insert_key(key); + + let path_query = PathQuery::new_unsized(path_vec, query.clone()); + + let (elements, _) = db + .query_item_value(&path_query, true, None) + .unwrap() + .expect("expected successful get_path_query"); + for e in elements.into_iter() { + //let be_num = u32::from_be_bytes(e.try_into().expect("Slice with incorrect length")); + println!(">> {:?}", e); + } + + let proof = db.prove_query(&path_query).unwrap().unwrap(); + // Get hash from query proof 
and print to terminal along with GroveDB root hash. + let (verify_hash, result_set) = GroveDb::verify_query(&proof, &path_query).unwrap(); + println!("verify_hash: {:?}", hex::encode(verify_hash)); + if verify_hash == db.root_hash(None).unwrap().unwrap() { + println!("Query verified"); + } else { println!("Verification FAILED"); }; +} + From 62db2461ae1d3a39435289c0c360ae8b6a8baef6 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Mon, 22 Apr 2024 17:13:51 +0300 Subject: [PATCH 09/30] dynamic chunk id calculation --- grovedb/src/lib.rs | 318 ++++++++++++++----------------- tutorials/src/bin/replication.rs | 26 ++- 2 files changed, 153 insertions(+), 191 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 177706e2..c5091353 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -234,6 +234,9 @@ use crate::helpers::raw_decode; use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; +use std::rc::Rc; +use std::cell::RefCell; + #[cfg(feature = "full")] type Hash = [u8; 32]; @@ -241,57 +244,11 @@ type Hash = [u8; 32]; pub struct GroveDb { #[cfg(feature = "full")] db: RocksDbStorage, -} - -pub struct s_db_snapshot { - pub root_hash: CryptoHash, - pub data: Vec<(String, Vec)> -} - -impl s_db_snapshot { - pub fn new() -> s_db_snapshot { - s_db_snapshot { - root_hash: CryptoHash::default(), - data: Vec::new(), - } - } -} - -impl fmt::Debug for s_db_snapshot { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "root_hash:{:?}\n", hex::encode(self.root_hash)); - for (global_chunk_id, _) in self.data.iter() { - write!(f, " global_chunk_id:{:?}\n", global_chunk_id); - } - Ok(()) - } -} -pub struct s_db_snapshot_sorted { - pub root_hash: CryptoHash, - pub data: BTreeMap)>> -} - -impl s_db_snapshot_sorted { - pub fn new() -> s_db_snapshot_sorted { - s_db_snapshot_sorted { - root_hash: CryptoHash::default(), - data: BTreeMap::new(), - } - } -} - -impl fmt::Debug for s_db_snapshot_sorted 
{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "root_hash:{:?}\n", hex::encode(self.root_hash)); - for (prefix, chunk_vec) in self.data.iter() { - write!(f, " prefix:{:?}\n", prefix); - for (chunk_id, _) in chunk_vec.iter() { - write!(f, " chunk_id:{:?}\n", chunk_id); - } - } - Ok(()) - } + version: i32, + pending_chunks: BTreeMap>, + //current_tx: Option>, + //restorer: Restorer, } pub struct s_subtrees_metadata { @@ -330,8 +287,10 @@ pub type TransactionArg<'db, 'a> = Option<&'a Transaction<'db>>; impl GroveDb { /// Opens a given path pub fn open>(path: P) -> Result { + let db = RocksDbStorage::default_rocksdb_with_path(path)?; - Ok(GroveDb { db }) + let pending_chunks = BTreeMap::new(); + Ok(GroveDb { db, version: 1, pending_chunks }) } /// Uses raw iter to delete GroveDB key values pairs from rocksdb @@ -1091,81 +1050,6 @@ impl GroveDb { Ok(issues) } - pub fn s_create_db_snapshot( - &self, - list_only_chunk_ids: bool, - ) -> Result { - let mut db_snapsot = s_db_snapshot::new(); - - db_snapsot.root_hash = self.root_hash(None).unwrap().unwrap(); - - let subtrees_root = self.find_subtrees(&SubtreePath::empty(), None).unwrap()?; - for subtree in subtrees_root.into_iter() { - let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - - let continue_storage_batch = StorageBatch::new(); - - let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); - let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value.unwrap(); - - if (merk.is_empty_tree().unwrap()) { - continue; - } - - let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); - - let mut chunk_id_opt = Some("".to_string()); - while let Some(chunk_id) = chunk_id_opt { - let (chunk, next_chunk_id) = chunk_producer.chunk(chunk_id.as_str()).unwrap(); - - let global_chunk_id = hex::encode(prefix) + &chunk_id; - if (list_only_chunk_ids) { - db_snapsot.data.push((global_chunk_id, 
vec![])); - } - else { - db_snapsot.data.push((global_chunk_id, chunk)); - } - - chunk_id_opt = next_chunk_id; - } - } - - Ok(db_snapsot) - } - - fn s_sort_db_snapshot( - &self, - snapshot: s_db_snapshot, - ) -> Result { - let mut db_snapsot_sorted = s_db_snapshot_sorted::new(); - db_snapsot_sorted.root_hash = snapshot.root_hash; - - let CHUNK_PREFIX_LENGTH: usize = 64; - - for chunk_entry in snapshot.data { - let global_chunk_id = chunk_entry.0; - let chunk_data = chunk_entry.1; - - if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { - return Err(Error::CorruptedData( - "expected global chunk id of at least 64 length".to_string(), - )); - } - - let chunk_prefix = global_chunk_id.chars().take(CHUNK_PREFIX_LENGTH).collect::(); - let chunk_id = global_chunk_id.chars().skip(CHUNK_PREFIX_LENGTH).collect::(); - - db_snapsot_sorted.data.entry(chunk_prefix).or_insert(Vec::new()).push((chunk_id, chunk_data.to_vec())); - } - - for (_key, vec) in db_snapsot_sorted.data.iter_mut() { - vec.sort_by(|a, b| a.0.len().cmp(&b.0.len())); - } - - Ok(db_snapsot_sorted) - } - fn s_get_subtrees_metadata>( &self, path: &SubtreePath, @@ -1232,9 +1116,7 @@ impl GroveDb { let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value?; if (merk.is_empty_tree().unwrap()) { - return Err(Error::CorruptedData( - "Empty merk".to_string(), - )); + return Ok(vec![]); } let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); @@ -1249,82 +1131,149 @@ impl GroveDb { } } - pub fn s_reconstruct_db( - &self, - snapshot: s_db_snapshot + pub fn s_sync_db_demo( + &mut self, + source_db: &GroveDb, ) -> Result<(), Error> { - let mut sorted_snapshot = self.s_sort_db_snapshot(snapshot)?; - //Always start by empty prefix = root - if let Some(chunk_vec) = sorted_snapshot.data.remove(&hex::encode(CryptoHash::default())) { + // Start always by root + let app_hash = source_db.root_hash(None).value.unwrap(); + let root_global_chunk_id = hex::encode(vec![0u8; 32]); + + let root_chunk = 
source_db.s_fetch_chunk(root_global_chunk_id.to_string())?; + let (root_chunk_prefix, _) = s_util_split_global_chunk_id(&root_global_chunk_id)?; + + let mut pending_chunks :BTreeMap> = BTreeMap::new(); + let mut processed_prefixes :BTreeSet = BTreeSet::new(); + { let tx = self.start_transaction(); let merk = self.open_merk_for_replication(SubtreePath::empty(), &tx).unwrap(); - let mut restorer = Restorer::new(merk, sorted_snapshot.root_hash, None); - for (chunk_id, chunk) in chunk_vec { - restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); + let mut restorer = Restorer::new(merk, app_hash, None); + let next_chunk_ids = restorer.process_chunk("".to_string(), root_chunk).expect("should process chunk successfully"); + for next_chunk_id in next_chunk_ids { + let next_global_chunk_id = hex::encode(root_chunk_prefix.to_string()) + &next_chunk_id; + pending_chunks.insert(next_global_chunk_id, vec![]); } + + while (!pending_chunks.is_empty()) { + for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { + match source_db.s_fetch_chunk(global_chunk_id.to_string()) { + Ok(chunk) => { + *chunk_data = chunk; + } + Err(e) => { + println!("Error while updating {}", e); + } + } + } + + // Collect the keys to avoid borrowing issues during removal + let keys: Vec = pending_chunks.keys().cloned().collect(); + + // Iterate over the collected keys and remove each entry from the map + for key in keys { + if let Some(chunk) = pending_chunks.remove(&key) { + let (_, chunk_id) = s_util_split_global_chunk_id(&key)?; + let next_chunk_ids = restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); + for next_chunk_id in next_chunk_ids { + let next_global_chunk_id = hex::encode(root_chunk_prefix.to_string()) + &next_chunk_id; + pending_chunks.insert(next_global_chunk_id, vec![]); + } + } + } + } + restorer.finalize().expect("should finalize"); self.commit_transaction(tx); - } else { - return Err(Error::CorruptedData( - "No 
root prefix chunks found".to_string(), - )); } - - let mut processed_prefixes :BTreeSet = BTreeSet::new(); - processed_prefixes.insert(hex::encode(CryptoHash::default())); - - let mut queue_prefixes_to_be_processed : VecDeque = VecDeque::new(); + processed_prefixes.insert(root_chunk_prefix.to_string()); let mut subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - for prefix in subtrees_metadata.data.keys() { + + let mut current_subtree_opt :Option<(String, Vec>, CryptoHash, CryptoHash)> = None; + for (prefix, prefix_metadata) in &subtrees_metadata.data { if !processed_prefixes.contains(prefix) { - //println!("prefix:{:?} pending for processing", prefix); - queue_prefixes_to_be_processed.push_back(prefix.to_string()); + current_subtree_opt = Some((prefix.to_string(), prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2)); + break; } } - while (!queue_prefixes_to_be_processed.is_empty()) { - while let Some(current_prefix) = queue_prefixes_to_be_processed.pop_front() { - let prefix_metadata = &subtrees_metadata.data[¤t_prefix]; - let s_path = &prefix_metadata.0; - let s_actual_value_hash = &prefix_metadata.1; - let s_elem_value_hash = &prefix_metadata.2; - println!(" about to process{:?} with ({:?}:{:?})", s_util_path_to_string(&s_path), hex::encode(s_actual_value_hash), hex::encode(s_elem_value_hash)); + while current_subtree_opt.is_some() { + if let Some(current_subtree) = current_subtree_opt { + let current_prefix = ¤t_subtree.0; + let current_path = ¤t_subtree.1; + let s_actual_value_hash = ¤t_subtree.2; + let s_elem_value_hash = ¤t_subtree.3; - let subtree_path: Vec<&[u8]> = s_path.iter().map(|vec| vec.as_slice()).collect(); + println!(" about to process prefix:{:?} {:?})", current_prefix, s_util_path_to_string(¤t_path)); + + let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; - if let Some(chunk_vec) = sorted_snapshot.data.remove(¤t_prefix) { - let 
tx = self.start_transaction(); - if (chunk_vec.is_empty()) {println!("empty"); } - let merk = self.open_merk_for_replication(path.into(), &tx).unwrap(); - let mut restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); - for (chunk_id, chunk) in chunk_vec { - restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); + let tx = self.start_transaction(); + let merk = self.open_merk_for_replication(path.into(), &tx).unwrap(); + let mut restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + + let subtree_root_chunk = source_db.s_fetch_chunk(current_prefix.to_string())?; + if (!subtree_root_chunk.is_empty()) { + let next_chunk_ids = restorer.process_chunk("".to_string(), subtree_root_chunk).expect("should process chunk successfully"); + for next_chunk_id in next_chunk_ids { + let next_global_chunk_id = current_prefix.to_string() + &next_chunk_id; + pending_chunks.insert(next_global_chunk_id, vec![]); } + while (!pending_chunks.is_empty()) { + for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { + match source_db.s_fetch_chunk(global_chunk_id.to_string()) { + Ok(chunk) => { + *chunk_data = chunk; + } + Err(e) => { + println!("Error while updating {}", e); + } + } + } + + // Collect the keys to avoid borrowing issues during removal + let keys: Vec = pending_chunks.keys().cloned().collect(); + + // Iterate over the collected keys and remove each entry from the map + for key in keys { + if let Some(chunk) = pending_chunks.remove(&key) { + let (_, chunk_id) = s_util_split_global_chunk_id(&key)?; + let next_chunk_ids = restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); + for next_chunk_id in next_chunk_ids { + let next_global_chunk_id = current_prefix.to_string() + &next_chunk_id; + pending_chunks.insert(next_global_chunk_id, vec![]); + } + } + } + } + restorer.finalize().expect("should finalize"); self.commit_transaction(tx); - } else { - println!(" 
skipping empty {:?}", s_util_path_to_string(&s_path)); } - - processed_prefixes.insert(current_prefix); + else { + self.rollback_transaction(&tx); + println!(" subtree{:?} is empty", s_util_path_to_string(¤t_path)); + } + processed_prefixes.insert(current_prefix.to_string()); + println!(" prefix:{:?} done", current_prefix); } + current_subtree_opt = None; subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - for prefix in subtrees_metadata.data.keys() { + + for (prefix, prefix_metadata) in &subtrees_metadata.data { if !processed_prefixes.contains(prefix) { - queue_prefixes_to_be_processed.push_back(prefix.to_string()); + current_subtree_opt = Some((prefix.to_string(), prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2)); + break; } } } - if (sorted_snapshot.data.len() > 0) { - return Err(Error::CorruptedData( - "Remaining chunks not processed".to_string(), - )); - } + subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + println!("now containing:{:?}", subtrees_metadata); + println!("processed prefixes:{:?}", processed_prefixes); let incorrect_hashes = self.verify_grovedb(None)?; if (incorrect_hashes.len() > 0) { @@ -1347,3 +1296,20 @@ pub fn s_util_path_to_string( } subtree_path_str } + +pub fn s_util_split_global_chunk_id( + global_chunk_id: &String, +) -> Result<(String, String), Error> { + let CHUNK_PREFIX_LENGTH: usize = 64; + + if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { + return Err(Error::CorruptedData( + "expected global chunk id of at least 64 length".to_string(), + )); + } + + let chunk_prefix = global_chunk_id.chars().take(CHUNK_PREFIX_LENGTH).collect::(); + let chunk_id = global_chunk_id.chars().skip(CHUNK_PREFIX_LENGTH).collect::(); + + Ok((chunk_prefix, chunk_id)) +} diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index cfe417d7..8519ca60 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -64,7 +64,7 
@@ fn populate_db(grovedb_path: String) -> GroveDb { db } -fn create_empty_db(grovedb_path: String) -> GroveDb { +fn create_empty_db(grovedb_path: String) -> GroveDb { let db = GroveDb::open(grovedb_path).unwrap(); db } @@ -78,7 +78,7 @@ fn main() { let db_checkpoint_0 = GroveDb::open(path_checkpoint).expect("cannot open grovedb from checkpoint"); let path_copy = generate_random_path("../tutorial-storage/", "/db_copy", 24); - let db_copy = create_empty_db(path_copy.clone()); + let mut db_copy = create_empty_db(path_copy.clone()); println!("\n######### root_hashes:"); let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); @@ -88,20 +88,16 @@ fn main() { let root_hash_copy = db_copy.root_hash(None).unwrap().unwrap(); println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); - let mut snapshot_checkpoint_0 = db_checkpoint_0.s_create_db_snapshot(true).unwrap(); - let mut rng = thread_rng(); // Create a random number generator - snapshot_checkpoint_0.data.shuffle(&mut rng); // Shuffle the vector in place - - println!("\n######## list of available chunks_ids"); - println!("{:?}", snapshot_checkpoint_0); - - println!("\n######## fetching chunks..."); - for (global_chunk_id, chunk_data) in snapshot_checkpoint_0.data.iter_mut() { - *chunk_data = db_checkpoint_0.s_fetch_chunk(global_chunk_id.to_string()).unwrap(); - } - println!("\n######### db_checkpoint_0 -> db_copy state sync"); - db_copy.s_reconstruct_db(snapshot_checkpoint_0).expect("should be able to reconstruct db"); + db_copy.s_sync_db_demo(&db_checkpoint_0).unwrap(); + + println!("\n######### root_hashes:"); + let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); + println!("root_hash_0: {:?}", hex::encode(root_hash_0)); + let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); + println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); + let root_hash_copy = db_copy.root_hash(None).unwrap().unwrap(); + println!("root_hash_copy: {:?}", 
hex::encode(root_hash_copy)); let query_path = &[MAIN_ΚΕΥ, KEY_INT_0]; let query_key = (20487u32).to_be_bytes().to_vec(); From 40564e21770d1399cacc8c051ff8f9295d283a0e Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Mon, 22 Apr 2024 22:48:36 +0300 Subject: [PATCH 10/30] more work --- grovedb/Cargo.toml | 2 + grovedb/src/lib.rs | 261 ++++++++++++++++++++----------- tutorials/src/bin/replication.rs | 2 +- 3 files changed, 173 insertions(+), 92 deletions(-) diff --git a/grovedb/Cargo.toml b/grovedb/Cargo.toml index a7fb382b..55e15d80 100644 --- a/grovedb/Cargo.toml +++ b/grovedb/Cargo.toml @@ -27,6 +27,8 @@ nohash-hasher = { version = "0.2.0", optional = true } indexmap = { version = "1.9.2", optional = true } intmap = { version = "2.0.0", optional = true } grovedb-path = { version = "1.0.0-rc.2", path = "../path" } +blake3 = "1.4.0" +bitvec = "1" [dev-dependencies] rand = "0.8.5" diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index c5091353..14d3ab9a 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -233,9 +233,8 @@ use crate::helpers::raw_decode; #[cfg(feature = "full")] use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; - -use std::rc::Rc; -use std::cell::RefCell; +use blake3; +use bitvec::prelude::*; #[cfg(feature = "full")] type Hash = [u8; 32]; @@ -252,7 +251,7 @@ pub struct GroveDb { } pub struct s_subtrees_metadata { - pub data: BTreeMap>, CryptoHash, CryptoHash)> + pub data: BTreeMap>, CryptoHash, Option)> } impl s_subtrees_metadata { @@ -264,13 +263,38 @@ impl s_subtrees_metadata { } impl fmt::Debug for s_subtrees_metadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (prefix, metadata) in self.data.iter() { + let metadata_path = &metadata.0; + let metadata_path_str = s_util_path_to_string(&metadata_path); + write!(f, " prefix:{:?} -> path:{:?}\n", prefix, metadata_path_str); + } + Ok(()) + } +} + +pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; + +pub struct 
w_subtree_metadata { + pub data: BTreeMap>, CryptoHash, CryptoHash, bool)> +} + +impl w_subtree_metadata { + pub fn new() -> w_subtree_metadata { + w_subtree_metadata { + data: BTreeMap::new(), + } + } +} + +impl fmt::Debug for w_subtree_metadata { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for (prefix, metadata) in self.data.iter() { let metadata_path = &metadata.0; let metadata_path_str = s_util_path_to_string(&metadata_path); let metadata_hash_0 = &metadata.1; let metadata_hash_1 = &metadata.2; - write!(f, " prefix:{:?} -> path:{:?} ({:?}:{:?})\n", prefix, metadata_path_str, hex::encode(metadata_hash_0), hex::encode(metadata_hash_1)); + write!(f, " prefix:{:?} -> path:{:?} ({:?}:{:?})\n", hex::encode(prefix), metadata_path_str, hex::encode(metadata_hash_0), hex::encode(metadata_hash_1)); } Ok(()) } @@ -1050,11 +1074,55 @@ impl GroveDb { Ok(issues) } - fn s_get_subtrees_metadata>( + pub fn w_fetch_chunk( + &self, + global_chunk_id: Vec + ) -> Result, Error> { + let CHUNK_PREFIX_LENGTH: usize = 32; + if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { + return Err(Error::CorruptedData( + "expected global chunk id of at least 32 length".to_string(), + )); + } + + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(32); + + let mut array = [0u8; 32]; // Initialize an array of the correct size with default values + array.copy_from_slice(chunk_prefix); // Copy data from the slice into the array + let chunk_prefix_key: SubtreePrefix = array; + + let subtrees_metadata = self.w_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + + match subtrees_metadata.data.get(&chunk_prefix_key) { + Some(path_data) => { + let subtree = &path_data.0; + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + let continue_storage_batch = StorageBatch::new(); + let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value?; + + if (merk.is_empty_tree().unwrap()) { + return 
Ok(vec![]); + } + + let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); + let (chunk, _) = chunk_producer.chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()).unwrap(); + Ok(chunk) + }, + None => { + return Err(Error::CorruptedData( + "Prefix not found".to_string(), + )); + } + } + } + + fn w_get_subtrees_metadata>( &self, path: &SubtreePath, - ) -> Result { - let mut subtrees_metadata = crate::s_subtrees_metadata::new(); + ) -> Result { + let mut subtrees_metadata = crate::w_subtree_metadata::new(); let subtrees_root = self.find_subtrees(&SubtreePath::empty(), None).unwrap().unwrap(); for subtree in subtrees_root.into_iter() { @@ -1063,6 +1131,7 @@ impl GroveDb { let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); let current_path = SubtreePath::from(path); + let is_root_prefix = current_path.is_root(); let parent_path_opt = current_path.derive_parent(); if (parent_path_opt.is_some()) { @@ -1081,83 +1150,48 @@ impl GroveDb { .expect("value hash should be some"); let actual_value_hash = value_hash(&elem_value).unwrap(); - subtrees_metadata.data.insert(hex::encode(prefix), (current_path.to_vec(), actual_value_hash, elem_value_hash)); + subtrees_metadata.data.insert(prefix, (current_path.to_vec(), actual_value_hash, elem_value_hash, false)); } else { - subtrees_metadata.data.insert(hex::encode(prefix), (current_path.to_vec(), CryptoHash::default(), CryptoHash::default())); + subtrees_metadata.data.insert(prefix, (current_path.to_vec(), CryptoHash::default(), CryptoHash::default(), is_root_prefix)); } } Ok(subtrees_metadata) } - pub fn s_fetch_chunk( - &self, - global_chunk_id: String - ) -> Result, Error> { - let CHUNK_PREFIX_LENGTH: usize = 64; - if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { - return Err(Error::CorruptedData( - "expected global chunk id of at least 64 length".to_string(), - )); - } - - let chunk_prefix = global_chunk_id.chars().take(CHUNK_PREFIX_LENGTH).collect::(); - let chunk_id = 
global_chunk_id.chars().skip(CHUNK_PREFIX_LENGTH).collect::(); - - let subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - - match subtrees_metadata.data.get(&chunk_prefix) { - Some(path_data) => { - let subtree = &path_data.0; - let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - - let continue_storage_batch = StorageBatch::new(); - let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value?; - - if (merk.is_empty_tree().unwrap()) { - return Ok(vec![]); - } - - let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); - let (chunk, _) = chunk_producer.chunk(chunk_id.as_str()).unwrap(); - Ok(chunk) - }, - None => { - return Err(Error::CorruptedData( - "Prefix not found".to_string(), - )); - } - } - } - - pub fn s_sync_db_demo( + pub fn w_sync_db_demo( &mut self, source_db: &GroveDb, ) -> Result<(), Error> { // Start always by root let app_hash = source_db.root_hash(None).value.unwrap(); - let root_global_chunk_id = hex::encode(vec![0u8; 32]); + let root_global_chunk_id = vec![0u8; 32]; + let (root_prefix, _) = w_util_split_global_chunk_id(&root_global_chunk_id)?; + + let root_chunk = source_db.w_fetch_chunk(root_global_chunk_id.clone())?; - let root_chunk = source_db.s_fetch_chunk(root_global_chunk_id.to_string())?; - let (root_chunk_prefix, _) = s_util_split_global_chunk_id(&root_global_chunk_id)?; + let mut pending_chunks :BTreeMap, Vec> = BTreeMap::new(); + let mut processed_prefixes :BTreeSet = BTreeSet::new(); + let mut subtrees_metadata = crate::w_subtree_metadata::new(); + let mut current_subtree_opt :Option<(SubtreePrefix, Vec>, CryptoHash, CryptoHash, bool)> = None; - let mut pending_chunks :BTreeMap> = BTreeMap::new(); - let mut processed_prefixes :BTreeSet = BTreeSet::new(); + + //current_subtree_opt = (SubtreePrefix::default()) { let tx = self.start_transaction(); let merk = 
self.open_merk_for_replication(SubtreePath::empty(), &tx).unwrap(); let mut restorer = Restorer::new(merk, app_hash, None); let next_chunk_ids = restorer.process_chunk("".to_string(), root_chunk).expect("should process chunk successfully"); for next_chunk_id in next_chunk_ids { - let next_global_chunk_id = hex::encode(root_chunk_prefix.to_string()) + &next_chunk_id; + let mut next_global_chunk_id = root_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); pending_chunks.insert(next_global_chunk_id, vec![]); } while (!pending_chunks.is_empty()) { for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { - match source_db.s_fetch_chunk(global_chunk_id.to_string()) { + match source_db.w_fetch_chunk(global_chunk_id.to_vec()) { Ok(chunk) => { *chunk_data = chunk; } @@ -1168,15 +1202,16 @@ impl GroveDb { } // Collect the keys to avoid borrowing issues during removal - let keys: Vec = pending_chunks.keys().cloned().collect(); + let keys: Vec> = pending_chunks.keys().cloned().collect(); // Iterate over the collected keys and remove each entry from the map for key in keys { if let Some(chunk) = pending_chunks.remove(&key) { - let (_, chunk_id) = s_util_split_global_chunk_id(&key)?; + let (_, chunk_id) = w_util_split_global_chunk_id(&key)?; let next_chunk_ids = restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); for next_chunk_id in next_chunk_ids { - let next_global_chunk_id = hex::encode(root_chunk_prefix.to_string()) + &next_chunk_id; + let mut next_global_chunk_id = root_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); pending_chunks.insert(next_global_chunk_id, vec![]); } } @@ -1186,26 +1221,25 @@ impl GroveDb { restorer.finalize().expect("should finalize"); self.commit_transaction(tx); } - processed_prefixes.insert(root_chunk_prefix.to_string()); - let mut subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + 
processed_prefixes.insert(root_prefix); + subtrees_metadata = self.w_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - let mut current_subtree_opt :Option<(String, Vec>, CryptoHash, CryptoHash)> = None; for (prefix, prefix_metadata) in &subtrees_metadata.data { if !processed_prefixes.contains(prefix) { - current_subtree_opt = Some((prefix.to_string(), prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2)); + current_subtree_opt = Some((*prefix, prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2, prefix_metadata.3)); break; } } while current_subtree_opt.is_some() { - if let Some(current_subtree) = current_subtree_opt { + if let Some(ref current_subtree) = current_subtree_opt { let current_prefix = ¤t_subtree.0; let current_path = ¤t_subtree.1; let s_actual_value_hash = ¤t_subtree.2; let s_elem_value_hash = ¤t_subtree.3; - println!(" about to process prefix:{:?} {:?})", current_prefix, s_util_path_to_string(¤t_path)); + println!(" about to process prefix:{:?} {:?})", hex::encode(current_prefix), s_util_path_to_string(¤t_path)); let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; @@ -1214,16 +1248,17 @@ impl GroveDb { let merk = self.open_merk_for_replication(path.into(), &tx).unwrap(); let mut restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); - let subtree_root_chunk = source_db.s_fetch_chunk(current_prefix.to_string())?; + let subtree_root_chunk = source_db.w_fetch_chunk(current_prefix.to_vec())?; if (!subtree_root_chunk.is_empty()) { let next_chunk_ids = restorer.process_chunk("".to_string(), subtree_root_chunk).expect("should process chunk successfully"); for next_chunk_id in next_chunk_ids { - let next_global_chunk_id = current_prefix.to_string() + &next_chunk_id; + let mut next_global_chunk_id = current_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); pending_chunks.insert(next_global_chunk_id, 
vec![]); } while (!pending_chunks.is_empty()) { for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { - match source_db.s_fetch_chunk(global_chunk_id.to_string()) { + match source_db.w_fetch_chunk(global_chunk_id.to_vec()) { Ok(chunk) => { *chunk_data = chunk; } @@ -1234,15 +1269,16 @@ impl GroveDb { } // Collect the keys to avoid borrowing issues during removal - let keys: Vec = pending_chunks.keys().cloned().collect(); + let keys: Vec> = pending_chunks.keys().cloned().collect(); // Iterate over the collected keys and remove each entry from the map for key in keys { if let Some(chunk) = pending_chunks.remove(&key) { - let (_, chunk_id) = s_util_split_global_chunk_id(&key)?; + let (_, chunk_id) = w_util_split_global_chunk_id(&key)?; let next_chunk_ids = restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); for next_chunk_id in next_chunk_ids { - let next_global_chunk_id = current_prefix.to_string() + &next_chunk_id; + let mut next_global_chunk_id = current_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); pending_chunks.insert(next_global_chunk_id, vec![]); } } @@ -1256,25 +1292,21 @@ impl GroveDb { self.rollback_transaction(&tx); println!(" subtree{:?} is empty", s_util_path_to_string(¤t_path)); } - processed_prefixes.insert(current_prefix.to_string()); - println!(" prefix:{:?} done", current_prefix); + processed_prefixes.insert(*current_prefix); + println!(" prefix:{:?} done", hex::encode(current_prefix)); } current_subtree_opt = None; - subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + subtrees_metadata = self.w_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); for (prefix, prefix_metadata) in &subtrees_metadata.data { if !processed_prefixes.contains(prefix) { - current_subtree_opt = Some((prefix.to_string(), prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2)); + current_subtree_opt = Some((*prefix, prefix_metadata.0.to_vec(), 
prefix_metadata.1, prefix_metadata.2, prefix_metadata.3)); break; } } } - subtrees_metadata = self.s_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - println!("now containing:{:?}", subtrees_metadata); - println!("processed prefixes:{:?}", processed_prefixes); - let incorrect_hashes = self.verify_grovedb(None)?; if (incorrect_hashes.len() > 0) { return Err(Error::CorruptedData( @@ -1284,6 +1316,7 @@ impl GroveDb { Ok(()) } + } pub fn s_util_path_to_string( @@ -1297,19 +1330,65 @@ pub fn s_util_path_to_string( subtree_path_str } -pub fn s_util_split_global_chunk_id( - global_chunk_id: &String, -) -> Result<(String, String), Error> { - let CHUNK_PREFIX_LENGTH: usize = 64; - +pub fn w_util_split_global_chunk_id( + global_chunk_id: &Vec, +) -> Result<(SubtreePrefix, String), Error> { + let CHUNK_PREFIX_LENGTH: usize = 32; if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { return Err(Error::CorruptedData( - "expected global chunk id of at least 64 length".to_string(), + "expected global chunk id of at least 32 length".to_string(), )); } - let chunk_prefix = global_chunk_id.chars().take(CHUNK_PREFIX_LENGTH).collect::(); - let chunk_id = global_chunk_id.chars().skip(CHUNK_PREFIX_LENGTH).collect::(); + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(32); + let mut array = [0u8; 32]; // Initialize an array of the correct size with default values + array.copy_from_slice(chunk_prefix); // Copy data from the slice into the array + let chunk_prefix_key: SubtreePrefix = array; + let str_chunk_id = String::from_utf8(chunk_id.to_vec()); + match str_chunk_id { + Ok(s) => Ok((chunk_prefix_key, s)), + Err(e) => return Err(Error::CorruptedData( + "unable to convert to string".to_string(), + )), + } +} - Ok((chunk_prefix, chunk_id)) +/* +pub fn w_util_string_to_compacted_vec_u8( + string: &String, +) -> Result, Error> { + let mut bb = BitVec::new(); + let bits = w_util_string_chunk_id_to_bitset(string)?; + + Ok(bits.as_raw_slice().to_vec()) +} + +pub fn 
w_util_string_chunk_id_to_bitset( + chunk_id: &String, +) -> Result { + let mut bits = BitVec::new(); + for ch in chunk_id.chars() { + match ch { + '1' => bits.push(true), + '0' => bits.push(false), + _ => return Err(Error::CorruptedData("Invalid character in input string".to_string())), // Return an error instead of panicking + } + } + Ok(bits) +} + +pub fn w_util_bitset_to_chunk_id(bits: &BitVec) -> String { + bits.iter().map(|bit| if *bit { '1' } else { '0' }).collect() +} + +/// Converts a BitVec to Vec +fn bitvec_to_vec_u8(bitvec: &BitVec) -> Vec { + bitvec.as_raw_slice().to_vec() } + +/// Converts Vec to BitVec +fn vec_u8_to_bitvec(vec: &Vec) -> BitVec { + BitVec::from_vec(vec.clone()) +} + + */ \ No newline at end of file diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 8519ca60..0ca0c576 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -89,7 +89,7 @@ fn main() { println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); println!("\n######### db_checkpoint_0 -> db_copy state sync"); - db_copy.s_sync_db_demo(&db_checkpoint_0).unwrap(); + db_copy.w_sync_db_demo(&db_checkpoint_0).unwrap(); println!("\n######### root_hashes:"); let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); From 86c72047baf5034c0beed783e1aa6d3ce95025d1 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 23 Apr 2024 21:10:22 +0300 Subject: [PATCH 11/30] more work --- grovedb/src/lib.rs | 190 +++++++++++++++++++++++++++---- tutorials/src/bin/replication.rs | 16 ++- 2 files changed, 180 insertions(+), 26 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 14d3ab9a..286c487d 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -244,32 +244,31 @@ pub struct GroveDb { #[cfg(feature = "full")] db: RocksDbStorage, - version: i32, - pending_chunks: BTreeMap>, - //current_tx: Option>, - //restorer: Restorer, -} - -pub struct s_subtrees_metadata { - pub data: BTreeMap>, 
CryptoHash, Option)> + version: i32 } -impl s_subtrees_metadata { - pub fn new() -> s_subtrees_metadata { - s_subtrees_metadata { - data: BTreeMap::new(), - } - } +pub struct state_sync_info<'db/*, S*/> { + restorer: Option>>, + tx: Option>, + pending_chunks :BTreeSet>, + processed_prefixes :BTreeSet, + current_prefix: Option, + version: i32, } -impl fmt::Debug for s_subtrees_metadata { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (prefix, metadata) in self.data.iter() { - let metadata_path = &metadata.0; - let metadata_path_str = s_util_path_to_string(&metadata_path); - write!(f, " prefix:{:?} -> path:{:?}\n", prefix, metadata_path_str); +impl/**/ state_sync_info<'_/*, S*/> { + pub fn new() -> state_sync_info<'static/*, S*/> { + let pending_chunks = BTreeSet::new(); + let processed_prefixes = BTreeSet::new(); + state_sync_info { + restorer: None, + tx: None, + pending_chunks: pending_chunks, + processed_prefixes: processed_prefixes, + //current_subtree_opt: None, + current_prefix: None, + version: 1 } - Ok(()) } } @@ -313,8 +312,7 @@ impl GroveDb { pub fn open>(path: P) -> Result { let db = RocksDbStorage::default_rocksdb_with_path(path)?; - let pending_chunks = BTreeMap::new(); - Ok(GroveDb { db, version: 1, pending_chunks }) + Ok(GroveDb { db, version: 1 }) } /// Uses raw iter to delete GroveDB key values pairs from rocksdb @@ -1177,7 +1175,6 @@ impl GroveDb { let mut current_subtree_opt :Option<(SubtreePrefix, Vec>, CryptoHash, CryptoHash, bool)> = None; - //current_subtree_opt = (SubtreePrefix::default()) { let tx = self.start_transaction(); let merk = self.open_merk_for_replication(SubtreePath::empty(), &tx).unwrap(); @@ -1317,6 +1314,151 @@ impl GroveDb { Ok(()) } + // pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( + // &'a self, + // state_sync_info: &'db mut state_sync_info<'a/*, S*/>, + // app_hash: CryptoHash, + // ) + + pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( + &'a self, + 
state_sync_info: &'db mut state_sync_info<'a/*, S*/>, + app_hash: CryptoHash, + ) -> Result>, Error>{ + let mut res = vec![]; + + match (&mut state_sync_info.restorer, &state_sync_info.tx, &state_sync_info.current_prefix) { + (None, None, None) => { + if state_sync_info.pending_chunks.is_empty() && state_sync_info.processed_prefixes.is_empty() { + let root_prefix = [0u8; 32]; + state_sync_info.tx = Some(self.start_transaction()); + if let Some(ref_tx) = state_sync_info.tx.as_ref() { + let merk = self.open_merk_for_replication(SubtreePath::empty(), ref_tx).unwrap(); + let restorer = Restorer::new(merk, app_hash, None); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(root_prefix); + state_sync_info.pending_chunks.insert(root_prefix.to_vec()); + + res.push(root_prefix.to_vec()); + } + else { + return Err(Error::InternalError( + "Unable to start a tx", + )); + } + } else { + return Err(Error::InternalError( + "Invalid internal state sync info", + )); + } + }, + _ => { + return Err(Error::InternalError( + "GroveDB has already started a snapshot syncing", + )); + } + } + + Ok(res) + } + + pub fn w_apply_chunk<'db: 'a, 'a/*, S: StorageContext<'db>*/>( + &'a self, + state_sync_info: &'db mut state_sync_info<'a/*, S*/>, + chunk: (Vec, Vec) + ) -> Result>, Error>{ + let mut res = vec![]; + + let (global_chunk_id, chunk_data) = chunk; + let (chunk_prefix, chunk_id) = w_util_split_global_chunk_id(&global_chunk_id)?; + + match (&mut state_sync_info.restorer, &state_sync_info.tx, &state_sync_info.current_prefix) { + (Some(restorer), Some(tx), Some(ref current_prefix)) => { + if (*current_prefix != chunk_prefix) { + return Err(Error::InternalError( + "Invalid incoming prefix", + )); + } + if (!state_sync_info.pending_chunks.contains(&global_chunk_id)) { + return Err(Error::InternalError( + "Incoming global_chunk_id not expected", + )); + } + state_sync_info.pending_chunks.remove(&global_chunk_id); + match 
restorer.process_chunk(chunk_id.to_string(), chunk_data) { + Ok(next_chunk_ids) => { + for next_chunk_id in next_chunk_ids { + let mut next_global_chunk_id = chunk_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); + state_sync_info.pending_chunks.insert(next_global_chunk_id.clone()); + res.push(next_global_chunk_id); + } + }, + _ => { + return Err(Error::InternalError( + "Unable to process incoming chunk", + )); + }, + }; + } + _ => { + return Err(Error::InternalError( + "GroveDB is not in syncing mode", + )); + } + } + + if (res.is_empty()) { + match (state_sync_info.restorer.take(), state_sync_info.tx.take(), state_sync_info.current_prefix.take()) { + (Some(restorer), Some(tx), Some(current_prefix)) => { + //make sure that pending_chunks is empty + if (!restorer.finalize().is_ok()) { + return Err(Error::InternalError( + "Unable to finalize merk", + )); + } + self.commit_transaction(tx); + state_sync_info.processed_prefixes.insert(current_prefix); + let subtrees_metadata = crate::w_subtree_metadata::new(); + for (prefix, prefix_metadata) in &subtrees_metadata.data { + if !state_sync_info.processed_prefixes.contains(prefix) { + let current_path = &prefix_metadata.0; + let s_actual_value_hash = &prefix_metadata.1; + let s_elem_value_hash = &prefix_metadata.2; + + let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + state_sync_info.tx = Some(self.start_transaction()); + if let Some(ref_val) = state_sync_info.tx.as_ref() { + let merk = self.open_merk_for_replication(path.into(), ref_val).unwrap(); + let restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(*prefix); + + let mut root_chunk_prefix = prefix.to_vec(); + state_sync_info.pending_chunks.insert(root_chunk_prefix.clone()); + res.push(root_chunk_prefix); + } + else { + return 
Err(Error::InternalError( + "Unable to start a tx", + )); + } + break; + } + } + }, + _ => { + return Err(Error::InternalError( + "Unable to finalize tree", + )); + } + } + } + + Ok(res) + } } pub fn s_util_path_to_string( diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 0ca0c576..568f2a2a 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use std::ops::Range; use std::path::Path; -use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction}; +use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, state_sync_info}; use grovedb::reference_path::ReferencePathType; use rand::{distributions::Alphanumeric, Rng, thread_rng}; use rand::prelude::SliceRandom; @@ -89,7 +89,9 @@ fn main() { println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); println!("\n######### db_checkpoint_0 -> db_copy state sync"); - db_copy.w_sync_db_demo(&db_checkpoint_0).unwrap(); + sync_db_demo(&db_checkpoint_0, &db_copy).unwrap(); + //db_copy.w_sync_db_demo(&db_checkpoint_0).unwrap(); + return; println!("\n######### root_hashes:"); let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); @@ -215,3 +217,13 @@ fn query_db(db: &GroveDb, path: &[&[u8]], key: Vec) { } else { println!("Verification FAILED"); }; } +fn sync_db_demo( + source_db: &GroveDb, + target_db: &GroveDb, +) -> Result<(), grovedb::Error> { + let mut state_sync_inf = state_sync_info::new(); + let app_hash = source_db.root_hash(None).value.unwrap(); + //target_db.w_start_snapshot_syncing(&mut state_sync_inf, app_hash); + Ok(()) +} + From a87f9f10a8995afb9896360169c67168ec619ba3 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Thu, 25 Apr 2024 11:05:17 +0300 Subject: [PATCH 12/30] more work --- grovedb/src/lib.rs | 103 +++++++++++++++++++++++++++---- merk/src/merk/restore.rs | 2 +- tutorials/src/bin/replication.rs | 13 
++-- 3 files changed, 99 insertions(+), 19 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 286c487d..41ee8c5a 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -247,7 +247,7 @@ pub struct GroveDb { version: i32 } -pub struct state_sync_info<'db/*, S*/> { +pub struct StateSyncInfo<'db/*, S*/> { restorer: Option>>, tx: Option>, pending_chunks :BTreeSet>, @@ -256,20 +256,77 @@ pub struct state_sync_info<'db/*, S*/> { version: i32, } -impl/**/ state_sync_info<'_/*, S*/> { - pub fn new() -> state_sync_info<'static/*, S*/> { +impl/**/ StateSyncInfo<'_/*, S*/> { + /* + pub fn new<'a>() -> StateSyncInfo<'a/*, S*/> { let pending_chunks = BTreeSet::new(); let processed_prefixes = BTreeSet::new(); - state_sync_info { + StateSyncInfo { restorer: None, tx: None, - pending_chunks: pending_chunks, - processed_prefixes: processed_prefixes, + pending_chunks, + processed_prefixes, //current_subtree_opt: None, current_prefix: None, version: 1 } } + + */ +/* + pub fn start_syncing( + &self, + source_db: &GroveDb, + target_db: &GroveDb, + ) -> Result<(), Error> { + let app_hash = source_db.root_hash(None).value.unwrap(); + target_db.w_start_snapshot_syncing(&mut self, app_hash).expect("TODO: panic message"); + Ok(()) + } + + */ + /* + pub fn w_start_snapshot_syncing<'db>( + &'db mut self, + grovedb: &'db GroveDb, + app_hash: CryptoHash, + ) -> Result>, Error>{ + let mut res = vec![]; + + match (&mut self.restorer, &self.tx, &self.current_prefix) { + (None, None, None) => { + if self.pending_chunks.is_empty() && self.processed_prefixes.is_empty() { + let root_prefix = [0u8; 32]; + self.tx = Some(grovedb.start_transaction()); + if let Some(ref_tx) = self.tx.as_ref() { + let merk = grovedb.open_merk_for_replication(SubtreePath::empty(), ref_tx).unwrap(); + let restorer = Restorer::new(merk, app_hash, None); + self.restorer = Some(restorer); + self.current_prefix = Some(root_prefix); + self.pending_chunks.insert(root_prefix.to_vec()); + + 
res.push(root_prefix.to_vec()); + } + else { + return Err(Error::InternalError( + "Unable to start a tx", + )); + } + } else { + return Err(Error::InternalError( + "Invalid internal state sync info", + )); + } + }, + _ => { + return Err(Error::InternalError( + "GroveDB has already started a snapshot syncing", + )); + } + } + + Ok(res) + }*/ } pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; @@ -308,6 +365,21 @@ pub type TransactionArg<'db, 'a> = Option<&'a Transaction<'db>>; #[cfg(feature = "full")] impl GroveDb { + pub fn create_state_sync_info( + &self, + ) -> StateSyncInfo { + let pending_chunks = BTreeSet::new(); + let processed_prefixes = BTreeSet::new(); + StateSyncInfo { + restorer: None, + tx: None, + pending_chunks, + processed_prefixes, + //current_subtree_opt: None, + current_prefix: None, + version: 1 + } + } /// Opens a given path pub fn open>(path: P) -> Result { @@ -387,7 +459,7 @@ impl GroveDb { fn open_merk_for_replication<'db, 'b, B>( &'db self, path: SubtreePath<'b, B>, - tx: &'db Transaction, + tx: &'b Transaction<'db>, ) -> Result>, Error> where B: AsRef<[u8]> + 'b, @@ -1316,13 +1388,20 @@ impl GroveDb { // pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( // &'a self, - // state_sync_info: &'db mut state_sync_info<'a/*, S*/>, + // StateSyncInfo: &'db mut StateSyncInfo<'a/*, S*/>, // app_hash: CryptoHash, // ) - pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( - &'a self, - state_sync_info: &'db mut state_sync_info<'a/*, S*/>, + // pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( + // &'a self, + // StateSyncInfo: &'db mut StateSyncInfo<'a/*, S*/>, + // app_hash: CryptoHash, + // ) + + pub fn w_start_snapshot_syncing<'db>( + &'db self, + //state_sync_info: &'db mut StateSyncInfo<'db>, + mut state_sync_info: StateSyncInfo<'db>, app_hash: CryptoHash, ) -> Result>, Error>{ let mut res = vec![]; @@ -1364,7 +1443,7 @@ impl GroveDb { pub fn w_apply_chunk<'db: 'a, 'a/*, S: 
StorageContext<'db>*/>( &'a self, - state_sync_info: &'db mut state_sync_info<'a/*, S*/>, + state_sync_info: &'db mut StateSyncInfo<'a/*, S*/>, chunk: (Vec, Vec) ) -> Result>, Error>{ let mut res = vec![]; diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 9d5365ca..37a775f1 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -121,7 +121,7 @@ impl<'db, S: StorageContext<'db>> Restorer { } /// Process multi chunks (space optimized chunk proofs that can contain - /// multiple singluar chunks) + /// multiple singular chunks) pub fn process_multi_chunk(&mut self, multi_chunk: Vec) -> Result, Error> { let mut expect_chunk_id = true; let mut chunk_ids = vec![]; diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 568f2a2a..04ffc5d2 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use std::ops::Range; use std::path::Path; -use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, state_sync_info}; +use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, StateSyncInfo}; use grovedb::reference_path::ReferencePathType; use rand::{distributions::Alphanumeric, Rng, thread_rng}; use rand::prelude::SliceRandom; @@ -78,7 +78,7 @@ fn main() { let db_checkpoint_0 = GroveDb::open(path_checkpoint).expect("cannot open grovedb from checkpoint"); let path_copy = generate_random_path("../tutorial-storage/", "/db_copy", 24); - let mut db_copy = create_empty_db(path_copy.clone()); + let db_copy = create_empty_db(path_copy.clone()); println!("\n######### root_hashes:"); let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); @@ -89,7 +89,8 @@ fn main() { println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); println!("\n######### db_checkpoint_0 -> db_copy state sync"); - sync_db_demo(&db_checkpoint_0, &db_copy).unwrap(); + let mut state_info = 
db_copy.create_state_sync_info(); + sync_db_demo(&db_checkpoint_0, &db_copy, &mut state_info).unwrap(); //db_copy.w_sync_db_demo(&db_checkpoint_0).unwrap(); return; @@ -219,11 +220,11 @@ fn query_db(db: &GroveDb, path: &[&[u8]], key: Vec) { fn sync_db_demo( source_db: &GroveDb, - target_db: &GroveDb, + target_db: & GroveDb, + state_sync_info: &mut StateSyncInfo, ) -> Result<(), grovedb::Error> { - let mut state_sync_inf = state_sync_info::new(); let app_hash = source_db.root_hash(None).value.unwrap(); - //target_db.w_start_snapshot_syncing(&mut state_sync_inf, app_hash); + target_db.w_start_snapshot_syncing(state_sync_info, app_hash).expect("TODO: panic message"); Ok(()) } From 3d66267aa3c6ef470c6c4e00e7cc297091cb0daf Mon Sep 17 00:00:00 2001 From: Quantum Explorer Date: Thu, 25 Apr 2024 17:05:53 +0700 Subject: [PATCH 13/30] fix: help with lifetimes --- Cargo.toml | 2 +- grovedb/src/lib.rs | 98 +++++++++++++------------------- tutorials/src/bin/replication.rs | 55 ++++++++++++------ 3 files changed, 77 insertions(+), 78 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b0a38948..6ebd27d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,5 @@ members = [ "node-grove", "storage", "visualize", - "path", + "path" ] diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 41ee8c5a..56925251 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -249,7 +249,6 @@ pub struct GroveDb { pub struct StateSyncInfo<'db/*, S*/> { restorer: Option>>, - tx: Option>, pending_chunks :BTreeSet>, processed_prefixes :BTreeSet, current_prefix: Option, @@ -372,7 +371,6 @@ impl GroveDb { let processed_prefixes = BTreeSet::new(); StateSyncInfo { restorer: None, - tx: None, pending_chunks, processed_prefixes, //current_subtree_opt: None, @@ -459,7 +457,7 @@ impl GroveDb { fn open_merk_for_replication<'db, 'b, B>( &'db self, path: SubtreePath<'b, B>, - tx: &'b Transaction<'db>, + tx: &'db Transaction, ) -> Result>, Error> where B: AsRef<[u8]> + 'b, @@ -1146,7 +1144,7 @@ impl 
GroveDb { pub fn w_fetch_chunk( &self, - global_chunk_id: Vec + global_chunk_id: &[u8] ) -> Result, Error> { let CHUNK_PREFIX_LENGTH: usize = 32; if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { @@ -1239,7 +1237,7 @@ impl GroveDb { let root_global_chunk_id = vec![0u8; 32]; let (root_prefix, _) = w_util_split_global_chunk_id(&root_global_chunk_id)?; - let root_chunk = source_db.w_fetch_chunk(root_global_chunk_id.clone())?; + let root_chunk = source_db.w_fetch_chunk(root_global_chunk_id.as_slice())?; let mut pending_chunks :BTreeMap, Vec> = BTreeMap::new(); let mut processed_prefixes :BTreeSet = BTreeSet::new(); @@ -1260,7 +1258,7 @@ impl GroveDb { while (!pending_chunks.is_empty()) { for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { - match source_db.w_fetch_chunk(global_chunk_id.to_vec()) { + match source_db.w_fetch_chunk(global_chunk_id.as_slice()) { Ok(chunk) => { *chunk_data = chunk; } @@ -1317,7 +1315,7 @@ impl GroveDb { let merk = self.open_merk_for_replication(path.into(), &tx).unwrap(); let mut restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); - let subtree_root_chunk = source_db.w_fetch_chunk(current_prefix.to_vec())?; + let subtree_root_chunk = source_db.w_fetch_chunk(current_prefix.as_slice())?; if (!subtree_root_chunk.is_empty()) { let next_chunk_ids = restorer.process_chunk("".to_string(), subtree_root_chunk).expect("should process chunk successfully"); for next_chunk_id in next_chunk_ids { @@ -1327,7 +1325,7 @@ impl GroveDb { } while (!pending_chunks.is_empty()) { for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { - match source_db.w_fetch_chunk(global_chunk_id.to_vec()) { + match source_db.w_fetch_chunk(global_chunk_id.as_slice()) { Ok(chunk) => { *chunk_data = chunk; } @@ -1400,31 +1398,23 @@ impl GroveDb { pub fn w_start_snapshot_syncing<'db>( &'db self, - //state_sync_info: &'db mut StateSyncInfo<'db>, mut state_sync_info: StateSyncInfo<'db>, app_hash: CryptoHash, - ) -> Result>, Error>{ 
+ tx: &'db Transaction, + ) -> Result<(Vec>, StateSyncInfo), Error>{ let mut res = vec![]; - match (&mut state_sync_info.restorer, &state_sync_info.tx, &state_sync_info.current_prefix) { - (None, None, None) => { + match (&mut state_sync_info.restorer, &state_sync_info.current_prefix) { + (None, None) => { if state_sync_info.pending_chunks.is_empty() && state_sync_info.processed_prefixes.is_empty() { let root_prefix = [0u8; 32]; - state_sync_info.tx = Some(self.start_transaction()); - if let Some(ref_tx) = state_sync_info.tx.as_ref() { - let merk = self.open_merk_for_replication(SubtreePath::empty(), ref_tx).unwrap(); - let restorer = Restorer::new(merk, app_hash, None); - state_sync_info.restorer = Some(restorer); - state_sync_info.current_prefix = Some(root_prefix); - state_sync_info.pending_chunks.insert(root_prefix.to_vec()); + let merk = self.open_merk_for_replication(SubtreePath::empty(), tx).unwrap(); + let restorer = Restorer::new(merk, app_hash, None); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(root_prefix); + state_sync_info.pending_chunks.insert(root_prefix.to_vec()); - res.push(root_prefix.to_vec()); - } - else { - return Err(Error::InternalError( - "Unable to start a tx", - )); - } + res.push(root_prefix.to_vec()); } else { return Err(Error::InternalError( "Invalid internal state sync info", @@ -1438,32 +1428,33 @@ impl GroveDb { } } - Ok(res) + Ok((res, state_sync_info)) } - pub fn w_apply_chunk<'db: 'a, 'a/*, S: StorageContext<'db>*/>( - &'a self, - state_sync_info: &'db mut StateSyncInfo<'a/*, S*/>, - chunk: (Vec, Vec) - ) -> Result>, Error>{ + pub fn w_apply_chunk<'db>( + &'db self, + mut state_sync_info: StateSyncInfo<'db>, + chunk: (&[u8], Vec), + tx: &'db Transaction, + ) -> Result<(Vec>, StateSyncInfo), Error>{ let mut res = vec![]; let (global_chunk_id, chunk_data) = chunk; let (chunk_prefix, chunk_id) = w_util_split_global_chunk_id(&global_chunk_id)?; - match (&mut state_sync_info.restorer, 
&state_sync_info.tx, &state_sync_info.current_prefix) { - (Some(restorer), Some(tx), Some(ref current_prefix)) => { + match (&mut state_sync_info.restorer, &state_sync_info.current_prefix) { + (Some(restorer), Some(ref current_prefix)) => { if (*current_prefix != chunk_prefix) { return Err(Error::InternalError( "Invalid incoming prefix", )); } - if (!state_sync_info.pending_chunks.contains(&global_chunk_id)) { + if (!state_sync_info.pending_chunks.contains(global_chunk_id)) { return Err(Error::InternalError( "Incoming global_chunk_id not expected", )); } - state_sync_info.pending_chunks.remove(&global_chunk_id); + state_sync_info.pending_chunks.remove(global_chunk_id); match restorer.process_chunk(chunk_id.to_string(), chunk_data) { Ok(next_chunk_ids) => { for next_chunk_id in next_chunk_ids { @@ -1488,42 +1479,31 @@ impl GroveDb { } if (res.is_empty()) { - match (state_sync_info.restorer.take(), state_sync_info.tx.take(), state_sync_info.current_prefix.take()) { - (Some(restorer), Some(tx), Some(current_prefix)) => { + match (state_sync_info.restorer.take(), state_sync_info.current_prefix.take()) { + (Some(restorer), Some(current_prefix)) => { //make sure that pending_chunks is empty if (!restorer.finalize().is_ok()) { return Err(Error::InternalError( "Unable to finalize merk", )); } - self.commit_transaction(tx); state_sync_info.processed_prefixes.insert(current_prefix); let subtrees_metadata = crate::w_subtree_metadata::new(); for (prefix, prefix_metadata) in &subtrees_metadata.data { if !state_sync_info.processed_prefixes.contains(prefix) { - let current_path = &prefix_metadata.0; - let s_actual_value_hash = &prefix_metadata.1; - let s_elem_value_hash = &prefix_metadata.2; + let (current_path, s_actual_value_hash, s_elem_value_hash, _) = &prefix_metadata; let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; - state_sync_info.tx = Some(self.start_transaction()); - if let Some(ref_val) = 
state_sync_info.tx.as_ref() { - let merk = self.open_merk_for_replication(path.into(), ref_val).unwrap(); - let restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); - state_sync_info.restorer = Some(restorer); - state_sync_info.current_prefix = Some(*prefix); + let merk = self.open_merk_for_replication(path.into(), tx).unwrap(); + let restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(*prefix); - let mut root_chunk_prefix = prefix.to_vec(); - state_sync_info.pending_chunks.insert(root_chunk_prefix.clone()); - res.push(root_chunk_prefix); - } - else { - return Err(Error::InternalError( - "Unable to start a tx", - )); - } + let mut root_chunk_prefix = prefix.to_vec(); + state_sync_info.pending_chunks.insert(root_chunk_prefix.clone()); + res.push(root_chunk_prefix); break; } } @@ -1536,7 +1516,7 @@ impl GroveDb { } } - Ok(res) + Ok((res, state_sync_info)) } } @@ -1552,7 +1532,7 @@ pub fn s_util_path_to_string( } pub fn w_util_split_global_chunk_id( - global_chunk_id: &Vec, + global_chunk_id: &[u8], ) -> Result<(SubtreePrefix, String), Error> { let CHUNK_PREFIX_LENGTH: usize = 32; if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 04ffc5d2..681f77af 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -70,36 +70,42 @@ fn create_empty_db(grovedb_path: String) -> GroveDb { } fn main() { - let path_0 = generate_random_path("../tutorial-storage/", "/db_0", 24); - let db_0 = populate_db(path_0.clone()); - let checkpoint_dir = path_0 + "/checkpoint"; + let path_source = generate_random_path("../tutorial-storage/", "/db_0", 24); + let db_source = populate_db(path_source.clone()); + + let checkpoint_dir = path_source + "/checkpoint"; let path_checkpoint = Path::new(checkpoint_dir.as_str()); - 
db_0.create_checkpoint(&path_checkpoint).expect("cannot create checkpoint"); - let db_checkpoint_0 = GroveDb::open(path_checkpoint).expect("cannot open grovedb from checkpoint"); - let path_copy = generate_random_path("../tutorial-storage/", "/db_copy", 24); - let db_copy = create_empty_db(path_copy.clone()); + db_source.create_checkpoint(&path_checkpoint).expect("cannot create checkpoint"); + let db_checkpoint_0 = GroveDb::open(path_checkpoint).expect("cannot open groveDB from checkpoint"); + + let path_destination = generate_random_path("../tutorial-storage/", "/db_copy", 24); + let db_destination = create_empty_db(path_destination.clone()); println!("\n######### root_hashes:"); - let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); + let root_hash_0 = db_source.root_hash(None).unwrap().unwrap(); println!("root_hash_0: {:?}", hex::encode(root_hash_0)); let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); - let root_hash_copy = db_copy.root_hash(None).unwrap().unwrap(); + let root_hash_copy = db_destination.root_hash(None).unwrap().unwrap(); println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); println!("\n######### db_checkpoint_0 -> db_copy state sync"); - let mut state_info = db_copy.create_state_sync_info(); - sync_db_demo(&db_checkpoint_0, &db_copy, &mut state_info).unwrap(); + let state_info = db_destination.create_state_sync_info(); + + let transaction = db_destination.start_transaction(); + sync_db_demo(&db_checkpoint_0, &db_destination, state_info, &transaction).unwrap(); //db_copy.w_sync_db_demo(&db_checkpoint_0).unwrap(); - return; + + db_destination.commit_transaction(transaction).unwrap().expect("expected to commit transaction"); + println!("\n######### root_hashes:"); - let root_hash_0 = db_0.root_hash(None).unwrap().unwrap(); + let root_hash_0 = db_source.root_hash(None).unwrap().unwrap(); println!("root_hash_0: {:?}", 
hex::encode(root_hash_0)); let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); - let root_hash_copy = db_copy.root_hash(None).unwrap().unwrap(); + let root_hash_copy = db_destination.root_hash(None).unwrap().unwrap(); println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); let query_path = &[MAIN_ΚΕΥ, KEY_INT_0]; @@ -107,7 +113,7 @@ fn main() { println!("\n######## Query on db_checkpoint_0:"); query_db(&db_checkpoint_0, query_path, query_key.clone()); println!("\n######## Query on db_copy:"); - query_db(&db_copy, query_path, query_key.clone()); + query_db(&db_destination, query_path, query_key.clone()); return; @@ -220,11 +226,24 @@ fn query_db(db: &GroveDb, path: &[&[u8]], key: Vec) { fn sync_db_demo( source_db: &GroveDb, - target_db: & GroveDb, - state_sync_info: &mut StateSyncInfo, + target_db: &GroveDb, + state_sync_info: StateSyncInfo, + tx: &Transaction, ) -> Result<(), grovedb::Error> { let app_hash = source_db.root_hash(None).value.unwrap(); - target_db.w_start_snapshot_syncing(state_sync_info, app_hash).expect("TODO: panic message"); + let (chunk_ids, mut state_sync_info) = target_db.w_start_snapshot_syncing(state_sync_info, app_hash, tx)?; + + let mut chunk_queue : VecDeque> = VecDeque::new(); + + chunk_queue.extend(chunk_ids); + + while let Some(chunk_id) = chunk_queue.pop_front() { + let ops = source_db.w_fetch_chunk(chunk_id.as_slice())?; + let (more_chunks, new_state_sync_info) = target_db.w_apply_chunk(state_sync_info, (chunk_id.as_slice(), ops), tx)?; + state_sync_info = new_state_sync_info; + chunk_queue.extend(more_chunks); + } + Ok(()) } From aaeb4deb24e981f91e1afe3c83473c8189e6d2f0 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Fri, 26 Apr 2024 16:00:15 +0300 Subject: [PATCH 14/30] more work --- grovedb/src/lib.rs | 469 +++++++------------------------ tutorials/src/bin/replication.rs | 54 ++-- 2 files changed, 130 
insertions(+), 393 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 56925251..e957ff49 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -169,9 +169,7 @@ mod visualize; #[cfg(feature = "full")] use std::{collections::HashMap, option::Option::None, path::Path, fmt}; -use std::collections::{BTreeMap, BTreeSet, LinkedList, VecDeque}; -use std::marker::PhantomData; -use itertools::Chunk; +use std::collections::{BTreeMap, BTreeSet}; #[cfg(any(feature = "full", feature = "verify"))] use element::helpers; @@ -203,12 +201,8 @@ use grovedb_merk::{ tree::{combine_hash, value_hash}, BatchEntry, CryptoHash, KVIterator, Merk, }; -use grovedb_merk::{ChunkProducer, Restorer, TreeFeatureType}; -use grovedb_merk::Error::ChunkingError; -use grovedb_merk::proofs::{Node, Op}; -use grovedb_merk::proofs::chunk::error::ChunkError; -use grovedb_merk::proofs::chunk::util::{generate_traversal_instruction_as_string, number_of_chunks}; -use grovedb_merk::tree::kv_digest_to_kv_hash; +use grovedb_merk::{ChunkProducer, Restorer}; +use grovedb_merk::proofs::{Op}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -234,7 +228,6 @@ use crate::helpers::raw_decode; use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; use blake3; -use bitvec::prelude::*; #[cfg(feature = "full")] type Hash = [u8; 32]; @@ -247,109 +240,34 @@ pub struct GroveDb { version: i32 } -pub struct StateSyncInfo<'db/*, S*/> { +pub struct StateSyncInfo<'db> { restorer: Option>>, - pending_chunks :BTreeSet>, processed_prefixes :BTreeSet, current_prefix: Option, - version: i32, -} - -impl/**/ StateSyncInfo<'_/*, S*/> { - /* - pub fn new<'a>() -> StateSyncInfo<'a/*, S*/> { - let pending_chunks = BTreeSet::new(); - let processed_prefixes = BTreeSet::new(); - StateSyncInfo { - restorer: None, - tx: None, - pending_chunks, - processed_prefixes, - //current_subtree_opt: 
None, - current_prefix: None, - version: 1 - } - } - - */ -/* - pub fn start_syncing( - &self, - source_db: &GroveDb, - target_db: &GroveDb, - ) -> Result<(), Error> { - let app_hash = source_db.root_hash(None).value.unwrap(); - target_db.w_start_snapshot_syncing(&mut self, app_hash).expect("TODO: panic message"); - Ok(()) - } - - */ - /* - pub fn w_start_snapshot_syncing<'db>( - &'db mut self, - grovedb: &'db GroveDb, - app_hash: CryptoHash, - ) -> Result>, Error>{ - let mut res = vec![]; - - match (&mut self.restorer, &self.tx, &self.current_prefix) { - (None, None, None) => { - if self.pending_chunks.is_empty() && self.processed_prefixes.is_empty() { - let root_prefix = [0u8; 32]; - self.tx = Some(grovedb.start_transaction()); - if let Some(ref_tx) = self.tx.as_ref() { - let merk = grovedb.open_merk_for_replication(SubtreePath::empty(), ref_tx).unwrap(); - let restorer = Restorer::new(merk, app_hash, None); - self.restorer = Some(restorer); - self.current_prefix = Some(root_prefix); - self.pending_chunks.insert(root_prefix.to_vec()); - - res.push(root_prefix.to_vec()); - } - else { - return Err(Error::InternalError( - "Unable to start a tx", - )); - } - } else { - return Err(Error::InternalError( - "Invalid internal state sync info", - )); - } - }, - _ => { - return Err(Error::InternalError( - "GroveDB has already started a snapshot syncing", - )); - } - } - - Ok(res) - }*/ + pending_chunks :BTreeSet>, + num_processed_chunks: usize, } pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; -pub struct w_subtree_metadata { - pub data: BTreeMap>, CryptoHash, CryptoHash, bool)> +pub struct SubtreesMetadata { + pub data: BTreeMap>, CryptoHash, CryptoHash)> } -impl w_subtree_metadata { - pub fn new() -> w_subtree_metadata { - w_subtree_metadata { +impl SubtreesMetadata { + pub fn new() -> SubtreesMetadata { + SubtreesMetadata { data: BTreeMap::new(), } } } -impl fmt::Debug for w_subtree_metadata { +impl fmt::Debug for SubtreesMetadata { fn fmt(&self, f: &mut 
fmt::Formatter) -> fmt::Result { for (prefix, metadata) in self.data.iter() { let metadata_path = &metadata.0; let metadata_path_str = s_util_path_to_string(&metadata_path); - let metadata_hash_0 = &metadata.1; - let metadata_hash_1 = &metadata.2; - write!(f, " prefix:{:?} -> path:{:?} ({:?}:{:?})\n", hex::encode(prefix), metadata_path_str, hex::encode(metadata_hash_0), hex::encode(metadata_hash_1)); + write!(f, " prefix:{:?} -> path:{:?}\n", hex::encode(prefix), metadata_path_str); } Ok(()) } @@ -371,11 +289,11 @@ impl GroveDb { let processed_prefixes = BTreeSet::new(); StateSyncInfo { restorer: None, - pending_chunks, processed_prefixes, - //current_subtree_opt: None, current_prefix: None, - version: 1 + pending_chunks, + num_processed_chunks: 0, + } } /// Opens a given path @@ -1142,9 +1060,10 @@ impl GroveDb { Ok(issues) } - pub fn w_fetch_chunk( - &self, - global_chunk_id: &[u8] + pub fn fetch_chunk<'db>( + &'db self, + global_chunk_id: &[u8], + tx: &'db Transaction, ) -> Result, Error> { let CHUNK_PREFIX_LENGTH: usize = 32; if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { @@ -1153,13 +1072,14 @@ impl GroveDb { )); } - let (chunk_prefix, chunk_id) = global_chunk_id.split_at(32); + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(CHUNK_PREFIX_LENGTH); - let mut array = [0u8; 32]; // Initialize an array of the correct size with default values - array.copy_from_slice(chunk_prefix); // Copy data from the slice into the array + let mut array = [0u8; 32]; + array.copy_from_slice(chunk_prefix); let chunk_prefix_key: SubtreePrefix = array; - let subtrees_metadata = self.w_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); + let tx = self.start_transaction(); + let subtrees_metadata = self.get_subtrees_metadata(&SubtreePath::empty(), &tx)?; match subtrees_metadata.data.get(&chunk_prefix_key) { Some(path_data) => { @@ -1167,16 +1087,34 @@ impl GroveDb { let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = 
&subtree_path; - let continue_storage_batch = StorageBatch::new(); - let merk = self.open_batch_merk_at_path(&continue_storage_batch, path.into(), false).value?; + let merk = self.open_transactional_merk_at_path(path.into(), &tx, None).value?; if (merk.is_empty_tree().unwrap()) { return Ok(vec![]); } - let mut chunk_producer = ChunkProducer::new(&merk).unwrap(); - let (chunk, _) = chunk_producer.chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()).unwrap(); - Ok(chunk) + let mut chunk_producer_res = ChunkProducer::new(&merk); + match chunk_producer_res { + Ok(mut chunk_producer) => { + let chunk_res = chunk_producer.chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); + match chunk_res { + Ok((chunk, _)) => { + Ok(chunk) + } + Err(_) => { + return Err(Error::CorruptedData( + "Unable to create to load chunk".to_string(), + )); + } + } + } + Err(_) => { + return Err(Error::CorruptedData( + "Unable to create Chunk producer".to_string(), + )); + } + } + }, None => { return Err(Error::CorruptedData( @@ -1186,217 +1124,44 @@ impl GroveDb { } } - fn w_get_subtrees_metadata>( - &self, + pub fn get_subtrees_metadata<'db, B: AsRef<[u8]>>( + &'db self, path: &SubtreePath, - ) -> Result { - let mut subtrees_metadata = crate::w_subtree_metadata::new(); + tx: &'db Transaction, + ) -> Result { + let mut subtrees_metadata = crate::SubtreesMetadata::new(); - let subtrees_root = self.find_subtrees(&SubtreePath::empty(), None).unwrap().unwrap(); + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; for subtree in subtrees_root.into_iter() { let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); let current_path = SubtreePath::from(path); - let is_root_prefix = current_path.is_root(); let parent_path_opt = current_path.derive_parent(); if (parent_path_opt.is_some()) { let parent_path = 
parent_path_opt.unwrap().0; - let continue_storage_batch = StorageBatch::new(); - let parent_merk = self.open_batch_merk_at_path(&continue_storage_batch, parent_path, false).value.unwrap(); + let parent_merk = self.open_transactional_merk_at_path(parent_path, tx, None).value?; let parent_key = subtree.last().unwrap(); let (elem_value, elem_value_hash) = parent_merk .get_value_and_value_hash( parent_key, true, None::<&fn(&[u8]) -> Option>, - ) - .unwrap() - .expect("should get value hash") - .expect("value hash should be some"); + ).value.expect("should get value hash").expect("value hash should be some"); let actual_value_hash = value_hash(&elem_value).unwrap(); - subtrees_metadata.data.insert(prefix, (current_path.to_vec(), actual_value_hash, elem_value_hash, false)); + subtrees_metadata.data.insert(prefix, (current_path.to_vec(), actual_value_hash, elem_value_hash)); } else { - subtrees_metadata.data.insert(prefix, (current_path.to_vec(), CryptoHash::default(), CryptoHash::default(), is_root_prefix)); + subtrees_metadata.data.insert(prefix, (current_path.to_vec(), CryptoHash::default(), CryptoHash::default())); } } Ok(subtrees_metadata) } - pub fn w_sync_db_demo( - &mut self, - source_db: &GroveDb, - ) -> Result<(), Error> { - - // Start always by root - let app_hash = source_db.root_hash(None).value.unwrap(); - let root_global_chunk_id = vec![0u8; 32]; - let (root_prefix, _) = w_util_split_global_chunk_id(&root_global_chunk_id)?; - - let root_chunk = source_db.w_fetch_chunk(root_global_chunk_id.as_slice())?; - - let mut pending_chunks :BTreeMap, Vec> = BTreeMap::new(); - let mut processed_prefixes :BTreeSet = BTreeSet::new(); - let mut subtrees_metadata = crate::w_subtree_metadata::new(); - let mut current_subtree_opt :Option<(SubtreePrefix, Vec>, CryptoHash, CryptoHash, bool)> = None; - - - { - let tx = self.start_transaction(); - let merk = self.open_merk_for_replication(SubtreePath::empty(), &tx).unwrap(); - let mut restorer = Restorer::new(merk, app_hash, 
None); - let next_chunk_ids = restorer.process_chunk("".to_string(), root_chunk).expect("should process chunk successfully"); - for next_chunk_id in next_chunk_ids { - let mut next_global_chunk_id = root_prefix.to_vec(); - next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - pending_chunks.insert(next_global_chunk_id, vec![]); - } - - while (!pending_chunks.is_empty()) { - for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { - match source_db.w_fetch_chunk(global_chunk_id.as_slice()) { - Ok(chunk) => { - *chunk_data = chunk; - } - Err(e) => { - println!("Error while updating {}", e); - } - } - } - - // Collect the keys to avoid borrowing issues during removal - let keys: Vec> = pending_chunks.keys().cloned().collect(); - - // Iterate over the collected keys and remove each entry from the map - for key in keys { - if let Some(chunk) = pending_chunks.remove(&key) { - let (_, chunk_id) = w_util_split_global_chunk_id(&key)?; - let next_chunk_ids = restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); - for next_chunk_id in next_chunk_ids { - let mut next_global_chunk_id = root_prefix.to_vec(); - next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - pending_chunks.insert(next_global_chunk_id, vec![]); - } - } - } - } - - restorer.finalize().expect("should finalize"); - self.commit_transaction(tx); - } - - processed_prefixes.insert(root_prefix); - subtrees_metadata = self.w_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - - for (prefix, prefix_metadata) in &subtrees_metadata.data { - if !processed_prefixes.contains(prefix) { - current_subtree_opt = Some((*prefix, prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2, prefix_metadata.3)); - break; - } - } - - while current_subtree_opt.is_some() { - if let Some(ref current_subtree) = current_subtree_opt { - let current_prefix = ¤t_subtree.0; - let current_path = ¤t_subtree.1; - let s_actual_value_hash = ¤t_subtree.2; - let 
s_elem_value_hash = ¤t_subtree.3; - - println!(" about to process prefix:{:?} {:?})", hex::encode(current_prefix), s_util_path_to_string(¤t_path)); - - let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - - let tx = self.start_transaction(); - let merk = self.open_merk_for_replication(path.into(), &tx).unwrap(); - let mut restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); - - let subtree_root_chunk = source_db.w_fetch_chunk(current_prefix.as_slice())?; - if (!subtree_root_chunk.is_empty()) { - let next_chunk_ids = restorer.process_chunk("".to_string(), subtree_root_chunk).expect("should process chunk successfully"); - for next_chunk_id in next_chunk_ids { - let mut next_global_chunk_id = current_prefix.to_vec(); - next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - pending_chunks.insert(next_global_chunk_id, vec![]); - } - while (!pending_chunks.is_empty()) { - for (global_chunk_id, chunk_data) in pending_chunks.iter_mut() { - match source_db.w_fetch_chunk(global_chunk_id.as_slice()) { - Ok(chunk) => { - *chunk_data = chunk; - } - Err(e) => { - println!("Error while updating {}", e); - } - } - } - - // Collect the keys to avoid borrowing issues during removal - let keys: Vec> = pending_chunks.keys().cloned().collect(); - - // Iterate over the collected keys and remove each entry from the map - for key in keys { - if let Some(chunk) = pending_chunks.remove(&key) { - let (_, chunk_id) = w_util_split_global_chunk_id(&key)?; - let next_chunk_ids = restorer.process_chunk(chunk_id, chunk).expect("should process chunk successfully"); - for next_chunk_id in next_chunk_ids { - let mut next_global_chunk_id = current_prefix.to_vec(); - next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - pending_chunks.insert(next_global_chunk_id, vec![]); - } - } - } - } - - restorer.finalize().expect("should finalize"); - self.commit_transaction(tx); - } - else 
{ - self.rollback_transaction(&tx); - println!(" subtree{:?} is empty", s_util_path_to_string(¤t_path)); - } - processed_prefixes.insert(*current_prefix); - println!(" prefix:{:?} done", hex::encode(current_prefix)); - } - - current_subtree_opt = None; - subtrees_metadata = self.w_get_subtrees_metadata(&SubtreePath::empty()).unwrap(); - - for (prefix, prefix_metadata) in &subtrees_metadata.data { - if !processed_prefixes.contains(prefix) { - current_subtree_opt = Some((*prefix, prefix_metadata.0.to_vec(), prefix_metadata.1, prefix_metadata.2, prefix_metadata.3)); - break; - } - } - } - - let incorrect_hashes = self.verify_grovedb(None)?; - if (incorrect_hashes.len() > 0) { - return Err(Error::CorruptedData( - "DB verification failed".to_string(), - )); - } - - Ok(()) - } - - // pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( - // &'a self, - // StateSyncInfo: &'db mut StateSyncInfo<'a/*, S*/>, - // app_hash: CryptoHash, - // ) - - // pub fn w_start_snapshot_syncing<'db: 'a, 'a/*, S: StorageContext<'db>*/>( - // &'a self, - // StateSyncInfo: &'db mut StateSyncInfo<'a/*, S*/>, - // app_hash: CryptoHash, - // ) - - pub fn w_start_snapshot_syncing<'db>( + pub fn start_snapshot_syncing<'db>( &'db self, mut state_sync_info: StateSyncInfo<'db>, app_hash: CryptoHash, @@ -1431,7 +1196,7 @@ impl GroveDb { Ok((res, state_sync_info)) } - pub fn w_apply_chunk<'db>( + pub fn apply_chunk<'db>( &'db self, mut state_sync_info: StateSyncInfo<'db>, chunk: (&[u8], Vec), @@ -1440,7 +1205,7 @@ impl GroveDb { let mut res = vec![]; let (global_chunk_id, chunk_data) = chunk; - let (chunk_prefix, chunk_id) = w_util_split_global_chunk_id(&global_chunk_id)?; + let (chunk_prefix, chunk_id) = util_split_global_chunk_id(&global_chunk_id)?; match (&mut state_sync_info.restorer, &state_sync_info.current_prefix) { (Some(restorer), Some(ref current_prefix)) => { @@ -1455,21 +1220,24 @@ impl GroveDb { )); } state_sync_info.pending_chunks.remove(global_chunk_id); - match 
restorer.process_chunk(chunk_id.to_string(), chunk_data) { - Ok(next_chunk_ids) => { - for next_chunk_id in next_chunk_ids { - let mut next_global_chunk_id = chunk_prefix.to_vec(); - next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - state_sync_info.pending_chunks.insert(next_global_chunk_id.clone()); - res.push(next_global_chunk_id); - } - }, - _ => { - return Err(Error::InternalError( - "Unable to process incoming chunk", - )); - }, - }; + if (!chunk_data.is_empty()) { + match restorer.process_chunk(chunk_id.to_string(), chunk_data) { + Ok(next_chunk_ids) => { + state_sync_info.num_processed_chunks += 1; + for next_chunk_id in next_chunk_ids { + let mut next_global_chunk_id = chunk_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); + state_sync_info.pending_chunks.insert(next_global_chunk_id.clone()); + res.push(next_global_chunk_id); + } + }, + _ => { + return Err(Error::InternalError( + "Unable to process incoming chunk", + )); + }, + }; + } } _ => { return Err(Error::InternalError( @@ -1479,19 +1247,29 @@ impl GroveDb { } if (res.is_empty()) { + if (!state_sync_info.pending_chunks.is_empty()) { + return Ok((res, state_sync_info)) + } match (state_sync_info.restorer.take(), state_sync_info.current_prefix.take()) { (Some(restorer), Some(current_prefix)) => { - //make sure that pending_chunks is empty - if (!restorer.finalize().is_ok()) { - return Err(Error::InternalError( - "Unable to finalize merk", - )); + if (state_sync_info.num_processed_chunks > 0) { + if (!restorer.finalize().is_ok()) { + return Err(Error::InternalError( + "Unable to finalize merk", + )); + } } state_sync_info.processed_prefixes.insert(current_prefix); - let subtrees_metadata = crate::w_subtree_metadata::new(); + + let subtrees_metadata = self.get_subtrees_metadata(&SubtreePath::empty(), tx)?; + if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { + let v_path = &value.0; + println!(" path:{:?} done", s_util_path_to_string(&value.0)); 
+ } + for (prefix, prefix_metadata) in &subtrees_metadata.data { if !state_sync_info.processed_prefixes.contains(prefix) { - let (current_path, s_actual_value_hash, s_elem_value_hash, _) = &prefix_metadata; + let (current_path, s_actual_value_hash, s_elem_value_hash) = &prefix_metadata; let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; @@ -1500,8 +1278,9 @@ impl GroveDb { let restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); state_sync_info.restorer = Some(restorer); state_sync_info.current_prefix = Some(*prefix); + state_sync_info.num_processed_chunks = 0; - let mut root_chunk_prefix = prefix.to_vec(); + let root_chunk_prefix = prefix.to_vec(); state_sync_info.pending_chunks.insert(root_chunk_prefix.clone()); res.push(root_chunk_prefix); break; @@ -1531,7 +1310,7 @@ pub fn s_util_path_to_string( subtree_path_str } -pub fn w_util_split_global_chunk_id( +pub fn util_split_global_chunk_id( global_chunk_id: &[u8], ) -> Result<(SubtreePrefix, String), Error> { let CHUNK_PREFIX_LENGTH: usize = 32; @@ -1542,54 +1321,14 @@ pub fn w_util_split_global_chunk_id( } let (chunk_prefix, chunk_id) = global_chunk_id.split_at(32); - let mut array = [0u8; 32]; // Initialize an array of the correct size with default values - array.copy_from_slice(chunk_prefix); // Copy data from the slice into the array + let mut array = [0u8; 32]; + array.copy_from_slice(chunk_prefix); let chunk_prefix_key: SubtreePrefix = array; let str_chunk_id = String::from_utf8(chunk_id.to_vec()); match str_chunk_id { Ok(s) => Ok((chunk_prefix_key, s)), - Err(e) => return Err(Error::CorruptedData( - "unable to convert to string".to_string(), + Err(_) => return Err(Error::CorruptedData( + "unable to convert chunk id to string".to_string(), )), } -} - -/* -pub fn w_util_string_to_compacted_vec_u8( - string: &String, -) -> Result, Error> { - let mut bb = BitVec::new(); - let bits = 
w_util_string_chunk_id_to_bitset(string)?; - - Ok(bits.as_raw_slice().to_vec()) -} - -pub fn w_util_string_chunk_id_to_bitset( - chunk_id: &String, -) -> Result { - let mut bits = BitVec::new(); - for ch in chunk_id.chars() { - match ch { - '1' => bits.push(true), - '0' => bits.push(false), - _ => return Err(Error::CorruptedData("Invalid character in input string".to_string())), // Return an error instead of panicking - } - } - Ok(bits) -} - -pub fn w_util_bitset_to_chunk_id(bits: &BitVec) -> String { - bits.iter().map(|bit| if *bit { '1' } else { '0' }).collect() -} - -/// Converts a BitVec to Vec -fn bitvec_to_vec_u8(bitvec: &BitVec) -> Vec { - bitvec.as_raw_slice().to_vec() -} - -/// Converts Vec to BitVec -fn vec_u8_to_bitvec(vec: &Vec) -> BitVec { - BitVec::from_vec(vec.clone()) -} - - */ \ No newline at end of file +} \ No newline at end of file diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 681f77af..e5bafd08 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -1,23 +1,10 @@ use std::collections::VecDeque; -use std::ops::Range; use std::path::Path; use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, StateSyncInfo}; use grovedb::reference_path::ReferencePathType; -use rand::{distributions::Alphanumeric, Rng, thread_rng}; -use rand::prelude::SliceRandom; +use rand::{distributions::Alphanumeric, Rng, }; use grovedb::element::SumValue; -use grovedb::query_result_type::QueryResultType; -use grovedb_merk::{BatchEntry, ChunkProducer, CryptoHash, Error, Op}; -use grovedb_merk::Error::{EdError, StorageError}; -use grovedb_merk::proofs::chunk::error::ChunkError; -use grovedb_merk::Restorer; -use grovedb_merk::tree::kv::ValueDefinedCostType; -use grovedb_merk::tree::{RefWalker, TreeNode}; -use grovedb_merk::TreeFeatureType::BasicMerkNode; -use grovedb_path::{SubtreePath, SubtreePathBuilder}; -use grovedb_storage::{StorageBatch, StorageContext}; -use 
grovedb_storage::rocksdb_storage::PrefixedRocksDbStorageContext; -use grovedb_visualize::Visualize; +use grovedb_path::{SubtreePath}; const MAIN_ΚΕΥ: &[u8] = b"key_main"; const MAIN_ΚΕΥ_EMPTY: &[u8] = b"key_main_empty"; @@ -90,15 +77,26 @@ fn main() { let root_hash_copy = db_destination.root_hash(None).unwrap().unwrap(); println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); + println!("\n######### source_subtree_metadata"); + let source_tx = db_source.start_transaction(); + let subtrees_metadata = db_source.get_subtrees_metadata(&SubtreePath::empty(), &source_tx).unwrap(); + println!("{:?}", subtrees_metadata); + println!("\n######### db_checkpoint_0 -> db_copy state sync"); let state_info = db_destination.create_state_sync_info(); - - let transaction = db_destination.start_transaction(); - sync_db_demo(&db_checkpoint_0, &db_destination, state_info, &transaction).unwrap(); - //db_copy.w_sync_db_demo(&db_checkpoint_0).unwrap(); - - db_destination.commit_transaction(transaction).unwrap().expect("expected to commit transaction"); - + let source_tx = db_source.start_transaction(); + let target_tx = db_destination.start_transaction(); + sync_db_demo(&db_checkpoint_0, &db_destination, state_info, &source_tx, &target_tx).unwrap(); + db_destination.commit_transaction(target_tx).unwrap().expect("expected to commit transaction"); + + println!("\n######### verify db_copy"); + let incorrect_hashes = db_destination.verify_grovedb(None).unwrap(); + if incorrect_hashes.len() > 0 { + println!("DB verification failed!"); + } + else { + println!("DB verification success"); + } println!("\n######### root_hashes:"); let root_hash_0 = db_source.root_hash(None).unwrap().unwrap(); @@ -211,13 +209,12 @@ fn query_db(db: &GroveDb, path: &[&[u8]], key: Vec) { .unwrap() .expect("expected successful get_path_query"); for e in elements.into_iter() { - //let be_num = u32::from_be_bytes(e.try_into().expect("Slice with incorrect length")); println!(">> {:?}", e); } let proof = 
db.prove_query(&path_query).unwrap().unwrap(); // Get hash from query proof and print to terminal along with GroveDB root hash. - let (verify_hash, result_set) = GroveDb::verify_query(&proof, &path_query).unwrap(); + let (verify_hash, _) = GroveDb::verify_query(&proof, &path_query).unwrap(); println!("verify_hash: {:?}", hex::encode(verify_hash)); if verify_hash == db.root_hash(None).unwrap().unwrap() { println!("Query verified"); @@ -228,18 +225,19 @@ fn sync_db_demo( source_db: &GroveDb, target_db: &GroveDb, state_sync_info: StateSyncInfo, - tx: &Transaction, + source_tx: &Transaction, + target_tx: &Transaction, ) -> Result<(), grovedb::Error> { let app_hash = source_db.root_hash(None).value.unwrap(); - let (chunk_ids, mut state_sync_info) = target_db.w_start_snapshot_syncing(state_sync_info, app_hash, tx)?; + let (chunk_ids, mut state_sync_info) = target_db.start_snapshot_syncing(state_sync_info, app_hash, target_tx)?; let mut chunk_queue : VecDeque> = VecDeque::new(); chunk_queue.extend(chunk_ids); while let Some(chunk_id) = chunk_queue.pop_front() { - let ops = source_db.w_fetch_chunk(chunk_id.as_slice())?; - let (more_chunks, new_state_sync_info) = target_db.w_apply_chunk(state_sync_info, (chunk_id.as_slice(), ops), tx)?; + let ops = source_db.fetch_chunk(chunk_id.as_slice(), source_tx)?; + let (more_chunks, new_state_sync_info) = target_db.apply_chunk(state_sync_info, (chunk_id.as_slice(), ops), target_tx)?; state_sync_info = new_state_sync_info; chunk_queue.extend(more_chunks); } From 511c7cf65910363b931e5eec6a797b5de57f7ff1 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Fri, 26 Apr 2024 17:43:53 +0300 Subject: [PATCH 15/30] final work --- grovedb/src/lib.rs | 125 ++++++++++++++++++++----------- tutorials/src/bin/replication.rs | 2 +- 2 files changed, 81 insertions(+), 46 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index e957ff49..fb623805 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -240,17 +240,26 @@ pub 
struct GroveDb { version: i32 } +// Struct governing state sync pub struct StateSyncInfo<'db> { + // Current Chunk restorer restorer: Option>>, + // Set of processed prefixes (Path digests) processed_prefixes :BTreeSet, + // Current processed prefix (Path digest) current_prefix: Option, + // Set of global chunk ids requested to be fetched and pending for processing. For the description of global chunk id check fetch_chunk(). pending_chunks :BTreeSet>, + // Number of processed chunks in current prefix (Path digest) num_processed_chunks: usize, } pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; +// Struct containing information about current subtrees found in GroveDB pub struct SubtreesMetadata { + // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent Subtree elem_value_hash) + // Note: Parent Subtree actual_value_hash, Parent Subtree elem_value_hash are needed when verifying the new constructed subtree after wards. pub data: BTreeMap>, CryptoHash, CryptoHash)> } @@ -266,7 +275,7 @@ impl fmt::Debug for SubtreesMetadata { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for (prefix, metadata) in self.data.iter() { let metadata_path = &metadata.0; - let metadata_path_str = s_util_path_to_string(&metadata_path); + let metadata_path_str = util_path_to_string(&metadata_path); write!(f, " prefix:{:?} -> path:{:?}\n", hex::encode(prefix), metadata_path_str); } Ok(()) @@ -1060,6 +1069,56 @@ impl GroveDb { Ok(issues) } + // Returns the discovered subtrees found recursively along with their associated metadata + // Params: + // tx: Transaction. Function returns the data by opening merks at given tx. 
+ // TODO: Add a SubTreePath as param and start searching from that path instead of root (as it is now) + pub fn get_subtrees_metadata<'db>( + &'db self, + tx: &'db Transaction, + ) -> Result { + let mut subtrees_metadata = crate::SubtreesMetadata::new(); + + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; + for subtree in subtrees_root.into_iter() { + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); + + let current_path = SubtreePath::from(path); + + let parent_path_opt = current_path.derive_parent(); + if (parent_path_opt.is_some()) { + let parent_path = parent_path_opt.unwrap().0; + let parent_merk = self.open_transactional_merk_at_path(parent_path, tx, None).value?; + let parent_key = subtree.last().unwrap(); + let (elem_value, elem_value_hash) = parent_merk + .get_value_and_value_hash( + parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ).value.expect("should get value hash").expect("value hash should be some"); + + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert(prefix, (current_path.to_vec(), actual_value_hash, elem_value_hash)); + } + else { + subtrees_metadata.data.insert(prefix, (current_path.to_vec(), CryptoHash::default(), CryptoHash::default())); + } + } + Ok(subtrees_metadata) + } + + // Fetch a chunk by global chunk id (should be called by ABCI when LoadSnapshotChunk method is called) + // Params: + // global_chunk_id: Global chunk id in the following format: [SUBTREE_PREFIX:CHUNK_ID] + // SUBTREE_PREFIX: 32 bytes (mandatory) (All zeros = Root subtree) + // CHUNK_ID: 0.. bytes (optional) Traversal instructions to the root of the given chunk. + // Traversal instructions are "1" for left, and "0" for right. 
+ // TODO: Compact CHUNK_ID into bitset for size optimization as a subtree can be big hence traversal instructions for the deepest chunks + // tx: Transaction. Function returns the data by opening merks at given tx. + // TODO: Make this tx optional: None -> Use latest data + // Returns the Chunk proof operators for the requested chunk pub fn fetch_chunk<'db>( &'db self, global_chunk_id: &[u8], @@ -1078,8 +1137,7 @@ impl GroveDb { array.copy_from_slice(chunk_prefix); let chunk_prefix_key: SubtreePrefix = array; - let tx = self.start_transaction(); - let subtrees_metadata = self.get_subtrees_metadata(&SubtreePath::empty(), &tx)?; + let subtrees_metadata = self.get_subtrees_metadata(&tx)?; match subtrees_metadata.data.get(&chunk_prefix_key) { Some(path_data) => { @@ -1124,43 +1182,12 @@ impl GroveDb { } } - pub fn get_subtrees_metadata<'db, B: AsRef<[u8]>>( - &'db self, - path: &SubtreePath, - tx: &'db Transaction, - ) -> Result { - let mut subtrees_metadata = crate::SubtreesMetadata::new(); - - let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; - for subtree in subtrees_root.into_iter() { - let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); - - let current_path = SubtreePath::from(path); - - let parent_path_opt = current_path.derive_parent(); - if (parent_path_opt.is_some()) { - let parent_path = parent_path_opt.unwrap().0; - let parent_merk = self.open_transactional_merk_at_path(parent_path, tx, None).value?; - let parent_key = subtree.last().unwrap(); - let (elem_value, elem_value_hash) = parent_merk - .get_value_and_value_hash( - parent_key, - true, - None::<&fn(&[u8]) -> Option>, - ).value.expect("should get value hash").expect("value hash should be some"); - - let actual_value_hash = value_hash(&elem_value).unwrap(); - subtrees_metadata.data.insert(prefix, (current_path.to_vec(), 
actual_value_hash, elem_value_hash)); - } - else { - subtrees_metadata.data.insert(prefix, (current_path.to_vec(), CryptoHash::default(), CryptoHash::default())); - } - } - Ok(subtrees_metadata) - } - + // Starts a state sync process (should be called by ABCI when OfferSnapshot method is called) + // Params: + // state_sync_info: Consumed StateSyncInfo + // app_hash: Snapshot's AppHash + // tx: Transaction for the state sync + // Returns the first set of global chunk ids that can be fetched from sources (+ the StateSyncInfo transferring ownership back to the caller) pub fn start_snapshot_syncing<'db>( &'db self, mut state_sync_info: StateSyncInfo<'db>, @@ -1196,6 +1223,12 @@ impl GroveDb { Ok((res, state_sync_info)) } + // Apply a chunk (should be called by ABCI when ApplySnapshotChunk method is called) + // Params: + // state_sync_info: Consumed StateSyncInfo + // chunk: (Global chunk id, Chunk proof operators) + // tx: Transaction for the state sync + // Returns the next set of global chunk ids that can be fetched from sources (+ the StateSyncInfo transferring ownership back to the caller) pub fn apply_chunk<'db>( &'db self, mut state_sync_info: StateSyncInfo<'db>, @@ -1261,10 +1294,10 @@ impl GroveDb { } state_sync_info.processed_prefixes.insert(current_prefix); - let subtrees_metadata = self.get_subtrees_metadata(&SubtreePath::empty(), tx)?; + let subtrees_metadata = self.get_subtrees_metadata(tx)?; if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { let v_path = &value.0; - println!(" path:{:?} done", s_util_path_to_string(&value.0)); + println!(" path:{:?} done", util_path_to_string(&value.0)); } for (prefix, prefix_metadata) in &subtrees_metadata.data { @@ -1299,7 +1332,8 @@ impl GroveDb { } } -pub fn s_util_path_to_string( +// Converts a path into a human-readable string (for debuting) +pub fn util_path_to_string( path: &Vec>, ) -> Vec { let mut subtree_path_str: Vec = vec![]; @@ -1310,17 +1344,18 @@ pub fn s_util_path_to_string( subtree_path_str } 
+// Splits the given global chunk id into [SUBTREE_PREFIX:CHUNK_ID] pub fn util_split_global_chunk_id( global_chunk_id: &[u8], ) -> Result<(SubtreePrefix, String), Error> { let CHUNK_PREFIX_LENGTH: usize = 32; - if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { + if global_chunk_id.len() < CHUNK_PREFIX_LENGTH { return Err(Error::CorruptedData( "expected global chunk id of at least 32 length".to_string(), )); } - let (chunk_prefix, chunk_id) = global_chunk_id.split_at(32); + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(CHUNK_PREFIX_LENGTH); let mut array = [0u8; 32]; array.copy_from_slice(chunk_prefix); let chunk_prefix_key: SubtreePrefix = array; diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index e5bafd08..3d523c90 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -79,7 +79,7 @@ fn main() { println!("\n######### source_subtree_metadata"); let source_tx = db_source.start_transaction(); - let subtrees_metadata = db_source.get_subtrees_metadata(&SubtreePath::empty(), &source_tx).unwrap(); + let subtrees_metadata = db_source.get_subtrees_metadata(&source_tx).unwrap(); println!("{:?}", subtrees_metadata); println!("\n######### db_checkpoint_0 -> db_copy state sync"); From 2f7b65ad9228f13d27bf4bc5115cfbf8c9d0c74e Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Fri, 26 Apr 2024 17:52:42 +0300 Subject: [PATCH 16/30] cargo fmt --- grovedb/src/lib.rs | 169 ++++++++++++++++------------ grovedb/src/operations/auxiliary.rs | 10 +- merk/src/merk/mod.rs | 44 +++++--- merk/src/merk/restore.rs | 22 ++-- 4 files changed, 146 insertions(+), 99 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index fb623805..471f418e 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -167,9 +167,9 @@ mod versioning; #[cfg(feature = "full")] mod visualize; -#[cfg(feature = "full")] -use std::{collections::HashMap, option::Option::None, path::Path, fmt}; use 
std::collections::{BTreeMap, BTreeSet}; +#[cfg(feature = "full")] +use std::{collections::HashMap, fmt, option::Option::None, path::Path}; #[cfg(any(feature = "full", feature = "verify"))] use element::helpers; @@ -177,11 +177,11 @@ use element::helpers; pub use element::Element; #[cfg(feature = "full")] pub use element::ElementFlags; +use grovedb_costs::storage_cost::key_value_cost::KeyValueStorageCost; #[cfg(feature = "full")] use grovedb_costs::{ cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, }; -use grovedb_costs::storage_cost::key_value_cost::KeyValueStorageCost; #[cfg(feature = "estimated_costs")] pub use grovedb_merk::estimated_costs::{ average_case_costs::{ @@ -191,6 +191,7 @@ pub use grovedb_merk::estimated_costs::{ }; #[cfg(any(feature = "full", feature = "verify"))] pub use grovedb_merk::proofs::query::query_item::QueryItem; +use grovedb_merk::proofs::Op; #[cfg(any(feature = "full", feature = "verify"))] pub use grovedb_merk::proofs::Query; #[cfg(feature = "full")] @@ -202,7 +203,6 @@ use grovedb_merk::{ BatchEntry, CryptoHash, KVIterator, Merk, }; use grovedb_merk::{ChunkProducer, Restorer}; -use grovedb_merk::proofs::{Op}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -237,7 +237,7 @@ pub struct GroveDb { #[cfg(feature = "full")] db: RocksDbStorage, - version: i32 + version: i32, } // Struct governing state sync @@ -245,11 +245,11 @@ pub struct StateSyncInfo<'db> { // Current Chunk restorer restorer: Option>>, // Set of processed prefixes (Path digests) - processed_prefixes :BTreeSet, + processed_prefixes: BTreeSet, // Current processed prefix (Path digest) current_prefix: Option, // Set of global chunk ids requested to be fetched and pending for processing. For the description of global chunk id check fetch_chunk(). 
- pending_chunks :BTreeSet>, + pending_chunks: BTreeSet>, // Number of processed chunks in current prefix (Path digest) num_processed_chunks: usize, } @@ -260,7 +260,7 @@ pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; pub struct SubtreesMetadata { // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent Subtree elem_value_hash) // Note: Parent Subtree actual_value_hash, Parent Subtree elem_value_hash are needed when verifying the new constructed subtree after wards. - pub data: BTreeMap>, CryptoHash, CryptoHash)> + pub data: BTreeMap>, CryptoHash, CryptoHash)>, } impl SubtreesMetadata { @@ -276,7 +276,12 @@ impl fmt::Debug for SubtreesMetadata { for (prefix, metadata) in self.data.iter() { let metadata_path = &metadata.0; let metadata_path_str = util_path_to_string(&metadata_path); - write!(f, " prefix:{:?} -> path:{:?}\n", hex::encode(prefix), metadata_path_str); + write!( + f, + " prefix:{:?} -> path:{:?}\n", + hex::encode(prefix), + metadata_path_str + ); } Ok(()) } @@ -291,9 +296,7 @@ pub type TransactionArg<'db, 'a> = Option<&'a Transaction<'db>>; #[cfg(feature = "full")] impl GroveDb { - pub fn create_state_sync_info( - &self, - ) -> StateSyncInfo { + pub fn create_state_sync_info(&self) -> StateSyncInfo { let pending_chunks = BTreeSet::new(); let processed_prefixes = BTreeSet::new(); StateSyncInfo { @@ -302,12 +305,10 @@ impl GroveDb { current_prefix: None, pending_chunks, num_processed_chunks: 0, - } } /// Opens a given path pub fn open>(path: P) -> Result { - let db = RocksDbStorage::default_rocksdb_with_path(path)?; Ok(GroveDb { db, version: 1 }) } @@ -1090,20 +1091,34 @@ impl GroveDb { let parent_path_opt = current_path.derive_parent(); if (parent_path_opt.is_some()) { let parent_path = parent_path_opt.unwrap().0; - let parent_merk = self.open_transactional_merk_at_path(parent_path, tx, None).value?; + let parent_merk = self + .open_transactional_merk_at_path(parent_path, tx, None) + .value?; let parent_key = 
subtree.last().unwrap(); let (elem_value, elem_value_hash) = parent_merk .get_value_and_value_hash( parent_key, true, None::<&fn(&[u8]) -> Option>, - ).value.expect("should get value hash").expect("value hash should be some"); - - let actual_value_hash = value_hash(&elem_value).unwrap(); - subtrees_metadata.data.insert(prefix, (current_path.to_vec(), actual_value_hash, elem_value_hash)); - } - else { - subtrees_metadata.data.insert(prefix, (current_path.to_vec(), CryptoHash::default(), CryptoHash::default())); + ) + .value + .expect("should get value hash") + .expect("value hash should be some"); + + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert( + prefix, + (current_path.to_vec(), actual_value_hash, elem_value_hash), + ); + } else { + subtrees_metadata.data.insert( + prefix, + ( + current_path.to_vec(), + CryptoHash::default(), + CryptoHash::default(), + ), + ); } } Ok(subtrees_metadata) @@ -1145,7 +1160,9 @@ impl GroveDb { let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; - let merk = self.open_transactional_merk_at_path(path.into(), &tx, None).value?; + let merk = self + .open_transactional_merk_at_path(path.into(), &tx, None) + .value?; if (merk.is_empty_tree().unwrap()) { return Ok(vec![]); @@ -1154,11 +1171,10 @@ impl GroveDb { let mut chunk_producer_res = ChunkProducer::new(&merk); match chunk_producer_res { Ok(mut chunk_producer) => { - let chunk_res = chunk_producer.chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); + let chunk_res = chunk_producer + .chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); match chunk_res { - Ok((chunk, _)) => { - Ok(chunk) - } + Ok((chunk, _)) => Ok(chunk), Err(_) => { return Err(Error::CorruptedData( "Unable to create to load chunk".to_string(), @@ -1172,12 +1188,9 @@ impl GroveDb { )); } } - - }, + } None => { - return Err(Error::CorruptedData( - "Prefix not found".to_string(), - )); + 
return Err(Error::CorruptedData("Prefix not found".to_string())); } } } @@ -1193,14 +1206,21 @@ impl GroveDb { mut state_sync_info: StateSyncInfo<'db>, app_hash: CryptoHash, tx: &'db Transaction, - ) -> Result<(Vec>, StateSyncInfo), Error>{ + ) -> Result<(Vec>, StateSyncInfo), Error> { let mut res = vec![]; - match (&mut state_sync_info.restorer, &state_sync_info.current_prefix) { + match ( + &mut state_sync_info.restorer, + &state_sync_info.current_prefix, + ) { (None, None) => { - if state_sync_info.pending_chunks.is_empty() && state_sync_info.processed_prefixes.is_empty() { + if state_sync_info.pending_chunks.is_empty() + && state_sync_info.processed_prefixes.is_empty() + { let root_prefix = [0u8; 32]; - let merk = self.open_merk_for_replication(SubtreePath::empty(), tx).unwrap(); + let merk = self + .open_merk_for_replication(SubtreePath::empty(), tx) + .unwrap(); let restorer = Restorer::new(merk, app_hash, None); state_sync_info.restorer = Some(restorer); state_sync_info.current_prefix = Some(root_prefix); @@ -1208,11 +1228,9 @@ impl GroveDb { res.push(root_prefix.to_vec()); } else { - return Err(Error::InternalError( - "Invalid internal state sync info", - )); + return Err(Error::InternalError("Invalid internal state sync info")); } - }, + } _ => { return Err(Error::InternalError( "GroveDB has already started a snapshot syncing", @@ -1234,18 +1252,19 @@ impl GroveDb { mut state_sync_info: StateSyncInfo<'db>, chunk: (&[u8], Vec), tx: &'db Transaction, - ) -> Result<(Vec>, StateSyncInfo), Error>{ + ) -> Result<(Vec>, StateSyncInfo), Error> { let mut res = vec![]; let (global_chunk_id, chunk_data) = chunk; let (chunk_prefix, chunk_id) = util_split_global_chunk_id(&global_chunk_id)?; - match (&mut state_sync_info.restorer, &state_sync_info.current_prefix) { + match ( + &mut state_sync_info.restorer, + &state_sync_info.current_prefix, + ) { (Some(restorer), Some(ref current_prefix)) => { if (*current_prefix != chunk_prefix) { - return Err(Error::InternalError( - 
"Invalid incoming prefix", - )); + return Err(Error::InternalError("Invalid incoming prefix")); } if (!state_sync_info.pending_chunks.contains(global_chunk_id)) { return Err(Error::InternalError( @@ -1260,36 +1279,35 @@ impl GroveDb { for next_chunk_id in next_chunk_ids { let mut next_global_chunk_id = chunk_prefix.to_vec(); next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - state_sync_info.pending_chunks.insert(next_global_chunk_id.clone()); + state_sync_info + .pending_chunks + .insert(next_global_chunk_id.clone()); res.push(next_global_chunk_id); } - }, + } _ => { - return Err(Error::InternalError( - "Unable to process incoming chunk", - )); - }, + return Err(Error::InternalError("Unable to process incoming chunk")); + } }; } } _ => { - return Err(Error::InternalError( - "GroveDB is not in syncing mode", - )); + return Err(Error::InternalError("GroveDB is not in syncing mode")); } } if (res.is_empty()) { if (!state_sync_info.pending_chunks.is_empty()) { - return Ok((res, state_sync_info)) + return Ok((res, state_sync_info)); } - match (state_sync_info.restorer.take(), state_sync_info.current_prefix.take()) { + match ( + state_sync_info.restorer.take(), + state_sync_info.current_prefix.take(), + ) { (Some(restorer), Some(current_prefix)) => { if (state_sync_info.num_processed_chunks > 0) { if (!restorer.finalize().is_ok()) { - return Err(Error::InternalError( - "Unable to finalize merk", - )); + return Err(Error::InternalError("Unable to finalize merk")); } } state_sync_info.processed_prefixes.insert(current_prefix); @@ -1302,28 +1320,31 @@ impl GroveDb { for (prefix, prefix_metadata) in &subtrees_metadata.data { if !state_sync_info.processed_prefixes.contains(prefix) { - let (current_path, s_actual_value_hash, s_elem_value_hash) = &prefix_metadata; + let (current_path, s_actual_value_hash, s_elem_value_hash) = + &prefix_metadata; - let subtree_path: Vec<&[u8]> = current_path.iter().map(|vec| vec.as_slice()).collect(); + let subtree_path: 
Vec<&[u8]> = + current_path.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; let merk = self.open_merk_for_replication(path.into(), tx).unwrap(); - let restorer = Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + let restorer = + Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); state_sync_info.restorer = Some(restorer); state_sync_info.current_prefix = Some(*prefix); state_sync_info.num_processed_chunks = 0; let root_chunk_prefix = prefix.to_vec(); - state_sync_info.pending_chunks.insert(root_chunk_prefix.clone()); + state_sync_info + .pending_chunks + .insert(root_chunk_prefix.clone()); res.push(root_chunk_prefix); break; } } - }, + } _ => { - return Err(Error::InternalError( - "Unable to finalize tree", - )); + return Err(Error::InternalError("Unable to finalize tree")); } } } @@ -1333,9 +1354,7 @@ impl GroveDb { } // Converts a path into a human-readable string (for debuting) -pub fn util_path_to_string( - path: &Vec>, -) -> Vec { +pub fn util_path_to_string(path: &Vec>) -> Vec { let mut subtree_path_str: Vec = vec![]; for subtree in path.to_vec() { let string = std::str::from_utf8(&subtree).unwrap(); @@ -1362,8 +1381,10 @@ pub fn util_split_global_chunk_id( let str_chunk_id = String::from_utf8(chunk_id.to_vec()); match str_chunk_id { Ok(s) => Ok((chunk_prefix_key, s)), - Err(_) => return Err(Error::CorruptedData( - "unable to convert chunk id to string".to_string(), - )), + Err(_) => { + return Err(Error::CorruptedData( + "unable to convert chunk id to string".to_string(), + )) + } } -} \ No newline at end of file +} diff --git a/grovedb/src/operations/auxiliary.rs b/grovedb/src/operations/auxiliary.rs index 6f3ec40a..2e0cbe69 100644 --- a/grovedb/src/operations/auxiliary.rs +++ b/grovedb/src/operations/auxiliary.rs @@ -30,18 +30,18 @@ #[cfg(feature = "full")] use grovedb_costs::{ - cost_return_on_error_no_add, cost_return_on_error, storage_cost::key_value_cost::KeyValueStorageCost, 
CostResult, - CostsExt, OperationCost, + cost_return_on_error, cost_return_on_error_no_add, + storage_cost::key_value_cost::KeyValueStorageCost, CostResult, CostsExt, OperationCost, }; +use grovedb_merk::{proofs::Query, KVIterator}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::StorageContext; use grovedb_storage::{Storage, StorageBatch}; -use grovedb_merk::{proofs::Query, KVIterator}; -#[cfg(feature = "full")] -use crate::{util::meta_storage_context_optional_tx, Error, GroveDb, TransactionArg, Element}; use crate::util::storage_context_optional_tx; +#[cfg(feature = "full")] +use crate::{util::meta_storage_context_optional_tx, Element, Error, GroveDb, TransactionArg}; #[cfg(feature = "full")] impl GroveDb { diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index 3ccb787c..ecd66dac 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -57,16 +57,25 @@ use grovedb_costs::{ use grovedb_storage::{self, Batch, RawIterator, StorageContext}; use source::MerkSource; -use crate::{error::Error, merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, proofs::{ - chunk::{ - chunk::{LEFT, RIGHT}, - util::traversal_instruction_as_string, +use crate::{ + error::Error, + merk::{defaults::ROOT_KEY_KEY, options::MerkOptions}, + proofs::{ + chunk::{ + chunk::{LEFT, RIGHT}, + util::traversal_instruction_as_string, + }, + query::query_item::QueryItem, + Query, }, - query::query_item::QueryItem, - Query, -}, tree::{ - kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, -}, Error::{CostsError, EdError, StorageError}, Link, MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, BatchEntry}; + tree::{ + kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, + }, + BatchEntry, + Error::{CostsError, EdError, StorageError}, + Link, + MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, +}; /// Key update types pub struct KeyUpdates { @@ -545,7 +554,10 @@ where /// hash values are 
computed correctly, heights are accurate and links /// consistent with backing store. // TODO: define the return types - pub fn verify(&self, skip_sum_checks: bool) -> (BTreeMap, BTreeMap>) { + pub fn verify( + &self, + skip_sum_checks: bool, + ) -> (BTreeMap, BTreeMap>) { let tree = self.tree.take(); let mut bad_link_map: BTreeMap = BTreeMap::new(); @@ -582,7 +594,7 @@ where traversal_instruction, bad_link_map, parent_keys, - skip_sum_checks + skip_sum_checks, ); traversal_instruction.pop(); } @@ -595,7 +607,7 @@ where traversal_instruction, bad_link_map, parent_keys, - skip_sum_checks + skip_sum_checks, ); traversal_instruction.pop(); } @@ -667,7 +679,13 @@ where // TODO: check child heights // all checks passed, recurse - self.verify_tree(&node, traversal_instruction, bad_link_map, parent_keys, skip_sum_checks); + self.verify_tree( + &node, + traversal_instruction, + bad_link_map, + parent_keys, + skip_sum_checks, + ); } } diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 37a775f1..a00dae2b 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -29,11 +29,13 @@ //! Provides `Restorer`, which can create a replica of a Merk instance by //! receiving chunk proofs. -use std::collections::BTreeMap; use grovedb_costs::cost_return_on_error; +use std::collections::BTreeMap; use grovedb_storage::{Batch, StorageContext}; +use crate::merk::committer::MerkCommitter; +use crate::tree::{combine_hash, NoopCommit}; use crate::{ merk, merk::MerkSource, @@ -52,8 +54,6 @@ use crate::{ Error::{CostsError, StorageError}, Link, Merk, }; -use crate::merk::committer::MerkCommitter; -use crate::tree::{combine_hash, NoopCommit}; /// Restorer handles verification of chunks and replication of Merk trees. 
/// Chunks can be processed randomly as long as their parent has been processed @@ -69,7 +69,11 @@ pub struct Restorer { impl<'db, S: StorageContext<'db>> Restorer { /// Initializes a new chunk restorer with the expected root hash for the /// first chunk - pub fn new(merk: Merk, expected_root_hash: CryptoHash, parent_key_value_hash: Option) -> Self { + pub fn new( + merk: Merk, + expected_root_hash: CryptoHash, + parent_key_value_hash: Option, + ) -> Self { let mut chunk_id_to_root_hash = BTreeMap::new(); chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); Self { @@ -152,7 +156,11 @@ impl<'db, S: StorageContext<'db>> Restorer { /// Verifies the structure of a chunk and ensures the chunk matches the /// expected root hash - fn verify_chunk(chunk: Vec, expected_root_hash: &CryptoHash, parent_key_value_hash_opt: &Option) -> Result { + fn verify_chunk( + chunk: Vec, + expected_root_hash: &CryptoHash, + parent_key_value_hash_opt: &Option, + ) -> Result { let chunk_len = chunk.len(); let mut kv_count = 0; let mut hash_count = 0; @@ -187,7 +195,7 @@ impl<'db, S: StorageContext<'db>> Restorer { "chunk doesn't match expected root hash", ))); } - }, + } None => { if &tree.hash().unwrap() != expected_root_hash { return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( @@ -509,6 +517,7 @@ mod tests { }; use super::*; + use crate::test_utils::{make_batch_seq_with_same_value, make_batch_seq_with_value}; use crate::{ merk::chunks::ChunkProducer, proofs::chunk::{ @@ -518,7 +527,6 @@ mod tests { Error::ChunkRestoringError, Merk, PanicSource, }; - use crate::test_utils::{make_batch_seq_with_same_value, make_batch_seq_with_value}; #[test] fn test_chunk_verification_non_avl_tree() { From 599553f21ab27e2936d43542da6c6acdb5bdb623 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Fri, 26 Apr 2024 17:54:53 +0300 Subject: [PATCH 17/30] more fmt --- grovedb/src/lib.rs | 50 ++++++++++++++++++++++------------------ 
merk/src/merk/restore.rs | 13 +++++------ 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 471f418e..9e8eae6b 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -171,6 +171,7 @@ use std::collections::{BTreeMap, BTreeSet}; #[cfg(feature = "full")] use std::{collections::HashMap, fmt, option::Option::None, path::Path}; +use blake3; #[cfg(any(feature = "full", feature = "verify"))] use element::helpers; #[cfg(any(feature = "full", feature = "verify"))] @@ -191,7 +192,6 @@ pub use grovedb_merk::estimated_costs::{ }; #[cfg(any(feature = "full", feature = "verify"))] pub use grovedb_merk::proofs::query::query_item::QueryItem; -use grovedb_merk::proofs::Op; #[cfg(any(feature = "full", feature = "verify"))] pub use grovedb_merk::proofs::Query; #[cfg(feature = "full")] @@ -202,7 +202,7 @@ use grovedb_merk::{ tree::{combine_hash, value_hash}, BatchEntry, CryptoHash, KVIterator, Merk, }; -use grovedb_merk::{ChunkProducer, Restorer}; +use grovedb_merk::{proofs::Op, ChunkProducer, Restorer}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -227,7 +227,6 @@ use crate::helpers::raw_decode; #[cfg(feature = "full")] use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; -use blake3; #[cfg(feature = "full")] type Hash = [u8; 32]; @@ -248,7 +247,8 @@ pub struct StateSyncInfo<'db> { processed_prefixes: BTreeSet, // Current processed prefix (Path digest) current_prefix: Option, - // Set of global chunk ids requested to be fetched and pending for processing. For the description of global chunk id check fetch_chunk(). + // Set of global chunk ids requested to be fetched and pending for processing. For the + // description of global chunk id check fetch_chunk(). 
pending_chunks: BTreeSet>, // Number of processed chunks in current prefix (Path digest) num_processed_chunks: usize, @@ -258,8 +258,9 @@ pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; // Struct containing information about current subtrees found in GroveDB pub struct SubtreesMetadata { - // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent Subtree elem_value_hash) - // Note: Parent Subtree actual_value_hash, Parent Subtree elem_value_hash are needed when verifying the new constructed subtree after wards. + // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent + // Subtree elem_value_hash) Note: Parent Subtree actual_value_hash, Parent Subtree + // elem_value_hash are needed when verifying the new constructed subtree after wards. pub data: BTreeMap>, CryptoHash, CryptoHash)>, } @@ -307,6 +308,7 @@ impl GroveDb { num_processed_chunks: 0, } } + /// Opens a given path pub fn open>(path: P) -> Result { let db = RocksDbStorage::default_rocksdb_with_path(path)?; @@ -1070,10 +1072,11 @@ impl GroveDb { Ok(issues) } - // Returns the discovered subtrees found recursively along with their associated metadata - // Params: + // Returns the discovered subtrees found recursively along with their associated + // metadata Params: // tx: Transaction. Function returns the data by opening merks at given tx. 
- // TODO: Add a SubTreePath as param and start searching from that path instead of root (as it is now) + // TODO: Add a SubTreePath as param and start searching from that path instead + // of root (as it is now) pub fn get_subtrees_metadata<'db>( &'db self, tx: &'db Transaction, @@ -1124,13 +1127,14 @@ impl GroveDb { Ok(subtrees_metadata) } - // Fetch a chunk by global chunk id (should be called by ABCI when LoadSnapshotChunk method is called) - // Params: - // global_chunk_id: Global chunk id in the following format: [SUBTREE_PREFIX:CHUNK_ID] - // SUBTREE_PREFIX: 32 bytes (mandatory) (All zeros = Root subtree) - // CHUNK_ID: 0.. bytes (optional) Traversal instructions to the root of the given chunk. - // Traversal instructions are "1" for left, and "0" for right. - // TODO: Compact CHUNK_ID into bitset for size optimization as a subtree can be big hence traversal instructions for the deepest chunks + // Fetch a chunk by global chunk id (should be called by ABCI when + // LoadSnapshotChunk method is called) Params: + // global_chunk_id: Global chunk id in the following format: + // [SUBTREE_PREFIX:CHUNK_ID] SUBTREE_PREFIX: 32 bytes (mandatory) (All zeros + // = Root subtree) CHUNK_ID: 0.. bytes (optional) Traversal instructions to + // the root of the given chunk. Traversal instructions are "1" for left, and + // "0" for right. TODO: Compact CHUNK_ID into bitset for size optimization + // as a subtree can be big hence traversal instructions for the deepest chunks // tx: Transaction. Function returns the data by opening merks at given tx. 
// TODO: Make this tx optional: None -> Use latest data // Returns the Chunk proof operators for the requested chunk @@ -1195,12 +1199,13 @@ impl GroveDb { } } - // Starts a state sync process (should be called by ABCI when OfferSnapshot method is called) - // Params: + // Starts a state sync process (should be called by ABCI when OfferSnapshot + // method is called) Params: // state_sync_info: Consumed StateSyncInfo // app_hash: Snapshot's AppHash // tx: Transaction for the state sync - // Returns the first set of global chunk ids that can be fetched from sources (+ the StateSyncInfo transferring ownership back to the caller) + // Returns the first set of global chunk ids that can be fetched from sources (+ + // the StateSyncInfo transferring ownership back to the caller) pub fn start_snapshot_syncing<'db>( &'db self, mut state_sync_info: StateSyncInfo<'db>, @@ -1241,12 +1246,13 @@ impl GroveDb { Ok((res, state_sync_info)) } - // Apply a chunk (should be called by ABCI when ApplySnapshotChunk method is called) - // Params: + // Apply a chunk (should be called by ABCI when ApplySnapshotChunk method is + // called) Params: // state_sync_info: Consumed StateSyncInfo // chunk: (Global chunk id, Chunk proof operators) // tx: Transaction for the state sync - // Returns the next set of global chunk ids that can be fetched from sources (+ the StateSyncInfo transferring ownership back to the caller) + // Returns the next set of global chunk ids that can be fetched from sources (+ + // the StateSyncInfo transferring ownership back to the caller) pub fn apply_chunk<'db>( &'db self, mut state_sync_info: StateSyncInfo<'db>, diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index a00dae2b..3fd42a1d 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -29,16 +29,14 @@ //! Provides `Restorer`, which can create a replica of a Merk instance by //! receiving chunk proofs. 
-use grovedb_costs::cost_return_on_error; use std::collections::BTreeMap; +use grovedb_costs::cost_return_on_error; use grovedb_storage::{Batch, StorageContext}; -use crate::merk::committer::MerkCommitter; -use crate::tree::{combine_hash, NoopCommit}; use crate::{ merk, - merk::MerkSource, + merk::{committer::MerkCommitter, MerkSource}, proofs::{ chunk::{ chunk::{LEFT, RIGHT}, @@ -49,7 +47,7 @@ use crate::{ tree::{execute, Child, Tree as ProofTree}, Node, Op, }, - tree::{kv::ValueDefinedCostType, RefWalker, TreeNode}, + tree::{combine_hash, kv::ValueDefinedCostType, NoopCommit, RefWalker, TreeNode}, CryptoHash, Error, Error::{CostsError, StorageError}, Link, Merk, @@ -517,13 +515,14 @@ mod tests { }; use super::*; - use crate::test_utils::{make_batch_seq_with_same_value, make_batch_seq_with_value}; use crate::{ merk::chunks::ChunkProducer, proofs::chunk::{ chunk::tests::traverse_get_node_hash, error::ChunkError::InvalidChunkProof, }, - test_utils::{make_batch_seq, TempMerk}, + test_utils::{ + make_batch_seq, make_batch_seq_with_same_value, make_batch_seq_with_value, TempMerk, + }, Error::ChunkRestoringError, Merk, PanicSource, }; From 4c63644e742ccc04d1548f9f16f63494037b5f7a Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 10:59:00 +0300 Subject: [PATCH 18/30] clippy fixes --- grovedb/src/batch/mod.rs | 2 +- grovedb/src/lib.rs | 75 ++++++++++++++-------------- grovedb/src/operations/auxiliary.rs | 1 - grovedb/src/operations/delete/mod.rs | 2 +- merk/src/merk/chunks.rs | 12 ++--- merk/src/merk/mod.rs | 13 ++--- merk/src/merk/restore.rs | 21 ++++---- merk/src/proofs/chunk/chunk_op.rs | 4 +- merk/src/proofs/chunk/util.rs | 2 +- 9 files changed, 61 insertions(+), 71 deletions(-) diff --git a/grovedb/src/batch/mod.rs b/grovedb/src/batch/mod.rs index 70c47619..3eac9289 100644 --- a/grovedb/src/batch/mod.rs +++ b/grovedb/src/batch/mod.rs @@ -560,7 +560,7 @@ impl GroveDbOp { } /// Verify consistency of operations - pub fn 
verify_consistency_of_operations(ops: &Vec) -> GroveDbOpConsistencyResults { + pub fn verify_consistency_of_operations(ops: &[GroveDbOp]) -> GroveDbOpConsistencyResults { let ops_len = ops.len(); // operations should not have any duplicates let mut repeated_ops = vec![]; diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 9e8eae6b..9de0b339 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -171,14 +171,12 @@ use std::collections::{BTreeMap, BTreeSet}; #[cfg(feature = "full")] use std::{collections::HashMap, fmt, option::Option::None, path::Path}; -use blake3; #[cfg(any(feature = "full", feature = "verify"))] use element::helpers; #[cfg(any(feature = "full", feature = "verify"))] pub use element::Element; #[cfg(feature = "full")] pub use element::ElementFlags; -use grovedb_costs::storage_cost::key_value_cost::KeyValueStorageCost; #[cfg(feature = "full")] use grovedb_costs::{ cost_return_on_error, cost_return_on_error_no_add, CostResult, CostsExt, OperationCost, @@ -235,8 +233,6 @@ type Hash = [u8; 32]; pub struct GroveDb { #[cfg(feature = "full")] db: RocksDbStorage, - - version: i32, } // Struct governing state sync @@ -272,12 +268,18 @@ impl SubtreesMetadata { } } +impl Default for SubtreesMetadata { + fn default() -> Self { + Self::new() + } +} + impl fmt::Debug for SubtreesMetadata { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for (prefix, metadata) in self.data.iter() { let metadata_path = &metadata.0; - let metadata_path_str = util_path_to_string(&metadata_path); - write!( + let metadata_path_str = util_path_to_string(metadata_path); + writeln!( f, " prefix:{:?} -> path:{:?}\n", hex::encode(prefix), @@ -312,7 +314,7 @@ impl GroveDb { /// Opens a given path pub fn open>(path: P) -> Result { let db = RocksDbStorage::default_rocksdb_with_path(path)?; - Ok(GroveDb { db, version: 1 }) + Ok(GroveDb { db }) } /// Uses raw iter to delete GroveDB key values pairs from rocksdb @@ -1092,7 +1094,7 @@ impl GroveDb { let current_path = 
SubtreePath::from(path); let parent_path_opt = current_path.derive_parent(); - if (parent_path_opt.is_some()) { + if parent_path_opt.is_some() { let parent_path = parent_path_opt.unwrap().0; let parent_merk = self .open_transactional_merk_at_path(parent_path, tx, None) @@ -1143,20 +1145,20 @@ impl GroveDb { global_chunk_id: &[u8], tx: &'db Transaction, ) -> Result, Error> { - let CHUNK_PREFIX_LENGTH: usize = 32; - if (global_chunk_id.len() < CHUNK_PREFIX_LENGTH) { + let chunk_prefix_length: usize = 32; + if global_chunk_id.len() < chunk_prefix_length { return Err(Error::CorruptedData( "expected global chunk id of at least 32 length".to_string(), )); } - let (chunk_prefix, chunk_id) = global_chunk_id.split_at(CHUNK_PREFIX_LENGTH); + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); let mut array = [0u8; 32]; array.copy_from_slice(chunk_prefix); let chunk_prefix_key: SubtreePrefix = array; - let subtrees_metadata = self.get_subtrees_metadata(&tx)?; + let subtrees_metadata = self.get_subtrees_metadata(tx)?; match subtrees_metadata.data.get(&chunk_prefix_key) { Some(path_data) => { @@ -1165,14 +1167,14 @@ impl GroveDb { let path: &[&[u8]] = &subtree_path; let merk = self - .open_transactional_merk_at_path(path.into(), &tx, None) + .open_transactional_merk_at_path(path.into(), tx, None) .value?; - if (merk.is_empty_tree().unwrap()) { + if merk.is_empty_tree().unwrap() { return Ok(vec![]); } - let mut chunk_producer_res = ChunkProducer::new(&merk); + let chunk_producer_res = ChunkProducer::new(&merk); match chunk_producer_res { Ok(mut chunk_producer) => { let chunk_res = chunk_producer @@ -1180,21 +1182,21 @@ impl GroveDb { match chunk_res { Ok((chunk, _)) => Ok(chunk), Err(_) => { - return Err(Error::CorruptedData( + Err(Error::CorruptedData( "Unable to create to load chunk".to_string(), - )); + )) } } } Err(_) => { - return Err(Error::CorruptedData( + Err(Error::CorruptedData( "Unable to create Chunk producer".to_string(), - )); + )) } } } 
None => { - return Err(Error::CorruptedData("Prefix not found".to_string())); + Err(Error::CorruptedData("Prefix not found".to_string())) } } } @@ -1262,23 +1264,23 @@ impl GroveDb { let mut res = vec![]; let (global_chunk_id, chunk_data) = chunk; - let (chunk_prefix, chunk_id) = util_split_global_chunk_id(&global_chunk_id)?; + let (chunk_prefix, chunk_id) = util_split_global_chunk_id(global_chunk_id)?; match ( &mut state_sync_info.restorer, &state_sync_info.current_prefix, ) { (Some(restorer), Some(ref current_prefix)) => { - if (*current_prefix != chunk_prefix) { + if *current_prefix != chunk_prefix { return Err(Error::InternalError("Invalid incoming prefix")); } - if (!state_sync_info.pending_chunks.contains(global_chunk_id)) { + if !state_sync_info.pending_chunks.contains(global_chunk_id) { return Err(Error::InternalError( "Incoming global_chunk_id not expected", )); } state_sync_info.pending_chunks.remove(global_chunk_id); - if (!chunk_data.is_empty()) { + if !chunk_data.is_empty() { match restorer.process_chunk(chunk_id.to_string(), chunk_data) { Ok(next_chunk_ids) => { state_sync_info.num_processed_chunks += 1; @@ -1302,8 +1304,8 @@ impl GroveDb { } } - if (res.is_empty()) { - if (!state_sync_info.pending_chunks.is_empty()) { + if res.is_empty() { + if !state_sync_info.pending_chunks.is_empty() { return Ok((res, state_sync_info)); } match ( @@ -1311,16 +1313,13 @@ impl GroveDb { state_sync_info.current_prefix.take(), ) { (Some(restorer), Some(current_prefix)) => { - if (state_sync_info.num_processed_chunks > 0) { - if (!restorer.finalize().is_ok()) { - return Err(Error::InternalError("Unable to finalize merk")); - } + if (state_sync_info.num_processed_chunks > 0) && (restorer.finalize().is_err()) { + return Err(Error::InternalError("Unable to finalize merk")); } state_sync_info.processed_prefixes.insert(current_prefix); let subtrees_metadata = self.get_subtrees_metadata(tx)?; if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { - let v_path = 
&value.0; println!(" path:{:?} done", util_path_to_string(&value.0)); } @@ -1360,10 +1359,10 @@ impl GroveDb { } // Converts a path into a human-readable string (for debuting) -pub fn util_path_to_string(path: &Vec>) -> Vec { +pub fn util_path_to_string(path: &[Vec]) -> Vec { let mut subtree_path_str: Vec = vec![]; - for subtree in path.to_vec() { - let string = std::str::from_utf8(&subtree).unwrap(); + for subtree in path { + let string = std::str::from_utf8(subtree).unwrap(); subtree_path_str.push(string.parse().unwrap()); } subtree_path_str @@ -1373,14 +1372,14 @@ pub fn util_path_to_string(path: &Vec>) -> Vec { pub fn util_split_global_chunk_id( global_chunk_id: &[u8], ) -> Result<(SubtreePrefix, String), Error> { - let CHUNK_PREFIX_LENGTH: usize = 32; - if global_chunk_id.len() < CHUNK_PREFIX_LENGTH { + let chunk_prefix_length: usize = 32; + if global_chunk_id.len() < chunk_prefix_length { return Err(Error::CorruptedData( "expected global chunk id of at least 32 length".to_string(), )); } - let (chunk_prefix, chunk_id) = global_chunk_id.split_at(CHUNK_PREFIX_LENGTH); + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); let mut array = [0u8; 32]; array.copy_from_slice(chunk_prefix); let chunk_prefix_key: SubtreePrefix = array; @@ -1388,7 +1387,7 @@ pub fn util_split_global_chunk_id( match str_chunk_id { Ok(s) => Ok((chunk_prefix_key, s)), Err(_) => { - return Err(Error::CorruptedData( + Err(Error::CorruptedData( "unable to convert chunk id to string".to_string(), )) } diff --git a/grovedb/src/operations/auxiliary.rs b/grovedb/src/operations/auxiliary.rs index 2e0cbe69..1b6b884d 100644 --- a/grovedb/src/operations/auxiliary.rs +++ b/grovedb/src/operations/auxiliary.rs @@ -33,7 +33,6 @@ use grovedb_costs::{ cost_return_on_error, cost_return_on_error_no_add, storage_cost::key_value_cost::KeyValueStorageCost, CostResult, CostsExt, OperationCost, }; -use grovedb_merk::{proofs::Query, KVIterator}; use grovedb_path::SubtreePath; 
#[cfg(feature = "full")] use grovedb_storage::StorageContext; diff --git a/grovedb/src/operations/delete/mod.rs b/grovedb/src/operations/delete/mod.rs index f3dcc6cd..233efc3a 100644 --- a/grovedb/src/operations/delete/mod.rs +++ b/grovedb/src/operations/delete/mod.rs @@ -59,7 +59,7 @@ use grovedb_storage::{ #[cfg(feature = "full")] use crate::{ batch::{GroveDbOp, Op}, - util::{storage_context_optional_tx, storage_context_with_parent_optional_tx}, + util::{storage_context_with_parent_optional_tx}, Element, ElementFlags, Error, GroveDb, Transaction, TransactionArg, }; use crate::{raw_decode, util::merk_optional_tx_path_not_empty}; diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index 37c485cc..be2ecffc 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -364,6 +364,10 @@ where number_of_chunks(self.height) } + pub fn is_empty(&self) -> bool { + number_of_chunks(self.height) == 0 + } + /// Gets the next chunk based on the `ChunkProducer`'s internal index state. /// This is mostly useful for letting `ChunkIter` yield the chunks in order, /// optimizing throughput compared to random access. 
@@ -387,14 +391,6 @@ where }), ) } - - // TODO: test this logic out - fn get_chunk_encoding_length(chunk: &[Op]) -> usize { - // TODO: deal with error - chunk - .iter() - .fold(0, |sum, op| sum + op.encoding_length().unwrap()) - } } /// Iterate over each chunk, returning `None` after last chunk diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index ecd66dac..94b99add 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -71,7 +71,6 @@ use crate::{ tree::{ kv::ValueDefinedCostType, AuxMerkBatch, CryptoHash, Op, RefWalker, TreeNode, NULL_HASH, }, - BatchEntry, Error::{CostsError, EdError, StorageError}, Link, MerkType::{BaseMerk, LayeredMerk, StandaloneMerk}, @@ -662,19 +661,17 @@ where } let node = node.unwrap(); - if &node.hash().unwrap() != &hash { + if node.hash().unwrap() != hash { bad_link_map.insert(instruction_id.clone(), hash); parent_keys.insert(instruction_id, parent_key.to_vec()); return; } // Need to skip this when restoring a sum tree - if !skip_sum_checks { - if node.sum().unwrap() != sum { - bad_link_map.insert(instruction_id.clone(), hash); - parent_keys.insert(instruction_id, parent_key.to_vec()); - return; - } + if !skip_sum_checks && node.sum().unwrap() != sum { + bad_link_map.insert(instruction_id.clone(), hash); + parent_keys.insert(instruction_id, parent_key.to_vec()); + return; } // TODO: check child heights diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 3fd42a1d..f38503d3 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -31,12 +31,11 @@ use std::collections::BTreeMap; -use grovedb_costs::cost_return_on_error; use grovedb_storage::{Batch, StorageContext}; use crate::{ merk, - merk::{committer::MerkCommitter, MerkSource}, + merk::{MerkSource}, proofs::{ chunk::{ chunk::{LEFT, RIGHT}, @@ -47,7 +46,7 @@ use crate::{ tree::{execute, Child, Tree as ProofTree}, Node, Op, }, - tree::{combine_hash, kv::ValueDefinedCostType, NoopCommit, RefWalker, TreeNode}, + tree::{combine_hash, 
kv::ValueDefinedCostType, RefWalker, TreeNode}, CryptoHash, Error, Error::{CostsError, StorageError}, Link, Merk, @@ -96,15 +95,15 @@ impl<'db, S: StorageContext<'db>> Restorer { .ok_or(Error::ChunkRestoringError(ChunkError::UnexpectedChunk))?; let mut parent_key_value_hash: Option = None; - if (chunk_id.len() == 0) { - parent_key_value_hash = self.parent_key_value_hash.clone(); + if chunk_id.is_empty() { + parent_key_value_hash = self.parent_key_value_hash; } let chunk_tree = Self::verify_chunk(chunk, expected_root_hash, &parent_key_value_hash)?; let mut root_traversal_instruction = string_as_traversal_instruction(&chunk_id)?; if root_traversal_instruction.is_empty() { - self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); + let _ = self.merk.set_base_root_key(Some(chunk_tree.key().to_vec())); } else { // every non root chunk has some associated parent with an placeholder link // here we update the placeholder link to represent the true data @@ -185,9 +184,9 @@ impl<'db, S: StorageContext<'db>> Restorer { debug_assert_eq!(chunk_len, ((kv_count + hash_count) * 2) - 1); // chunk structure verified, next verify root hash - let parent_key_value_hash = match parent_key_value_hash_opt { + match parent_key_value_hash_opt { Some(val_hash) => { - let combined_hash = combine_hash(&val_hash, &tree.hash().unwrap()).unwrap(); + let combined_hash = combine_hash(val_hash, &tree.hash().unwrap()).unwrap(); if &combined_hash != expected_root_hash { return Err(Error::ChunkRestoringError(ChunkError::InvalidChunkProof( "chunk doesn't match expected root hash", @@ -408,14 +407,14 @@ impl<'db, S: StorageContext<'db>> Restorer { } // get the latest version of the root node - self.merk + let _ = self.merk .load_base_root(None::<&fn(&[u8]) -> Option>); // if height values are wrong, rewrite height if self.verify_height().is_err() { - self.rewrite_heights(); + let _ = self.rewrite_heights(); // update the root node after height rewrite - self.merk + let _ = self.merk 
.load_base_root(None::<&fn(&[u8]) -> Option>); } diff --git a/merk/src/proofs/chunk/chunk_op.rs b/merk/src/proofs/chunk/chunk_op.rs index 55f5751b..6d0d08cd 100644 --- a/merk/src/proofs/chunk/chunk_op.rs +++ b/merk/src/proofs/chunk/chunk_op.rs @@ -45,7 +45,7 @@ impl Encode for ChunkOp { match self { Self::ChunkId(instruction) => { // write the marker then the len - dest.write_all(&[0_u8]); + let _ = dest.write_all(&[0_u8]); dest.write_all(instruction.len().encode_var_vec().as_slice())?; let instruction_as_binary: Vec = instruction .iter() @@ -54,7 +54,7 @@ impl Encode for ChunkOp { dest.write_all(&instruction_as_binary)?; } Self::Chunk(chunk) => { - dest.write_all(&[1_u8]); + let _ = dest.write_all(&[1_u8]); // chunk len represents the number of ops not the total encoding len of ops dest.write_all(chunk.len().encode_var_vec().as_slice())?; for op in chunk { diff --git a/merk/src/proofs/chunk/util.rs b/merk/src/proofs/chunk/util.rs index 530e00e7..2f64ba8d 100644 --- a/merk/src/proofs/chunk/util.rs +++ b/merk/src/proofs/chunk/util.rs @@ -340,7 +340,7 @@ pub fn generate_traversal_instruction_as_string( /// Convert traversal instruction to byte string /// 1 represents left (true) /// 0 represents right (false) -pub fn traversal_instruction_as_string(instruction: &Vec) -> String { +pub fn traversal_instruction_as_string(instruction: &[bool]) -> String { instruction .iter() .map(|v| if *v { "1" } else { "0" }) From 0fe432adaaed747a937543713d75c3191d1426b6 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 11:02:30 +0300 Subject: [PATCH 19/30] more fmt --- grovedb/src/lib.rs | 31 +++++++++++----------------- grovedb/src/operations/delete/mod.rs | 2 +- merk/src/merk/restore.rs | 8 ++++--- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 9de0b339..92b33122 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -1181,23 +1181,17 @@ impl GroveDb { 
.chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); match chunk_res { Ok((chunk, _)) => Ok(chunk), - Err(_) => { - Err(Error::CorruptedData( - "Unable to create to load chunk".to_string(), - )) - } + Err(_) => Err(Error::CorruptedData( + "Unable to create to load chunk".to_string(), + )), } } - Err(_) => { - Err(Error::CorruptedData( - "Unable to create Chunk producer".to_string(), - )) - } + Err(_) => Err(Error::CorruptedData( + "Unable to create Chunk producer".to_string(), + )), } } - None => { - Err(Error::CorruptedData("Prefix not found".to_string())) - } + None => Err(Error::CorruptedData("Prefix not found".to_string())), } } @@ -1313,7 +1307,8 @@ impl GroveDb { state_sync_info.current_prefix.take(), ) { (Some(restorer), Some(current_prefix)) => { - if (state_sync_info.num_processed_chunks > 0) && (restorer.finalize().is_err()) { + if (state_sync_info.num_processed_chunks > 0) && (restorer.finalize().is_err()) + { return Err(Error::InternalError("Unable to finalize merk")); } state_sync_info.processed_prefixes.insert(current_prefix); @@ -1386,10 +1381,8 @@ pub fn util_split_global_chunk_id( let str_chunk_id = String::from_utf8(chunk_id.to_vec()); match str_chunk_id { Ok(s) => Ok((chunk_prefix_key, s)), - Err(_) => { - Err(Error::CorruptedData( - "unable to convert chunk id to string".to_string(), - )) - } + Err(_) => Err(Error::CorruptedData( + "unable to convert chunk id to string".to_string(), + )), } } diff --git a/grovedb/src/operations/delete/mod.rs b/grovedb/src/operations/delete/mod.rs index 233efc3a..3bddd6b5 100644 --- a/grovedb/src/operations/delete/mod.rs +++ b/grovedb/src/operations/delete/mod.rs @@ -59,7 +59,7 @@ use grovedb_storage::{ #[cfg(feature = "full")] use crate::{ batch::{GroveDbOp, Op}, - util::{storage_context_with_parent_optional_tx}, + util::storage_context_with_parent_optional_tx, Element, ElementFlags, Error, GroveDb, Transaction, TransactionArg, }; use crate::{raw_decode, util::merk_optional_tx_path_not_empty}; diff 
--git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index f38503d3..6b56c0c2 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -35,7 +35,7 @@ use grovedb_storage::{Batch, StorageContext}; use crate::{ merk, - merk::{MerkSource}, + merk::MerkSource, proofs::{ chunk::{ chunk::{LEFT, RIGHT}, @@ -407,14 +407,16 @@ impl<'db, S: StorageContext<'db>> Restorer { } // get the latest version of the root node - let _ = self.merk + let _ = self + .merk .load_base_root(None::<&fn(&[u8]) -> Option>); // if height values are wrong, rewrite height if self.verify_height().is_err() { let _ = self.rewrite_heights(); // update the root node after height rewrite - let _ = self.merk + let _ = self + .merk .load_base_root(None::<&fn(&[u8]) -> Option>); } From 5dfb1188988a453f5cac07cb87235eb972b3ca2e Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 14:41:37 +0300 Subject: [PATCH 20/30] fix for verify feature --- grovedb/src/lib.rs | 112 +++--------------- grovedb/src/replication.rs | 95 +++++++++++++++ merk/src/error.rs | 4 +- merk/src/lib.rs | 2 +- merk/src/proofs/tree.rs | 2 + merk/src/tree/mod.rs | 2 +- storage/src/rocksdb_storage.rs | 2 +- .../src/rocksdb_storage/storage_context.rs | 2 +- 8 files changed, 119 insertions(+), 102 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 92b33122..af7e309a 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -166,13 +166,13 @@ mod util; mod versioning; #[cfg(feature = "full")] mod visualize; +#[cfg(feature = "full")] +mod replication; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeSet; #[cfg(feature = "full")] -use std::{collections::HashMap, fmt, option::Option::None, path::Path}; +use std::{collections::HashMap, option::Option::None, path::Path}; -#[cfg(any(feature = "full", feature = "verify"))] -use element::helpers; #[cfg(any(feature = "full", feature = "verify"))] pub use element::Element; #[cfg(feature = "full")] @@ -197,10 
+197,11 @@ use grovedb_merk::tree::kv::ValueDefinedCostType; #[cfg(feature = "full")] use grovedb_merk::{ self, - tree::{combine_hash, value_hash}, - BatchEntry, CryptoHash, KVIterator, Merk, + BatchEntry, + CryptoHash, KVIterator, Merk, tree::{combine_hash, value_hash}, }; -use grovedb_merk::{proofs::Op, ChunkProducer, Restorer}; +#[cfg(feature = "full")] +use grovedb_merk::{ChunkProducer, proofs::Op, Restorer}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -221,10 +222,14 @@ pub use query::{PathQuery, SizedQuery}; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; #[cfg(feature = "full")] -use crate::helpers::raw_decode; +use crate::element::helpers::raw_decode; #[cfg(feature = "full")] use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; +#[cfg(feature = "full")] +pub use crate::replication::StateSyncInfo; +#[cfg(feature = "full")] +use crate::replication::SubtreesMetadata; #[cfg(feature = "full")] type Hash = [u8; 32]; @@ -235,61 +240,8 @@ pub struct GroveDb { db: RocksDbStorage, } -// Struct governing state sync -pub struct StateSyncInfo<'db> { - // Current Chunk restorer - restorer: Option>>, - // Set of processed prefixes (Path digests) - processed_prefixes: BTreeSet, - // Current processed prefix (Path digest) - current_prefix: Option, - // Set of global chunk ids requested to be fetched and pending for processing. For the - // description of global chunk id check fetch_chunk(). 
- pending_chunks: BTreeSet>, - // Number of processed chunks in current prefix (Path digest) - num_processed_chunks: usize, -} - pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; -// Struct containing information about current subtrees found in GroveDB -pub struct SubtreesMetadata { - // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent - // Subtree elem_value_hash) Note: Parent Subtree actual_value_hash, Parent Subtree - // elem_value_hash are needed when verifying the new constructed subtree after wards. - pub data: BTreeMap>, CryptoHash, CryptoHash)>, -} - -impl SubtreesMetadata { - pub fn new() -> SubtreesMetadata { - SubtreesMetadata { - data: BTreeMap::new(), - } - } -} - -impl Default for SubtreesMetadata { - fn default() -> Self { - Self::new() - } -} - -impl fmt::Debug for SubtreesMetadata { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (prefix, metadata) in self.data.iter() { - let metadata_path = &metadata.0; - let metadata_path_str = util_path_to_string(metadata_path); - writeln!( - f, - " prefix:{:?} -> path:{:?}\n", - hex::encode(prefix), - metadata_path_str - ); - } - Ok(()) - } -} - /// Transaction #[cfg(feature = "full")] pub type Transaction<'db> = >::Transaction; @@ -1258,7 +1210,7 @@ impl GroveDb { let mut res = vec![]; let (global_chunk_id, chunk_data) = chunk; - let (chunk_prefix, chunk_id) = util_split_global_chunk_id(global_chunk_id)?; + let (chunk_prefix, chunk_id) = replication::util_split_global_chunk_id(global_chunk_id)?; match ( &mut state_sync_info.restorer, @@ -1315,7 +1267,7 @@ impl GroveDb { let subtrees_metadata = self.get_subtrees_metadata(tx)?; if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { - println!(" path:{:?} done", util_path_to_string(&value.0)); + println!(" path:{:?} done", replication::util_path_to_string(&value.0)); } for (prefix, prefix_metadata) in &subtrees_metadata.data { @@ -1352,37 +1304,3 @@ impl GroveDb { Ok((res, state_sync_info)) } } - -// 
Converts a path into a human-readable string (for debuting) -pub fn util_path_to_string(path: &[Vec]) -> Vec { - let mut subtree_path_str: Vec = vec![]; - for subtree in path { - let string = std::str::from_utf8(subtree).unwrap(); - subtree_path_str.push(string.parse().unwrap()); - } - subtree_path_str -} - -// Splits the given global chunk id into [SUBTREE_PREFIX:CHUNK_ID] -pub fn util_split_global_chunk_id( - global_chunk_id: &[u8], -) -> Result<(SubtreePrefix, String), Error> { - let chunk_prefix_length: usize = 32; - if global_chunk_id.len() < chunk_prefix_length { - return Err(Error::CorruptedData( - "expected global chunk id of at least 32 length".to_string(), - )); - } - - let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); - let mut array = [0u8; 32]; - array.copy_from_slice(chunk_prefix); - let chunk_prefix_key: SubtreePrefix = array; - let str_chunk_id = String::from_utf8(chunk_id.to_vec()); - match str_chunk_id { - Ok(s) => Ok((chunk_prefix_key, s)), - Err(_) => Err(Error::CorruptedData( - "unable to convert chunk id to string".to_string(), - )), - } -} diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index e69de29b..86f92737 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -0,0 +1,95 @@ +use grovedb_merk::merk::restore::Restorer; +use grovedb_storage::rocksdb_storage::storage_context::context_immediate::PrefixedRocksDbImmediateStorageContext; +use std::collections::{BTreeMap, BTreeSet}; +use grovedb_merk::tree::hash::CryptoHash; +use std::fmt; +use crate::Error; + +pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; + +// Struct governing state sync +pub struct StateSyncInfo<'db> { + // Current Chunk restorer + pub restorer: Option>>, + // Set of processed prefixes (Path digests) + pub processed_prefixes: BTreeSet, + // Current processed prefix (Path digest) + pub current_prefix: Option, + // Set of global chunk ids requested to be fetched and pending for processing. 
For the + // description of global chunk id check fetch_chunk(). + pub pending_chunks: BTreeSet>, + // Number of processed chunks in current prefix (Path digest) + pub num_processed_chunks: usize, +} + +// Struct containing information about current subtrees found in GroveDB +pub struct SubtreesMetadata { + // Map of Prefix (Path digest) -> (Actual path, Parent Subtree actual_value_hash, Parent + // Subtree elem_value_hash) Note: Parent Subtree actual_value_hash, Parent Subtree + // elem_value_hash are needed when verifying the new constructed subtree after wards. + pub data: BTreeMap>, CryptoHash, CryptoHash)>, +} + +impl SubtreesMetadata { + pub fn new() -> SubtreesMetadata { + SubtreesMetadata { + data: BTreeMap::new(), + } + } +} + +impl Default for SubtreesMetadata { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for SubtreesMetadata { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (prefix, metadata) in self.data.iter() { + let metadata_path = &metadata.0; + let metadata_path_str = util_path_to_string(metadata_path); + writeln!( + f, + " prefix:{:?} -> path:{:?}\n", + hex::encode(prefix), + metadata_path_str + ); + } + Ok(()) + } +} + +// Converts a path into a human-readable string (for debuting) +pub fn util_path_to_string(path: &[Vec]) -> Vec { + let mut subtree_path_str: Vec = vec![]; + for subtree in path { + let string = std::str::from_utf8(subtree).unwrap(); + subtree_path_str.push(string.parse().unwrap()); + } + subtree_path_str +} + +// Splits the given global chunk id into [SUBTREE_PREFIX:CHUNK_ID] +pub fn util_split_global_chunk_id( + global_chunk_id: &[u8], +) -> Result<(crate::SubtreePrefix, String), Error> { + let chunk_prefix_length: usize = 32; + if global_chunk_id.len() < chunk_prefix_length { + return Err(Error::CorruptedData( + "expected global chunk id of at least 32 length".to_string(), + )); + } + + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); + let mut array = [0u8; 32]; 
+ array.copy_from_slice(chunk_prefix); + let chunk_prefix_key: crate::SubtreePrefix = array; + let str_chunk_id = String::from_utf8(chunk_id.to_vec()); + match str_chunk_id { + Ok(s) => Ok((chunk_prefix_key, s)), + Err(_) => Err(Error::CorruptedData( + "unable to convert chunk id to string".to_string(), + )), + } +} diff --git a/merk/src/error.rs b/merk/src/error.rs index 7581ba6f..27402425 100644 --- a/merk/src/error.rs +++ b/merk/src/error.rs @@ -27,7 +27,7 @@ // DEALINGS IN THE SOFTWARE. //! Errors - +#[cfg(feature = "full")] use crate::proofs::chunk::error::ChunkError; #[cfg(any(feature = "full", feature = "verify"))] @@ -64,6 +64,7 @@ pub enum Error { CorruptedState(&'static str), /// Chunking error + #[cfg(feature = "full")] #[error("chunking error {0}")] ChunkingError(ChunkError), @@ -73,6 +74,7 @@ pub enum Error { OldChunkingError(&'static str), /// Chunk restoring error + #[cfg(feature = "full")] #[error("chunk restoring error {0}")] ChunkRestoringError(ChunkError), diff --git a/merk/src/lib.rs b/merk/src/lib.rs index caf3837c..18255b27 100644 --- a/merk/src/lib.rs +++ b/merk/src/lib.rs @@ -35,7 +35,7 @@ extern crate core; /// The top-level store API. 
#[cfg(feature = "full")] -mod merk; +pub mod merk; #[cfg(feature = "full")] pub use crate::merk::{chunks::ChunkProducer, options::MerkOptions, restore::Restorer}; diff --git a/merk/src/proofs/tree.rs b/merk/src/proofs/tree.rs index b3db0d77..b3bf9cf1 100644 --- a/merk/src/proofs/tree.rs +++ b/merk/src/proofs/tree.rs @@ -43,6 +43,7 @@ use super::{Node, Op}; use crate::tree::{combine_hash, kv_digest_to_kv_hash, kv_hash, node_hash, value_hash, NULL_HASH}; #[cfg(any(feature = "full", feature = "verify"))] use crate::{error::Error, tree::CryptoHash}; +#[cfg(feature = "full")] use crate::{ proofs::chunk::chunk::{LEFT, RIGHT}, Link, @@ -61,6 +62,7 @@ pub struct Child { } impl Child { + #[cfg(feature = "full")] pub fn as_link(&self) -> Link { let (key, sum) = match &self.tree.node { Node::KV(key, _) | Node::KVValueHash(key, ..) => (key.as_slice(), None), diff --git a/merk/src/tree/mod.rs b/merk/src/tree/mod.rs index a1ac4152..401b8722 100644 --- a/merk/src/tree/mod.rs +++ b/merk/src/tree/mod.rs @@ -37,7 +37,7 @@ mod encoding; #[cfg(feature = "full")] mod fuzz_tests; #[cfg(any(feature = "full", feature = "verify"))] -mod hash; +pub mod hash; #[cfg(feature = "full")] mod iter; #[cfg(feature = "full")] diff --git a/storage/src/rocksdb_storage.rs b/storage/src/rocksdb_storage.rs index 90d0cc21..14c4df5a 100644 --- a/storage/src/rocksdb_storage.rs +++ b/storage/src/rocksdb_storage.rs @@ -28,7 +28,7 @@ //! GroveDB storage layer implemented over RocksDB backend. mod storage; -mod storage_context; +pub mod storage_context; pub mod test_utils; #[cfg(test)] mod tests; diff --git a/storage/src/rocksdb_storage/storage_context.rs b/storage/src/rocksdb_storage/storage_context.rs index 7481fc13..0611d51c 100644 --- a/storage/src/rocksdb_storage/storage_context.rs +++ b/storage/src/rocksdb_storage/storage_context.rs @@ -29,7 +29,7 @@ //! Implementation of prefixed storage context. 
mod batch; -mod context_immediate; +pub mod context_immediate; mod context_no_tx; mod context_tx; mod raw_iterator; From 58903b3ab6c29ec304cc340f60cc96c498487c48 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 14:45:48 +0300 Subject: [PATCH 21/30] more fmt --- grovedb/src/lib.rs | 25 ++++++++++++++----------- grovedb/src/replication.rs | 12 ++++++++---- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index af7e309a..af2b5039 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -159,6 +159,8 @@ mod query; pub mod query_result_type; #[cfg(any(feature = "full", feature = "verify"))] pub mod reference_path; +#[cfg(feature = "full")] +mod replication; #[cfg(all(test, feature = "full"))] mod tests; #[cfg(feature = "full")] @@ -166,8 +168,6 @@ mod util; mod versioning; #[cfg(feature = "full")] mod visualize; -#[cfg(feature = "full")] -mod replication; use std::collections::BTreeSet; #[cfg(feature = "full")] @@ -197,11 +197,11 @@ use grovedb_merk::tree::kv::ValueDefinedCostType; #[cfg(feature = "full")] use grovedb_merk::{ self, - BatchEntry, - CryptoHash, KVIterator, Merk, tree::{combine_hash, value_hash}, + tree::{combine_hash, value_hash}, + BatchEntry, CryptoHash, KVIterator, Merk, }; #[cfg(feature = "full")] -use grovedb_merk::{ChunkProducer, proofs::Op, Restorer}; +use grovedb_merk::{proofs::Op, ChunkProducer, Restorer}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -219,17 +219,17 @@ use grovedb_visualize::DebugByteVectors; #[cfg(any(feature = "full", feature = "verify"))] pub use query::{PathQuery, SizedQuery}; -#[cfg(any(feature = "full", feature = "verify"))] -pub use crate::error::Error; #[cfg(feature = "full")] use crate::element::helpers::raw_decode; -#[cfg(feature = "full")] -use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; -use crate::Error::MerkError; 
+#[cfg(any(feature = "full", feature = "verify"))] +pub use crate::error::Error; #[cfg(feature = "full")] pub use crate::replication::StateSyncInfo; #[cfg(feature = "full")] use crate::replication::SubtreesMetadata; +#[cfg(feature = "full")] +use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; +use crate::Error::MerkError; #[cfg(feature = "full")] type Hash = [u8; 32]; @@ -1267,7 +1267,10 @@ impl GroveDb { let subtrees_metadata = self.get_subtrees_metadata(tx)?; if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { - println!(" path:{:?} done", replication::util_path_to_string(&value.0)); + println!( + " path:{:?} done", + replication::util_path_to_string(&value.0) + ); } for (prefix, prefix_metadata) in &subtrees_metadata.data { diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 86f92737..d43c7787 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -1,8 +1,12 @@ -use grovedb_merk::merk::restore::Restorer; +use std::{ + collections::{BTreeMap, BTreeSet}, + fmt, +}; + +use grovedb_merk::{merk::restore::Restorer, tree::hash::CryptoHash}; +#[rustfmt::skip] use grovedb_storage::rocksdb_storage::storage_context::context_immediate::PrefixedRocksDbImmediateStorageContext; -use std::collections::{BTreeMap, BTreeSet}; -use grovedb_merk::tree::hash::CryptoHash; -use std::fmt; + use crate::Error; pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; From 6580ebadf98b78606b4ab90826c3e346d5a8348c Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 14:59:00 +0300 Subject: [PATCH 22/30] test fixes --- merk/src/merk/restore.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 6b56c0c2..1bd84239 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -522,7 +522,7 @@ mod tests { chunk::tests::traverse_get_node_hash, error::ChunkError::InvalidChunkProof, }, 
test_utils::{ - make_batch_seq, make_batch_seq_with_same_value, make_batch_seq_with_value, TempMerk, + make_batch_seq, TempMerk, }, Error::ChunkRestoringError, Merk, PanicSource, @@ -539,7 +539,8 @@ mod tests { ]; assert!(Restorer::::verify_chunk( non_avl_tree_proof, - &[0; 32] + &[0; 32], + &None ) .is_err()); } @@ -549,7 +550,7 @@ mod tests { // should not accept kv let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -560,7 +561,7 @@ mod tests { // should not accept kvhash let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -571,7 +572,7 @@ mod tests { // should not accept kvdigest let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -582,7 +583,7 @@ mod tests { // should not accept kvvaluehash let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -593,7 +594,7 @@ mod tests { // should not accept kvrefvaluehash let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; let verification_result = - 
Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32]); + Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -644,7 +645,7 @@ mod tests { ); let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); // initial restorer state should contain just the root hash of the source merk assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); @@ -844,7 +845,7 @@ mod tests { // instantiate chunk producer and restorer let mut chunk_producer = ChunkProducer::new(&source_merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap(), None); // perform chunk production and processing let mut chunk_id_opt = Some("".to_string()); @@ -913,7 +914,7 @@ mod tests { ); let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); assert_eq!( @@ -978,7 +979,7 @@ mod tests { ); let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); assert_eq!( @@ -1050,7 +1051,7 @@ mod tests { ); let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, 
merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); // build multi chunk with with limit of 325 let multi_chunk = chunk_producer @@ -1138,7 +1139,7 @@ mod tests { // instantiate chunk producer and restorer let mut chunk_producer = ChunkProducer::new(&source_merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, source_merk.root_hash().unwrap(), None); // perform chunk production and processing let mut chunk_id_opt = Some("".to_string()); @@ -1216,7 +1217,7 @@ mod tests { ); let mut chunk_producer = ChunkProducer::new(&merk).expect("should create chunk producer"); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); assert_eq!( @@ -1250,7 +1251,7 @@ mod tests { ) .unwrap() .unwrap(); - let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap()); + let mut restorer = Restorer::new(restoration_merk, merk.root_hash().unwrap(), None); // assert the state of the restorer assert_eq!(restorer.chunk_id_to_root_hash.len(), 1); From 884a8eeb97c1534a27f08fbb8b8d9c91ea18b07e Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 14:59:49 +0300 Subject: [PATCH 23/30] more fmt --- merk/src/merk/restore.rs | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 1bd84239..44c3f0c6 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -521,9 +521,7 @@ mod tests { proofs::chunk::{ chunk::tests::traverse_get_node_hash, error::ChunkError::InvalidChunkProof, }, - test_utils::{ - make_batch_seq, TempMerk, - }, + test_utils::{make_batch_seq, TempMerk}, 
Error::ChunkRestoringError, Merk, PanicSource, }; @@ -549,8 +547,11 @@ mod tests { fn test_chunk_verification_only_kv_feature_and_hash() { // should not accept kv let invalid_chunk_proof = vec![Op::Push(Node::KV(vec![1], vec![1]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -560,8 +561,11 @@ mod tests { // should not accept kvhash let invalid_chunk_proof = vec![Op::Push(Node::KVHash([0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -571,8 +575,11 @@ mod tests { // should not accept kvdigest let invalid_chunk_proof = vec![Op::Push(Node::KVDigest(vec![0], [0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -582,8 +589,11 @@ mod tests { // should not accept kvvaluehash let invalid_chunk_proof = vec![Op::Push(Node::KVValueHash(vec![0], vec![0], [0; 32]))]; - let verification_result = - Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( @@ -593,8 +603,11 @@ mod tests { // should not accept kvrefvaluehash let invalid_chunk_proof = vec![Op::Push(Node::KVRefValueHash(vec![0], vec![0], [0; 32]))]; - let verification_result = - 
Restorer::::verify_chunk(invalid_chunk_proof, &[0; 32], &None); + let verification_result = Restorer::::verify_chunk( + invalid_chunk_proof, + &[0; 32], + &None, + ); assert!(matches!( verification_result, Err(ChunkRestoringError(InvalidChunkProof( From 01da07988b0d7bce58855bf70e9659485255d1ee Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 15:20:10 +0300 Subject: [PATCH 24/30] refactor --- grovedb/src/lib.rs | 281 ----------------------------------- grovedb/src/replication.rs | 295 ++++++++++++++++++++++++++++++++++++- 2 files changed, 293 insertions(+), 283 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index af2b5039..35385ba8 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -1025,285 +1025,4 @@ impl GroveDb { } Ok(issues) } - - // Returns the discovered subtrees found recursively along with their associated - // metadata Params: - // tx: Transaction. Function returns the data by opening merks at given tx. - // TODO: Add a SubTreePath as param and start searching from that path instead - // of root (as it is now) - pub fn get_subtrees_metadata<'db>( - &'db self, - tx: &'db Transaction, - ) -> Result { - let mut subtrees_metadata = crate::SubtreesMetadata::new(); - - let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; - for subtree in subtrees_root.into_iter() { - let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); - - let current_path = SubtreePath::from(path); - - let parent_path_opt = current_path.derive_parent(); - if parent_path_opt.is_some() { - let parent_path = parent_path_opt.unwrap().0; - let parent_merk = self - .open_transactional_merk_at_path(parent_path, tx, None) - .value?; - let parent_key = subtree.last().unwrap(); - let (elem_value, elem_value_hash) = parent_merk - .get_value_and_value_hash( - parent_key, - true, - 
None::<&fn(&[u8]) -> Option>, - ) - .value - .expect("should get value hash") - .expect("value hash should be some"); - - let actual_value_hash = value_hash(&elem_value).unwrap(); - subtrees_metadata.data.insert( - prefix, - (current_path.to_vec(), actual_value_hash, elem_value_hash), - ); - } else { - subtrees_metadata.data.insert( - prefix, - ( - current_path.to_vec(), - CryptoHash::default(), - CryptoHash::default(), - ), - ); - } - } - Ok(subtrees_metadata) - } - - // Fetch a chunk by global chunk id (should be called by ABCI when - // LoadSnapshotChunk method is called) Params: - // global_chunk_id: Global chunk id in the following format: - // [SUBTREE_PREFIX:CHUNK_ID] SUBTREE_PREFIX: 32 bytes (mandatory) (All zeros - // = Root subtree) CHUNK_ID: 0.. bytes (optional) Traversal instructions to - // the root of the given chunk. Traversal instructions are "1" for left, and - // "0" for right. TODO: Compact CHUNK_ID into bitset for size optimization - // as a subtree can be big hence traversal instructions for the deepest chunks - // tx: Transaction. Function returns the data by opening merks at given tx. 
- // TODO: Make this tx optional: None -> Use latest data - // Returns the Chunk proof operators for the requested chunk - pub fn fetch_chunk<'db>( - &'db self, - global_chunk_id: &[u8], - tx: &'db Transaction, - ) -> Result, Error> { - let chunk_prefix_length: usize = 32; - if global_chunk_id.len() < chunk_prefix_length { - return Err(Error::CorruptedData( - "expected global chunk id of at least 32 length".to_string(), - )); - } - - let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); - - let mut array = [0u8; 32]; - array.copy_from_slice(chunk_prefix); - let chunk_prefix_key: SubtreePrefix = array; - - let subtrees_metadata = self.get_subtrees_metadata(tx)?; - - match subtrees_metadata.data.get(&chunk_prefix_key) { - Some(path_data) => { - let subtree = &path_data.0; - let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - - let merk = self - .open_transactional_merk_at_path(path.into(), tx, None) - .value?; - - if merk.is_empty_tree().unwrap() { - return Ok(vec![]); - } - - let chunk_producer_res = ChunkProducer::new(&merk); - match chunk_producer_res { - Ok(mut chunk_producer) => { - let chunk_res = chunk_producer - .chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); - match chunk_res { - Ok((chunk, _)) => Ok(chunk), - Err(_) => Err(Error::CorruptedData( - "Unable to create to load chunk".to_string(), - )), - } - } - Err(_) => Err(Error::CorruptedData( - "Unable to create Chunk producer".to_string(), - )), - } - } - None => Err(Error::CorruptedData("Prefix not found".to_string())), - } - } - - // Starts a state sync process (should be called by ABCI when OfferSnapshot - // method is called) Params: - // state_sync_info: Consumed StateSyncInfo - // app_hash: Snapshot's AppHash - // tx: Transaction for the state sync - // Returns the first set of global chunk ids that can be fetched from sources (+ - // the StateSyncInfo transferring ownership back to the 
caller) - pub fn start_snapshot_syncing<'db>( - &'db self, - mut state_sync_info: StateSyncInfo<'db>, - app_hash: CryptoHash, - tx: &'db Transaction, - ) -> Result<(Vec>, StateSyncInfo), Error> { - let mut res = vec![]; - - match ( - &mut state_sync_info.restorer, - &state_sync_info.current_prefix, - ) { - (None, None) => { - if state_sync_info.pending_chunks.is_empty() - && state_sync_info.processed_prefixes.is_empty() - { - let root_prefix = [0u8; 32]; - let merk = self - .open_merk_for_replication(SubtreePath::empty(), tx) - .unwrap(); - let restorer = Restorer::new(merk, app_hash, None); - state_sync_info.restorer = Some(restorer); - state_sync_info.current_prefix = Some(root_prefix); - state_sync_info.pending_chunks.insert(root_prefix.to_vec()); - - res.push(root_prefix.to_vec()); - } else { - return Err(Error::InternalError("Invalid internal state sync info")); - } - } - _ => { - return Err(Error::InternalError( - "GroveDB has already started a snapshot syncing", - )); - } - } - - Ok((res, state_sync_info)) - } - - // Apply a chunk (should be called by ABCI when ApplySnapshotChunk method is - // called) Params: - // state_sync_info: Consumed StateSyncInfo - // chunk: (Global chunk id, Chunk proof operators) - // tx: Transaction for the state sync - // Returns the next set of global chunk ids that can be fetched from sources (+ - // the StateSyncInfo transferring ownership back to the caller) - pub fn apply_chunk<'db>( - &'db self, - mut state_sync_info: StateSyncInfo<'db>, - chunk: (&[u8], Vec), - tx: &'db Transaction, - ) -> Result<(Vec>, StateSyncInfo), Error> { - let mut res = vec![]; - - let (global_chunk_id, chunk_data) = chunk; - let (chunk_prefix, chunk_id) = replication::util_split_global_chunk_id(global_chunk_id)?; - - match ( - &mut state_sync_info.restorer, - &state_sync_info.current_prefix, - ) { - (Some(restorer), Some(ref current_prefix)) => { - if *current_prefix != chunk_prefix { - return Err(Error::InternalError("Invalid incoming prefix")); - 
} - if !state_sync_info.pending_chunks.contains(global_chunk_id) { - return Err(Error::InternalError( - "Incoming global_chunk_id not expected", - )); - } - state_sync_info.pending_chunks.remove(global_chunk_id); - if !chunk_data.is_empty() { - match restorer.process_chunk(chunk_id.to_string(), chunk_data) { - Ok(next_chunk_ids) => { - state_sync_info.num_processed_chunks += 1; - for next_chunk_id in next_chunk_ids { - let mut next_global_chunk_id = chunk_prefix.to_vec(); - next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); - state_sync_info - .pending_chunks - .insert(next_global_chunk_id.clone()); - res.push(next_global_chunk_id); - } - } - _ => { - return Err(Error::InternalError("Unable to process incoming chunk")); - } - }; - } - } - _ => { - return Err(Error::InternalError("GroveDB is not in syncing mode")); - } - } - - if res.is_empty() { - if !state_sync_info.pending_chunks.is_empty() { - return Ok((res, state_sync_info)); - } - match ( - state_sync_info.restorer.take(), - state_sync_info.current_prefix.take(), - ) { - (Some(restorer), Some(current_prefix)) => { - if (state_sync_info.num_processed_chunks > 0) && (restorer.finalize().is_err()) - { - return Err(Error::InternalError("Unable to finalize merk")); - } - state_sync_info.processed_prefixes.insert(current_prefix); - - let subtrees_metadata = self.get_subtrees_metadata(tx)?; - if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { - println!( - " path:{:?} done", - replication::util_path_to_string(&value.0) - ); - } - - for (prefix, prefix_metadata) in &subtrees_metadata.data { - if !state_sync_info.processed_prefixes.contains(prefix) { - let (current_path, s_actual_value_hash, s_elem_value_hash) = - &prefix_metadata; - - let subtree_path: Vec<&[u8]> = - current_path.iter().map(|vec| vec.as_slice()).collect(); - let path: &[&[u8]] = &subtree_path; - - let merk = self.open_merk_for_replication(path.into(), tx).unwrap(); - let restorer = - Restorer::new(merk, *s_elem_value_hash, 
Some(*s_actual_value_hash)); - state_sync_info.restorer = Some(restorer); - state_sync_info.current_prefix = Some(*prefix); - state_sync_info.num_processed_chunks = 0; - - let root_chunk_prefix = prefix.to_vec(); - state_sync_info - .pending_chunks - .insert(root_chunk_prefix.clone()); - res.push(root_chunk_prefix); - break; - } - } - } - _ => { - return Err(Error::InternalError("Unable to finalize tree")); - } - } - } - - Ok((res, state_sync_info)) - } } diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index d43c7787..42943c06 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -3,11 +3,18 @@ use std::{ fmt, }; -use grovedb_merk::{merk::restore::Restorer, tree::hash::CryptoHash}; +use grovedb_merk::{ + merk::restore::Restorer, + proofs::Op, + tree::{hash::CryptoHash, kv::ValueDefinedCostType, value_hash}, + ChunkProducer, +}; +use grovedb_path::SubtreePath; +use grovedb_storage::rocksdb_storage::RocksDbStorage; #[rustfmt::skip] use grovedb_storage::rocksdb_storage::storage_context::context_immediate::PrefixedRocksDbImmediateStorageContext; -use crate::Error; +use crate::{replication, Error, GroveDb, Transaction}; pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; @@ -97,3 +104,287 @@ pub fn util_split_global_chunk_id( )), } } + +#[cfg(feature = "full")] +impl GroveDb { + // Returns the discovered subtrees found recursively along with their associated + // metadata Params: + // tx: Transaction. Function returns the data by opening merks at given tx. 
+ // TODO: Add a SubTreePath as param and start searching from that path instead + // of root (as it is now) + pub fn get_subtrees_metadata<'db>( + &'db self, + tx: &'db Transaction, + ) -> Result { + let mut subtrees_metadata = crate::SubtreesMetadata::new(); + + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; + for subtree in subtrees_root.into_iter() { + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + let prefix = RocksDbStorage::build_prefix(path.as_ref().into()).unwrap(); + + let current_path = SubtreePath::from(path); + + let parent_path_opt = current_path.derive_parent(); + if parent_path_opt.is_some() { + let parent_path = parent_path_opt.unwrap().0; + let parent_merk = self + .open_transactional_merk_at_path(parent_path, tx, None) + .value?; + let parent_key = subtree.last().unwrap(); + let (elem_value, elem_value_hash) = parent_merk + .get_value_and_value_hash( + parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ) + .value + .expect("should get value hash") + .expect("value hash should be some"); + + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert( + prefix, + (current_path.to_vec(), actual_value_hash, elem_value_hash), + ); + } else { + subtrees_metadata.data.insert( + prefix, + ( + current_path.to_vec(), + CryptoHash::default(), + CryptoHash::default(), + ), + ); + } + } + Ok(subtrees_metadata) + } + + // Fetch a chunk by global chunk id (should be called by ABCI when + // LoadSnapshotChunk method is called) Params: + // global_chunk_id: Global chunk id in the following format: + // [SUBTREE_PREFIX:CHUNK_ID] SUBTREE_PREFIX: 32 bytes (mandatory) (All zeros + // = Root subtree) CHUNK_ID: 0.. bytes (optional) Traversal instructions to + // the root of the given chunk. Traversal instructions are "1" for left, and + // "0" for right. 
TODO: Compact CHUNK_ID into bitset for size optimization + // as a subtree can be big hence traversal instructions for the deepest chunks + // tx: Transaction. Function returns the data by opening merks at given tx. + // TODO: Make this tx optional: None -> Use latest data + // Returns the Chunk proof operators for the requested chunk + pub fn fetch_chunk<'db>( + &'db self, + global_chunk_id: &[u8], + tx: &'db Transaction, + ) -> Result, Error> { + let chunk_prefix_length: usize = 32; + if global_chunk_id.len() < chunk_prefix_length { + return Err(Error::CorruptedData( + "expected global chunk id of at least 32 length".to_string(), + )); + } + + let (chunk_prefix, chunk_id) = global_chunk_id.split_at(chunk_prefix_length); + + let mut array = [0u8; 32]; + array.copy_from_slice(chunk_prefix); + let chunk_prefix_key: crate::SubtreePrefix = array; + + let subtrees_metadata = self.get_subtrees_metadata(tx)?; + + match subtrees_metadata.data.get(&chunk_prefix_key) { + Some(path_data) => { + let subtree = &path_data.0; + let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + let merk = self + .open_transactional_merk_at_path(path.into(), tx, None) + .value?; + + if merk.is_empty_tree().unwrap() { + return Ok(vec![]); + } + + let chunk_producer_res = ChunkProducer::new(&merk); + match chunk_producer_res { + Ok(mut chunk_producer) => { + let chunk_res = chunk_producer + .chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); + match chunk_res { + Ok((chunk, _)) => Ok(chunk), + Err(_) => Err(Error::CorruptedData( + "Unable to create to load chunk".to_string(), + )), + } + } + Err(_) => Err(Error::CorruptedData( + "Unable to create Chunk producer".to_string(), + )), + } + } + None => Err(Error::CorruptedData("Prefix not found".to_string())), + } + } + + // Starts a state sync process (should be called by ABCI when OfferSnapshot + // method is called) Params: + // state_sync_info: Consumed 
StateSyncInfo + // app_hash: Snapshot's AppHash + // tx: Transaction for the state sync + // Returns the first set of global chunk ids that can be fetched from sources (+ + // the StateSyncInfo transferring ownership back to the caller) + pub fn start_snapshot_syncing<'db>( + &'db self, + mut state_sync_info: StateSyncInfo<'db>, + app_hash: CryptoHash, + tx: &'db Transaction, + ) -> Result<(Vec>, StateSyncInfo), Error> { + let mut res = vec![]; + + match ( + &mut state_sync_info.restorer, + &state_sync_info.current_prefix, + ) { + (None, None) => { + if state_sync_info.pending_chunks.is_empty() + && state_sync_info.processed_prefixes.is_empty() + { + let root_prefix = [0u8; 32]; + let merk = self + .open_merk_for_replication(SubtreePath::empty(), tx) + .unwrap(); + let restorer = Restorer::new(merk, app_hash, None); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(root_prefix); + state_sync_info.pending_chunks.insert(root_prefix.to_vec()); + + res.push(root_prefix.to_vec()); + } else { + return Err(Error::InternalError("Invalid internal state sync info")); + } + } + _ => { + return Err(Error::InternalError( + "GroveDB has already started a snapshot syncing", + )); + } + } + + Ok((res, state_sync_info)) + } + + // Apply a chunk (should be called by ABCI when ApplySnapshotChunk method is + // called) Params: + // state_sync_info: Consumed StateSyncInfo + // chunk: (Global chunk id, Chunk proof operators) + // tx: Transaction for the state sync + // Returns the next set of global chunk ids that can be fetched from sources (+ + // the StateSyncInfo transferring ownership back to the caller) + pub fn apply_chunk<'db>( + &'db self, + mut state_sync_info: StateSyncInfo<'db>, + chunk: (&[u8], Vec), + tx: &'db Transaction, + ) -> Result<(Vec>, StateSyncInfo), Error> { + let mut res = vec![]; + + let (global_chunk_id, chunk_data) = chunk; + let (chunk_prefix, chunk_id) = replication::util_split_global_chunk_id(global_chunk_id)?; + + match 
( + &mut state_sync_info.restorer, + &state_sync_info.current_prefix, + ) { + (Some(restorer), Some(ref current_prefix)) => { + if *current_prefix != chunk_prefix { + return Err(Error::InternalError("Invalid incoming prefix")); + } + if !state_sync_info.pending_chunks.contains(global_chunk_id) { + return Err(Error::InternalError( + "Incoming global_chunk_id not expected", + )); + } + state_sync_info.pending_chunks.remove(global_chunk_id); + if !chunk_data.is_empty() { + match restorer.process_chunk(chunk_id.to_string(), chunk_data) { + Ok(next_chunk_ids) => { + state_sync_info.num_processed_chunks += 1; + for next_chunk_id in next_chunk_ids { + let mut next_global_chunk_id = chunk_prefix.to_vec(); + next_global_chunk_id.extend(next_chunk_id.as_bytes().to_vec()); + state_sync_info + .pending_chunks + .insert(next_global_chunk_id.clone()); + res.push(next_global_chunk_id); + } + } + _ => { + return Err(Error::InternalError("Unable to process incoming chunk")); + } + }; + } + } + _ => { + return Err(Error::InternalError("GroveDB is not in syncing mode")); + } + } + + if res.is_empty() { + if !state_sync_info.pending_chunks.is_empty() { + return Ok((res, state_sync_info)); + } + match ( + state_sync_info.restorer.take(), + state_sync_info.current_prefix.take(), + ) { + (Some(restorer), Some(current_prefix)) => { + if (state_sync_info.num_processed_chunks > 0) && (restorer.finalize().is_err()) + { + return Err(Error::InternalError("Unable to finalize merk")); + } + state_sync_info.processed_prefixes.insert(current_prefix); + + let subtrees_metadata = self.get_subtrees_metadata(tx)?; + if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { + println!( + " path:{:?} done", + replication::util_path_to_string(&value.0) + ); + } + + for (prefix, prefix_metadata) in &subtrees_metadata.data { + if !state_sync_info.processed_prefixes.contains(prefix) { + let (current_path, s_actual_value_hash, s_elem_value_hash) = + &prefix_metadata; + + let subtree_path: Vec<&[u8]> = + 
current_path.iter().map(|vec| vec.as_slice()).collect(); + let path: &[&[u8]] = &subtree_path; + + let merk = self.open_merk_for_replication(path.into(), tx).unwrap(); + let restorer = + Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(*prefix); + state_sync_info.num_processed_chunks = 0; + + let root_chunk_prefix = prefix.to_vec(); + state_sync_info + .pending_chunks + .insert(root_chunk_prefix.clone()); + res.push(root_chunk_prefix); + break; + } + } + } + _ => { + return Err(Error::InternalError("Unable to finalize tree")); + } + } + } + + Ok((res, state_sync_info)) + } +} From de6b28be7c60e83b39d4ac524ac6233222613415 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 15:29:05 +0300 Subject: [PATCH 25/30] refactor --- grovedb/src/lib.rs | 21 +-------------------- grovedb/src/replication.rs | 14 +++++++++++++- tutorials/src/bin/replication.rs | 2 +- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 35385ba8..fd11f10d 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -160,7 +160,7 @@ pub mod query_result_type; #[cfg(any(feature = "full", feature = "verify"))] pub mod reference_path; #[cfg(feature = "full")] -mod replication; +pub mod replication; #[cfg(all(test, feature = "full"))] mod tests; #[cfg(feature = "full")] @@ -169,7 +169,6 @@ mod versioning; #[cfg(feature = "full")] mod visualize; -use std::collections::BTreeSet; #[cfg(feature = "full")] use std::{collections::HashMap, option::Option::None, path::Path}; @@ -200,8 +199,6 @@ use grovedb_merk::{ tree::{combine_hash, value_hash}, BatchEntry, CryptoHash, KVIterator, Merk, }; -#[cfg(feature = "full")] -use grovedb_merk::{proofs::Op, ChunkProducer, Restorer}; use grovedb_path::SubtreePath; #[cfg(feature = "full")] use grovedb_storage::rocksdb_storage::PrefixedRocksDbImmediateStorageContext; @@ -224,10 +221,6 @@ use 
crate::element::helpers::raw_decode; #[cfg(any(feature = "full", feature = "verify"))] pub use crate::error::Error; #[cfg(feature = "full")] -pub use crate::replication::StateSyncInfo; -#[cfg(feature = "full")] -use crate::replication::SubtreesMetadata; -#[cfg(feature = "full")] use crate::util::{root_merk_optional_tx, storage_context_optional_tx}; use crate::Error::MerkError; @@ -251,18 +244,6 @@ pub type TransactionArg<'db, 'a> = Option<&'a Transaction<'db>>; #[cfg(feature = "full")] impl GroveDb { - pub fn create_state_sync_info(&self) -> StateSyncInfo { - let pending_chunks = BTreeSet::new(); - let processed_prefixes = BTreeSet::new(); - StateSyncInfo { - restorer: None, - processed_prefixes, - current_prefix: None, - pending_chunks, - num_processed_chunks: 0, - } - } - /// Opens a given path pub fn open>(path: P) -> Result { let db = RocksDbStorage::default_rocksdb_with_path(path)?; diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 42943c06..a9e60e51 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -107,6 +107,18 @@ pub fn util_split_global_chunk_id( #[cfg(feature = "full")] impl GroveDb { + pub fn create_state_sync_info(&self) -> StateSyncInfo { + let pending_chunks = BTreeSet::new(); + let processed_prefixes = BTreeSet::new(); + StateSyncInfo { + restorer: None, + processed_prefixes, + current_prefix: None, + pending_chunks, + num_processed_chunks: 0, + } + } + // Returns the discovered subtrees found recursively along with their associated // metadata Params: // tx: Transaction. Function returns the data by opening merks at given tx. 
@@ -116,7 +128,7 @@ impl GroveDb { &'db self, tx: &'db Transaction, ) -> Result { - let mut subtrees_metadata = crate::SubtreesMetadata::new(); + let mut subtrees_metadata = crate::replication::SubtreesMetadata::new(); let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; for subtree in subtrees_root.into_iter() { diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 3d523c90..b74d3401 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -1,6 +1,6 @@ use std::collections::VecDeque; use std::path::Path; -use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, StateSyncInfo}; +use grovedb::{operations::insert::InsertOptions, Element, GroveDb, PathQuery, Query, Transaction, replication::StateSyncInfo}; use grovedb::reference_path::ReferencePathType; use rand::{distributions::Alphanumeric, Rng, }; use grovedb::element::SumValue; From e5adf0d30e7b23f2ca960a789419855d207207c8 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 17:08:22 +0300 Subject: [PATCH 26/30] refactor --- grovedb/src/replication.rs | 92 +++++++++++++++++++------------- tutorials/src/bin/replication.rs | 3 +- 2 files changed, 57 insertions(+), 38 deletions(-) diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index a9e60e51..43cdc110 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -14,7 +14,7 @@ use grovedb_storage::rocksdb_storage::RocksDbStorage; #[rustfmt::skip] use grovedb_storage::rocksdb_storage::storage_context::context_immediate::PrefixedRocksDbImmediateStorageContext; -use crate::{replication, Error, GroveDb, Transaction}; +use crate::{replication, Error, GroveDb, Transaction, TransactionArg}; pub(crate) type SubtreePrefix = [u8; blake3::OUT_LEN]; @@ -126,11 +126,11 @@ impl GroveDb { // of root (as it is now) pub fn get_subtrees_metadata<'db>( &'db self, - tx: &'db Transaction, + tx: 
TransactionArg, ) -> Result { let mut subtrees_metadata = crate::replication::SubtreesMetadata::new(); - let subtrees_root = self.find_subtrees(&SubtreePath::empty(), Some(tx)).value?; + let subtrees_root = self.find_subtrees(&SubtreePath::empty(), tx).value?; for subtree in subtrees_root.into_iter() { let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; @@ -138,37 +138,57 @@ impl GroveDb { let current_path = SubtreePath::from(path); - let parent_path_opt = current_path.derive_parent(); - if parent_path_opt.is_some() { - let parent_path = parent_path_opt.unwrap().0; - let parent_merk = self - .open_transactional_merk_at_path(parent_path, tx, None) - .value?; - let parent_key = subtree.last().unwrap(); - let (elem_value, elem_value_hash) = parent_merk - .get_value_and_value_hash( - parent_key, - true, - None::<&fn(&[u8]) -> Option>, - ) - .value - .expect("should get value hash") - .expect("value hash should be some"); - - let actual_value_hash = value_hash(&elem_value).unwrap(); - subtrees_metadata.data.insert( - prefix, - (current_path.to_vec(), actual_value_hash, elem_value_hash), - ); - } else { - subtrees_metadata.data.insert( - prefix, - ( - current_path.to_vec(), - CryptoHash::default(), - CryptoHash::default(), - ), - ); + match (current_path.derive_parent(), subtree.last()) { + (Some((parent_path, _)), Some(parent_key)) => match tx { + None => { + let parent_merk = self + .open_non_transactional_merk_at_path(parent_path, None) + .value?; + if let Ok((Some((elem_value, elem_value_hash)))) = parent_merk + .get_value_and_value_hash( + parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ) + .value + { + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert( + prefix, + (current_path.to_vec(), actual_value_hash, elem_value_hash), + ); + } + } + Some(t) => { + let parent_merk = self + .open_transactional_merk_at_path(parent_path, t, None) + .value?; + if let 
Ok((Some((elem_value, elem_value_hash)))) = parent_merk + .get_value_and_value_hash( + parent_key, + true, + None::<&fn(&[u8]) -> Option>, + ) + .value + { + let actual_value_hash = value_hash(&elem_value).unwrap(); + subtrees_metadata.data.insert( + prefix, + (current_path.to_vec(), actual_value_hash, elem_value_hash), + ); + } + } + }, + _ => { + subtrees_metadata.data.insert( + prefix, + ( + current_path.to_vec(), + CryptoHash::default(), + CryptoHash::default(), + ), + ); + } } } Ok(subtrees_metadata) @@ -203,7 +223,7 @@ impl GroveDb { array.copy_from_slice(chunk_prefix); let chunk_prefix_key: crate::SubtreePrefix = array; - let subtrees_metadata = self.get_subtrees_metadata(tx)?; + let subtrees_metadata = self.get_subtrees_metadata(Some(tx))?; match subtrees_metadata.data.get(&chunk_prefix_key) { Some(path_data) => { @@ -358,7 +378,7 @@ impl GroveDb { } state_sync_info.processed_prefixes.insert(current_prefix); - let subtrees_metadata = self.get_subtrees_metadata(tx)?; + let subtrees_metadata = self.get_subtrees_metadata(Some(tx))?; if let Some(value) = subtrees_metadata.data.get(¤t_prefix) { println!( " path:{:?} done", diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index b74d3401..3d7ea1fa 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -78,8 +78,7 @@ fn main() { println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); println!("\n######### source_subtree_metadata"); - let source_tx = db_source.start_transaction(); - let subtrees_metadata = db_source.get_subtrees_metadata(&source_tx).unwrap(); + let subtrees_metadata = db_source.get_subtrees_metadata(None).unwrap(); println!("{:?}", subtrees_metadata); println!("\n######### db_checkpoint_0 -> db_copy state sync"); From ae3c21d23db7dbb924d1343b49fd35c9c9942212 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 18:36:55 +0300 Subject: [PATCH 27/30] more refactoring --- grovedb/src/replication.rs | 7 +++--- 
tutorials/src/bin/replication.rs | 38 +++++++++++++++----------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 43cdc110..ccbb1ac5 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -203,12 +203,11 @@ impl GroveDb { // "0" for right. TODO: Compact CHUNK_ID into bitset for size optimization // as a subtree can be big hence traversal instructions for the deepest chunks // tx: Transaction. Function returns the data by opening merks at given tx. - // TODO: Make this tx optional: None -> Use latest data // Returns the Chunk proof operators for the requested chunk pub fn fetch_chunk<'db>( &'db self, global_chunk_id: &[u8], - tx: &'db Transaction, + tx: TransactionArg, ) -> Result, Error> { let chunk_prefix_length: usize = 32; if global_chunk_id.len() < chunk_prefix_length { @@ -223,7 +222,7 @@ impl GroveDb { array.copy_from_slice(chunk_prefix); let chunk_prefix_key: crate::SubtreePrefix = array; - let subtrees_metadata = self.get_subtrees_metadata(Some(tx))?; + let subtrees_metadata = self.get_subtrees_metadata(tx)?; match subtrees_metadata.data.get(&chunk_prefix_key) { Some(path_data) => { @@ -232,7 +231,7 @@ impl GroveDb { let path: &[&[u8]] = &subtree_path; let merk = self - .open_transactional_merk_at_path(path.into(), tx, None) + .open_non_transactional_merk_at_path(path.into(), None) .value?; if merk.is_empty_tree().unwrap() { diff --git a/tutorials/src/bin/replication.rs b/tutorials/src/bin/replication.rs index 3d7ea1fa..fc9c058c 100644 --- a/tutorials/src/bin/replication.rs +++ b/tutorials/src/bin/replication.rs @@ -70,25 +70,24 @@ fn main() { let db_destination = create_empty_db(path_destination.clone()); println!("\n######### root_hashes:"); - let root_hash_0 = db_source.root_hash(None).unwrap().unwrap(); - println!("root_hash_0: {:?}", hex::encode(root_hash_0)); + let root_hash_source = db_source.root_hash(None).unwrap().unwrap(); + 
println!("root_hash_source: {:?}", hex::encode(root_hash_source)); let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); - let root_hash_copy = db_destination.root_hash(None).unwrap().unwrap(); - println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); + let root_hash_destination = db_destination.root_hash(None).unwrap().unwrap(); + println!("root_hash_destination: {:?}", hex::encode(root_hash_destination)); - println!("\n######### source_subtree_metadata"); - let subtrees_metadata = db_source.get_subtrees_metadata(None).unwrap(); - println!("{:?}", subtrees_metadata); + println!("\n######### source_subtree_metadata of db_source"); + let subtrees_metadata_source = db_source.get_subtrees_metadata(None).unwrap(); + println!("{:?}", subtrees_metadata_source); - println!("\n######### db_checkpoint_0 -> db_copy state sync"); + println!("\n######### db_checkpoint_0 -> db_destination state sync"); let state_info = db_destination.create_state_sync_info(); - let source_tx = db_source.start_transaction(); - let target_tx = db_destination.start_transaction(); - sync_db_demo(&db_checkpoint_0, &db_destination, state_info, &source_tx, &target_tx).unwrap(); - db_destination.commit_transaction(target_tx).unwrap().expect("expected to commit transaction"); + let tx = db_destination.start_transaction(); + sync_db_demo(&db_checkpoint_0, &db_destination, state_info, &tx).unwrap(); + db_destination.commit_transaction(tx).unwrap().expect("expected to commit transaction"); - println!("\n######### verify db_copy"); + println!("\n######### verify db_destination"); let incorrect_hashes = db_destination.verify_grovedb(None).unwrap(); if incorrect_hashes.len() > 0 { println!("DB verification failed!"); @@ -98,18 +97,18 @@ fn main() { } println!("\n######### root_hashes:"); - let root_hash_0 = db_source.root_hash(None).unwrap().unwrap(); - println!("root_hash_0: {:?}", 
hex::encode(root_hash_0)); + let root_hash_source = db_source.root_hash(None).unwrap().unwrap(); + println!("root_hash_source: {:?}", hex::encode(root_hash_source)); let root_hash_checkpoint_0 = db_checkpoint_0.root_hash(None).unwrap().unwrap(); println!("root_hash_checkpoint_0: {:?}", hex::encode(root_hash_checkpoint_0)); - let root_hash_copy = db_destination.root_hash(None).unwrap().unwrap(); - println!("root_hash_copy: {:?}", hex::encode(root_hash_copy)); + let root_hash_destination = db_destination.root_hash(None).unwrap().unwrap(); + println!("root_hash_destination: {:?}", hex::encode(root_hash_destination)); let query_path = &[MAIN_ΚΕΥ, KEY_INT_0]; let query_key = (20487u32).to_be_bytes().to_vec(); println!("\n######## Query on db_checkpoint_0:"); query_db(&db_checkpoint_0, query_path, query_key.clone()); - println!("\n######## Query on db_copy:"); + println!("\n######## Query on db_destination:"); query_db(&db_destination, query_path, query_key.clone()); return; @@ -224,7 +223,6 @@ fn sync_db_demo( source_db: &GroveDb, target_db: &GroveDb, state_sync_info: StateSyncInfo, - source_tx: &Transaction, target_tx: &Transaction, ) -> Result<(), grovedb::Error> { let app_hash = source_db.root_hash(None).value.unwrap(); @@ -235,7 +233,7 @@ fn sync_db_demo( chunk_queue.extend(chunk_ids); while let Some(chunk_id) = chunk_queue.pop_front() { - let ops = source_db.fetch_chunk(chunk_id.as_slice(), source_tx)?; + let ops = source_db.fetch_chunk(chunk_id.as_slice(), None)?; let (more_chunks, new_state_sync_info) = target_db.apply_chunk(state_sync_info, (chunk_id.as_slice(), ops), target_tx)?; state_sync_info = new_state_sync_info; chunk_queue.extend(more_chunks); From 0e6607599879b8d0c4c492bb236f2aed16efd148 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 18:43:41 +0300 Subject: [PATCH 28/30] more refactoring --- grovedb/src/replication.rs | 49 ++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git 
a/grovedb/src/replication.rs b/grovedb/src/replication.rs index ccbb1ac5..da7cbe87 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -283,15 +283,18 @@ impl GroveDb { && state_sync_info.processed_prefixes.is_empty() { let root_prefix = [0u8; 32]; - let merk = self - .open_merk_for_replication(SubtreePath::empty(), tx) - .unwrap(); - let restorer = Restorer::new(merk, app_hash, None); - state_sync_info.restorer = Some(restorer); - state_sync_info.current_prefix = Some(root_prefix); - state_sync_info.pending_chunks.insert(root_prefix.to_vec()); - - res.push(root_prefix.to_vec()); + if let Ok(merk) = self + .open_merk_for_replication(SubtreePath::empty(), tx) { + let restorer = Restorer::new(merk, app_hash, None); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(root_prefix); + state_sync_info.pending_chunks.insert(root_prefix.to_vec()); + + res.push(root_prefix.to_vec()); + } + else { + return Err(Error::InternalError("Unable to open merk for replication")); + } } else { return Err(Error::InternalError("Invalid internal state sync info")); } @@ -394,18 +397,22 @@ impl GroveDb { current_path.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; - let merk = self.open_merk_for_replication(path.into(), tx).unwrap(); - let restorer = - Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); - state_sync_info.restorer = Some(restorer); - state_sync_info.current_prefix = Some(*prefix); - state_sync_info.num_processed_chunks = 0; - - let root_chunk_prefix = prefix.to_vec(); - state_sync_info - .pending_chunks - .insert(root_chunk_prefix.clone()); - res.push(root_chunk_prefix); + if let Ok(merk) = self.open_merk_for_replication(path.into(), tx) { + let restorer = + Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + state_sync_info.restorer = Some(restorer); + state_sync_info.current_prefix = Some(*prefix); + state_sync_info.num_processed_chunks = 0; + + 
let root_chunk_prefix = prefix.to_vec(); + state_sync_info + .pending_chunks + .insert(root_chunk_prefix.clone()); + res.push(root_chunk_prefix); + } + else { + return Err(Error::InternalError("Unable to open merk for replication")); + } break; } } From 93e70bafc3a03dd2d94d951acc9f12f9598b5b33 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Tue, 30 Apr 2024 18:44:21 +0300 Subject: [PATCH 29/30] fmt --- grovedb/src/replication.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index da7cbe87..6be2d417 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -283,16 +283,14 @@ impl GroveDb { && state_sync_info.processed_prefixes.is_empty() { let root_prefix = [0u8; 32]; - if let Ok(merk) = self - .open_merk_for_replication(SubtreePath::empty(), tx) { + if let Ok(merk) = self.open_merk_for_replication(SubtreePath::empty(), tx) { let restorer = Restorer::new(merk, app_hash, None); state_sync_info.restorer = Some(restorer); state_sync_info.current_prefix = Some(root_prefix); state_sync_info.pending_chunks.insert(root_prefix.to_vec()); res.push(root_prefix.to_vec()); - } - else { + } else { return Err(Error::InternalError("Unable to open merk for replication")); } } else { @@ -398,8 +396,11 @@ impl GroveDb { let path: &[&[u8]] = &subtree_path; if let Ok(merk) = self.open_merk_for_replication(path.into(), tx) { - let restorer = - Restorer::new(merk, *s_elem_value_hash, Some(*s_actual_value_hash)); + let restorer = Restorer::new( + merk, + *s_elem_value_hash, + Some(*s_actual_value_hash), + ); state_sync_info.restorer = Some(restorer); state_sync_info.current_prefix = Some(*prefix); state_sync_info.num_processed_chunks = 0; @@ -409,9 +410,10 @@ impl GroveDb { .pending_chunks .insert(root_chunk_prefix.clone()); res.push(root_chunk_prefix); - } - else { - return Err(Error::InternalError("Unable to open merk for replication")); + } else { + return 
Err(Error::InternalError( + "Unable to open merk for replication", + )); } break; } From b76af609ed0518ac275bc876a74193d92aaf2b17 Mon Sep 17 00:00:00 2001 From: Odysseas Gabrielides Date: Wed, 1 May 2024 10:44:09 +0300 Subject: [PATCH 30/30] suggestions --- grovedb/src/replication.rs | 96 +++++++++++++++++++++++++++----------- merk/src/merk/chunks.rs | 4 +- merk/src/merk/restore.rs | 2 +- 3 files changed, 70 insertions(+), 32 deletions(-) diff --git a/grovedb/src/replication.rs b/grovedb/src/replication.rs index 6be2d417..0484cfa1 100644 --- a/grovedb/src/replication.rs +++ b/grovedb/src/replication.rs @@ -1,6 +1,7 @@ use std::{ collections::{BTreeMap, BTreeSet}, fmt, + str::Utf8Error, }; use grovedb_merk::{ @@ -71,12 +72,16 @@ impl fmt::Debug for SubtreesMetadata { } } -// Converts a path into a human-readable string (for debuting) +// Converts a path into a human-readable string (for debugging) pub fn util_path_to_string(path: &[Vec]) -> Vec { let mut subtree_path_str: Vec = vec![]; for subtree in path { - let string = std::str::from_utf8(subtree).unwrap(); - subtree_path_str.push(string.parse().unwrap()); + let string = std::str::from_utf8(subtree).expect("should be able to convert path"); + subtree_path_str.push( + string + .parse() + .expect("should be able to parse path to string"), + ); } subtree_path_str } @@ -124,10 +129,7 @@ impl GroveDb { // tx: Transaction. Function returns the data by opening merks at given tx. 
// TODO: Add a SubTreePath as param and start searching from that path instead // of root (as it is now) - pub fn get_subtrees_metadata<'db>( - &'db self, - tx: TransactionArg, - ) -> Result { + pub fn get_subtrees_metadata(&self, tx: TransactionArg) -> Result { let mut subtrees_metadata = crate::replication::SubtreesMetadata::new(); let subtrees_root = self.find_subtrees(&SubtreePath::empty(), tx).value?; @@ -144,7 +146,7 @@ impl GroveDb { let parent_merk = self .open_non_transactional_merk_at_path(parent_path, None) .value?; - if let Ok((Some((elem_value, elem_value_hash)))) = parent_merk + if let Ok(Some((elem_value, elem_value_hash))) = parent_merk .get_value_and_value_hash( parent_key, true, @@ -163,7 +165,7 @@ impl GroveDb { let parent_merk = self .open_transactional_merk_at_path(parent_path, t, None) .value?; - if let Ok((Some((elem_value, elem_value_hash)))) = parent_merk + if let Ok(Some((elem_value, elem_value_hash))) = parent_merk .get_value_and_value_hash( parent_key, true, @@ -204,8 +206,8 @@ impl GroveDb { // as a subtree can be big hence traversal instructions for the deepest chunks // tx: Transaction. Function returns the data by opening merks at given tx. 
// Returns the Chunk proof operators for the requested chunk - pub fn fetch_chunk<'db>( - &'db self, + pub fn fetch_chunk( + &self, global_chunk_id: &[u8], tx: TransactionArg, ) -> Result, Error> { @@ -230,29 +232,67 @@ impl GroveDb { let subtree_path: Vec<&[u8]> = subtree.iter().map(|vec| vec.as_slice()).collect(); let path: &[&[u8]] = &subtree_path; - let merk = self - .open_non_transactional_merk_at_path(path.into(), None) - .value?; + match tx { + None => { + let merk = self + .open_non_transactional_merk_at_path(path.into(), None) + .value?; + + if merk.is_empty_tree().unwrap() { + return Ok(vec![]); + } - if merk.is_empty_tree().unwrap() { - return Ok(vec![]); - } + let chunk_producer_res = ChunkProducer::new(&merk); + match chunk_producer_res { + Ok(mut chunk_producer) => match std::str::from_utf8(chunk_id) { + Ok(chunk_id_str) => { + let chunk_res = chunk_producer.chunk(chunk_id_str); + match chunk_res { + Ok((chunk, _)) => Ok(chunk), + Err(_) => Err(Error::CorruptedData( + "Unable to create to load chunk".to_string(), + )), + } + } + Err(_) => Err(Error::CorruptedData( + "Unable to process chunk id".to_string(), + )), + }, + Err(_) => Err(Error::CorruptedData( + "Unable to create Chunk producer".to_string(), + )), + } + } + Some(t) => { + let merk = self + .open_transactional_merk_at_path(path.into(), &t, None) + .value?; + + if merk.is_empty_tree().unwrap() { + return Ok(vec![]); + } - let chunk_producer_res = ChunkProducer::new(&merk); - match chunk_producer_res { - Ok(mut chunk_producer) => { - let chunk_res = chunk_producer - .chunk(String::from_utf8(chunk_id.to_vec()).unwrap().as_str()); - match chunk_res { - Ok((chunk, _)) => Ok(chunk), + let chunk_producer_res = ChunkProducer::new(&merk); + match chunk_producer_res { + Ok(mut chunk_producer) => match std::str::from_utf8(chunk_id) { + Ok(chunk_id_str) => { + let chunk_res = chunk_producer.chunk(chunk_id_str); + match chunk_res { + Ok((chunk, _)) => Ok(chunk), + Err(_) => Err(Error::CorruptedData( + 
"Unable to create to load chunk".to_string(), + )), + } + } + Err(_) => Err(Error::CorruptedData( + "Unable to process chunk id".to_string(), + )), + }, Err(_) => Err(Error::CorruptedData( - "Unable to create to load chunk".to_string(), + "Unable to create Chunk producer".to_string(), )), } } - Err(_) => Err(Error::CorruptedData( - "Unable to create Chunk producer".to_string(), - )), } } None => Err(Error::CorruptedData("Prefix not found".to_string())), diff --git a/merk/src/merk/chunks.rs b/merk/src/merk/chunks.rs index be2ecffc..8f840f91 100644 --- a/merk/src/merk/chunks.rs +++ b/merk/src/merk/chunks.rs @@ -282,15 +282,13 @@ where index: usize, limit: Option, ) -> Result { - let mut chunk_byte_length = 0; - let max_chunk_index = number_of_chunks(self.height); let mut chunk_index = index; // we first get the chunk at the given index // TODO: use the returned chunk index rather than tracking let (chunk_ops, _) = self.chunk_with_index(chunk_index)?; - chunk_byte_length = chunk_ops.encoding_length().map_err(|_e| { + let mut chunk_byte_length = chunk_ops.encoding_length().map_err(|_e| { Error::ChunkingError(ChunkError::InternalError("can't get encoding length")) })?; chunk_index += 1; diff --git a/merk/src/merk/restore.rs b/merk/src/merk/restore.rs index 44c3f0c6..e2439f5c 100644 --- a/merk/src/merk/restore.rs +++ b/merk/src/merk/restore.rs @@ -72,7 +72,7 @@ impl<'db, S: StorageContext<'db>> Restorer { parent_key_value_hash: Option, ) -> Self { let mut chunk_id_to_root_hash = BTreeMap::new(); - chunk_id_to_root_hash.insert(traversal_instruction_as_string(&vec![]), expected_root_hash); + chunk_id_to_root_hash.insert(traversal_instruction_as_string(&[]), expected_root_hash); Self { merk, chunk_id_to_root_hash,