From b4c9fb92c96d9dd95797dbd7710cdf74717bc0fb Mon Sep 17 00:00:00 2001 From: Theodore Bugnet Date: Mon, 2 Sep 2024 14:03:27 +0100 Subject: [PATCH] Narrowing: edge case fixes, documentation, cleanup - reverted the tendermint version bump - fixed edge cases on trees of total size 0 and 1 - fixed incorrect implementation in nmt_proof.rs (now just wraps the simple proof.rs) - fixed missing test cases for the above two issues - cleaned up inconsistent parameter naming to be more clear - cleaned up large argument sets in a few places to make data flow easier to follow - documented all assertions with a description of why they should never break - added documentation to the functions and descriptions of the narrowing logic --- Cargo.toml | 2 +- src/lib.rs | 129 ++++++++++++++----- src/nmt_proof.rs | 22 ++-- src/simple_merkle/proof.rs | 20 ++- src/simple_merkle/tree.rs | 256 ++++++++++++++++++++++++++----------- 5 files changed, 300 insertions(+), 129 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4dd12fc..fabf37a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ nmt-rs = { path = ".", features = ["borsh", "serde"] } borsh = { version = "1" } serde_json = "1.0.96" postcard = { version = "1.0.4", features = ["use-std"] } -tendermint = { version = "0.39.1" } +tendermint = { version = "0.35.0" } [features] default = ["std"] diff --git a/src/lib.rs b/src/lib.rs index 71f76b5..2a9f479 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -422,6 +422,7 @@ pub enum RangeProofType { #[cfg(test)] mod tests { use crate::maybestd::vec::Vec; + use crate::simple_merkle::error::RangeProofError; use crate::NamespaceMerkleHasher; use crate::{ namespaced_hash::{NamespaceId, NamespacedSha2Hasher}, @@ -458,6 +459,20 @@ mod tests { tree } + fn tree_from_one_namespace( + leaves: u64, + namespace: u64, + ) -> DefaultNmt { + let mut tree = DefaultNmt::new(); + let namespace = ns_id_from_u64(namespace); + for i in 0..leaves { + let data = format!("leaf_{i}"); + tree.push_leaf(data.as_bytes(), namespace) + .expect("Failed to push the leaf"); + } + tree + } + /// Builds a tree with N leaves fn tree_with_n_leaves(n: usize) -> DefaultNmt { tree_from_namespace_ids((0..n as u64).collect::>()) @@ -587,6 +602,63 @@ mod tests { } } + fn test_range_proof_narrowing_within_namespace(n: usize) { + let ns_id = 4; + let mut tree = tree_from_one_namespace::(n as u64, ns_id); // since there's a single namespace, the actual ID shouldn't matter + let root = tree.root(); + for i in 1..=n { + for j in 0..=i { + let proof_nmt = NamespaceProof::PresenceProof { + proof: tree.build_range_proof(j..i), + ignore_max_ns: tree.ignore_max_ns, + }; + for k in (j + 1)..=i { + for l in j..=k { + let left_leaf_datas: Vec<_> = + tree.leaves()[j..l].iter().map(|l| l.data()).collect(); + let right_leaf_datas: Vec<_> = + tree.leaves()[k..i].iter().map(|l| l.data()).collect(); + let narrowed_proof_nmt = proof_nmt.narrow_range( + &left_leaf_datas, + &right_leaf_datas, + ns_id_from_u64(ns_id), + ); + if k == l { + // Cannot prove the empty range! + assert!(narrowed_proof_nmt.is_err()); + assert_eq!( + narrowed_proof_nmt.unwrap_err(), + RangeProofError::NoLeavesProvided + ); + continue; + } else { + assert!(narrowed_proof_nmt.is_ok()); + } + let narrowed_proof = narrowed_proof_nmt.unwrap(); + let new_leaves: Vec<_> = tree.leaves()[l..k] + .iter() + .map(|l| l.hash().clone()) + .collect(); + tree.check_range_proof(&root, &new_leaves, narrowed_proof.siblings(), l) + .unwrap(); + } + } + } + } + test_min_and_max_ns_against(&mut tree) + } + + #[test] + fn test_range_proof_narrowing_nmt() { + for x in 0..20 { + test_range_proof_narrowing_within_namespace::<8>(x); + test_range_proof_narrowing_within_namespace::<17>(x); + test_range_proof_narrowing_within_namespace::<24>(x); + test_range_proof_narrowing_within_namespace::(x); + test_range_proof_narrowing_within_namespace::<32>(x); + } + } + /// Builds a tree with n leaves, and then creates and checks proofs of all valid /// ranges, and attempts to narrow every proof and re-check it for the narrowed range fn test_range_proof_narrowing_with_n_leaves(n: usize) { @@ -595,12 +667,8 @@ mod tests { for i in 1..=n { for j in 0..=i { let proof = tree.build_range_proof(j..i); - let leaf_hashes: Vec<_> = tree.leaves()[j..i] - .iter() - .map(|l| l.hash().clone()) - .collect(); - for k in (j + 1)..i { - for l in j..k { + for k in (j + 1)..=i { + for l in j..=k { let left_hashes: Vec<_> = tree.leaves()[j..l] .iter() .map(|l| l.hash().clone()) @@ -609,47 +677,38 @@ mod tests { .iter() .map(|l| l.hash().clone()) .collect(); - let narrowed_proof = proof - .narrow_range_with_hasher( - &left_hashes, - &right_hashes, - NamespacedSha2Hasher::with_ignore_max_ns(tree.ignore_max_ns), - ) - .unwrap(); + let narrowed_proof_simple = proof.narrow_range_with_hasher( + &left_hashes, + &right_hashes, + NamespacedSha2Hasher::with_ignore_max_ns(tree.ignore_max_ns), + ); + if k == l { + // Cannot prove the empty range! + assert!(narrowed_proof_simple.is_err()); + assert_eq!( + narrowed_proof_simple.unwrap_err(), + RangeProofError::NoLeavesProvided + ); + continue; + } else { + assert!(narrowed_proof_simple.is_ok()); + } + let narrowed_proof = narrowed_proof_simple.unwrap(); let new_leaves: Vec<_> = tree.leaves()[l..k] .iter() .map(|l| l.hash().clone()) .collect(); - let res = tree.check_range_proof( - &root, - &new_leaves, - narrowed_proof.siblings(), - l, - ); - if l != k { - assert!(res.is_ok()); - assert_eq!(res.unwrap(), RangeProofType::Complete) - } else { - // Cannot prove the empty range! - assert!(res.is_err()) - } + tree.check_range_proof(&root, &new_leaves, narrowed_proof.siblings(), l) + .unwrap(); } } - let res = tree.check_range_proof(&root, &leaf_hashes, proof.siblings(), j); - if i != j { - assert!(res.is_ok()); - assert_eq!(res.unwrap(), RangeProofType::Complete) - } else { - // Cannot prove the empty range! - assert!(res.is_err()) - } } } test_min_and_max_ns_against(&mut tree) } #[test] - fn test_range_proof_narrowing() { + fn test_range_proof_narrowing_simple() { for x in 0..20 { test_range_proof_narrowing_with_n_leaves::<8>(x); test_range_proof_narrowing_with_n_leaves::<17>(x); diff --git a/src/nmt_proof.rs b/src/nmt_proof.rs index f46fc57..dcd9317 100644 --- a/src/nmt_proof.rs +++ b/src/nmt_proof.rs @@ -100,6 +100,13 @@ where /// Narrows the proof range: uses an existing proof to create /// a new proof for a subrange of the original proof's range + /// + /// # Arguments + /// - left_extra_raw_leaves: The data for the leaves that will narrow the range from the left + /// side (i.e. all the leaves from the left edge of the currently proven range, to the left + /// edge of the new desired shrunk range) + /// - right_extra_raw_leaves: Analogously, data for all the leaves between the right edge of + /// the desired shrunken range, and the right edge of the current proof's range pub fn narrow_range>( &self, left_extra_raw_leaves: &[L], @@ -112,11 +119,6 @@ where )); } - let new_leaf_len = left_extra_raw_leaves.len() + right_extra_raw_leaves.len(); - if new_leaf_len >= self.range_len() { - return Err(RangeProofError::WrongAmountOfLeavesProvided); - } - let leaves_to_hashes = |l: &[L]| -> Vec> { l.iter() .map(|data| { @@ -128,16 +130,10 @@ where let left_extra_hashes = leaves_to_hashes(left_extra_raw_leaves); let right_extra_hashes = leaves_to_hashes(right_extra_raw_leaves); - let mut tree = NamespaceMerkleTree::::with_hasher( - M::with_ignore_max_ns(self.ignores_max_ns()), - ); - - let proof = tree.inner.narrow_range_proof( + let proof = self.merkle_proof().narrow_range_with_hasher( &left_extra_hashes, - self.start_idx() as usize..(self.range_len() - new_leaf_len), &right_extra_hashes, - &mut self.siblings(), - self.start_idx() as usize, + M::with_ignore_max_ns(self.ignores_max_ns()), )?; Ok(Self::PresenceProof { diff --git a/src/simple_merkle/proof.rs b/src/simple_merkle/proof.rs index 6e8a232..137c048 100644 --- a/src/simple_merkle/proof.rs +++ b/src/simple_merkle/proof.rs @@ -1,4 +1,4 @@ -use core::ops::Range; +use core::{cmp::Ordering, ops::Range}; use super::{ db::NoopDb, @@ -84,6 +84,13 @@ where /// Narrows the proof range: uses an existing proof to create /// a new proof for a subrange of the original proof's range + /// + /// # Arguments + /// - left_extra_leaves: The hashes of the leaves that will narrow the range from the left + /// side (i.e. all the leaves from the left edge of the currently proven range, to the left + /// edge of the new desired shrunk range) + /// - right_extra_leaves: Analogously, hashes of all the leaves between the right edge of + /// the desired shrunken range, and the right edge of the current proof's range pub fn narrow_range_with_hasher( &self, left_extra_leaves: &[M::Output], @@ -94,9 +101,16 @@ where .len() .checked_add(right_extra_leaves.len()) .ok_or(RangeProofError::TreeTooLarge)?; - if new_leaf_len >= self.range_len() { - return Err(RangeProofError::WrongAmountOfLeavesProvided); + match new_leaf_len.cmp(&self.range_len()) { + Ordering::Equal => { + // We cannot prove the empty range! + return Err(RangeProofError::NoLeavesProvided); + } + Ordering::Greater => return Err(RangeProofError::WrongAmountOfLeavesProvided), + Ordering::Less => { /* Ok! */ } } + + // Indices relative to the leaves of the entire tree let new_start_idx = (self.start_idx() as usize) .checked_add(left_extra_leaves.len()) .ok_or(RangeProofError::TreeTooLarge)?; diff --git a/src/simple_merkle/tree.rs b/src/simple_merkle/tree.rs index c13b7ba..8aea55f 100644 --- a/src/simple_merkle/tree.rs +++ b/src/simple_merkle/tree.rs @@ -32,6 +32,20 @@ impl TakeFirst for [T] { type BoxedVisitor = Box::Output)>; +/// Helper data structure for immutable data used during proof narrowing recursion. +/// All indices are relative to the leaves of the entire tree. +struct ProofNarrowingParams<'a, M: MerkleHash> { + /// All the leaves inside the old proof range, but to the left of the new (desired) proof range + left_extra_leaves: &'a [M::Output], + /// The start and end indices of the final, narrower proven range. + narrowed_leaf_range: Range, + /// All the leaves inside the old proof range, but to the right of the new (desired) proof range + right_extra_leaves: &'a [M::Output], + /// The starting index (w.r.t. the tree's leaves) of the old proof; equivalently, the index of + /// the first leaf in left_extra_leaves + leaves_start_idx: usize, +} + /// Implements an RFC 6962 compatible merkle tree over an in-memory data store which maps preimages to hashes. pub struct MerkleTree where @@ -364,79 +378,89 @@ where Ok(self.hasher.hash_nodes(&left, &right)) } - #[allow(clippy::too_many_arguments)] + /// Helper for the proof narrowing operation. + /// + /// # Arguments: + /// - params: the immutable data used during recursion + /// - working_range: The range of leaf indices, relative to the entire tree, being currently + /// considered. Recursion starts with Range(0..tree_size). + /// outside the desired new narrower range. + /// the right edge of the old range. + /// - current_proof: A slice containing the proof of the current, wide range. The slice is + /// mutable as the recursion consumes nodes from it and copies them to the output proof. + /// - out: will contain the new proof after recursion finishes fn narrow_range_proof_inner( &self, + params: &ProofNarrowingParams, working_range: Range, - left_extra_leaves: &[M::Output], - new_leaf_range: &Range, - right_extra_leaves: &[M::Output], current_proof: &mut &[M::Output], - leaves_start_idx: usize, out: &mut Vec, ) -> Result<(), RangeProofError> { - assert!(working_range.len() > 1); // sanity check + // Sanity check. This will always be true because: + // - At the top level, the working_range is the tree size, and we handle sizes 0 and 1 as + // special cases + // - When recursing, working_range of length 1 is a base case (we just return the leaf), + // so we will never recurse on it + assert!(working_range.len() > 1); + let split_point = next_smaller_po2(working_range.len()) + working_range.start; // If the left subtree doesn't overlap with the new leaf, get its root and add it to the proof - if new_leaf_range.start >= (split_point) { + if params.narrowed_leaf_range.start >= (split_point) { let sibling = self.partial_tree_subroot_inner( working_range.start..split_point, - left_extra_leaves, current_proof, - leaves_start_idx, + params.left_extra_leaves, + params.leaves_start_idx, )?; out.push(sibling.clone()); } else { - let subtrie_size = split_point - working_range.start; - assert!(subtrie_size > 0); // sanity check - if subtrie_size == 1 { - // If it's a leaf, ensure we have it, and do nothing + let subtree_size = split_point - working_range.start; + assert!(subtree_size > 0); // sanity check: since working_range > 1, each sub-tree must be >= 1 + if subtree_size == 1 { + // If it's a leaf, do nothing let index = working_range.start; - assert!(new_leaf_range.contains(&index)); + // Sanity check: if this fails, there's a bug in calculating the range limits and + // indices somewhere + assert!(params.narrowed_leaf_range.contains(&index)); } else { // Else, recurse self.narrow_range_proof_inner( + params, working_range.start..split_point, - left_extra_leaves, - new_leaf_range, - right_extra_leaves, current_proof, - leaves_start_idx, out, )?; } } // If the right subtree doesn't overlap with the new leaf, get its root and add it to the proof - if new_leaf_range.end <= (split_point) { - let right_leaves_start_idx = leaves_start_idx - .checked_add(left_extra_leaves.len()) - .and_then(|i| i.checked_add(new_leaf_range.len())) + if params.narrowed_leaf_range.end <= (split_point) { + let right_leaves_start_idx = params + .leaves_start_idx + .checked_add(params.left_extra_leaves.len()) + .and_then(|i| i.checked_add(params.narrowed_leaf_range.len())) .ok_or(RangeProofError::TreeTooLarge)?; let sibling = self.partial_tree_subroot_inner( split_point..working_range.end, - right_extra_leaves, current_proof, + params.right_extra_leaves, right_leaves_start_idx, )?; out.push(sibling.clone()); } else { - let subtrie_size = working_range.end - split_point; - assert!(subtrie_size > 0); // sanity check - if subtrie_size == 1 { - // If it's a leaf, ensure we have it, and do nothing + let subtree_size = working_range.end - split_point; + assert!(subtree_size > 0); // sanity check - see left subtree explanation + if subtree_size == 1 { + // If it's a leaf, do nothing let index = split_point; - assert!(new_leaf_range.contains(&index)); + assert!(params.narrowed_leaf_range.contains(&index)); // sanity check - see left subtree explanation } else { // Else, recurse self.narrow_range_proof_inner( + params, split_point..working_range.end, - left_extra_leaves, - new_leaf_range, - right_extra_leaves, current_proof, - leaves_start_idx, out, )?; } @@ -447,69 +471,98 @@ where /// To be used during the narrowing operation /// Calculates a new subroot to be part of the narrowed proof, - /// in an area covered by the old proof and new leaves + /// in an area covered by the old proof and new leaves. + /// + /// All indices are relative to the entire tree. + /// + /// # Arguments + /// - subtree_range: The indices (in the tree) of the leaves of the subtree that we're + /// calculating the subroot of. + /// - extra_leaves: One of the two sets of hashes supplied by the user to narrow down the + /// proof range. Because the two sets are discontiguous, one on each side of the desired new + /// narrower range, only one set at a time is relevant here. + /// - leaves_start_idx: The start of the extra_leaves (relative to the tree). When calculating + /// subroots to the left of the narrowed range (i.e. extra_leaves == left_extra_leaves), this will + /// simply be the (original) proof's start_idx; when calculating subroots to the right, this will + /// be offset correspondingly (i.e. original_start_idx + left_extra_leaves.len() + desired_range_size.len()). fn partial_tree_subroot_inner( &self, - subtrie_range: Range, - extra_leaves: &[M::Output], + subtree_range: Range, current_proof: &mut &[M::Output], + extra_leaves: &[M::Output], leaves_start_idx: usize, ) -> Result { - fn local_subroot_from_leaves( - leaf_hashes: &[M::Output], + // Helper that essentially replicates `compute_root`, but with no side-effects and with + // only a partial leaf set + struct SubrootParams<'a, M: MerkleHash> { + extra_leaves: &'a [M::Output], leaves_start_idx: usize, + hasher: &'a M, + } + fn local_subroot_from_leaves( range: Range, - hasher: &M, + params: &SubrootParams, ) -> Result { if range.len() == 1 { - return leaf_hashes - .get(range.start - leaves_start_idx) + return params + .extra_leaves + .get(range.start - params.leaves_start_idx) .ok_or(RangeProofError::MissingLeaf) .cloned(); } else { let split_point = next_smaller_po2(range.len()) + range.start; - let left = local_subroot_from_leaves( - leaf_hashes, - leaves_start_idx, - range.start..split_point, - hasher, - )?; - let right = local_subroot_from_leaves( - leaf_hashes, - leaves_start_idx, - split_point..range.end, - hasher, - )?; - Ok(hasher.hash_nodes(&left, &right)) + let left = local_subroot_from_leaves(range.start..split_point, params)?; + let right = local_subroot_from_leaves(split_point..range.end, params)?; + Ok(params.hasher.hash_nodes(&left, &right)) } } - // We are operating on a full subtree. So the base cases are: + // We are operating on a full subtree. So the base cases are (where _ is an unknown leaf, + // and # is a leaf included in extra_leaves): + // // [####] - the added leaves are covering the entire range; use them to calculate the subroot - // [____] - there are no added leaves in the range; there is an existing proof node for this subtree + // [____] - there are no added leaves in the range; there is an existing proof node for this entire subtree // In all other cases, we split as normal and recurse on both subtrees. + // + // For example: + // [___#] - We recurse on the two sub-trees [__] and [_#]. The left one will correspond to + // a single proof node hashing both leaves. On the right one, we recurse again + // into [_] and [#]. The left one is a single leaf and must also have been included in the + // proof; the right one was part of the old proved range, and now supplied as part of + // extra_leaves. Now we can hash these two together, and then hash it with the known parent of + // the unknown left two nodes to obtain the root for the 4-wide subtree. let leaves_end_idx = leaves_start_idx + extra_leaves.len(); - if leaves_start_idx <= subtrie_range.start && leaves_end_idx >= subtrie_range.end { - local_subroot_from_leaves(extra_leaves, leaves_start_idx, subtrie_range, &self.hasher) - } else if leaves_start_idx >= subtrie_range.end || leaves_end_idx <= subtrie_range.start { + if leaves_start_idx <= subtree_range.start && leaves_end_idx >= subtree_range.end { + local_subroot_from_leaves( + subtree_range, + &SubrootParams { + extra_leaves, + leaves_start_idx, + hasher: &self.hasher, + }, + ) + } else if leaves_start_idx >= subtree_range.end || leaves_end_idx <= subtree_range.start { return current_proof .slice_take_first() .ok_or(RangeProofError::MissingProofNode) .cloned(); } else { - assert!(subtrie_range.len() > 1); // sanity check - let split_point = next_smaller_po2(subtrie_range.len()) + subtrie_range.start; + // Sanity check. Both in narrow_range_proof_inner and here, we never recurse on ranges + // < 2, as those are base cases (we return the leaves directly). + assert!(subtree_range.len() > 1); + + let split_point = next_smaller_po2(subtree_range.len()) + subtree_range.start; let left = self.partial_tree_subroot_inner( - subtrie_range.start..split_point, - extra_leaves, + subtree_range.start..split_point, current_proof, + extra_leaves, leaves_start_idx, )?; let right = self.partial_tree_subroot_inner( - split_point..subtrie_range.end, - extra_leaves, + split_point..subtree_range.end, current_proof, + extra_leaves, leaves_start_idx, )?; return Ok(self.hasher.hash_nodes(&left, &right)); @@ -603,11 +656,39 @@ where } /// Narrows the proof range: uses an existing proof to create - /// a new proof for a subrange of the original proof's range + /// a new proof for a subrange of the original proof's range. + /// + /// Effectively, we have two ranges of leaves provided, which can make the range narrower from + /// the left or the right respectively (alongside the original proof). The high level logic of + /// building a proof out of that is very similar to the normal build_range_proof logic, with + /// two exceptions: we don't have the root (or most inner nodes), so we recurse based on the + /// leaves and calculate the intermediate hashes we need as we go; and we don't have all the + /// leaves either, so the partial_tree_subroot_inner function calculates inner node roots using + /// information from both the original proof and the leaves we do have. + /// + /// Example: consider the following merkle tree with eight leaves: + /// ```ascii + /// root + /// / \ + /// A B + /// / \ / \ + /// C D E F + /// / \ / \ / \ / \ + /// G H I J K L M N + /// + /// ``` + /// A proof of [H, I, J, K] will contain nodes [G, L, F]. If we want to turn that into a proof + /// of [J], that would need nodes [I, C, B]. + /// We recursively subdivide the total leaf range to find the subtrees that don't overlap the + /// final desired range, just as in the normal build_range_proof - in this case, [G, H], [I], + /// and [K, L, M, N]. We can then combine the information from the proof and the {left|right}_extra_leaves + /// to calculate the subroots of each of those trees - for example, B = hash(E | F), where F is + /// from the original proof, and E is calculated using K (from right_extra_leaves) and L (from + /// the original proof). Thus we arrive at the new proof for the narrower range. pub fn narrow_range_proof( &mut self, left_extra_leaves: &[M::Output], - new_leaf_range: Range, + narrowed_leaf_range: Range, right_extra_leaves: &[M::Output], current_proof: &mut &[M::Output], leaves_start_idx: usize, @@ -620,26 +701,47 @@ where let current_leaf_size = left_extra_leaves .len() - .checked_add(new_leaf_range.len()) + .checked_add(narrowed_leaf_range.len()) .ok_or(RangeProofError::TreeTooLarge)? .checked_add(right_extra_leaves.len()) .ok_or(RangeProofError::TreeTooLarge)?; let tree_size = compute_tree_size(num_right_siblings, leaves_start_idx + current_leaf_size - 1)?; let mut proof = Vec::new(); - self.narrow_range_proof_inner( - 0..tree_size, - left_extra_leaves, - &new_leaf_range, - right_extra_leaves, - current_proof, - leaves_start_idx, - &mut proof, - )?; + match tree_size { + 0 => { + if !(current_proof.is_empty() + && left_extra_leaves.is_empty() + && right_extra_leaves.is_empty()) + { + return Err(RangeProofError::NoLeavesProvided); + } + } + 1 => { + // For trees of size 1, the root is the only possible proof. An empty proof + // is also valid (as the root is provided anyway when verifying). + // As these are the only possible options and they are both valid, + // there is nothing to be done when narrowing. + proof = current_proof.to_vec(); + } + _ => { + self.narrow_range_proof_inner( + &ProofNarrowingParams { + left_extra_leaves, + narrowed_leaf_range: narrowed_leaf_range.clone(), + right_extra_leaves, + leaves_start_idx, + }, + 0..tree_size, + current_proof, + &mut proof, + )?; + } + }; Ok(Proof { siblings: proof, // TODO: is it really safe to convert usize to and from u32 everywhere in this library? - range: new_leaf_range.start as u32..new_leaf_range.end as u32, + range: narrowed_leaf_range.start as u32..narrowed_leaf_range.end as u32, }) }