diff --git a/Cargo.toml b/Cargo.toml index d808d7d..def0d03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,11 @@ name = "nmt-rs" version = "0.1.0" edition = "2021" +description = "A namespaced merkle tree compatible with Celestia" +license = "MIT OR Apache-2.0" +authors = ["Sovereign Labs "] +homepage = "https://www.sovereign.xyz" + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/lib.rs b/src/lib.rs index 9e8caf9..052ace6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,17 @@ #![cfg_attr(not(feature = "std"), no_std)] +#![deny(missing_docs)] +//! This crate implements a Namespaced Merkle Tree compatible with . To quote from their documentation: +//! +//! > A Namespaced Merkle Tree is an ordered Merkle tree that uses a modified hash function so that each node in the tree +//! includes the range of namespaces of the messages in all of the descendants of each node. The leafs in the tree are +//! ordered by the namespace identifiers of the messages. In a namespaced Merkle tree, each non-leaf node in the tree contains +//! the lowest and highest namespace identifiers found in all the leaf nodes that are descendants of the non-leaf node, in addition +//! to the hash of the concatenation of the children of the node. This enables Merkle inclusion proofs to be created that prove to +//! a verifier that all the elements of the tree for a specific namespace have been included in a Merkle inclusion proof. +//! +//! > The concept was first introduced by [@musalbas](https://github.com/musalbas) in the [LazyLedger academic paper](https://arxiv.org/abs/1905.09274). +//! +//! This implementation was developed independently by Sovereign Labs, and is not endorsed by the Celestia foundation. #[cfg(not(feature = "std"))] extern crate alloc; @@ -38,6 +51,7 @@ pub mod nmt_proof; pub mod simple_merkle; const CELESTIA_NS_ID_SIZE: usize = 29; +/// A namespaced merkle tree as used in Celestia. 
Uses a sha256 hasher and 29 byte namespace IDs. pub type CelestiaNmt = NamespaceMerkleTree< MemDb>, NamespacedSha2Hasher, @@ -76,6 +90,7 @@ fn check_proof_completeness( proof_type } +/// A namespaced merkle tree, implemented as a wrapper around a simple merkle tree. pub struct NamespaceMerkleTree { namespace_ranges: hash_or_btree_map::Map, Range>, highest_ns: NamespaceId, @@ -86,12 +101,20 @@ pub struct NamespaceMerkleTree { impl NamespaceMerkleTree where Db: PreimageDb, - M: NamespaceMerkleHasher>, + M: NamespaceMerkleHasher> + Default, { + /// Creates a new tree with the default hasher pub fn new() -> Self { Default::default() } +} +impl NamespaceMerkleTree +where + Db: PreimageDb, + M: NamespaceMerkleHasher>, +{ + /// Creates a new nmt with the provided hasher pub fn with_hasher(hasher: M) -> Self { Self { namespace_ranges: Default::default(), @@ -101,19 +124,20 @@ where } } + /// Adds a leaf to the namespaced merkle tree. Leaves must be pushed in namespace order. pub fn push_leaf( &mut self, raw_data: &[u8], namespace: NamespaceId, ) -> Result<(), &'static str> { - let hash = NamespacedHash::hash_leaf(raw_data, namespace); // Force leaves to be pushed in order if namespace < self.highest_ns { return Err("Leaves' namespaces should be inserted in ascending order"); } + let leaf = + LeafWithHash::new_with_namespace(raw_data.to_vec(), namespace, self.ignore_max_ns); self.highest_ns = namespace; - self.inner - .push_leaf_with_hash_unchecked(raw_data.to_vec(), hash); + self.inner.push_leaf_with_hash(leaf); let leaves_len = self.leaves().len(); match self.namespace_ranges.entry(namespace) { @@ -127,6 +151,7 @@ where Ok(()) } + /// Returns the root of the tree, computing it if necessary. Repeated calls return a cached root. pub fn root(&mut self) -> NamespacedHash { self.inner.root() } @@ -143,7 +168,7 @@ where // so we need to ensure that the root has size 2 or greater. 
match leaves.len() { 0 => { - if root == &NamespacedHash::EMPTY_ROOT && proof.is_empty() { + if root == &M::EMPTY_ROOT && proof.is_empty() { return Ok(RangeProofType::Complete); } return Err(RangeProofError::NoLeavesProvided); @@ -196,6 +221,7 @@ where self.inner.build_range_proof(leaf_range) } + /// Fetch a range of leaves from the tree, along with a proof of their inclusion. pub fn get_range_with_proof( &mut self, leaf_range: Range, @@ -210,10 +236,12 @@ where ) } + /// Get the leaf at a given index in the tree, along with a proof of its inclusion. pub fn get_index_with_proof(&mut self, idx: usize) -> (Vec, Proof) { self.inner.get_index_with_proof(idx) } + /// Get an entire namespace from the tree, along with an inclusion proof for that range. pub fn get_namespace_with_proof( &mut self, namespace: NamespaceId, @@ -228,16 +256,18 @@ where (leaves, self.get_namespace_proof(namespace)) } - pub fn leaves(&self) -> &[LeafWithHash>] { + /// Return all the leaves from the tree. + pub fn leaves(&self) -> &[LeafWithHash] { self.inner.leaves() } + /// Get a proof for the given namespace. 
pub fn get_namespace_proof( &mut self, namespace: NamespaceId, ) -> NamespaceProof { // If the namespace is outside the range covered by the root, we're done - if !self.root().contains(namespace) { + if !self.root().contains::(namespace) { return NamespaceProof::AbsenceProof { proof: Default::default(), ignore_max_ns: self.ignore_max_ns, @@ -261,7 +291,7 @@ where let namespace = self .inner .leaves() - .binary_search_by(|l| l.hash.min_namespace().cmp(&namespace)); + .binary_search_by(|l| l.hash().min_namespace().cmp(&namespace)); // The builtin binary search method returns the index where the item could be inserted while maintaining sorted order, // which is the index of the leaf we want to prove @@ -274,7 +304,7 @@ where proof, ignore_max_ns: self.ignore_max_ns, }; - proof.convert_to_absence_proof(self.inner.leaves()[idx].hash.clone()); + proof.convert_to_absence_proof(self.inner.leaves()[idx].hash().clone()); proof } @@ -285,23 +315,29 @@ where namespace: NamespaceId, proof: &NamespaceProof, ) -> Result<(), RangeProofError> { - if root.is_empty_root() && raw_leaves.is_empty() { + if root.is_empty_root::() && raw_leaves.is_empty() { return Ok(()); } match proof { NamespaceProof::AbsenceProof { leaf, .. 
} => { - if !root.contains(namespace) { + if !root.contains::(namespace) { return Ok(()); } - let leaf = leaf.clone().ok_or(RangeProofError::MalformedProof)?; + let leaf = leaf.clone().ok_or(RangeProofError::MalformedProof( + "Absence proof was inside tree range but did not contain a leaf", + ))?; // Check that they haven't provided an absence proof for a non-empty namespace if !raw_leaves.is_empty() { - return Err(RangeProofError::MalformedProof); + return Err(RangeProofError::MalformedProof( + "provided an absence proof for a non-empty namespace", + )); } // Check that the provided namespace actually precedes the leaf if namespace >= leaf.min_namespace() { - return Err(RangeProofError::MalformedProof); + return Err(RangeProofError::MalformedProof( + "provided leaf must have namespace greater than the namespace which is being proven absent", + )); } let num_left_siblings = compute_num_left_siblings(proof.start_idx() as usize); @@ -310,7 +346,7 @@ where if num_left_siblings > 0 { let rightmost_left_sibling = &siblings[num_left_siblings - 1]; if rightmost_left_sibling.max_namespace() >= namespace { - return Err(RangeProofError::MalformedProof); + return Err(RangeProofError::MalformedProof("proven namespace must be greater than the namespace of the rightmost left sibling")); } } // Then, check that the root is real @@ -321,13 +357,16 @@ where proof.start_idx() as usize, )?; } - NamespaceProof::PresenceProof { .. } => { - if !root.contains(namespace) { + NamespaceProof::PresenceProof { ignore_max_ns, .. 
} => { + if !root.contains::(namespace) { return Err(RangeProofError::TreeDoesNotContainLeaf); } let leaf_hashes: Vec> = raw_leaves .iter() - .map(|data| NamespacedHash::hash_leaf(data.as_ref(), namespace)) + .map(|data| { + M::with_ignore_max_ns(*ignore_max_ns) + .hash_leaf_with_namespace(data.as_ref(), namespace) + }) .collect(); let proof_type = self.check_range_proof( root, @@ -347,7 +386,7 @@ where impl Default for NamespaceMerkleTree where Db: PreimageDb, - M: MerkleHash, + M: MerkleHash + Default, { fn default() -> Self { Self { @@ -359,6 +398,7 @@ where } } +/// Indicates whether the proof includes all leaves from every namespace it covers. #[derive(Debug, PartialEq, Clone, Copy)] pub enum RangeProofType { /// A range proof over a single namespace is complete if it includes all the leaves @@ -375,6 +415,7 @@ pub enum RangeProofType { #[cfg(test)] mod tests { use crate::maybestd::{format, vec::Vec}; + use crate::NamespaceMerkleHasher; use crate::{ namespaced_hash::{NamespaceId, NamespacedSha2Hasher}, nmt_proof::NamespaceProof, @@ -450,7 +491,9 @@ mod tests { unreachable!(); }; let data = format!("leaf_{i}").as_bytes().to_vec(); - *leaf = Some(NamespacedHash::hash_leaf(&data, ns_id_from_u64(i))); + *leaf = Some( + NamespacedSha2Hasher::default().hash_leaf_with_namespace(&data, ns_id_from_u64(i)), + ); proof .verify_complete_namespace(&tree.root(), no_leaves, ns_id_from_u64(2)) .unwrap_err(); @@ -509,8 +552,10 @@ mod tests { for i in 1..=n { for j in 0..=i { let proof = tree.build_range_proof(j..i); - let leaf_hashes: Vec<_> = - tree.leaves()[j..i].iter().map(|l| l.hash.clone()).collect(); + let leaf_hashes: Vec<_> = tree.leaves()[j..i] + .iter() + .map(|l| l.hash().clone()) + .collect(); let res = tree.check_range_proof(&root, &leaf_hashes, proof.siblings(), j); if i != j { assert!(res.is_ok()); @@ -543,7 +588,7 @@ mod tests { let _ = tree.push_leaf(x.to_be_bytes().as_ref(), namespace); } let root = tree.root(); - let leaf_hashes: Vec<_> = 
tree.leaves().iter().map(|x| x.hash.clone()).collect(); + let leaf_hashes: Vec<_> = tree.leaves().iter().map(|x| x.hash().clone()).collect(); // For each potential range of size four, build and check a range proof for i in 0..=28 { @@ -617,7 +662,7 @@ mod tests { let _ = tree.push_leaf(x.to_be_bytes().as_ref(), namespace); } let root = tree.root(); - let raw_leaves: Vec> = tree.leaves().iter().map(|x| x.data.clone()).collect(); + let raw_leaves: Vec> = tree.leaves().iter().map(|x| x.data().to_vec()).collect(); // Build proofs for each range that's actually included, and check that the range can be retrieved correctly for (namespace, range) in tree.namespace_ranges.clone().iter() { diff --git a/src/namespaced_hash.rs b/src/namespaced_hash.rs index 2b50bbb..2159310 100644 --- a/src/namespaced_hash.rs +++ b/src/namespaced_hash.rs @@ -2,12 +2,17 @@ use crate::maybestd::{cmp, fmt, marker::PhantomData, vec::Vec}; use sha2::{Digest, Sha256}; use crate::simple_merkle::tree::MerkleHash; +/// The length of a hash in bytes pub const HASH_LEN: usize = 32; -pub type Hasher = Sha256; +/// The default hasher. 
Currently sha256 +pub type DefaultHasher = Sha256; +/// A domain separator indicating that a node is a leaf pub const LEAF_DOMAIN_SEPARATOR: [u8; 1] = [0u8]; +/// A domain separator indicating that a node is internal pub const INTERNAL_NODE_DOMAIN_SEPARATOR: [u8; 1] = [1u8]; +/// A sha256 hasher which also supports namespacing #[derive(Debug, Clone, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct NamespacedSha2Hasher { @@ -15,7 +20,9 @@ pub struct NamespacedSha2Hasher { _data: PhantomData<[u8; NS_ID_SIZE]>, } -impl NamespaceMerkleHasher for NamespacedSha2Hasher { +impl NamespaceMerkleHasher + for NamespacedSha2Hasher +{ fn with_ignore_max_ns(ignore_max_ns: bool) -> Self { Self { ignore_max_ns, @@ -26,8 +33,24 @@ impl NamespaceMerkleHasher for NamespacedSha2Hasher bool { self.ignore_max_ns } + + fn hash_leaf_with_namespace( + &self, + data: &[u8], + namespace: NamespaceId, + ) -> ::Output { + let mut output = NamespacedHash::with_min_and_max_ns(namespace, namespace); + let mut hasher = Sha256::new_with_prefix(LEAF_DOMAIN_SEPARATOR); + hasher.update(namespace.as_ref()); + hasher.update(data.as_ref()); + output.set_hash(hasher.finalize().as_ref()); + output + } } +// For tests, we add a default constructor which ignores the max namespace. +// For actual use, this default should be set at the tree level, not the hasher level. +#[cfg(test)] impl Default for NamespacedSha2Hasher { fn default() -> Self { Self { @@ -37,22 +60,39 @@ impl Default for NamespacedSha2Hasher { } } -pub trait NamespaceMerkleHasher: MerkleHash { +/// An extension of [`MerkleHash`] indicating that the hasher is namespace aware. This allows for the creation of +/// namespaced merkle trees and namespaced merkle proofs.
+pub trait NamespaceMerkleHasher: MerkleHash { + /// Create a new hasher which ignores the max namespace fn with_ignore_max_ns(ignore_max_ns: bool) -> Self; + /// Check whether the hasher ignores the max namespace fn ignores_max_ns(&self) -> bool; + /// Hash the given data and namespace + fn hash_leaf_with_namespace( + &self, + data: &[u8], + namespace: NamespaceId, + ) -> ::Output; } impl MerkleHash for NamespacedSha2Hasher { type Output = NamespacedHash; - const EMPTY_ROOT: Self::Output = NamespacedHash::EMPTY_ROOT; + const EMPTY_ROOT: NamespacedHash = NamespacedHash { + min_ns: NamespaceId([0; NS_ID_SIZE]), + max_ns: NamespaceId([0; NS_ID_SIZE]), + hash: [ + 227, 176, 196, 66, 152, 252, 28, 20, 154, 251, 244, 200, 153, 111, 185, 36, 39, 174, + 65, 228, 100, 155, 147, 76, 164, 149, 153, 27, 120, 82, 184, 85, + ], + }; fn hash_leaf(&self, data: &[u8]) -> Self::Output { let namespace_bytes = data[..NS_ID_SIZE].try_into().expect("Leaf of invalid size"); let namespace = NamespaceId(namespace_bytes); let mut output = NamespacedHash::with_min_and_max_ns(namespace, namespace); - let mut hasher = Hasher::new_with_prefix(LEAF_DOMAIN_SEPARATOR); + let mut hasher = DefaultHasher::new_with_prefix(LEAF_DOMAIN_SEPARATOR); hasher.update(data.as_ref()); output.set_hash(hasher.finalize().as_ref()); output @@ -62,7 +102,7 @@ impl MerkleHash for NamespacedSha2Hasher { if left.max_namespace() > right.min_namespace() { panic!("Invalid nodes: left max namespace must be <= right min namespace") } - let mut hasher = Hasher::new_with_prefix(INTERNAL_NODE_DOMAIN_SEPARATOR); + let mut hasher = DefaultHasher::new_with_prefix(INTERNAL_NODE_DOMAIN_SEPARATOR); let max_nsid = NamespaceId::::max_id(); let min_ns = cmp::min(left.min_namespace(), right.min_namespace()); @@ -84,6 +124,7 @@ impl MerkleHash for NamespacedSha2Hasher { } } +/// A namespace identifier #[derive(Debug, PartialEq, PartialOrd, Eq, Ord, Copy, Clone, Hash)] #[cfg_attr(any(test, feature = "borsh"), 
derive(borsh::BorshSerialize))] pub struct NamespaceId(pub [u8; NS_ID_SIZE]); @@ -95,19 +136,23 @@ impl Default for NamespaceId { } impl NamespaceId { + /// The maximum possible namespace id pub const MAX_ID: NamespaceId = NamespaceId([0xff; NS_ID_SIZE]); - pub const MAX_RESERVED_ID: NamespaceId = { + /// In celestia, 256 namespaces are reserved for "system" data. This is the maximum reserved namespace. + pub const MAX_RESERVED_ID_ON_CELESTIA: NamespaceId = { let mut max_reserved = [0; NS_ID_SIZE]; max_reserved[NS_ID_SIZE - 1] = 255; Self(max_reserved) }; + /// Returns maximum possible namespace id pub const fn max_id() -> Self { Self::MAX_ID } - pub fn is_reserved(&self) -> bool { - self <= &Self::MAX_RESERVED_ID + /// Indicates whether the namespace is reserved for system data on Celestia. + pub fn is_reserved_on_celestia(&self) -> bool { + self <= &Self::MAX_RESERVED_ID_ON_CELESTIA } } @@ -117,6 +162,7 @@ impl AsRef<[u8]> for NamespaceId { } } +/// An error indicating that a namespace is invalid #[derive(Debug, PartialEq, Copy, Clone)] pub struct InvalidNamespace; @@ -140,6 +186,7 @@ impl TryFrom<&[u8]> for NamespaceId { } } +/// A hash of some data, together with a namespace range #[derive(Debug, PartialEq, Clone, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(any(test, feature = "borsh"), derive(borsh::BorshSerialize))] pub struct NamespacedHash { @@ -247,19 +294,12 @@ impl Default for NamespacedHash { } impl NamespacedHash { - pub const EMPTY_ROOT: NamespacedHash = Self { - min_ns: NamespaceId([0; NS_ID_SIZE]), - max_ns: NamespaceId([0; NS_ID_SIZE]), - hash: [ - 227, 176, 196, 66, 152, 252, 28, 20, 154, 251, 244, 200, 153, 111, 185, 36, 39, 174, - 65, 228, 100, 155, 147, 76, 164, 149, 153, 27, 120, 82, 184, 85, - ], - }; - + /// Returns the size of the hash in bytes pub const fn size() -> usize { 2 * NS_ID_SIZE + HASH_LEN } + /// Construct a new namespaced hash from the provided components pub const fn new( min_ns: NamespaceId, max_ns: NamespaceId, @@ -272,6 +312,7 
@@ impl NamespacedHash { } } + /// Construct a namespaced hash with the provided namespace range and the zero hash pub fn with_min_and_max_ns( min_ns: NamespaceId, max_ns: NamespaceId, @@ -283,14 +324,17 @@ impl NamespacedHash { } } + /// Returns the min namespace id of the hash pub fn min_namespace(&self) -> NamespaceId { self.min_ns } + /// Returns the max namespace id of the hash pub fn max_namespace(&self) -> NamespaceId { self.max_ns } + /// Returns the hash without the namespace range pub fn hash(&self) -> [u8; HASH_LEN] { self.hash } @@ -299,25 +343,22 @@ impl NamespacedHash { self.hash.copy_from_slice(new_hash) } - pub fn contains(&self, namespace: NamespaceId) -> bool { + /// Check if the given hash includes the provided namespace under the given hasher + pub fn contains>( + &self, + namespace: NamespaceId, + ) -> bool { self.min_namespace() <= namespace && self.max_namespace() >= namespace - && !self.is_empty_root() - } - - pub fn is_empty_root(&self) -> bool { - self == &Self::EMPTY_ROOT + && !self.is_empty_root::() } - pub fn hash_leaf(raw_data: impl AsRef<[u8]>, namespace: NamespaceId) -> Self { - let mut output = NamespacedHash::with_min_and_max_ns(namespace, namespace); - let mut hasher = Hasher::new_with_prefix(LEAF_DOMAIN_SEPARATOR); - hasher.update(namespace.as_ref()); - hasher.update(raw_data.as_ref()); - output.set_hash(hasher.finalize().as_ref()); - output + /// Check if the hash is the empty root under the given hasher + pub fn is_empty_root>(&self) -> bool { + self == &M::EMPTY_ROOT } + /// Returns an iterator of the bytes of the namespaced hash pub fn iter(&self) -> impl Iterator { self.min_ns .0 @@ -327,6 +368,7 @@ impl NamespacedHash { } } +/// The error returned when failing to convert a slice to a namespaced hash #[derive(Debug, PartialEq, Copy, Clone)] pub struct InvalidNamespacedHash; diff --git a/src/nmt_proof.rs b/src/nmt_proof.rs index 17606d5..dcf76f7 100644 --- a/src/nmt_proof.rs +++ b/src/nmt_proof.rs @@ -1,3 +1,7 @@ +//! 
Adds "namespacing" semantics to proofs for the simple merkle tree, enabling +//! consumers to check that +//! - A range of leaves forms a complete namespace +//! - A range of leaves all exist in the same namespace use crate::maybestd::{mem, vec::Vec}; use crate::{ namespaced_hash::{NamespaceId, NamespaceMerkleHasher, NamespacedHash}, @@ -19,22 +23,30 @@ use crate::{ )] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum NamespaceProof { + /// A proof that some item is absent from the tree AbsenceProof { + /// The range proof against the inner merkle tree proof: Proof, + /// Whether to treat the maximum possible namespace as a special marker value and ignore it in computing namespace ranges ignore_max_ns: bool, + /// A leaf that *is* present in the tree, if the namespace being proven absent falls within + /// the namespace range covered by the root. leaf: Option>, }, + /// A proof that some item is included in the tree PresenceProof { + /// The range proof against the inner merkle tree proof: Proof, + /// Whether to treat the maximum possible namespace as a special marker value and ignore it in computing namespace ranges ignore_max_ns: bool, }, } impl NamespaceProof where - M: NamespaceMerkleHasher>, + M: NamespaceMerkleHasher>, { - /// Verify that the provided *raw* leaves occur in the provided namespace, using this proof + /// Verify that the provided *raw* leaves are a complete namespace. This may be a proof of presence or absence.
pub fn verify_complete_namespace( &self, root: &NamespacedHash, @@ -51,7 +63,7 @@ where tree.verify_namespace(root, raw_leaves, namespace, self) } - /// Verify a range proof + /// Verify that the provided *raw* leaves (1) are present and (2) form a contiguous subset of some namespace pub fn verify_range( &self, root: &NamespacedHash, @@ -59,7 +71,9 @@ where leaf_namespace: NamespaceId, ) -> Result<(), RangeProofError> { if self.is_of_absence() { - return Err(RangeProofError::MalformedProof); + return Err(RangeProofError::MalformedProof( + "Cannot prove that a partial namespace is absent", + )); }; if raw_leaves.len() != self.range_len() { @@ -68,7 +82,10 @@ where let leaf_hashes: Vec<_> = raw_leaves .iter() - .map(|data| NamespacedHash::hash_leaf(data.as_ref(), leaf_namespace)) + .map(|data| { + M::with_ignore_max_ns(self.ignores_max_ns()) + .hash_leaf_with_namespace(data.as_ref(), leaf_namespace) + }) .collect(); let tree = NamespaceMerkleTree::::with_hasher( M::with_ignore_max_ns(self.ignores_max_ns()), @@ -81,6 +98,7 @@ where ) } + /// Convert a proof of the presence of some leaf to the proof of the absence of another leaf pub fn convert_to_absence_proof(&mut self, leaf: NamespacedHash) { match self { NamespaceProof::AbsenceProof { ..
} => {} @@ -105,22 +123,27 @@ where } } + /// Returns the siblings provided as part of the proof pub fn siblings(&self) -> &[NamespacedHash] { self.merkle_proof().siblings() } + /// Returns the index of the first leaf in the proof pub fn start_idx(&self) -> u32 { self.merkle_proof().start_idx() } + /// Returns the index *after* the last leaf in the proof pub fn end_idx(&self) -> u32 { self.merkle_proof().end_idx() } + /// Returns the number of leaves covered by the proof fn range_len(&self) -> usize { self.merkle_proof().range_len() } + /// Returns the leftmost node to the right of the proven range, if one exists pub fn leftmost_right_sibling(&self) -> Option<&NamespacedHash> { let siblings = self.siblings(); let num_left_siblings = compute_num_left_siblings(self.start_idx() as usize); @@ -130,6 +153,7 @@ where None } + /// Returns the rightmost node to the left of the proven range, if one exists pub fn rightmost_left_sibling(&self) -> Option<&NamespacedHash> { let siblings = self.siblings(); let num_left_siblings = compute_num_left_siblings(self.start_idx() as usize); @@ -146,6 +170,7 @@ where } } + /// Returns true if the proof is an absence proof pub fn is_of_absence(&self) -> bool { match self { Self::AbsenceProof { .. } => true, @@ -153,6 +178,7 @@ where } } + /// Returns true if the proof is a presence proof pub fn is_of_presence(&self) -> bool { !self.is_of_absence() } diff --git a/src/simple_merkle/db.rs b/src/simple_merkle/db.rs index 5744692..5729cbf 100644 --- a/src/simple_merkle/db.rs +++ b/src/simple_merkle/db.rs @@ -1,4 +1,9 @@ -use crate::maybestd::{hash::Hash, vec::Vec}; +use crate::{ + maybestd::{hash::Hash, vec::Vec}, + NamespaceId, NamespaceMerkleHasher, NamespacedHash, +}; + +use super::tree::MerkleHash; #[cfg(not(feature = "std"))] trait HashType: Eq + Hash + crate::maybestd::cmp::Ord {} @@ -12,6 +17,7 @@ trait HashType: Eq + Hash {} #[cfg(feature = "std")] impl HashType for H {} +/// Maintains a mapping from hash to preimage in memory. 
Backed by a [`crate::maybestd::hash_or_btree_map::Map>`] #[derive(Default)] pub struct MemDb(crate::maybestd::hash_or_btree_map::Map>); @@ -28,32 +34,85 @@ impl PreimageWriter for MemDb { impl PreimageDb for MemDb {} +/// The raw data of the leaf, together with its hash under some [`MerkleHash`]er #[derive(Clone)] -pub struct LeafWithHash { - pub data: Vec, - pub hash: H, +pub struct LeafWithHash { + data: Vec, + hash: H::Output, +} + +impl LeafWithHash { + /// Construct a [`LeafWithHash`] by hashing the provided data + pub fn new(data: Vec) -> Self { + let hash = H::default().hash_leaf(&data); + Self { data, hash } + } } +impl LeafWithHash { + /// Construct a [`LeafWithHash`] by hashing the provided data + pub fn with_hasher(data: Vec, hasher: &H) -> Self { + let hash = hasher.hash_leaf(&data); + Self { data, hash } + } + + /// Returns the raw data from the leaf + pub fn data(&self) -> &[u8] { + &self.data + } + + /// Returns the hash of the leaf data + pub fn hash(&self) -> &H::Output { + &self.hash + } +} + +impl< + M: NamespaceMerkleHasher>, + const NS_ID_SIZE: usize, + > LeafWithHash +{ + /// Create a new leaf with the provided namespace. Only available if the hasher supports namespacing. 
+ pub fn new_with_namespace( + data: Vec, + namespace: NamespaceId, + ignore_max_ns: bool, + ) -> Self { + let hasher = M::with_ignore_max_ns(ignore_max_ns); + let hash = hasher.hash_leaf_with_namespace(&data, namespace); + Self { data, hash } + } +} + +/// A node of a merkle tree #[derive(PartialEq, Clone, Debug)] pub enum Node { + /// A leaf node contains raw data Leaf(Vec), + /// An inner node is the concatenation of two child nodes Inner(H, H), } +/// The reader trait for a data store that maps hashes to preimages pub trait PreimageReader { + /// Get the preimage of a given hash fn get(&self, image: &H) -> Option<&Node>; } +/// The writer trait for a data store that maps hashes to preimages pub trait PreimageWriter { + /// Store the preimage of a given hash fn put(&mut self, image: H, preimage: Node); } +/// A trait representing read and write access to a data store that maps hashes to their preimages pub trait PreimageDb: PreimageReader + PreimageWriter + Default {} /// A PreimageDB that drops all stored items. Should only be used in trees that -/// do not create proofs (i.e. trees used only for proof verification) +/// do not create proofs (i.e. trees used only for proof verification) #[derive(Default)] pub struct NoopDb; + impl PreimageReader for NoopDb { fn get(&self, _image: &H) -> Option<&Node> { None diff --git a/src/simple_merkle/error.rs b/src/simple_merkle/error.rs index 7abf60b..ea6cde4 100644 --- a/src/simple_merkle/error.rs +++ b/src/simple_merkle/error.rs @@ -1,14 +1,22 @@ +/// An error that occurred while trying to check a claimed range proof for a merkle tree. #[derive(Debug, PartialEq, Clone, Copy)] pub enum RangeProofError { + /// The tree is not empty, but no leaves were provided. This proof is malformed - even proofs of absence must provide a leaf.
NoLeavesProvided, + /// The proof is malformed - the number of leaves provided does not match the claimed size of the range WrongAmountOfLeavesProvided, + /// The claimed proof does not verify against the provided root InvalidRoot, + /// The claimed range was invalid because it left out a leaf MissingLeaf, + /// The proof is missing a node that was needed for verification MissingProofNode, + /// A claimed leaf was not actually present in the tree TreeDoesNotContainLeaf, - TreeIsEmpty, + /// The claimed tree exceeds the maximum allowed size (currently 2^32 leaves) TreeTooLarge, /// Indicates that the tree is not properly ordered by namespace MalformedTree, - MalformedProof, + /// A catch all error which indicates that the proof is malformed + MalformedProof(&'static str), } diff --git a/src/simple_merkle/mod.rs b/src/simple_merkle/mod.rs index cb2eee3..a68967a 100644 --- a/src/simple_merkle/mod.rs +++ b/src/simple_merkle/mod.rs @@ -1,5 +1,13 @@ +//! Implements a simple [RFC 6962](https://www.rfc-editor.org/rfc/rfc6962#section-2.1) compatible merkle tree +//! over an in-memory data store which maps preimages to hashes. + +/// Defines traits and types for storing hashes and preimages. pub mod db; +/// Defines errors that might arise in proof verification. pub mod error; +/// Defines proofs on the tree. pub mod proof; +/// Defines the merkle tree itself. pub mod tree; +/// Utilities for computing facts about trees from proofs. 
pub mod utils; diff --git a/src/simple_merkle/proof.rs b/src/simple_merkle/proof.rs index 1e42e6a..002d394 100644 --- a/src/simple_merkle/proof.rs +++ b/src/simple_merkle/proof.rs @@ -12,20 +12,31 @@ use crate::maybestd::vec::Vec; /// /// This proof may prove the presence of some set of leaves, or the /// absence of a particular namespace -#[derive(Debug, PartialEq, Clone, Default)] +#[derive(Debug, PartialEq, Clone)] #[cfg_attr( feature = "borsh", derive(borsh::BorshSerialize, borsh::BorshDeserialize) )] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Proof { + /// The siblings to be used to build the path to the root. pub siblings: Vec, + /// The range of indices covered by the proof. pub range: Range, } +impl Default for Proof { + fn default() -> Self { + Self { + siblings: Default::default(), + range: Default::default(), + } + } +} + impl Proof where - M: MerkleHash, + M: MerkleHash + Default, { /// Verify a range proof pub fn verify_range( @@ -45,23 +56,53 @@ where self.start_idx() as usize, ) } +} + +impl Proof +where + M: MerkleHash, +{ + /// Verify a range proof + pub fn verify_range_with_hasher( + &self, + root: &M::Output, + leaf_hashes: &[M::Output], + hasher: M, + ) -> Result<(), RangeProofError> { + if leaf_hashes.len() != self.range_len() { + return Err(RangeProofError::WrongAmountOfLeavesProvided); + } + + let tree = MerkleTree::::with_hasher(hasher); + tree.check_range_proof( + root, + leaf_hashes, + self.siblings(), + self.start_idx() as usize, + ) + } + /// Returns the siblings provided as part of the proof. pub fn siblings(&self) -> &Vec { &self.siblings } + /// Returns the index of the first leaf covered by the proof. pub fn start_idx(&self) -> u32 { self.range.start } + /// Returns the index *after* the last leaf included in the proof. pub fn end_idx(&self) -> u32 { self.range.end } + /// Returns the length of the range covered by the proof. 
pub fn range_len(&self) -> usize { self.range.end.saturating_sub(self.range.start) as usize } + /// Returns the leftmost node to the right of the proven range, if one exists. pub fn leftmost_right_sibling(&self) -> Option<&M::Output> { let siblings = self.siblings(); let num_left_siblings = compute_num_left_siblings(self.start_idx() as usize); @@ -71,6 +112,7 @@ where None } + /// Returns the rightmost node to the left of the proven range, if one exists. pub fn rightmost_left_sibling(&self) -> Option<&M::Output> { let siblings = self.siblings(); let num_left_siblings = compute_num_left_siblings(self.start_idx() as usize); diff --git a/src/simple_merkle/tree.rs b/src/simple_merkle/tree.rs index 6f22a39..fae4536 100644 --- a/src/simple_merkle/tree.rs +++ b/src/simple_merkle/tree.rs @@ -20,18 +20,21 @@ impl TakeLast for [T] { type BoxedVisitor = Box::Output)>; +/// Implements an RFC 6962 compatible merkle tree over an in-memory data store which maps preimages to hashes. pub struct MerkleTree where M: MerkleHash, { - leaves: Vec>, + leaves: Vec>, db: Db, root: Option, visitor: BoxedVisitor, hasher: M, } -impl::Output>, M: MerkleHash> Default for MerkleTree { +impl::Output>, M: MerkleHash + Default> Default + for MerkleTree +{ fn default() -> Self { Self { leaves: Default::default(), @@ -43,13 +46,17 @@ impl::Output>, M: MerkleHash> Default for Merkl } } -pub trait MerkleHash: Default { +/// A trait for hashing data into a merkle tree +pub trait MerkleHash { + /// The output of this hasher #[cfg(all(not(feature = "serde"), feature = "std"))] type Output: Debug + PartialEq + Eq + Clone + Default + Hash; + /// The output of this hasher #[cfg(all(not(feature = "serde"), not(feature = "std")))] type Output: Debug + PartialEq + Eq + Clone + Default + Hash + Ord; + /// The output of this hasher #[cfg(all(feature = "serde", not(feature = "std")))] type Output: Debug + PartialEq @@ -60,6 +67,7 @@ pub trait MerkleHash: Default { + serde::Serialize + serde::de::DeserializeOwned; +
/// The output of this hasher #[cfg(all(feature = "serde", feature = "std"))] type Output: Debug + PartialEq @@ -71,27 +79,32 @@ pub trait MerkleHash: Default { + serde::Serialize + serde::de::DeserializeOwned; + /// The hash of the empty tree. This is often defined as the hash of the empty string const EMPTY_ROOT: Self::Output; + /// Hashes data as a "leaf" of the tree. This operation *should* be domain separated fn hash_leaf(&self, data: &[u8]) -> Self::Output; + /// Hashes two digests into one. This operation *should* be domain separated fn hash_nodes(&self, l: &Self::Output, r: &Self::Output) -> Self::Output; } impl MerkleTree where Db: PreimageDb, - M: MerkleHash, + M: MerkleHash + Default, { + /// Constructs an empty merkle tree with a default hasher pub fn new() -> Self { - Self { - leaves: Vec::new(), - db: Default::default(), - root: Some(M::EMPTY_ROOT), - visitor: Box::new(|_| {}), - hasher: M::default(), - } + Self::with_hasher(Default::default()) } +} +impl MerkleTree +where + Db: PreimageDb, + M: MerkleHash, +{ + /// Constructs an empty merkle tree with the given hasher pub fn with_hasher(hasher: M) -> Self { Self { leaves: Vec::new(), @@ -102,22 +115,19 @@ where } } - pub fn push_leaf(&mut self, raw_leaf: &[u8]) { - self.root = None; - let hash = self.hasher.hash_leaf(raw_leaf); - let leaf = LeafWithHash { - hash, - data: raw_leaf.to_vec(), - }; - self.leaves.push(leaf); + /// Appends the given leaf to the tree + pub fn push_raw_leaf(&mut self, raw_leaf: &[u8]) { + let leaf = LeafWithHash::with_hasher(raw_leaf.to_vec(), &self.hasher); + self.push_leaf_with_hash(leaf); } - pub fn push_leaf_with_hash_unchecked(&mut self, leaf: Vec, hash: M::Output) { + /// Appends a pre-hashed leaf to the tree + pub fn push_leaf_with_hash(&mut self, leaf_with_hash: LeafWithHash) { self.root = None; - let leaf = LeafWithHash { hash, data: leaf }; - self.leaves.push(leaf); + self.leaves.push(leaf_with_hash); } + /// Returns the root of the tree, computing it if necessary. 
Repeated queries return a cached result. pub fn root(&mut self) -> M::Output { if let Some(inner) = &self.root { return inner.clone(); @@ -127,12 +137,14 @@ where inner } + /// Returns the requested range of leaves pub fn get_leaves(&self, range: Range) -> Vec> { let leaves = &self.leaves[range]; - leaves.iter().map(|leaf| leaf.data.clone()).collect() + leaves.iter().map(|leaf| leaf.data().to_vec()).collect() } - pub fn leaves(&self) -> &[LeafWithHash] { + /// Returns all leaves in the tree + pub fn leaves(&self) -> &[LeafWithHash] { &self.leaves[..] } @@ -145,10 +157,10 @@ where } 1 => { let leaf_with_hash = &self.leaves[leaf_range.start]; - let root = leaf_with_hash.hash.clone(); + let root = leaf_with_hash.hash().clone(); (self.visitor)(&root); self.db - .put(root.clone(), Node::Leaf(leaf_with_hash.data.clone())); + .put(root.clone(), Node::Leaf(leaf_with_hash.data().to_vec())); root } _ => { @@ -374,15 +386,17 @@ where } } + /// Fetches the requested range of leaves, along with a proof of correctness. pub fn get_range_with_proof(&mut self, leaf_range: Range) -> (Vec>, Proof) { let leaves = &self.leaves[leaf_range.clone()]; - let leaves = leaves.iter().map(|leaf| leaf.data.clone()).collect(); + let leaves = leaves.iter().map(|leaf| leaf.data().to_vec()).collect(); (leaves, self.build_range_proof(leaf_range)) } + /// Fetches the leaf at the given index, along with a proof of inclusion. pub fn get_index_with_proof(&mut self, idx: usize) -> (Vec, Proof) { ( - self.leaves[idx].data.clone(), + self.leaves[idx].data().to_vec(), self.build_range_proof(idx..idx + 1), ) }