diff --git a/crypto/statetrie/README.md b/crypto/statetrie/README.md new file mode 100644 index 0000000000..13bd07d644 --- /dev/null +++ b/crypto/statetrie/README.md @@ -0,0 +1,360 @@ + +## State trie + +The state trie, commonly known as a prefix tree, is a tree-like data structure +used for storing an associative array where the keys are sequences of 4-bit +bytes (Nibbles) and the values are SHA-512/256 hashes of the key values. + +Each node of the trie contains a cryptographic hash of itself and any children +nodes. This design ensures that the entire structure is tamper-evident, as a +proof can be provided to the user to show membership (or lack of membership) of +a key value. The proof can be verified by checking that the proof provides the +necessary missing value to hash to the known root hash. + +The root hash is only dependent on the keys and values in the trie when the hash +is calculated, and not on the order in which the keys were added, or perhaps +added, deleted and re-added, etc. + +Keys are stored by traversing the root node, using an expanding prefix of the +new key to make branching decisions, until a unique spot can be found for the +key value, then adjusting the trie and marking path nodes for rehashing. + +Keys are deleted by first searching for the key, and then performing trie +transformations until the key is removed. The rest of the trie is reconfigured +to eliminate the impact the deleted key had made on the nodes around it. + +This trie has built-in support for generic backing stores, which are essential +for persistent data storage and operation on very large tries. The package +provides both an in-memory storage backstore and a pebble-based backstore +implementation. + +*Key Features:* + +* Hashing: The trie provides a SHA-512/256 checksum at its root, ensuring data +integrity. + +* Adding and removing key/value pairs: Through specific operations, users can +efficiently add new key-value pairs to the trie or remove existing ones. 
The +trie ensures consistent state transitions and optimal space usage during these +operations. + +* Child and Merge Operations: The trie supports operations to manage child +tries, enabling the creation, discard, and merge actions for subtries. + +* Backstore commit: The trie supports committing changes to the trie to a +backing store that functions like a batched kv interface. + +* Preloading: Though the trie is designed to keep only parts of it in memory for +efficiency, it offers a preloading feature to sweep all nodes with keylengths +less than a provided parameter out of the backstore and into memory. + +* Nibble-sized proofs: 4-bit trie keys are represented as `Nibbles`. Each +branch node has one child slot for each of the 16 nibble values. Using +nibbles over 8-bit bytes allows for smaller proofs, but creates smaller and +more frequent backstore reads and taller tries. + +### Trie operation and usage + +Tries are initialized against a backing store (an empty memory one will be +constructed if not provided by the user) where the full trie ultimately resides +on Commit. + +``` +mt := MakeTrie(nil) +key1 := MakeNibbles({0x8e, 0x28}) +key2 := MakeNibbles({0x8d, 0x28}) +val1 := []byte{0x13, 0x19, 0x2a, 0x3c} +val2 := []byte{0x13, 0x19, 0x2a, 0x9f} + +mt.Add(key1, val1) +fmt.Println("K1:V1 Hash:", mt.Hash()) + +mt.Add(key2, val2) +fmt.Println("K1:V1,K2:V2 Hash:", mt.Hash()) + +mt.Delete(key2) +fmt.Println("K1:V1 Hash:", mt.Hash()) + +mt.Commit(nil) +``` + +The trie maintains an interface reference to the root of the trie, which is one +of five possible trie node types described below. Trie operations Add and +Delete descend the trie from this node, loading in nodes from the backstore (as +necessary), creating new nodes (if the key added is unique or a key is found +for deletion), and keeping track of nodes that can be deleted from the +backstore on the next Commit. 
+ +New `statetrie` objects that operate on a (potentially massive) trie residing +on a backing store are created by `MakeTrie (store)` and are initialized by +loading and deserializing the root node from the store. References pointing +down from this node are represented by shallow backing node objects. + +When Add or Delete operations want to descend through one of these backing +nodes, the bytes are obtained from the backing store and deserialized into one +of the three main trie node types (branch, extension, or leaf). + +In this way, trie operations 'unroll' paths from the trie store into working +memory as necessary to complete the operation. + +``` +Trie residing on backing store like Pebble with a branch node (BR1) as the root node: + _____ + | BR1 | + ____|_____|____ + / \ + / \ + __O__ __O__ + / \ / \ + O O O O + / \ / \ / \ / \ + O O O O O O O O + / \ / \ / \ / \ / \ / \ / \ / \ +O O O O O O O O O O + + +Below is a statetrie pointed at that backing store trie, immediately after MakeTrie. +It has a root of one branch node, with its two child nodes held as shallow +backing nodes (labeled 'b'), with known hashes that came from the branch node +deserialization. The root hash of this trie can be calculated immediately without +further backstore access: + + ___ + |BR1| + / \ + / \ + b b + +Below is that same statetrie after a few update Add operations, with more paths +unrolled from the backing store: + + ___ + |BR1| + / \ + / \ + __O__ __O__ + / \ \ + O O O + / / \ / \ + O O b b O + / / / \ +O O O O +``` + +Nodes that can be reached from the statetrie root node represent: + +1. uncommitted new intermediary or leaf nodes created in support of the Add or + Delete and are not yet hashed + +2. altered nodes created from prior operations that were never evicted + (replaced with backing nodes), with their hash now zeroed as they were + modified since the last Commit, + +3. 
unaltered nodes created from prior operations in the past that were never + evicted (replaced with backing nodes), and still have their original, known + hash + +4. references to nodes on the backing store (with a known hash) + +5. references to nodes in the parent trie, which act as lazy copies of the + parent nodes and disappear on merge + +On Commit, the first two node categories reachable from the root node +(following parent links) are hashed and committed to the backstore, and any +keys marked for deletion are removed from the store. + +Unmodified unrolls or committed nodes (categories 3 and 4) can either stay in +memory or face eviction by their parent node through an eviction function +evaluated as the nodes are committed (by calling a node's evict method). + +Eviction of branching and extension nodes replaces their lower subtries with +backing nodes. A nil eviction function, as above, keeps all nodes in memory. +A lambda that always returns true would collapse the trie to only the root +node, with any subtries replaced by backing nodes. + +``` +The statetrie above after committing with an eviction lambda returning true if +the node key length is three. + ___ + |BR1| + / \ + / \ + __O__ __O__ + / \ \ + O O O + / / \ / \ + b b b b b + +``` + +### Trie node types + +There are three main trie node types: leaf nodes, branch nodes, and extension nodes. +These are the nodes that are hashed to calculate the hash root, and which are +serialized to the backing store. The statetrie object uses two other +unserialized node objects, parent nodes and backing nodes, which convert into +one of the three main trie nodes as necessary. + +| Node Type | Description | Value Holding | Stored in Backstore | +|----------------|-------------------------------------------------------------------------------------------------------|---------------|---------------------| +| Leaf Nodes | Contains the remainder of the search key (`keyEnd`) and the hash of the value. 
| Yes | Yes | +| Branch Nodes | Holds references to 16 children nodes and an optional "value slot" for keys that terminate at the node. | Optional | Yes | +| Extension Nodes| Contains a run of commonly shared key nibbles that lead to the next node. No value is held. | No | Yes | +| Parent Nodes | Soft-links back to a node in a parent trie. They expand into copies if edited. | Varies | No | +| Backing Nodes | Soft links back to a node in the backing store. They are expanded into one of the main nodes if read. | Varies | No | + + +All trie nodes hold a key representing the nibble position of the node in the +trie, and a hash of the node itself. + +Any of the node types can be the root of a trie. + +The node key is the key used with the backing store to insert, alter or delete +the serialized node. The key is limited to MaxKeyLength (65,536) nibbles in +size, and cannot be empty (the root node is the empty nibble). + +The node hash is set to the zero value if it is not yet known or if the +contents of the node were altered in a trie operation. The hash is calculated +by either of the trie methods `Hash()` or `Commit()`, the latter of which hashes +and commits the node changes with node method `hashingCommit(store)`. In these +operations, the node hash is set to the SHA-512/256 hash of the serialization of +the node. The hashing scheme requires the lower levels of the trie to be +hashed before the higher levels. + +* Leaf nodes + +These value-holding nodes contain the remainder of the search key (the `keyEnd`) +and the hash of the value. + +* Branch nodes + +Branch nodes hold references to 16 children nodes indexed by the next nibble of +the search key, plus a "value slot" to hold values for keys that terminate at +the branch node. + +* Extension nodes + +Extension nodes contain an additional run of commonly shared key nibbles that +send you along to the next node. No value is held at an extension node. There +are no extension nodes with no next node. 
+ +* Parent nodes + +These nodes are soft-links back to a node in a parent trie from a child trie. +They are expanded into copies of the nodes they link to if the node is edited +or replaced in an Add or Delete operation. + +* Backing nodes + +These nodes are soft links back to a node in the backing store, containing the +key and the hash of the node. They are expanded into one of the three main +nodes if the node is read. + +When the trie is hashed, these nodes contain their own hash and thus do not +require the hash algorithm to descend that subtree from the backing store any +further. In this way the hashing function continues to function without +loading the entire trie structure into memory. + +When operated on, backing nodes deserialize themselves from the backing store +by calling `get`, which calls a node deserialization method to determine the +node type (from a prefix), and then the specific node type handles the rest of +the deserialization into a node object. This deserialization provides a hash +value to the new node object, as this value is recorded from the +deserialization of its parent node in the trie when the backing node was +constructed. + +If the deserialized branch or extension node points at another node, that +"pointed-at" node reference is stored as another backing node with its key set +to the location in the trie and with its hash set to the SHA-512/256 hash of the +node taken from the store bytes. If later trie operations need to descend +through these nodes, they are in turn deserialized as described. + +### Trie child and merge operations + +Child tries are represented as tries with unexplored node references ("parent +nodes") that point back to unmodified node objects of the parent trie. + +Obtaining a child trie from a trie allows the user to easily dispose of stacks +of changes to a child trie at an arbitrary time while retaining the parent. 
+ +Parent tries must remain read-only until after the child is discarded or until +after it is merged back into the parent. + +When a child trie is initialized, it is anchored to the parent by initializing +its root node to a parent node object that points back to the parent trie root +node object. Accessing this parent node to service an Add or Delete operation +converts the parent node into a copy of the original parent node (with the +`child` node method), and from there the operations continue with the copy +holding any alterations. + +When merging child tries back into their parents, the in-memory node objects in +a child trie undergo a traversal. This +search aims to identify parent nodes, which are then replaced by their original +references, effectively stitching the child trie's modifications into the +parent trie. + +Node deletion lists are propagated into the parent in a merge to be handled by +a future parent backstore commit. + +### `statetrie` cache operations +* Eviction + +Nodes can be evicted from memory during Commit and all their subtree replaced by +a single backing node according to eviction policy, which is the boolean output of +a function which operates on each node. There are three eviction strategies, EvictAll, +EvictNone, and EvictLevel(n), which evicts nodes with a key length of n. Evicted nodes +would have to be read back in from the backing store to resume operations on them. + +Eviction of a node only affects branch and extension nodes, which replace their +children with backing nodes after they are committed. + +* Preloading + +Normally only part of the trie is kept in memory. However, the trie can sweep +nodes out of the backstore and into memory by calling Preload. + +Preload loads into trie memory all backing nodes reachable from the root that +have keys with length less than or equal to the one provided by obtaining them from +the store. 
+ +In a full (and therefore balanced) trie, preloading lengths has the effect of +loading the top levels of the trie. + +### Raising nodes + +Some delete operations require a trie transformation that relocates a node +"earlier" in the trie. These relocations shorten the key from the original key. +Relocating a leaf node merely reassigns the key value and adjusts the ending +key value in the node to compensate. But raising a branch node creates a new +extension node and places it just above the branch node. Raising an extension +node extends its shared key and relocates its key. Raising a backing node gets +the node from the store and then immediately raises it. Similarly, raising a +parent node copies the parent node by invoking `child` on it and immediately +raises it. After a raising operation, there is guaranteed to be a node at the +new location in the trie. + +### Backing stores + +In large backing store tries, only a fraction of the trie nodes are represented +by in-memory trie node objects. The rest of the nodes live in the backing +store. + +Backing stores are kv stores which maintain all the mapping between committed +trie keys and node serialization data (which includes the hash of the key +value). + +Backing stores must "set" byte data containing serialized nodes, and "get" +nodes back from the store by deserializing them into trie nodes that (may) +contain deferred references to further backing store nodes. A simple backing +store is a golang map from strings to nodes which uses the provided node +serialization / deserialization utilities. This is implemented as +`memoryBackstore`. + +`BatchStart()` is called on a backing store before any store set operations +are begun, and `BatchEnd()` is called after there are no more, to allow for +preparations around batch commits. 
+ +Committing the trie to the backing store will trigger hashing of the trie, as +committing requires node serialization and node serialization requires the hash +of subtree elements in branch and extension nodes. + diff --git a/crypto/statetrie/backing.go b/crypto/statetrie/backing.go new file mode 100644 index 0000000000..bcb5f6d89e --- /dev/null +++ b/crypto/statetrie/backing.go @@ -0,0 +1,65 @@ +// Copyright (C) 2019-2024 Algorand, Inc. +// This file is part of go-algorand +// +// go-algorand is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// go-algorand is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with go-algorand. If not, see . + +package statetrie + +import ( + "github.com/algorand/go-algorand/crypto" + "github.com/algorand/go-algorand/crypto/statetrie/nibbles" + "sync" +) + +// Backing nodes are placeholders for nodes that have been stored in the +// backing store. All we need is the full key of the node and its hash. +type backingNode struct { + key nibbles.Nibbles + hash crypto.Digest +} + +// backingNodePool supplies the backingNode objects used by makeBackingNode. +// New builds a node with an empty, non-nil key slice. +var backingNodePool = sync.Pool{ + New: func() interface{} { + return &backingNode{ + key: make(nibbles.Nibbles, 0), + } + }, +} + +// makeBackingNode takes a backingNode from backingNodePool, stores hash in it, +// and copies key into the node's own key slice (ba.key[:0] reuses whatever +// capacity the pooled slice already has, so the caller's slice is not retained). +func makeBackingNode(hash crypto.Digest, key nibbles.Nibbles) *backingNode { + stats.makebanodes++ + ba := backingNodePool.Get().(*backingNode) + ba.hash = hash + ba.key = append(ba.key[:0], key...) 
+ return ba +} +// setHash sets the value of the hash for the node. +func (ba *backingNode) setHash(hash crypto.Digest) { + ba.hash = hash +} +// add is not implemented yet: it ignores its arguments and returns (nil, nil). +// NOTE(review): callers that use the returned node without a nil check will +// fail on it - confirm once the backing store change lands. +func (ba *backingNode) add(mt *Trie, pathKey nibbles.Nibbles, remainingKey nibbles.Nibbles, valueHash crypto.Digest) (node, error) { + // will be provided in the subsequent backing store PR + return nil, nil +} +// hashing does nothing and returns nil; the stored hash is only ever assigned +// by makeBackingNode or setHash. +func (ba *backingNode) hashing() error { + return nil +} +// getKey gets the nibbles of the full key for this node. +func (ba *backingNode) getKey() nibbles.Nibbles { + return ba.key +} +// getHash returns a pointer to the hash stored in this node. +func (ba *backingNode) getHash() *crypto.Digest { + return &ba.hash +} +// serialize always panics: backing nodes cannot be serialized. +func (ba *backingNode) serialize() ([]byte, error) { + panic("backingNode cannot be serialized") +} diff --git a/crypto/statetrie/branch.go b/crypto/statetrie/branch.go new file mode 100644 index 0000000000..72502efd09 --- /dev/null +++ b/crypto/statetrie/branch.go @@ -0,0 +1,174 @@ +// Copyright (C) 2019-2024 Algorand, Inc. +// This file is part of go-algorand +// +// go-algorand is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// go-algorand is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with go-algorand. If not, see . + +package statetrie + +import ( + "bytes" + + "github.com/algorand/go-algorand/crypto" + "github.com/algorand/go-algorand/crypto/statetrie/nibbles" +) + +// branchNode is a trie node with 16 child slots (children), a value hash +// (valueHash), the full key of the node (key), and the node's own hash (hash). +type branchNode struct { + children [16]node + valueHash crypto.Digest + key nibbles.Nibbles + hash crypto.Digest +} + +// makeBranchNode creates a branch node with the provided children nodes, valueHash, +// and full key. 
+func makeBranchNode(children [16]node, valueHash crypto.Digest, key nibbles.Nibbles) *branchNode { + stats.makebranches++ + bn := &branchNode{children: children, valueHash: valueHash, key: make(nibbles.Nibbles, len(key))} + copy(bn.key, key) + return bn +} +func (bn *branchNode) add(mt *Trie, pathKey nibbles.Nibbles, remainingKey nibbles.Nibbles, valueHash crypto.Digest) (node, error) { + //Three operational transitions: + // + //- BN.ADD.1: Store the new value in the branch node value slot. This overwrites + // the branch node slot value. + // + //- BN.ADD.2: Make a new leaf node with the new value, and point an available + // branch child slot at it. This stores a new leaf node in a child slot. + // + //- BN.ADD.3: This repoints the child node to a new/existing node resulting from + // performing the Add operation on the child node. + if len(remainingKey) == 0 { + // If we're here, then set the value hash in this node, overwriting the old one. + if bn.valueHash == valueHash { + // If it is the same value, do not zero the hash + return bn, nil + } + + bn.valueHash = valueHash + // transition BN.ADD.1 + bn.hash = crypto.Digest{} + return bn, nil + } + + // Otherwise, shift out the first nibble and check the children for it. + shifted := nibbles.ShiftLeft(remainingKey, 1) + slot := remainingKey[0] + if bn.children[slot] == nil { + // nil children are available. + lnKey := pathKey[:] + lnKey = append(lnKey, slot) + + // transition BN.ADD.2 + bn.hash = crypto.Digest{} + bn.children[slot] = makeLeafNode(shifted, valueHash, lnKey) + } else { + // Not available. Descend down the branch. 
+ replacement, err := bn.children[slot].add(mt, append(pathKey, remainingKey[0]), shifted, valueHash) + if err != nil { + return nil, err + } + // If the replacement hash is zero, zero the branch node hash + if replacement.getHash().IsZero() { + bn.hash = crypto.Digest{} + } + // transition BN.ADD.3 + bn.children[slot] = replacement + } + + return bn, nil +} + +// hashing serializes the node and then hashes it, storing the hash in the node. +func (bn *branchNode) hashing() error { + if bn.hash.IsZero() { + for i := 0; i < 16; i++ { + if bn.children[i] != nil && bn.children[i].getHash().IsZero() { + err := bn.children[i].hashing() + if err != nil { + return err + } + } + } + bytes, err := bn.serialize() + if err != nil { + return err + } + stats.cryptohashes++ + bn.hash = crypto.Hash(bytes) + } + return nil +} + +// deserializeBranchNode turns a data array and its key in the trie into +// a branch node. +func deserializeBranchNode(data []byte, key nibbles.Nibbles) *branchNode { + if data[0] != 5 { + panic("invalid prefix for branch node") + } + if len(data) < (1 + 17*crypto.DigestSize) { + panic("data too short to be a branch node") + } + + var children [16]node + for i := 0; i < 16; i++ { + var hash crypto.Digest + + copy(hash[:], data[1+i*crypto.DigestSize:(1+crypto.DigestSize)+i*crypto.DigestSize]) + if !hash.IsZero() { + chKey := key[:] + chKey = append(chKey, byte(i)) + children[i] = makeBackingNode(hash, chKey) + } + } + var valueHash crypto.Digest + copy(valueHash[:], data[(1+16*crypto.DigestSize):(1+17*crypto.DigestSize)]) + return makeBranchNode(children, valueHash, key) +} + +// setHash sets the value of the hash for the node. 
+func (bn *branchNode) setHash(hash crypto.Digest) { + bn.hash = hash +} + +var bnbuffer bytes.Buffer + +func (bn *branchNode) serialize() ([]byte, error) { + bnbuffer.Reset() + var empty crypto.Digest + prefix := byte(5) + + bnbuffer.WriteByte(prefix) + for i := 0; i < 16; i++ { + if bn.children[i] != nil { + bnbuffer.Write(bn.children[i].getHash().ToSlice()) + } else { + bnbuffer.Write(empty[:]) + } + } + bnbuffer.Write(bn.valueHash[:]) + return bnbuffer.Bytes(), nil +} + +// getKey gets the nibbles of the full key for this node. +func (bn *branchNode) getKey() nibbles.Nibbles { + return bn.key +} + +// getHash gets the hash for this node. If the hash has not been set by a +// hashing operation like branchNode.hashing, getHash will not calculate it +// (instead it will return the empty hash, crypto.Digest{}) +func (bn *branchNode) getHash() *crypto.Digest { + return &bn.hash +} diff --git a/crypto/statetrie/ext.go b/crypto/statetrie/ext.go new file mode 100644 index 0000000000..64ccbfde3c --- /dev/null +++ b/crypto/statetrie/ext.go @@ -0,0 +1,232 @@ +// Copyright (C) 2019-2024 Algorand, Inc. +// This file is part of go-algorand +// +// go-algorand is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// go-algorand is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with go-algorand. If not, see . 
+ +package statetrie + +import ( + "fmt" + + "github.com/algorand/go-algorand/crypto" + "github.com/algorand/go-algorand/crypto/statetrie/nibbles" +) + +type extensionNode struct { + key nibbles.Nibbles + sharedKey nibbles.Nibbles + next node + hash crypto.Digest +} + +// makeExtensionNode creates a extension node with the provided shared prefix, +// next node, and full key in the trie. +func makeExtensionNode(sharedKey nibbles.Nibbles, next node, key nibbles.Nibbles) *extensionNode { + stats.makeextensions++ + en := &extensionNode{sharedKey: make(nibbles.Nibbles, len(sharedKey)), next: next, key: make(nibbles.Nibbles, len(key))} + copy(en.key, key) + copy(en.sharedKey, sharedKey) + return en +} +func (en *extensionNode) add(mt *Trie, pathKey nibbles.Nibbles, remainingKey nibbles.Nibbles, valueHash crypto.Digest) (node, error) { + //- EN.ADD.1: Point the existing extension node at a (possibly new or existing) node resulting + // from performing the Add operation on the child node. + //- EN.ADD.2: Create an extension node for the current child and store it in a new branch node child slot. + //- EN.ADD.3: Store the existing extension node child in a new branch node child slot. + //- EN.ADD.4: Store the new value in a new leaf node stored in an available child slot of the new branch node. + //- EN.ADD.5: Store the new value in the value slot of the new branch node. + //- EN.ADD.6: Modify the existing extension node shared key and point the child at the new branch node. + //- EN.ADD.7: Replace the extension node with the branch node created earlier. + // + //Codepaths: + // + // * Codepath 1: EN.ADD.1 + // + // This redirects the extension node to a new/existing node resulting from + // performing the Add operation on the extension child. 
+ // + // * Codepaths 2 - 5: EN.ADD.2|EN.ADD.3 then EN.ADD.4|EN.ADD.5 then EN.ADD.6 + // + // This stores the current extension node child in either a new branch node + // child slot or by creating a new extension node at a new key pointing at the + // child, and attaching that to a new branch node. Either way, the new branch + // node also receives a new leaf node with the new value or has its value slot + // assigned, and another extension node is created to replace it pointed at the + // branch node as its target. + // + // * Codepaths 6 - 9: EN.ADD.2|EN.ADD.3 then EN.ADD.4|EN.ADD.5 then EN.ADD.7 + // + // Same as above, only the new branch node replaceds the existing extension node + // outright, without the additional extension node. + // + + // Calculate the shared Nibbles between the key we're adding and this extension node. + // shNibbles is a slice from en.sharedKey and is read-only + shNibbles := nibbles.SharedPrefix(en.sharedKey, remainingKey) + if len(shNibbles) == len(en.sharedKey) { + // The entire extension node is shared. descend. + shifted := nibbles.ShiftLeft(remainingKey, len(shNibbles)) + replacement, err := en.next.add(mt, append(pathKey, shNibbles...), shifted, valueHash) + if err != nil { + panic(fmt.Sprintf("extensionNode.add: %v", err)) + } + if replacement.getHash().IsZero() { + en.setHash(crypto.Digest{}) + } + // EN.ADD.1 + en.next = replacement + return en, nil + } + + // we have to upgrade part or all of this extension node into a branch node. + var children [16]node + branchHash := crypto.Digest{} + // what's left of the extension node shared key after removing the shared part gets + // attached to the new branch node. + shifted := nibbles.ShiftLeft(en.sharedKey, len(shNibbles)) + if len(shifted) >= 2 { + // if there's two or more Nibbles left, make another extension node. + shifted2 := nibbles.ShiftLeft(shifted, 1) + enKey := pathKey[:] + enKey = append(enKey, shNibbles...) 
+ enKey = append(enKey, shifted[0]) + en2 := makeExtensionNode(shifted2, en.next, enKey) + // EN.ADD.2 + children[shifted[0]] = en2 + } else { + // if there's only one nibble left, store the child in the branch node. + // there can't be no Nibbles left, or the earlier entire-node-shared case would have been triggered. + // EN.ADD.3 + children[shifted[0]] = en.next + } + + //what's left of the new remaining key gets put into the branch node bucket corresponding + //with its first nibble, or into the valueHash if it's now empty. + shifted = nibbles.ShiftLeft(remainingKey, len(shNibbles)) + if len(shifted) > 0 { + shifted3 := nibbles.ShiftLeft(shifted, 1) + // we know this slot will be empty because it's the first nibble that differed from the + // only other occupant in the child arrays, the one that leads to the extension node's child. + lnKey := pathKey[:] + lnKey = append(lnKey, shNibbles...) + lnKey = append(lnKey, shifted[0]) + ln := makeLeafNode(shifted3, valueHash, lnKey) + // EN.ADD.4 + children[shifted[0]] = ln + } else { + // if the key is no more, store it in the branch node's value hash slot. + // EN.ADD.5 + branchHash = valueHash + } + + bnKey := pathKey[:] + bnKey = append(bnKey, shNibbles...) + replacement := makeBranchNode(children, branchHash, bnKey) + // the shared bits of the extension node get smaller + if len(shNibbles) > 0 { + // still some shared key left, store them in an extension node + // and point in to the new branch node + en.sharedKey = shNibbles + en.next = replacement + en.setHash(crypto.Digest{}) + // EN.ADD.6 + return en, nil + } + // or else there there is no shared key left, and the extension node is destroyed. + // EN.ADD.7 + return replacement, nil +} + +// setHash sets the value of the hash for the node. +func (en *extensionNode) setHash(hash crypto.Digest) { + en.hash = hash +} + +// hashing serializes the node and then hashes it, storing the hash in the node. 
+func (en *extensionNode) hashing() error { + if en.hash.IsZero() { + if en.next.getHash().IsZero() { + err := en.next.hashing() + if err != nil { + return err + } + } + bytes, err := en.serialize() + if err != nil { + return err + } + + stats.cryptohashes++ + en.setHash(crypto.Hash(bytes)) + } + return nil +} + +// serialize creates a byte array containing an identifier prefix +// (2 if the nibble length of the keyEnd is even, 1 if it is odd) +// as well as the hash of the next node and the shared key prefix. +func (en *extensionNode) serialize() ([]byte, error) { + pack, half := nibbles.Pack(en.sharedKey) + data := make([]byte, 1+crypto.DigestSize+len(pack)) + if half { + data[0] = 1 + } else { + data[0] = 2 + } + + copy(data[1:(1+crypto.DigestSize)], en.next.getHash()[:]) + copy(data[(1+crypto.DigestSize):], pack) + return data, nil +} + +// deserializeExtensionNode turns a data array and its key in the trie into +// an extension node. +func deserializeExtensionNode(data []byte, key nibbles.Nibbles) *extensionNode { + if data[0] != 1 && data[0] != 2 { + panic("invalid prefix for extension node") + } + + if len(data) < (1 + crypto.DigestSize) { + panic("data too short to be an extension node") + } + + sharedKey := nibbles.MakeNibbles(data[(1+crypto.DigestSize):], data[0] == 1) + if len(sharedKey) == 0 { + panic("sharedKey can't be empty in an extension node") + } + var hash crypto.Digest + copy(hash[:], data[1:(1+crypto.DigestSize)]) + var child node + if !hash.IsZero() { + chKey := key[:] + chKey = append(chKey, sharedKey...) + child = makeBackingNode(hash, chKey) + } else { + panic("next node hash can't be zero in an extension node") + } + + return makeExtensionNode(sharedKey, child, key) +} + +// getKey gets the nibbles of the full key for this node. +func (en *extensionNode) getKey() nibbles.Nibbles { + return en.key +} + +// getHash gets the hash for this node. 
If the hash has not been set by a +// hashing operation like extensionNode.hashing, getHash will not calculate it +// (instead it will return the empty hash, crypto.Digest{}) +func (en *extensionNode) getHash() *crypto.Digest { + return &en.hash +} diff --git a/crypto/statetrie/leaf.go b/crypto/statetrie/leaf.go new file mode 100644 index 0000000000..b79f0c1fd1 --- /dev/null +++ b/crypto/statetrie/leaf.go @@ -0,0 +1,261 @@ +// Copyright (C) 2019-2024 Algorand, Inc. +// This file is part of go-algorand +// +// go-algorand is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// go-algorand is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with go-algorand. If not, see . + +package statetrie + +import ( + "bytes" + + "github.com/algorand/go-algorand/crypto" + "github.com/algorand/go-algorand/crypto/statetrie/nibbles" +) + +// leafNode is a trie node holding the hash of a value (valueHash). key is the +// node's key in the trie and keyEnd is the remainder of the value's key, so the +// full key of the value is key + keyEnd. hash is the hash of the node itself. +type leafNode struct { + key nibbles.Nibbles + keyEnd nibbles.Nibbles + valueHash crypto.Digest + hash crypto.Digest +} + +// makeLeafNode creates a leaf node with the provided valueHash, key and keyEnd. +// The full key of the value represented by the node is key + keyEnd. +// Both key and keyEnd are copied into freshly allocated slices. +func makeLeafNode(keyEnd nibbles.Nibbles, valueHash crypto.Digest, key nibbles.Nibbles) *leafNode { + stats.makeleaves++ + ln := &leafNode{keyEnd: make(nibbles.Nibbles, len(keyEnd)), valueHash: valueHash, key: make(nibbles.Nibbles, len(key))} + copy(ln.key, key) + copy(ln.keyEnd, keyEnd) + return ln +} + +// setHash sets the value of the hash for the node. 
+func (ln *leafNode) setHash(hash crypto.Digest) {
+	ln.hash = hash
+}
+
+// add inserts the key pathKey + remainingKey with the given valueHash at
+// this leaf and returns the node that takes the leaf's place: the leaf
+// itself, a new branch node, or a new extension node in front of a new
+// branch node. The mt argument is not used by the leaf implementation and
+// the returned error is always nil.
+//
+// Add operation transitions:
+//
+//   - LN.ADD.0: The key and value already exist in the trie.
+//   - LN.ADD.1: Store the new value in the existing leaf node, overwriting it.
+//   - LN.ADD.2: Store the existing leaf value in a new branch node value space.
+//   - LN.ADD.3: Store the existing leaf value in a new leaf node attached to a new branch node.
+//   - LN.ADD.4: Store the new value in the new branch node value space.
+//   - LN.ADD.5: Store the new value in a new leaf node attached to the new branch node.
+//   - LN.ADD.6: Replace the leaf node with a new extension node in front of the new branch node.
+//   - LN.ADD.7: Replace the leaf node with the branch node created earlier.
+//
+// Codepath 0: LN.ADD.0
+// This codepath is triggered when the added key/value already exists in the trie.
+// The leaf node is returned as the replacement node (eg no replacement), and the
+// hash is left unchanged.
+//
+// Codepath 1: LN.ADD.3 then LN.ADD.5 then LN.ADD.7
+// This codepath is triggered when the existing leaf node keyEnd has nothing in common
+// with the added key. This results in a new branch node, with two leaf nodes attached to
+// it, representing the original leaf and the new key. The branch node is returned as the
+// replacement for this leaf node.
+// Example:
+//
+//	{key="AB", value="DEF"} // added to trie as a leaf node which is now the root node.
+//	{key="CD", value="GHI"} // adding this node triggers codepath 1 on the first node
+//
+// Codepath 2: LN.ADD.1
+// This codepath is triggered when the added key is already in the trie with a
+// different value. The existing leaf node is modified to change its value, and the
+// hash is blanked, then this node is returned as the replacement (so, not really a
+// replacement).
+// Example:
+//
+//	{key="A", value="DEF"} // added to trie as a leaf node which is now the root node.
+//	{key="A", value="GHI"} // adding this node triggers codepath 2 on the first node
+//
+// Codepath 3: LN.ADD.2 then LN.ADD.5 then LN.ADD.6
+// This codepath is triggered when there is an existing leaf node whose key is
+// a complete prefix of the added key. The result is an extension node containing
+// the shared prefix, which points to a new branch node with the existing value
+// stored in its branch value slot and a new child leaf node hanging off of the new
+// branch node. The extension node is returned as the replacement node.
+// Example:
+//
+//	{key="A", value="DEF"}  // added to trie as a leaf node which is now the root node.
+//	{key="AB", value="GHI"} // adding this node triggers codepath 3 on the first node
+//
+// Codepath 4: LN.ADD.3 then LN.ADD.4 then LN.ADD.6
+// This codepath is triggered when the added key is a prefix of the leaf node key.
+// The result is an extension node containing the shared prefix, which points to a
+// new branch node containing the new key in the branch node value slot and an
+// additional leaf node attached to the branch node to contain the original leaf node.
+// The extension node is returned as the leaf node replacement.
+// Example:
+//
+//	{key="AB", value="DEF"} // added to trie as a leaf node which is now the root node.
+//	{key="A", value="GHI"}  // adding this node triggers codepath 4 on the first node
+//
+// Codepath 5: LN.ADD.3 then LN.ADD.5 then LN.ADD.6
+// This codepath is triggered when the added key shares a prefix with the existing
+// leaf node, but then diverges. The leaf node is replaced with an extension node
+// containing the shared prefix, which points to a new branch node, and both the original
+// leaf node and the added key/value are attached to that node. The extension node
+// is returned as the leaf node replacement.
+// Example:
+//
+//	{key="AB", value="DEF"} // added to trie as a leaf node which is now the root node.
+//	{key="AC", value="GHI"} // adding this node triggers codepath 5 on the first node
+//
+// Codepath 6: LN.ADD.2 then LN.ADD.5 then LN.ADD.7
+// This codepath is triggered when the existing leaf node has no more keyEnd (as the
+// branch slot it is attached to completes the key) but the added node still has
+// additional nibbles in it. A new branch node is created, and the original leaf
+// value is stored in its branch value slot, and the added key is stored in a leaf
+// node attached to that new branch node. The branch node is returned as the leaf
+// node replacement.
+// Example:
+//
+//	{key="A", value="DEF"}  // added to trie as a leaf node which is now the root node.
+//	{key="B", value="GHI"}  // added to trie, creating a branch node via codepath 1 with
+//	                        // two leaf nodes, each with no keyEnd (as they are attached to
+//	                        // the A and B slots in the new branch node)
+//	{key="AB", value="JKL"} // adding this node triggers codepath 6 on the leaf node in
+//	                        // the "A" slot (value DEF)
+//
+// Codepath 7: LN.ADD.3 then LN.ADD.4 then LN.ADD.7
+// This codepath is triggered when the added key is a prefix of the existing leaf node,
+// but unlike codepath 4, the existing leaf node shares no nibbles with the added key.
+// The new key is added into the value slot for a new branch node and the original key
+// is stored in a leaf node attached to the branch. The branch node is returned as the
+// leaf node replacement.
+// Example:
+//
+//	{key="AB", value="DEF"} // added to trie as a leaf node which is now the root node.
+//	{key="B", value="GHI"}  // added to trie, creating a branch node via codepath 1, with
+//	                        // two new leaf nodes, one with keyEnd {} attached to the "B" slot (value GHI)
+//	                        // and one with keyEnd {"B"} attached to the "A" slot (value DEF)
+//	{key="A", value="JKL"}  // adding this node triggers codepath 7 on the node with value DEF
+//	                        // from above, placing JKL in a new branch node value slot and attaching
+//	                        // the DEF node to that branch node's "B" slot.
+func (ln *leafNode) add(mt *Trie, pathKey nibbles.Nibbles, remainingKey nibbles.Nibbles, valueHash crypto.Digest) (node, error) {
+	if nibbles.Equal(ln.keyEnd, remainingKey) {
+		// The two keys are the same.
+		if ln.valueHash == valueHash {
+			// LN.ADD.0
+			// The two values are the same. No change, don't clear the hash.
+			return ln, nil
+		}
+		// LN.ADD.1
+		// Replace the value and blank the hash so it gets recalculated.
+		ln.valueHash = valueHash
+		ln.setHash(crypto.Digest{})
+		return ln, nil
+	}
+
+	// Calculate the shared Nibbles between the leaf node we're on and the key we're inserting.
+	// SharedPrefix returns the shared slice from the first argument, ln.keyEnd, and is read-only.
+	shNibbles := nibbles.SharedPrefix(ln.keyEnd, remainingKey)
+	// Shift away the common Nibbles from both the keys.
+	shiftedLn1 := nibbles.ShiftLeft(ln.keyEnd, len(shNibbles))
+	shiftedLn2 := nibbles.ShiftLeft(remainingKey, len(shNibbles))
+
+	// The new branch node lives at pathKey + shared prefix. The key is built
+	// in a fresh slice: appending to pathKey directly could write into spare
+	// capacity of the caller's backing array, and keys derived that way would
+	// share storage with each other.
+	bn2key := make(nibbles.Nibbles, 0, len(pathKey)+len(shNibbles))
+	bn2key = append(bn2key, pathKey...)
+	bn2key = append(bn2key, shNibbles...)
+
+	// childKey returns a newly allocated key for the child of the branch node
+	// that sits in the slot named by the first nibble of rest.
+	childKey := func(rest nibbles.Nibbles) nibbles.Nibbles {
+		k := make(nibbles.Nibbles, 0, len(bn2key)+1)
+		k = append(k, bn2key...)
+		return append(k, rest[0])
+	}
+
+	// Make a branch node.
+	var children [16]node
+	branchHash := crypto.Digest{}
+
+	// If the existing leaf node has no more Nibbles, then store it in the branch node's value slot.
+	if len(shiftedLn1) == 0 {
+		// LN.ADD.2
+		branchHash = ln.valueHash
+	} else {
+		// Otherwise, make a new leaf node that shifts away one nibble, and store it in that nibble's slot
+		// in the branch node.
+		ln1 := makeLeafNode(nibbles.ShiftLeft(shiftedLn1, 1), ln.valueHash, childKey(shiftedLn1))
+		// LN.ADD.3
+		children[shiftedLn1[0]] = ln1
+	}
+
+	// Similarly, for our new insertion, if it has no more Nibbles, store it in the
+	// branch node's value slot.
+	if len(shiftedLn2) == 0 {
+		// LN.ADD.4
+		branchHash = valueHash
+	} else {
+		// Otherwise, make a new leaf node that shifts away one
+		// nibble, and store it in that nibble's slot in the branch node.
+		ln2 := makeLeafNode(nibbles.ShiftLeft(shiftedLn2, 1), valueHash, childKey(shiftedLn2))
+		// LN.ADD.5
+		children[shiftedLn2[0]] = ln2
+	}
+	bn2 := makeBranchNode(children, branchHash, bn2key)
+
+	if len(shNibbles) >= 1 {
+		// If there was at least one shared nibble, insert an extension node before the branch node.
+		en := makeExtensionNode(shNibbles, bn2, pathKey)
+		// LN.ADD.6
+		return en, nil
+	}
+	// LN.ADD.7
+	return bn2, nil
+}
+
+// hashing serializes the node and then hashes it, storing the hash in the node.
+// A node that already has a non-zero hash is left untouched. An error from
+// serialize is returned to the caller and leaves the hash unset.
+func (ln *leafNode) hashing() error {
+	if !ln.hash.IsZero() {
+		return nil
+	}
+	data, err := ln.serialize()
+	if err != nil {
+		return err
+	}
+	stats.cryptohashes++
+	ln.setHash(crypto.Hash(data))
+	return nil
+}
+
+// serialize creates a byte array containing an identifier prefix
+// (4 if the nibble length of the keyEnd is even, 3 if it is odd)
+// as well as the valueHash and the packed keyEnd, in that order.
+// Every call writes into its own buffer, so the returned slice is not
+// affected by later calls. The returned error is always nil.
+func (ln *leafNode) serialize() ([]byte, error) {
+	pack, half := nibbles.Pack(ln.keyEnd)
+	prefix := byte(4)
+	if half {
+		prefix = byte(3)
+	}
+
+	var buf bytes.Buffer
+	buf.Grow(1 + crypto.DigestSize + len(pack))
+	buf.WriteByte(prefix)
+	buf.Write(ln.valueHash[:])
+	buf.Write(pack)
+	return buf.Bytes(), nil
+}
+
+// deserializeLeafNode turns a data array and its key in the trie into
+// a leaf node.
+//
+// It panics if data is shorter than the prefix byte plus a digest, if the
+// prefix byte is not 3 or 4, or if the prefix announces an odd number of
+// keyEnd nibbles but no packed nibbles follow the digest.
+func deserializeLeafNode(data []byte, key nibbles.Nibbles) *leafNode {
+	// Check the length before reading data[0] so that empty input produces a
+	// descriptive panic instead of an index-out-of-range runtime error.
+	if len(data) < 1+crypto.DigestSize {
+		panic("data too short to be a leaf node")
+	}
+	if data[0] != 3 && data[0] != 4 {
+		panic("invalid leaf node")
+	}
+
+	packed := data[(1 + crypto.DigestSize):]
+	odd := data[0] == 3
+	if odd && len(packed) == 0 {
+		// nibbles.MakeNibbles must never be called with an empty slice and
+		// oddLength set.
+		panic("odd-length leaf node has no key nibbles")
+	}
+
+	keyEnd := nibbles.MakeNibbles(packed, odd)
+	// makeLeafNode copies key, so it can be passed through as-is.
+	return makeLeafNode(keyEnd, crypto.Digest(data[1:(1+crypto.DigestSize)]), key)
+}
+
+// getKey gets the nibbles of the full key for this node.
+func (ln *leafNode) getKey() nibbles.Nibbles {
+	return ln.key
+}
+
+// getHash gets the hash for this node.  If the hash has not been set by a
+// hashing operation like leafNode.hashing, getHash will not calculate it
+// (instead it will return the empty hash, crypto.Digest{})
+func (ln *leafNode) getHash() *crypto.Digest {
+	return &ln.hash
+}
diff --git a/crypto/statetrie/nibbles/nibbles.go b/crypto/statetrie/nibbles/nibbles.go
index d2f95678fd..f2d98477e8 100644
--- a/crypto/statetrie/nibbles/nibbles.go
+++ b/crypto/statetrie/nibbles/nibbles.go
@@ -123,16 +123,16 @@ func Deserialize(encoding []byte) (Nibbles, error) {
 		if length == 1 {
 			return nil, errors.New("invalid encoding")
 		}
-		ns = makeNibbles(encoding[:length-1], true)
+		ns = MakeNibbles(encoding[:length-1], true)
 	} else if encoding[length-1] == evenIndicator {
-		ns = makeNibbles(encoding[:length-1], false)
+		ns = MakeNibbles(encoding[:length-1], false)
 	} else {
 		return nil, errors.New("invalid encoding")
 	}
 	return ns, nil
 }
 
-// makeNibbles returns a nibble array from the byte array.  If oddLength is true,
+// MakeNibbles returns a nibble array from the byte array.  If oddLength is true,
 // the last 4 bits of the last byte of the array are ignored.
 //
 // [0x12, 0x30], true -> [0x1, 0x2, 0x3]
@@ -141,7 +141,7 @@ func Deserialize(encoding []byte) (Nibbles, error) {
 // [], false -> []
 // never to be called with [], true
 // Allocates a new byte slice.
-func makeNibbles(data []byte, oddLength bool) Nibbles { +func MakeNibbles(data []byte, oddLength bool) Nibbles { length := len(data) * 2 if oddLength { length = length - 1 diff --git a/crypto/statetrie/nibbles/nibbles_test.go b/crypto/statetrie/nibbles/nibbles_test.go index 7b355c0386..3d97dbe135 100644 --- a/crypto/statetrie/nibbles/nibbles_test.go +++ b/crypto/statetrie/nibbles/nibbles_test.go @@ -48,7 +48,7 @@ func TestNibblesRandom(t *testing.T) { if half && localRand.Intn(2) == 0 { data[len(data)-1] &= 0xf0 // sometimes clear the last nibble, sometimes do not } - nibbles := makeNibbles(data, half) + nibbles := MakeNibbles(data, half) data2 := Serialize(nibbles) nibbles2, err := Deserialize(data2) @@ -61,13 +61,13 @@ func TestNibblesRandom(t *testing.T) { packed, odd := Pack(nibbles) require.Equal(t, odd, half) require.Equal(t, packed, data) - unpacked := makeNibbles(packed, odd) + unpacked := MakeNibbles(packed, odd) require.Equal(t, nibbles, unpacked) packed, odd = Pack(nibbles2) require.Equal(t, odd, half) require.Equal(t, packed, data) - unpacked = makeNibbles(packed, odd) + unpacked = MakeNibbles(packed, odd) require.Equal(t, nibbles2, unpacked) } } @@ -130,7 +130,7 @@ func TestNibbles(t *testing.T) { require.Equal(t, oddLength == (len(n)%2 == 1), true) require.Equal(t, bytes.Equal(b, sampleNibblesPacked[i]), true) - unp := makeNibbles(b, oddLength) + unp := MakeNibbles(b, oddLength) require.Equal(t, bytes.Equal(unp, n), true) } @@ -183,10 +183,10 @@ func TestNibbles(t *testing.T) { makeNibblesTestExpected := Nibbles{0x0, 0x1, 0x2, 0x9, 0x2} makeNibblesTestData := []byte{0x01, 0x29, 0x20} - mntr := makeNibbles(makeNibblesTestData, true) + mntr := MakeNibbles(makeNibblesTestData, true) require.Equal(t, bytes.Equal(mntr, makeNibblesTestExpected), true) makeNibblesTestExpectedFW := Nibbles{0x0, 0x1, 0x2, 0x9, 0x2, 0x0} - mntr2 := makeNibbles(makeNibblesTestData, false) + mntr2 := MakeNibbles(makeNibblesTestData, false) require.Equal(t, bytes.Equal(mntr2, 
makeNibblesTestExpectedFW), true) sampleEqualFalse := [][]Nibbles{ diff --git a/crypto/statetrie/nodes.go b/crypto/statetrie/nodes.go new file mode 100644 index 0000000000..422b753908 --- /dev/null +++ b/crypto/statetrie/nodes.go @@ -0,0 +1,63 @@ +// Copyright (C) 2019-2024 Algorand, Inc. +// This file is part of go-algorand +// +// go-algorand is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// go-algorand is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with go-algorand. If not, see . + +package statetrie + +import ( + "fmt" + + "github.com/algorand/go-algorand/crypto" + "github.com/algorand/go-algorand/crypto/statetrie/nibbles" +) + +// Trie nodes + +type node interface { + // add to the trie the key (represented by pathKey + remainingKey) and the value (represented + // by valueHash) into the trie provided. + add(mt *Trie, pathKey nibbles.Nibbles, remainingKey nibbles.Nibbles, valueHash crypto.Digest) (node, error) + + hashing() error // calculate the hash of the node + serialize() ([]byte, error) // serialize the node + getHash() *crypto.Digest // the hash of the node, if it has been hashed + setHash(hash crypto.Digest) // set the hash of the node + getKey() nibbles.Nibbles // the key of the node in the trie +} + +// First byte of a committed node indicates the type of node. 
+//
+//	1 == extension, half nibble
+//	2 == extension, full
+//	3 == leaf, half nibble
+//	4 == leaf, full
+//	5 == branch
+//
+
+// deserializeNode rebuilds a node from its serialized bytes and its key in
+// the trie, choosing the node type from the first byte of nbytes. It panics
+// if nbytes is empty or if the first byte is not a known node type.
+func deserializeNode(nbytes []byte, key nibbles.Nibbles) node {
+	if len(nbytes) == 0 {
+		panic("deserializeNode: zero length node")
+	}
+
+	prefix := nbytes[0]
+	switch {
+	case prefix == 1 || prefix == 2:
+		return deserializeExtensionNode(nbytes, key)
+	case prefix == 3 || prefix == 4:
+		return deserializeLeafNode(nbytes, key)
+	case prefix == 5:
+		return deserializeBranchNode(nbytes, key)
+	}
+	panic(fmt.Sprintf("deserializeNode: invalid node type %d", prefix))
+}
diff --git a/crypto/statetrie/stats.go b/crypto/statetrie/stats.go
new file mode 100644
index 0000000000..3cef28e27f
--- /dev/null
+++ b/crypto/statetrie/stats.go
@@ -0,0 +1,65 @@
+// Copyright (C) 2019-2024 Algorand, Inc.
+// This file is part of go-algorand
+//
+// go-algorand is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// go-algorand is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with go-algorand.  If not, see <https://www.gnu.org/licenses/>.
+
+package statetrie
+
+import (
+	"fmt"
+)
+
+// Helper class for keeping track of stats on the trie.
+
+// triestats is a set of plain integer counters. The package-level stats
+// value is incremented by the trie code as it creates nodes and computes
+// hashes.
+type triestats struct {
+	dbsets         int
+	dbgets         int
+	dbdeletes      int
+	cryptohashes   int
+	makeleaves     int
+	makeextensions int
+	makebranches   int
+	makepanodes    int
+	makebanodes    int
+	newrootnode    int
+	addnode        int
+	delnode        int
+	getnode        int
+	evictions      int
+}
+
+// stats holds the counters for the whole package.
+var stats triestats
+
+// diff returns s - s1, counter by counter. Neither operand is modified.
+func (s triestats) diff(s1 triestats) triestats {
+	// s is a copy of the receiver, so it can be adjusted in place.
+	s.dbsets -= s1.dbsets
+	s.dbgets -= s1.dbgets
+	s.dbdeletes -= s1.dbdeletes
+	s.cryptohashes -= s1.cryptohashes
+	s.makeleaves -= s1.makeleaves
+	s.makeextensions -= s1.makeextensions
+	s.makebranches -= s1.makebranches
+	s.makepanodes -= s1.makepanodes
+	s.makebanodes -= s1.makebanodes
+	s.newrootnode -= s1.newrootnode
+	s.addnode -= s1.addnode
+	s.delnode -= s1.delnode
+	s.getnode -= s1.getnode
+	s.evictions -= s1.evictions
+	return s
+}
+
+// String renders every counter as "name: value", separated by commas, in
+// field declaration order.
+func (s triestats) String() string {
+	const layout = "dbsets: %d, dbgets: %d, dbdeletes: %d, cryptohashes: %d, makeleaves: %d, makeextensions: %d, makebranches: %d, makepanodes: %d, makebanodes: %d, newrootnode: %d, addnode: %d, delnode: %d, getnode: %d, evictions: %d"
+	return fmt.Sprintf(layout,
+		s.dbsets,
+		s.dbgets,
+		s.dbdeletes,
+		s.cryptohashes,
+		s.makeleaves,
+		s.makeextensions,
+		s.makebranches,
+		s.makepanodes,
+		s.makebanodes,
+		s.newrootnode,
+		s.addnode,
+		s.delnode,
+		s.getnode,
+		s.evictions,
+	)
+}
diff --git a/crypto/statetrie/trie.go b/crypto/statetrie/trie.go
new file mode 100644
index 0000000000..e8d8f1736e
--- /dev/null
+++ b/crypto/statetrie/trie.go
@@ -0,0 +1,93 @@
+// Copyright (C) 2019-2024 Algorand, Inc.
+// This file is part of go-algorand
+//
+// go-algorand is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// go-algorand is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with go-algorand.  If not, see <https://www.gnu.org/licenses/>.
+
+package statetrie
+
+import (
+	"errors"
+	"github.com/algorand/go-algorand/crypto"
+	"github.com/algorand/go-algorand/crypto/statetrie/nibbles"
+)
+
+const (
+	// MaxKeyLength is the largest len(key) that Add accepts. Keys are
+	// nibbles.Nibbles, so the limit is counted in elements of that slice.
+	MaxKeyLength = 65535
+)
+
+// Trie is a hashable 16-way radix tree. A Trie without any keys has a nil
+// root.
+type Trie struct {
+	root node
+}
+
+// MakeTrie constructs an empty Trie.
+func MakeTrie() *Trie {
+	return &Trie{}
+}
+
+// Hash provides the root hash for this trie.
+// The root hash is the secure hash for all the nodes in the trie. An empty
+// trie hashes to the zero digest. If the root has not been hashed yet it is
+// hashed now, and Hash panics if that hashing reports an error.
+func (mt *Trie) Hash() crypto.Digest {
+	if mt.root == nil {
+		return crypto.Digest{}
+	}
+	if current := mt.root.getHash(); current.IsZero() {
+		if err := mt.root.hashing(); err != nil {
+			panic(err)
+		}
+	}
+	return *(mt.root.getHash())
+}
+
+// Add adds the given key/value pair to the trie.  The value stored with
+// the key is immediately hashed, however parent nodes are not re-hashed
+// with the new child hash until Trie.Hash() is called. An error is returned
+// for an empty key, for a key longer than MaxKeyLength, or when the root
+// node's add reports one.
+func (mt *Trie) Add(key nibbles.Nibbles, value []byte) (err error) {
+	switch {
+	case len(key) == 0:
+		return errors.New("empty key not allowed")
+	case len(key) > MaxKeyLength:
+		return errors.New("key too long")
+	}
+
+	// The value is hashed exactly once on every path below.
+	stats.cryptohashes++
+	valueHash := crypto.Hash(value)
+
+	if mt.root == nil {
+		// If there are no nodes in the trie, make a leaf node for this
+		// key/value pair and return.
+		stats.newrootnode++
+		mt.root = makeLeafNode(key, valueHash, nibbles.Nibbles{})
+		return nil
+	}
+
+	// Hand the pair to the root node and install whatever node it returns
+	// as the new root. A returned node without a hash means the pair changed
+	// the trie (i.e. it was not a duplicate key/value pair).
+	replacement, err := mt.root.add(mt, nibbles.Nibbles{}, key, valueHash)
+	if err != nil {
+		return err
+	}
+	if replacement.getHash().IsZero() {
+		stats.newrootnode++
+	}
+
+	mt.root = replacement
+	return nil
+}
diff --git a/crypto/statetrie/trie_test.go b/crypto/statetrie/trie_test.go
new file mode 100644
index 0000000000..269bea4fb2
--- /dev/null
+++ b/crypto/statetrie/trie_test.go
@@ -0,0 +1,457 @@
+// Copyright (C) 2019-2024 Algorand, Inc.
+// This file is part of go-algorand
+//
+// go-algorand is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// go-algorand is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with go-algorand.  If not, see <https://www.gnu.org/licenses/>.
+ +package statetrie + +import ( + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/algorand/go-algorand/crypto" + "github.com/algorand/go-algorand/crypto/statetrie/nibbles" + "github.com/algorand/go-algorand/test/partitiontest" +) + +func makenode(hash crypto.Digest) node { + return makeBackingNode(hash, []byte{0x01, 0x02, 0x03, 0x04}) +} + +func TestNodeSerialization(t *testing.T) { + ln := &leafNode{} + ln.keyEnd = []byte("leafendkey") + for i := range ln.keyEnd { + ln.keyEnd[i] &= 0x0f + } + ln.valueHash = crypto.Hash([]byte("leafvalue")) + ln.key = nibbles.Nibbles{0x0, 0x1, 0x2, 0x3} + data, err := ln.serialize() + require.NoError(t, err) + expected := []byte{0x4, 0x9a, 0xf2, 0xee, 0x24, 0xf9, 0xd3, 0xde, 0x8d, 0xdb, 0x45, 0x71, 0x82, 0x90, 0xca, 0x38, 0x42, 0xad, 0x8e, 0xcf, 0x81, 0x56, 0x17, 0x16, 0x55, 0x42, 0x73, 0x6, 0xaa, 0xd0, 0x16, 0x87, 0x45, 0xc5, 0x16, 0x5e, 0x4b, 0x59} + require.Equal(t, expected, data) + + ln.keyEnd = []byte("leafendke") + for i := range ln.keyEnd { + ln.keyEnd[i] &= 0x0f + } + data, err = ln.serialize() + require.NoError(t, err) + expected = []byte{0x3, 0x9a, 0xf2, 0xee, 0x24, 0xf9, 0xd3, 0xde, 0x8d, 0xdb, 0x45, 0x71, 0x82, 0x90, 0xca, 0x38, 0x42, 0xad, 0x8e, 0xcf, 0x81, 0x56, 0x17, 0x16, 0x55, 0x42, 0x73, 0x6, 0xaa, 0xd0, 0x16, 0x87, 0x45, 0xc5, 0x16, 0x5e, 0x4b, 0x50} + require.Equal(t, expected, data) + + bn := &branchNode{} + bn.key = nibbles.Nibbles{0x01, 0x02, 0x03} + bn.children[0] = makenode(crypto.Hash([]byte("branchchild0"))) + bn.children[1] = makenode(crypto.Hash([]byte("branchchild1"))) + bn.children[2] = makenode(crypto.Hash([]byte("branchchild2"))) + bn.children[3] = makenode(crypto.Hash([]byte("branchchild3"))) + bn.children[4] = makenode(crypto.Hash([]byte("branchchild4"))) + bn.children[5] = makenode(crypto.Hash([]byte("branchchild5"))) + bn.children[6] = makenode(crypto.Hash([]byte("branchchild6"))) + bn.children[7] = makenode(crypto.Hash([]byte("branchchild7"))) + 
bn.children[8] = makenode(crypto.Hash([]byte("branchchild8"))) + bn.children[9] = makenode(crypto.Hash([]byte("branchchild9"))) + bn.children[10] = makenode(crypto.Hash([]byte("branchchild10"))) + bn.children[11] = makenode(crypto.Hash([]byte("branchchild11"))) + bn.children[12] = makenode(crypto.Hash([]byte("branchchild12"))) + bn.children[13] = makenode(crypto.Hash([]byte("branchchild13"))) + bn.children[14] = makenode(crypto.Hash([]byte("branchchild14"))) + bn.children[15] = makenode(crypto.Hash([]byte("branchchild15"))) + bn.valueHash = crypto.Hash([]byte("branchvalue")) + data, err = bn.serialize() + require.NoError(t, err) + expected = []byte{0x5, 0xe8, 0x31, 0x2c, 0x27, 0xec, 0x3d, 0x32, 0x7, 0x48, 0xab, 0x13, 0xed, 0x2f, 0x67, 0x94, 0xb3, 0x34, 0x8f, 0x1e, 0x14, 0xe5, 0xac, 0x87, 0x6e, 0x7, 0x68, 0xd6, 0xf6, 0x92, 0x99, 0x4b, 0xc8, 0x2e, 0x93, 0xde, 0xf1, 0x72, 0xc8, 0x55, 0xbb, 0x7e, 0xd1, 0x1d, 0x38, 0x6, 0xd2, 0x97, 0xd7, 0x2, 0x2, 0x86, 0x93, 0x37, 0x57, 0xce, 0xa4, 0xc5, 0x7e, 0x4c, 0xd4, 0x50, 0x94, 0x2e, 0x75, 0xeb, 0xcd, 0x9b, 0x80, 0xa2, 0xf5, 0xf3, 0x15, 0x4a, 0xf2, 0x62, 0x6, 0x7d, 0x6d, 0xdd, 0xe9, 0x20, 0xe1, 0x1a, 0x95, 0x3b, 0x2b, 0xb9, 0xc1, 0xaf, 0x3e, 0xcb, 0x72, 0x1d, 0x3f, 0xad, 0xe9, 0xa6, 0x30, 0xc6, 0xc5, 0x65, 0xf, 0x86, 0xb2, 0x3a, 0x5b, 0x47, 0xcb, 0x29, 0x31, 0xf7, 0x8a, 0xdf, 0xe0, 0x41, 0x6b, 0x11, 0xc0, 0xd, 0xbc, 0x80, 0xa7, 0x48, 0x97, 0x21, 0xbd, 0xee, 0x6f, 0x36, 0xf4, 0x7b, 0x6d, 0x68, 0xa1, 0x43, 0x31, 0x90, 0xf8, 0x56, 0x69, 0x4c, 0xee, 0x88, 0x76, 0x9c, 0xd1, 0xde, 0xe4, 0xbd, 0x64, 0x7d, 0x18, 0xce, 0xd6, 0xdb, 0xf8, 0x85, 0x84, 0x88, 0x5d, 0x7e, 0xda, 0xe0, 0xf2, 0xa0, 0x6d, 0x24, 0x4f, 0xcf, 0xb, 0x8c, 0x34, 0x57, 0x2a, 0x13, 0x22, 0xd9, 0x8d, 0x79, 0x8, 0xa4, 0x22, 0x91, 0x45, 0x64, 0x7b, 0xf3, 0xad, 0xe8, 0x9b, 0x5f, 0x7c, 0x5c, 0xbd, 0x9, 0xd3, 0xc7, 0x3, 0xe2, 0xef, 0x6b, 0x8, 0x8, 0x98, 0x52, 0xb, 0xd1, 0x6a, 0x5a, 0x18, 0x89, 0x44, 0x4f, 0xf1, 0xb0, 0x37, 0xd9, 0x7f, 0x99, 0x3f, 0x6a, 0x84, 0x46, 0x83, 0x2c, 
0x91, 0x58, 0xa8, 0xb3, 0xda, 0xd8, 0x26, 0x2e, 0x8a, 0x4, 0x8f, 0x81, 0xa5, 0xf3, 0xef, 0x46, 0x34, 0x4a, 0x8f, 0x6a, 0x61, 0x2f, 0x3, 0x26, 0x9d, 0xe6, 0x77, 0xee, 0xec, 0xe2, 0xa4, 0x84, 0x38, 0x6b, 0x6e, 0x7e, 0xf0, 0xef, 0xaa, 0x29, 0xa5, 0x13, 0x0, 0xef, 0xff, 0xdf, 0xb5, 0xd7, 0x4e, 0x41, 0x75, 0x4d, 0x2, 0x84, 0x20, 0xe2, 0x18, 0x50, 0x52, 0xae, 0xf4, 0xea, 0xeb, 0x84, 0xb3, 0x91, 0x85, 0xa8, 0xa, 0xba, 0xc9, 0x31, 0x9f, 0x5e, 0x3e, 0xf8, 0xb5, 0xf4, 0x4b, 0xf8, 0xf2, 0xf0, 0x76, 0xa1, 0x6d, 0xec, 0x57, 0x65, 0xbd, 0x2e, 0x78, 0xbe, 0xf4, 0x7c, 0xe4, 0xf2, 0x45, 0xc0, 0xaf, 0x94, 0xb, 0x45, 0x1b, 0xd3, 0xcf, 0x9f, 0x17, 0x7e, 0x1a, 0x52, 0x6d, 0x18, 0xe5, 0x1a, 0x7c, 0xd9, 0x9d, 0xef, 0x8a, 0xe3, 0xe9, 0xe6, 0xf6, 0x76, 0x5e, 0x12, 0xbf, 0xd2, 0xe8, 0xaa, 0x8, 0x88, 0x15, 0x81, 0x99, 0x4e, 0xa3, 0x12, 0x98, 0xc1, 0xb3, 0xde, 0x42, 0x53, 0x2, 0x29, 0x82, 0x87, 0xfe, 0x3d, 0x8, 0xe0, 0xc2, 0x3, 0x70, 0x56, 0xd, 0x9, 0xad, 0xe4, 0x1a, 0xa5, 0xf6, 0x4, 0xdb, 0x63, 0xd0, 0x49, 0x6b, 0x5b, 0xa2, 0x56, 0xb1, 0xd1, 0x4b, 0x56, 0xc3, 0x7e, 0x4b, 0xec, 0xb5, 0xdb, 0xd4, 0xd9, 0xe1, 0x20, 0x99, 0x80, 0x71, 0x9, 0x72, 0x3b, 0xc, 0x8b, 0x56, 0x4, 0x94, 0xe6, 0x4e, 0x35, 0xd, 0x3e, 0x7, 0x8b, 0x86, 0x73, 0x62, 0x5f, 0x61, 0x8d, 0x70, 0x68, 0x86, 0xe8, 0x65, 0xbe, 0x18, 0xa8, 0x4a, 0xac, 0x6d, 0x81, 0x15, 0xde, 0x1b, 0xe1, 0xb3, 0xe8, 0x6a, 0x46, 0xdf, 0xdc, 0xf1, 0x6, 0x3c, 0xa6, 0x1c, 0xc9, 0xcd, 0x12, 0x5e, 0x5f, 0x28, 0xd1, 0x71, 0x6e, 0x9f, 0xc7, 0xdc, 0x77, 0x98, 0x47, 0x7, 0x94, 0x38, 0x4, 0xc4, 0xc4, 0xfe, 0x17, 0x12, 0x1b, 0xcf, 0x96, 0xd8, 0xb1, 0xf2, 0x1e, 0x81, 0xab, 0x15, 0x86, 0x75, 0x5a, 0x39, 0x13, 0xdb, 0xe, 0x1a, 0xd9, 0xa9, 0x70, 0x7d, 0xdd, 0xaf, 0x64, 0x12, 0x27, 0xe5, 0x97, 0xa1, 0x34, 0xb8, 0x1a, 0x61, 0x48, 0x29, 0x61, 0x62, 0xe4, 0x40, 0xba, 0x5, 0x44, 0x24, 0x51, 0xc1, 0x9b, 0x8e, 0x62, 0xf2, 0x1c, 0x6f, 0xd6, 0x8, 0x3, 0xbe, 0x88, 0xf} + require.Equal(t, expected, data) + + bn.children[7] = nil + data, err = bn.serialize() + require.NoError(t, 
err) + expected = []byte{0x5, 0xe8, 0x31, 0x2c, 0x27, 0xec, 0x3d, 0x32, 0x7, 0x48, 0xab, 0x13, 0xed, 0x2f, 0x67, 0x94, 0xb3, 0x34, 0x8f, 0x1e, 0x14, 0xe5, 0xac, 0x87, 0x6e, 0x7, 0x68, 0xd6, 0xf6, 0x92, 0x99, 0x4b, 0xc8, 0x2e, 0x93, 0xde, 0xf1, 0x72, 0xc8, 0x55, 0xbb, 0x7e, 0xd1, 0x1d, 0x38, 0x6, 0xd2, 0x97, 0xd7, 0x2, 0x2, 0x86, 0x93, 0x37, 0x57, 0xce, 0xa4, 0xc5, 0x7e, 0x4c, 0xd4, 0x50, 0x94, 0x2e, 0x75, 0xeb, 0xcd, 0x9b, 0x80, 0xa2, 0xf5, 0xf3, 0x15, 0x4a, 0xf2, 0x62, 0x6, 0x7d, 0x6d, 0xdd, 0xe9, 0x20, 0xe1, 0x1a, 0x95, 0x3b, 0x2b, 0xb9, 0xc1, 0xaf, 0x3e, 0xcb, 0x72, 0x1d, 0x3f, 0xad, 0xe9, 0xa6, 0x30, 0xc6, 0xc5, 0x65, 0xf, 0x86, 0xb2, 0x3a, 0x5b, 0x47, 0xcb, 0x29, 0x31, 0xf7, 0x8a, 0xdf, 0xe0, 0x41, 0x6b, 0x11, 0xc0, 0xd, 0xbc, 0x80, 0xa7, 0x48, 0x97, 0x21, 0xbd, 0xee, 0x6f, 0x36, 0xf4, 0x7b, 0x6d, 0x68, 0xa1, 0x43, 0x31, 0x90, 0xf8, 0x56, 0x69, 0x4c, 0xee, 0x88, 0x76, 0x9c, 0xd1, 0xde, 0xe4, 0xbd, 0x64, 0x7d, 0x18, 0xce, 0xd6, 0xdb, 0xf8, 0x85, 0x84, 0x88, 0x5d, 0x7e, 0xda, 0xe0, 0xf2, 0xa0, 0x6d, 0x24, 0x4f, 0xcf, 0xb, 0x8c, 0x34, 0x57, 0x2a, 0x13, 0x22, 0xd9, 0x8d, 0x79, 0x8, 0xa4, 0x22, 0x91, 0x45, 0x64, 0x7b, 0xf3, 0xad, 0xe8, 0x9b, 0x5f, 0x7c, 0x5c, 0xbd, 0x9, 0xd3, 0xc7, 0x3, 0xe2, 0xef, 0x6b, 0x8, 0x8, 0x98, 0x52, 0xb, 0xd1, 0x6a, 0x5a, 0x18, 0x89, 0x44, 0x4f, 0xf1, 0xb0, 0x37, 0xd9, 0x7f, 0x99, 0x3f, 0x6a, 0x84, 0x46, 0x83, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x84, 0x38, 0x6b, 0x6e, 0x7e, 0xf0, 0xef, 0xaa, 0x29, 0xa5, 0x13, 0x0, 0xef, 0xff, 0xdf, 0xb5, 0xd7, 0x4e, 0x41, 0x75, 0x4d, 0x2, 0x84, 0x20, 0xe2, 0x18, 0x50, 0x52, 0xae, 0xf4, 0xea, 0xeb, 0x84, 0xb3, 0x91, 0x85, 0xa8, 0xa, 0xba, 0xc9, 0x31, 0x9f, 0x5e, 0x3e, 0xf8, 0xb5, 0xf4, 0x4b, 0xf8, 0xf2, 0xf0, 0x76, 0xa1, 0x6d, 0xec, 0x57, 0x65, 0xbd, 0x2e, 0x78, 0xbe, 0xf4, 0x7c, 0xe4, 0xf2, 0x45, 0xc0, 0xaf, 0x94, 0xb, 0x45, 0x1b, 0xd3, 0xcf, 0x9f, 0x17, 0x7e, 0x1a, 0x52, 0x6d, 
0x18, 0xe5, 0x1a, 0x7c, 0xd9, 0x9d, 0xef, 0x8a, 0xe3, 0xe9, 0xe6, 0xf6, 0x76, 0x5e, 0x12, 0xbf, 0xd2, 0xe8, 0xaa, 0x8, 0x88, 0x15, 0x81, 0x99, 0x4e, 0xa3, 0x12, 0x98, 0xc1, 0xb3, 0xde, 0x42, 0x53, 0x2, 0x29, 0x82, 0x87, 0xfe, 0x3d, 0x8, 0xe0, 0xc2, 0x3, 0x70, 0x56, 0xd, 0x9, 0xad, 0xe4, 0x1a, 0xa5, 0xf6, 0x4, 0xdb, 0x63, 0xd0, 0x49, 0x6b, 0x5b, 0xa2, 0x56, 0xb1, 0xd1, 0x4b, 0x56, 0xc3, 0x7e, 0x4b, 0xec, 0xb5, 0xdb, 0xd4, 0xd9, 0xe1, 0x20, 0x99, 0x80, 0x71, 0x9, 0x72, 0x3b, 0xc, 0x8b, 0x56, 0x4, 0x94, 0xe6, 0x4e, 0x35, 0xd, 0x3e, 0x7, 0x8b, 0x86, 0x73, 0x62, 0x5f, 0x61, 0x8d, 0x70, 0x68, 0x86, 0xe8, 0x65, 0xbe, 0x18, 0xa8, 0x4a, 0xac, 0x6d, 0x81, 0x15, 0xde, 0x1b, 0xe1, 0xb3, 0xe8, 0x6a, 0x46, 0xdf, 0xdc, 0xf1, 0x6, 0x3c, 0xa6, 0x1c, 0xc9, 0xcd, 0x12, 0x5e, 0x5f, 0x28, 0xd1, 0x71, 0x6e, 0x9f, 0xc7, 0xdc, 0x77, 0x98, 0x47, 0x7, 0x94, 0x38, 0x4, 0xc4, 0xc4, 0xfe, 0x17, 0x12, 0x1b, 0xcf, 0x96, 0xd8, 0xb1, 0xf2, 0x1e, 0x81, 0xab, 0x15, 0x86, 0x75, 0x5a, 0x39, 0x13, 0xdb, 0xe, 0x1a, 0xd9, 0xa9, 0x70, 0x7d, 0xdd, 0xaf, 0x64, 0x12, 0x27, 0xe5, 0x97, 0xa1, 0x34, 0xb8, 0x1a, 0x61, 0x48, 0x29, 0x61, 0x62, 0xe4, 0x40, 0xba, 0x5, 0x44, 0x24, 0x51, 0xc1, 0x9b, 0x8e, 0x62, 0xf2, 0x1c, 0x6f, 0xd6, 0x8, 0x3, 0xbe, 0x88, 0xf} + + require.Equal(t, expected, data) + + en := &extensionNode{} + en.key = nibbles.Nibbles{0x01, 0x02, 0x03} + en.sharedKey = []byte("extensionkey") + for i := range en.sharedKey { + en.sharedKey[i] &= 0x0f + } + en.next = makenode(crypto.Hash([]byte("extensionnext"))) + data, err = en.serialize() + require.NoError(t, err) + expected = []byte{0x2, 0xea, 0x24, 0x1a, 0x68, 0x6c, 0x5, 0xc8, 0x4, 0xda, 0x0, 0x66, 0x76, 0x8e, 0xb, 0x1d, 0x12, 0x7c, 0x82, 0x7f, 0x5f, 0xc5, 0x81, 0x97, 0x6c, 0x9c, 0xf0, 0xe6, 0xf2, 0x42, 0x33, 0xa, 0xad, 0x58, 0x45, 0xe3, 0x9f, 0xeb, 0x59} + require.Equal(t, expected, data) + en2 := deserializeExtensionNode(data, nibbles.Nibbles{0x01, 0x02, 0x03}) + require.NoError(t, err) + en2data, err := en2.serialize() + require.NoError(t, err) + 
require.Equal(t, en2data, data)
	en.sharedKey = []byte("extensionke") // [5 8 4 5 14 3 9 15 14 11 5]
	for i := range en.sharedKey {
		en.sharedKey[i] &= 0x0f
	}
	fmt.Println("extensionke: ", en.sharedKey)
	data, err = en.serialize()
	require.NoError(t, err)
	expected = []byte{0x1, 0xea, 0x24, 0x1a, 0x68, 0x6c, 0x5, 0xc8, 0x4, 0xda, 0x0, 0x66, 0x76, 0x8e, 0xb, 0x1d, 0x12, 0x7c, 0x82, 0x7f, 0x5f, 0xc5, 0x81, 0x97, 0x6c, 0x9c, 0xf0, 0xe6, 0xf2, 0x42, 0x33, 0xa, 0xad, 0x58, 0x45, 0xe3, 0x9f, 0xeb, 0x50}

	require.Equal(t, expected, data)
	en3 := deserializeExtensionNode(data, nibbles.Nibbles{0x01, 0x02, 0x03})
	require.NoError(t, err)
	en3data, err := en3.serialize()
	require.NoError(t, err)
	require.Equal(t, en3data, data)
}

// buildDotGraph renders mt with dotGraph and writes the resulting Graphviz
// document to the file fn, failing the test if the file cannot be created or
// written. keys and values are the pairs listed in the graph's legend box and
// title is the legend heading.
func buildDotGraph(t *testing.T, mt *Trie, keys [][]byte, values [][]byte, fn string, title string) {
	t.Helper()

	dot := dotGraph(mt, keys, values, title)
	file, err := os.Create(fn)
	require.NoError(t, err)
	defer file.Close()

	// A failed or short write must fail the test rather than silently leave a
	// truncated graph behind.
	_, err = file.WriteString(dot)
	require.NoError(t, err)
}

// TestTrieAddSimpleSequenceNoCache adds a short sequence of keys (including
// repeated keys with new values) to a fresh trie and renders the final trie
// once at the end.
func TestTrieAddSimpleSequenceNoCache(t *testing.T) {
	partitiontest.PartitionTest(t)
	t.Parallel()

	pairs := []struct{ key, val []byte }{
		{[]byte{0x01, 0x02, 0x03}, []byte{0x04, 0x05, 0x06}},
		{[]byte{0x01, 0x02, 0x03}, []byte{0x04, 0x05, 0x07}},
		{[]byte{0x01, 0x02, 0x03}, []byte{0x04, 0x05, 0x09}},
		{[]byte{0x01, 0x02}, []byte{0x04, 0x05, 0x09}},
		{[]byte{0x01, 0x02}, []byte{0x04, 0x05, 0x0a}},
		{[]byte{0x01, 0x02, 0x03, 0x04}, []byte{0x04, 0x05, 0x0b}},
		{[]byte{0x01, 0x02, 0x03, 0x06, 0x06, 0x07, 0x06}, []byte{0x04, 0x05, 0x0c}},
		{[]byte{0x01, 0x0d, 0x02, 0x03, 0x06, 0x06, 0x07, 0x06}, []byte{0x04, 0x05, 0x0c}},
	}

	mt := MakeTrie()
	var kk, vv [][]byte
	for _, p := range pairs {
		// Every pair shown in the legend is really added to the trie, so the
		// legend and the rendered structure cannot drift apart.
		kk = append(kk, p.key)
		vv = append(vv, p.val)
		mt.Add(p.key, p.val)
	}

	// The separator keeps the file inside the per-test temp directory.
	fn := t.TempDir() + string(os.PathSeparator) + "trieSimpleSequence.dot"
	buildDotGraph(t, mt, kk, vv, fn, "Trie Simple Sequence")
}

// TestTrieLeafAddTransitions exercises the leaf-node add transitions. Each
// scenario starts from an empty trie, performs a few adds, and after every add
// renders the trie and checks the root hash against a known-good digest.
func TestTrieLeafAddTransitions(t *testing.T) {
	partitiontest.PartitionTest(t)
	t.Parallel()

	// Trailing separator so that tempDir+name lands inside the directory.
	tempDir := t.TempDir() + string(os.PathSeparator)

	// digest parses an expected root hash and fails the test on a malformed
	// literal instead of silently comparing against a zero digest.
	digest := func(s string) crypto.Digest {
		d, err := crypto.DigestFromString(s)
		require.NoError(t, err)
		return d
	}

	keyA := []byte{0x01}
	keyB := []byte{0x02}
	keyAB := []byte{0x01, 0x02}
	keyAC := []byte{0x01, 0x03}
	keyCD := []byte{0x03, 0x04}
	valDEF := []byte{0x04, 0x05, 0x06}
	valGHI := []byte{0x07, 0x08, 0x09}
	valJKL := []byte{0x0a, 0x0b, 0x0c}

	// Root hashes shared by several scenarios: a trie holding only
	// keyAB=valDEF, and a trie holding only keyA=valDEF.
	const (
		hashOnlyAB = "EJKKWSBR6ND6FWXTHTKIANNHEAR5JZHNK4DCQIRAUN4K65F4P6KA"
		hashOnlyA  = "FFQBBL5UTISGD3D226DE3MBE57Z4F5J4KI5LWBZD35MHKOG2HWWQ"
	)

	type step struct {
		key, val []byte
		// replaces marks an add that overwrites the only key present, so the
		// legend is restarted instead of listing the stale pair.
		replaces bool
		file     string
		title    string
		want     string
	}

	scenarios := [][]step{
		{ // codepath 1
			{key: keyAB, val: valDEF, file: "codepath1.0.dot", title: "Leaf add: Codepath 1, before", want: hashOnlyAB},
			{key: keyCD, val: valGHI, file: "codepath1.2.dot", title: "Leaf add: Codepath 1, after", want: "LTB7VIEISQQD5DN6WBKVD3ZB2BBXEFQVC7AVU7PXFFS6ZEU2ZQIQ"},
		},
		{ // codepath 2
			{key: keyA, val: valDEF, file: "codepath2.0.dot", title: "Leaf add: Codepath 2, before", want: hashOnlyA},
			{key: keyA, val: valGHI, replaces: true, file: "codepath2.2.dot", title: "Leaf add: Codepath 2, after", want: "3HXXEYIIWJHMQTGQF6SRREUM3BHYFC5DAIZ3YPRH73P2IAMV32LA"},
		},
		{ // codepath 3
			{key: keyA, val: valDEF, file: "codepath3.0.dot", title: "Leaf add: Codepath 3, before", want: hashOnlyA},
			{key: keyAB, val: valGHI, file: "codepath3.2.dot", title: "Leaf add: Codepath 3, after", want: "YEWXAECRC4UKLBV6TJJGQTE7Z5BMUWLO27CAIXV4QY5LYBFZCGGA"},
		},
		{ // codepath 4
			{key: keyAB, val: valDEF, file: "codepath4.0.dot", title: "Leaf add: Codepath 4, before", want: hashOnlyAB},
			{key: keyA, val: valGHI, file: "codepath4.2.dot", title: "Leaf add: Codepath 4, after", want: "VRSSXJ74NVQXRBQOIOIIQHRX5D6WEWBG3LKGKHEOYQPA7TEQ2U6A"},
		},
		{ // codepath 5
			{key: keyAB, val: valDEF, file: "codepath5.0.dot", title: "Leaf add: Codepath 5, before", want: hashOnlyAB},
			{key: keyAC, val: valGHI, file: "codepath5.2.dot", title: "Leaf add: Codepath 5, after", want: "FS3AP7ELN3VXLPY7NZTU7DOX5YOV2E4IBK2PXQIU562EVFZAAP5Q"},
		},
		{ // codepath 6
			{key: keyA, val: valDEF, file: "codepath6.0.dot", title: "Leaf add: Codepath 6, setup", want: hashOnlyA},
			{key: keyB, val: valGHI, file: "codepath6.1.dot", title: "Leaf add: Codepath 6, before", want: "O5OVEUEVNBYQW4USTIBAQDU2JHNGUWGLNCVQXSS3NPUONVZ2LM6A"},
			{key: keyAB, val: valJKL, file: "codepath6.2.dot", title: "Leaf add: Codepath 6, after", want: "DHL5EA3QIAQSNKHRUZBUBNYEY52HFHOOSX2CWDQDQCQORXJZRP2Q"},
		},
		{ // codepath 7
			{key: keyAB, val: valDEF, file: "codepath7.0.dot", title: "Leaf add: Codepath 7, setup", want: hashOnlyAB},
			{key: keyB, val: valGHI, file: "codepath7.1.dot", title: "Leaf add: Codepath 7, before", want: "OHSKGDYIVZR34PLQ5YI7E7KYLLA5SFUDNN324YSHTPCLQB2WGXZA"},
			{key: keyA, val: valJKL, file: "codepath7.2.dot", title: "Leaf add: Codepath 7, after", want: "LLXHTSABWNRPDEV5Z7JB3OV4R576AZQ57UIODWRCOMN2I6ELMSTQ"},
		},
	}

	for _, steps := range scenarios {
		mt := MakeTrie()
		var kk, vv [][]byte
		for _, s := range steps {
			mt.Add(s.key, s.val)
			if s.replaces {
				kk, vv = nil, nil
			}
			kk = append(kk, s.key)
			vv = append(vv, s.val)
			// Render before asserting so the graph exists for inspection
			// when the hash check fails.
			buildDotGraph(t, mt, kk, vv, tempDir+s.file, s.title)
			require.Equal(t, digest(s.want), mt.Hash())
		}
	}
}

// TestTrieAddSimpleSequence adds the same kind of key sequence as
// TestTrieAddSimpleSequenceNoCache, but renders the trie after every add.
func TestTrieAddSimpleSequence(t *testing.T) {
	partitiontest.PartitionTest(t)
	t.Parallel()

	// Trailing separator so that tempDir+name lands inside the directory.
	tempDir := t.TempDir() + string(os.PathSeparator)

	steps := []struct {
		key, val []byte
		file     string
	}{
		{[]byte{0x01, 0x02, 0x03}, []byte{0x03, 0x05, 0x06}, "trie0.dot"},
		{[]byte{0x01, 0x02, 0x03}, []byte{0x04, 0x05, 0x07}, "trie2.dot"},
		{[]byte{0x01, 0x02, 0x03}, []byte{0x04, 0x05, 0x09}, "trie3.dot"},
		{[]byte{0x01, 0x02}, []byte{0x04, 0x05, 0x09}, "trie4.dot"},
		{[]byte{0x01, 0x02}, []byte{0x04, 0x05, 0x0a}, "trie5.dot"},
		{[]byte{0x01, 0x02, 0x03, 0x04}, []byte{0x04, 0x05, 0x0b}, "trie6.dot"},
		{[]byte{0x01, 0x02, 0x03, 0x06, 0x06, 0x07, 0x06}, []byte{0x04, 0x05, 0x0c}, "trie7.dot"},
		{[]byte{0x01, 0x0d, 0x02, 0x03, 0x06, 0x06, 0x07, 0x06}, []byte{0x04, 0x05, 0x0c}, "trie8.dot"},
		// duplicate key and value
		{[]byte{0x01, 0x0d, 0x02, 0x03, 0x06, 0x06, 0x07, 0x06}, []byte{0x04, 0x05, 0x0c}, "trie9.dot"},
	}

	mt := MakeTrie()
	var kk, vv [][]byte
	for _, s := range steps {
		kk = append(kk, s.key)
		vv = append(vv, s.val)
		mt.Add(s.key, s.val)
		buildDotGraph(t, mt, kk, vv, tempDir+s.file, "Trie Simple")
	}
}

// dotGraph returns a Graphviz dot description of the trie. The graph starts
// with a legend box showing title and the keysAdded/valuesAdded pairs (in hex),
// followed by the nodes reachable from the trie root. valuesAdded must be at
// least as long as keysAdded.
func dotGraph(mt *Trie, keysAdded [][]byte, valuesAdded [][]byte, title string) string {
	var keys string
	for i, k := range keysAdded {
		keys += fmt.Sprintf("%x = %x\\n", k, valuesAdded[i])
	}
	return fmt.Sprintf("digraph trie { key [shape=box, label=\"%s\\nkey/value inserted:\\n%s\"];\n %s }\n", title, keys, dotGraphHelper(mt, mt.root, nibbles.Nibbles{}))
}

// dotGraphHelper returns the dot statements for node n and everything below
// it. Nodes are identified by their pointer value ("n%p"). path is the nibble
// path from the root to n; it is only threaded through the recursion.
func dotGraphHelper(mt *Trie, n node, path nibbles.Nibbles) string {
	switch tn := n.(type) {
	case *backingNode:
		// Emit a proper node statement so the edge drawn by the parent points
		// at a labelled box rather than at an implicit, unlabelled node.
		return fmt.Sprintf("n%p [label=\"backingnode\" shape=box];\n", tn)
	case *leafNode:
		return fmt.Sprintf("n%p [label=\"leaf\\nkeyEnd:%x\\nvalueHash:%s\" shape=box];\n", tn, tn.keyEnd, tn.valueHash)
	case *extensionNode:
		// The full slice expression caps capacity so append always copies and
		// never writes into a backing array shared with a sibling's path.
		nextPath := append(path[:len(path):len(path)], tn.sharedKey...)
		return fmt.Sprintf("n%p [label=\"extension\\nshKey:%x\" shape=box];\n", tn, tn.sharedKey) +
			fmt.Sprintf("n%p -> n%p;\n", tn, tn.next) +
			dotGraphHelper(mt, tn.next, nextPath)
	case *branchNode:
		indexesFilled := "--"
		for i, ch := range tn.children {
			if ch != nil {
				indexesFilled += fmt.Sprintf("%x ", i)
			}
		}
		indexesFilled += "--"

		s := fmt.Sprintf("n%p [label=\"branch\\nindexesFilled:%s\\nvalueHash:%s\" shape=box];\n", tn, indexesFilled, tn.valueHash)
		// All edges are written first, then the child subtrees.
		for _, child := range tn.children {
			if child != nil {
				s += fmt.Sprintf("n%p -> n%p;\n", tn, child)
			}
		}
		for childrenIndex, ch := range tn.children {
			if ch != nil {
				childPath := append(path[:len(path):len(path)], byte(childrenIndex))
				s += dotGraphHelper(mt, ch, childPath)
			}
		}
		return s
	default:
		return ""
	}
}