Skip to content

Commit

Permalink
fix: provide a safe way to obtain leaf index from mmrIndex (#19)
Browse files Browse the repository at this point in the history
When the merklebuilder is adding a node it is guaranteed that the before
and after mmrIndex are also valid mmrSizes. This lets it use LeafCount
to directly obtain the leafIndex of the item just added.

This led to the regular use of LeafCount as the way to get the leaf
index. It is not safe to do this in general.

This change adds FirstMMRSize which returns the size of the smallest
valid MMR which contains the node identified by mmrIndex.

To provide a clear idiom for safely getting the leaf index from a node
index (mmrIndex), LeafIndex(mmrIndex) is additionally provided as a
convenience for

  LeafCount(FirstMMRSize(mmrIndex)) - 1

AB#9551

Co-authored-by: Robin Bryce <[email protected]>
  • Loading branch information
robinbryce and Robin Bryce authored Jun 5, 2024
1 parent c09ee35 commit 6bea50d
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 14 deletions.
40 changes: 40 additions & 0 deletions mmr/leafcount.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,50 @@ import (

// LeafCount returns the number of leaves in the largest mmr whose size is <=
// the supplied size. It is a direct alias for [PeaksBitmap], whose value is
// also the leaf count.
//
// This can safely be used to obtain the leaf index *only* when size is known to
// be a valid mmr size. Typically that is just before or just after calling
// AddHashedLeaf. If in any doubt, instead do:
//
//	leafIndex = LeafCount(FirstMMRSize(mmrIndex)) - 1
//
// which is what [LeafIndex] does.
func LeafCount(size uint64) uint64 {
return PeaksBitmap(size)
}

// FirstMMRSize returns the size of the smallest complete MMR that contains the
// node identified by mmrIndex.
//
// mmrIndices identify nodes, while mmrSizes are the result of *adding* nodes
// to an mmr. Adding a leaf may also add the interior "back fill" nodes above
// it, so the range of valid sizes is not continuous. LeafCount alone gives the
// right answer only for valid sizes, which makes it error prone for arbitrary
// node indices; this function maps any node index to a valid size first.
//
// For mmrIndices 0 through 10 the results are
//
//	[1, 3, 3, 4, 7, 7, 7, 8, 10, 10, 11]
//
// for the tree (height in the left column)
//
//	2       6
//	      /   \
//	1    2     5     9
//	    / \   / \   / \
//	0  0   1 3   4 7   8 10
func FirstMMRSize(mmrIndex uint64) uint64 {
	last := mmrIndex
	height := IndexHeight(last)
	for {
		// While the following index is higher than the current one it is a
		// back fill node belonging to the same addition, so it must be
		// included in the size.
		nextHeight := IndexHeight(last + 1)
		if height >= nextHeight {
			// last is the final node of this addition; size is index + 1.
			return last + 1
		}
		last++
		height = nextHeight
	}
}

// LeafIndex returns the zero based leaf index associated with the node
// identified by mmrIndex. It is the safe idiom for going from a node index to
// a leaf index, and is a convenience for
//
//	LeafCount(FirstMMRSize(mmrIndex)) - 1
//
// When mmrIndex identifies a leaf, the result is that leaf's index. When it
// identifies an interior node, the result is the index of the last leaf in
// the smallest valid MMR that contains the node (e.g. mmrIndex 2 gives 1).
func LeafIndex(mmrIndex uint64) uint64 {
return LeafCount(FirstMMRSize(mmrIndex)) - 1
}

// PeakMap returns a bit mask where a 1 corresponds to a peak and the position
// of the bit is the height of that peak. The resulting value is also the count
// of leaves. This is due to the binary nature of the tree.
Expand Down
91 changes: 86 additions & 5 deletions mmr/leafcount_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,27 +73,108 @@ func TestLeafCountFirst26(t *testing.T) {
// the PeaksBitmap (which is how LeafCount works), on the intermediate
// values, it terminates at the last valid mmrSize.
expectLeafCounts := []uint64{
// 0 1 .2 .3 .4 5 .6 .7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
// 1 .2 .3 .4 5 .6 .7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
// 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9, 9, 10, 11, 11, 11, 12, 13, 13, 14, 15,
// 0, 0, 1, 2, 2, 2, 3, 4, 4, 5, 6, 6, 6, 6, 7, 8, 8, 9, 10, 10, 10, 11, 12, 12, 13, 14,
0b1, 0b1, 0b10, 0b11, 0b11, 0b11, 0b100, 0b101, 0b101, 0b110, 0b111, 0b111, 0b111, 0b111,
0b1000, 0b1001, 0b1001, 0b1010, 0b1011, 0b1011, 0b1011, 0b1100, 0b1101, 0b1101, 0b1110, 0b1111,
}

var leafCounts []uint64

for mmrIndex := uint64(0); mmrIndex < 26; mmrIndex++ {
// for mmrIndex := uint64(0); mmrIndex < 26; mmrIndex++ {
for mmrIndex := uint64(0); mmrIndex < 38; mmrIndex++ {
// i+1 converts from mmrIndex to mmrSize
mmrSize := mmrIndex + 1
got := LeafCount(mmrSize)
assert.Equal(t, got, expectLeafCounts[mmrIndex])
if len(expectLeafCounts) > int(mmrIndex) {
assert.Equal(t, got, expectLeafCounts[mmrIndex])
}
leafCounts = append(leafCounts, got)
}
for i := range leafCounts {
fmt.Printf("%04d, ", i+1)
fmt.Printf("%05d, ", i)
}
fmt.Printf("\n")
for _, l := range leafCounts {
fmt.Printf("%04b, ", l)
fmt.Printf("%05b, ", l)
}
fmt.Printf("\n")
for _, l := range leafCounts {
fmt.Printf("%05d, ", l)
}
fmt.Printf("\n")
for _, l := range leafCounts {
fmt.Printf("%05d, ", l-1)
}

fmt.Printf("\n")

}

// TestFirstMMRSize checks FirstMMRSize for every mmrIndex in the 26 node
// reference tree below.
func TestFirstMMRSize(t *testing.T) {

	// The 0 based tree, height in the left column:
	//
	//	3                 14
	//	                 /  \
	//	               /      \
	//	             /          \
	//	           /              \
	//	2         6               13               21
	//	        /   \           /    \
	//	1     2       5       9      12       17       20       24
	//	     / \     / \     / \    /  \     /  \
	//	0   0   1   3   4   7   8  10   11  15   16  18   19  22   23   25

	// The position of each entry is the mmrIndex under test, the value is the
	// mmr size we expect FirstMMRSize to return for it.
	tests := []uint64{
		//0 1  2  3  4  5  6  7  8   9   10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25
		1, 3, 3, 4, 7, 7, 7, 8, 10, 10, 11, 15, 15, 15, 15, 16, 18, 18, 19, 22, 22, 22, 23, 25, 25, 26,
	}

	for i, want := range tests {
		mmrIndex := uint64(i)
		t.Run(fmt.Sprintf("mmrIndex %d", i), func(t *testing.T) {
			got := FirstMMRSize(mmrIndex)
			if got != want {
				t.Errorf("FirstMMRSize(%d) = %v, want %v", mmrIndex, got, want)
			}
			// This illustrates the confusion that arises from using LeafCount
			// directly on arbitrary indices. It goes to the test log so that
			// it only shows with -v or when the sub test fails.
			leavesFromIndex := LeafCount(mmrIndex + 1)
			leavesFromSize := LeafCount(got)
			t.Logf("i=%02d, LeafCount(i+1) = %d, LeafCount(FirstMMRSize(i)) = %d", i, leavesFromIndex, leavesFromSize)
		})
	}
}

// TestLeafIndex checks LeafIndex for every mmrIndex in the 26 node reference
// tree below.
func TestLeafIndex(t *testing.T) {

	// The 0 based tree, height in the left column. The final row repeats the
	// height 0 label and then gives the leaf index of the leaf above it.
	//
	//	3                 14
	//	                 /  \
	//	               /      \
	//	             /          \
	//	           /              \
	//	2         6               13               21
	//	        /   \           /    \
	//	1     2       5       9      12       17       20       24
	//	     / \     / \     / \    /  \     /  \
	//	0   0   1   3   4   7   8  10   11  15   16  18   19  22   23   25
	//	0   0   1   2   3   4   5  6    7   8    9   10   11  12   13   14

	// The position of each entry is the mmrIndex under test, the value is the
	// leaf index we expect LeafIndex to return for it.
	tests := []uint64{
		//0 1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18  19  20  21  22  23  24  25
		0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9, 9, 10, 11, 11, 11, 12, 13, 13, 14,
	}

	for i, want := range tests {
		mmrIndex := uint64(i)
		t.Run(fmt.Sprintf("mmrIndex %d", i), func(t *testing.T) {
			got := LeafIndex(mmrIndex)
			if got != want {
				t.Errorf("LeafIndex(%d) = %d, want %d", i, got, want)
			}
			// This illustrates the confusion that arises from using LeafCount
			// directly on arbitrary indices. It goes to the test log so that
			// it only shows with -v or when the sub test fails.
			leavesFromIndex := LeafCount(mmrIndex + 1)
			t.Logf("i=%02d, LeafCount(i+1) = %02d, LeafIndex(i) = %02d", i, leavesFromIndex, got)
		})
	}

}
18 changes: 9 additions & 9 deletions mmr/spurs.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,18 @@ func SpurSumHeight(height uint64) uint64 {
// Due to the binary nature of the tree, the set reduction is just dividing the
// current number of spurs by 2 and the count to subtract is exactly the result
// of that.
func LeafMinusSpurSum(leafIndex uint64) uint64 {

	// XXX: TODO: I think there is a more efficient approach which recursively
	// splits the leaf index into perfect sub trees based on the most sig bit
	// set, and then uses sum = 2i at each round. But this approach, especially
	// given it is used mostly for the much smaller logical massif connecting
	// tree, is fine too.

	// Start from the leaf index and subtract each successive halving of it
	// until the halved value reaches zero. The running value never underflows
	// because the halvings sum to at most leafIndex.
	//
	// NOTE(review): n minus the sum of floor(n / 2^k) for k >= 1 is the number
	// of set bits in n, so this appears equivalent to bits.OnesCount64.
	sum := leafIndex
	for spurs := leafIndex >> 1; spurs > 0; spurs >>= 1 {
		sum -= spurs
	}
	return sum
}
Expand All @@ -124,23 +124,23 @@ func LeafMinusSpurSum(iLeaf uint64) uint64 {
//
// leafIndex = 3 returns 2, leafIndex = 7 returns 3, leafIndex = 9 returns 1.
// Notice that all the even numbered leaf indices, e.g. 2, 4, 6, 8, return 0.
func SpurHeightLeaf(leafIndex uint64) uint64 {
	// The binary tree structure means we can use the count of least significant
	// zero bits as a proxy for height. The count is taken on leafIndex + 1, so
	// every even leafIndex (odd after the increment) yields 0.
	//
	// If leafIndex is the maximum uint64 the increment wraps to 0, for which
	// TrailingZeros64 reports 64.
	return uint64(bits.TrailingZeros64(leafIndex + 1))
}

// TreeIndex returns the mmr index of the i'th leaf. It can also be used to
// calculate the sum of all the 'alpine nodes' in the mmr blobs preceding the
// blob if the blob index is substituted for leafIndex.
func TreeIndex(iLeaf uint64) uint64 {
func TreeIndex(leafIndex uint64) uint64 {

// XXX: TODO it feels like there is a way to initialise using SpurSumHeight
// then accumulate using some variation of the inner term of SpurSumHeight.
// But the approach is already O(Log 2 n) ish.

sum := uint64(0)
for i := iLeaf; i > 0; {
for i := leafIndex; i > 0; {
height := Log2Uint64(i) + 1
sum += SpurSumHeight(height) + height
half := uint64(1 << (height - 1))
Expand Down
1 change: 1 addition & 0 deletions mmr/testdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ func NewCanonicalTestDB(t *testing.T) *testDb {
// 1 3 6 10 13 18 21 25
// / \ / \ / \ / \ / \ / \ / \
// 0 1 2 4 5 8 9 11 12 16 17 19 20 23 24 26
// 1 . 2 3 . 4 5 . 6 7 . 8 .9 . 10 11 12 13

// the 0 based tree
// 3 14
Expand Down

0 comments on commit 6bea50d

Please sign in to comment.