diff --git a/mmr/leafcount.go b/mmr/leafcount.go
index 5517b7e..eb4dff9 100644
--- a/mmr/leafcount.go
+++ b/mmr/leafcount.go
@@ -7,10 +7,60 @@ import (
 
 // LeafCount returns the number of leaves in the largest mmr whose size is <=
 // the supplied size. See also [merklelog/mmr/PeakBitmap]
+//
+// This can safely be used to obtain the leaf index *only* when size is known to
+// be a valid mmr size, typically just before or just after calling
+// AddHashedLeaf. If in any doubt, instead do:
+//
+//	leafIndex = LeafCount(FirstMMRSize(mmrIndex)) - 1
 func LeafCount(size uint64) uint64 {
 	return PeaksBitmap(size)
 }
 
+// FirstMMRSize returns the first complete MMR size that contains the provided
+// mmrIndex. mmrIndices identify nodes. mmrSizes are the result of *adding*
+// nodes to MMRs and, because adding leaves also adds their back fill
+// nodes, the range of valid sizes is not continuous. It is typically possible
+// to "do the right thing" with just LeafCount, but its use is error prone
+// because of this fact.
+//
+// The implementation walks forward from mmrIndex for as long as IndexHeight
+// reports that the next index is higher than the current one, and returns the
+// size that includes the last index reached.
+//
+// The outputs of this function for the mmrIndices 0 through 10 are
+//
+//	[1, 3, 3, 4, 7, 7, 7, 8, 10, 10, 11]
+//
+// for the following tree (heights are shown in the left hand column):
+//
+//	2       6
+//	      /   \
+//	1    2     5     9
+//	    / \   / \   / \
+//	0  0   1 3   4 7   8 10
+func FirstMMRSize(mmrIndex uint64) uint64 {
+	i := mmrIndex
+	h0 := IndexHeight(i)
+	h1 := IndexHeight(i + 1)
+	for h0 < h1 {
+		i++
+		h0 = h1
+		h1 = IndexHeight(i + 1)
+	}
+
+	// i is the last index included; a size is always the last index + 1.
+	return i + 1
+}
+
+// LeafIndex returns the zero based index of the last leaf in the first
+// complete MMR that contains mmrIndex. When mmrIndex identifies a leaf this is
+// that leaf's own index; when it identifies an interior node it is the index
+// of the last leaf contained in the MMR that the interior node completes.
+func LeafIndex(mmrIndex uint64) uint64 {
+	return LeafCount(FirstMMRSize(mmrIndex)) - 1
+}
+
 // PeakMap returns a bit mask where a 1 corresponds to a peak and the position
 // of the bit is the height of that peak. The resulting value is also the count
 // of leaves. This is due to the binary nature of the tree.
diff --git a/mmr/leafcount_test.go b/mmr/leafcount_test.go index 63a3a14..4efd2d6 100644 --- a/mmr/leafcount_test.go +++ b/mmr/leafcount_test.go @@ -73,27 +73,108 @@ func TestLeafCountFirst26(t *testing.T) { // the PeaksBitmap (which is how LeafCount works), on the intermediate // values, it terminates at the last valid mmrSize. expectLeafCounts := []uint64{ + // 0 1 .2 .3 .4 5 .6 .7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 + // 1 .2 .3 .4 5 .6 .7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 // 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9, 9, 10, 11, 11, 11, 12, 13, 13, 14, 15, + // 0, 0, 1, 2, 2, 2, 3, 4, 4, 5, 6, 6, 6, 6, 7, 8, 8, 9, 10, 10, 10, 11, 12, 12, 13, 14, 0b1, 0b1, 0b10, 0b11, 0b11, 0b11, 0b100, 0b101, 0b101, 0b110, 0b111, 0b111, 0b111, 0b111, 0b1000, 0b1001, 0b1001, 0b1010, 0b1011, 0b1011, 0b1011, 0b1100, 0b1101, 0b1101, 0b1110, 0b1111, } var leafCounts []uint64 - for mmrIndex := uint64(0); mmrIndex < 26; mmrIndex++ { + // for mmrIndex := uint64(0); mmrIndex < 26; mmrIndex++ { + for mmrIndex := uint64(0); mmrIndex < 38; mmrIndex++ { // i+1 converts from mmrIndex to mmrSize mmrSize := mmrIndex + 1 got := LeafCount(mmrSize) - assert.Equal(t, got, expectLeafCounts[mmrIndex]) + if len(expectLeafCounts) > int(mmrIndex) { + assert.Equal(t, got, expectLeafCounts[mmrIndex]) + } leafCounts = append(leafCounts, got) } for i := range leafCounts { - fmt.Printf("%04d, ", i+1) + fmt.Printf("%05d, ", i) } fmt.Printf("\n") for _, l := range leafCounts { - fmt.Printf("%04b, ", l) + fmt.Printf("%05b, ", l) + } + fmt.Printf("\n") + for _, l := range leafCounts { + fmt.Printf("%05d, ", l) + } + fmt.Printf("\n") + for _, l := range leafCounts { + fmt.Printf("%05d, ", l-1) } - fmt.Printf("\n") + +} + +func TestFirstMMRSize(t *testing.T) { + + // 3 14 + // / \ + // / \ + // / \ + // / \ + // 2 6 13 21 + // / \ / \ + // 1 2 5 9 12 17 20 24 + // / \ / \ / \ / \ / \ + // 0 0 1 3 4 7 8 10 11 15 16 18 19 22 23 25 + + // This test iterates through a sequential range of 
mmrIndices, the test values are the sizes we expect. + tests := []uint64{ + //0 1 2 3 4 5 6 7 8 9 10 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + 1, 3, 3, 4, 7, 7, 7, 8, 10, 10, 11, 15, 15, 15, 15, 16, 18, 18, 19, 22, 22, 22, 23, 25, 25, 26, + } + + for i, want := range tests { + t.Run(fmt.Sprintf("mmrIndex %d", i), func(t *testing.T) { + got := FirstMMRSize(uint64(i)) + if got != want { + t.Errorf("FirstMMRSize() = %v, want %v", got, want) + } + // this is to illustrate the confusion that arises from using LeafCount directly on arbitrary indices + leavesFromIndex := LeafCount(uint64(i) + 1) + leavesFromSize := LeafCount(got) + fmt.Printf("i=%02d, LeafCount(i+1) = %d, LeafCount(FirstMMRSize(i)) = %d\n", i, leavesFromIndex, leavesFromSize) + }) + } +} + +func TestLeafIndex(t *testing.T) { + + // 3 14 + // / \ + // / \ + // / \ + // / \ + // 2 6 13 21 + // / \ / \ + // 1 2 5 9 12 17 20 24 + // / \ / \ / \ / \ / \ + // 0 0 1 3 4 7 8 10 11 15 16 18 19 22 23 25 + // 0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 + + // This test iterates through a sequential range of mmrIndices, the test values are the leaf indices we expect. 
+ tests := []uint64{ + //0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22, 23, 24, 25 + 0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9, 9, 10, 11, 11, 11, 12, 13, 13, 14, + } + + for i, want := range tests { + t.Run(fmt.Sprintf("mmrIndex %d", i), func(t *testing.T) { + got := LeafIndex(uint64(i)) + if got != want { + t.Errorf("LeafIndex(%d) = %d, want %d", i, got, want) + } + // this is to illustrate the confusion that arises from using LeafCount directly on arbitrary indices + leavesFromIndex := LeafCount(uint64(i) + 1) + fmt.Printf("i=%02d, LeafCount(i+1) = %02d, LeafIndex(i) = %02d\n", i, leavesFromIndex, got) + }) + } + } diff --git a/mmr/spurs.go b/mmr/spurs.go index 6386607..8faf9d0 100644 --- a/mmr/spurs.go +++ b/mmr/spurs.go @@ -88,7 +88,7 @@ func SpurSumHeight(height uint64) uint64 { // Due to the binary nature of the tree, the set reduction is just dividing the // current number of spurs by 2 and the count to subtract is exactly the result // of that. -func LeafMinusSpurSum(iLeaf uint64) uint64 { +func LeafMinusSpurSum(leafIndex uint64) uint64 { // XXX: TODO: I think there is a more efficient approach which recursively // splits the leaf index into perfect sub trees based on the most sig bit @@ -96,10 +96,10 @@ func LeafMinusSpurSum(iLeaf uint64) uint64 { // given it is used mostly for the much smaller logical massif connecting // tree, is fine too. 
- sum := iLeaf - iLeaf >>= 1 - for ; iLeaf > 0; iLeaf >>= 1 { - sum -= iLeaf + sum := leafIndex + leafIndex >>= 1 + for ; leafIndex > 0; leafIndex >>= 1 { + sum -= leafIndex } return sum } @@ -124,23 +124,23 @@ func LeafMinusSpurSum(iLeaf uint64) uint64 { // // iLeaf = 3 returns 2, iLeaf 7 returns 3, iLeaf 9 returns 1 // Notice that all the even numbered iLeaf, eg 2, 4, 6, 8 all return 0, -func SpurHeightLeaf(iLeaf uint64) uint64 { +func SpurHeightLeaf(leafIndex uint64) uint64 { // The binary tree structure means we can use the count of least significant // zero bits as a proxy for height - return uint64(bits.TrailingZeros64(iLeaf + 1)) + return uint64(bits.TrailingZeros64(leafIndex + 1)) } // TreeIndex returns the mmr index of the i'th leaf It can also be used to // calculate the sum of all the 'alpine nodes' in the mmr blobs preceding the // blob if the blob index is substituted for iLeaf -func TreeIndex(iLeaf uint64) uint64 { +func TreeIndex(leafIndex uint64) uint64 { // XXX: TODO it feels like there is a way to initialise using SpurSumHeight // then accumulate using some variation of the inner term of SpurSumHeight. // But the approach is already O(Log 2 n) ish. sum := uint64(0) - for i := iLeaf; i > 0; { + for i := leafIndex; i > 0; { height := Log2Uint64(i) + 1 sum += SpurSumHeight(height) + height half := uint64(1 << (height - 1)) diff --git a/mmr/testdb_test.go b/mmr/testdb_test.go index 1a4e7a0..0f182cf 100644 --- a/mmr/testdb_test.go +++ b/mmr/testdb_test.go @@ -101,6 +101,7 @@ func NewCanonicalTestDB(t *testing.T) *testDb { // 1 3 6 10 13 18 21 25 // / \ / \ / \ / \ / \ / \ / \ // 0 1 2 4 5 8 9 11 12 16 17 19 20 23 24 26 + // 1 . 2 3 . 4 5 . 6 7 . 8 .9 . 10 11 12 13 // the 0 based tree // 3 14