Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMT benchmarks #40

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions amt_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
package amt

import (
"context"
"fmt"
"math/rand"
"testing"

cbor "github.com/ipfs/go-ipld-cbor"
"github.com/stretchr/testify/require"
cbg "github.com/whyrusleeping/cbor-gen"
)

type rander struct {
r *rand.Rand
}

func (r *rander) randKey(keyRange uint64) uint64 {
return r.r.Uint64() % keyRange
}

func (r *rander) randValue(datasize int) []byte {
buf := make([]byte, datasize)
rand.Read(buf)
return buf
}

func (r *rander) selectKey(keys []uint64) uint64 {
i := rand.Int() % len(keys)
return keys[i]
}

type amtParams struct {
id string
count int
datasize int
keyrange int
}

type benchCase struct {
id string
count int
bitwidth int
datasize int
keyrange int
}

var caseTable []benchCase

func init() {

bitwidths := []int{
1,
2,
3,
4,
5,
6,
7,
8,
}

amts := []amtParams{
amtParams{
id: "example.Full",
count: 5000,
datasize: 4,
keyrange: 5000,
},
amtParams{
id: "example.Sparse",
count: 5000,
datasize: 4,
keyrange: 5000000,
},
amtParams{
id: "example.AlmostFull",
count: 5000,
datasize: 4,
keyrange: 10000,
},
}

for _, a := range amts {
for _, bw := range bitwidths {
caseTable = append(caseTable,
benchCase{
id: fmt.Sprintf("%s -- bw=%d", a.id, bw),
count: a.count,
bitwidth: bw,
datasize: a.datasize,
keyrange: a.keyrange,
})
}
}
}

func fillContinuous(ctx context.Context, b *testing.B, a *Root, count uint64, dataSize int, r rander) []uint64 {
keys := make([]uint64, 0)
for i := uint64(0); i < count; i++ {
require.NoError(b, a.Set(ctx, i, r.randValue(dataSize)))
keys = append(keys, i)
}
return keys
}

func fillSparse(ctx context.Context, b *testing.B, a *Root, count int, keyrange int, dataSize int, r rander) []uint64 {
keys := make(map[uint64]struct{})
keysSlice := make([]uint64, 0)
for j := 0; j < count; j++ {
for {
key := r.randKey(uint64(keyrange))
_, dup := keys[key]
if !dup {
require.NoError(b, a.Set(ctx, key, r.randValue(dataSize)))
keys[key] = struct{}{}
keysSlice = append(keysSlice, key)
break
}
}
}
return keysSlice
}

func fill(ctx context.Context, b *testing.B, a *Root, count int, dataSize int, keyrange int, r rander) []uint64 {
if count >= keyrange {
return fillContinuous(ctx, b, a, uint64(count), dataSize, r)
} else {
return fillSparse(ctx, b, a, count, keyrange, dataSize, r)
}
}

// Note this is only intended for use measuring size as timing and memory usage
// may not be optimal to handle no duplicate writes.
func BenchmarkFill(b *testing.B) {
ctx := context.Background()
for _, t := range caseTable {
b.Run(fmt.Sprintf("%s", t.id), func(b *testing.B) {
for i := 0; i < b.N; i++ {
r := rander{rand.New(rand.NewSource(int64(i)))}
mock := newMockBlocks()
cst := cbor.NewCborStore(mock)
a, err := NewAMT(cst, UseTreeBitWidth(t.bitwidth))
require.NoError(b, err)

// Fill the tree
fill(ctx, b, a, t.count, t.datasize, t.keyrange, r)
_, err = a.Flush(ctx)
require.NoError(b, err)
b.StopTimer()
b.ReportMetric(float64(len(mock.data))/float64(t.count), "blocks")
b.ReportMetric(float64(mock.totalBlockSizes())/float64(t.count), "bytes(blockstoreSize)/entry")
binarySize, err := a.node.checkSize(ctx, cst, uint(t.bitwidth), a.height)
require.NoError(b, err)
b.ReportMetric(float64(binarySize), "binarySize")
b.ReportMetric(float64(binarySize)/float64(t.count), "bytes(amtSize)/entry")
b.StartTimer()
}
})
}
}

// 0. Fill AMT with t.count keys selected between 0 and t.keyrange.
// 1. Perform 1000 sets on a random key from t.keyrange on the base AMT
// 2. Report average over sets
func BenchmarkSetIndividual(b *testing.B) {
ctx := context.Background()
for _, t := range caseTable {
b.Run(fmt.Sprintf("%s", t.id), func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
r := rander{rand.New(rand.NewSource(int64(i)))}
mock := newMockBlocks()
cst := cbor.NewCborStore(mock)
a, err := NewAMT(cst, UseTreeBitWidth(t.bitwidth))
require.NoError(b, err)

// Initial fill
fill(ctx, b, a, t.count, t.datasize, t.keyrange, r)
aCid, err := a.Flush(ctx)
require.NoError(b, err)

mock.stats = blockstoreStats{}
b.ReportAllocs()
b.StartTimer()
for j := 0; j < 1000; j++ {
// Load AMT, perform a set at random within key range, flush
a, err = LoadAMT(ctx, cst, aCid, UseTreeBitWidth(t.bitwidth))
require.NoError(b, err)

key := r.randKey(uint64(t.keyrange))
require.NoError(b, a.Set(ctx, key, r.randValue(t.datasize)))
_, err = a.Flush(ctx)
require.NoError(b, err)
}
b.StopTimer()
b.ReportMetric(float64(mock.stats.evtcntGet)/1000, "getEvts")
b.ReportMetric(float64(mock.stats.evtcntPut)/1000, "putEvts")
b.ReportMetric(float64(mock.stats.bytesPut)/1000, "bytesPut")
}
})
}
}

func BenchmarkGetIndividual(b *testing.B) {
ctx := context.Background()

for _, t := range caseTable {
b.Run(fmt.Sprintf("%s", t.id), func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
r := rander{rand.New(rand.NewSource(int64(i)))}
mock := newMockBlocks()
cst := cbor.NewCborStore(mock)
a, err := NewAMT(cst, UseTreeBitWidth(t.bitwidth))
require.NoError(b, err)

// Initial fill
amtKeys := fill(ctx, b, a, t.count, t.datasize, t.keyrange, r)
aCid, err := a.Flush(ctx)
require.NoError(b, err)

mock.stats = blockstoreStats{}
b.ReportAllocs()
b.StartTimer()
var d cbg.Deferred
for j := 0; j < 1000; j++ {
// Load AMT, perform a set on a random existing key
a, err = LoadAMT(ctx, cst, aCid, UseTreeBitWidth(t.bitwidth))
require.NoError(b, err)
require.NoError(b, a.Get(ctx, r.selectKey(amtKeys), &d))
}
b.StopTimer()
b.ReportMetric(float64(mock.stats.evtcntGet)/float64(1000), "getEvts")
b.ReportMetric(float64(mock.stats.evtcntPut)/float64(1000), "putEvts")
}
})
}
}
35 changes: 28 additions & 7 deletions amt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,32 +40,53 @@ func TestMain(m *testing.M) {
}

type mockBlocks struct {
data map[cid.Cid]block.Block
getCount, putCount int
data map[cid.Cid]block.Block
stats blockstoreStats
}

func newMockBlocks() *mockBlocks {
return &mockBlocks{make(map[cid.Cid]block.Block), 0, 0}
return &mockBlocks{make(map[cid.Cid]block.Block),
blockstoreStats{0, 0, 0, 0},
}
}

func (mb *mockBlocks) totalBlockSizes() int {
sum := 0
for _, v := range mb.data {
sum += len(v.RawData())
}
return sum
}

func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
mb.stats.evtcntGet++
d, ok := mb.data[c]
mb.getCount++
if ok {
return d, nil
}
return nil, fmt.Errorf("Not Found")
}

func (mb *mockBlocks) Put(b block.Block) error {
mb.putCount++
mb.stats.evtcntPut++
mb.stats.bytesPut += len(b.RawData())
if _, exists := mb.data[b.Cid()]; exists {
mb.stats.evtcntPutDup++
}
mb.data[b.Cid()] = b
return nil
}

type blockstoreStats struct {
evtcntGet int
evtcntPut int
bytesPut int
evtcntPutDup int
}

func (mb *mockBlocks) report(b *testing.B) {
b.ReportMetric(float64(mb.getCount)/float64(b.N), "gets/op")
b.ReportMetric(float64(mb.putCount)/float64(b.N), "puts/op")
b.ReportMetric(float64(mb.stats.evtcntGet)/float64(b.N), "gets/op")
b.ReportMetric(float64(mb.stats.evtcntPut)/float64(b.N), "puts/op")
}

func TestBasicSetGet(t *testing.T) {
Expand Down
59 changes: 59 additions & 0 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,65 @@ func (n *node) flush(ctx context.Context, bs cbor.IpldStore, bitWidth uint, heig
return nd, nil
}

// checkSize computes the serialized size of the entire AMT.
// It puts and gets blocks as necessary to do this.
// This is an expensive operation and should only be used in testing and analysis.
//
// Precondition: the node has not been modified since flush. The dirty bits are
// not checked and link cid when exists is assumed to be source of truth
func (n *node) checkSize(ctx context.Context, bs cbor.IpldStore, bitWidth uint, height int) (uint64, error) {
// Get size of this node
nd := new(internal.Node)
nd.Bmap = make([]byte, bmapBytes(bitWidth))
if height == 0 {
for i, val := range n.values {
if val == nil {
continue
}
nd.Values = append(nd.Values, val)
nd.Bmap[i/8] |= 1 << (uint(i) % 8)
}
} else {
for i, ln := range n.links {
if ln == nil {
continue
}
// Precondition that no link cids are out of date applied here.
// For the current implementation this should not actually impact
// the final result as cids are all sized the same.
nd.Links = append(nd.Links, ln.cid)
nd.Bmap[i/8] |= 1 << (uint(1) % 8)
}
}
c, err := bs.Put(ctx, nd)
if err != nil {
return 0, err
}
var def cbg.Deferred
if err := bs.Get(ctx, c, &def); err != nil {
return 0, err
}
totsize := uint64(len(def.Raw))

// Recurse
for _, ln := range n.links {
if ln == nil {
continue
}
chnd, err := ln.load(ctx, bs, bitWidth, height)
if err != nil {
return 0, err
}
chsize, err := chnd.checkSize(ctx, bs, bitWidth, height-1)
if err != nil {
return 0, err
}
totsize += chsize

}
return totsize, nil
}

func (n *node) setLink(bitWidth uint, i uint64, l *link) {
if n.links == nil {
if l == nil {
Expand Down