diff --git a/allocate_test.go b/allocate_test.go index 9f08be1cf..d8dedfb7b 100644 --- a/allocate_test.go +++ b/allocate_test.go @@ -4,32 +4,36 @@ import ( "testing" "go.etcd.io/bbolt/internal/common" + "go.etcd.io/bbolt/internal/freelist" ) func TestTx_allocatePageStats(t *testing.T) { - f := newTestFreelist() - ids := []common.Pgid{2, 3} - f.readIDs(ids) + for n, f := range map[string]freelist.Interface{"hashmap": freelist.NewHashMapFreelist(), "array": freelist.NewArrayFreelist()} { + t.Run(n, func(t *testing.T) { + ids := []common.Pgid{2, 3} + f.Init(ids) - tx := &Tx{ - db: &DB{ - freelist: f, - pageSize: common.DefaultPageSize, - }, - meta: &common.Meta{}, - pages: make(map[common.Pgid]*common.Page), - } + tx := &Tx{ + db: &DB{ + freelist: f, + pageSize: common.DefaultPageSize, + }, + meta: &common.Meta{}, + pages: make(map[common.Pgid]*common.Page), + } - txStats := tx.Stats() - prePageCnt := txStats.GetPageCount() - allocateCnt := f.free_count() + txStats := tx.Stats() + prePageCnt := txStats.GetPageCount() + allocateCnt := f.FreeCount() - if _, err := tx.allocate(allocateCnt); err != nil { - t.Fatal(err) - } + if _, err := tx.allocate(allocateCnt); err != nil { + t.Fatal(err) + } - txStats = tx.Stats() - if txStats.GetPageCount() != prePageCnt+int64(allocateCnt) { - t.Errorf("Allocated %d but got %d page in stats", allocateCnt, txStats.GetPageCount()) + txStats = tx.Stats() + if txStats.GetPageCount() != prePageCnt+int64(allocateCnt) { + t.Errorf("Allocated %d but got %d page in stats", allocateCnt, txStats.GetPageCount()) + } + }) } } diff --git a/bucket.go b/bucket.go index 785ad9bd5..6371ace97 100644 --- a/bucket.go +++ b/bucket.go @@ -903,7 +903,7 @@ func (b *Bucket) free() { var tx = b.tx b.forEachPageNode(func(p *common.Page, n *node, _ int) { if p != nil { - tx.db.freelist.free(tx.meta.Txid(), p) + tx.db.freelist.Free(tx.meta.Txid(), p) } else { n.free() } diff --git a/bucket_test.go b/bucket_test.go index 3255e7b89..493d133a7 100644 --- a/bucket_test.go +++ b/bucket_test.go @@ -430,6 +430,9 @@ func TestBucket_Delete_FreelistOverflow(t *testing.T) { if reopenFreePages := db.Stats().FreePageN; freePages != reopenFreePages { t.Fatalf("expected %d free pages, got %+v", freePages, db.Stats()) } + if reopenPendingPages := db.Stats().PendingPageN; reopenPendingPages != 0 { + t.Fatalf("expected no pending pages, got %+v", db.Stats()) + } } // Ensure that deleting of non-existing key is a no-op. diff --git a/cmd/bbolt/command_version.go b/cmd/bbolt/command_version.go index 73019c798..39d756bd9 100644 --- a/cmd/bbolt/command_version.go +++ b/cmd/bbolt/command_version.go @@ -5,6 +5,7 @@ import ( "runtime" "github.com/spf13/cobra" + "go.etcd.io/bbolt/version" ) diff --git a/db.go b/db.go index 236698212..349f187ae 100644 --- a/db.go +++ b/db.go @@ -12,6 +12,7 @@ import ( berrors "go.etcd.io/bbolt/errors" "go.etcd.io/bbolt/internal/common" + fl "go.etcd.io/bbolt/internal/freelist" ) // The time elapsed between consecutive file locking attempts. @@ -133,7 +134,7 @@ type DB struct { rwtx *Tx txs []*Tx - freelist *freelist + freelist fl.Interface freelistLoad sync.Once pagePool sync.Pool @@ -418,12 +419,12 @@ func (db *DB) loadFreelist() { db.freelist = newFreelist(db.FreelistType) if !db.hasSyncedFreelist() { // Reconstruct free list by scanning the DB. - db.freelist.readIDs(db.freepages()) + db.freelist.Init(db.freepages()) } else { // Read free list from freelist page. 
- db.freelist.read(db.page(db.meta().Freelist())) + db.freelist.Read(db.page(db.meta().Freelist())) } - db.stats.FreePageN = db.freelist.free_count() + db.stats.FreePageN = db.freelist.FreeCount() }) } @@ -797,7 +798,7 @@ func (db *DB) beginTx() (*Tx, error) { db.txs = append(db.txs, t) n := len(db.txs) if db.freelist != nil { - db.freelist.addReadonlyTXID(t.meta.Txid()) + db.freelist.AddReadonlyTXID(t.meta.Txid()) } // Unlock the meta pages. @@ -843,7 +844,7 @@ func (db *DB) beginRWTx() (*Tx, error) { t := &Tx{writable: true} t.init(db) db.rwtx = t - db.freelist.freePages() + db.freelist.ReleasePendingPages() return t, nil } @@ -867,7 +868,7 @@ func (db *DB) removeTx(tx *Tx) { } n := len(db.txs) if db.freelist != nil { - db.freelist.removeReadonlyTXID(tx.meta.Txid()) + db.freelist.RemoveReadonlyTXID(tx.meta.Txid()) } // Unlock the meta pages. @@ -1155,7 +1156,7 @@ func (db *DB) allocate(txid common.Txid, count int) (*common.Page, error) { p.SetOverflow(uint32(count - 1)) // Use pages from the freelist if they are available. - p.SetId(db.freelist.allocate(txid, count)) + p.SetId(db.freelist.Allocate(txid, count)) if p.Id() != 0 { return p, nil } @@ -1261,6 +1262,13 @@ func (db *DB) freepages() []common.Pgid { return fids } +func newFreelist(freelistType FreelistType) fl.Interface { + if freelistType == FreelistMapType { + return fl.NewHashMapFreelist() + } + return fl.NewArrayFreelist() +} + // Options represents the options that can be set when opening a database. type Options struct { // Timeout is the amount of time to wait to obtain a file lock. diff --git a/freelist.go b/freelist.go deleted file mode 100644 index 49b6c200b..000000000 --- a/freelist.go +++ /dev/null @@ -1,371 +0,0 @@ -package bbolt - -import ( - "fmt" - "math" - "sort" - "unsafe" - - "go.etcd.io/bbolt/internal/common" -) - -// txPending holds a list of pgids and corresponding allocation txns -// that are pending to be freed. -type txPending struct { - ids []common.Pgid - alloctx []common.Txid // txids allocating the ids - lastReleaseBegin common.Txid // beginning txid of last matching releaseRange -} - -// pidSet holds the set of starting pgids which have the same span size -type pidSet map[common.Pgid]struct{} - -// freelist represents a list of all pages that are available for allocation. -// It also tracks pages that have been freed but are still in use by open transactions. -type freelist struct { - freelistType FreelistType // freelist type - ids []common.Pgid // all free and available free page ids. - readonlyTXIDs []common.Txid // all readonly transaction IDs. - allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid. - pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx. - cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids. 
- freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size - forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size - backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size - freePagesCount uint64 // count of free pages(hashmap version) - allocate func(txid common.Txid, n int) common.Pgid // the freelist allocate func - free_count func() int // the function which gives you free page number - mergeSpans func(ids common.Pgids) // the mergeSpan func - getFreePageIDs func() []common.Pgid // get free pgids func - readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist -} - -// newFreelist returns an empty, initialized freelist. -func newFreelist(freelistType FreelistType) *freelist { - f := &freelist{ - freelistType: freelistType, - allocs: make(map[common.Pgid]common.Txid), - pending: make(map[common.Txid]*txPending), - cache: make(map[common.Pgid]struct{}), - freemaps: make(map[uint64]pidSet), - forwardMap: make(map[common.Pgid]uint64), - backwardMap: make(map[common.Pgid]uint64), - } - - if freelistType == FreelistMapType { - f.allocate = f.hashmapAllocate - f.free_count = f.hashmapFreeCount - f.mergeSpans = f.hashmapMergeSpans - f.getFreePageIDs = f.hashmapGetFreePageIDs - f.readIDs = f.hashmapReadIDs - } else { - f.allocate = f.arrayAllocate - f.free_count = f.arrayFreeCount - f.mergeSpans = f.arrayMergeSpans - f.getFreePageIDs = f.arrayGetFreePageIDs - f.readIDs = f.arrayReadIDs - } - - return f -} - -// size returns the size of the page after serialization. -func (f *freelist) size() int { - n := f.count() - if n >= 0xFFFF { - // The first element will be used to store the count. See freelist.write. - n++ - } - return int(common.PageHeaderSize) + (int(unsafe.Sizeof(common.Pgid(0))) * n) -} - -// count returns count of pages on the freelist -func (f *freelist) count() int { - return f.free_count() + f.pending_count() -} - -// pending_count returns count of pending pages -func (f *freelist) pending_count() int { - var count int - for _, txp := range f.pending { - count += len(txp.ids) - } - return count -} - -// copyall copies a list of all free ids and all pending ids in one sorted list. -// f.count returns the minimum length required for dst. -func (f *freelist) copyall(dst []common.Pgid) { - m := make(common.Pgids, 0, f.pending_count()) - for _, txp := range f.pending { - m = append(m, txp.ids...) - } - sort.Sort(m) - common.Mergepgids(dst, f.getFreePageIDs(), m) -} - -// free releases a page and its overflow for a given transaction id. -// If the page is already free then a panic will occur. -func (f *freelist) free(txid common.Txid, p *common.Page) { - if p.Id() <= 1 { - panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.Id())) - } - - // Free page and all its overflow pages. - txp := f.pending[txid] - if txp == nil { - txp = &txPending{} - f.pending[txid] = txp - } - allocTxid, ok := f.allocs[p.Id()] - if ok { - delete(f.allocs, p.Id()) - } else if p.IsFreelistPage() { - // Freelist is always allocated by prior tx. - allocTxid = txid - 1 - } - - for id := p.Id(); id <= p.Id()+common.Pgid(p.Overflow()); id++ { - // Verify that page is not already free. - if _, ok := f.cache[id]; ok { - panic(fmt.Sprintf("page %d already freed", id)) - } - // Add to the freelist and cache. 
- txp.ids = append(txp.ids, id) - txp.alloctx = append(txp.alloctx, allocTxid) - f.cache[id] = struct{}{} - } -} - -// release moves all page ids for a transaction id (or older) to the freelist. -func (f *freelist) release(txid common.Txid) { - m := make(common.Pgids, 0) - for tid, txp := range f.pending { - if tid <= txid { - // Move transaction's pending pages to the available freelist. - // Don't remove from the cache since the page is still free. - m = append(m, txp.ids...) - delete(f.pending, tid) - } - } - f.mergeSpans(m) -} - -// releaseRange moves pending pages allocated within an extent [begin,end] to the free list. -func (f *freelist) releaseRange(begin, end common.Txid) { - if begin > end { - return - } - var m common.Pgids - for tid, txp := range f.pending { - if tid < begin || tid > end { - continue - } - // Don't recompute freed pages if ranges haven't updated. - if txp.lastReleaseBegin == begin { - continue - } - for i := 0; i < len(txp.ids); i++ { - if atx := txp.alloctx[i]; atx < begin || atx > end { - continue - } - m = append(m, txp.ids[i]) - txp.ids[i] = txp.ids[len(txp.ids)-1] - txp.ids = txp.ids[:len(txp.ids)-1] - txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1] - txp.alloctx = txp.alloctx[:len(txp.alloctx)-1] - i-- - } - txp.lastReleaseBegin = begin - if len(txp.ids) == 0 { - delete(f.pending, tid) - } - } - f.mergeSpans(m) -} - -// rollback removes the pages from a given pending tx. -func (f *freelist) rollback(txid common.Txid) { - // Remove page ids from cache. - txp := f.pending[txid] - if txp == nil { - return - } - var m common.Pgids - for i, pgid := range txp.ids { - delete(f.cache, pgid) - tx := txp.alloctx[i] - if tx == 0 { - continue - } - if tx != txid { - // Pending free aborted; restore page back to alloc list. - f.allocs[pgid] = tx - } else { - // Freed page was allocated by this txn; OK to throw away. - m = append(m, pgid) - } - } - // Remove pages from pending list and mark as free if allocated by txid. - delete(f.pending, txid) - f.mergeSpans(m) -} - -// freed returns whether a given page is in the free list. -func (f *freelist) freed(pgId common.Pgid) bool { - _, ok := f.cache[pgId] - return ok -} - -// read initializes the freelist from a freelist page. -func (f *freelist) read(p *common.Page) { - if !p.IsFreelistPage() { - panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.Id(), p.Typ())) - } - - ids := p.FreelistPageIds() - - // Copy the list of page ids from the freelist. - if len(ids) == 0 { - f.ids = nil - } else { - // copy the ids, so we don't modify on the freelist page directly - idsCopy := make([]common.Pgid, len(ids)) - copy(idsCopy, ids) - // Make sure they're sorted. - sort.Sort(common.Pgids(idsCopy)) - - f.readIDs(idsCopy) - } -} - -// write writes the page ids onto a freelist page. All free and pending ids are -// saved to disk since in the event of a program crash, all pending ids will -// become free. -func (f *freelist) write(p *common.Page) error { - // Combine the old free pgids and pgids waiting on an open transaction. - - // Update the header flag. - p.SetFlags(common.FreelistPageFlag) - - // The page.count can only hold up to 64k elements so if we overflow that - // number then we handle it by putting the size in the first element. 
- l := f.count() - if l == 0 { - p.SetCount(uint16(l)) - } else if l < 0xFFFF { - p.SetCount(uint16(l)) - data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - ids := unsafe.Slice((*common.Pgid)(data), l) - f.copyall(ids) - } else { - p.SetCount(0xFFFF) - data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - ids := unsafe.Slice((*common.Pgid)(data), l+1) - ids[0] = common.Pgid(l) - f.copyall(ids[1:]) - } - - return nil -} - -// reload reads the freelist from a page and filters out pending items. -func (f *freelist) reload(p *common.Page) { - f.read(p) - - // Build a cache of only pending pages. - pcache := make(map[common.Pgid]bool) - for _, txp := range f.pending { - for _, pendingID := range txp.ids { - pcache[pendingID] = true - } - } - - // Check each page in the freelist and build a new available freelist - // with any pages not in the pending lists. - var a []common.Pgid - for _, id := range f.getFreePageIDs() { - if !pcache[id] { - a = append(a, id) - } - } - - f.readIDs(a) -} - -// noSyncReload reads the freelist from Pgids and filters out pending items. -func (f *freelist) noSyncReload(Pgids []common.Pgid) { - // Build a cache of only pending pages. - pcache := make(map[common.Pgid]bool) - for _, txp := range f.pending { - for _, pendingID := range txp.ids { - pcache[pendingID] = true - } - } - - // Check each page in the freelist and build a new available freelist - // with any pages not in the pending lists. - var a []common.Pgid - for _, id := range Pgids { - if !pcache[id] { - a = append(a, id) - } - } - - f.readIDs(a) -} - -// reindex rebuilds the free cache based on available and pending free lists. -func (f *freelist) reindex() { - ids := f.getFreePageIDs() - f.cache = make(map[common.Pgid]struct{}, len(ids)) - for _, id := range ids { - f.cache[id] = struct{}{} - } - for _, txp := range f.pending { - for _, pendingID := range txp.ids { - f.cache[pendingID] = struct{}{} - } - } -} - -func (f *freelist) addReadonlyTXID(tid common.Txid) { - f.readonlyTXIDs = append(f.readonlyTXIDs, tid) -} - -func (f *freelist) removeReadonlyTXID(tid common.Txid) { - for i := range f.readonlyTXIDs { - if f.readonlyTXIDs[i] == tid { - last := len(f.readonlyTXIDs) - 1 - f.readonlyTXIDs[i] = f.readonlyTXIDs[last] - f.readonlyTXIDs = f.readonlyTXIDs[:last] - break - } - } -} - -type txIDx []common.Txid - -func (t txIDx) Len() int { return len(t) } -func (t txIDx) Swap(i, j int) { t[i], t[j] = t[j], t[i] } -func (t txIDx) Less(i, j int) bool { return t[i] < t[j] } - -// freePages releases any pages associated with closed read-only transactions. -func (f *freelist) freePages() { - // Free all pending pages prior to the earliest open transaction. - sort.Sort(txIDx(f.readonlyTXIDs)) - minid := common.Txid(math.MaxUint64) - if len(f.readonlyTXIDs) > 0 { - minid = f.readonlyTXIDs[0] - } - if minid > 0 { - f.release(minid - 1) - } - // Release unused txid extents. - for _, tid := range f.readonlyTXIDs { - f.releaseRange(minid, tid-1) - minid = tid + 1 - } - f.releaseRange(minid, common.Txid(math.MaxUint64)) - // Any page both allocated and freed in an extent is safe to release. 
-} diff --git a/freelist_test.go b/freelist_test.go deleted file mode 100644 index 5cf40bd1c..000000000 --- a/freelist_test.go +++ /dev/null @@ -1,485 +0,0 @@ -package bbolt - -import ( - "math/rand" - "os" - "reflect" - "sort" - "testing" - "unsafe" - - "go.etcd.io/bbolt/internal/common" -) - -// TestFreelistType is used as a env variable for test to indicate the backend type -const TestFreelistType = "TEST_FREELIST_TYPE" - -// Ensure that a page is added to a transaction's freelist. -func TestFreelist_free(t *testing.T) { - f := newTestFreelist() - f.free(100, common.NewPage(12, 0, 0, 0)) - if !reflect.DeepEqual([]common.Pgid{12}, f.pending[100].ids) { - t.Fatalf("exp=%v; got=%v", []common.Pgid{12}, f.pending[100].ids) - } -} - -// Ensure that a page and its overflow is added to a transaction's freelist. -func TestFreelist_free_overflow(t *testing.T) { - f := newTestFreelist() - f.free(100, common.NewPage(12, 0, 0, 3)) - if exp := []common.Pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) { - t.Fatalf("exp=%v; got=%v", exp, f.pending[100].ids) - } -} - -// Ensure that a transaction's free pages can be released. -func TestFreelist_release(t *testing.T) { - f := newTestFreelist() - f.free(100, common.NewPage(12, 0, 0, 1)) - f.free(100, common.NewPage(9, 0, 0, 0)) - f.free(102, common.NewPage(39, 0, 0, 0)) - f.release(100) - f.release(101) - if exp := []common.Pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { - t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) - } - - f.release(102) - if exp := []common.Pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { - t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) - } -} - -// Ensure that releaseRange handles boundary conditions correctly -func TestFreelist_releaseRange(t *testing.T) { - type testRange struct { - begin, end common.Txid - } - - type testPage struct { - id common.Pgid - n int - allocTxn common.Txid - freeTxn common.Txid - } - - var releaseRangeTests = []struct { - title string - pagesIn []testPage - releaseRanges []testRange - wantFree []common.Pgid - }{ - { - title: "Single pending in range", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, - releaseRanges: []testRange{{1, 300}}, - wantFree: []common.Pgid{3}, - }, - { - title: "Single pending with minimum end range", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, - releaseRanges: []testRange{{1, 200}}, - wantFree: []common.Pgid{3}, - }, - { - title: "Single pending outsize minimum end range", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, - releaseRanges: []testRange{{1, 199}}, - wantFree: nil, - }, - { - title: "Single pending with minimum begin range", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, - releaseRanges: []testRange{{100, 300}}, - wantFree: []common.Pgid{3}, - }, - { - title: "Single pending outside minimum begin range", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, - releaseRanges: []testRange{{101, 300}}, - wantFree: nil, - }, - { - title: "Single pending in minimum range", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}}, - releaseRanges: []testRange{{199, 200}}, - wantFree: []common.Pgid{3}, - }, - { - title: "Single pending and read transaction at 199", - pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}}, - releaseRanges: []testRange{{100, 198}, {200, 300}}, - wantFree: nil, - }, - { - title: "Adjacent pending and read transactions at 199, 200", - pagesIn: []testPage{ - {id: 3, n: 1, 
allocTxn: 199, freeTxn: 200}, - {id: 4, n: 1, allocTxn: 200, freeTxn: 201}, - }, - releaseRanges: []testRange{ - {100, 198}, - {200, 199}, // Simulate the ranges db.freePages might produce. - {201, 300}, - }, - wantFree: nil, - }, - { - title: "Out of order ranges", - pagesIn: []testPage{ - {id: 3, n: 1, allocTxn: 199, freeTxn: 200}, - {id: 4, n: 1, allocTxn: 200, freeTxn: 201}, - }, - releaseRanges: []testRange{ - {201, 199}, - {201, 200}, - {200, 200}, - }, - wantFree: nil, - }, - { - title: "Multiple pending, read transaction at 150", - pagesIn: []testPage{ - {id: 3, n: 1, allocTxn: 100, freeTxn: 200}, - {id: 4, n: 1, allocTxn: 100, freeTxn: 125}, - {id: 5, n: 1, allocTxn: 125, freeTxn: 150}, - {id: 6, n: 1, allocTxn: 125, freeTxn: 175}, - {id: 7, n: 2, allocTxn: 150, freeTxn: 175}, - {id: 9, n: 2, allocTxn: 175, freeTxn: 200}, - }, - releaseRanges: []testRange{{50, 149}, {151, 300}}, - wantFree: []common.Pgid{4, 9, 10}, - }, - } - - for _, c := range releaseRangeTests { - f := newTestFreelist() - var ids []common.Pgid - for _, p := range c.pagesIn { - for i := uint64(0); i < uint64(p.n); i++ { - ids = append(ids, common.Pgid(uint64(p.id)+i)) - } - } - f.readIDs(ids) - for _, p := range c.pagesIn { - f.allocate(p.allocTxn, p.n) - } - - for _, p := range c.pagesIn { - f.free(p.freeTxn, common.NewPage(p.id, 0, 0, uint32(p.n-1))) - } - - for _, r := range c.releaseRanges { - f.releaseRange(r.begin, r.end) - } - - if exp := c.wantFree; !reflect.DeepEqual(exp, f.getFreePageIDs()) { - t.Errorf("exp=%v; got=%v for %s", exp, f.getFreePageIDs(), c.title) - } - } -} - -func TestFreelistHashmap_allocate(t *testing.T) { - f := newTestFreelist() - if f.freelistType != FreelistMapType { - t.Skip() - } - - ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} - f.readIDs(ids) - - f.allocate(1, 3) - if x := f.free_count(); x != 6 { - t.Fatalf("exp=6; got=%v", x) - } - - f.allocate(1, 2) - if x := f.free_count(); x != 4 { - t.Fatalf("exp=4; got=%v", x) - } - f.allocate(1, 1) - if x := f.free_count(); x != 3 { - t.Fatalf("exp=3; got=%v", x) - } - - f.allocate(1, 0) - if x := f.free_count(); x != 3 { - t.Fatalf("exp=3; got=%v", x) - } -} - -// Ensure that a freelist can find contiguous blocks of pages. 
-func TestFreelistArray_allocate(t *testing.T) { - f := newTestFreelist() - if f.freelistType != FreelistArrayType { - t.Skip() - } - ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} - f.readIDs(ids) - if id := int(f.allocate(1, 3)); id != 3 { - t.Fatalf("exp=3; got=%v", id) - } - if id := int(f.allocate(1, 1)); id != 6 { - t.Fatalf("exp=6; got=%v", id) - } - if id := int(f.allocate(1, 3)); id != 0 { - t.Fatalf("exp=0; got=%v", id) - } - if id := int(f.allocate(1, 2)); id != 12 { - t.Fatalf("exp=12; got=%v", id) - } - if id := int(f.allocate(1, 1)); id != 7 { - t.Fatalf("exp=7; got=%v", id) - } - if id := int(f.allocate(1, 0)); id != 0 { - t.Fatalf("exp=0; got=%v", id) - } - if id := int(f.allocate(1, 0)); id != 0 { - t.Fatalf("exp=0; got=%v", id) - } - if exp := []common.Pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { - t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) - } - - if id := int(f.allocate(1, 1)); id != 9 { - t.Fatalf("exp=9; got=%v", id) - } - if id := int(f.allocate(1, 1)); id != 18 { - t.Fatalf("exp=18; got=%v", id) - } - if id := int(f.allocate(1, 1)); id != 0 { - t.Fatalf("exp=0; got=%v", id) - } - if exp := []common.Pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { - t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) - } -} - -// Ensure that a freelist can deserialize from a freelist page. -func TestFreelist_read(t *testing.T) { - // Create a page. - var buf [4096]byte - page := (*common.Page)(unsafe.Pointer(&buf[0])) - page.SetFlags(common.FreelistPageFlag) - page.SetCount(2) - - // Insert 2 page ids. - ids := (*[3]common.Pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page))) - ids[0] = 23 - ids[1] = 50 - - // Deserialize page into a freelist. - f := newTestFreelist() - f.read(page) - - // Ensure that there are two page ids in the freelist. - if exp := []common.Pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { - t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) - } -} - -// Ensure that a freelist can serialize into a freelist page. -func TestFreelist_write(t *testing.T) { - // Create a freelist and write it to a page. - var buf [4096]byte - f := newTestFreelist() - - f.readIDs([]common.Pgid{12, 39}) - f.pending[100] = &txPending{ids: []common.Pgid{28, 11}} - f.pending[101] = &txPending{ids: []common.Pgid{3}} - p := (*common.Page)(unsafe.Pointer(&buf[0])) - if err := f.write(p); err != nil { - t.Fatal(err) - } - - // Read the page back out. - f2 := newTestFreelist() - f2.read(p) - - // Ensure that the freelist is correct. - // All pages should be present and in reverse order. 
- if exp := []common.Pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) { - t.Fatalf("exp=%v; got=%v", exp, f2.getFreePageIDs()) - } -} - -func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) } -func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) } -func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) } -func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) } - -func benchmark_FreelistRelease(b *testing.B, size int) { - ids := randomPgids(size) - pending := randomPgids(len(ids) / 400) - b.ResetTimer() - for i := 0; i < b.N; i++ { - txp := &txPending{ids: pending} - f := newTestFreelist() - f.pending = map[common.Txid]*txPending{1: txp} - f.readIDs(ids) - f.release(1) - } -} - -func randomPgids(n int) []common.Pgid { - pgids := make(common.Pgids, n) - for i := range pgids { - pgids[i] = common.Pgid(rand.Int63()) - } - sort.Sort(pgids) - return pgids -} - -func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) { - f := newTestFreelist() - exp := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} - - f.readIDs(exp) - - if got := f.getFreePageIDs(); !reflect.DeepEqual(exp, got) { - t.Fatalf("exp=%v; got=%v", exp, got) - } - - f2 := newTestFreelist() - var exp2 []common.Pgid - f2.readIDs(exp2) - - if got2 := f2.getFreePageIDs(); !reflect.DeepEqual(got2, exp2) { - t.Fatalf("exp2=%#v; got2=%#v", exp2, got2) - } - -} - -func Test_freelist_mergeWithExist(t *testing.T) { - bm1 := pidSet{1: struct{}{}} - - bm2 := pidSet{5: struct{}{}} - tests := []struct { - name string - ids []common.Pgid - pgid common.Pgid - want []common.Pgid - wantForwardmap map[common.Pgid]uint64 - wantBackwardmap map[common.Pgid]uint64 - wantfreemap map[uint64]pidSet - }{ - { - name: "test1", - ids: []common.Pgid{1, 2, 4, 5, 6}, - pgid: 3, - want: []common.Pgid{1, 2, 3, 4, 5, 6}, - wantForwardmap: map[common.Pgid]uint64{1: 6}, - wantBackwardmap: map[common.Pgid]uint64{6: 6}, - wantfreemap: map[uint64]pidSet{6: bm1}, - }, - { - name: "test2", - ids: []common.Pgid{1, 2, 5, 6}, - pgid: 3, - want: []common.Pgid{1, 2, 3, 5, 6}, - wantForwardmap: map[common.Pgid]uint64{1: 3, 5: 2}, - wantBackwardmap: map[common.Pgid]uint64{6: 2, 3: 3}, - wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2}, - }, - { - name: "test3", - ids: []common.Pgid{1, 2}, - pgid: 3, - want: []common.Pgid{1, 2, 3}, - wantForwardmap: map[common.Pgid]uint64{1: 3}, - wantBackwardmap: map[common.Pgid]uint64{3: 3}, - wantfreemap: map[uint64]pidSet{3: bm1}, - }, - { - name: "test4", - ids: []common.Pgid{2, 3}, - pgid: 1, - want: []common.Pgid{1, 2, 3}, - wantForwardmap: map[common.Pgid]uint64{1: 3}, - wantBackwardmap: map[common.Pgid]uint64{3: 3}, - wantfreemap: map[uint64]pidSet{3: bm1}, - }, - } - for _, tt := range tests { - f := newTestFreelist() - if f.freelistType == FreelistArrayType { - t.Skip() - } - f.readIDs(tt.ids) - - f.mergeWithExistingSpan(tt.pgid) - - if got := f.getFreePageIDs(); !reflect.DeepEqual(tt.want, got) { - t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.want, got) - } - if got := f.forwardMap; !reflect.DeepEqual(tt.wantForwardmap, got) { - t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantForwardmap, got) - } - if got := f.backwardMap; !reflect.DeepEqual(tt.wantBackwardmap, got) { - t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantBackwardmap, got) - } - if got := f.freemaps; !reflect.DeepEqual(tt.wantfreemap, got) { - t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantfreemap, got) - 
} - } -} - -// newTestFreelist get the freelist type from env and initial the freelist -func newTestFreelist() *freelist { - freelistType := FreelistArrayType - if env := os.Getenv(TestFreelistType); env == string(FreelistMapType) { - freelistType = FreelistMapType - } - - return newFreelist(freelistType) -} - -func Test_freelist_hashmapGetFreePageIDs(t *testing.T) { - f := newTestFreelist() - if f.freelistType == FreelistArrayType { - t.Skip() - } - - N := int32(100000) - fm := make(map[common.Pgid]uint64) - i := int32(0) - val := int32(0) - for i = 0; i < N; { - val = rand.Int31n(1000) - fm[common.Pgid(i)] = uint64(val) - i += val - f.freePagesCount += uint64(val) - } - - f.forwardMap = fm - res := f.hashmapGetFreePageIDs() - - if !sort.SliceIsSorted(res, func(i, j int) bool { return res[i] < res[j] }) { - t.Fatalf("pgids not sorted") - } -} - -func Benchmark_freelist_hashmapGetFreePageIDs(b *testing.B) { - f := newTestFreelist() - if f.freelistType == FreelistArrayType { - b.Skip() - } - - N := int32(100000) - fm := make(map[common.Pgid]uint64) - i := int32(0) - val := int32(0) - for i = 0; i < N; { - val = rand.Int31n(1000) - fm[common.Pgid(i)] = uint64(val) - i += val - } - - f.forwardMap = fm - - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - f.hashmapGetFreePageIDs() - } -} diff --git a/freelist_array.go b/internal/freelist/array.go similarity index 74% rename from freelist_array.go rename to internal/freelist/array.go index 2f0a7e4aa..93ccc5edc 100644 --- a/freelist_array.go +++ b/internal/freelist/array.go @@ -1,4 +1,4 @@ -package bbolt +package freelist import ( "fmt" @@ -7,14 +7,18 @@ import ( "go.etcd.io/bbolt/internal/common" ) -// arrayFreeCount returns count of free pages(array version) -func (f *freelist) arrayFreeCount() int { - return len(f.ids) +type array struct { + *shared + + ids []common.Pgid // all free and available free page ids. +} + +func (f *array) Init(ids common.Pgids) { + f.ids = ids + f.reindex() } -// arrayAllocate returns the starting page id of a contiguous list of pages of a given size. -// If a contiguous block cannot be found then 0 is returned. -func (f *freelist) arrayAllocate(txid common.Txid, n int) common.Pgid { +func (f *array) Allocate(txid common.Txid, n int) common.Pgid { if len(f.ids) == 0 { return 0 } @@ -56,18 +60,15 @@ func (f *freelist) arrayAllocate(txid common.Txid, n int) common.Pgid { return 0 } -// arrayReadIDs initializes the freelist from a given list of ids. 
-func (f *freelist) arrayReadIDs(ids []common.Pgid) { - f.ids = ids - f.reindex() +func (f *array) FreeCount() int { + return len(f.ids) } -func (f *freelist) arrayGetFreePageIDs() []common.Pgid { +func (f *array) freePageIds() common.Pgids { return f.ids } -// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array -func (f *freelist) arrayMergeSpans(ids common.Pgids) { +func (f *array) mergeSpans(ids common.Pgids) { sort.Sort(ids) common.Verify(func() { idsIdx := make(map[common.Pgid]struct{}) @@ -96,3 +97,11 @@ func (f *freelist) arrayMergeSpans(ids common.Pgids) { }) f.ids = common.Pgids(f.ids).Merge(ids) } + +func NewArrayFreelist() Interface { + a := &array{ + shared: newShared(), + } + a.Interface = a + return a +} diff --git a/internal/freelist/array_test.go b/internal/freelist/array_test.go new file mode 100644 index 000000000..31b0702dc --- /dev/null +++ b/internal/freelist/array_test.go @@ -0,0 +1,52 @@ +package freelist + +import ( + "reflect" + "testing" + + "go.etcd.io/bbolt/internal/common" +) + +// Ensure that a freelist can find contiguous blocks of pages. +func TestFreelistArray_allocate(t *testing.T) { + f := NewArrayFreelist() + ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} + f.Init(ids) + if id := int(f.Allocate(1, 3)); id != 3 { + t.Fatalf("exp=3; got=%v", id) + } + if id := int(f.Allocate(1, 1)); id != 6 { + t.Fatalf("exp=6; got=%v", id) + } + if id := int(f.Allocate(1, 3)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if id := int(f.Allocate(1, 2)); id != 12 { + t.Fatalf("exp=12; got=%v", id) + } + if id := int(f.Allocate(1, 1)); id != 7 { + t.Fatalf("exp=7; got=%v", id) + } + if id := int(f.Allocate(1, 0)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if id := int(f.Allocate(1, 0)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if exp := common.Pgids([]common.Pgid{9, 18}); !reflect.DeepEqual(exp, f.freePageIds()) { + t.Fatalf("exp=%v; got=%v", exp, f.freePageIds()) + } + + if id := int(f.Allocate(1, 1)); id != 9 { + t.Fatalf("exp=9; got=%v", id) + } + if id := int(f.Allocate(1, 1)); id != 18 { + t.Fatalf("exp=18; got=%v", id) + } + if id := int(f.Allocate(1, 1)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if exp := common.Pgids([]common.Pgid{}); !reflect.DeepEqual(exp, f.freePageIds()) { + t.Fatalf("exp=%v; got=%v", exp, f.freePageIds()) + } +} diff --git a/internal/freelist/freelist.go b/internal/freelist/freelist.go new file mode 100644 index 000000000..3d77d8f94 --- /dev/null +++ b/internal/freelist/freelist.go @@ -0,0 +1,82 @@ +package freelist + +import ( + "go.etcd.io/bbolt/internal/common" +) + +type ReadWriter interface { + // Read calls Init with the page ids stored in the given page. + Read(page *common.Page) + + // Write writes the freelist into the given page. + Write(page *common.Page) + + // EstimatedWritePageSize returns the size of the freelist after serialization in Write. + // This should never underestimate the size. + EstimatedWritePageSize() int +} + +type Interface interface { + ReadWriter + + // Init initializes this freelist with the given list of pages. + Init(ids common.Pgids) + + // Allocate tries to allocate the given number of contiguous pages + // from the free list pages. It returns the starting page ID if + // available; otherwise, it returns 0. + Allocate(txid common.Txid, numPages int) common.Pgid + + // Count returns the number of free and pending pages. + Count() int + + // FreeCount returns the number of free pages. 
+ FreeCount() int + + // PendingCount returns the number of pending pages. + PendingCount() int + + // AddReadonlyTXID adds a given read-only transaction id for pending page tracking. + AddReadonlyTXID(txid common.Txid) + + // RemoveReadonlyTXID removes a given read-only transaction id for pending page tracking. + RemoveReadonlyTXID(txid common.Txid) + + // ReleasePendingPages releases any pages associated with closed read-only transactions. + ReleasePendingPages() + + // Free releases a page and its overflow for a given transaction id. + // If the page is already free then a panic will occur. + Free(txId common.Txid, p *common.Page) + + // Freed returns whether a given page is in the free list. + Freed(pgId common.Pgid) bool + + // Rollback removes the pages from a given pending tx. + Rollback(txId common.Txid) + + // Copyall copies a list of all free ids and all pending ids in one sorted list. + // f.count returns the minimum length required for dst. + Copyall(dst []common.Pgid) + + // Reload reads the freelist from a page and filters out pending items. + Reload(p *common.Page) + + // NoSyncReload reads the freelist from Pgids and filters out pending items. + NoSyncReload(pgIds common.Pgids) + + // freePageIds returns the IDs of all free pages. + freePageIds() common.Pgids + + // pendingPageIds returns all pending pages by transaction id. + pendingPageIds() map[common.Txid]*txPending + + // release moves all page ids for a transaction id (or older) to the freelist. + release(txId common.Txid) + + // releaseRange moves pending pages allocated within an extent [begin,end] to the free list. + releaseRange(begin, end common.Txid) + + // mergeSpans is merging the given pages into the freelist + mergeSpans(ids common.Pgids) +} diff --git a/internal/freelist/freelist_test.go b/internal/freelist/freelist_test.go new file mode 100644 index 000000000..df7c7697e --- /dev/null +++ b/internal/freelist/freelist_test.go @@ -0,0 +1,282 @@ +package freelist + +import ( + "math/rand" + "os" + "reflect" + "sort" + "testing" + "unsafe" + + "go.etcd.io/bbolt/internal/common" +) + +// TestFreelistType is used as a env variable for test to indicate the backend type +const TestFreelistType = "TEST_FREELIST_TYPE" + +// Ensure that a page is added to a transaction's freelist. +func TestFreelist_free(t *testing.T) { + f := newTestFreelist() + f.Free(100, common.NewPage(12, 0, 0, 0)) + if !reflect.DeepEqual([]common.Pgid{12}, f.pendingPageIds()[100].ids) { + t.Fatalf("exp=%v; got=%v", []common.Pgid{12}, f.pendingPageIds()[100].ids) + } +} + +// Ensure that a page and its overflow is added to a transaction's freelist. +func TestFreelist_free_overflow(t *testing.T) { + f := newTestFreelist() + f.Free(100, common.NewPage(12, 0, 0, 3)) + if exp := []common.Pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pendingPageIds()[100].ids) { + t.Fatalf("exp=%v; got=%v", exp, f.pendingPageIds()[100].ids) + } +} + +// Ensure that a transaction's free pages can be released. 
+func TestFreelist_release(t *testing.T) { + f := newTestFreelist() + f.Free(100, common.NewPage(12, 0, 0, 1)) + f.Free(100, common.NewPage(9, 0, 0, 0)) + f.Free(102, common.NewPage(39, 0, 0, 0)) + f.release(100) + f.release(101) + if exp := common.Pgids([]common.Pgid{9, 12, 13}); !reflect.DeepEqual(exp, f.freePageIds()) { + t.Fatalf("exp=%v; got=%v", exp, f.freePageIds()) + } + + f.release(102) + if exp := common.Pgids([]common.Pgid{9, 12, 13, 39}); !reflect.DeepEqual(exp, f.freePageIds()) { + t.Fatalf("exp=%v; got=%v", exp, f.freePageIds()) + } +} + +// Ensure that releaseRange handles boundary conditions correctly +func TestFreelist_releaseRange(t *testing.T) { + type testRange struct { + begin, end common.Txid + } + + type testPage struct { + id common.Pgid + n int + allocTxn common.Txid + freeTxn common.Txid + } + + var releaseRangeTests = []struct { + title string + pagesIn []testPage + releaseRanges []testRange + wantFree []common.Pgid + }{ + { + title: "Single pending in range", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, + releaseRanges: []testRange{{1, 300}}, + wantFree: []common.Pgid{3}, + }, + { + title: "Single pending with minimum end range", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, + releaseRanges: []testRange{{1, 200}}, + wantFree: []common.Pgid{3}, + }, + { + title: "Single pending outsize minimum end range", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, + releaseRanges: []testRange{{1, 199}}, + wantFree: nil, + }, + { + title: "Single pending with minimum begin range", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, + releaseRanges: []testRange{{100, 300}}, + wantFree: []common.Pgid{3}, + }, + { + title: "Single pending outside minimum begin range", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, + releaseRanges: []testRange{{101, 300}}, + wantFree: nil, + }, + { + title: "Single pending in minimum range", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}}, + releaseRanges: []testRange{{199, 200}}, + wantFree: []common.Pgid{3}, + }, + { + title: "Single pending and read transaction at 199", + pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}}, + releaseRanges: []testRange{{100, 198}, {200, 300}}, + wantFree: nil, + }, + { + title: "Adjacent pending and read transactions at 199, 200", + pagesIn: []testPage{ + {id: 3, n: 1, allocTxn: 199, freeTxn: 200}, + {id: 4, n: 1, allocTxn: 200, freeTxn: 201}, + }, + releaseRanges: []testRange{ + {100, 198}, + {200, 199}, // Simulate the ranges db.freePages might produce. 
+ {201, 300}, + }, + wantFree: nil, + }, + { + title: "Out of order ranges", + pagesIn: []testPage{ + {id: 3, n: 1, allocTxn: 199, freeTxn: 200}, + {id: 4, n: 1, allocTxn: 200, freeTxn: 201}, + }, + releaseRanges: []testRange{ + {201, 199}, + {201, 200}, + {200, 200}, + }, + wantFree: nil, + }, + { + title: "Multiple pending, read transaction at 150", + pagesIn: []testPage{ + {id: 3, n: 1, allocTxn: 100, freeTxn: 200}, + {id: 4, n: 1, allocTxn: 100, freeTxn: 125}, + {id: 5, n: 1, allocTxn: 125, freeTxn: 150}, + {id: 6, n: 1, allocTxn: 125, freeTxn: 175}, + {id: 7, n: 2, allocTxn: 150, freeTxn: 175}, + {id: 9, n: 2, allocTxn: 175, freeTxn: 200}, + }, + releaseRanges: []testRange{{50, 149}, {151, 300}}, + wantFree: []common.Pgid{4, 9, 10}, + }, + } + + for _, c := range releaseRangeTests { + f := newTestFreelist() + var ids []common.Pgid + for _, p := range c.pagesIn { + for i := uint64(0); i < uint64(p.n); i++ { + ids = append(ids, common.Pgid(uint64(p.id)+i)) + } + } + f.Init(ids) + for _, p := range c.pagesIn { + f.Allocate(p.allocTxn, p.n) + } + + for _, p := range c.pagesIn { + f.Free(p.freeTxn, common.NewPage(p.id, 0, 0, uint32(p.n-1))) + } + + for _, r := range c.releaseRanges { + f.releaseRange(r.begin, r.end) + } + + if exp := common.Pgids(c.wantFree); !reflect.DeepEqual(exp, f.freePageIds()) { + t.Errorf("exp=%v; got=%v for %s", exp, f.freePageIds(), c.title) + } + } +} + +// Ensure that a freelist can deserialize from a freelist page. +func TestFreelist_read(t *testing.T) { + // Create a page. + var buf [4096]byte + page := (*common.Page)(unsafe.Pointer(&buf[0])) + page.SetFlags(common.FreelistPageFlag) + page.SetCount(2) + + // Insert 2 page ids. + ids := (*[3]common.Pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page))) + ids[0] = 23 + ids[1] = 50 + + // Deserialize page into a freelist. + f := newTestFreelist() + f.Read(page) + + // Ensure that there are two page ids in the freelist. + if exp := common.Pgids([]common.Pgid{23, 50}); !reflect.DeepEqual(exp, f.freePageIds()) { + t.Fatalf("exp=%v; got=%v", exp, f.freePageIds()) + } +} + +// Ensure that a freelist can serialize into a freelist page. +func TestFreelist_write(t *testing.T) { + // Create a freelist and write it to a page. + var buf [4096]byte + f := newTestFreelist() + + f.Init([]common.Pgid{12, 39}) + f.pendingPageIds()[100] = &txPending{ids: []common.Pgid{28, 11}} + f.pendingPageIds()[101] = &txPending{ids: []common.Pgid{3}} + p := (*common.Page)(unsafe.Pointer(&buf[0])) + f.Write(p) + + // Read the page back out. + f2 := newTestFreelist() + f2.Read(p) + + // Ensure that the freelist is correct. + // All pages should be present and in reverse order. 
+ if exp := common.Pgids([]common.Pgid{3, 11, 12, 28, 39}); !reflect.DeepEqual(exp, f2.freePageIds()) { + t.Fatalf("exp=%v; got=%v", exp, f2.freePageIds()) + } +} + +func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) } +func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) } +func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) } +func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) } + +func benchmark_FreelistRelease(b *testing.B, size int) { + ids := randomPgids(size) + pending := randomPgids(len(ids) / 400) + b.ResetTimer() + for i := 0; i < b.N; i++ { + txp := &txPending{ids: pending} + f := newTestFreelist() + f.pendingPageIds()[1] = txp + f.Init(ids) + f.release(1) + } +} + +func randomPgids(n int) []common.Pgid { + pgids := make(common.Pgids, n) + for i := range pgids { + pgids[i] = common.Pgid(rand.Int63()) + } + sort.Sort(pgids) + return pgids +} + +func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) { + f := newTestFreelist() + exp := common.Pgids([]common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}) + + f.Init(exp) + + if got := f.freePageIds(); !reflect.DeepEqual(exp, got) { + t.Fatalf("exp=%v; got=%v", exp, got) + } + + f2 := newTestFreelist() + var exp2 []common.Pgid + f2.Init(exp2) + + if got2 := f2.freePageIds(); !reflect.DeepEqual(got2, common.Pgids(exp2)) { + t.Fatalf("exp2=%#v; got2=%#v", exp2, got2) + } + +} + +// newTestFreelist get the freelist type from env and initial the freelist +func newTestFreelist() Interface { + if env := os.Getenv(TestFreelistType); env == "hashmap" { + return NewHashMapFreelist() + } + + return NewArrayFreelist() +} diff --git a/freelist_hmap.go b/internal/freelist/hashmap.go similarity index 79% rename from freelist_hmap.go rename to internal/freelist/hashmap.go index c5c09f55e..a6bad8976 100644 --- a/freelist_hmap.go +++ b/internal/freelist/hashmap.go @@ -1,4 +1,4 @@ -package bbolt +package freelist import ( "fmt" @@ -8,26 +8,57 @@ import ( "go.etcd.io/bbolt/internal/common" ) -// hashmapFreeCount returns count of free pages(hashmap version) -func (f *freelist) hashmapFreeCount() int { - common.Verify(func() { - expectedFreePageCount := f.hashmapFreeCountSlow() - common.Assert(int(f.freePagesCount) == expectedFreePageCount, - "freePagesCount (%d) is out of sync with free pages map (%d)", f.freePagesCount, expectedFreePageCount) - }) - return int(f.freePagesCount) +// pidSet holds the set of starting pgids which have the same span size +type pidSet map[common.Pgid]struct{} + +type hashMap struct { + *shared + + freePagesCount uint64 // count of free pages(hashmap version) + freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size + forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size + backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size } -func (f *freelist) hashmapFreeCountSlow() int { - count := 0 - for _, size := range f.forwardMap { - count += int(size) +func (f *hashMap) Init(pgids common.Pgids) { + if len(pgids) == 0 { + return } - return count + + size := uint64(1) + start := pgids[0] + // reset the counter when freelist init + f.freePagesCount = 0 + + if !sort.SliceIsSorted([]common.Pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) { + panic("pgids not sorted") + } + + f.freemaps = make(map[uint64]pidSet) + f.forwardMap = 
make(map[common.Pgid]uint64) + f.backwardMap = make(map[common.Pgid]uint64) + + for i := 1; i < len(pgids); i++ { + // continuous page + if pgids[i] == pgids[i-1]+1 { + size++ + } else { + f.addSpan(start, size) + + size = 1 + start = pgids[i] + } + } + + // init the tail + if size != 0 && start != 0 { + f.addSpan(start, size) + } + + f.reindex() } -// hashmapAllocate serves the same purpose as arrayAllocate, but use hashmap as backend -func (f *freelist) hashmapAllocate(txid common.Txid, n int) common.Pgid { +func (f *hashMap) Allocate(txid common.Txid, n int) common.Pgid { if n == 0 { return 0 } @@ -74,17 +105,17 @@ func (f *freelist) hashmapAllocate(txid common.Txid, n int) common.Pgid { return 0 } -// hashmapReadIDs reads pgids as input an initial the freelist(hashmap version) -func (f *freelist) hashmapReadIDs(pgids []common.Pgid) { - f.init(pgids) - - // Rebuild the page cache. - f.reindex() +func (f *hashMap) FreeCount() int { + common.Verify(func() { + expectedFreePageCount := f.hashmapFreeCountSlow() + common.Assert(int(f.freePagesCount) == expectedFreePageCount, + "freePagesCount (%d) is out of sync with free pages map (%d)", f.freePagesCount, expectedFreePageCount) + }) + return int(f.freePagesCount) } -// hashmapGetFreePageIDs returns the sorted free page ids -func (f *freelist) hashmapGetFreePageIDs() []common.Pgid { - count := f.free_count() +func (f *hashMap) freePageIds() common.Pgids { + count := f.FreeCount() if count == 0 { return nil } @@ -108,8 +139,36 @@ func (f *freelist) hashmapGetFreePageIDs() []common.Pgid { return m } -// hashmapMergeSpans try to merge list of pages(represented by pgids) with existing spans -func (f *freelist) hashmapMergeSpans(ids common.Pgids) { +func (f *hashMap) hashmapFreeCountSlow() int { + count := 0 + for _, size := range f.forwardMap { + count += int(size) + } + return count +} + +func (f *hashMap) addSpan(start common.Pgid, size uint64) { + f.backwardMap[start-1+common.Pgid(size)] = size + f.forwardMap[start] = size + if _, ok := f.freemaps[size]; !ok { + f.freemaps[size] = make(map[common.Pgid]struct{}) + } + + f.freemaps[size][start] = struct{}{} + f.freePagesCount += size +} + +func (f *hashMap) delSpan(start common.Pgid, size uint64) { + delete(f.forwardMap, start) + delete(f.backwardMap, start+common.Pgid(size-1)) + delete(f.freemaps[size], start) + if len(f.freemaps[size]) == 0 { + delete(f.freemaps, size) + } + f.freePagesCount -= size +} + +func (f *hashMap) mergeSpans(ids common.Pgids) { common.Verify(func() { ids1Freemap := f.idsFromFreemaps() ids2Forward := f.idsFromForwardMap() @@ -144,7 +203,7 @@ func (f *freelist) hashmapMergeSpans(ids common.Pgids) { } // mergeWithExistingSpan merges pid to the existing free spans, try to merge it backward and forward -func (f *freelist) mergeWithExistingSpan(pid common.Pgid) { +func (f *hashMap) mergeWithExistingSpan(pid common.Pgid) { prev := pid - 1 next := pid + 1 @@ -171,68 +230,9 @@ func (f *freelist) mergeWithExistingSpan(pid common.Pgid) { f.addSpan(newStart, newSize) } -func (f *freelist) addSpan(start common.Pgid, size uint64) { - f.backwardMap[start-1+common.Pgid(size)] = size - f.forwardMap[start] = size - if _, ok := f.freemaps[size]; !ok { - f.freemaps[size] = make(map[common.Pgid]struct{}) - } - - f.freemaps[size][start] = struct{}{} - f.freePagesCount += size -} - -func (f *freelist) delSpan(start common.Pgid, size uint64) { - delete(f.forwardMap, start) - delete(f.backwardMap, start+common.Pgid(size-1)) - delete(f.freemaps[size], start) - if len(f.freemaps[size]) == 0 { - 
delete(f.freemaps, size) - } - f.freePagesCount -= size -} - -// initial from pgids using when use hashmap version -// pgids must be sorted -func (f *freelist) init(pgids []common.Pgid) { - if len(pgids) == 0 { - return - } - - size := uint64(1) - start := pgids[0] - // reset the counter when freelist init - f.freePagesCount = 0 - - if !sort.SliceIsSorted([]common.Pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) { - panic("pgids not sorted") - } - - f.freemaps = make(map[uint64]pidSet) - f.forwardMap = make(map[common.Pgid]uint64) - f.backwardMap = make(map[common.Pgid]uint64) - - for i := 1; i < len(pgids); i++ { - // continuous page - if pgids[i] == pgids[i-1]+1 { - size++ - } else { - f.addSpan(start, size) - - size = 1 - start = pgids[i] - } - } - - // init the tail - if size != 0 && start != 0 { - f.addSpan(start, size) - } -} - // idsFromFreemaps get all free page IDs from f.freemaps. // used by test only. -func (f *freelist) idsFromFreemaps() map[common.Pgid]struct{} { +func (f *hashMap) idsFromFreemaps() map[common.Pgid]struct{} { ids := make(map[common.Pgid]struct{}) for size, idSet := range f.freemaps { for start := range idSet { @@ -250,7 +250,7 @@ func (f *freelist) idsFromFreemaps() map[common.Pgid]struct{} { // idsFromForwardMap get all free page IDs from f.forwardMap. // used by test only. -func (f *freelist) idsFromForwardMap() map[common.Pgid]struct{} { +func (f *hashMap) idsFromForwardMap() map[common.Pgid]struct{} { ids := make(map[common.Pgid]struct{}) for start, size := range f.forwardMap { for i := 0; i < int(size); i++ { @@ -266,7 +266,7 @@ func (f *freelist) idsFromForwardMap() map[common.Pgid]struct{} { // idsFromBackwardMap get all free page IDs from f.backwardMap. // used by test only. -func (f *freelist) idsFromBackwardMap() map[common.Pgid]struct{} { +func (f *hashMap) idsFromBackwardMap() map[common.Pgid]struct{} { ids := make(map[common.Pgid]struct{}) for end, size := range f.backwardMap { for i := 0; i < int(size); i++ { @@ -279,3 +279,14 @@ func (f *freelist) idsFromBackwardMap() map[common.Pgid]struct{} { } return ids } + +func NewHashMapFreelist() Interface { + hm := &hashMap{ + shared: newShared(), + freemaps: make(map[uint64]pidSet), + forwardMap: make(map[common.Pgid]uint64), + backwardMap: make(map[common.Pgid]uint64), + } + hm.Interface = hm + return hm +} diff --git a/internal/freelist/hashmap_test.go b/internal/freelist/hashmap_test.go new file mode 100644 index 000000000..32cc5dfa0 --- /dev/null +++ b/internal/freelist/hashmap_test.go @@ -0,0 +1,155 @@ +package freelist + +import ( + "math/rand" + "reflect" + "sort" + "testing" + + "go.etcd.io/bbolt/internal/common" +) + +func TestFreelistHashmap_allocate(t *testing.T) { + f := NewHashMapFreelist() + + ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} + f.Init(ids) + + f.Allocate(1, 3) + if x := f.FreeCount(); x != 6 { + t.Fatalf("exp=6; got=%v", x) + } + + f.Allocate(1, 2) + if x := f.FreeCount(); x != 4 { + t.Fatalf("exp=4; got=%v", x) + } + f.Allocate(1, 1) + if x := f.FreeCount(); x != 3 { + t.Fatalf("exp=3; got=%v", x) + } + + f.Allocate(1, 0) + if x := f.FreeCount(); x != 3 { + t.Fatalf("exp=3; got=%v", x) + } +} + +func TestFreelistHashmap_mergeWithExist(t *testing.T) { + bm1 := pidSet{1: struct{}{}} + + bm2 := pidSet{5: struct{}{}} + tests := []struct { + name string + ids common.Pgids + pgid common.Pgid + want common.Pgids + wantForwardmap map[common.Pgid]uint64 + wantBackwardmap map[common.Pgid]uint64 + wantfreemap map[uint64]pidSet + }{ + { + name: "test1", + ids: 
[]common.Pgid{1, 2, 4, 5, 6}, + pgid: 3, + want: []common.Pgid{1, 2, 3, 4, 5, 6}, + wantForwardmap: map[common.Pgid]uint64{1: 6}, + wantBackwardmap: map[common.Pgid]uint64{6: 6}, + wantfreemap: map[uint64]pidSet{6: bm1}, + }, + { + name: "test2", + ids: []common.Pgid{1, 2, 5, 6}, + pgid: 3, + want: []common.Pgid{1, 2, 3, 5, 6}, + wantForwardmap: map[common.Pgid]uint64{1: 3, 5: 2}, + wantBackwardmap: map[common.Pgid]uint64{6: 2, 3: 3}, + wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2}, + }, + { + name: "test3", + ids: []common.Pgid{1, 2}, + pgid: 3, + want: []common.Pgid{1, 2, 3}, + wantForwardmap: map[common.Pgid]uint64{1: 3}, + wantBackwardmap: map[common.Pgid]uint64{3: 3}, + wantfreemap: map[uint64]pidSet{3: bm1}, + }, + { + name: "test4", + ids: []common.Pgid{2, 3}, + pgid: 1, + want: []common.Pgid{1, 2, 3}, + wantForwardmap: map[common.Pgid]uint64{1: 3}, + wantBackwardmap: map[common.Pgid]uint64{3: 3}, + wantfreemap: map[uint64]pidSet{3: bm1}, + }, + } + for _, tt := range tests { + f := newTestHashMapFreelist() + f.Init(tt.ids) + + f.mergeWithExistingSpan(tt.pgid) + + if got := f.freePageIds(); !reflect.DeepEqual(tt.want, got) { + t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.want, got) + } + if got := f.forwardMap; !reflect.DeepEqual(tt.wantForwardmap, got) { + t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantForwardmap, got) + } + if got := f.backwardMap; !reflect.DeepEqual(tt.wantBackwardmap, got) { + t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantBackwardmap, got) + } + if got := f.freemaps; !reflect.DeepEqual(tt.wantfreemap, got) { + t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantfreemap, got) + } + } +} + +func TestFreelistHashmap_GetFreePageIDs(t *testing.T) { + f := newTestHashMapFreelist() + + N := int32(100000) + fm := make(map[common.Pgid]uint64) + i := int32(0) + val := int32(0) + for i = 0; i < N; { + val = rand.Int31n(1000) + fm[common.Pgid(i)] = uint64(val) + i += val + f.freePagesCount += uint64(val) + } + + f.forwardMap = fm + res := f.freePageIds() + + if !sort.SliceIsSorted(res, func(i, j int) bool { return res[i] < res[j] }) { + t.Fatalf("pgids not sorted") + } +} + +func Benchmark_freelist_hashmapGetFreePageIDs(b *testing.B) { + f := newTestHashMapFreelist() + N := int32(100000) + fm := make(map[common.Pgid]uint64) + i := int32(0) + val := int32(0) + for i = 0; i < N; { + val = rand.Int31n(1000) + fm[common.Pgid(i)] = uint64(val) + i += val + } + + f.forwardMap = fm + + b.ReportAllocs() + b.ResetTimer() + for n := 0; n < b.N; n++ { + f.freePageIds() + } +} + +func newTestHashMapFreelist() *hashMap { + f := NewHashMapFreelist() + return f.(*hashMap) +} diff --git a/internal/freelist/shared.go b/internal/freelist/shared.go new file mode 100644 index 000000000..ac06309df --- /dev/null +++ b/internal/freelist/shared.go @@ -0,0 +1,321 @@ +package freelist + +import ( + "fmt" + "math" + "sort" + "unsafe" + + "go.etcd.io/bbolt/internal/common" +) + +type txPending struct { + ids []common.Pgid + alloctx []common.Txid // txids allocating the ids + lastReleaseBegin common.Txid // beginning txid of last matching releaseRange +} + +type shared struct { + Interface + + readonlyTXIDs []common.Txid // all readonly transaction IDs. + allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid. + cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids. + pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx. 
+} + +func newShared() *shared { + return &shared{ + pending: make(map[common.Txid]*txPending), + allocs: make(map[common.Pgid]common.Txid), + cache: make(map[common.Pgid]struct{}), + } +} + +func (t *shared) pendingPageIds() map[common.Txid]*txPending { + return t.pending +} + +func (t *shared) PendingCount() int { + var count int + for _, txp := range t.pending { + count += len(txp.ids) + } + return count +} + +func (t *shared) Count() int { + return t.FreeCount() + t.PendingCount() +} + +func (t *shared) Freed(pgId common.Pgid) bool { + _, ok := t.cache[pgId] + return ok +} + +func (t *shared) Free(txid common.Txid, p *common.Page) { + if p.Id() <= 1 { + panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.Id())) + } + + // Free page and all its overflow pages. + txp := t.pending[txid] + if txp == nil { + txp = &txPending{} + t.pending[txid] = txp + } + allocTxid, ok := t.allocs[p.Id()] + if ok { + delete(t.allocs, p.Id()) + } else if p.IsFreelistPage() { + // Freelist is always allocated by prior tx. + allocTxid = txid - 1 + } + + for id := p.Id(); id <= p.Id()+common.Pgid(p.Overflow()); id++ { + // Verify that page is not already free. + if _, ok := t.cache[id]; ok { + panic(fmt.Sprintf("page %d already freed", id)) + } + // Add to the freelist and cache. + txp.ids = append(txp.ids, id) + txp.alloctx = append(txp.alloctx, allocTxid) + t.cache[id] = struct{}{} + } +} + +func (t *shared) Rollback(txid common.Txid) { + // Remove page ids from cache. + txp := t.pending[txid] + if txp == nil { + return + } + var m common.Pgids + for i, pgid := range txp.ids { + delete(t.cache, pgid) + tx := txp.alloctx[i] + if tx == 0 { + continue + } + if tx != txid { + // Pending free aborted; restore page back to alloc list. + t.allocs[pgid] = tx + } else { + // Freed page was allocated by this txn; OK to throw away. + m = append(m, pgid) + } + } + // Remove pages from pending list and mark as free if allocated by txid. + delete(t.pending, txid) + t.mergeSpans(m) +} + +func (t *shared) AddReadonlyTXID(tid common.Txid) { + t.readonlyTXIDs = append(t.readonlyTXIDs, tid) +} + +func (t *shared) RemoveReadonlyTXID(tid common.Txid) { + for i := range t.readonlyTXIDs { + if t.readonlyTXIDs[i] == tid { + last := len(t.readonlyTXIDs) - 1 + t.readonlyTXIDs[i] = t.readonlyTXIDs[last] + t.readonlyTXIDs = t.readonlyTXIDs[:last] + break + } + } +} + +type txIDx []common.Txid + +func (t txIDx) Len() int { return len(t) } +func (t txIDx) Swap(i, j int) { t[i], t[j] = t[j], t[i] } +func (t txIDx) Less(i, j int) bool { return t[i] < t[j] } + +func (t *shared) ReleasePendingPages() { + // Free all pending pages prior to the earliest open transaction. + sort.Sort(txIDx(t.readonlyTXIDs)) + minid := common.Txid(math.MaxUint64) + if len(t.readonlyTXIDs) > 0 { + minid = t.readonlyTXIDs[0] + } + if minid > 0 { + t.release(minid - 1) + } + // Release unused txid extents. + for _, tid := range t.readonlyTXIDs { + t.releaseRange(minid, tid-1) + minid = tid + 1 + } + t.releaseRange(minid, common.Txid(math.MaxUint64)) + // Any page both allocated and freed in an extent is safe to release. +} + +func (t *shared) release(txid common.Txid) { + m := make(common.Pgids, 0) + for tid, txp := range t.pending { + if tid <= txid { + // Move transaction's pending pages to the available freelist. + // Don't remove from the cache since the page is still free. + m = append(m, txp.ids...) 
+			delete(t.pending, tid)
+		}
+	}
+	t.mergeSpans(m)
+}
+
+func (t *shared) releaseRange(begin, end common.Txid) {
+	if begin > end {
+		return
+	}
+	var m common.Pgids
+	for tid, txp := range t.pending {
+		if tid < begin || tid > end {
+			continue
+		}
+		// Don't recompute freed pages if ranges haven't updated.
+		if txp.lastReleaseBegin == begin {
+			continue
+		}
+		for i := 0; i < len(txp.ids); i++ {
+			if atx := txp.alloctx[i]; atx < begin || atx > end {
+				continue
+			}
+			m = append(m, txp.ids[i])
+			txp.ids[i] = txp.ids[len(txp.ids)-1]
+			txp.ids = txp.ids[:len(txp.ids)-1]
+			txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
+			txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
+			i--
+		}
+		txp.lastReleaseBegin = begin
+		if len(txp.ids) == 0 {
+			delete(t.pending, tid)
+		}
+	}
+	t.mergeSpans(m)
+}
+
+// Copyall copies a list of all free ids and all pending ids in one sorted list.
+// Count() returns the minimum length required for dst.
+func (t *shared) Copyall(dst []common.Pgid) {
+	m := make(common.Pgids, 0, t.PendingCount())
+	for _, txp := range t.pendingPageIds() {
+		m = append(m, txp.ids...)
+	}
+	sort.Sort(m)
+	common.Mergepgids(dst, t.freePageIds(), m)
+}
+
+func (t *shared) Reload(p *common.Page) {
+	t.Read(p)
+
+	// Build a cache of only pending pages.
+	pcache := make(map[common.Pgid]bool)
+	for _, txp := range t.pending {
+		for _, pendingID := range txp.ids {
+			pcache[pendingID] = true
+		}
+	}
+
+	// Check each page in the freelist and build a new available freelist
+	// with any pages not in the pending lists.
+	var a []common.Pgid
+	for _, id := range t.freePageIds() {
+		if !pcache[id] {
+			a = append(a, id)
+		}
+	}
+
+	t.Init(a)
+}
+
+func (t *shared) NoSyncReload(pgIds common.Pgids) {
+	// Build a cache of only pending pages.
+	pcache := make(map[common.Pgid]bool)
+	for _, txp := range t.pending {
+		for _, pendingID := range txp.ids {
+			pcache[pendingID] = true
+		}
+	}
+
+	// Check each page in the freelist and build a new available freelist
+	// with any pages not in the pending lists.
+	var a []common.Pgid
+	for _, id := range pgIds {
+		if !pcache[id] {
+			a = append(a, id)
+		}
+	}
+
+	t.Init(a)
+}
+
+// reindex rebuilds the free cache based on available and pending free lists.
+func (t *shared) reindex() {
+	free := t.freePageIds()
+	pending := t.pendingPageIds()
+	t.cache = make(map[common.Pgid]struct{}, len(free))
+	for _, id := range free {
+		t.cache[id] = struct{}{}
+	}
+	for _, txp := range pending {
+		for _, pendingID := range txp.ids {
+			t.cache[pendingID] = struct{}{}
+		}
+	}
+}
+
+func (t *shared) Read(p *common.Page) {
+	if !p.IsFreelistPage() {
+		panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.Id(), p.Typ()))
+	}
+
+	ids := p.FreelistPageIds()
+
+	// Copy the list of page ids from the freelist.
+	if len(ids) == 0 {
+		t.Init(nil)
+	} else {
+		// Copy the ids so we don't modify the freelist page directly.
+		idsCopy := make([]common.Pgid, len(ids))
+		copy(idsCopy, ids)
+		// Make sure they're sorted.
+		sort.Sort(common.Pgids(idsCopy))
+
+		t.Init(idsCopy)
+	}
+}
+
+func (t *shared) EstimatedWritePageSize() int {
+	n := t.Count()
+	if n >= 0xFFFF {
+		// The first element will be used to store the count. See Write.
+		n++
+	}
+	return int(common.PageHeaderSize) + (int(unsafe.Sizeof(common.Pgid(0))) * n)
+}
+
+func (t *shared) Write(p *common.Page) {
+	// Combine the old free pgids and pgids waiting on an open transaction.
+
+	// Update the header flag.
+ p.SetFlags(common.FreelistPageFlag) + + // The page.count can only hold up to 64k elements so if we overflow that + // number then we handle it by putting the size in the first element. + l := t.Count() + if l == 0 { + p.SetCount(uint16(l)) + } else if l < 0xFFFF { + p.SetCount(uint16(l)) + data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + ids := unsafe.Slice((*common.Pgid)(data), l) + t.Copyall(ids) + } else { + p.SetCount(0xFFFF) + data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + ids := unsafe.Slice((*common.Pgid)(data), l+1) + ids[0] = common.Pgid(l) + t.Copyall(ids[1:]) + } +} diff --git a/node.go b/node.go index fe67c3c89..022b1001e 100644 --- a/node.go +++ b/node.go @@ -316,7 +316,7 @@ func (n *node) spill() error { for _, node := range nodes { // Add node's page to the freelist if it's not new. if node.pgid > 0 { - tx.db.freelist.free(tx.meta.Txid(), tx.page(node.pgid)) + tx.db.freelist.Free(tx.meta.Txid(), tx.page(node.pgid)) node.pgid = 0 } @@ -493,7 +493,7 @@ func (n *node) dereference() { // free adds the node's underlying page to the freelist. func (n *node) free() { if n.pgid != 0 { - n.bucket.tx.db.freelist.free(n.bucket.tx.meta.Txid(), n.bucket.tx.page(n.pgid)) + n.bucket.tx.db.freelist.Free(n.bucket.tx.meta.Txid(), n.bucket.tx.page(n.pgid)) n.pgid = 0 } } diff --git a/tx.go b/tx.go index 011e2c382..e03db9154 100644 --- a/tx.go +++ b/tx.go @@ -213,7 +213,7 @@ func (tx *Tx) Commit() (err error) { // Free the old freelist because commit writes out a fresh freelist. if tx.meta.Freelist() != common.PgidNoFreelist { - tx.db.freelist.free(tx.meta.Txid(), tx.db.page(tx.meta.Freelist())) + tx.db.freelist.Free(tx.meta.Txid(), tx.db.page(tx.meta.Freelist())) } if !tx.db.NoFreelistSync { @@ -285,15 +285,13 @@ func (tx *Tx) Commit() (err error) { func (tx *Tx) commitFreelist() error { // Allocate new pages for the new free list. This will overestimate // the size of the freelist but not underestimate the size (which would be bad). - p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) + p, err := tx.allocate((tx.db.freelist.EstimatedWritePageSize() / tx.db.pageSize) + 1) if err != nil { tx.rollback() return err } - if err := tx.db.freelist.write(p); err != nil { - tx.rollback() - return err - } + + tx.db.freelist.Write(p) tx.meta.SetFreelist(p.Id()) return nil @@ -316,7 +314,7 @@ func (tx *Tx) nonPhysicalRollback() { return } if tx.writable { - tx.db.freelist.rollback(tx.meta.Txid()) + tx.db.freelist.Rollback(tx.meta.Txid()) } tx.close() } @@ -327,17 +325,17 @@ func (tx *Tx) rollback() { return } if tx.writable { - tx.db.freelist.rollback(tx.meta.Txid()) + tx.db.freelist.Rollback(tx.meta.Txid()) // When mmap fails, the `data`, `dataref` and `datasz` may be reset to // zero values, and there is no way to reload free page IDs in this case. if tx.db.data != nil { if !tx.db.hasSyncedFreelist() { // Reconstruct free page list by scanning the DB to get the whole free page list. - // Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode. - tx.db.freelist.noSyncReload(tx.db.freepages()) + // Note: scanning the whole db is heavy if your db size is large in NoSyncFreeList mode. + tx.db.freelist.NoSyncReload(tx.db.freepages()) } else { // Read free page list from freelist page. - tx.db.freelist.reload(tx.db.page(tx.db.meta().Freelist())) + tx.db.freelist.Reload(tx.db.page(tx.db.meta().Freelist())) } } } @@ -350,9 +348,9 @@ func (tx *Tx) close() { } if tx.writable { // Grab freelist stats. 
- var freelistFreeN = tx.db.freelist.free_count() - var freelistPendingN = tx.db.freelist.pending_count() - var freelistAlloc = tx.db.freelist.size() + var freelistFreeN = tx.db.freelist.FreeCount() + var freelistPendingN = tx.db.freelist.PendingCount() + var freelistAlloc = tx.db.freelist.EstimatedWritePageSize() // Remove transaction ref & writer lock. tx.db.rwtx = nil @@ -639,7 +637,7 @@ func (tx *Tx) Page(id int) (*common.PageInfo, error) { } // Determine the type (or if it's free). - if tx.db.freelist.freed(common.Pgid(id)) { + if tx.db.freelist.Freed(common.Pgid(id)) { info.Type = "free" } else { info.Type = p.Typ() diff --git a/tx_check.go b/tx_check.go index 4e3c41ae4..c3ecbb975 100644 --- a/tx_check.go +++ b/tx_check.go @@ -41,8 +41,8 @@ func (tx *Tx) check(cfg checkConfig, ch chan error) { // Check if any pages are double freed. freed := make(map[common.Pgid]bool) - all := make([]common.Pgid, tx.db.freelist.count()) - tx.db.freelist.copyall(all) + all := make([]common.Pgid, tx.db.freelist.Count()) + tx.db.freelist.Copyall(all) for _, id := range all { if freed[id] { ch <- fmt.Errorf("page %d: already freed", id)
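Reviewer note (editorial addition, not part of the patch): the hunks above move callers onto the exported freelist surface shown in this diff (Init, Free, Freed, FreeCount, PendingCount, Count, Copyall, ReleasePendingPages, Read, Write, Reload, NoSyncReload, EstimatedWritePageSize). As a minimal, illustrative sketch of the pending-page lifecycle that surface implies — the test name is hypothetical and the snippet is not part of this change — something along these lines could sit next to the other tests in internal/freelist:

package freelist

import (
	"testing"

	"go.etcd.io/bbolt/internal/common"
)

// Sketch (hypothetical test): pages freed under a txid stay pending until
// ReleasePendingPages decides no open read transaction can still observe them.
func TestFreelist_pendingLifecycleSketch(t *testing.T) {
	f := NewHashMapFreelist()
	f.Init([]common.Pgid{2, 3}) // pages 2 and 3 start out free

	// Free page 5 under txid 10; it becomes pending, not immediately reusable.
	p := &common.Page{}
	p.SetId(5)
	f.Free(common.Txid(10), p)

	if f.FreeCount() != 2 || f.PendingCount() != 1 {
		t.Fatalf("expected 2 free / 1 pending, got %d / %d", f.FreeCount(), f.PendingCount())
	}

	// With no read-only transactions registered, releasing pending pages
	// returns page 5 to the available list.
	f.ReleasePendingPages()
	if f.FreeCount() != 3 || f.PendingCount() != 0 {
		t.Fatalf("expected 3 free / 0 pending, got %d / %d", f.FreeCount(), f.PendingCount())
	}
}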