From f9ce17b7532c3926e9db00b7a705868b718c7b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20R=C3=BCger?= Date: Thu, 11 Jan 2024 00:19:09 +0100 Subject: [PATCH] Improve memory alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bbolt/db.go:38:9: struct of size 528 could be 496 bbolt/db.go:1012:12: struct with 40 pointer bytes could be 24 bbolt/db.go:1280:14: struct of size 104 could be 80 bbolt/freelist.go:24:15: struct with 136 pointer bytes could be 112 bbolt/node.go:12:11: struct with 88 pointer bytes could be 72 bbolt/tx.go:27:9: struct with 192 pointer bytes could be 88 bbolt/internal/common/inode.go:8:12: struct with 48 pointer bytes could be 32 bbolt/internal/common/page.go:324:15: struct with 16 pointer bytes could be 8 bbolt/cmd/bbolt/main.go:445:22: struct with 16 pointer bytes could be 8 bbolt/cmd/bbolt/main.go:1489:19: struct with 160 pointer bytes could be 104 bbolt/cmd/bbolt/main.go:1546:16: struct with 24 pointer bytes could be 16 bbolt/internal/btesting/btesting.go:28:9: struct with 48 pointer bytes could be 40 bbolt/tests/dmflakey/dmflakey.go:133:13: struct with 64 pointer bytes could be 56 Signed-off-by: Manuel Rüger --- cmd/bbolt/main.go | 12 +-- db.go | 185 +++++++++++++++++----------------- freelist.go | 6 +- internal/btesting/btesting.go | 4 +- internal/common/inode.go | 4 +- internal/common/page.go | 2 +- node.go | 10 +- tests/dmflakey/dmflakey.go | 2 +- tx.go | 9 +- 9 files changed, 120 insertions(+), 114 deletions(-) diff --git a/cmd/bbolt/main.go b/cmd/bbolt/main.go index ea284539e..17a3964ff 100644 --- a/cmd/bbolt/main.go +++ b/cmd/bbolt/main.go @@ -443,10 +443,10 @@ func newPageItemCommand(m *Main) *pageItemCommand { } type pageItemOptions struct { + format string help bool keyOnly bool valueOnly bool - format string } // Run executes the command. 
@@ -1490,18 +1490,18 @@ type BenchOptions struct { ProfileMode string WriteMode string ReadMode string + CPUProfile string + MemProfile string + BlockProfile string + Path string Iterations int64 BatchSize int64 KeySize int ValueSize int - CPUProfile string - MemProfile string - BlockProfile string StatsInterval time.Duration FillPercent float64 NoSync bool Work bool - Path string } // BenchResults represents the performance results of the benchmark and is thread-safe. @@ -1544,8 +1544,8 @@ func (r *BenchResults) OpsPerSecond() int { } type PageError struct { - ID int Err error + ID int } func (e *PageError) Error() string { diff --git a/db.go b/db.go index db9dbafed..40252a720 100644 --- a/db.go +++ b/db.go @@ -36,12 +36,81 @@ const ( // All data access is performed through transactions which can be obtained through the DB. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. type DB struct { + pagePool sync.Pool + + logger Logger + + openFile func(string, int, os.FileMode) (*os.File, error) + file *os.File + data *[maxMapSize]byte + meta0 *common.Meta + meta1 *common.Meta + rwtx *Tx + + freelist *freelist + batch *batch + + ops struct { + writeAt func(b []byte, off int64) (n int, err error) + } + + // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures + // dramatic performance degradation if database is large and fragmentation in freelist is common. + // The alternative one is using hashmap, it is faster in almost all circumstances + // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. + // The default type is array + FreelistType FreelistType + + path string + // `dataref` isn't used at all on Windows, and the golangci-lint + // always fails on Windows platform. + //nolint + dataref []byte // mmap'ed readonly, write throws SEGV + txs []*Tx + // Put `stats` at the first field to ensure it's 64-bit aligned. 
Note that // the first word in an allocated struct can be relied upon to be 64-bit // aligned. Refer to https://pkg.go.dev/sync/atomic#pkg-note-BUG. Also // refer to discussion in https://github.com/etcd-io/bbolt/issues/577. stats Stats + // If you want to read the entire database fast, you can set MmapFlag to + // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. + MmapFlags int + + // MaxBatchSize is the maximum size of a batch. Default value is + // copied from DefaultMaxBatchSize in Open. + // + // If <=0, disables batching. + // + // Do not change concurrently with calls to Batch. + MaxBatchSize int + + // MaxBatchDelay is the maximum delay before a batch starts. + // Default value is copied from DefaultMaxBatchDelay in Open. + // + // If <=0, effectively disables batching. + // + // Do not change concurrently with calls to Batch. + MaxBatchDelay time.Duration + + // AllocSize is the amount of space allocated when the database + // needs to create new pages. This is done to amortize the cost + // of truncate() and fsync() when growing the data file. + AllocSize int + + datasz int + pageSize int + mmaplock sync.RWMutex // Protects mmap access during remapping. + statlock sync.RWMutex // Protects stats access. + + freelistLoad sync.Once + + batchMu sync.Mutex + + rwlock sync.Mutex // Allows only one writer at a time. + metalock sync.Mutex // Protects meta page access. + // When enabled, the database will perform a Check() after every commit. // A panic is issued if the database is in an inconsistent state. This // flag has a large performance impact so it should only be used for @@ -65,13 +134,6 @@ type DB struct { // re-sync during recovery. NoFreelistSync bool - // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures - // dramatic performance degradation if database is large and fragmentation in freelist is common. 
- // The alternative one is using hashmap, it is faster in almost all circumstances - // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. - // The default type is array - FreelistType FreelistType - // When true, skips the truncate call when growing the database. // Setting this to true is only safe on non-ext3/ext4 systems. // Skipping truncation avoids preallocation of hard drive space and @@ -85,71 +147,13 @@ type DB struct { // set to `true`. PreLoadFreelist bool - // If you want to read the entire database fast, you can set MmapFlag to - // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. - MmapFlags int - - // MaxBatchSize is the maximum size of a batch. Default value is - // copied from DefaultMaxBatchSize in Open. - // - // If <=0, disables batching. - // - // Do not change concurrently with calls to Batch. - MaxBatchSize int - - // MaxBatchDelay is the maximum delay before a batch starts. - // Default value is copied from DefaultMaxBatchDelay in Open. - // - // If <=0, effectively disables batching. - // - // Do not change concurrently with calls to Batch. - MaxBatchDelay time.Duration - - // AllocSize is the amount of space allocated when the database - // needs to create new pages. This is done to amortize the cost - // of truncate() and fsync() when growing the data file. - AllocSize int - // Mlock locks database file in memory when set to true. // It prevents major page faults, however used memory can't be reclaimed. // // Supported only on Unix via mlock/munlock syscalls. Mlock bool - logger Logger - - path string - openFile func(string, int, os.FileMode) (*os.File, error) - file *os.File - // `dataref` isn't used at all on Windows, and the golangci-lint - // always fails on Windows platform. 
- //nolint - dataref []byte // mmap'ed readonly, write throws SEGV - data *[maxMapSize]byte - datasz int - meta0 *common.Meta - meta1 *common.Meta - pageSize int - opened bool - rwtx *Tx - txs []*Tx - - freelist *freelist - freelistLoad sync.Once - - pagePool sync.Pool - - batchMu sync.Mutex - batch *batch - - rwlock sync.Mutex // Allows only one writer at a time. - metalock sync.Mutex // Protects meta page access. - mmaplock sync.RWMutex // Protects mmap access during remapping. - statlock sync.RWMutex // Protects stats access. - - ops struct { - writeAt func(b []byte, off int64) (n int, err error) - } + opened bool // Read only mode. // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately. @@ -1012,8 +1016,8 @@ type call struct { type batch struct { db *DB timer *time.Timer - start sync.Once calls []call + start sync.Once } // trigger runs the batch if it hasn't already been run. @@ -1278,21 +1282,13 @@ func (db *DB) freepages() []common.Pgid { // Options represents the options that can be set when opening a database. type Options struct { - // Timeout is the amount of time to wait to obtain a file lock. - // When set to zero it will wait indefinitely. - Timeout time.Duration - - // Sets the DB.NoGrowSync flag before memory mapping the file. - NoGrowSync bool - // Do not sync freelist to disk. This improves the database write performance - // under normal operation, but requires a full database re-sync during recovery. - NoFreelistSync bool + // Logger is the logger used for bbolt. + Logger Logger - // PreLoadFreelist sets whether to load the free pages when opening - // the db file. Note when opening db in write mode, bbolt will always - // load the free pages. - PreLoadFreelist bool + // OpenFile is used to open files. It defaults to os.OpenFile. This option + // is useful for writing hermetic tests. + OpenFile func(string, int, os.FileMode) (*os.File, error) // FreelistType sets the backend freelist type. There are two options. 
Array which is simple but endures // dramatic performance degradation if database is large and fragmentation in freelist is common. @@ -1301,9 +1297,9 @@ type Options struct { // The default type is array FreelistType FreelistType - // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to - // grab a shared lock (UNIX). - ReadOnly bool + // Timeout is the amount of time to wait to obtain a file lock. + // When set to zero it will wait indefinitely. + Timeout time.Duration // Sets the DB.MmapFlags flag before memory mapping the file. MmapFlags int @@ -1321,22 +1317,31 @@ type Options struct { // PageSize overrides the default OS page size. PageSize int + // Sets the DB.NoGrowSync flag before memory mapping the file. + NoGrowSync bool + + // Do not sync freelist to disk. This improves the database write performance + // under normal operation, but requires a full database re-sync during recovery. + NoFreelistSync bool + + // PreLoadFreelist sets whether to load the free pages when opening + // the db file. Note when opening db in write mode, bbolt will always + // load the free pages. + PreLoadFreelist bool + + // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to + // grab a shared lock (UNIX). + ReadOnly bool + // NoSync sets the initial value of DB.NoSync. Normally this can just be // set directly on the DB itself when returned from Open(), but this option // is useful in APIs which expose Options but not the underlying DB. NoSync bool - // OpenFile is used to open files. It defaults to os.OpenFile. This option - // is useful for writing hermetic tests. - OpenFile func(string, int, os.FileMode) (*os.File, error) - // Mlock locks database file in memory when set to true. // It prevents potential page faults, however // used memory can't be reclaimed. (UNIX only) Mlock bool - - // Logger is the logger used for bbolt. 
- Logger Logger } func (o *Options) String() string { diff --git a/freelist.go b/freelist.go index 731d75c46..7c45fa01a 100644 --- a/freelist.go +++ b/freelist.go @@ -22,20 +22,20 @@ type pidSet map[common.Pgid]struct{} // freelist represents a list of all pages that are available for allocation. // It also tracks pages that have been freed but are still in use by open transactions. type freelist struct { - freelistType FreelistType // freelist type - ids []common.Pgid // all free and available free page ids. allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid. pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx. cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids. freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size - freePagesCount uint64 // count of free pages(hashmap version) allocate func(txid common.Txid, n int) common.Pgid // the freelist allocate func free_count func() int // the function which gives you free page number mergeSpans func(ids common.Pgids) // the mergeSpan func getFreePageIDs func() []common.Pgid // get free pgids func readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist + freelistType FreelistType // freelist type + ids []common.Pgid // all free and available free page ids. + freePagesCount uint64 // count of free pages(hashmap version) } // newFreelist returns an empty, initialized freelist. diff --git a/internal/btesting/btesting.go b/internal/btesting/btesting.go index c83369f09..d29a87c63 100644 --- a/internal/btesting/btesting.go +++ b/internal/btesting/btesting.go @@ -26,10 +26,10 @@ const ( // DB is a test wrapper for bolt.DB. 
type DB struct { + t testing.TB *bolt.DB - f string o *bolt.Options - t testing.TB + f string } // MustCreateDB returns a new, open DB at a temporary location. diff --git a/internal/common/inode.go b/internal/common/inode.go index 080b9af78..43333396b 100644 --- a/internal/common/inode.go +++ b/internal/common/inode.go @@ -6,10 +6,10 @@ import "unsafe" // It can be used to point to elements in a page or point // to an element which hasn't been added to a page yet. type Inode struct { - flags uint32 - pgid Pgid key []byte value []byte + pgid Pgid + flags uint32 } type Inodes []Inode diff --git a/internal/common/page.go b/internal/common/page.go index ee808967c..7a67f4c26 100644 --- a/internal/common/page.go +++ b/internal/common/page.go @@ -322,8 +322,8 @@ func (n *leafPageElement) Bucket() *InBucket { // PageInfo represents human readable information about a page. type PageInfo struct { - ID int Type string + ID int Count int OverflowCount int } diff --git a/node.go b/node.go index fe67c3c89..ec7675a7b 100644 --- a/node.go +++ b/node.go @@ -11,14 +11,14 @@ import ( // node represents an in-memory, deserialized page. type node struct { bucket *Bucket - isLeaf bool - unbalanced bool - spilled bool - key []byte - pgid common.Pgid parent *node + key []byte children nodes inodes common.Inodes + pgid common.Pgid + isLeaf bool + unbalanced bool + spilled bool } // root returns the top-level node this node is attached to. diff --git a/tests/dmflakey/dmflakey.go b/tests/dmflakey/dmflakey.go index 25061a4cb..89ad468d4 100644 --- a/tests/dmflakey/dmflakey.go +++ b/tests/dmflakey/dmflakey.go @@ -133,10 +133,10 @@ func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType) (_ Flakey, re type flakey struct { fsType FSType imgPath string - imgSize int64 loopDevice string flakeyDevice string + imgSize int64 } // DevicePath returns the flakey device path. 
diff --git a/tx.go b/tx.go index 81913b0fe..1ca24a5eb 100644 --- a/tx.go +++ b/tx.go @@ -25,15 +25,14 @@ import ( // are using them. A long running read transaction can cause the database to // quickly grow. type Tx struct { - writable bool - managed bool db *DB meta *common.Meta - root Bucket pages map[common.Pgid]*common.Page - stats TxStats + root Bucket commitHandlers []func() + stats TxStats + // WriteFlag specifies the flag for write-related methods like WriteTo(). // Tx opens the database file with the specified flag to copy the data. // @@ -41,6 +40,8 @@ type Tx struct { // workloads. For databases that are much larger than available RAM, // set the flag to syscall.O_DIRECT to avoid trashing the page cache. WriteFlag int + writable bool + managed bool } // init initializes the transaction.