From 461ebc3f67185da800ef472d51b0ce04a0bac893 Mon Sep 17 00:00:00 2001 From: sjdot Date: Sat, 18 Mar 2023 12:38:51 -0400 Subject: [PATCH 1/3] backend: add experimental defrag txn limit flag Signed-off-by: Steven Johnson Signed-off-by: sjdot --- server/config/config.go | 2 ++ server/embed/config.go | 2 ++ server/embed/etcd.go | 1 + server/etcdmain/config.go | 1 + server/etcdmain/help.go | 2 ++ server/storage/backend.go | 6 ++++++ server/storage/backend/backend.go | 10 ++++++++-- 7 files changed, 22 insertions(+), 2 deletions(-) diff --git a/server/config/config.go b/server/config/config.go index 48de650b8a1..90a1b1e39fa 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -65,6 +65,8 @@ type ServerConfig struct { BackendBatchInterval time.Duration // BackendBatchLimit is the maximum operations before commit the backend transaction. BackendBatchLimit int + // DefragLimit is the number of keys iterated before committing a transaction during defragmentation. + DefragLimit int // BackendFreelistType is the type of the backend boltdb freelist. BackendFreelistType bolt.FreelistType diff --git a/server/embed/config.go b/server/embed/config.go index 2f4a3416742..6d9495d2e81 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -338,6 +338,8 @@ type Config struct { // TODO: Delete in v3.7 ExperimentalEnableLeaseCheckpointPersist bool `json:"experimental-enable-lease-checkpoint-persist"` ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"` + // ExperimentalDefragLimit is the number of keys iterated before committing a transaction during defragmentation. + ExperimentalDefragLimit int `json:"experimental-defrag-limit"` // ExperimentalCompactionSleepInterval is the sleep interval between every etcd compaction loop. ExperimentalCompactionSleepInterval time.Duration `json:"experimental-compaction-sleep-interval"` ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"` diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 3b0e4db7c01..c490be42bf7 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -213,6 +213,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint, LeaseCheckpointPersist: cfg.ExperimentalEnableLeaseCheckpointPersist, CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit, + DefragLimit: cfg.ExperimentalDefragLimit, CompactionSleepInterval: cfg.ExperimentalCompactionSleepInterval, WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval, DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime, diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 954a08727d1..0cfe5826288 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -283,6 +283,7 @@ func newConfig() *config { fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.") fs.DurationVar(&cfg.ec.ExperimentalWaitClusterReadyTimeout, "experimental-wait-cluster-ready-timeout", cfg.ec.ExperimentalWaitClusterReadyTimeout, "Maximum duration to wait for the cluster to be ready.") fs.Uint64Var(&cfg.ec.SnapshotCatchUpEntries, "experimental-snapshot-catchup-entries", cfg.ec.SnapshotCatchUpEntries, "Number of entries for a slow follower to catch up after compacting the the raft storage entries.") + fs.IntVar(&cfg.ec.ExperimentalDefragLimit, "experimental-defrag-limit", cfg.ec.ExperimentalDefragLimit, "Number of keys iterated before committing a transaction during defragmentation.") // unsafe fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index 67b67add816..c509b5edc8b 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -257,6 +257,8 @@ Experimental feature: ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases. --experimental-compaction-batch-limit 1000 ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch. + --experimental-defrag-limit 10000 + ExperimentalDefragLimit sets the maximum revisions deleted in each compaction batch. --experimental-peer-skip-client-san-verification 'false' Skip verification of SAN field in client certificate for peer connections. --experimental-watch-progress-notify-interval '10m' diff --git a/server/storage/backend.go b/server/storage/backend.go index a93fd8a3f11..a70fd16ba60 100644 --- a/server/storage/backend.go +++ b/server/storage/backend.go @@ -44,6 +44,12 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend { cfg.Logger.Info("setting backend batch interval", zap.Duration("batch interval", cfg.BackendBatchInterval)) } } + if cfg.DefragLimit != 0 { + bcfg.DefragLimit = cfg.DefragLimit + if cfg.Logger != nil { + cfg.Logger.Info("setting backend defrag limit", zap.Int("defrag limit", cfg.DefragLimit)) + } + } bcfg.BackendFreelistType = cfg.BackendFreelistType bcfg.Logger = cfg.Logger if cfg.QuotaBackendBytes > 0 && cfg.QuotaBackendBytes != DefaultQuotaBytes { diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index e7b951ee7e6..8de285218fe 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -34,7 +34,7 @@ var ( defaultBatchLimit = 10000 defaultBatchInterval = 100 * time.Millisecond - defragLimit = 10000 + defaultDefragLimit = 10000 // initialMmapSize is the initial size of the mmapped region. Setting this larger than // the potential max db size can prevent writer from blocking reader. @@ -111,6 +111,8 @@ type backend struct { batchLimit int batchTx *batchTxBuffered + defragLimit int + readTx *readTx // txReadBufferCache mirrors "txReadBuffer" within "readTx" -- readTx.baseReadTx.buf. // When creating "concurrentReadTx": @@ -136,6 +138,8 @@ type BackendConfig struct { BatchInterval time.Duration // BatchLimit is the maximum puts before flushing the BatchTx. BatchLimit int + // DefragLimit is the number of keys iterated before committing a transaction during defragmentation. + DefragLimit int // BackendFreelistType is the backend boltdb's freelist type. BackendFreelistType bolt.FreelistType // MmapSize is the number of bytes to mmap for the backend. @@ -155,6 +159,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig { return BackendConfig{ BatchInterval: defaultBatchInterval, BatchLimit: defaultBatchLimit, + DefragLimit: defaultDefragLimit, MmapSize: initialMmapSize, Logger: lg, } @@ -194,6 +199,7 @@ func newBackend(bcfg BackendConfig) *backend { batchInterval: bcfg.BatchInterval, batchLimit: bcfg.BatchLimit, + defragLimit: bcfg.DefragLimit, mlock: bcfg.Mlock, readTx: &readTx{ @@ -503,7 +509,7 @@ func (b *backend) defrag() error { ) } // gofail: var defragBeforeCopy struct{} - err = defragdb(b.db, tmpdb, defragLimit) + err = defragdb(b.db, tmpdb, b.defragLimit) if err != nil { tmpdb.Close() if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { From e9b810bfae483fdabbea8df87e68e9cc43a3cd37 Mon Sep 17 00:00:00 2001 From: sjdot Date: Sat, 18 Mar 2023 12:46:05 -0400 Subject: [PATCH 2/3] Fix comment Signed-off-by: Steven Johnson Signed-off-by: sjdot --- server/etcdmain/help.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index c509b5edc8b..f3e80f3713e 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -258,7 +258,7 @@ Experimental feature: --experimental-compaction-batch-limit 1000 ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch. --experimental-defrag-limit 10000 - ExperimentalDefragLimit sets the maximum revisions deleted in each compaction batch. + ExperimentalDefragLimit sets the number of keys iterated before committing a transaction during defragmentation. --experimental-peer-skip-client-san-verification 'false' Skip verification of SAN field in client certificate for peer connections. --experimental-watch-progress-notify-interval '10m' From 201fad6c1e1954ba3e1c1e72d61e52cd4e516f7a Mon Sep 17 00:00:00 2001 From: sjdot Date: Sat, 18 Mar 2023 17:40:10 -0400 Subject: [PATCH 3/3] Rename to DefragBatchLimit Signed-off-by: sjdot --- server/config/config.go | 4 ++-- server/embed/config.go | 4 ++-- server/embed/etcd.go | 2 +- server/etcdmain/config.go | 2 +- server/etcdmain/help.go | 2 +- server/storage/backend.go | 6 +++--- server/storage/backend/backend.go | 10 +++++----- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/server/config/config.go b/server/config/config.go index 90a1b1e39fa..d8c9991cdad 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -65,8 +65,8 @@ type ServerConfig struct { BackendBatchInterval time.Duration // BackendBatchLimit is the maximum operations before commit the backend transaction. BackendBatchLimit int - // DefragLimit is the number of keys iterated before committing a transaction during defragmentation. - DefragLimit int + // DefragBatchLimit is the number of keys iterated before committing a transaction during defragmentation. + DefragBatchLimit int // BackendFreelistType is the type of the backend boltdb freelist. BackendFreelistType bolt.FreelistType diff --git a/server/embed/config.go b/server/embed/config.go index 6d9495d2e81..ccd675a923a 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -338,8 +338,8 @@ type Config struct { // TODO: Delete in v3.7 ExperimentalEnableLeaseCheckpointPersist bool `json:"experimental-enable-lease-checkpoint-persist"` ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"` - // ExperimentalDefragLimit is the number of keys iterated before committing a transaction during defragmentation. - ExperimentalDefragLimit int `json:"experimental-defrag-limit"` + // ExperimentalDefragBatchLimit is the number of keys iterated before committing a transaction during defragmentation. + ExperimentalDefragBatchLimit int `json:"experimental-defrag-limit"` // ExperimentalCompactionSleepInterval is the sleep interval between every etcd compaction loop. ExperimentalCompactionSleepInterval time.Duration `json:"experimental-compaction-sleep-interval"` ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"` diff --git a/server/embed/etcd.go b/server/embed/etcd.go index c490be42bf7..cbcdf2c468b 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -213,7 +213,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint, LeaseCheckpointPersist: cfg.ExperimentalEnableLeaseCheckpointPersist, CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit, - DefragLimit: cfg.ExperimentalDefragLimit, + DefragBatchLimit: cfg.ExperimentalDefragBatchLimit, CompactionSleepInterval: cfg.ExperimentalCompactionSleepInterval, WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval, DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime, diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 0cfe5826288..0d5ef875e6f 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -283,7 +283,7 @@ func newConfig() *config { fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.") fs.DurationVar(&cfg.ec.ExperimentalWaitClusterReadyTimeout, "experimental-wait-cluster-ready-timeout", cfg.ec.ExperimentalWaitClusterReadyTimeout, "Maximum duration to wait for the cluster to be ready.") fs.Uint64Var(&cfg.ec.SnapshotCatchUpEntries, "experimental-snapshot-catchup-entries", cfg.ec.SnapshotCatchUpEntries, "Number of entries for a slow follower to catch up after compacting the the raft storage entries.") - fs.IntVar(&cfg.ec.ExperimentalDefragLimit, "experimental-defrag-limit", cfg.ec.ExperimentalDefragLimit, "Number of keys iterated before committing a transaction during defragmentation.") + fs.IntVar(&cfg.ec.ExperimentalDefragBatchLimit, "experimental-defrag-limit", cfg.ec.ExperimentalDefragBatchLimit, "Number of keys iterated before committing a transaction during defragmentation.") // unsafe fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index f3e80f3713e..242e28aebd0 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -258,7 +258,7 @@ Experimental feature: --experimental-compaction-batch-limit 1000 ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch. --experimental-defrag-limit 10000 - ExperimentalDefragLimit sets the number of keys iterated before committing a transaction during defragmentation. + ExperimentalDefragBatchLimit sets the number of keys iterated before committing a transaction during defragmentation. --experimental-peer-skip-client-san-verification 'false' Skip verification of SAN field in client certificate for peer connections. --experimental-watch-progress-notify-interval '10m' diff --git a/server/storage/backend.go b/server/storage/backend.go index a70fd16ba60..fcf0de57932 100644 --- a/server/storage/backend.go +++ b/server/storage/backend.go @@ -44,10 +44,10 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend { cfg.Logger.Info("setting backend batch interval", zap.Duration("batch interval", cfg.BackendBatchInterval)) } } - if cfg.DefragLimit != 0 { - bcfg.DefragLimit = cfg.DefragLimit + if cfg.DefragBatchLimit != 0 { + bcfg.DefragBatchLimit = cfg.DefragBatchLimit if cfg.Logger != nil { - cfg.Logger.Info("setting backend defrag limit", zap.Int("defrag limit", cfg.DefragLimit)) + cfg.Logger.Info("setting backend defrag limit", zap.Int("defrag limit", cfg.DefragBatchLimit)) } } bcfg.BackendFreelistType = cfg.BackendFreelistType diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index 8de285218fe..659b848b6ed 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -34,7 +34,7 @@ var ( defaultBatchLimit = 10000 defaultBatchInterval = 100 * time.Millisecond - defaultDefragLimit = 10000 + defaultDefragBatchLimit = 10000 // initialMmapSize is the initial size of the mmapped region. Setting this larger than // the potential max db size can prevent writer from blocking reader. @@ -138,8 +138,8 @@ type BackendConfig struct { BatchInterval time.Duration // BatchLimit is the maximum puts before flushing the BatchTx. BatchLimit int - // DefragLimit is the number of keys iterated before committing a transaction during defragmentation. - DefragLimit int + // DefragBatchLimit is the number of keys iterated before committing a transaction during defragmentation. + DefragBatchLimit int // BackendFreelistType is the backend boltdb's freelist type. BackendFreelistType bolt.FreelistType // MmapSize is the number of bytes to mmap for the backend. @@ -159,7 +159,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig { return BackendConfig{ BatchInterval: defaultBatchInterval, BatchLimit: defaultBatchLimit, - DefragLimit: defaultDefragLimit, + DefragBatchLimit: defaultDefragBatchLimit, MmapSize: initialMmapSize, Logger: lg, } @@ -199,7 +199,7 @@ func newBackend(bcfg BackendConfig) *backend { batchInterval: bcfg.BatchInterval, batchLimit: bcfg.BatchLimit, - defragLimit: bcfg.DefragLimit, + defragLimit: bcfg.DefragBatchLimit, mlock: bcfg.Mlock, readTx: &readTx{