backend: add experimental defrag txn limit flag #15511

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions server/config/config.go
@@ -65,6 +65,8 @@ type ServerConfig struct {
BackendBatchInterval time.Duration
// BackendBatchLimit is the maximum number of operations before committing the backend transaction.
BackendBatchLimit int
// DefragLimit is the number of keys iterated before committing a transaction during defragmentation.
DefragLimit int

// BackendFreelistType is the type of the backend boltdb freelist.
BackendFreelistType bolt.FreelistType
2 changes: 2 additions & 0 deletions server/embed/config.go
@@ -338,6 +338,8 @@ type Config struct {
// TODO: Delete in v3.7
ExperimentalEnableLeaseCheckpointPersist bool `json:"experimental-enable-lease-checkpoint-persist"`
ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"`
// ExperimentalDefragLimit is the number of keys iterated before committing a transaction during defragmentation.
ExperimentalDefragLimit int `json:"experimental-defrag-limit"`
// ExperimentalCompactionSleepInterval is the sleep interval between every etcd compaction loop.
ExperimentalCompactionSleepInterval time.Duration `json:"experimental-compaction-sleep-interval"`
ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"`
1 change: 1 addition & 0 deletions server/embed/etcd.go
@@ -213,6 +213,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint,
LeaseCheckpointPersist: cfg.ExperimentalEnableLeaseCheckpointPersist,
CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit,
DefragLimit: cfg.ExperimentalDefragLimit,
CompactionSleepInterval: cfg.ExperimentalCompactionSleepInterval,
WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval,
DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime,
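For orientation, a minimal sketch of how an embedded server could opt into the new limit once this plumbing lands. embed.NewConfig, embed.StartEtcd, ReadyNotify, and Err are existing embed APIs; the data directory and the value 5000 are placeholders chosen only for illustration.

```go
package main

import (
	"log"

	"go.etcd.io/etcd/server/v3/embed"
)

func main() {
	cfg := embed.NewConfig()
	cfg.Dir = "default.etcd" // placeholder data directory
	// Commit defrag copy transactions every 5000 keys instead of the
	// backend default of 10000 (value chosen only for illustration).
	cfg.ExperimentalDefragLimit = 5000

	e, err := embed.StartEtcd(cfg)
	if err != nil {
		log.Fatal(err)
	}
	defer e.Close()

	<-e.Server.ReadyNotify()
	log.Println("etcd ready; Defrag will commit every 5000 keys")
	<-e.Err() // block until the server stops or errors
}
```

On the command line, the same knob is exposed as --experimental-defrag-limit, registered in server/etcdmain/config.go below.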
1 change: 1 addition & 0 deletions server/etcdmain/config.go
@@ -283,6 +283,7 @@ func newConfig() *config {
fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.")
fs.DurationVar(&cfg.ec.ExperimentalWaitClusterReadyTimeout, "experimental-wait-cluster-ready-timeout", cfg.ec.ExperimentalWaitClusterReadyTimeout, "Maximum duration to wait for the cluster to be ready.")
fs.Uint64Var(&cfg.ec.SnapshotCatchUpEntries, "experimental-snapshot-catchup-entries", cfg.ec.SnapshotCatchUpEntries, "Number of entries for a slow follower to catch up after compacting the raft storage entries.")
fs.IntVar(&cfg.ec.ExperimentalDefragLimit, "experimental-defrag-limit", cfg.ec.ExperimentalDefragLimit, "Number of keys iterated before committing a transaction during defragmentation.")

// unsafe
fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.")
2 changes: 2 additions & 0 deletions server/etcdmain/help.go
@@ -257,6 +257,8 @@ Experimental feature:
ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases.
--experimental-compaction-batch-limit 1000
ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch.
--experimental-defrag-limit 10000
ExperimentalDefragLimit sets the number of keys iterated before committing a transaction during defragmentation.
--experimental-peer-skip-client-san-verification 'false'
Skip verification of SAN field in client certificate for peer connections.
--experimental-watch-progress-notify-interval '10m'
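The 10000 documented here is intended to match the backend's own default (defaultDefragLimit in server/storage/backend/backend.go further down, which DefaultBackendConfig copies into BackendConfig.DefragLimit). A hedged sketch of a test pinning that down — the test name and the use of zaptest are mine, not part of this PR:

```go
package backend_test

import (
	"testing"

	"go.etcd.io/etcd/server/v3/storage/backend"
	"go.uber.org/zap/zaptest"
)

// Hypothetical check, not part of this PR: the default config should carry the
// same 10000 documented in the help text above.
func TestDefaultDefragLimitMatchesHelp(t *testing.T) {
	bcfg := backend.DefaultBackendConfig(zaptest.NewLogger(t))
	if bcfg.DefragLimit != 10000 {
		t.Fatalf("expected default defrag limit of 10000, got %d", bcfg.DefragLimit)
	}
}
```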
6 changes: 6 additions & 0 deletions server/storage/backend.go
@@ -44,6 +44,12 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend {
cfg.Logger.Info("setting backend batch interval", zap.Duration("batch interval", cfg.BackendBatchInterval))
}
}
if cfg.DefragLimit != 0 {
bcfg.DefragLimit = cfg.DefragLimit
if cfg.Logger != nil {
cfg.Logger.Info("setting backend defrag limit", zap.Int("defrag limit", cfg.DefragLimit))
}
}
bcfg.BackendFreelistType = cfg.BackendFreelistType
bcfg.Logger = cfg.Logger
if cfg.QuotaBackendBytes > 0 && cfg.QuotaBackendBytes != DefaultQuotaBytes {
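The same limit can also be set when constructing a backend directly, outside the server wiring above. A minimal sketch against the existing backend.DefaultBackendConfig, backend.New, and Defrag APIs; the path and the value 2000 are placeholders for illustration:

```go
package main

import (
	"log"

	"go.etcd.io/etcd/server/v3/storage/backend"
	"go.uber.org/zap"
)

func main() {
	lg := zap.NewExample()

	bcfg := backend.DefaultBackendConfig(lg) // DefragLimit starts at the package default (10000)
	bcfg.Path = "/tmp/example.db"            // placeholder bolt db path
	bcfg.DefragLimit = 2000                  // commit the defrag copy txn every 2000 keys

	be := backend.New(bcfg)
	defer be.Close()

	// A defrag pass now commits every 2000 keys instead of 10000.
	if err := be.Defrag(); err != nil {
		log.Fatal(err)
	}
}
```

Lowering the limit keeps each copy transaction during Defrag smaller, at the cost of committing more often.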
10 changes: 8 additions & 2 deletions server/storage/backend/backend.go
@@ -34,7 +34,7 @@
defaultBatchLimit = 10000
defaultBatchInterval = 100 * time.Millisecond

-defragLimit = 10000
+defaultDefragLimit = 10000

// initialMmapSize is the initial size of the mmapped region. Setting this larger than
// the potential max db size can prevent writer from blocking reader.
@@ -111,6 +111,8 @@ type backend struct {
batchLimit int
batchTx *batchTxBuffered

defragLimit int

readTx *readTx
// txReadBufferCache mirrors "txReadBuffer" within "readTx" -- readTx.baseReadTx.buf.
// When creating "concurrentReadTx":
@@ -136,6 +138,8 @@ type BackendConfig struct {
BatchInterval time.Duration
// BatchLimit is the maximum puts before flushing the BatchTx.
BatchLimit int
// DefragLimit is the number of keys iterated before committing a transaction during defragmentation.
DefragLimit int
// BackendFreelistType is the backend boltdb's freelist type.
BackendFreelistType bolt.FreelistType
// MmapSize is the number of bytes to mmap for the backend.
Expand All @@ -155,6 +159,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig {
return BackendConfig{
BatchInterval: defaultBatchInterval,
BatchLimit: defaultBatchLimit,
DefragLimit: defaultDefragLimit,
MmapSize: initialMmapSize,
Logger: lg,
}
@@ -194,6 +199,7 @@ func newBackend(bcfg BackendConfig) *backend {

batchInterval: bcfg.BatchInterval,
batchLimit: bcfg.BatchLimit,
defragLimit: bcfg.DefragLimit,
mlock: bcfg.Mlock,

readTx: &readTx{
@@ -503,7 +509,7 @@ func (b *backend) defrag() error {
)
}
// gofail: var defragBeforeCopy struct{}
-err = defragdb(b.db, tmpdb, defragLimit)
+err = defragdb(b.db, tmpdb, b.defragLimit)
if err != nil {
tmpdb.Close()
if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {
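For context on the third argument: defragdb copies every bucket of the live db into the temporary one and commits the destination transaction whenever the configured number of keys has been written, so no single bolt write transaction grows without bound. The sketch below mirrors that shape; it is not the verbatim etcd implementation, and cleanup of the open write transaction on error paths is simplified.

```go
package backendsketch

import (
	"fmt"

	bolt "go.etcd.io/bbolt"
)

// copyWithTxnLimit copies every bucket from src into dst, committing the
// destination write transaction each time `limit` keys have been written.
func copyWithTxnLimit(src, dst *bolt.DB, limit int) error {
	dtx, err := dst.Begin(true) // writable txn on the fresh db
	if err != nil {
		return err
	}
	stx, err := src.Begin(false) // read-only txn on the fragmented db
	if err != nil {
		dtx.Rollback()
		return err
	}
	defer stx.Rollback()

	count := 0
	c := stx.Cursor()
	// The top-level cursor of a read transaction iterates bucket names.
	for name, _ := c.First(); name != nil; name, _ = c.Next() {
		srcBucket := stx.Bucket(name)
		if srcBucket == nil {
			dtx.Rollback()
			return fmt.Errorf("cannot copy bucket %s", name)
		}
		dstBucket, berr := dtx.CreateBucketIfNotExists(name)
		if berr != nil {
			dtx.Rollback()
			return berr
		}
		if err := srcBucket.ForEach(func(k, v []byte) error {
			count++
			if count > limit {
				// Bound the write txn: commit what we have and start a new one.
				if cerr := dtx.Commit(); cerr != nil {
					return cerr
				}
				ntx, nerr := dst.Begin(true)
				if nerr != nil {
					return nerr
				}
				dtx = ntx
				dstBucket = dtx.Bucket(name)
				count = 0
			}
			return dstBucket.Put(k, v)
		}); err != nil {
			return err
		}
	}
	return dtx.Commit()
}
```

The flag added in this PR only tunes how often that commit happens; a smaller value means more, smaller transactions during defragmentation.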