Skip to content

Commit

Permalink
feat(cosmovisor): graceful shutdown (#16963)
Browse files Browse the repository at this point in the history
  • Loading branch information
chillyvee authored Jul 13, 2023
1 parent 80e3c7a commit 6a0ab4f
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 66 deletions.
1 change: 1 addition & 0 deletions tools/cosmovisor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
* [#16413](https://github.com/cosmos/cosmos-sdk/issues/16413) Add `cosmovisor pre-upgrade` command to manually add an upgrade to cosmovisor.
* [#16573](https://github.com/cosmos/cosmos-sdk/pull/16573) Extend `cosmovisor` configuration with new log format options
* [#16550](https://github.com/cosmos/cosmos-sdk/pull/16550) Add COSMOVISOR_CUSTOM_PREUPGRADE to cosmovisor to execute custom pre-upgrade scripts (separate from daemon pre-upgrade).
* [#16963](https://github.com/cosmos/cosmos-sdk/pull/16963) Add DAEMON_SHUTDOWN_GRACE to send interrupt and wait before sending kill
* [#15361](https://github.com/cosmos/cosmos-sdk/pull/15361) Add `cosmovisor config` command to display the configuration used by cosmovisor.

## Improvements
Expand Down
1 change: 1 addition & 0 deletions tools/cosmovisor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ Use of `cosmovisor` without one of the action arguments is deprecated. For backw
* `DAEMON_DOWNLOAD_MUST_HAVE_CHECKSUM` (*optional*, default = `false`), if `true` cosmovisor will require that a checksum is provided in the upgrade plan for the binary to be downloaded. If `false`, cosmovisor will not require a checksum to be provided, but still check the checksum if one is provided.
* `DAEMON_RESTART_AFTER_UPGRADE` (*optional*, default = `true`), if `true`, restarts the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. Otherwise (`false`), `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note restart is only after the upgrade and does not auto-restart the subprocess after an error occurs.
* `DAEMON_RESTART_DELAY` (*optional*, default none), allows a node operator to define a delay of the specified time between the node halt (for upgrade) and the backup. The value must be a duration (e.g. `1s`).
* `DAEMON_SHUTDOWN_GRACE` (*optional*, default none), if set, send interrupt to binary and wait the specified time to allow for cleanup/cache flush to disk before sending the kill signal. The value must be a duration (e.g. `1s`).
* `DAEMON_POLL_INTERVAL` (*optional*, default 300 milliseconds), is the interval length for polling the upgrade plan file. The value must be a duration (e.g. `1s`).
* `DAEMON_DATA_BACKUP_DIR` option to set a custom backup directory. If not set, `DAEMON_HOME` is used.
* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `true`, upgrades directly without performing a backup. Otherwise (`false`, default) backs up the data before trying the upgrade. The default value of false is useful and recommended in case of failures and when a backup is needed to roll back. We recommend using the default backup option `UNSAFE_SKIP_BACKUP=false`.
Expand Down
15 changes: 15 additions & 0 deletions tools/cosmovisor/args.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const (
EnvDownloadMustHaveChecksum = "DAEMON_DOWNLOAD_MUST_HAVE_CHECKSUM"
EnvRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE"
EnvRestartDelay = "DAEMON_RESTART_DELAY"
EnvShutdownGrace = "DAEMON_SHUTDOWN_GRACE"
EnvSkipBackup = "UNSAFE_SKIP_BACKUP"
EnvDataBackupPath = "DAEMON_DATA_BACKUP_DIR"
EnvInterval = "DAEMON_POLL_INTERVAL"
Expand All @@ -51,6 +52,7 @@ type Config struct {
DownloadMustHaveChecksum bool
RestartAfterUpgrade bool
RestartDelay time.Duration
ShutdownGrace time.Duration
PollInterval time.Duration
UnsafeSkipBackup bool
DataBackupPath string
Expand Down Expand Up @@ -207,6 +209,17 @@ func GetConfigFromEnv() (*Config, error) {
}
}

cfg.ShutdownGrace = 0 // default value but makes it explicit
shutdownGrace := os.Getenv(EnvShutdownGrace)
if shutdownGrace != "" {
val, err := parseEnvDuration(shutdownGrace)
if err != nil {
errs = append(errs, fmt.Errorf("invalid: %s: %w", EnvShutdownGrace, err))
} else {
cfg.ShutdownGrace = val
}
}

envPreupgradeMaxRetriesVal := os.Getenv(EnvPreupgradeMaxRetries)
if cfg.PreupgradeMaxRetries, err = strconv.Atoi(envPreupgradeMaxRetriesVal); err != nil && envPreupgradeMaxRetriesVal != "" {
errs = append(errs, fmt.Errorf("%s could not be parsed to int: %w", EnvPreupgradeMaxRetries, err))
Expand Down Expand Up @@ -428,6 +441,7 @@ func (cfg Config) DetailString() string {
{EnvDownloadMustHaveChecksum, fmt.Sprintf("%t", cfg.DownloadMustHaveChecksum)},
{EnvRestartUpgrade, fmt.Sprintf("%t", cfg.RestartAfterUpgrade)},
{EnvRestartDelay, cfg.RestartDelay.String()},
{EnvShutdownGrace, cfg.ShutdownGrace.String()},
{EnvInterval, cfg.PollInterval.String()},
{EnvSkipBackup, fmt.Sprintf("%t", cfg.UnsafeSkipBackup)},
{EnvDataBackupPath, cfg.DataBackupPath},
Expand All @@ -436,6 +450,7 @@ func (cfg Config) DetailString() string {
{EnvColorLogs, fmt.Sprintf("%t", cfg.ColorLogs)},
{EnvTimeFormatLogs, cfg.TimeFormatLogs},
{EnvCustomPreupgrade, cfg.CustomPreupgrade},
{EnvDisableRecase, fmt.Sprintf("%t", cfg.DisableRecase)},
}

derivedEntries := []struct{ name, value string }{
Expand Down
Loading

0 comments on commit 6a0ab4f

Please sign in to comment.