diff --git a/cosmovisor/CHANGELOG.md b/cosmovisor/CHANGELOG.md index 51cce4b77a..6b5e609ea4 100644 --- a/cosmovisor/CHANGELOG.md +++ b/cosmovisor/CHANGELOG.md @@ -41,7 +41,8 @@ Ref: https://keepachangelog.com/en/1.0.0/ + [\#8590](https://github.com/cosmos/cosmos-sdk/pull/8590) File watcher for cosmovisor. Instead of parsing logs from stdin and stderr, we watch the `/data/upgrade-info.json` file updates using polling mechanism. + [\#10128](https://github.com/cosmos/cosmos-sdk/pull/10128) Change default value of `DAEMON_RESTART_AFTER_UPGRADE` to `true`. -+ [\#9999](https://github.com/cosmos/cosmos-sdk/issues/9999) Added `version` command that returns the cosmovisor version and the application version. ++ [\#9999](https://github.com/cosmos/cosmos-sdk/pull/10103) Added `version` command that returns the cosmovisor version and the application version. ++ [\#9973](https://github.com/cosmos/cosmos-sdk/pull/10056) Added support for pre-upgrade command in Cosmovisor to be called before the binary is upgraded. Added new environment variable `DAEMON_PREUPGRADE_MAX_RETRIES` that holds the maximum number of times to reattempt pre-upgrade before failing. ### Improvements diff --git a/cosmovisor/README.md b/cosmovisor/README.md index ce6193343b..1d3eb2b00f 100644 --- a/cosmovisor/README.md +++ b/cosmovisor/README.md @@ -43,6 +43,8 @@ All arguments passed to `cosmovisor` will be passed to the application binary (a * `DAEMON_RESTART_AFTER_UPGRADE` (*optional*, default = `true`), if `true`, restarts the subprocess with the same command-line arguments and flags (but with the new binary) after a successful upgrade. Otherwise (`false`), `cosmovisor` stops running after an upgrade and requires the system administrator to manually restart it. Note restart is only after the upgrade and does not auto-restart the subprocess after an error occurs. * `DAEMON_POLL_INTERVAL` is the interval length in milliseconds for polling the upgrade plan file. Default: 300. 
* `UNSAFE_SKIP_BACKUP` (defaults to `false`), if set to `false`, backs up the data before trying the upgrade. Otherwise (`true`), upgrades directly without performing a backup. The default value of false is useful and recommended in case of failures and when a backup needed to rollback. We recommend using the default backup option `UNSAFE_SKIP_BACKUP=false`. +* `DAEMON_PREUPGRADE_MAX_RETRIES` (defaults to `0`). The maximum number of times to call `pre-upgrade` in the application after exit status of `31`. After the maximum number of retries, cosmovisor fails the upgrade. + ### Folder Layout diff --git a/cosmovisor/args.go b/cosmovisor/args.go index 192b60ce53..6e96e5ab23 100644 --- a/cosmovisor/args.go +++ b/cosmovisor/args.go @@ -15,12 +15,13 @@ import ( // environment variable names const ( - envHome = "DAEMON_HOME" - envName = "DAEMON_NAME" - envDownloadBin = "DAEMON_ALLOW_DOWNLOAD_BINARIES" - envRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE" - envSkipBackup = "UNSAFE_SKIP_BACKUP" - envInterval = "DAEMON_POLL_INTERVAL" + envHome = "DAEMON_HOME" + envName = "DAEMON_NAME" + envDownloadBin = "DAEMON_ALLOW_DOWNLOAD_BINARIES" + envRestartUpgrade = "DAEMON_RESTART_AFTER_UPGRADE" + envSkipBackup = "UNSAFE_SKIP_BACKUP" + envInterval = "DAEMON_POLL_INTERVAL" + envPreupgradeMaxRetries = "DAEMON_PREUPGRADE_MAX_RETRIES" ) const ( @@ -42,6 +43,7 @@ type Config struct { RestartAfterUpgrade bool PollInterval time.Duration UnsafeSkipBackup bool + PreupgradeMaxRetries int // currently running upgrade currentUpgrade UpgradeInfo @@ -146,6 +148,12 @@ func GetConfigFromEnv() (*Config, error) { if err := cfg.validate(); err != nil { return nil, err } + + envPreupgradeMaxRetriesVal := os.Getenv(envPreupgradeMaxRetries) + if cfg.PreupgradeMaxRetries, err = strconv.Atoi(envPreupgradeMaxRetriesVal); err != nil && envPreupgradeMaxRetriesVal != "" { + return nil, fmt.Errorf("%s could not be parsed to int: %w", envPreupgradeMaxRetries, err) + } + return cfg, nil } diff --git 
a/cosmovisor/process.go b/cosmovisor/process.go index bc7b2e6133..e68255c038 100644 --- a/cosmovisor/process.go +++ b/cosmovisor/process.go @@ -61,13 +61,13 @@ func (l Launcher) Run(args []string, stdout, stderr io.Writer) (bool, error) { if err != nil || !needsUpdate { return false, err } - if err := doBackup(l.cfg); err != nil { - return false, err - } - if !SkipUpgrade(args, l.fw.currentInfo) { - err = doPreUpgrade(l.cfg) - if err != nil { + if !IsSkipUpgradeHeight(args, l.fw.currentInfo) { + if err := doBackup(l.cfg); err != nil { + return false, err + } + + if err = doPreUpgrade(l.cfg); err != nil { return false, err } } @@ -152,32 +152,52 @@ func doBackup(cfg *Config) error { return nil } -// doPreUpgrade runs the pre-upgrade command defined by the application +// doPreUpgrade runs the pre-upgrade command defined by the application and handles respective error codes +// cfg contains the cosmovisor config from env var func doPreUpgrade(cfg *Config) error { - bin, err := cfg.CurrentBin() - preUpgradeCmd := exec.Command(bin, "pre-upgrade") + counter := 0 + for { + if counter > cfg.PreupgradeMaxRetries { + return fmt.Errorf("pre-upgrade command failed. reached max attempt of retries - %d", cfg.PreupgradeMaxRetries) + } - _, err = preUpgradeCmd.Output() + err := executePreUpgradeCmd(cfg) + counter += 1 - if err != nil { - if err.(*exec.ExitError).ProcessState.ExitCode() == 1 { - fmt.Println("pre-upgrade command does not exist. continuing the upgrade.") - return nil - } - if err.(*exec.ExitError).ProcessState.ExitCode() == 30 { - return fmt.Errorf("pre-upgrade command failed : %w", err) - } - if err.(*exec.ExitError).ProcessState.ExitCode() == 31 { - fmt.Println("pre-upgrade command failed. retrying.") - return doPreUpgrade(cfg) + if err != nil { + if err.(*exec.ExitError).ProcessState.ExitCode() == 1 { + fmt.Println("pre-upgrade command does not exist. 
continuing the upgrade.") + return nil + } + if err.(*exec.ExitError).ProcessState.ExitCode() == 30 { + return fmt.Errorf("pre-upgrade command failed : %w", err) + } + if err.(*exec.ExitError).ProcessState.ExitCode() == 31 { + fmt.Println("pre-upgrade command failed. retrying. attempt:", counter) + fmt.Println(err) + continue + } } + fmt.Println("pre-upgrade successful. continuing the upgrade.") + return nil } - fmt.Println("pre-upgrade successful. continuing the upgrade.") - return nil } -// skipUpgrade checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run -func SkipUpgrade(args []string, upgradeInfo UpgradeInfo) bool { +// executePreUpgradeCmd runs the pre-upgrade command defined by the application +// cfg contains the cosmovisor config from the env vars +func executePreUpgradeCmd(cfg *Config) error { + bin, err := cfg.CurrentBin() + if err != nil { + return err + } + + preUpgradeCmd := exec.Command(bin, "pre-upgrade") + _, err = preUpgradeCmd.Output() + return err +} + +// IsSkipUpgradeHeight checks if pre-upgrade script must be run. If the height in the upgrade plan matches any of the heights provided in --safe-skip-upgrade, the script is not run +func IsSkipUpgradeHeight(args []string, upgradeInfo UpgradeInfo) bool { skipUpgradeHeights := UpgradeSkipHeights(args) for _, h := range skipUpgradeHeights { if h == int(upgradeInfo.Height) { diff --git a/cosmovisor/process_test.go b/cosmovisor/process_test.go index d2e9794fa2..f86ea1eb0b 100644 --- a/cosmovisor/process_test.go +++ b/cosmovisor/process_test.go @@ -160,7 +160,7 @@ func TestSkipUpgrade(t *testing.T) { for i := range cases { tc := cases[i] require := require.New(t) - h := cosmovisor.SkipUpgrade(tc.args, tc.upgradeInfo) + h := cosmovisor.IsSkipUpgradeHeight(tc.args, tc.upgradeInfo) require.Equal(h, tc.expectRes) } }