From 8062f200bdcdf2753a54f3221839fdcd56bf317f Mon Sep 17 00:00:00 2001 From: LexLuthr <88259624+LexLuthr@users.noreply.github.com> Date: Sat, 30 Mar 2024 20:40:32 +0400 Subject: [PATCH] fix: curio: refactor curio graceful shutdown (#11794) * refactor curio shutdown * apply suggestions, remove provider cli --- cmd/curio/run.go | 3 +- documentation/en/cli-lotus-provider.md | 410 ------------------------- itests/harmonytask_test.go | 12 +- itests/kit/ensemble.go | 2 +- lib/harmony/harmonytask/harmonytask.go | 56 +++- 5 files changed, 53 insertions(+), 430 deletions(-) delete mode 100644 documentation/en/cli-lotus-provider.md diff --git a/cmd/curio/run.go b/cmd/curio/run.go index 35fdf4a4d..5974a5405 100644 --- a/cmd/curio/run.go +++ b/cmd/curio/run.go @@ -5,7 +5,6 @@ import ( "fmt" "os" "strings" - "time" "github.com/pkg/errors" "github.com/urfave/cli/v2" @@ -128,7 +127,7 @@ var runCmd = &cli.Command{ if err != nil { return nil } - defer taskEngine.GracefullyTerminate(time.Hour) + defer taskEngine.GracefullyTerminate() err = rpc.ListenAndServe(ctx, dependencies, shutdownChan) // Monitor for shutdown. if err != nil { diff --git a/documentation/en/cli-lotus-provider.md b/documentation/en/cli-lotus-provider.md deleted file mode 100644 index 29cfb7721..000000000 --- a/documentation/en/cli-lotus-provider.md +++ /dev/null @@ -1,410 +0,0 @@ -# lotus-provider -``` -NAME: - lotus-provider - Filecoin decentralized storage network provider - -USAGE: - lotus-provider [global options] command [command options] [arguments...] - -VERSION: - 1.26.0 - -COMMANDS: - run Start a lotus provider process - stop Stop a running lotus provider - config Manage node config by layers. The layer 'base' will always be applied. - test Utility functions for testing - version Print version - help, h Shows a list of commands or help for one command - DEVELOPER: - auth Manage RPC permissions - log Manage logging - wait-api Wait for lotus api to come online - fetch-params Fetch proving parameters - -GLOBAL OPTIONS: - --color use color in display output (default: depends on output being a TTY) - --db-host value Command separated list of hostnames for yugabyte cluster (default: "yugabyte") [$LOTUS_DB_HOST] - --db-name value (default: "yugabyte") [$LOTUS_DB_NAME, $LOTUS_HARMONYDB_HOSTS] - --db-user value (default: "yugabyte") [$LOTUS_DB_USER, $LOTUS_HARMONYDB_USERNAME] - --db-password value (default: "yugabyte") [$LOTUS_DB_PASSWORD, $LOTUS_HARMONYDB_PASSWORD] - --layers value (default: "base") [$LOTUS_LAYERS, $LOTUS_CONFIG_LAYERS] - --repo-path value (default: "~/.lotusprovider") [$LOTUS_REPO_PATH] - --vv enables very verbose mode, useful for debugging the CLI (default: false) - --help, -h show help - --version, -v print the version -``` - -## lotus-provider run -``` -NAME: - lotus-provider run - Start a lotus provider process - -USAGE: - lotus-provider run [command options] [arguments...] - -OPTIONS: - --listen value host address and port the worker api will listen on (default: "0.0.0.0:12300") [$LOTUS_WORKER_LISTEN] - --nosync don't check full-node sync status (default: false) - --manage-fdlimit manage open file limit (default: true) - --layers value [ --layers value ] list of layers to be interpreted (atop defaults). Default: base (default: "base") - --storage-json value path to json file containing storage config (default: "~/.lotus-provider/storage.json") - --journal value path to journal files (default: "~/.lotus-provider/") - --help, -h show help -``` - -## lotus-provider stop -``` -NAME: - lotus-provider stop - Stop a running lotus provider - -USAGE: - lotus-provider stop [command options] [arguments...] - -OPTIONS: - --help, -h show help -``` - -## lotus-provider config -``` -NAME: - lotus-provider config - Manage node config by layers. The layer 'base' will always be applied. - -USAGE: - lotus-provider config command [command options] [arguments...] - -COMMANDS: - default, defaults Print default node config - set, add, update, create Set a config layer or the base by providing a filename or stdin. - get, cat, show Get a config layer by name. You may want to pipe the output to a file, or use 'less' - list, ls List config layers you can get. - interpret, view, stacked, stack Interpret stacked config layers by this version of lotus-provider, with system-generated comments. - remove, rm, del, delete Remove a named config layer. - from-miner Express a database config (for lotus-provider) from an existing miner. - help, h Shows a list of commands or help for one command - -OPTIONS: - --help, -h show help -``` - -### lotus-provider config default -``` -NAME: - lotus-provider config default - Print default node config - -USAGE: - lotus-provider config default [command options] [arguments...] - -OPTIONS: - --no-comment don't comment default values (default: false) - --help, -h show help -``` - -### lotus-provider config set -``` -NAME: - lotus-provider config set - Set a config layer or the base by providing a filename or stdin. - -USAGE: - lotus-provider config set [command options] a layer's file name - -OPTIONS: - --title value title of the config layer (req'd for stdin) - --help, -h show help -``` - -### lotus-provider config get -``` -NAME: - lotus-provider config get - Get a config layer by name. You may want to pipe the output to a file, or use 'less' - -USAGE: - lotus-provider config get [command options] layer name - -OPTIONS: - --help, -h show help -``` - -### lotus-provider config list -``` -NAME: - lotus-provider config list - List config layers you can get. - -USAGE: - lotus-provider config list [command options] [arguments...] - -OPTIONS: - --help, -h show help -``` - -### lotus-provider config interpret -``` -NAME: - lotus-provider config interpret - Interpret stacked config layers by this version of lotus-provider, with system-generated comments. - -USAGE: - lotus-provider config interpret [command options] a list of layers to be interpreted as the final config - -OPTIONS: - --layers value [ --layers value ] comma or space separated list of layers to be interpreted (default: "base") - --help, -h show help -``` - -### lotus-provider config remove -``` -NAME: - lotus-provider config remove - Remove a named config layer. - -USAGE: - lotus-provider config remove [command options] [arguments...] - -OPTIONS: - --help, -h show help -``` - -### lotus-provider config from-miner -``` -NAME: - lotus-provider config from-miner - Express a database config (for lotus-provider) from an existing miner. - -USAGE: - lotus-provider config from-miner [command options] [arguments...] - -DESCRIPTION: - Express a database config (for lotus-provider) from an existing miner. - -OPTIONS: - --miner-repo value, --storagerepo value Specify miner repo path. flag(storagerepo) and env(LOTUS_STORAGE_PATH) are DEPRECATION, will REMOVE SOON (default: "~/.lotusminer") [$LOTUS_MINER_PATH, $LOTUS_STORAGE_PATH] - --to-layer value, -t value The layer name for this data push. 'base' is recommended for single-miner setup. - --overwrite, -o Use this with --to-layer to replace an existing layer (default: false) - --help, -h show help -``` - -## lotus-provider test -``` -NAME: - lotus-provider test - Utility functions for testing - -USAGE: - lotus-provider test command [command options] [arguments...] - -COMMANDS: - window-post, wd, windowpost, wdpost Compute a proof-of-spacetime for a sector (requires the sector to be pre-sealed). These will not send to the chain. - help, h Shows a list of commands or help for one command - -OPTIONS: - --help, -h show help -``` - -### lotus-provider test window-post -``` -NAME: - lotus-provider test window-post - Compute a proof-of-spacetime for a sector (requires the sector to be pre-sealed). These will not send to the chain. - -USAGE: - lotus-provider test window-post command [command options] [arguments...] - -COMMANDS: - here, cli Compute WindowPoSt for performance and configuration testing. - task, scheduled, schedule, async, asynchronous Test the windowpost scheduler by running it on the next available lotus-provider. - help, h Shows a list of commands or help for one command - -OPTIONS: - --help, -h show help -``` - -#### lotus-provider test window-post here -``` -NAME: - lotus-provider test window-post here - Compute WindowPoSt for performance and configuration testing. - -USAGE: - lotus-provider test window-post here [command options] [deadline index] - -DESCRIPTION: - Note: This command is intended to be used to verify PoSt compute performance. - It will not send any messages to the chain. Since it can compute any deadline, output may be incorrectly timed for the chain. - -OPTIONS: - --deadline value deadline to compute WindowPoSt for (default: 0) - --layers value [ --layers value ] list of layers to be interpreted (atop defaults). Default: base (default: "base") - --storage-json value path to json file containing storage config (default: "~/.lotus-provider/storage.json") - --partition value partition to compute WindowPoSt for (default: 0) - --help, -h show help -``` - -#### lotus-provider test window-post task -``` -NAME: - lotus-provider test window-post task - Test the windowpost scheduler by running it on the next available lotus-provider. - -USAGE: - lotus-provider test window-post task [command options] [arguments...] - -OPTIONS: - --deadline value deadline to compute WindowPoSt for (default: 0) - --layers value [ --layers value ] list of layers to be interpreted (atop defaults). Default: base (default: "base") - --help, -h show help -``` - -## lotus-provider version -``` -NAME: - lotus-provider version - Print version - -USAGE: - lotus-provider version [command options] [arguments...] - -OPTIONS: - --help, -h show help -``` - -## lotus-provider auth -``` -NAME: - lotus-provider auth - Manage RPC permissions - -USAGE: - lotus-provider auth command [command options] [arguments...] - -COMMANDS: - create-token Create token - api-info Get token with API info required to connect to this node - help, h Shows a list of commands or help for one command - -OPTIONS: - --help, -h show help -``` - -### lotus-provider auth create-token -``` -NAME: - lotus-provider auth create-token - Create token - -USAGE: - lotus-provider auth create-token [command options] [arguments...] - -OPTIONS: - --perm value permission to assign to the token, one of: read, write, sign, admin - --help, -h show help -``` - -### lotus-provider auth api-info -``` -NAME: - lotus-provider auth api-info - Get token with API info required to connect to this node - -USAGE: - lotus-provider auth api-info [command options] [arguments...] - -OPTIONS: - --perm value permission to assign to the token, one of: read, write, sign, admin - --help, -h show help -``` - -## lotus-provider log -``` -NAME: - lotus-provider log - Manage logging - -USAGE: - lotus-provider log command [command options] [arguments...] - -COMMANDS: - list List log systems - set-level Set log level - alerts Get alert states - help, h Shows a list of commands or help for one command - -OPTIONS: - --help, -h show help -``` - -### lotus-provider log list -``` -NAME: - lotus-provider log list - List log systems - -USAGE: - lotus-provider log list [command options] [arguments...] - -OPTIONS: - --help, -h show help -``` - -### lotus-provider log set-level -``` -NAME: - lotus-provider log set-level - Set log level - -USAGE: - lotus-provider log set-level [command options] [level] - -DESCRIPTION: - Set the log level for logging systems: - - The system flag can be specified multiple times. - - eg) log set-level --system chain --system chainxchg debug - - Available Levels: - debug - info - warn - error - - Environment Variables: - GOLOG_LOG_LEVEL - Default log level for all log systems - GOLOG_LOG_FMT - Change output log format (json, nocolor) - GOLOG_FILE - Write logs to file - GOLOG_OUTPUT - Specify whether to output to file, stderr, stdout or a combination, i.e. file+stderr - - -OPTIONS: - --system value [ --system value ] limit to log system - --help, -h show help -``` - -### lotus-provider log alerts -``` -NAME: - lotus-provider log alerts - Get alert states - -USAGE: - lotus-provider log alerts [command options] [arguments...] - -OPTIONS: - --all get all (active and inactive) alerts (default: false) - --help, -h show help -``` - -## lotus-provider wait-api -``` -NAME: - lotus-provider wait-api - Wait for lotus api to come online - -USAGE: - lotus-provider wait-api [command options] [arguments...] - -CATEGORY: - DEVELOPER - -OPTIONS: - --timeout value duration to wait till fail (default: 30s) - --help, -h show help -``` - -## lotus-provider fetch-params -``` -NAME: - lotus-provider fetch-params - Fetch proving parameters - -USAGE: - lotus-provider fetch-params [command options] [sectorSize] - -CATEGORY: - DEVELOPER - -OPTIONS: - --help, -h show help -``` diff --git a/itests/harmonytask_test.go b/itests/harmonytask_test.go index 463f131d8..beef04c8d 100644 --- a/itests/harmonytask_test.go +++ b/itests/harmonytask_test.go @@ -90,7 +90,7 @@ func TestHarmonyTasks(t *testing.T) { e, err := harmonytask.New(cdb, []harmonytask.TaskInterface{t1}, "test:1") require.NoError(t, err) time.Sleep(time.Second) // do the work. FLAKYNESS RISK HERE. - e.GracefullyTerminate(time.Minute) + e.GracefullyTerminate() expected := []string{"taskResult56", "taskResult73"} sort.Strings(t1.WorkCompleted) require.Equal(t, expected, t1.WorkCompleted, "unexpected results") @@ -173,8 +173,8 @@ func TestHarmonyTasksWith2PartiesPolling(t *testing.T) { worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{workerParty}, "test:2") require.NoError(t, err) time.Sleep(time.Second) // do the work. FLAKYNESS RISK HERE. - sender.GracefullyTerminate(time.Second * 5) - worker.GracefullyTerminate(time.Second * 5) + sender.GracefullyTerminate() + worker.GracefullyTerminate() sort.Strings(dest) require.Equal(t, []string{"A", "B"}, dest) }) @@ -204,7 +204,7 @@ func TestWorkStealing(t *testing.T) { worker, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fooLetterSaver(t, cdb, &dest)}, "test:2") require.ErrorIs(t, err, nil) time.Sleep(time.Second) // do the work. FLAKYNESS RISK HERE. - worker.GracefullyTerminate(time.Second * 5) + worker.GracefullyTerminate() require.Equal(t, []string{"M"}, dest) }) } @@ -243,8 +243,8 @@ func TestTaskRetry(t *testing.T) { rcv, err := harmonytask.New(cdb, []harmonytask.TaskInterface{fails2xPerMsg}, "test:2") require.NoError(t, err) time.Sleep(time.Second) - sender.GracefullyTerminate(time.Hour) - rcv.GracefullyTerminate(time.Hour) + sender.GracefullyTerminate() + rcv.GracefullyTerminate() sort.Strings(dest) require.Equal(t, []string{"A", "B"}, dest) type hist struct { diff --git a/itests/kit/ensemble.go b/itests/kit/ensemble.go index 9588d2526..03a36dc45 100644 --- a/itests/kit/ensemble.go +++ b/itests/kit/ensemble.go @@ -913,7 +913,7 @@ func (n *Ensemble) Start() *Ensemble { if err != nil { return nil } - defer taskEngine.GracefullyTerminate(time.Hour) + defer taskEngine.GracefullyTerminate() err = rpc.ListenAndServe(ctx, p.Deps, shutdownChan) // Monitor for shutdown. require.NoError(n.t, err) diff --git a/lib/harmony/harmonytask/harmonytask.go b/lib/harmony/harmonytask/harmonytask.go index 2d8036e2f..dc71b299c 100644 --- a/lib/harmony/harmonytask/harmonytask.go +++ b/lib/harmony/harmonytask/harmonytask.go @@ -193,22 +193,56 @@ func New( // GracefullyTerminate hangs until all present tasks have completed. // Call this to cleanly exit the process. As some processes are long-running, // passing a deadline will ignore those still running (to be picked-up later). -func (e *TaskEngine) GracefullyTerminate(deadline time.Duration) { +func (e *TaskEngine) GracefullyTerminate() { + + // call the cancel func to avoid picking up any new tasks. Running tasks have context.Background() + // Call shutdown to stop posting heartbeat to DB. e.grace() e.reg.Shutdown() - deadlineChan := time.NewTimer(deadline).C -top: - for _, h := range e.handlers { - if h.Count.Load() > 0 { - select { - case <-deadlineChan: - return - default: - time.Sleep(time.Millisecond) - goto top + + // If there are any Post tasks then wait till Timeout and check again + // When no Post tasks are active, break out of loop and call the shutdown function + for { + timeout := time.Millisecond + for _, h := range e.handlers { + if h.TaskTypeDetails.Name == "WinPost" && h.Count.Load() > 0 { + timeout = time.Second + log.Infof("node shutdown deferred for %f seconds", timeout.Seconds()) + continue + } + if h.TaskTypeDetails.Name == "WdPost" && h.Count.Load() > 0 { + timeout = time.Second * 3 + log.Infof("node shutdown deferred for %f seconds due to running WdPost task", timeout.Seconds()) + continue + } + + if h.TaskTypeDetails.Name == "WdPostSubmit" && h.Count.Load() > 0 { + timeout = time.Second + log.Infof("node shutdown deferred for %f seconds due to running WdPostSubmit task", timeout.Seconds()) + continue + } + + if h.TaskTypeDetails.Name == "WdPostRecover" && h.Count.Load() > 0 { + timeout = time.Second + log.Infof("node shutdown deferred for %f seconds due to running WdPostRecover task", timeout.Seconds()) + continue + } + + // Test tasks for itest + if h.TaskTypeDetails.Name == "ThingOne" && h.Count.Load() > 0 { + timeout = time.Second + log.Infof("node shutdown deferred for %f seconds due to running itest task", timeout.Seconds()) + continue } } + if timeout > time.Millisecond { + time.Sleep(timeout) + continue + } + break } + + return } func (e *TaskEngine) poller() {