worker: Handle multiple manager restarts while waiting for tasks
This commit is contained in:
parent
c3d00b0ac6
commit
af1d45d969
@ -451,14 +451,24 @@ var runCmd = &cli.Command{
|
|||||||
return xerrors.Errorf("getting miner session: %w", err)
|
return xerrors.Errorf("getting miner session: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
waitQuietCh := func() chan struct{} {
|
||||||
|
out := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
workerApi.LocalWorker.WaitQuiet()
|
||||||
|
close(out)
|
||||||
|
}()
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
heartbeats := time.NewTicker(stores.HeartbeatInterval)
|
heartbeats := time.NewTicker(stores.HeartbeatInterval)
|
||||||
defer heartbeats.Stop()
|
defer heartbeats.Stop()
|
||||||
|
|
||||||
var connected, reconnect bool
|
var redeclareStorage bool
|
||||||
|
var readyCh chan struct{}
|
||||||
for {
|
for {
|
||||||
// If we're reconnecting, redeclare storage first
|
// If we're reconnecting, redeclare storage first
|
||||||
if reconnect {
|
if redeclareStorage {
|
||||||
log.Info("Redeclaring local storage")
|
log.Info("Redeclaring local storage")
|
||||||
|
|
||||||
if err := localStore.Redeclare(ctx); err != nil {
|
if err := localStore.Redeclare(ctx); err != nil {
|
||||||
@ -471,14 +481,13 @@ var runCmd = &cli.Command{
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
connected = false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info("Making sure no local tasks are running")
|
|
||||||
|
|
||||||
// TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly
|
// TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly
|
||||||
workerApi.LocalWorker.WaitQuiet()
|
if readyCh == nil {
|
||||||
|
log.Info("Making sure no local tasks are running")
|
||||||
|
readyCh = waitQuietCh()
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
curSession, err := nodeApi.Session(ctx)
|
curSession, err := nodeApi.Session(ctx)
|
||||||
@ -489,8 +498,10 @@ var runCmd = &cli.Command{
|
|||||||
minerSession = curSession
|
minerSession = curSession
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if !connected {
|
select {
|
||||||
|
case <-readyCh:
|
||||||
if err := nodeApi.WorkerConnect(ctx, "http://"+address+"/rpc/v0"); err != nil {
|
if err := nodeApi.WorkerConnect(ctx, "http://"+address+"/rpc/v0"); err != nil {
|
||||||
log.Errorf("Registering worker failed: %+v", err)
|
log.Errorf("Registering worker failed: %+v", err)
|
||||||
cancel()
|
cancel()
|
||||||
@ -498,20 +509,17 @@ var runCmd = &cli.Command{
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Info("Worker registered successfully, waiting for tasks")
|
log.Info("Worker registered successfully, waiting for tasks")
|
||||||
connected = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
select {
|
readyCh = nil
|
||||||
|
case <-heartbeats.C:
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return // graceful shutdown
|
return // graceful shutdown
|
||||||
case <-heartbeats.C:
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Errorf("LOTUS-MINER CONNECTION LOST")
|
log.Errorf("LOTUS-MINER CONNECTION LOST")
|
||||||
|
|
||||||
reconnect = true
|
redeclareStorage = true
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user