From d2299dfbf8cf9a7e59de5b84aad80d4854692a88 Mon Sep 17 00:00:00 2001 From: Cory Schwartz Date: Tue, 24 May 2022 10:24:37 -0700 Subject: [PATCH] backoff/reconnect loop --- node/health.go | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/node/health.go b/node/health.go index a61d76164..7291e7bc8 100644 --- a/node/health.go +++ b/node/health.go @@ -39,23 +39,49 @@ func NewLiveHandler(api lapi.FullNode) *HealthHandler { ctx := context.Background() h := HealthHandler{} go func() { - const reset int32 = 5 - var countdown int32 = 0 + const ( + reset int32 = 5 + maxbackoff time.Duration = time.Minute + minbackoff time.Duration = time.Second + ) + var ( + countdown int32 + headCh <-chan []*lapi.HeadChange + backoff time.Duration = minbackoff + err error + ) minutely := time.NewTicker(time.Minute) - headCh, err := api.ChainNotify(ctx) - if err != nil { - healthlog.Warnf("failed to instantiate chain notify channel; liveness cannot be determined. %s", err) - h.SetHealthy(false) - return - } for { + if headCh == nil { + healthlog.Infof("waiting %v before starting ChainNotify channel", backoff) + <-time.After(backoff) + headCh, err = api.ChainNotify(ctx) + if err != nil { + healthlog.Warnf("failed to instantiate ChainNotify channel; cannot determine liveness. %s", err) + h.SetHealthy(false) + nextbackoff := 2 * backoff + if nextbackoff > maxbackoff { + nextbackoff = maxbackoff + } + backoff = nextbackoff + continue + } else { + healthlog.Infof("started ChainNotify channel") + backoff = minbackoff + } + } select { case <-minutely.C: atomic.AddInt32(&countdown, -1) if countdown <= 0 { h.SetHealthy(false) } - case <-headCh: + case _, ok := <-headCh: + if !ok { // channel is closed, enter reconnect loop. + h.SetHealthy(false) + headCh = nil + continue + } atomic.StoreInt32(&countdown, reset) h.SetHealthy(true) }