2022-05-21 01:38:17 +00:00
|
|
|
package node
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"net/http"
|
2022-05-23 19:04:13 +00:00
|
|
|
"sync/atomic"
|
2022-05-21 01:38:17 +00:00
|
|
|
"time"
|
|
|
|
|
2022-05-23 16:29:11 +00:00
|
|
|
logging "github.com/ipfs/go-log/v2"
|
2022-08-25 18:20:41 +00:00
|
|
|
"github.com/libp2p/go-libp2p/core/network"
|
2022-06-14 15:00:51 +00:00
|
|
|
|
|
|
|
lapi "github.com/filecoin-project/lotus/api"
|
2022-05-21 01:38:17 +00:00
|
|
|
)
|
|
|
|
|
2022-05-23 16:29:11 +00:00
|
|
|
var healthlog = logging.Logger("healthcheck")
|
|
|
|
|
2022-05-21 01:38:17 +00:00
|
|
|
// HealthHandler is an http.Handler that reports a single boolean health
// state: ServeHTTP answers 200 OK while the state is healthy and
// 503 Service Unavailable otherwise. The zero value is unhealthy.
type HealthHandler struct {
	// healthy holds 1 when healthy and 0 when not. It is read and written
	// through sync/atomic so that it can be updated while requests are
	// being served.
	healthy int32
}
|
|
|
|
|
|
|
|
func (h *HealthHandler) SetHealthy(healthy bool) {
|
2022-05-23 19:04:13 +00:00
|
|
|
var hi32 int32
|
2022-05-23 18:11:45 +00:00
|
|
|
if healthy {
|
2022-05-23 19:04:13 +00:00
|
|
|
hi32 = 1
|
2022-05-23 18:11:45 +00:00
|
|
|
}
|
2022-05-23 19:04:13 +00:00
|
|
|
atomic.StoreInt32(&h.healthy, hi32)
|
2022-05-21 01:38:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (h *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
2022-05-23 18:11:45 +00:00
|
|
|
if atomic.LoadInt32(&h.healthy) != 1 {
|
2022-05-21 01:38:17 +00:00
|
|
|
w.WriteHeader(http.StatusServiceUnavailable)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
w.WriteHeader(http.StatusOK)
|
|
|
|
}
|
|
|
|
|
2022-05-23 19:04:13 +00:00
|
|
|
// Check that the node is still working. That is, that it's still processing the chain.
|
|
|
|
// If there have been no recent changes, consider the node to be dead.
|
2022-05-21 01:38:17 +00:00
|
|
|
func NewLiveHandler(api lapi.FullNode) *HealthHandler {
|
|
|
|
ctx := context.Background()
|
|
|
|
h := HealthHandler{}
|
|
|
|
go func() {
|
2022-05-24 17:24:37 +00:00
|
|
|
const (
|
|
|
|
reset int32 = 5
|
|
|
|
maxbackoff time.Duration = time.Minute
|
|
|
|
minbackoff time.Duration = time.Second
|
|
|
|
)
|
|
|
|
var (
|
|
|
|
countdown int32
|
|
|
|
headCh <-chan []*lapi.HeadChange
|
2024-05-09 02:15:35 +00:00
|
|
|
backoff = minbackoff
|
2022-05-24 17:24:37 +00:00
|
|
|
err error
|
|
|
|
)
|
2022-05-21 01:38:17 +00:00
|
|
|
minutely := time.NewTicker(time.Minute)
|
|
|
|
for {
|
2022-05-24 17:24:37 +00:00
|
|
|
if headCh == nil {
|
|
|
|
healthlog.Infof("waiting %v before starting ChainNotify channel", backoff)
|
|
|
|
<-time.After(backoff)
|
|
|
|
headCh, err = api.ChainNotify(ctx)
|
|
|
|
if err != nil {
|
|
|
|
healthlog.Warnf("failed to instantiate ChainNotify channel; cannot determine liveness. %s", err)
|
|
|
|
h.SetHealthy(false)
|
|
|
|
nextbackoff := 2 * backoff
|
|
|
|
if nextbackoff > maxbackoff {
|
|
|
|
nextbackoff = maxbackoff
|
|
|
|
}
|
|
|
|
backoff = nextbackoff
|
|
|
|
continue
|
|
|
|
}
|
2024-05-09 02:15:35 +00:00
|
|
|
healthlog.Infof("started ChainNotify channel")
|
|
|
|
backoff = minbackoff
|
2022-05-24 17:24:37 +00:00
|
|
|
}
|
2022-05-21 01:38:17 +00:00
|
|
|
select {
|
|
|
|
case <-minutely.C:
|
2022-05-23 19:04:13 +00:00
|
|
|
atomic.AddInt32(&countdown, -1)
|
|
|
|
if countdown <= 0 {
|
2022-05-21 01:38:17 +00:00
|
|
|
h.SetHealthy(false)
|
|
|
|
}
|
2022-05-24 17:24:37 +00:00
|
|
|
case _, ok := <-headCh:
|
|
|
|
if !ok { // channel is closed, enter reconnect loop.
|
|
|
|
h.SetHealthy(false)
|
|
|
|
headCh = nil
|
|
|
|
continue
|
|
|
|
}
|
2022-05-23 19:04:13 +00:00
|
|
|
atomic.StoreInt32(&countdown, reset)
|
2022-05-21 01:38:17 +00:00
|
|
|
h.SetHealthy(true)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
return &h
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we are ready to handle traffic.
|
2022-05-23 19:04:13 +00:00
|
|
|
// 1. sync workers are reasonably up to date.
|
|
|
|
// 2. libp2p is servicable
|
2022-05-21 01:38:17 +00:00
|
|
|
func NewReadyHandler(api lapi.FullNode) *HealthHandler {
|
|
|
|
ctx := context.Background()
|
|
|
|
h := HealthHandler{}
|
|
|
|
go func() {
|
|
|
|
const heightTolerance = uint64(5)
|
|
|
|
var nethealth, synchealth bool
|
|
|
|
minutely := time.NewTicker(time.Minute)
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-minutely.C:
|
|
|
|
netstat, err := api.NetAutoNatStatus(ctx)
|
|
|
|
nethealth = err == nil && netstat.Reachability != network.ReachabilityUnknown
|
|
|
|
|
|
|
|
nodestat, err := api.NodeStatus(ctx, false)
|
|
|
|
synchealth = err == nil && nodestat.SyncStatus.Behind < heightTolerance
|
|
|
|
|
|
|
|
h.SetHealthy(nethealth && synchealth)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
return &h
|
|
|
|
}
|