lotus/node/health.go

91 lines
2.0 KiB
Go
Raw Normal View History

2022-05-21 01:38:17 +00:00
package node
import (
	"context"
	"net/http"
	"sync/atomic"
	"time"

	lapi "github.com/filecoin-project/lotus/api"
	logging "github.com/ipfs/go-log/v2"
	"github.com/libp2p/go-libp2p-core/network"
)
2022-05-23 16:29:11 +00:00
// healthlog is the logger used by the health-check handlers in this file,
// registered under the "healthcheck" name.
var healthlog = logging.Logger("healthcheck")
2022-05-21 01:38:17 +00:00
// HealthHandler is an http.Handler that reports a binary health status.
//
// The zero value is unhealthy. The status is kept in an int32 (1 means
// healthy, any other value means unhealthy) that is only read and written
// through sync/atomic, so SetHealthy and ServeHTTP may run on different
// goroutines.
type HealthHandler struct {
	healthy int32
}

// SetHealthy records the current health status that ServeHTTP will report.
func (h *HealthHandler) SetHealthy(healthy bool) {
	// The local must not be named h: that would collide with the receiver.
	var status int32
	if healthy {
		status = 1
	}
	atomic.StoreInt32(&h.healthy, status)
}

// ServeHTTP responds with 200 OK when the handler is currently healthy and
// with 503 Service Unavailable otherwise. No response body is written and
// the request itself is not inspected.
func (h *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if atomic.LoadInt32(&h.healthy) != 1 {
		w.WriteHeader(http.StatusServiceUnavailable)
		return
	}
	w.WriteHeader(http.StatusOK)
}
// The backend is considered alive so long as there have been recent
// head changes. Being alive doesn't mean we are up to date, just moving.
func NewLiveHandler(api lapi.FullNode) *HealthHandler {
ctx := context.Background()
h := HealthHandler{}
go func() {
const reset = 5
var countdown = 0
minutely := time.NewTicker(time.Minute)
headCh, err := api.ChainNotify(ctx)
if err != nil {
2022-05-23 16:29:11 +00:00
healthlog.Warnf("failed to instantiate chain notify channel; liveliness cannot be determined. %s", err)
h.SetHealthy(false)
return
2022-05-21 01:38:17 +00:00
}
for {
select {
case <-minutely.C:
countdown = countdown - 1
if countdown == 0 {
h.SetHealthy(false)
}
case <-headCh:
countdown = reset
h.SetHealthy(true)
}
}
}()
return &h
}
// NewReadyHandler returns a HealthHandler that reports whether we are ready
// to handle traffic. Once a minute a background goroutine queries the node
// and marks the handler healthy only when both of these hold:
//  1. sync workers are caught up: NodeStatus succeeds and reports
//     SyncStatus.Behind below the height tolerance of 5.
//  2. network reachability is known: NetAutoNatStatus succeeds and reports
//     a Reachability other than network.ReachabilityUnknown.
//
// A failed API call counts as the corresponding condition not holding.
func NewReadyHandler(api lapi.FullNode) *HealthHandler {
	handler := &HealthHandler{}
	ctx := context.Background()

	go func() {
		const maxBehind uint64 = 5

		tick := time.NewTicker(time.Minute)
		for range tick.C {
			// Both queries are issued on every tick, network first.
			natInfo, natErr := api.NetAutoNatStatus(ctx)
			status, statusErr := api.NodeStatus(ctx, false)

			netOK := natErr == nil && natInfo.Reachability != network.ReachabilityUnknown
			syncOK := statusErr == nil && status.SyncStatus.Behind < maxBehind
			handler.SetHealthy(netOK && syncOK)
		}
	}()

	return handler
}