Merge pull request #986 from filecoin-project/feat/update-stats-to-golog
Use golog in stats tool, and better sync control
This commit is contained in:
commit
11bfab67f9
@ -3,11 +3,14 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"log"
|
|
||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
logging "github.com/ipfs/go-log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var log = logging.Logger("stats")
|
||||||
|
|
||||||
const (
|
const (
|
||||||
INFLUX_ADDR = "INFLUX_ADDR"
|
INFLUX_ADDR = "INFLUX_ADDR"
|
||||||
INFLUX_USER = "INFLUX_USER"
|
INFLUX_USER = "INFLUX_USER"
|
||||||
@ -47,7 +50,7 @@ func main() {
|
|||||||
if !reset && height == 0 {
|
if !reset && height == 0 {
|
||||||
h, err := GetLastRecordedHeight(influx, database)
|
h, err := GetLastRecordedHeight(influx, database)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
log.Info(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
height = h
|
height = h
|
||||||
@ -76,17 +79,17 @@ func main() {
|
|||||||
height := tipset.Height()
|
height := tipset.Height()
|
||||||
|
|
||||||
if err := RecordTipsetPoints(ctx, api, pl, tipset); err != nil {
|
if err := RecordTipsetPoints(ctx, api, pl, tipset); err != nil {
|
||||||
log.Printf("Failed to record tipset at height %d: %w", height, err)
|
log.Warnw("Failed to record tipset", "height", height, "error", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := RecordTipsetMessagesPoints(ctx, api, pl, tipset); err != nil {
|
if err := RecordTipsetMessagesPoints(ctx, api, pl, tipset); err != nil {
|
||||||
log.Printf("Failed to record messages at height %d: %w", height, err)
|
log.Warnw("Failed to record messages", "height", height, "error", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := RecordTipsetStatePoints(ctx, api, pl, tipset); err != nil {
|
if err := RecordTipsetStatePoints(ctx, api, pl, tipset); err != nil {
|
||||||
log.Printf("Failed to record state at height %d: %w", height, err)
|
log.Warnw("Failed to record state", "height", height, "error", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,7 +111,7 @@ func main() {
|
|||||||
|
|
||||||
nb.SetDatabase(database)
|
nb.SetDatabase(database)
|
||||||
|
|
||||||
log.Printf("Writing %d points for height %d", len(nb.Points()), tipset.Height())
|
log.Infow("Adding points", "count", len(nb.Points()), "height", tipset.Height())
|
||||||
|
|
||||||
wq.AddBatch(nb)
|
wq.AddBatch(nb)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"math/big"
|
"math/big"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -57,7 +56,7 @@ func NewInfluxWriteQueue(ctx context.Context, influx client.Client) *InfluxWrite
|
|||||||
case batch := <-ch:
|
case batch := <-ch:
|
||||||
for i := 0; i < maxRetries; i++ {
|
for i := 0; i < maxRetries; i++ {
|
||||||
if err := influx.Write(batch); err != nil {
|
if err := influx.Write(batch); err != nil {
|
||||||
log.Printf("Failed to write batch: %w", err)
|
log.Warnw("Failed to write batch", "error", err)
|
||||||
time.Sleep(time.Second * 15)
|
time.Sleep(time.Second * 15)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -65,7 +64,7 @@ func NewInfluxWriteQueue(ctx context.Context, influx client.Client) *InfluxWrite
|
|||||||
continue main
|
continue main
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Dropping batch due to failure to write")
|
log.Error("Dropping batch due to failure to write")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@ -259,14 +258,14 @@ func RecordTipsetMessagesPoints(ctx context.Context, api api.FullNode, pl *Point
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ResetDatabase(influx client.Client, database string) error {
|
func ResetDatabase(influx client.Client, database string) error {
|
||||||
log.Print("Resetting database")
|
log.Info("Resetting database")
|
||||||
q := client.NewQuery(fmt.Sprintf(`DROP DATABASE "%s"; CREATE DATABASE "%s";`, database, database), "", "")
|
q := client.NewQuery(fmt.Sprintf(`DROP DATABASE "%s"; CREATE DATABASE "%s";`, database, database), "", "")
|
||||||
_, err := influx.Query(q)
|
_, err := influx.Query(q)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetLastRecordedHeight(influx client.Client, database string) (int64, error) {
|
func GetLastRecordedHeight(influx client.Client, database string) (int64, error) {
|
||||||
log.Print("Retrieving last record height")
|
log.Info("Retrieving last record height")
|
||||||
q := client.NewQuery(`SELECT "value" FROM "chain.height" ORDER BY time DESC LIMIT 1`, database, "")
|
q := client.NewQuery(`SELECT "value" FROM "chain.height" ORDER BY time DESC LIMIT 1`, database, "")
|
||||||
res, err := influx.Query(q)
|
res, err := influx.Query(q)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -286,7 +285,7 @@ func GetLastRecordedHeight(influx client.Client, database string) (int64, error)
|
|||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Last record height %d", height)
|
log.Infow("Last record height", "height", height)
|
||||||
|
|
||||||
return height, nil
|
return height, nil
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"log"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -13,6 +12,7 @@ import (
|
|||||||
"github.com/filecoin-project/lotus/api"
|
"github.com/filecoin-project/lotus/api"
|
||||||
"github.com/filecoin-project/lotus/api/client"
|
"github.com/filecoin-project/lotus/api/client"
|
||||||
"github.com/filecoin-project/lotus/build"
|
"github.com/filecoin-project/lotus/build"
|
||||||
|
"github.com/filecoin-project/lotus/chain"
|
||||||
"github.com/filecoin-project/lotus/chain/store"
|
"github.com/filecoin-project/lotus/chain/store"
|
||||||
"github.com/filecoin-project/lotus/chain/types"
|
"github.com/filecoin-project/lotus/chain/types"
|
||||||
"github.com/filecoin-project/lotus/lib/jsonrpc"
|
"github.com/filecoin-project/lotus/lib/jsonrpc"
|
||||||
@ -36,7 +36,7 @@ func getAPI(path string) (string, http.Header, error) {
|
|||||||
var headers http.Header
|
var headers http.Header
|
||||||
token, err := r.APIToken()
|
token, err := r.APIToken()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Couldn't load CLI token, capabilities may be limited: %w", err)
|
log.Warnw("Couldn't load CLI token, capabilities may be limited", "error", err)
|
||||||
} else {
|
} else {
|
||||||
headers = http.Header{}
|
headers = http.Header{}
|
||||||
headers.Add("Authorization", "Bearer "+string(token))
|
headers.Add("Authorization", "Bearer "+string(token))
|
||||||
@ -46,19 +46,74 @@ func getAPI(path string) (string, http.Header, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func WaitForSyncComplete(ctx context.Context, napi api.FullNode) error {
|
func WaitForSyncComplete(ctx context.Context, napi api.FullNode) error {
|
||||||
|
sync_complete:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
case <-time.After(3 * time.Second):
|
case <-time.After(5 * time.Second):
|
||||||
|
state, err := napi.SyncState(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, w := range state.ActiveSyncs {
|
||||||
|
if w.Target == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if w.Stage == api.StageSyncErrored {
|
||||||
|
log.Errorw(
|
||||||
|
"Syncing",
|
||||||
|
"worker", i,
|
||||||
|
"base", w.Base.Key(),
|
||||||
|
"target", w.Target.Key(),
|
||||||
|
"target_height", w.Target.Height(),
|
||||||
|
"height", w.Height,
|
||||||
|
"error", w.Message,
|
||||||
|
"stage", chain.SyncStageString(w.Stage),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
log.Infow(
|
||||||
|
"Syncing",
|
||||||
|
"worker", i,
|
||||||
|
"base", w.Base.Key(),
|
||||||
|
"target", w.Target.Key(),
|
||||||
|
"target_height", w.Target.Height(),
|
||||||
|
"height", w.Height,
|
||||||
|
"stage", chain.SyncStageString(w.Stage),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
if w.Stage == api.StageSyncComplete {
|
||||||
|
break sync_complete
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
head, err := napi.ChainHead(ctx)
|
head, err := napi.ChainHead(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Height %d", head.Height())
|
timestampDelta := time.Now().Unix() - int64(head.MinTimestamp())
|
||||||
|
|
||||||
if time.Now().Unix()-int64(head.MinTimestamp()) < build.BlockDelay {
|
log.Infow(
|
||||||
|
"Waiting for reasonable head height",
|
||||||
|
"height", head.Height(),
|
||||||
|
"timestamp_delta", timestampDelta,
|
||||||
|
)
|
||||||
|
|
||||||
|
// If we get within 20 blocks of the current exected block height we
|
||||||
|
// consider sync complete. Block propagation is not always great but we still
|
||||||
|
// want to be recording stats as soon as we can
|
||||||
|
if timestampDelta < build.BlockDelay*20 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -82,13 +137,13 @@ func GetTips(ctx context.Context, api api.FullNode, lastHeight uint64) (<-chan *
|
|||||||
select {
|
select {
|
||||||
case changes := <-notif:
|
case changes := <-notif:
|
||||||
for _, change := range changes {
|
for _, change := range changes {
|
||||||
log.Printf("Head event { height:%d; type: %s }", change.Val.Height(), change.Type)
|
log.Infow("Head event", "height", change.Val.Height(), "type", change.Type)
|
||||||
|
|
||||||
switch change.Type {
|
switch change.Type {
|
||||||
case store.HCCurrent:
|
case store.HCCurrent:
|
||||||
tipsets, err := loadTipsets(ctx, api, change.Val, lastHeight)
|
tipsets, err := loadTipsets(ctx, api, change.Val, lastHeight)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
log.Info(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,17 +155,17 @@ func GetTips(ctx context.Context, api api.FullNode, lastHeight uint64) (<-chan *
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case <-ping:
|
case <-ping:
|
||||||
log.Print("Running health check")
|
log.Info("Running health check")
|
||||||
|
|
||||||
cctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
cctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
if _, err := api.ID(cctx); err != nil {
|
if _, err := api.ID(cctx); err != nil {
|
||||||
log.Print("Health check failed")
|
log.Error("Health check failed")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
cancel()
|
cancel()
|
||||||
|
|
||||||
log.Print("Node online")
|
log.Info("Node online")
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -131,7 +186,7 @@ func loadTipsets(ctx context.Context, api api.FullNode, curr *types.TipSet, lowe
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Walking back { height:%d }", curr.Height())
|
log.Infow("Walking back", "height", curr.Height())
|
||||||
tipsets = append(tipsets, curr)
|
tipsets = append(tipsets, curr)
|
||||||
|
|
||||||
tsk := curr.Parents()
|
tsk := curr.Parents()
|
||||||
|
Loading…
Reference in New Issue
Block a user