Merge pull request #986 from filecoin-project/feat/update-stats-to-golog

Use golog in stats tool, and better sync control
This commit is contained in:
Whyrusleeping 2020-01-07 21:38:56 -08:00 committed by GitHub
commit 11bfab67f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 80 additions and 23 deletions

View File

@ -3,11 +3,14 @@ package main
import (
"context"
"flag"
"log"
"os"
"time"
logging "github.com/ipfs/go-log"
)
var log = logging.Logger("stats")
const (
INFLUX_ADDR = "INFLUX_ADDR"
INFLUX_USER = "INFLUX_USER"
@ -47,7 +50,7 @@ func main() {
if !reset && height == 0 {
h, err := GetLastRecordedHeight(influx, database)
if err != nil {
log.Print(err)
log.Info(err)
}
height = h
@ -76,17 +79,17 @@ func main() {
height := tipset.Height()
if err := RecordTipsetPoints(ctx, api, pl, tipset); err != nil {
log.Printf("Failed to record tipset at height %d: %w", height, err)
log.Warnw("Failed to record tipset", "height", height, "error", err)
continue
}
if err := RecordTipsetMessagesPoints(ctx, api, pl, tipset); err != nil {
log.Printf("Failed to record messages at height %d: %w", height, err)
log.Warnw("Failed to record messages", "height", height, "error", err)
continue
}
if err := RecordTipsetStatePoints(ctx, api, pl, tipset); err != nil {
log.Printf("Failed to record state at height %d: %w", height, err)
log.Warnw("Failed to record state", "height", height, "error", err)
continue
}
@ -108,7 +111,7 @@ func main() {
nb.SetDatabase(database)
log.Printf("Writing %d points for height %d", len(nb.Points()), tipset.Height())
log.Infow("Adding points", "count", len(nb.Points()), "height", tipset.Height())
wq.AddBatch(nb)
}

View File

@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"log"
"math/big"
"strings"
"time"
@ -57,7 +56,7 @@ func NewInfluxWriteQueue(ctx context.Context, influx client.Client) *InfluxWrite
case batch := <-ch:
for i := 0; i < maxRetries; i++ {
if err := influx.Write(batch); err != nil {
log.Printf("Failed to write batch: %w", err)
log.Warnw("Failed to write batch", "error", err)
time.Sleep(time.Second * 15)
continue
}
@ -65,7 +64,7 @@ func NewInfluxWriteQueue(ctx context.Context, influx client.Client) *InfluxWrite
continue main
}
log.Printf("Dropping batch due to failure to write")
log.Error("Dropping batch due to failure to write")
}
}
}()
@ -259,14 +258,14 @@ func RecordTipsetMessagesPoints(ctx context.Context, api api.FullNode, pl *Point
}
func ResetDatabase(influx client.Client, database string) error {
log.Print("Resetting database")
log.Info("Resetting database")
q := client.NewQuery(fmt.Sprintf(`DROP DATABASE "%s"; CREATE DATABASE "%s";`, database, database), "", "")
_, err := influx.Query(q)
return err
}
func GetLastRecordedHeight(influx client.Client, database string) (int64, error) {
log.Print("Retrieving last record height")
log.Info("Retrieving last record height")
q := client.NewQuery(`SELECT "value" FROM "chain.height" ORDER BY time DESC LIMIT 1`, database, "")
res, err := influx.Query(q)
if err != nil {
@ -286,7 +285,7 @@ func GetLastRecordedHeight(influx client.Client, database string) (int64, error)
return 0, err
}
log.Printf("Last record height %d", height)
log.Infow("Last record height", "height", height)
return height, nil
}

View File

@ -2,7 +2,6 @@ package main
import (
"context"
"log"
"net/http"
"time"
@ -13,6 +12,7 @@ import (
"github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/lotus/api/client"
"github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain"
"github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/lib/jsonrpc"
@ -36,7 +36,7 @@ func getAPI(path string) (string, http.Header, error) {
var headers http.Header
token, err := r.APIToken()
if err != nil {
log.Printf("Couldn't load CLI token, capabilities may be limited: %w", err)
log.Warnw("Couldn't load CLI token, capabilities may be limited", "error", err)
} else {
headers = http.Header{}
headers.Add("Authorization", "Bearer "+string(token))
@ -46,19 +46,74 @@ func getAPI(path string) (string, http.Header, error) {
}
func WaitForSyncComplete(ctx context.Context, napi api.FullNode) error {
sync_complete:
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(3 * time.Second):
case <-time.After(5 * time.Second):
state, err := napi.SyncState(ctx)
if err != nil {
return err
}
for i, w := range state.ActiveSyncs {
if w.Target == nil {
continue
}
if w.Stage == api.StageSyncErrored {
log.Errorw(
"Syncing",
"worker", i,
"base", w.Base.Key(),
"target", w.Target.Key(),
"target_height", w.Target.Height(),
"height", w.Height,
"error", w.Message,
"stage", chain.SyncStageString(w.Stage),
)
} else {
log.Infow(
"Syncing",
"worker", i,
"base", w.Base.Key(),
"target", w.Target.Key(),
"target_height", w.Target.Height(),
"height", w.Height,
"stage", chain.SyncStageString(w.Stage),
)
}
if w.Stage == api.StageSyncComplete {
break sync_complete
}
}
}
}
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(5 * time.Second):
head, err := napi.ChainHead(ctx)
if err != nil {
return err
}
log.Printf("Height %d", head.Height())
timestampDelta := time.Now().Unix() - int64(head.MinTimestamp())
if time.Now().Unix()-int64(head.MinTimestamp()) < build.BlockDelay {
log.Infow(
"Waiting for reasonable head height",
"height", head.Height(),
"timestamp_delta", timestampDelta,
)
// If we get within 20 blocks of the current exected block height we
// consider sync complete. Block propagation is not always great but we still
// want to be recording stats as soon as we can
if timestampDelta < build.BlockDelay*20 {
return nil
}
}
@ -82,13 +137,13 @@ func GetTips(ctx context.Context, api api.FullNode, lastHeight uint64) (<-chan *
select {
case changes := <-notif:
for _, change := range changes {
log.Printf("Head event { height:%d; type: %s }", change.Val.Height(), change.Type)
log.Infow("Head event", "height", change.Val.Height(), "type", change.Type)
switch change.Type {
case store.HCCurrent:
tipsets, err := loadTipsets(ctx, api, change.Val, lastHeight)
if err != nil {
log.Print(err)
log.Info(err)
return
}
@ -100,17 +155,17 @@ func GetTips(ctx context.Context, api api.FullNode, lastHeight uint64) (<-chan *
}
}
case <-ping:
log.Print("Running health check")
log.Info("Running health check")
cctx, cancel := context.WithTimeout(ctx, 5*time.Second)
if _, err := api.ID(cctx); err != nil {
log.Print("Health check failed")
log.Error("Health check failed")
return
}
cancel()
log.Print("Node online")
log.Info("Node online")
case <-ctx.Done():
return
}
@ -131,7 +186,7 @@ func loadTipsets(ctx context.Context, api api.FullNode, curr *types.TipSet, lowe
break
}
log.Printf("Walking back { height:%d }", curr.Height())
log.Infow("Walking back", "height", curr.Height())
tipsets = append(tipsets, curr)
tsk := curr.Parents()