Merge pull request #986 from filecoin-project/feat/update-stats-to-golog

Use golog in stats tool, and better sync control
This commit is contained in:
Whyrusleeping 2020-01-07 21:38:56 -08:00 committed by GitHub
commit 11bfab67f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 80 additions and 23 deletions

View File

@ -3,11 +3,14 @@ package main
import ( import (
"context" "context"
"flag" "flag"
"log"
"os" "os"
"time" "time"
logging "github.com/ipfs/go-log"
) )
var log = logging.Logger("stats")
const ( const (
INFLUX_ADDR = "INFLUX_ADDR" INFLUX_ADDR = "INFLUX_ADDR"
INFLUX_USER = "INFLUX_USER" INFLUX_USER = "INFLUX_USER"
@ -47,7 +50,7 @@ func main() {
if !reset && height == 0 { if !reset && height == 0 {
h, err := GetLastRecordedHeight(influx, database) h, err := GetLastRecordedHeight(influx, database)
if err != nil { if err != nil {
log.Print(err) log.Info(err)
} }
height = h height = h
@ -76,17 +79,17 @@ func main() {
height := tipset.Height() height := tipset.Height()
if err := RecordTipsetPoints(ctx, api, pl, tipset); err != nil { if err := RecordTipsetPoints(ctx, api, pl, tipset); err != nil {
log.Printf("Failed to record tipset at height %d: %w", height, err) log.Warnw("Failed to record tipset", "height", height, "error", err)
continue continue
} }
if err := RecordTipsetMessagesPoints(ctx, api, pl, tipset); err != nil { if err := RecordTipsetMessagesPoints(ctx, api, pl, tipset); err != nil {
log.Printf("Failed to record messages at height %d: %w", height, err) log.Warnw("Failed to record messages", "height", height, "error", err)
continue continue
} }
if err := RecordTipsetStatePoints(ctx, api, pl, tipset); err != nil { if err := RecordTipsetStatePoints(ctx, api, pl, tipset); err != nil {
log.Printf("Failed to record state at height %d: %w", height, err) log.Warnw("Failed to record state", "height", height, "error", err)
continue continue
} }
@ -108,7 +111,7 @@ func main() {
nb.SetDatabase(database) nb.SetDatabase(database)
log.Printf("Writing %d points for height %d", len(nb.Points()), tipset.Height()) log.Infow("Adding points", "count", len(nb.Points()), "height", tipset.Height())
wq.AddBatch(nb) wq.AddBatch(nb)
} }

View File

@ -4,7 +4,6 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log"
"math/big" "math/big"
"strings" "strings"
"time" "time"
@ -57,7 +56,7 @@ func NewInfluxWriteQueue(ctx context.Context, influx client.Client) *InfluxWrite
case batch := <-ch: case batch := <-ch:
for i := 0; i < maxRetries; i++ { for i := 0; i < maxRetries; i++ {
if err := influx.Write(batch); err != nil { if err := influx.Write(batch); err != nil {
log.Printf("Failed to write batch: %w", err) log.Warnw("Failed to write batch", "error", err)
time.Sleep(time.Second * 15) time.Sleep(time.Second * 15)
continue continue
} }
@ -65,7 +64,7 @@ func NewInfluxWriteQueue(ctx context.Context, influx client.Client) *InfluxWrite
continue main continue main
} }
log.Printf("Dropping batch due to failure to write") log.Error("Dropping batch due to failure to write")
} }
} }
}() }()
@ -259,14 +258,14 @@ func RecordTipsetMessagesPoints(ctx context.Context, api api.FullNode, pl *Point
} }
func ResetDatabase(influx client.Client, database string) error { func ResetDatabase(influx client.Client, database string) error {
log.Print("Resetting database") log.Info("Resetting database")
q := client.NewQuery(fmt.Sprintf(`DROP DATABASE "%s"; CREATE DATABASE "%s";`, database, database), "", "") q := client.NewQuery(fmt.Sprintf(`DROP DATABASE "%s"; CREATE DATABASE "%s";`, database, database), "", "")
_, err := influx.Query(q) _, err := influx.Query(q)
return err return err
} }
func GetLastRecordedHeight(influx client.Client, database string) (int64, error) { func GetLastRecordedHeight(influx client.Client, database string) (int64, error) {
log.Print("Retrieving last record height") log.Info("Retrieving last record height")
q := client.NewQuery(`SELECT "value" FROM "chain.height" ORDER BY time DESC LIMIT 1`, database, "") q := client.NewQuery(`SELECT "value" FROM "chain.height" ORDER BY time DESC LIMIT 1`, database, "")
res, err := influx.Query(q) res, err := influx.Query(q)
if err != nil { if err != nil {
@ -286,7 +285,7 @@ func GetLastRecordedHeight(influx client.Client, database string) (int64, error)
return 0, err return 0, err
} }
log.Printf("Last record height %d", height) log.Infow("Last record height", "height", height)
return height, nil return height, nil
} }

View File

@ -2,7 +2,6 @@ package main
import ( import (
"context" "context"
"log"
"net/http" "net/http"
"time" "time"
@ -13,6 +12,7 @@ import (
"github.com/filecoin-project/lotus/api" "github.com/filecoin-project/lotus/api"
"github.com/filecoin-project/lotus/api/client" "github.com/filecoin-project/lotus/api/client"
"github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/build"
"github.com/filecoin-project/lotus/chain"
"github.com/filecoin-project/lotus/chain/store" "github.com/filecoin-project/lotus/chain/store"
"github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/types"
"github.com/filecoin-project/lotus/lib/jsonrpc" "github.com/filecoin-project/lotus/lib/jsonrpc"
@ -36,7 +36,7 @@ func getAPI(path string) (string, http.Header, error) {
var headers http.Header var headers http.Header
token, err := r.APIToken() token, err := r.APIToken()
if err != nil { if err != nil {
log.Printf("Couldn't load CLI token, capabilities may be limited: %w", err) log.Warnw("Couldn't load CLI token, capabilities may be limited", "error", err)
} else { } else {
headers = http.Header{} headers = http.Header{}
headers.Add("Authorization", "Bearer "+string(token)) headers.Add("Authorization", "Bearer "+string(token))
@ -46,19 +46,74 @@ func getAPI(path string) (string, http.Header, error) {
} }
func WaitForSyncComplete(ctx context.Context, napi api.FullNode) error { func WaitForSyncComplete(ctx context.Context, napi api.FullNode) error {
sync_complete:
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
case <-time.After(3 * time.Second): case <-time.After(5 * time.Second):
state, err := napi.SyncState(ctx)
if err != nil {
return err
}
for i, w := range state.ActiveSyncs {
if w.Target == nil {
continue
}
if w.Stage == api.StageSyncErrored {
log.Errorw(
"Syncing",
"worker", i,
"base", w.Base.Key(),
"target", w.Target.Key(),
"target_height", w.Target.Height(),
"height", w.Height,
"error", w.Message,
"stage", chain.SyncStageString(w.Stage),
)
} else {
log.Infow(
"Syncing",
"worker", i,
"base", w.Base.Key(),
"target", w.Target.Key(),
"target_height", w.Target.Height(),
"height", w.Height,
"stage", chain.SyncStageString(w.Stage),
)
}
if w.Stage == api.StageSyncComplete {
break sync_complete
}
}
}
}
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(5 * time.Second):
head, err := napi.ChainHead(ctx) head, err := napi.ChainHead(ctx)
if err != nil { if err != nil {
return err return err
} }
log.Printf("Height %d", head.Height()) timestampDelta := time.Now().Unix() - int64(head.MinTimestamp())
if time.Now().Unix()-int64(head.MinTimestamp()) < build.BlockDelay { log.Infow(
"Waiting for reasonable head height",
"height", head.Height(),
"timestamp_delta", timestampDelta,
)
// If we get within 20 blocks of the current exected block height we
// consider sync complete. Block propagation is not always great but we still
// want to be recording stats as soon as we can
if timestampDelta < build.BlockDelay*20 {
return nil return nil
} }
} }
@ -82,13 +137,13 @@ func GetTips(ctx context.Context, api api.FullNode, lastHeight uint64) (<-chan *
select { select {
case changes := <-notif: case changes := <-notif:
for _, change := range changes { for _, change := range changes {
log.Printf("Head event { height:%d; type: %s }", change.Val.Height(), change.Type) log.Infow("Head event", "height", change.Val.Height(), "type", change.Type)
switch change.Type { switch change.Type {
case store.HCCurrent: case store.HCCurrent:
tipsets, err := loadTipsets(ctx, api, change.Val, lastHeight) tipsets, err := loadTipsets(ctx, api, change.Val, lastHeight)
if err != nil { if err != nil {
log.Print(err) log.Info(err)
return return
} }
@ -100,17 +155,17 @@ func GetTips(ctx context.Context, api api.FullNode, lastHeight uint64) (<-chan *
} }
} }
case <-ping: case <-ping:
log.Print("Running health check") log.Info("Running health check")
cctx, cancel := context.WithTimeout(ctx, 5*time.Second) cctx, cancel := context.WithTimeout(ctx, 5*time.Second)
if _, err := api.ID(cctx); err != nil { if _, err := api.ID(cctx); err != nil {
log.Print("Health check failed") log.Error("Health check failed")
return return
} }
cancel() cancel()
log.Print("Node online") log.Info("Node online")
case <-ctx.Done(): case <-ctx.Done():
return return
} }
@ -131,7 +186,7 @@ func loadTipsets(ctx context.Context, api api.FullNode, curr *types.TipSet, lowe
break break
} }
log.Printf("Walking back { height:%d }", curr.Height()) log.Infow("Walking back", "height", curr.Height())
tipsets = append(tipsets, curr) tipsets = append(tipsets, curr)
tsk := curr.Parents() tsk := curr.Parents()