From 0570306bb6d73d8b025f11ea9062dadcfdfb951d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?=
Date: Thu, 10 Dec 2020 14:48:37 +0000
Subject: [PATCH] add metrics for delayed blocks.

Record the delay of incoming blocks whose header timestamp is more than
5s behind the local clock in a new block/delay measure, tagged with
miner_id, and export it through BlockDelayView.

The delay is computed in whole seconds; it is converted to milliseconds
when recorded so that it matches the unit declared on the measure and
the bucket bounds of the view's distribution.

---
 chain/sub/incoming.go |  4 ++++
 metrics/metrics.go    | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/chain/sub/incoming.go b/chain/sub/incoming.go
index f161bca57..eeaa9af72 100644
--- a/chain/sub/incoming.go
+++ b/chain/sub/incoming.go
@@ -97,6 +97,10 @@ func HandleIncomingBlocks(ctx context.Context, bsub *pubsub.Subscription, s *cha
 				log.Warnw("Slow msg fetch", "cid", blk.Header.Cid(), "source", msg.GetFrom(), "msgfetch", took)
 			}
 			if delay := build.Clock.Now().Unix() - int64(blk.Header.Timestamp); delay > 5 {
+				_ = stats.RecordWithTags(ctx,
+					[]tag.Mutator{tag.Insert(metrics.MinerID, blk.Header.Miner.String())},
+					metrics.BlockDelay.M(delay * 1000), // delay is in seconds; the measure and its view buckets are in milliseconds
+				)
 				log.Warnf("Received block with large delay %d from miner %s", delay, blk.Header.Miner)
 			}
 
diff --git a/metrics/metrics.go b/metrics/metrics.go
index 9f0cad27f..996fa95b9 100644
--- a/metrics/metrics.go
+++ b/metrics/metrics.go
@@ -19,6 +19,7 @@ var (
 	Version, _     = tag.NewKey("version")
 	Commit, _      = tag.NewKey("commit")
 	PeerID, _      = tag.NewKey("peer_id")
+	MinerID, _     = tag.NewKey("miner_id")
 	FailureType, _ = tag.NewKey("failure_type")
 	Local, _       = tag.NewKey("local")
 	MessageFrom, _ = tag.NewKey("message_from")
@@ -44,6 +45,7 @@ var (
 	BlockValidationFailure              = stats.Int64("block/failure", "Counter for block validation failures", stats.UnitDimensionless)
 	BlockValidationSuccess              = stats.Int64("block/success", "Counter for block validation successes", stats.UnitDimensionless)
 	BlockValidationDurationMilliseconds = stats.Float64("block/validation_ms", "Duration for Block Validation in ms", stats.UnitMilliseconds)
+	BlockDelay                          = stats.Int64("block/delay", "Delay of accepted blocks, where delay is >5s", stats.UnitMilliseconds)
 	PeerCount                           = stats.Int64("peer/count", "Current number of FIL peers", stats.UnitDimensionless)
 	PubsubPublishMessage                = stats.Int64("pubsub/published", "Counter for total published messages", stats.UnitDimensionless)
 	PubsubDeliverMessage                = stats.Int64("pubsub/delivered", "Counter for total delivered messages", stats.UnitDimensionless)
@@ -94,6 +96,24 @@ var (
 		Measure:     BlockValidationDurationMilliseconds,
 		Aggregation: defaultMillisecondsDistribution,
 	}
+	BlockDelayView = &view.View{
+		Measure: BlockDelay,
+		TagKeys: []tag.Key{MinerID},
+		Aggregation: func() *view.Aggregation {
+			var bounds []float64
+			for i := 5; i < 29; i++ { // 5-28s, step 1s
+				bounds = append(bounds, float64(i*1000))
+			}
+			for i := 30; i < 60; i += 2 { // 30-58s, step 2s
+				bounds = append(bounds, float64(i*1000))
+			}
+			for i := 60; i <= 300; i += 10 { // 60-300s, step 10s
+				bounds = append(bounds, float64(i*1000))
+			}
+			bounds = append(bounds, 600*1000) // final cutoff at 10m
+			return view.Distribution(bounds...)
+		}(),
+	}
 	MessagePublishedView = &view.View{
 		Measure:     MessagePublished,
 		Aggregation: view.Count(),
@@ -168,6 +188,7 @@ var DefaultViews = append([]*view.View{
 	BlockValidationFailureView,
 	BlockValidationSuccessView,
 	BlockValidationDurationView,
+	BlockDelayView,
 	MessagePublishedView,
 	MessageReceivedView,
 	MessageValidationFailureView,