From 53537a0af28d451e841174797d1786eb53f3f2db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Thu, 15 Apr 2021 19:43:16 +0200 Subject: [PATCH 1/3] stmgr: Improve ApplyBlocks metrics --- chain/stmgr/stmgr.go | 27 ++++++++++++++++++++++++--- metrics/metrics.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/chain/stmgr/stmgr.go b/chain/stmgr/stmgr.go index ffbe08474..9dba6a781 100644 --- a/chain/stmgr/stmgr.go +++ b/chain/stmgr/stmgr.go @@ -4,14 +4,15 @@ import ( "context" "errors" "fmt" - "sync" - "github.com/ipfs/go-cid" cbor "github.com/ipfs/go-ipld-cbor" logging "github.com/ipfs/go-log/v2" cbg "github.com/whyrusleeping/cbor-gen" + "go.opencensus.io/stats" "go.opencensus.io/trace" "golang.org/x/xerrors" + "sync" + "sync/atomic" "github.com/filecoin-project/go-address" "github.com/filecoin-project/go-state-types/abi" @@ -43,6 +44,7 @@ import ( "github.com/filecoin-project/lotus/chain/store" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/chain/vm" + "github.com/filecoin-project/lotus/metrics" ) const LookbackNoLimit = abi.ChainEpoch(-1) @@ -280,6 +282,11 @@ func (sm *StateManager) ExecutionTrace(ctx context.Context, ts *types.TipSet) (c type ExecCallback func(cid.Cid, *types.Message, *vm.ApplyRet) error func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEpoch, pstate cid.Cid, bms []store.BlockMessages, epoch abi.ChainEpoch, r vm.Rand, cb ExecCallback, baseFee abi.TokenAmount, ts *types.TipSet) (cid.Cid, cid.Cid, error) { + done := metrics.Timer(ctx, metrics.VMApplyBlocksTotal) + defer done() + + partDone := metrics.Timer(ctx, metrics.VMApplyEarly) + defer partDone() makeVmWithBaseState := func(base cid.Cid) (*vm.VM, error) { vmopt := &vm.VMOpts{ @@ -303,7 +310,6 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp } runCron := func(epoch abi.ChainEpoch) error { - cronMsg := &types.Message{ To: cron.Address, From: builtin.SystemActorAddr, @@ -362,6 +368,12 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp pstate = newState } + partDone() + partDone = metrics.Timer(ctx, metrics.VMApplyMessages) + + earlyDone := metrics.Timer(ctx, metrics.VMApplyEarly) + defer earlyDone() + var receipts []cbg.CBORMarshaler processedMsgs := make(map[cid.Cid]struct{}) for _, b := range bms { @@ -426,10 +438,16 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp } } + partDone() + partDone = metrics.Timer(ctx, metrics.VMApplyCron) + if err := runCron(epoch); err != nil { return cid.Cid{}, cid.Cid{}, err } + partDone() + partDone = metrics.Timer(ctx, metrics.VMApplyFlush) + rectarr := blockadt.MakeEmptyArray(sm.cs.ActorStore(ctx)) for i, receipt := range receipts { if err := rectarr.Set(uint64(i), receipt); err != nil { @@ -446,6 +464,9 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp return cid.Undef, cid.Undef, xerrors.Errorf("vm flush failed: %w", err) } + stats.Record(ctx, metrics.VMSends.M(int64(atomic.LoadUint64(&vm.StatSends))), + metrics.VMApplied.M(int64(atomic.LoadUint64(&vm.StatApplied)))) + return st, rectroot, nil } diff --git a/metrics/metrics.go b/metrics/metrics.go index 5428a81bc..9f6eb4b42 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -76,6 +76,13 @@ var ( PubsubDropRPC = stats.Int64("pubsub/drop_rpc", "Counter for total dropped RPCs", stats.UnitDimensionless) VMFlushCopyDuration = stats.Float64("vm/flush_copy_ms", "Time spent in VM Flush Copy", stats.UnitMilliseconds) VMFlushCopyCount = stats.Int64("vm/flush_copy_count", "Number of copied objects", stats.UnitDimensionless) + VMApplyBlocksTotal = stats.Float64("vm/applyblocks_total_ms", "Time spent applying block state", stats.UnitMilliseconds) + VMApplyMessages = stats.Float64("vm/applyblocks_messages", "Time spent applying block messages", stats.UnitMilliseconds) + VMApplyEarly = stats.Float64("vm/applyblocks_early", "Time spent in early apply-blocks (null cron, upgrades)", stats.UnitMilliseconds) + VMApplyCron = stats.Float64("vm/applyblocks_cron", "Time spent in cron", stats.UnitMilliseconds) + VMApplyFlush = stats.Float64("vm/applyblocks_flush", "Time spent flushing vm state", stats.UnitMilliseconds) + VMSends = stats.Int64("vm/sends", "Counter for sends processed by the VM", stats.UnitDimensionless) + VMApplied = stats.Int64("vm/applied", "Counter for messages (including internal messages) processed by the VM", stats.UnitDimensionless) // miner WorkerCallsStarted = stats.Int64("sealing/worker_calls_started", "Counter of started worker tasks", stats.UnitDimensionless) @@ -208,6 +215,34 @@ var ( Measure: VMFlushCopyCount, Aggregation: view.Sum(), } + VMApplyBlocksTotalView = &view.View{ + Measure: VMApplyBlocksTotal, + Aggregation: defaultMillisecondsDistribution, + } + VMApplyMessagesView = &view.View{ + Measure: VMApplyMessages, + Aggregation: defaultMillisecondsDistribution, + } + VMApplyEarlyView = &view.View{ + Measure: VMApplyEarly, + Aggregation: defaultMillisecondsDistribution, + } + VMApplyCronView = &view.View{ + Measure: VMApplyCron, + Aggregation: defaultMillisecondsDistribution, + } + VMApplyFlushView = &view.View{ + Measure: VMApplyFlush, + Aggregation: defaultMillisecondsDistribution, + } + VMSendsView = &view.View{ + Measure: VMSends, + Aggregation: view.LastValue(), + } + VMAppliedView = &view.View{ + Measure: VMApplied, + Aggregation: view.LastValue(), + } // miner WorkerCallsStartedView = &view.View{ @@ -292,6 +327,13 @@ var ChainNodeViews = append([]*view.View{ SplitstoreCompactionHotView, SplitstoreCompactionColdView, SplitstoreCompactionDeadView, + VMApplyBlocksTotalView, + VMApplyMessagesView, + VMApplyEarlyView, + VMApplyCronView, + VMApplyFlushView, + VMSendsView, + VMAppliedView, }, DefaultViews...) var MinerNodeViews = append([]*view.View{ From 1b696aae5f3e125e10132be1dcf493ef546f17c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Fri, 16 Apr 2021 14:57:59 +0200 Subject: [PATCH 2/3] stmgr: Fix VMApplyEarly metric --- chain/stmgr/stmgr.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/chain/stmgr/stmgr.go b/chain/stmgr/stmgr.go index 9dba6a781..ed8c18242 100644 --- a/chain/stmgr/stmgr.go +++ b/chain/stmgr/stmgr.go @@ -371,9 +371,6 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp partDone() partDone = metrics.Timer(ctx, metrics.VMApplyMessages) - earlyDone := metrics.Timer(ctx, metrics.VMApplyEarly) - defer earlyDone() - var receipts []cbg.CBORMarshaler processedMsgs := make(map[cid.Cid]struct{}) for _, b := range bms { From bb7801e6b71b69034ceb247b506799c8aab4d048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Apr 2021 16:17:31 +0200 Subject: [PATCH 3/3] fix lint --- chain/stmgr/stmgr.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/chain/stmgr/stmgr.go b/chain/stmgr/stmgr.go index ed8c18242..7f8500399 100644 --- a/chain/stmgr/stmgr.go +++ b/chain/stmgr/stmgr.go @@ -4,6 +4,9 @@ import ( "context" "errors" "fmt" + "sync" + "sync/atomic" + "github.com/ipfs/go-cid" cbor "github.com/ipfs/go-ipld-cbor" logging "github.com/ipfs/go-log/v2" @@ -11,8 +14,6 @@ import ( "go.opencensus.io/stats" "go.opencensus.io/trace" "golang.org/x/xerrors" - "sync" - "sync/atomic" "github.com/filecoin-project/go-address" "github.com/filecoin-project/go-state-types/abi" @@ -286,7 +287,9 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp defer done() partDone := metrics.Timer(ctx, metrics.VMApplyEarly) - defer partDone() + defer func() { + partDone() + }() makeVmWithBaseState := func(base cid.Cid) (*vm.VM, error) { vmopt := &vm.VMOpts{