Merge pull request #1294 from natewalck/more-opencensus-metrics
More opencensus metrics
This commit is contained in:
commit
41bf668189
@ -13,6 +13,8 @@ import (
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/chain/state"
|
||||
"github.com/filecoin-project/lotus/chain/vm"
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
"go.opencensus.io/stats"
|
||||
"go.opencensus.io/trace"
|
||||
"go.uber.org/multierr"
|
||||
|
||||
@ -100,7 +102,15 @@ func NewChainStore(bs bstore.Blockstore, ds dstore.Batching, vmcalls *types.VMSy
|
||||
return nil
|
||||
}
|
||||
|
||||
cs.headChangeNotifs = append(cs.headChangeNotifs, hcnf)
|
||||
hcmetric := func(rev, app []*types.TipSet) error {
|
||||
ctx := context.Background()
|
||||
for _, r := range app {
|
||||
stats.Record(ctx, metrics.ChainNodeHeight.M(int64(r.Height())))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
cs.headChangeNotifs = append(cs.headChangeNotifs, hcnf, hcmetric)
|
||||
|
||||
return cs
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package sub
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
lru "github.com/hashicorp/golang-lru"
|
||||
@ -10,11 +11,14 @@ import (
|
||||
connmgr "github.com/libp2p/go-libp2p-core/connmgr"
|
||||
peer "github.com/libp2p/go-libp2p-peer"
|
||||
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
||||
"go.opencensus.io/stats"
|
||||
"go.opencensus.io/tag"
|
||||
|
||||
"github.com/filecoin-project/lotus/build"
|
||||
"github.com/filecoin-project/lotus/chain"
|
||||
"github.com/filecoin-project/lotus/chain/messagepool"
|
||||
"github.com/filecoin-project/lotus/chain/types"
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
)
|
||||
|
||||
var log = logging.Logger("sub")
|
||||
@ -107,15 +111,25 @@ func (bv *BlockValidator) flagPeer(p peer.ID) {
|
||||
}
|
||||
|
||||
func (bv *BlockValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) bool {
|
||||
stats.Record(ctx, metrics.BlockReceived.M(1))
|
||||
ctx, _ = tag.New(
|
||||
ctx,
|
||||
tag.Insert(metrics.PeerID, pid.String()),
|
||||
tag.Insert(metrics.ReceivedFrom, msg.ReceivedFrom.String()),
|
||||
)
|
||||
blk, err := types.DecodeBlockMsg(msg.GetData())
|
||||
if err != nil {
|
||||
log.Error("got invalid block over pubsub: ", err)
|
||||
ctx, _ = tag.New(ctx, tag.Insert(metrics.FailureType, "invalid"))
|
||||
stats.Record(ctx, metrics.BlockValidationFailure.M(1))
|
||||
bv.flagPeer(pid)
|
||||
return false
|
||||
}
|
||||
|
||||
if len(blk.BlsMessages)+len(blk.SecpkMessages) > build.BlockMessageLimit {
|
||||
log.Warnf("received block with too many messages over pubsub")
|
||||
ctx, _ = tag.New(ctx, tag.Insert(metrics.FailureType, "too_many_messages"))
|
||||
stats.Record(ctx, metrics.BlockValidationFailure.M(1))
|
||||
bv.flagPeer(pid)
|
||||
return false
|
||||
}
|
||||
@ -127,6 +141,7 @@ func (bv *BlockValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub
|
||||
}
|
||||
|
||||
msg.ValidatorData = blk
|
||||
stats.Record(ctx, metrics.BlockValidationSuccess.M(1))
|
||||
return true
|
||||
}
|
||||
|
||||
@ -162,17 +177,29 @@ func NewMessageValidator(mp *messagepool.MessagePool) *MessageValidator {
|
||||
}
|
||||
|
||||
func (mv *MessageValidator) Validate(ctx context.Context, pid peer.ID, msg *pubsub.Message) bool {
|
||||
stats.Record(ctx, metrics.MessageReceived.M(1))
|
||||
ctx, _ = tag.New(ctx, tag.Insert(metrics.PeerID, pid.String()))
|
||||
m, err := types.DecodeSignedMessage(msg.Message.GetData())
|
||||
if err != nil {
|
||||
log.Warnf("failed to decode incoming message: %s", err)
|
||||
ctx, _ = tag.New(ctx, tag.Insert(metrics.FailureType, "decode"))
|
||||
stats.Record(ctx, metrics.MessageValidationFailure.M(1))
|
||||
return false
|
||||
}
|
||||
|
||||
if err := mv.mpool.Add(m); err != nil {
|
||||
log.Warnf("failed to add message from network to message pool (From: %s, To: %s, Nonce: %d, Value: %s): %s", m.Message.From, m.Message.To, m.Message.Nonce, types.FIL(m.Message.Value), err)
|
||||
ctx, _ = tag.New(
|
||||
ctx,
|
||||
tag.Insert(metrics.MessageFrom, m.Message.From.String()),
|
||||
tag.Insert(metrics.MessageTo, m.Message.To.String()),
|
||||
tag.Insert(metrics.MessageNonce, fmt.Sprint(m.Message.Nonce)),
|
||||
tag.Insert(metrics.FailureType, "add"),
|
||||
)
|
||||
stats.Record(ctx, metrics.MessageValidationFailure.M(1))
|
||||
return false
|
||||
}
|
||||
|
||||
stats.Record(ctx, metrics.MessageValidationSuccess.M(1))
|
||||
return true
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
"github.com/libp2p/go-libp2p-core/peer"
|
||||
cbg "github.com/whyrusleeping/cbor-gen"
|
||||
"github.com/whyrusleeping/pubsub"
|
||||
"go.opencensus.io/stats"
|
||||
"go.opencensus.io/trace"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
@ -38,6 +39,7 @@ import (
|
||||
"github.com/filecoin-project/lotus/chain/store"
|
||||
"github.com/filecoin-project/lotus/chain/types"
|
||||
"github.com/filecoin-project/lotus/lib/sigs"
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
)
|
||||
|
||||
var log = logging.Logger("chain")
|
||||
@ -1038,6 +1040,7 @@ func (syncer *Syncer) syncMessagesAndCheckState(ctx context.Context, headers []*
|
||||
return xerrors.Errorf("message processing failed: %w", err)
|
||||
}
|
||||
|
||||
stats.Record(ctx, metrics.ChainNodeWorkerHeight.M(int64(fts.TipSet().Height())))
|
||||
ss.SetHeight(fts.TipSet().Height())
|
||||
|
||||
return nil
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"github.com/filecoin-project/lotus/chain/stmgr"
|
||||
"github.com/filecoin-project/lotus/chain/store"
|
||||
"github.com/filecoin-project/lotus/chain/vm"
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
"github.com/filecoin-project/lotus/node"
|
||||
"github.com/filecoin-project/lotus/node/modules"
|
||||
"github.com/filecoin-project/lotus/node/modules/testing"
|
||||
@ -36,12 +37,6 @@ const (
|
||||
preSealedSectorsFlag = "genesis-presealed-sectors"
|
||||
)
|
||||
|
||||
var (
|
||||
lotusInfo = stats.Int64("info", "Arbitrary counter to tag lotus info to", stats.UnitDimensionless)
|
||||
version, _ = tag.NewKey("version")
|
||||
commit, _ = tag.NewKey("commit")
|
||||
)
|
||||
|
||||
// DaemonCmd is the `go-lotus daemon` command
|
||||
var DaemonCmd = &cli.Command{
|
||||
Name: "daemon",
|
||||
@ -99,7 +94,7 @@ var DaemonCmd = &cli.Command{
|
||||
defer pprof.StopCPUProfile()
|
||||
}
|
||||
|
||||
ctx, _ := tag.New(context.Background(), tag.Insert(version, build.BuildVersion), tag.Insert(commit, build.CurrentCommit))
|
||||
ctx, _ := tag.New(context.Background(), tag.Insert(metrics.Version, build.BuildVersion), tag.Insert(metrics.Commit, build.CurrentCommit))
|
||||
{
|
||||
dir, err := homedir.Expand(cctx.String("repo"))
|
||||
if err != nil {
|
||||
@ -180,21 +175,15 @@ var DaemonCmd = &cli.Command{
|
||||
return xerrors.Errorf("initializing node: %w", err)
|
||||
}
|
||||
|
||||
// We are using this metric to tag info about lotus even though
|
||||
// it doesn't contain any actual metrics
|
||||
// Register all metric views
|
||||
if err = view.Register(
|
||||
&view.View{
|
||||
Name: "info",
|
||||
Description: "Lotus node information",
|
||||
Measure: lotusInfo,
|
||||
Aggregation: view.LastValue(),
|
||||
TagKeys: []tag.Key{version, commit},
|
||||
},
|
||||
metrics.DefaultViews...,
|
||||
); err != nil {
|
||||
log.Fatalf("Cannot register the view: %v", err)
|
||||
}
|
||||
|
||||
// Set the metric to one so it is published to the exporter
|
||||
stats.Record(ctx, lotusInfo.M(1))
|
||||
stats.Record(ctx, metrics.LotusInfo.M(1))
|
||||
|
||||
endpoint, err := r.APIEndpoint()
|
||||
if err != nil {
|
||||
|
@ -9,6 +9,9 @@ import (
|
||||
"io"
|
||||
"reflect"
|
||||
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
"go.opencensus.io/stats"
|
||||
"go.opencensus.io/tag"
|
||||
"go.opencensus.io/trace"
|
||||
"go.opencensus.io/trace/propagation"
|
||||
"golang.org/x/xerrors"
|
||||
@ -151,18 +154,22 @@ func (handlers) getSpan(ctx context.Context, req request) (context.Context, *tra
|
||||
}
|
||||
|
||||
func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer)), rpcError rpcErrFunc, done func(keepCtx bool), chOut chanOut) {
|
||||
// Not sure if we need to sanitize the incoming req.Method or not.
|
||||
ctx, span := h.getSpan(ctx, req)
|
||||
ctx, _ = tag.New(ctx, tag.Insert(metrics.RPCMethod, req.Method))
|
||||
defer span.End()
|
||||
|
||||
handler, ok := h[req.Method]
|
||||
if !ok {
|
||||
rpcError(w, &req, rpcMethodNotFound, fmt.Errorf("method '%s' not found", req.Method))
|
||||
stats.Record(ctx, metrics.RPCInvalidMethod.M(1))
|
||||
done(false)
|
||||
return
|
||||
}
|
||||
|
||||
if len(req.Params) != handler.nParams {
|
||||
rpcError(w, &req, rpcInvalidParams, fmt.Errorf("wrong param count"))
|
||||
stats.Record(ctx, metrics.RPCRequestError.M(1))
|
||||
done(false)
|
||||
return
|
||||
}
|
||||
@ -172,6 +179,7 @@ func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer
|
||||
|
||||
if chOut == nil && outCh {
|
||||
rpcError(w, &req, rpcMethodNotFound, fmt.Errorf("method '%s' not supported in this mode (no out channel support)", req.Method))
|
||||
stats.Record(ctx, metrics.RPCRequestError.M(1))
|
||||
return
|
||||
}
|
||||
|
||||
@ -185,6 +193,7 @@ func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer
|
||||
rp := reflect.New(handler.paramReceivers[i])
|
||||
if err := json.NewDecoder(bytes.NewReader(req.Params[i].data)).Decode(rp.Interface()); err != nil {
|
||||
rpcError(w, &req, rpcParseError, xerrors.Errorf("unmarshaling params for '%s': %w", handler.handlerFunc, err))
|
||||
stats.Record(ctx, metrics.RPCRequestError.M(1))
|
||||
return
|
||||
}
|
||||
|
||||
@ -196,6 +205,7 @@ func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer
|
||||
callResult, err := doCall(req.Method, handler.handlerFunc, callParams)
|
||||
if err != nil {
|
||||
rpcError(w, &req, 0, xerrors.Errorf("fatal error calling '%s': %w", req.Method, err))
|
||||
stats.Record(ctx, metrics.RPCRequestError.M(1))
|
||||
return
|
||||
}
|
||||
if req.ID == nil {
|
||||
@ -213,6 +223,7 @@ func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer
|
||||
err := callResult[handler.errOut].Interface()
|
||||
if err != nil {
|
||||
log.Warnf("error in RPC call to '%s': %+v", req.Method, err)
|
||||
stats.Record(ctx, metrics.RPCResponseError.M(1))
|
||||
resp.Error = &respError{
|
||||
Code: 1,
|
||||
Message: err.(error).Error(),
|
||||
@ -234,6 +245,7 @@ func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer
|
||||
}
|
||||
|
||||
log.Warnf("failed to setup channel in RPC call to '%s': %+v", req.Method, err)
|
||||
stats.Record(ctx, metrics.RPCResponseError.M(1))
|
||||
resp.Error = &respError{
|
||||
Code: 1,
|
||||
Message: err.(error).Error(),
|
||||
@ -243,6 +255,7 @@ func (h handlers) handle(ctx context.Context, req request, w func(func(io.Writer
|
||||
w(func(w io.Writer) {
|
||||
if err := json.NewEncoder(w).Encode(resp); err != nil {
|
||||
log.Error(err)
|
||||
stats.Record(ctx, metrics.RPCResponseError.M(1))
|
||||
return
|
||||
}
|
||||
})
|
||||
|
102
metrics/metrics.go
Normal file
102
metrics/metrics.go
Normal file
@ -0,0 +1,102 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"go.opencensus.io/stats"
|
||||
"go.opencensus.io/stats/view"
|
||||
"go.opencensus.io/tag"
|
||||
)
|
||||
|
||||
// Global Tags
|
||||
var (
|
||||
Version, _ = tag.NewKey("version")
|
||||
Commit, _ = tag.NewKey("commit")
|
||||
RPCMethod, _ = tag.NewKey("method")
|
||||
PeerID, _ = tag.NewKey("peer_id")
|
||||
FailureType, _ = tag.NewKey("failure_type")
|
||||
MessageFrom, _ = tag.NewKey("message_from")
|
||||
MessageTo, _ = tag.NewKey("message_to")
|
||||
MessageNonce, _ = tag.NewKey("message_nonce")
|
||||
ReceivedFrom, _ = tag.NewKey("received_from")
|
||||
)
|
||||
|
||||
// Measures
|
||||
var (
|
||||
LotusInfo = stats.Int64("info", "Arbitrary counter to tag lotus info to", stats.UnitDimensionless)
|
||||
ChainNodeHeight = stats.Int64("chain/node_height", "Current Height of the node", stats.UnitDimensionless)
|
||||
ChainNodeWorkerHeight = stats.Int64("chain/node_worker_height", "Current Height of workers on the node", stats.UnitDimensionless)
|
||||
MessageReceived = stats.Int64("message/received", "Counter for total received messages", stats.UnitDimensionless)
|
||||
MessageValidationFailure = stats.Int64("message/failure", "Counter for message validation failures", stats.UnitDimensionless)
|
||||
MessageValidationSuccess = stats.Int64("message/success", "Counter for message validation successes", stats.UnitDimensionless)
|
||||
BlockReceived = stats.Int64("block/received", "Counter for total received blocks", stats.UnitDimensionless)
|
||||
BlockValidationFailure = stats.Int64("block/failure", "Counter for block validation failures", stats.UnitDimensionless)
|
||||
BlockValidationSuccess = stats.Int64("block/success", "Counter for block validation successes", stats.UnitDimensionless)
|
||||
PeerCount = stats.Int64("peer/count", "Current number of FIL peers", stats.UnitDimensionless)
|
||||
RPCInvalidMethod = stats.Int64("rpc/invalid_method", "Total number of invalid RPC methods called", stats.UnitDimensionless)
|
||||
RPCRequestError = stats.Int64("rpc/request_error", "Total number of request errors handled", stats.UnitDimensionless)
|
||||
RPCResponseError = stats.Int64("rpc/response_error", "Total number of responses errors handled", stats.UnitDimensionless)
|
||||
)
|
||||
|
||||
// DefaultViews is an array of Consensus views for metric gathering purposes
|
||||
var DefaultViews = []*view.View{
|
||||
&view.View{
|
||||
Name: "info",
|
||||
Description: "Lotus node information",
|
||||
Measure: LotusInfo,
|
||||
Aggregation: view.LastValue(),
|
||||
TagKeys: []tag.Key{Version, Commit},
|
||||
},
|
||||
&view.View{
|
||||
Measure: ChainNodeHeight,
|
||||
Aggregation: view.LastValue(),
|
||||
},
|
||||
&view.View{
|
||||
Measure: ChainNodeWorkerHeight,
|
||||
Aggregation: view.LastValue(),
|
||||
},
|
||||
&view.View{
|
||||
Measure: BlockReceived,
|
||||
Aggregation: view.Count(),
|
||||
},
|
||||
&view.View{
|
||||
Measure: BlockValidationFailure,
|
||||
Aggregation: view.Count(),
|
||||
TagKeys: []tag.Key{FailureType, PeerID, ReceivedFrom},
|
||||
},
|
||||
&view.View{
|
||||
Measure: BlockValidationSuccess,
|
||||
Aggregation: view.Count(),
|
||||
},
|
||||
&view.View{
|
||||
Measure: MessageReceived,
|
||||
Aggregation: view.Count(),
|
||||
},
|
||||
&view.View{
|
||||
Measure: MessageValidationFailure,
|
||||
Aggregation: view.Count(),
|
||||
TagKeys: []tag.Key{FailureType, MessageFrom, MessageTo, MessageNonce},
|
||||
},
|
||||
&view.View{
|
||||
Measure: MessageValidationSuccess,
|
||||
Aggregation: view.Count(),
|
||||
},
|
||||
&view.View{
|
||||
Measure: PeerCount,
|
||||
Aggregation: view.LastValue(),
|
||||
},
|
||||
// All RPC related metrics should at the very least tag the RPCMethod
|
||||
&view.View{
|
||||
Measure: RPCInvalidMethod,
|
||||
Aggregation: view.Count(),
|
||||
TagKeys: []tag.Key{RPCMethod},
|
||||
},
|
||||
&view.View{
|
||||
Measure: RPCRequestError,
|
||||
Aggregation: view.Count(),
|
||||
TagKeys: []tag.Key{RPCMethod},
|
||||
},
|
||||
&view.View{
|
||||
Measure: RPCResponseError,
|
||||
Aggregation: view.Count(),
|
||||
TagKeys: []tag.Key{RPCMethod},
|
||||
},
|
||||
}
|
@ -5,7 +5,9 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/filecoin-project/lotus/metrics"
|
||||
"github.com/filecoin-project/lotus/node/modules/dtypes"
|
||||
"go.opencensus.io/stats"
|
||||
"go.uber.org/fx"
|
||||
|
||||
host "github.com/libp2p/go-libp2p-core/host"
|
||||
@ -115,6 +117,7 @@ func (pmgr *PeerMgr) Run(ctx context.Context) {
|
||||
} else if pcount > pmgr.maxFilPeers {
|
||||
log.Debug("peer count about threshold: %d > %d", pcount, pmgr.maxFilPeers)
|
||||
}
|
||||
stats.Record(ctx, metrics.PeerCount.M(int64(pmgr.getPeerCount())))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user