2020-02-26 02:42:34 +00:00
|
|
|
package metrics
|
|
|
|
|
|
|
|
import (
|
2020-10-21 08:10:27 +00:00
|
|
|
"context"
|
2020-07-24 05:47:41 +00:00
|
|
|
"time"
|
|
|
|
|
2020-02-26 02:42:34 +00:00
|
|
|
"go.opencensus.io/stats"
|
|
|
|
"go.opencensus.io/stats/view"
|
|
|
|
"go.opencensus.io/tag"
|
2020-05-20 17:43:22 +00:00
|
|
|
|
|
|
|
rpcmetrics "github.com/filecoin-project/go-jsonrpc/metrics"
|
2021-02-28 22:48:36 +00:00
|
|
|
|
|
|
|
"github.com/filecoin-project/lotus/blockstore"
|
2020-02-26 02:42:34 +00:00
|
|
|
)
|
|
|
|
|
2020-07-24 05:47:41 +00:00
|
|
|
// Distribution
|
2021-02-28 22:48:36 +00:00
|
|
|
var defaultMillisecondsDistribution = view.Distribution(0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650, 800, 1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000, 100000)
|
2020-07-24 05:47:41 +00:00
|
|
|
|
2020-02-26 02:42:34 +00:00
|
|
|
// Global Tags
|
|
|
|
var (
|
2020-03-02 00:26:09 +00:00
|
|
|
Version, _ = tag.NewKey("version")
|
|
|
|
Commit, _ = tag.NewKey("commit")
|
|
|
|
PeerID, _ = tag.NewKey("peer_id")
|
2020-12-10 14:48:37 +00:00
|
|
|
MinerID, _ = tag.NewKey("miner_id")
|
2020-03-02 00:57:16 +00:00
|
|
|
FailureType, _ = tag.NewKey("failure_type")
|
2020-08-28 06:11:24 +00:00
|
|
|
Local, _ = tag.NewKey("local")
|
2020-03-02 00:26:09 +00:00
|
|
|
MessageFrom, _ = tag.NewKey("message_from")
|
|
|
|
MessageTo, _ = tag.NewKey("message_to")
|
|
|
|
MessageNonce, _ = tag.NewKey("message_nonce")
|
2020-03-02 00:57:16 +00:00
|
|
|
ReceivedFrom, _ = tag.NewKey("received_from")
|
2020-10-21 08:10:27 +00:00
|
|
|
Endpoint, _ = tag.NewKey("endpoint")
|
|
|
|
APIInterface, _ = tag.NewKey("api") // to distinguish between gateway api and full node api endpoint calls
|
2020-02-26 02:42:34 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Measures
|
|
|
|
var (
|
2020-06-04 22:18:14 +00:00
|
|
|
LotusInfo = stats.Int64("info", "Arbitrary counter to tag lotus info to", stats.UnitDimensionless)
|
|
|
|
ChainNodeHeight = stats.Int64("chain/node_height", "Current Height of the node", stats.UnitDimensionless)
|
2020-09-06 04:32:05 +00:00
|
|
|
ChainNodeHeightExpected = stats.Int64("chain/node_height_expected", "Expected Height of the node", stats.UnitDimensionless)
|
2020-06-04 22:18:14 +00:00
|
|
|
ChainNodeWorkerHeight = stats.Int64("chain/node_worker_height", "Current Height of workers on the node", stats.UnitDimensionless)
|
2020-08-28 06:11:24 +00:00
|
|
|
MessagePublished = stats.Int64("message/published", "Counter for total locally published messages", stats.UnitDimensionless)
|
2020-06-04 22:18:14 +00:00
|
|
|
MessageReceived = stats.Int64("message/received", "Counter for total received messages", stats.UnitDimensionless)
|
|
|
|
MessageValidationFailure = stats.Int64("message/failure", "Counter for message validation failures", stats.UnitDimensionless)
|
|
|
|
MessageValidationSuccess = stats.Int64("message/success", "Counter for message validation successes", stats.UnitDimensionless)
|
2020-08-17 06:04:22 +00:00
|
|
|
BlockPublished = stats.Int64("block/published", "Counter for total locally published blocks", stats.UnitDimensionless)
|
2020-06-04 22:18:14 +00:00
|
|
|
BlockReceived = stats.Int64("block/received", "Counter for total received blocks", stats.UnitDimensionless)
|
|
|
|
BlockValidationFailure = stats.Int64("block/failure", "Counter for block validation failures", stats.UnitDimensionless)
|
|
|
|
BlockValidationSuccess = stats.Int64("block/success", "Counter for block validation successes", stats.UnitDimensionless)
|
|
|
|
BlockValidationDurationMilliseconds = stats.Float64("block/validation_ms", "Duration for Block Validation in ms", stats.UnitMilliseconds)
|
2020-12-10 14:48:37 +00:00
|
|
|
BlockDelay = stats.Int64("block/delay", "Delay of accepted blocks, where delay is >5s", stats.UnitMilliseconds)
|
2020-06-04 22:18:14 +00:00
|
|
|
PeerCount = stats.Int64("peer/count", "Current number of FIL peers", stats.UnitDimensionless)
|
2020-08-17 07:46:20 +00:00
|
|
|
PubsubPublishMessage = stats.Int64("pubsub/published", "Counter for total published messages", stats.UnitDimensionless)
|
|
|
|
PubsubDeliverMessage = stats.Int64("pubsub/delivered", "Counter for total delivered messages", stats.UnitDimensionless)
|
|
|
|
PubsubRejectMessage = stats.Int64("pubsub/rejected", "Counter for total rejected messages", stats.UnitDimensionless)
|
|
|
|
PubsubDuplicateMessage = stats.Int64("pubsub/duplicate", "Counter for total duplicate messages", stats.UnitDimensionless)
|
2020-08-20 20:14:32 +00:00
|
|
|
PubsubRecvRPC = stats.Int64("pubsub/recv_rpc", "Counter for total received RPCs", stats.UnitDimensionless)
|
|
|
|
PubsubSendRPC = stats.Int64("pubsub/send_rpc", "Counter for total sent RPCs", stats.UnitDimensionless)
|
|
|
|
PubsubDropRPC = stats.Int64("pubsub/drop_rpc", "Counter for total dropped RPCs", stats.UnitDimensionless)
|
2020-10-21 08:10:27 +00:00
|
|
|
APIRequestDuration = stats.Float64("api/request_duration_ms", "Duration of API requests", stats.UnitMilliseconds)
|
2020-11-11 16:05:08 +00:00
|
|
|
VMFlushCopyDuration = stats.Float64("vm/flush_copy_ms", "Time spent in VM Flush Copy", stats.UnitMilliseconds)
|
|
|
|
VMFlushCopyCount = stats.Int64("vm/flush_copy_count", "Number of copied objects", stats.UnitDimensionless)
|
2020-02-26 02:42:34 +00:00
|
|
|
)
|
|
|
|
|
2020-03-05 09:47:20 +00:00
|
|
|
var (
|
|
|
|
InfoView = &view.View{
|
2020-02-26 02:42:34 +00:00
|
|
|
Name: "info",
|
|
|
|
Description: "Lotus node information",
|
|
|
|
Measure: LotusInfo,
|
|
|
|
Aggregation: view.LastValue(),
|
|
|
|
TagKeys: []tag.Key{Version, Commit},
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
|
|
|
ChainNodeHeightView = &view.View{
|
2020-03-02 00:26:09 +00:00
|
|
|
Measure: ChainNodeHeight,
|
2020-02-26 02:42:34 +00:00
|
|
|
Aggregation: view.LastValue(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
2020-09-06 04:32:05 +00:00
|
|
|
ChainNodeHeightExpectedView = &view.View{
|
|
|
|
Measure: ChainNodeHeightExpected,
|
|
|
|
Aggregation: view.LastValue(),
|
|
|
|
}
|
2020-03-05 09:47:20 +00:00
|
|
|
ChainNodeWorkerHeightView = &view.View{
|
2020-03-02 00:26:09 +00:00
|
|
|
Measure: ChainNodeWorkerHeight,
|
|
|
|
Aggregation: view.LastValue(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
|
|
|
BlockReceivedView = &view.View{
|
2020-03-02 00:57:16 +00:00
|
|
|
Measure: BlockReceived,
|
2020-03-02 00:26:09 +00:00
|
|
|
Aggregation: view.Count(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
|
|
|
BlockValidationFailureView = &view.View{
|
2020-03-02 00:57:16 +00:00
|
|
|
Measure: BlockValidationFailure,
|
|
|
|
Aggregation: view.Count(),
|
2020-03-05 09:47:20 +00:00
|
|
|
TagKeys: []tag.Key{FailureType},
|
|
|
|
}
|
|
|
|
BlockValidationSuccessView = &view.View{
|
2020-03-02 00:57:16 +00:00
|
|
|
Measure: BlockValidationSuccess,
|
|
|
|
Aggregation: view.Count(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
2020-06-04 22:18:14 +00:00
|
|
|
BlockValidationDurationView = &view.View{
|
|
|
|
Measure: BlockValidationDurationMilliseconds,
|
2020-07-24 05:47:41 +00:00
|
|
|
Aggregation: defaultMillisecondsDistribution,
|
2020-06-04 22:18:14 +00:00
|
|
|
}
|
2020-12-10 14:48:37 +00:00
|
|
|
// BlockDelayView aggregates BlockDelay recordings into a histogram, broken
// down by MinerID. Bucket boundaries are in milliseconds (seconds * 1000) and
// become coarser as the delay grows: 1s steps, then 2s steps, then 10s steps,
// with one last boundary at 10 minutes.
BlockDelayView = &view.View{
	Measure: BlockDelay,
	TagKeys: []tag.Key{MinerID},
	Aggregation: func() *view.Aggregation {
		var bounds []float64
		// The loop condition is exclusive, so the last 1s-step boundary is 28s;
		// the next boundary after it is 30s from the 2s-step range below.
		for sec := 5; sec < 29; sec++ { // 5-28s, step 1s
			bounds = append(bounds, float64(sec*1000))
		}
		for sec := 30; sec < 60; sec += 2 { // 30-58s, step 2s
			bounds = append(bounds, float64(sec*1000))
		}
		for sec := 60; sec <= 300; sec += 10 { // 60-300s, step 10s
			bounds = append(bounds, float64(sec*1000))
		}
		bounds = append(bounds, 600*1000) // final cutoff at 10m
		return view.Distribution(bounds...)
	}(),
}
|
2020-08-28 06:25:50 +00:00
|
|
|
MessagePublishedView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: MessagePublished,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
2020-03-05 09:47:20 +00:00
|
|
|
MessageReceivedView = &view.View{
|
2020-03-02 00:57:16 +00:00
|
|
|
Measure: MessageReceived,
|
|
|
|
Aggregation: view.Count(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
|
|
|
MessageValidationFailureView = &view.View{
|
2020-03-02 00:57:16 +00:00
|
|
|
Measure: MessageValidationFailure,
|
|
|
|
Aggregation: view.Count(),
|
2020-08-28 06:11:24 +00:00
|
|
|
TagKeys: []tag.Key{FailureType, Local},
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
|
|
|
MessageValidationSuccessView = &view.View{
|
2020-03-02 00:57:16 +00:00
|
|
|
Measure: MessageValidationSuccess,
|
2020-03-02 00:26:09 +00:00
|
|
|
Aggregation: view.Count(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
|
|
|
PeerCountView = &view.View{
|
2020-03-02 00:26:09 +00:00
|
|
|
Measure: PeerCount,
|
2020-02-26 02:42:34 +00:00
|
|
|
Aggregation: view.LastValue(),
|
2020-03-05 09:47:20 +00:00
|
|
|
}
|
2020-08-28 06:25:50 +00:00
|
|
|
PubsubPublishMessageView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubPublishMessage,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
|
|
|
PubsubDeliverMessageView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubDeliverMessage,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
|
|
|
PubsubRejectMessageView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubRejectMessage,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
|
|
|
PubsubDuplicateMessageView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubDuplicateMessage,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
|
|
|
PubsubRecvRPCView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubRecvRPC,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
|
|
|
PubsubSendRPCView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubSendRPC,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
|
|
|
PubsubDropRPCView = &view.View{
|
2020-08-28 09:51:51 +00:00
|
|
|
Measure: PubsubDropRPC,
|
2020-08-28 06:25:50 +00:00
|
|
|
Aggregation: view.Count(),
|
|
|
|
}
|
2020-10-21 08:10:27 +00:00
|
|
|
APIRequestDurationView = &view.View{
|
|
|
|
Measure: APIRequestDuration,
|
|
|
|
Aggregation: defaultMillisecondsDistribution,
|
|
|
|
TagKeys: []tag.Key{APIInterface, Endpoint},
|
|
|
|
}
|
2020-11-11 16:05:08 +00:00
|
|
|
VMFlushCopyDurationView = &view.View{
|
|
|
|
Measure: VMFlushCopyDuration,
|
|
|
|
Aggregation: view.Sum(),
|
|
|
|
}
|
|
|
|
VMFlushCopyCountView = &view.View{
|
|
|
|
Measure: VMFlushCopyCount,
|
|
|
|
Aggregation: view.Sum(),
|
|
|
|
}
|
2020-03-05 09:47:20 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// DefaultViews is an array of OpenCensus views for metric gathering purposes
|
2021-02-28 22:48:36 +00:00
|
|
|
var DefaultViews = func() []*view.View {
|
|
|
|
views := []*view.View{
|
|
|
|
InfoView,
|
|
|
|
ChainNodeHeightView,
|
|
|
|
ChainNodeHeightExpectedView,
|
|
|
|
ChainNodeWorkerHeightView,
|
|
|
|
BlockReceivedView,
|
|
|
|
BlockValidationFailureView,
|
|
|
|
BlockValidationSuccessView,
|
|
|
|
BlockValidationDurationView,
|
|
|
|
BlockDelayView,
|
|
|
|
MessagePublishedView,
|
|
|
|
MessageReceivedView,
|
|
|
|
MessageValidationFailureView,
|
|
|
|
MessageValidationSuccessView,
|
|
|
|
PeerCountView,
|
|
|
|
PubsubPublishMessageView,
|
|
|
|
PubsubDeliverMessageView,
|
|
|
|
PubsubRejectMessageView,
|
|
|
|
PubsubDuplicateMessageView,
|
|
|
|
PubsubRecvRPCView,
|
|
|
|
PubsubSendRPCView,
|
|
|
|
PubsubDropRPCView,
|
|
|
|
APIRequestDurationView,
|
|
|
|
VMFlushCopyCountView,
|
|
|
|
VMFlushCopyDurationView,
|
|
|
|
}
|
|
|
|
views = append(views, blockstore.DefaultViews...)
|
|
|
|
views = append(views, rpcmetrics.DefaultViews...)
|
|
|
|
return views
|
|
|
|
}()
|
2020-07-24 05:47:41 +00:00
|
|
|
|
|
|
|
// SinceInMilliseconds reports the time elapsed since startTime, expressed in
// milliseconds as a float64 so the fractional (sub-millisecond) part is kept.
func SinceInMilliseconds(startTime time.Time) float64 {
	const nanosPerMilli = 1e6

	elapsed := time.Since(startTime)
	return float64(elapsed.Nanoseconds()) / nanosPerMilli
}
|
2020-10-21 08:10:27 +00:00
|
|
|
|
|
|
|
// Timer is a function stopwatch, calling it starts the timer,
|
|
|
|
// calling the returned function will record the duration.
|
|
|
|
func Timer(ctx context.Context, m *stats.Float64Measure) func() {
|
|
|
|
start := time.Now()
|
|
|
|
return func() {
|
|
|
|
stats.Record(ctx, m.M(SinceInMilliseconds(start)))
|
|
|
|
}
|
2020-10-21 08:39:57 +00:00
|
|
|
}
|