refactor: simplify collected metrics (#21963)

This commit is contained in:
Julien Robert 2024-09-28 13:20:39 +02:00 committed by GitHub
parent dd9e5825ae
commit 787ee6980f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 59 additions and 158 deletions

View File

@ -52,8 +52,10 @@ Every module contains its own CHANGELOG.md. Please refer to the module you are i
### Improvements
* (sims) [#21613](https://github.com/cosmos/cosmos-sdk/pull/21613) Add sims2 framework and factory methods for simpler message factories in modules
* (modules) [#21963](https://github.com/cosmos/cosmos-sdk/pull/21963) Duplicatable metrics are no longer collected in modules. They were unnecessary overhead.
### Bug Fixes
* (sims) [#21952](https://github.com/cosmos/cosmos-sdk/pull/21952) Use liveness matrix for validator sign status in sims
* (sims) [#21906](https://github.com/cosmos/cosmos-sdk/pull/21906) Skip sims test when running dry on validators
* (cli) [#21919](https://github.com/cosmos/cosmos-sdk/pull/21919) Query address-by-acc-num by account_id instead of id.

View File

@ -13,17 +13,18 @@ their application through the use of the `telemetry` package. To enable telemetr
The Cosmos SDK currently supports enabling in-memory and prometheus as telemetry sinks. In-memory sink is always attached (when the telemetry is enabled) with 10 second interval and 1 minute retention. This means that metrics will be aggregated over 10 seconds, and metrics will be kept alive for 1 minute.
To query active metrics (see retention note above) you have to enable API server (`api.enabled = true` in the app.toml). Single API endpoint is exposed: `http://localhost:1317/metrics?format={text|prometheus}`, the default being `text`.
To query active metrics (see retention note above) you have to enable the API server (`api.enabled = true` in the app.toml). A single API endpoint is exposed: `http://localhost:1317/metrics?format={text|prometheus}` (or port `1318` in v2), the default being `text`.
## Emitting metrics
If telemetry is enabled via configuration, a single global metrics collector is registered via the
[go-metrics](https://github.com/hashicorp/go-metrics) library. This allows emitting and collecting
metrics through simple [API](https://github.com/cosmos/cosmos-sdk/blob/v0.50.0-alpha.0/telemetry/wrapper.go). Example:
metrics through a simple [API](https://github.com/cosmos/cosmos-sdk/blob/v0.50.10/telemetry/wrapper.go). Example:
```go
func EndBlocker(ctx sdk.Context, k keeper.Keeper) {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyEndBlocker)
// ...
}
@ -69,60 +70,32 @@ Consider the following examples with enough granularity and adequate cardinality
* begin/end blocker time
* tx gas used
* block gas used
* amount of tokens minted
* amount of accounts created
The following examples expose too much cardinality and may not even prove to be useful:
* transfers between accounts with amount
* voting/deposit amount from unique addresses
## Idempotency
Metrics aren't idempotent, so if a metric is emitted twice, it will be counted twice.
This is important to keep in mind when collecting metrics. If a module is called twice, the metrics will be emitted twice (for instance in `CheckTx`, `SimulateTx` or `DeliverTx`).
## Supported Metrics
| Metric | Description | Unit | Type |
|:--------------------------------|:------------------------------------------------------------------------------------------|:----------------|:--------|
| `tx_count` | Total number of txs processed via `DeliverTx` | tx | counter |
| `tx_successful` | Total number of successful txs processed via `DeliverTx` | tx | counter |
| `tx_failed` | Total number of failed txs processed via `DeliverTx` | tx | counter |
| `tx_gas_used` | The total amount of gas used by a tx | gas | gauge |
| `tx_gas_wanted` | The total amount of gas requested by a tx | gas | gauge |
| `tx_msg_send` | The total amount of tokens sent in a `MsgSend` (per denom) | token | gauge |
| `tx_msg_withdraw_reward` | The total amount of tokens withdrawn in a `MsgWithdrawDelegatorReward` (per denom) | token | gauge |
| `tx_msg_withdraw_commission` | The total amount of tokens withdrawn in a `MsgWithdrawValidatorCommission` (per denom) | token | gauge |
| `tx_msg_delegate` | The total amount of tokens delegated in a `MsgDelegate` | token | gauge |
| `tx_msg_begin_unbonding` | The total amount of tokens undelegated in a `MsgUndelegate` | token | gauge |
| `tx_msg_begin_begin_redelegate` | The total amount of tokens redelegated in a `MsgBeginRedelegate` | token | gauge |
| `tx_msg_ibc_transfer` | The total amount of tokens transferred via IBC in a `MsgTransfer` (source or sink chain) | token | gauge |
| `ibc_transfer_packet_receive` | The total amount of tokens received in a `FungibleTokenPacketData` (source or sink chain) | token | gauge |
| `new_account` | Total number of new accounts created | account | counter |
| `gov_proposal` | Total number of governance proposals | proposal | counter |
| `gov_vote` | Total number of governance votes for a proposal | vote | counter |
| `gov_deposit` | Total number of governance deposits for a proposal | deposit | counter |
| `staking_delegate` | Total number of delegations | delegation | counter |
| `staking_undelegate` | Total number of undelegations | undelegation | counter |
| `staking_redelegate` | Total number of redelegations | redelegation | counter |
| `ibc_transfer_send` | Total number of IBC transfers sent from a chain (source or sink) | transfer | counter |
| `ibc_transfer_receive` | Total number of IBC transfers received to a chain (source or sink) | transfer | counter |
| `ibc_client_create` | Total number of clients created | create | counter |
| `ibc_client_update` | Total number of client updates | update | counter |
| `ibc_client_upgrade` | Total number of client upgrades | upgrade | counter |
| `ibc_client_misbehaviour` | Total number of client misbehaviours | misbehaviour | counter |
| `ibc_connection_open-init` | Total number of connection `OpenInit` handshakes | handshake | counter |
| `ibc_connection_open-try` | Total number of connection `OpenTry` handshakes | handshake | counter |
| `ibc_connection_open-ack` | Total number of connection `OpenAck` handshakes | handshake | counter |
| `ibc_connection_open-confirm` | Total number of connection `OpenConfirm` handshakes | handshake | counter |
| `ibc_channel_open-init` | Total number of channel `OpenInit` handshakes | handshake | counter |
| `ibc_channel_open-try` | Total number of channel `OpenTry` handshakes | handshake | counter |
| `ibc_channel_open-ack` | Total number of channel `OpenAck` handshakes | handshake | counter |
| `ibc_channel_open-confirm` | Total number of channel `OpenConfirm` handshakes | handshake | counter |
| `ibc_channel_close-init` | Total number of channel `CloseInit` handshakes | handshake | counter |
| `ibc_channel_close-confirm` | Total number of channel `CloseConfirm` handshakes | handshake | counter |
| `tx_msg_ibc_recv_packet` | Total number of IBC packets received | packet | counter |
| `tx_msg_ibc_acknowledge_packet` | Total number of IBC packets acknowledged | acknowledgement | counter |
| `ibc_timeout_packet` | Total number of IBC timeout packets | timeout | counter |
| `store_iavl_get` | Duration of an IAVL `Store#Get` call | ms | summary |
| `store_iavl_set` | Duration of an IAVL `Store#Set` call | ms | summary |
| `store_iavl_has` | Duration of an IAVL `Store#Has` call | ms | summary |
| `store_iavl_delete` | Duration of an IAVL `Store#Delete` call | ms | summary |
| `store_iavl_commit` | Duration of an IAVL `Store#Commit` call | ms | summary |
| `store_iavl_query` | Duration of an IAVL `Store#Query` call | ms | summary |
| Metric | Description | Unit | Type |
| ------------------- | ------------------------------------------------------------------------------ | ---- | ------- |
| `tx_count` | Total number of txs processed via `DeliverTx` | tx | counter |
| `tx_successful` | Total number of successful txs processed via `DeliverTx` | tx | counter |
| `tx_failed` | Total number of failed txs processed via `DeliverTx` | tx | counter |
| `tx_gas_used` | The total amount of gas used by a tx | gas | gauge |
| `tx_gas_wanted` | The total amount of gas requested by a tx | gas | gauge |
| `store_iavl_get` | Duration of an IAVL `Store#Get` call | ms | summary |
| `store_iavl_set` | Duration of an IAVL `Store#Set` call | ms | summary |
| `store_iavl_has` | Duration of an IAVL `Store#Has` call | ms | summary |
| `store_iavl_delete` | Duration of an IAVL `Store#Delete` call | ms | summary |
| `store_iavl_commit` | Duration of an IAVL `Store#Commit` call | ms | summary |
| `store_iavl_query` | Duration of an IAVL `Store#Query` call | ms | summary |
| `begin_blocker` | Duration of the `BeginBlock` call per module | ms | summary |
| `end_blocker` | Duration of the `EndBlock` call per module | ms | summary |
| `server_info` | Information about the server, such as version, commit, build date, and upgrade | - | gauge |

View File

@ -8,11 +8,9 @@ import (
// Common metric key constants
const (
MetricKeyBeginBlocker = "begin_blocker"
MetricKeyEndBlocker = "end_blocker"
MetricKeyPrepareCheckStater = "prepare_check_stater"
MetricKeyPrecommiter = "precommiter"
MetricLabelNameModule = "module"
MetricKeyBeginBlocker = "begin_blocker"
MetricKeyEndBlocker = "end_blocker"
MetricLabelNameModule = "module"
)
// NewLabel creates a new instance of Label with name and value

View File

@ -19,7 +19,7 @@ require (
github.com/golang/mock v1.6.0
github.com/golang/protobuf v1.5.4
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/go-metrics v0.5.3
github.com/hashicorp/go-metrics v0.5.3 // indirect
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142

View File

@ -3,12 +3,9 @@ package keeper
import (
"context"
"github.com/hashicorp/go-metrics"
errorsmod "cosmossdk.io/errors"
"cosmossdk.io/x/bank/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
)
@ -65,18 +62,6 @@ func (k msgServer) Send(ctx context.Context, msg *types.MsgSend) (*types.MsgSend
return nil, err
}
defer func() {
for _, a := range msg.Amount {
if a.Amount.IsInt64() {
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", "send"},
float32(a.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", a.Denom)},
)
}
}
}()
return &types.MsgSendResponse{}, nil
}

View File

@ -6,14 +6,12 @@ import (
"errors"
"fmt"
"github.com/hashicorp/go-metrics"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
errorsmod "cosmossdk.io/errors"
"cosmossdk.io/x/bank/v2/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
)
@ -85,18 +83,6 @@ func (h handlers) MsgSend(ctx context.Context, msg *types.MsgSend) (*types.MsgSe
return nil, err
}
defer func() {
for _, a := range msg.Amount {
if a.Amount.IsInt64() {
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", "send"},
float32(a.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", a.Denom)},
)
}
}
}()
return &types.MsgSendResponse{}, nil
}

View File

@ -18,7 +18,6 @@ require (
github.com/golang/mock v1.6.0
github.com/golang/protobuf v1.5.4
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/go-metrics v0.5.3
github.com/pkg/errors v0.9.1
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
@ -92,6 +91,7 @@ require (
github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect
github.com/hashicorp/go-hclog v1.6.3 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
github.com/hashicorp/go-metrics v0.5.3 // indirect
github.com/hashicorp/go-plugin v1.6.1 // indirect
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect

View File

@ -11,7 +11,8 @@ import (
// BeginBlocker sets the proposer for determining distribution during endblock
// and distribute rewards for the previous block.
func (k Keeper) BeginBlocker(ctx context.Context) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
// determine the total power signing the block
var previousTotalPower int64

View File

@ -4,12 +4,9 @@ import (
"context"
"fmt"
"github.com/hashicorp/go-metrics"
"cosmossdk.io/errors"
"cosmossdk.io/x/distribution/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
)
@ -61,18 +58,6 @@ func (k msgServer) WithdrawDelegatorReward(ctx context.Context, msg *types.MsgWi
return nil, err
}
defer func() {
for _, a := range amount {
if a.Amount.IsInt64() {
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", "withdraw_reward"},
float32(a.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", a.Denom)},
)
}
}
}()
return &types.MsgWithdrawDelegatorRewardResponse{Amount: amount}, nil
}
@ -87,18 +72,6 @@ func (k msgServer) WithdrawValidatorCommission(ctx context.Context, msg *types.M
return nil, err
}
defer func() {
for _, a := range amount {
if a.Amount.IsInt64() {
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", "withdraw_commission"},
float32(a.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", a.Denom)},
)
}
}
}()
return &types.MsgWithdrawValidatorCommissionResponse{Amount: amount}, nil
}

View File

@ -11,7 +11,8 @@ import (
// BeginBlocker of epochs module.
func (k Keeper) BeginBlocker(ctx context.Context) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
headerInfo := k.HeaderService.HeaderInfo(ctx)
err := k.EpochInfo.Walk(

View File

@ -13,7 +13,8 @@ import (
// BeginBlocker iterates through and handles any newly discovered evidence of
// misbehavior submitted by CometBFT. Currently, only equivocation is handled.
func (k Keeper) BeginBlocker(ctx context.Context, cometService comet.Service) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
bi := cometService.CometInfo(ctx)

View File

@ -20,7 +20,8 @@ import (
// EndBlocker is called every block.
func (k Keeper) EndBlocker(ctx context.Context) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyEndBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyEndBlocker)
// delete dead proposals from store and return their deposits.
// A proposal is dead when it's inactive and didn't get enough deposit on time to get into voting phase.

View File

@ -2,11 +2,18 @@ package keeper
import (
"context"
"cosmossdk.io/x/gov/types"
"github.com/cosmos/cosmos-sdk/telemetry"
)
// EndBlocker called at every block, updates proposal's `FinalTallyResult` and
// prunes expired proposals.
func (k Keeper) EndBlocker(ctx context.Context) error {
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyEndBlocker)
if err := k.TallyProposalsAtVPEnd(ctx); err != nil {
return err
}

View File

@ -10,7 +10,8 @@ import (
// BeginBlocker mints new tokens for the previous block.
func (k Keeper) BeginBlocker(ctx context.Context) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
// fetch stored minter & params
minter, err := k.Minter.Get(ctx)

View File

@ -14,6 +14,7 @@ import (
"cosmossdk.io/x/protocolpool/types"
"github.com/cosmos/cosmos-sdk/codec"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
)
@ -507,5 +508,8 @@ func (k Keeper) validateContinuousFund(ctx context.Context, msg types.MsgCreateC
}
// BeginBlocker reports the module's begin-block duration through telemetry
// (under the begin_blocker metric key) and returns the result of
// k.SetToDistribute.
func (k Keeper) BeginBlocker(ctx context.Context) error {
// Capture the start time up front so the deferred measurement spans the whole call.
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
return k.SetToDistribute(ctx)
}

View File

@ -13,7 +13,8 @@ import (
// BeginBlocker check for infraction evidence or downtime of validators
// on every begin block
func BeginBlocker(ctx context.Context, k keeper.Keeper, cometService comet.Service) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
// Retrieve CometBFT info, then iterate through all validator votes
// from the last commit. For each vote, handle the validator's signature, potentially

View File

@ -19,7 +19,7 @@ require (
github.com/golang/mock v1.6.0
github.com/golang/protobuf v1.5.4
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/go-metrics v0.5.3
github.com/hashicorp/go-metrics v0.5.3 // indirect
github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.9.0

View File

@ -13,5 +13,6 @@ import (
func (k *Keeper) EndBlocker(ctx context.Context) ([]appmodule.ValidatorUpdate, error) {
// Time the whole call and report it under the end_blocker metric key for this module.
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyEndBlocker)
// The validator updates (and any error) are returned unchanged from BlockValidatorUpdates.
return k.BlockValidatorUpdates(ctx)
}

View File

@ -9,7 +9,6 @@ import (
"strconv"
"time"
"github.com/hashicorp/go-metrics"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@ -21,7 +20,6 @@ import (
"github.com/cosmos/cosmos-sdk/crypto/keys/ed25519"
cryptotypes "github.com/cosmos/cosmos-sdk/crypto/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
)
@ -301,17 +299,6 @@ func (k msgServer) Delegate(ctx context.Context, msg *types.MsgDelegate) (*types
return nil, err
}
if msg.Amount.Amount.IsInt64() {
defer func() {
telemetry.IncrCounter(1, types.ModuleName, "delegate")
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", sdk.MsgTypeURL(msg)},
float32(msg.Amount.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", msg.Amount.Denom)},
)
}()
}
if err := k.EventService.EventManager(ctx).EmitKV(
types.EventTypeDelegate,
event.NewAttribute(types.AttributeKeyValidator, msg.ValidatorAddress),
@ -374,17 +361,6 @@ func (k msgServer) BeginRedelegate(ctx context.Context, msg *types.MsgBeginRedel
return nil, err
}
if msg.Amount.Amount.IsInt64() {
defer func() {
telemetry.IncrCounter(1, types.ModuleName, "redelegate")
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", sdk.MsgTypeURL(msg)},
float32(msg.Amount.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", msg.Amount.Denom)},
)
}()
}
if err := k.EventService.EventManager(ctx).EmitKV(
types.EventTypeRedelegate,
event.NewAttribute(types.AttributeKeySrcValidator, msg.ValidatorSrcAddress),
@ -444,17 +420,6 @@ func (k msgServer) Undelegate(ctx context.Context, msg *types.MsgUndelegate) (*t
undelegatedCoin := sdk.NewCoin(msg.Amount.Denom, undelegatedAmt)
if msg.Amount.Amount.IsInt64() {
defer func() {
telemetry.IncrCounter(1, types.ModuleName, "undelegate")
telemetry.SetGaugeWithLabels(
[]string{"tx", "msg", sdk.MsgTypeURL(msg)},
float32(msg.Amount.Amount.Int64()),
[]metrics.Label{telemetry.NewLabel("denom", msg.Amount.Denom)},
)
}()
}
if err := k.EventService.EventManager(ctx).EmitKV(
types.EventTypeUnbond,
event.NewAttribute(types.AttributeKeyValidator, msg.ValidatorAddress),

View File

@ -19,7 +19,8 @@ import (
// a migration to be executed if needed upon this switch (migration defined in the new binary)
// skipUpgradeHeightArray is a set of block heights for which the upgrade must be skipped
func (k Keeper) PreBlocker(ctx context.Context) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)
start := telemetry.Now()
defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyBeginBlocker)
blockHeight := k.HeaderService.HeaderInfo(ctx).Height
plan, err := k.GetUpgradePlan(ctx)