Improve validator monitor experience for high validator counts (#3728)
## Issue Addressed

NA

## Proposed Changes

Others (#3678) and I have observed that when running with lots of validators (e.g., 1000s) the cardinality is too much for Prometheus. I've seen Prometheus instances just grind to a halt when we turn the validator monitor on for our testnet validators (we have 10,000s of Goerli validators). Additionally, the debug log volume can get very high with one log per validator, per attestation.

To address this, the `bn --validator-monitor-individual-tracking-threshold <INTEGER>` flag has been added to *disable* per-validator (i.e., non-aggregated) metrics/logging once the validator monitor exceeds the threshold of validators. The default value is `64`, which is a finger-to-the-wind value. I don't actually know the value at which Prometheus starts to become overwhelmed, but I've seen it work with ~64 validators and I've seen it *not* work with 1000s of validators. A default of `64` seems like it will result in a breaking change to users who are running millions of dollars worth of validators whilst resulting in a no-op for low-validator-count users. I'm open to changing this number, though.

Additionally, this PR starts collecting aggregated Prometheus metrics (e.g., total count of head hits across all validators), so that high-validator-count users still have some interesting metrics. We already had logging for aggregated values, so nothing has been added there.

I've opted to make this a breaking change since it can be rather damaging to your Prometheus instance to accidentally enable the validator monitor with large numbers of validators. I've crashed a Prometheus instance myself and had a report from another user who's done the same thing.

## Additional Info

NA

## Breaking Changes Note

A new label has been added to the validator monitor Prometheus metrics: `total`.
This label tracks the aggregated metrics of all validators in the validator monitor (as opposed to each validator being tracked individually using its pubkey as the label). Additionally, a new flag has been added to the Beacon Node: `--validator-monitor-individual-tracking-threshold`. The default value is `64`, which means that when the validator monitor is tracking more than 64 validators then it will stop tracking per-validator metrics and only track the `total` metric. It will also stop logging per-validator logs and only emit aggregated logs (the exception being that exit and slashing logs are always emitted). These changes were introduced in #3728 to address issues with untenable Prometheus cardinality and log volume when using the validator monitor with high validator counts (e.g., 1000s of validators). Users with fewer than 65 validators will see no change in behavior (apart from the added `total` metric). Users with 65 or more validators who wish to maintain the previous behavior can set something like `--validator-monitor-individual-tracking-threshold 999999`.
This commit is contained in:
parent
168a7805c3
commit
830efdb5c2
@ -579,11 +579,13 @@ where
|
|||||||
mut self,
|
mut self,
|
||||||
auto_register: bool,
|
auto_register: bool,
|
||||||
validators: Vec<PublicKeyBytes>,
|
validators: Vec<PublicKeyBytes>,
|
||||||
|
individual_metrics_threshold: usize,
|
||||||
log: Logger,
|
log: Logger,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
self.validator_monitor = Some(ValidatorMonitor::new(
|
self.validator_monitor = Some(ValidatorMonitor::new(
|
||||||
validators,
|
validators,
|
||||||
auto_register,
|
auto_register,
|
||||||
|
individual_metrics_threshold,
|
||||||
log.clone(),
|
log.clone(),
|
||||||
));
|
));
|
||||||
self
|
self
|
||||||
@ -989,6 +991,7 @@ fn descriptive_db_error(item: &str, error: &StoreError) -> String {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::validator_monitor::DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD;
|
||||||
use eth2_hashing::hash;
|
use eth2_hashing::hash;
|
||||||
use genesis::{
|
use genesis::{
|
||||||
generate_deterministic_keypairs, interop_genesis_state, DEFAULT_ETH1_BLOCK_HASH,
|
generate_deterministic_keypairs, interop_genesis_state, DEFAULT_ETH1_BLOCK_HASH,
|
||||||
@ -1045,7 +1048,12 @@ mod test {
|
|||||||
.testing_slot_clock(Duration::from_secs(1))
|
.testing_slot_clock(Duration::from_secs(1))
|
||||||
.expect("should configure testing slot clock")
|
.expect("should configure testing slot clock")
|
||||||
.shutdown_sender(shutdown_tx)
|
.shutdown_sender(shutdown_tx)
|
||||||
.monitor_validators(true, vec![], log.clone())
|
.monitor_validators(
|
||||||
|
true,
|
||||||
|
vec![],
|
||||||
|
DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD,
|
||||||
|
log.clone(),
|
||||||
|
)
|
||||||
.build()
|
.build()
|
||||||
.expect("should build");
|
.expect("should build");
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ pub use crate::persisted_beacon_chain::PersistedBeaconChain;
|
|||||||
pub use crate::{
|
pub use crate::{
|
||||||
beacon_chain::{BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, FORK_CHOICE_DB_KEY, OP_POOL_DB_KEY},
|
beacon_chain::{BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, FORK_CHOICE_DB_KEY, OP_POOL_DB_KEY},
|
||||||
migrate::MigratorConfig,
|
migrate::MigratorConfig,
|
||||||
|
validator_monitor::DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD,
|
||||||
BeaconChainError, NotifyExecutionLayer, ProduceBlockVerification,
|
BeaconChainError, NotifyExecutionLayer, ProduceBlockVerification,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -472,7 +473,7 @@ where
|
|||||||
log.clone(),
|
log.clone(),
|
||||||
5,
|
5,
|
||||||
)))
|
)))
|
||||||
.monitor_validators(true, vec![], log);
|
.monitor_validators(true, vec![], DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD, log);
|
||||||
|
|
||||||
builder = if let Some(mutator) = self.initial_mutator {
|
builder = if let Some(mutator) = self.initial_mutator {
|
||||||
mutator(builder)
|
mutator(builder)
|
||||||
|
@ -21,10 +21,21 @@ use types::{
|
|||||||
SignedContributionAndProof, Slot, SyncCommitteeMessage, VoluntaryExit,
|
SignedContributionAndProof, Slot, SyncCommitteeMessage, VoluntaryExit,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Used for Prometheus labels.
|
||||||
|
///
|
||||||
|
/// We've used `total` for this value to align with Nimbus, as per:
|
||||||
|
/// https://github.com/sigp/lighthouse/pull/3728#issuecomment-1375173063
|
||||||
|
const TOTAL_LABEL: &str = "total";
|
||||||
|
|
||||||
/// The validator monitor collects per-epoch data about each monitored validator. Historical data
|
/// The validator monitor collects per-epoch data about each monitored validator. Historical data
|
||||||
/// will be kept around for `HISTORIC_EPOCHS` before it is pruned.
|
/// will be kept around for `HISTORIC_EPOCHS` before it is pruned.
|
||||||
pub const HISTORIC_EPOCHS: usize = 4;
|
pub const HISTORIC_EPOCHS: usize = 4;
|
||||||
|
|
||||||
|
/// Once the validator monitor exceeds this number of validators it will stop
|
||||||
|
/// tracking their metrics/logging individually in an effort to reduce
|
||||||
|
/// Prometheus cardinality and log volume.
|
||||||
|
pub const DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD: usize = 64;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
InvalidPubkey(String),
|
InvalidPubkey(String),
|
||||||
@ -258,16 +269,27 @@ pub struct ValidatorMonitor<T> {
|
|||||||
indices: HashMap<u64, PublicKeyBytes>,
|
indices: HashMap<u64, PublicKeyBytes>,
|
||||||
/// If true, allow the automatic registration of validators.
|
/// If true, allow the automatic registration of validators.
|
||||||
auto_register: bool,
|
auto_register: bool,
|
||||||
|
/// Once the number of monitored validators goes above this threshold, we
|
||||||
|
/// will stop tracking metrics/logs on a per-validator basis. This prevents
|
||||||
|
/// large validator counts causing infeasibly high cardinality for
|
||||||
|
/// Prometheus and high log volumes.
|
||||||
|
individual_tracking_threshold: usize,
|
||||||
log: Logger,
|
log: Logger,
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: EthSpec> ValidatorMonitor<T> {
|
impl<T: EthSpec> ValidatorMonitor<T> {
|
||||||
pub fn new(pubkeys: Vec<PublicKeyBytes>, auto_register: bool, log: Logger) -> Self {
|
pub fn new(
|
||||||
|
pubkeys: Vec<PublicKeyBytes>,
|
||||||
|
auto_register: bool,
|
||||||
|
individual_tracking_threshold: usize,
|
||||||
|
log: Logger,
|
||||||
|
) -> Self {
|
||||||
let mut s = Self {
|
let mut s = Self {
|
||||||
validators: <_>::default(),
|
validators: <_>::default(),
|
||||||
indices: <_>::default(),
|
indices: <_>::default(),
|
||||||
auto_register,
|
auto_register,
|
||||||
|
individual_tracking_threshold,
|
||||||
log,
|
log,
|
||||||
_phantom: PhantomData,
|
_phantom: PhantomData,
|
||||||
};
|
};
|
||||||
@ -277,6 +299,13 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
s
|
s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when the validator count is low enough to
|
||||||
|
/// emit metrics and logs on a per-validator basis (rather than just an
|
||||||
|
/// aggregated basis).
|
||||||
|
fn individual_tracking(&self) -> bool {
|
||||||
|
self.validators.len() <= self.individual_tracking_threshold
|
||||||
|
}
|
||||||
|
|
||||||
/// Add some validators to `self` for additional monitoring.
|
/// Add some validators to `self` for additional monitoring.
|
||||||
fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) {
|
fn add_validator_pubkey(&mut self, pubkey: PublicKeyBytes) {
|
||||||
let index_opt = self
|
let index_opt = self
|
||||||
@ -317,6 +346,12 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
for monitored_validator in self.validators.values() {
|
for monitored_validator in self.validators.values() {
|
||||||
if let Some(i) = monitored_validator.index {
|
if let Some(i) = monitored_validator.index {
|
||||||
monitored_validator.touch_epoch_summary(current_epoch);
|
monitored_validator.touch_epoch_summary(current_epoch);
|
||||||
|
|
||||||
|
// Only log the per-validator metrics if it's enabled.
|
||||||
|
if !self.individual_tracking() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
let i = i as usize;
|
let i = i as usize;
|
||||||
let id = &monitored_validator.id;
|
let id = &monitored_validator.id;
|
||||||
|
|
||||||
@ -379,6 +414,24 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Run `func` with the `TOTAL_LABEL` and optionally the
|
||||||
|
/// `individual_id`.
|
||||||
|
///
|
||||||
|
/// This function is used for registering metrics that can be applied to
|
||||||
|
/// both all validators and an individual validator. For example, the count
|
||||||
|
/// of missed head votes can be aggregated across all validators in a single
|
||||||
|
/// metric and also tracked on a per-validator basis.
|
||||||
|
///
|
||||||
|
/// We allow disabling tracking metrics on an individual validator basis
|
||||||
|
/// since it can result in untenable cardinality with high validator counts.
|
||||||
|
fn aggregatable_metric<F: Fn(&str)>(&self, individual_id: &str, func: F) {
|
||||||
|
func(TOTAL_LABEL);
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
|
func(individual_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn process_validator_statuses(
|
pub fn process_validator_statuses(
|
||||||
&self,
|
&self,
|
||||||
epoch: Epoch,
|
epoch: Epoch,
|
||||||
@ -431,11 +484,14 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
// For Base states, this will be *any* attestation whatsoever. For Altair states,
|
// For Base states, this will be *any* attestation whatsoever. For Altair states,
|
||||||
// this will be any attestation that matched a "timely" flag.
|
// this will be any attestation that matched a "timely" flag.
|
||||||
if previous_epoch_matched_any {
|
if previous_epoch_matched_any {
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_ATTESTER_HIT,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_ATTESTER_HIT,
|
||||||
&[id],
|
&[label],
|
||||||
);
|
)
|
||||||
|
});
|
||||||
attestation_success.push(id);
|
attestation_success.push(id);
|
||||||
|
if self.individual_tracking() {
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Previous epoch attestation success";
|
"Previous epoch attestation success";
|
||||||
@ -445,12 +501,16 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"epoch" => prev_epoch,
|
"epoch" => prev_epoch,
|
||||||
"validator" => id,
|
"validator" => id,
|
||||||
)
|
)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_ATTESTER_MISS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_ATTESTER_MISS,
|
||||||
&[id],
|
&[label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
attestation_miss.push(id);
|
attestation_miss.push(id);
|
||||||
|
if self.individual_tracking() {
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Previous epoch attestation missing";
|
"Previous epoch attestation missing";
|
||||||
@ -458,19 +518,25 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"validator" => id,
|
"validator" => id,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Indicates if any on-chain attestation hit the head.
|
// Indicates if any on-chain attestation hit the head.
|
||||||
if previous_epoch_matched_head {
|
if previous_epoch_matched_head {
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_HEAD_ATTESTER_HIT,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_HEAD_ATTESTER_HIT,
|
||||||
&[id],
|
&[label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_HEAD_ATTESTER_MISS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_HEAD_ATTESTER_MISS,
|
||||||
&[id],
|
&[label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
head_miss.push(id);
|
head_miss.push(id);
|
||||||
|
if self.individual_tracking() {
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Attestation failed to match head";
|
"Attestation failed to match head";
|
||||||
@ -478,19 +544,25 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"validator" => id,
|
"validator" => id,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Indicates if any on-chain attestation hit the target.
|
// Indicates if any on-chain attestation hit the target.
|
||||||
if previous_epoch_matched_target {
|
if previous_epoch_matched_target {
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_TARGET_ATTESTER_HIT,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_TARGET_ATTESTER_HIT,
|
||||||
&[id],
|
&[label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_TARGET_ATTESTER_MISS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_TARGET_ATTESTER_MISS,
|
||||||
&[id],
|
&[label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
target_miss.push(id);
|
target_miss.push(id);
|
||||||
|
if self.individual_tracking() {
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Attestation failed to match target";
|
"Attestation failed to match target";
|
||||||
@ -498,6 +570,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"validator" => id,
|
"validator" => id,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get the minimum value among the validator monitor observed inclusion distance
|
// Get the minimum value among the validator monitor observed inclusion distance
|
||||||
// and the epoch summary inclusion distance.
|
// and the epoch summary inclusion distance.
|
||||||
@ -511,6 +584,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(inclusion_delay) = min_inclusion_distance {
|
if let Some(inclusion_delay) = min_inclusion_distance {
|
||||||
if inclusion_delay > spec.min_attestation_inclusion_delay {
|
if inclusion_delay > spec.min_attestation_inclusion_delay {
|
||||||
suboptimal_inclusion.push(id);
|
suboptimal_inclusion.push(id);
|
||||||
|
if self.individual_tracking() {
|
||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Potential sub-optimal inclusion delay";
|
"Potential sub-optimal inclusion delay";
|
||||||
@ -520,13 +594,16 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"validator" => id,
|
"validator" => id,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_int_gauge(
|
metrics::set_int_gauge(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_INCLUSION_DISTANCE,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ON_CHAIN_INCLUSION_DISTANCE,
|
||||||
&[id],
|
&[id],
|
||||||
inclusion_delay as i64,
|
inclusion_delay as i64,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Indicates the number of sync committee signatures that made it into
|
// Indicates the number of sync committee signatures that made it into
|
||||||
// a sync aggregate in the current_epoch (state.epoch - 1).
|
// a sync aggregate in the current_epoch (state.epoch - 1).
|
||||||
@ -536,13 +613,19 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
let current_epoch = epoch - 1;
|
let current_epoch = epoch - 1;
|
||||||
if let Some(sync_committee) = summary.sync_committee() {
|
if let Some(sync_committee) = summary.sync_committee() {
|
||||||
if sync_committee.contains(pubkey) {
|
if sync_committee.contains(pubkey) {
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_int_gauge(
|
metrics::set_int_gauge(
|
||||||
&metrics::VALIDATOR_MONITOR_VALIDATOR_IN_CURRENT_SYNC_COMMITTEE,
|
&metrics::VALIDATOR_MONITOR_VALIDATOR_IN_CURRENT_SYNC_COMMITTEE,
|
||||||
&[id],
|
&[id],
|
||||||
1,
|
1,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
let epoch_summary = monitored_validator.summaries.read();
|
let epoch_summary = monitored_validator.summaries.read();
|
||||||
if let Some(summary) = epoch_summary.get(&current_epoch) {
|
if let Some(summary) = epoch_summary.get(&current_epoch) {
|
||||||
|
// This log is not gated by
|
||||||
|
// `self.individual_tracking()` since the number of
|
||||||
|
// logs that can be generated is capped by the size
|
||||||
|
// of the sync committee.
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Current epoch sync signatures";
|
"Current epoch sync signatures";
|
||||||
@ -552,7 +635,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"validator" => id,
|
"validator" => id,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else if self.individual_tracking() {
|
||||||
metrics::set_int_gauge(
|
metrics::set_int_gauge(
|
||||||
&metrics::VALIDATOR_MONITOR_VALIDATOR_IN_CURRENT_SYNC_COMMITTEE,
|
&metrics::VALIDATOR_MONITOR_VALIDATOR_IN_CURRENT_SYNC_COMMITTEE,
|
||||||
&[id],
|
&[id],
|
||||||
@ -693,12 +776,17 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
let delay = get_block_delay_ms(seen_timestamp, block, slot_clock);
|
let delay = get_block_delay_ms(seen_timestamp, block, slot_clock);
|
||||||
|
|
||||||
metrics::inc_counter_vec(&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_TOTAL, &[src, id]);
|
self.aggregatable_metric(id, |label| {
|
||||||
|
metrics::inc_counter_vec(
|
||||||
|
&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_TOTAL,
|
||||||
|
&[src, label],
|
||||||
|
);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_DELAY_SECONDS,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
@ -764,16 +852,19 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.get_validator(*i) {
|
if let Some(validator) = self.get_validator(*i) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_UNAGGREGATED_ATTESTATION_TOTAL,
|
&metrics::VALIDATOR_MONITOR_UNAGGREGATED_ATTESTATION_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_UNAGGREGATED_ATTESTATION_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_UNAGGREGATED_ATTESTATION_DELAY_SECONDS,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Unaggregated attestation";
|
"Unaggregated attestation";
|
||||||
@ -785,6 +876,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"src" => src,
|
"src" => src,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_unaggregated_attestation(delay)
|
summary.register_unaggregated_attestation(delay)
|
||||||
@ -848,16 +940,19 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.get_validator(aggregator_index) {
|
if let Some(validator) = self.get_validator(aggregator_index) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_AGGREGATED_ATTESTATION_TOTAL,
|
&metrics::VALIDATOR_MONITOR_AGGREGATED_ATTESTATION_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_AGGREGATED_ATTESTATION_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_AGGREGATED_ATTESTATION_DELAY_SECONDS,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Aggregated attestation";
|
"Aggregated attestation";
|
||||||
@ -869,6 +964,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"src" => src,
|
"src" => src,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_aggregated_attestation(delay)
|
summary.register_aggregated_attestation(delay)
|
||||||
@ -879,16 +975,19 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.get_validator(*i) {
|
if let Some(validator) = self.get_validator(*i) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_AGGREGATE_TOTAL,
|
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_AGGREGATE_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_AGGREGATE_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_AGGREGATE_DELAY_SECONDS,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Attestation included in aggregate";
|
"Attestation included in aggregate";
|
||||||
@ -900,6 +999,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"src" => src,
|
"src" => src,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_aggregate_attestation_inclusion()
|
summary.register_aggregate_attestation_inclusion()
|
||||||
@ -933,10 +1033,14 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.get_validator(*i) {
|
if let Some(validator) = self.get_validator(*i) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_BLOCK_TOTAL,
|
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_BLOCK_TOTAL,
|
||||||
&["block", id],
|
&["block", label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_int_gauge(
|
metrics::set_int_gauge(
|
||||||
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_BLOCK_DELAY_SLOTS,
|
&metrics::VALIDATOR_MONITOR_ATTESTATION_IN_BLOCK_DELAY_SLOTS,
|
||||||
&["block", id],
|
&["block", id],
|
||||||
@ -953,6 +1057,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"slot" => %data.slot,
|
"slot" => %data.slot,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_attestation_block_inclusion(inclusion_distance)
|
summary.register_attestation_block_inclusion(inclusion_distance)
|
||||||
@ -1010,16 +1115,19 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
slot_clock,
|
slot_clock,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGES_TOTAL,
|
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGES_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGES_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGES_DELAY_SECONDS,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Sync committee message";
|
"Sync committee message";
|
||||||
@ -1030,6 +1138,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"src" => src,
|
"src" => src,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_sync_committee_message(delay)
|
summary.register_sync_committee_message(delay)
|
||||||
@ -1094,16 +1203,19 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.get_validator(aggregator_index) {
|
if let Some(validator) = self.get_validator(aggregator_index) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_SYNC_CONTRIBUTIONS_TOTAL,
|
&metrics::VALIDATOR_MONITOR_SYNC_CONTRIBUTIONS_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_SYNC_CONTRIBUTIONS_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_SYNC_CONTRIBUTIONS_DELAY_SECONDS,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Sync contribution";
|
"Sync contribution";
|
||||||
@ -1114,6 +1226,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"src" => src,
|
"src" => src,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_sync_committee_contribution(delay)
|
summary.register_sync_committee_contribution(delay)
|
||||||
@ -1124,11 +1237,14 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.validators.get(validator_pubkey) {
|
if let Some(validator) = self.validators.get(validator_pubkey) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGE_IN_CONTRIBUTION_TOTAL,
|
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGE_IN_CONTRIBUTION_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Sync signature included in contribution";
|
"Sync signature included in contribution";
|
||||||
@ -1139,6 +1255,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"src" => src,
|
"src" => src,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_sync_signature_contribution_inclusion()
|
summary.register_sync_signature_contribution_inclusion()
|
||||||
@ -1160,11 +1277,14 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.validators.get(validator_pubkey) {
|
if let Some(validator) = self.validators.get(validator_pubkey) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGE_IN_BLOCK_TOTAL,
|
&metrics::VALIDATOR_MONITOR_SYNC_COMMITTEE_MESSAGE_IN_BLOCK_TOTAL,
|
||||||
&["block", id],
|
&["block", label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
if self.individual_tracking() {
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Sync signature included in block";
|
"Sync signature included in block";
|
||||||
@ -1173,6 +1293,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
"slot" => %slot,
|
"slot" => %slot,
|
||||||
"validator" => %id,
|
"validator" => %id,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
validator.with_epoch_summary(epoch, |summary| {
|
validator.with_epoch_summary(epoch, |summary| {
|
||||||
summary.register_sync_signature_block_inclusions();
|
summary.register_sync_signature_block_inclusions();
|
||||||
@ -1201,8 +1322,12 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
let epoch = exit.epoch;
|
let epoch = exit.epoch;
|
||||||
|
|
||||||
metrics::inc_counter_vec(&metrics::VALIDATOR_MONITOR_EXIT_TOTAL, &[src, id]);
|
self.aggregatable_metric(id, |label| {
|
||||||
|
metrics::inc_counter_vec(&metrics::VALIDATOR_MONITOR_EXIT_TOTAL, &[src, label]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Not gated behind `self.individual_tracking()` since it's an
|
||||||
|
// infrequent and interesting message.
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"Voluntary exit";
|
"Voluntary exit";
|
||||||
@ -1240,11 +1365,15 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
if let Some(validator) = self.get_validator(proposer) {
|
if let Some(validator) = self.get_validator(proposer) {
|
||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PROPOSER_SLASHING_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PROPOSER_SLASHING_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Not gated behind `self.individual_tracking()` since it's an
|
||||||
|
// infrequent and interesting message.
|
||||||
crit!(
|
crit!(
|
||||||
self.log,
|
self.log,
|
||||||
"Proposer slashing";
|
"Proposer slashing";
|
||||||
@ -1293,11 +1422,15 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
let id = &validator.id;
|
let id = &validator.id;
|
||||||
let epoch = data.slot.epoch(T::slots_per_epoch());
|
let epoch = data.slot.epoch(T::slots_per_epoch());
|
||||||
|
|
||||||
|
self.aggregatable_metric(id, |label| {
|
||||||
metrics::inc_counter_vec(
|
metrics::inc_counter_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_ATTESTER_SLASHING_TOTAL,
|
&metrics::VALIDATOR_MONITOR_ATTESTER_SLASHING_TOTAL,
|
||||||
&[src, id],
|
&[src, label],
|
||||||
);
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Not gated behind `self.individual_tracking()` since it's an
|
||||||
|
// infrequent and interesting message.
|
||||||
crit!(
|
crit!(
|
||||||
self.log,
|
self.log,
|
||||||
"Attester slashing";
|
"Attester slashing";
|
||||||
@ -1347,18 +1480,21 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
/*
|
/*
|
||||||
* Attestations
|
* Attestations
|
||||||
*/
|
*/
|
||||||
|
if let Some(delay) = summary.attestation_min_delay {
|
||||||
|
self.aggregatable_metric(id, |tag| {
|
||||||
|
metrics::observe_timer_vec(
|
||||||
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATIONS_MIN_DELAY_SECONDS,
|
||||||
|
&[tag],
|
||||||
|
delay,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATIONS_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATIONS_TOTAL,
|
||||||
&[id],
|
&[id],
|
||||||
summary.attestations as i64,
|
summary.attestations as i64,
|
||||||
);
|
);
|
||||||
if let Some(delay) = summary.attestation_min_delay {
|
|
||||||
metrics::observe_timer_vec(
|
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATIONS_MIN_DELAY_SECONDS,
|
|
||||||
&[id],
|
|
||||||
delay,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATION_AGGREGATE_INCLUSIONS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATION_AGGREGATE_INCLUSIONS,
|
||||||
&[id],
|
&[id],
|
||||||
@ -1369,6 +1505,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
&[id],
|
&[id],
|
||||||
summary.attestation_block_inclusions as i64,
|
summary.attestation_block_inclusions as i64,
|
||||||
);
|
);
|
||||||
|
|
||||||
if let Some(distance) = summary.attestation_min_block_inclusion_distance {
|
if let Some(distance) = summary.attestation_min_block_inclusion_distance {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATION_BLOCK_MIN_INCLUSION_DISTANCE,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_ATTESTATION_BLOCK_MIN_INCLUSION_DISTANCE,
|
||||||
@ -1376,21 +1513,25 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
distance.as_u64() as i64,
|
distance.as_u64() as i64,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Sync committee messages
|
* Sync committee messages
|
||||||
*/
|
*/
|
||||||
|
if let Some(delay) = summary.sync_committee_message_min_delay {
|
||||||
|
self.aggregatable_metric(id, |tag| {
|
||||||
|
metrics::observe_timer_vec(
|
||||||
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_COMMITTEE_MESSAGES_MIN_DELAY_SECONDS,
|
||||||
|
&[tag],
|
||||||
|
delay,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_COMMITTEE_MESSAGES_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_COMMITTEE_MESSAGES_TOTAL,
|
||||||
&[id],
|
&[id],
|
||||||
summary.sync_committee_messages as i64,
|
summary.sync_committee_messages as i64,
|
||||||
);
|
);
|
||||||
if let Some(delay) = summary.sync_committee_message_min_delay {
|
|
||||||
metrics::observe_timer_vec(
|
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_COMMITTEE_MESSAGES_MIN_DELAY_SECONDS,
|
|
||||||
&[id],
|
|
||||||
delay,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_CONTRIBUTION_INCLUSIONS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_CONTRIBUTION_INCLUSIONS,
|
||||||
&[id],
|
&[id],
|
||||||
@ -1401,15 +1542,18 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
&[id],
|
&[id],
|
||||||
summary.sync_signature_block_inclusions as i64,
|
summary.sync_signature_block_inclusions as i64,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sync contributions
|
* Sync contributions
|
||||||
*/
|
*/
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_CONTRIBUTIONS_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_CONTRIBUTIONS_TOTAL,
|
||||||
&[id],
|
&[id],
|
||||||
summary.sync_contributions as i64,
|
summary.sync_contributions as i64,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
if let Some(delay) = summary.sync_contribution_min_delay {
|
if let Some(delay) = summary.sync_contribution_min_delay {
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_CONTRIBUTION_MIN_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_SYNC_CONTRIBUTION_MIN_DELAY_SECONDS,
|
||||||
@ -1421,36 +1565,45 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
/*
|
/*
|
||||||
* Blocks
|
* Blocks
|
||||||
*/
|
*/
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_BEACON_BLOCKS_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_BEACON_BLOCKS_TOTAL,
|
||||||
&[id],
|
&[id],
|
||||||
summary.blocks as i64,
|
summary.blocks as i64,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
if let Some(delay) = summary.block_min_delay {
|
if let Some(delay) = summary.block_min_delay {
|
||||||
|
self.aggregatable_metric(id, |tag| {
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_BEACON_BLOCKS_MIN_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_BEACON_BLOCKS_MIN_DELAY_SECONDS,
|
||||||
&[id],
|
&[tag],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* Aggregates
|
* Aggregates
|
||||||
*/
|
*/
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_AGGREGATES_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_AGGREGATES_TOTAL,
|
||||||
&[id],
|
&[id],
|
||||||
summary.aggregates as i64,
|
summary.aggregates as i64,
|
||||||
);
|
);
|
||||||
|
}
|
||||||
if let Some(delay) = summary.aggregate_min_delay {
|
if let Some(delay) = summary.aggregate_min_delay {
|
||||||
|
self.aggregatable_metric(id, |tag| {
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_AGGREGATES_MIN_DELAY_SECONDS,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_AGGREGATES_MIN_DELAY_SECONDS,
|
||||||
&[id],
|
&[tag],
|
||||||
delay,
|
delay,
|
||||||
);
|
);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* Other
|
* Other
|
||||||
*/
|
*/
|
||||||
|
if self.individual_tracking() {
|
||||||
metrics::set_gauge_vec(
|
metrics::set_gauge_vec(
|
||||||
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_EXITS_TOTAL,
|
&metrics::VALIDATOR_MONITOR_PREV_EPOCH_EXITS_TOTAL,
|
||||||
&[id],
|
&[id],
|
||||||
@ -1471,6 +1624,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the duration since the unix epoch.
|
/// Returns the duration since the unix epoch.
|
||||||
pub fn timestamp_now() -> Duration {
|
pub fn timestamp_now() -> Duration {
|
||||||
|
@ -5,6 +5,7 @@ use beacon_chain::builder::BeaconChainBuilder;
|
|||||||
use beacon_chain::test_utils::{
|
use beacon_chain::test_utils::{
|
||||||
test_spec, AttestationStrategy, BeaconChainHarness, BlockStrategy, DiskHarnessType,
|
test_spec, AttestationStrategy, BeaconChainHarness, BlockStrategy, DiskHarnessType,
|
||||||
};
|
};
|
||||||
|
use beacon_chain::validator_monitor::DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD;
|
||||||
use beacon_chain::{
|
use beacon_chain::{
|
||||||
historical_blocks::HistoricalBlockError, migrate::MigratorConfig, BeaconChain,
|
historical_blocks::HistoricalBlockError, migrate::MigratorConfig, BeaconChain,
|
||||||
BeaconChainError, BeaconChainTypes, BeaconSnapshot, ChainConfig, NotifyExecutionLayer,
|
BeaconChainError, BeaconChainTypes, BeaconSnapshot, ChainConfig, NotifyExecutionLayer,
|
||||||
@ -2121,7 +2122,7 @@ async fn weak_subjectivity_sync() {
|
|||||||
log.clone(),
|
log.clone(),
|
||||||
1,
|
1,
|
||||||
)))
|
)))
|
||||||
.monitor_validators(true, vec![], log)
|
.monitor_validators(true, vec![], DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD, log)
|
||||||
.build()
|
.build()
|
||||||
.expect("should build"),
|
.expect("should build"),
|
||||||
);
|
);
|
||||||
|
@ -173,6 +173,7 @@ where
|
|||||||
.monitor_validators(
|
.monitor_validators(
|
||||||
config.validator_monitor_auto,
|
config.validator_monitor_auto,
|
||||||
config.validator_monitor_pubkeys.clone(),
|
config.validator_monitor_pubkeys.clone(),
|
||||||
|
config.validator_monitor_individual_tracking_threshold,
|
||||||
runtime_context
|
runtime_context
|
||||||
.service_context("val_mon".to_string())
|
.service_context("val_mon".to_string())
|
||||||
.log()
|
.log()
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use beacon_chain::validator_monitor::DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD;
|
||||||
use directory::DEFAULT_ROOT_DIR;
|
use directory::DEFAULT_ROOT_DIR;
|
||||||
use environment::LoggerConfig;
|
use environment::LoggerConfig;
|
||||||
use network::NetworkConfig;
|
use network::NetworkConfig;
|
||||||
@ -59,6 +60,11 @@ pub struct Config {
|
|||||||
pub validator_monitor_auto: bool,
|
pub validator_monitor_auto: bool,
|
||||||
/// A list of validator pubkeys to monitor.
|
/// A list of validator pubkeys to monitor.
|
||||||
pub validator_monitor_pubkeys: Vec<PublicKeyBytes>,
|
pub validator_monitor_pubkeys: Vec<PublicKeyBytes>,
|
||||||
|
/// Once the number of monitored validators goes above this threshold, we
|
||||||
|
/// will stop tracking metrics on a per-validator basis. This prevents large
|
||||||
|
/// validator counts causing infeasibly high cardinality for Prometheus and
|
||||||
|
/// high log volumes.
|
||||||
|
pub validator_monitor_individual_tracking_threshold: usize,
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
/// The `genesis` field is not serialized or deserialized by `serde` to ensure it is defined
|
/// The `genesis` field is not serialized or deserialized by `serde` to ensure it is defined
|
||||||
/// via the CLI at runtime, instead of from a configuration file saved to disk.
|
/// via the CLI at runtime, instead of from a configuration file saved to disk.
|
||||||
@ -97,6 +103,7 @@ impl Default for Config {
|
|||||||
slasher: None,
|
slasher: None,
|
||||||
validator_monitor_auto: false,
|
validator_monitor_auto: false,
|
||||||
validator_monitor_pubkeys: vec![],
|
validator_monitor_pubkeys: vec![],
|
||||||
|
validator_monitor_individual_tracking_threshold: DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD,
|
||||||
logger_config: LoggerConfig::default(),
|
logger_config: LoggerConfig::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@ use super::*;
|
|||||||
use beacon_chain::{
|
use beacon_chain::{
|
||||||
builder::{BeaconChainBuilder, Witness},
|
builder::{BeaconChainBuilder, Witness},
|
||||||
eth1_chain::CachingEth1Backend,
|
eth1_chain::CachingEth1Backend,
|
||||||
|
validator_monitor::DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD,
|
||||||
BeaconChain,
|
BeaconChain,
|
||||||
};
|
};
|
||||||
use futures::prelude::*;
|
use futures::prelude::*;
|
||||||
@ -75,7 +76,7 @@ impl TestBeaconChain {
|
|||||||
Duration::from_millis(SLOT_DURATION_MILLIS),
|
Duration::from_millis(SLOT_DURATION_MILLIS),
|
||||||
))
|
))
|
||||||
.shutdown_sender(shutdown_tx)
|
.shutdown_sender(shutdown_tx)
|
||||||
.monitor_validators(true, vec![], log)
|
.monitor_validators(true, vec![], DEFAULT_INDIVIDUAL_TRACKING_THRESHOLD, log)
|
||||||
.build()
|
.build()
|
||||||
.expect("should build"),
|
.expect("should build"),
|
||||||
);
|
);
|
||||||
|
@ -753,6 +753,17 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
|
|||||||
.value_name("PATH")
|
.value_name("PATH")
|
||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("validator-monitor-individual-tracking-threshold")
|
||||||
|
.long("validator-monitor-individual-tracking-threshold")
|
||||||
|
.help("Once the validator monitor reaches this number of local validators \
|
||||||
|
it will stop collecting per-validator Prometheus metrics and issuing \
|
||||||
|
per-validator logs. Instead, it will provide aggregate metrics and logs. \
|
||||||
|
This avoids infeasibly high cardinality in the Prometheus database and \
|
||||||
|
high log volume when using many validators. Defaults to 64.")
|
||||||
|
.value_name("INTEGER")
|
||||||
|
.takes_value(true)
|
||||||
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("disable-lock-timeouts")
|
Arg::with_name("disable-lock-timeouts")
|
||||||
.long("disable-lock-timeouts")
|
.long("disable-lock-timeouts")
|
||||||
|
@ -675,6 +675,12 @@ pub fn get_config<E: EthSpec>(
|
|||||||
.extend_from_slice(&pubkeys);
|
.extend_from_slice(&pubkeys);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(count) =
|
||||||
|
clap_utils::parse_optional(cli_args, "validator-monitor-individual-tracking-threshold")?
|
||||||
|
{
|
||||||
|
client_config.validator_monitor_individual_tracking_threshold = count;
|
||||||
|
}
|
||||||
|
|
||||||
if cli_args.is_present("disable-lock-timeouts") {
|
if cli_args.is_present("disable-lock-timeouts") {
|
||||||
client_config.chain.enable_lock_timeouts = false;
|
client_config.chain.enable_lock_timeouts = false;
|
||||||
}
|
}
|
||||||
|
@ -1237,6 +1237,31 @@ fn validator_monitor_file_flag() {
|
|||||||
assert_eq!(config.validator_monitor_pubkeys[1].to_string(), "0xbeefdeadbeefdeaddeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef");
|
assert_eq!(config.validator_monitor_pubkeys[1].to_string(), "0xbeefdeadbeefdeaddeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
#[test]
|
||||||
|
fn validator_monitor_metrics_threshold_default() {
|
||||||
|
CommandLineTest::new()
|
||||||
|
.run_with_zero_port()
|
||||||
|
.with_config(|config| {
|
||||||
|
assert_eq!(
|
||||||
|
config.validator_monitor_individual_tracking_threshold,
|
||||||
|
// If this value changes make sure to update the help text for
|
||||||
|
// the CLI command.
|
||||||
|
64
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn validator_monitor_metrics_threshold_custom() {
|
||||||
|
CommandLineTest::new()
|
||||||
|
.flag(
|
||||||
|
"validator-monitor-individual-tracking-threshold",
|
||||||
|
Some("42"),
|
||||||
|
)
|
||||||
|
.run_with_zero_port()
|
||||||
|
.with_config(|config| {
|
||||||
|
assert_eq!(config.validator_monitor_individual_tracking_threshold, 42)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Tests for Store flags.
|
// Tests for Store flags.
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
Reference in New Issue
Block a user