diff --git a/validator_client/src/cli.rs b/validator_client/src/cli.rs index c82a1a9d3..9142a0c7e 100644 --- a/validator_client/src/cli.rs +++ b/validator_client/src/cli.rs @@ -231,6 +231,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { address of this server (e.g., http://localhost:5064).") .takes_value(true), ) + .arg( + Arg::with_name("enable-high-validator-count-metrics") + .long("enable-high-validator-count-metrics") + .help("Enable per validator metrics for > 64 validators. \ + Note: This flag is automatically enabled for <= 64 validators. \ + Enabling this metric for higher validator counts will lead to higher volume \ + of prometheus metrics being collected.") + .takes_value(false), + ) /* * Explorer metrics */ diff --git a/validator_client/src/config.rs b/validator_client/src/config.rs index 22741dabb..0f24e81d5 100644 --- a/validator_client/src/config.rs +++ b/validator_client/src/config.rs @@ -53,6 +53,11 @@ pub struct Config { /// If true, enable functionality that monitors the network for attestations or proposals from /// any of the validators managed by this client before starting up. pub enable_doppelganger_protection: bool, + /// If true, then we publish validator specific metrics (e.g next attestation duty slot) + /// for all our managed validators. + /// Note: We publish validator specific metrics for low validator counts without this flag + /// (<= 64 validators) + pub enable_high_validator_count_metrics: bool, /// Enable use of the blinded block endpoints during proposals. pub builder_proposals: bool, /// Overrides the timestamp field in builder api ValidatorRegistrationV1 @@ -99,6 +104,7 @@ impl Default for Config { http_metrics: <_>::default(), monitoring_api: None, enable_doppelganger_protection: false, + enable_high_validator_count_metrics: false, beacon_nodes_tls_certs: None, block_delay: None, builder_proposals: false, @@ -273,6 +279,10 @@ impl Config { config.http_metrics.enabled = true; } + if cli_args.is_present("enable-high-validator-count-metrics") { + config.enable_high_validator_count_metrics = true; + } + if let Some(address) = cli_args.value_of("metrics-address") { config.http_metrics.listen_addr = address .parse::() diff --git a/validator_client/src/duties_service.rs b/validator_client/src/duties_service.rs index 86b8ca870..6ba2a2d1f 100644 --- a/validator_client/src/duties_service.rs +++ b/validator_client/src/duties_service.rs @@ -9,6 +9,7 @@ mod sync; use crate::beacon_node_fallback::{BeaconNodeFallback, OfflineOnFailure, RequireSynced}; +use crate::http_metrics::metrics::{get_int_gauge, set_int_gauge, ATTESTATION_DUTY}; use crate::{ block_service::BlockServiceNotification, http_metrics::metrics, @@ -39,6 +40,11 @@ const SUBSCRIPTION_BUFFER_SLOTS: u64 = 2; /// Only retain `HISTORICAL_DUTIES_EPOCHS` duties prior to the current epoch. const HISTORICAL_DUTIES_EPOCHS: u64 = 2; +/// Minimum number of validators for which we auto-enable per-validator metrics. +/// For validators greater than this value, we need to manually set the `enable-per-validator-metrics` +/// flag in the cli to enable collection of per validator metrics. +const VALIDATOR_METRICS_MIN_COUNT: usize = 64; + #[derive(Debug)] pub enum Error { UnableToReadSlotClock, @@ -121,6 +127,7 @@ pub struct DutiesService { /// This functionality is a little redundant since most BNs will likely reject duties when they /// aren't synced, but we keep it around for an emergency. pub require_synced: RequireSynced, + pub enable_high_validator_count_metrics: bool, pub context: RuntimeContext, pub spec: ChainSpec, } @@ -220,6 +227,12 @@ impl DutiesService { .cloned() .collect() } + + /// Returns `true` if we should collect per validator metrics and `false` otherwise. + pub fn per_validator_metrics(&self) -> bool { + self.enable_high_validator_count_metrics + || self.total_validator_count() <= VALIDATOR_METRICS_MIN_COUNT + } } /// Start the service that periodically polls the beacon node for validator duties. This will start @@ -501,6 +514,7 @@ async fn poll_beacon_attesters( current_epoch, &local_indices, &local_pubkeys, + current_slot, ) .await { @@ -520,9 +534,14 @@ async fn poll_beacon_attesters( ); // Download the duties and update the duties for the next epoch. - if let Err(e) = - poll_beacon_attesters_for_epoch(duties_service, next_epoch, &local_indices, &local_pubkeys) - .await + if let Err(e) = poll_beacon_attesters_for_epoch( + duties_service, + next_epoch, + &local_indices, + &local_pubkeys, + current_slot, + ) + .await { error!( log, @@ -619,6 +638,7 @@ async fn poll_beacon_attesters_for_epoch( epoch: Epoch, local_indices: &[u64], local_pubkeys: &HashSet, + current_slot: Slot, ) -> Result<(), Error> { let log = duties_service.context.log(); @@ -671,6 +691,35 @@ async fn poll_beacon_attesters_for_epoch( .data .into_iter() .filter(|duty| { + if duties_service.per_validator_metrics() { + let validator_index = duty.validator_index; + let duty_slot = duty.slot; + if let Some(existing_slot_gauge) = + get_int_gauge(&ATTESTATION_DUTY, &[&validator_index.to_string()]) + { + let existing_slot = Slot::new(existing_slot_gauge.get() as u64); + let existing_epoch = existing_slot.epoch(E::slots_per_epoch()); + + // First condition ensures that we switch to the next epoch duty slot + // once the current epoch duty slot passes. + // Second condition is to ensure that next epoch duties don't override + // current epoch duties. + if existing_slot < current_slot + || (duty_slot.epoch(E::slots_per_epoch()) <= existing_epoch + && duty_slot > current_slot + && duty_slot != existing_slot) + { + existing_slot_gauge.set(duty_slot.as_u64() as i64); + } + } else { + set_int_gauge( + &ATTESTATION_DUTY, + &[&validator_index.to_string()], + duty_slot.as_u64() as i64, + ); + } + } + local_pubkeys.contains(&duty.pubkey) && { // Only update the duties if either is true: // diff --git a/validator_client/src/http_metrics/metrics.rs b/validator_client/src/http_metrics/metrics.rs index 146d008a5..0cb3417fc 100644 --- a/validator_client/src/http_metrics/metrics.rs +++ b/validator_client/src/http_metrics/metrics.rs @@ -172,6 +172,12 @@ lazy_static::lazy_static! { "Duration to obtain a signature", &["type"] ); + + pub static ref ATTESTATION_DUTY: Result = try_create_int_gauge_vec( + "vc_attestation_duty_slot", + "Attestation duty slot for all managed validators", + &["validator"] + ); } pub fn gather_prometheus_metrics( diff --git a/validator_client/src/lib.rs b/validator_client/src/lib.rs index 00c3db7aa..f2d647490 100644 --- a/validator_client/src/lib.rs +++ b/validator_client/src/lib.rs @@ -422,6 +422,7 @@ impl ProductionValidatorClient { }, spec: context.eth2_config.spec.clone(), context: duties_context, + enable_high_validator_count_metrics: config.enable_high_validator_count_metrics, }); // Update the metrics server.