Add attestation duty slot metric (#2704)

## Issue Addressed

Resolves #2521 

## Proposed Changes

Add a metric that indicates the next attestation duty slot for all managed validators in the validator client.
This commit is contained in:
Pawan Dhananjay 2023-02-09 23:51:17 +00:00
parent aa5b7ef783
commit 2b735a9e8b
5 changed files with 78 additions and 3 deletions

View File

@ -231,6 +231,15 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
address of this server (e.g., http://localhost:5064).")
.takes_value(true),
)
.arg(
Arg::with_name("enable-high-validator-count-metrics")
.long("enable-high-validator-count-metrics")
.help("Enable per validator metrics for > 64 validators. \
Note: This flag is automatically enabled for <= 64 validators. \
Enabling this metric for higher validator counts will lead to higher volume \
of prometheus metrics being collected.")
.takes_value(false),
)
/*
* Explorer metrics
*/

View File

@ -53,6 +53,11 @@ pub struct Config {
/// If true, enable functionality that monitors the network for attestations or proposals from
/// any of the validators managed by this client before starting up.
pub enable_doppelganger_protection: bool,
/// If true, then we publish validator specific metrics (e.g next attestation duty slot)
/// for all our managed validators.
/// Note: We publish validator specific metrics for low validator counts without this flag
/// (<= 64 validators)
pub enable_high_validator_count_metrics: bool,
/// Enable use of the blinded block endpoints during proposals.
pub builder_proposals: bool,
/// Overrides the timestamp field in builder api ValidatorRegistrationV1
@ -99,6 +104,7 @@ impl Default for Config {
http_metrics: <_>::default(),
monitoring_api: None,
enable_doppelganger_protection: false,
enable_high_validator_count_metrics: false,
beacon_nodes_tls_certs: None,
block_delay: None,
builder_proposals: false,
@ -273,6 +279,10 @@ impl Config {
config.http_metrics.enabled = true;
}
if cli_args.is_present("enable-high-validator-count-metrics") {
config.enable_high_validator_count_metrics = true;
}
if let Some(address) = cli_args.value_of("metrics-address") {
config.http_metrics.listen_addr = address
.parse::<IpAddr>()

View File

@ -9,6 +9,7 @@
mod sync;
use crate::beacon_node_fallback::{BeaconNodeFallback, OfflineOnFailure, RequireSynced};
use crate::http_metrics::metrics::{get_int_gauge, set_int_gauge, ATTESTATION_DUTY};
use crate::{
block_service::BlockServiceNotification,
http_metrics::metrics,
@ -39,6 +40,11 @@ const SUBSCRIPTION_BUFFER_SLOTS: u64 = 2;
/// Only retain `HISTORICAL_DUTIES_EPOCHS` duties prior to the current epoch.
const HISTORICAL_DUTIES_EPOCHS: u64 = 2;
/// Minimum number of validators for which we auto-enable per-validator metrics.
/// For validators greater than this value, we need to manually set the `enable-per-validator-metrics`
/// flag in the cli to enable collection of per validator metrics.
const VALIDATOR_METRICS_MIN_COUNT: usize = 64;
#[derive(Debug)]
pub enum Error {
UnableToReadSlotClock,
@ -121,6 +127,7 @@ pub struct DutiesService<T, E: EthSpec> {
/// This functionality is a little redundant since most BNs will likely reject duties when they
/// aren't synced, but we keep it around for an emergency.
pub require_synced: RequireSynced,
pub enable_high_validator_count_metrics: bool,
pub context: RuntimeContext<E>,
pub spec: ChainSpec,
}
@ -220,6 +227,12 @@ impl<T: SlotClock + 'static, E: EthSpec> DutiesService<T, E> {
.cloned()
.collect()
}
/// Returns `true` if we should collect per validator metrics and `false` otherwise.
pub fn per_validator_metrics(&self) -> bool {
self.enable_high_validator_count_metrics
|| self.total_validator_count() <= VALIDATOR_METRICS_MIN_COUNT
}
}
/// Start the service that periodically polls the beacon node for validator duties. This will start
@ -501,6 +514,7 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
current_epoch,
&local_indices,
&local_pubkeys,
current_slot,
)
.await
{
@ -520,9 +534,14 @@ async fn poll_beacon_attesters<T: SlotClock + 'static, E: EthSpec>(
);
// Download the duties and update the duties for the next epoch.
if let Err(e) =
poll_beacon_attesters_for_epoch(duties_service, next_epoch, &local_indices, &local_pubkeys)
.await
if let Err(e) = poll_beacon_attesters_for_epoch(
duties_service,
next_epoch,
&local_indices,
&local_pubkeys,
current_slot,
)
.await
{
error!(
log,
@ -619,6 +638,7 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
epoch: Epoch,
local_indices: &[u64],
local_pubkeys: &HashSet<PublicKeyBytes>,
current_slot: Slot,
) -> Result<(), Error> {
let log = duties_service.context.log();
@ -671,6 +691,35 @@ async fn poll_beacon_attesters_for_epoch<T: SlotClock + 'static, E: EthSpec>(
.data
.into_iter()
.filter(|duty| {
if duties_service.per_validator_metrics() {
let validator_index = duty.validator_index;
let duty_slot = duty.slot;
if let Some(existing_slot_gauge) =
get_int_gauge(&ATTESTATION_DUTY, &[&validator_index.to_string()])
{
let existing_slot = Slot::new(existing_slot_gauge.get() as u64);
let existing_epoch = existing_slot.epoch(E::slots_per_epoch());
// First condition ensures that we switch to the next epoch duty slot
// once the current epoch duty slot passes.
// Second condition is to ensure that next epoch duties don't override
// current epoch duties.
if existing_slot < current_slot
|| (duty_slot.epoch(E::slots_per_epoch()) <= existing_epoch
&& duty_slot > current_slot
&& duty_slot != existing_slot)
{
existing_slot_gauge.set(duty_slot.as_u64() as i64);
}
} else {
set_int_gauge(
&ATTESTATION_DUTY,
&[&validator_index.to_string()],
duty_slot.as_u64() as i64,
);
}
}
local_pubkeys.contains(&duty.pubkey) && {
// Only update the duties if either is true:
//

View File

@ -172,6 +172,12 @@ lazy_static::lazy_static! {
"Duration to obtain a signature",
&["type"]
);
pub static ref ATTESTATION_DUTY: Result<IntGaugeVec> = try_create_int_gauge_vec(
"vc_attestation_duty_slot",
"Attestation duty slot for all managed validators",
&["validator"]
);
}
pub fn gather_prometheus_metrics<T: EthSpec>(

View File

@ -422,6 +422,7 @@ impl<T: EthSpec> ProductionValidatorClient<T> {
},
spec: context.eth2_config.spec.clone(),
context: duties_context,
enable_high_validator_count_metrics: config.enable_high_validator_count_metrics,
});
// Update the metrics server.