Add metrics for beacon block propagation (#2173)
## Issue Addressed NA ## Proposed Changes Adds some metrics to track delays regarding: - LH processing of blocks - delays receiving blocks from other nodes. ## Additional Info NA
This commit is contained in:
parent
de193c95d3
commit
ff35fbb121
@ -24,7 +24,8 @@ use crate::shuffling_cache::{BlockShufflingIds, ShufflingCache};
|
|||||||
use crate::snapshot_cache::SnapshotCache;
|
use crate::snapshot_cache::SnapshotCache;
|
||||||
use crate::timeout_rw_lock::TimeoutRwLock;
|
use crate::timeout_rw_lock::TimeoutRwLock;
|
||||||
use crate::validator_monitor::{
|
use crate::validator_monitor::{
|
||||||
ValidatorMonitor, HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS,
|
get_block_delay_ms, timestamp_now, ValidatorMonitor,
|
||||||
|
HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS,
|
||||||
};
|
};
|
||||||
use crate::validator_pubkey_cache::ValidatorPubkeyCache;
|
use crate::validator_pubkey_cache::ValidatorPubkeyCache;
|
||||||
use crate::BeaconForkChoiceStore;
|
use crate::BeaconForkChoiceStore;
|
||||||
@ -1712,6 +1713,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
|||||||
// This prevents inconsistency between the two at the expense of concurrency.
|
// This prevents inconsistency between the two at the expense of concurrency.
|
||||||
drop(fork_choice);
|
drop(fork_choice);
|
||||||
|
|
||||||
|
// Log metrics to track the delay between when the block was made and when we imported it.
|
||||||
|
//
|
||||||
|
// We're declaring the block "imported" at this point, since fork choice and the DB know
|
||||||
|
// about it.
|
||||||
|
metrics::observe_duration(
|
||||||
|
&metrics::BEACON_BLOCK_IMPORTED_SLOT_START_DELAY_TIME,
|
||||||
|
get_block_delay_ms(timestamp_now(), &signed_block.message, &self.slot_clock),
|
||||||
|
);
|
||||||
|
|
||||||
let parent_root = block.parent_root;
|
let parent_root = block.parent_root;
|
||||||
let slot = block.slot;
|
let slot = block.slot;
|
||||||
|
|
||||||
|
@ -570,6 +570,13 @@ lazy_static! {
|
|||||||
&["src", "validator"]
|
&["src", "validator"]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Block Delay Metrics
|
||||||
|
*/
|
||||||
|
pub static ref BEACON_BLOCK_IMPORTED_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram(
|
||||||
|
"beacon_block_imported_slot_start_delay_time",
|
||||||
|
"Duration between the start of the blocks slot and the current time.",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,
|
/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,
|
||||||
|
@ -367,18 +367,6 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the delay between the start of `block.slot` and `seen_timestamp`.
|
|
||||||
fn get_block_delay_ms<S: SlotClock>(
|
|
||||||
seen_timestamp: Duration,
|
|
||||||
block: &BeaconBlock<T>,
|
|
||||||
slot_clock: &S,
|
|
||||||
) -> Duration {
|
|
||||||
slot_clock
|
|
||||||
.start_of(block.slot)
|
|
||||||
.and_then(|slot_start| seen_timestamp.checked_sub(slot_start))
|
|
||||||
.unwrap_or_else(|| Duration::from_secs(0))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Process a block received on gossip.
|
/// Process a block received on gossip.
|
||||||
pub fn register_gossip_block<S: SlotClock>(
|
pub fn register_gossip_block<S: SlotClock>(
|
||||||
&self,
|
&self,
|
||||||
@ -410,7 +398,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
|
|||||||
slot_clock: &S,
|
slot_clock: &S,
|
||||||
) {
|
) {
|
||||||
if let Some(id) = self.get_validator_id(block.proposer_index) {
|
if let Some(id) = self.get_validator_id(block.proposer_index) {
|
||||||
let delay = Self::get_block_delay_ms(seen_timestamp, block, slot_clock);
|
let delay = get_block_delay_ms(seen_timestamp, block, slot_clock);
|
||||||
|
|
||||||
metrics::inc_counter_vec(&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_TOTAL, &[src, id]);
|
metrics::inc_counter_vec(&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_TOTAL, &[src, id]);
|
||||||
metrics::observe_timer_vec(
|
metrics::observe_timer_vec(
|
||||||
@ -951,3 +939,15 @@ pub fn timestamp_now() -> Duration {
|
|||||||
fn u64_to_i64(n: impl Into<u64>) -> i64 {
|
fn u64_to_i64(n: impl Into<u64>) -> i64 {
|
||||||
i64::try_from(n.into()).unwrap_or(i64::max_value())
|
i64::try_from(n.into()).unwrap_or(i64::max_value())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the delay between the start of `block.slot` and `seen_timestamp`.
|
||||||
|
pub fn get_block_delay_ms<T: EthSpec, S: SlotClock>(
|
||||||
|
seen_timestamp: Duration,
|
||||||
|
block: &BeaconBlock<T>,
|
||||||
|
slot_clock: &S,
|
||||||
|
) -> Duration {
|
||||||
|
slot_clock
|
||||||
|
.start_of(block.slot)
|
||||||
|
.and_then(|slot_start| seen_timestamp.checked_sub(slot_start))
|
||||||
|
.unwrap_or_else(|| Duration::from_secs(0))
|
||||||
|
}
|
||||||
|
@ -3,12 +3,13 @@ use crate::{metrics, service::NetworkMessage, sync::SyncMessage};
|
|||||||
use beacon_chain::{
|
use beacon_chain::{
|
||||||
attestation_verification::{Error as AttnError, SignatureVerifiedAttestation},
|
attestation_verification::{Error as AttnError, SignatureVerifiedAttestation},
|
||||||
observed_operations::ObservationOutcome,
|
observed_operations::ObservationOutcome,
|
||||||
|
validator_monitor::get_block_delay_ms,
|
||||||
BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
|
BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
|
||||||
};
|
};
|
||||||
use eth2_libp2p::{MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
|
use eth2_libp2p::{MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
|
||||||
use slog::{debug, error, info, trace, warn};
|
use slog::{debug, error, info, trace, warn};
|
||||||
use ssz::Encode;
|
use ssz::Encode;
|
||||||
use std::time::Duration;
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
use types::{
|
use types::{
|
||||||
Attestation, AttesterSlashing, Hash256, ProposerSlashing, SignedAggregateAndProof,
|
Attestation, AttesterSlashing, Hash256, ProposerSlashing, SignedAggregateAndProof,
|
||||||
SignedBeaconBlock, SignedVoluntaryExit, SubnetId,
|
SignedBeaconBlock, SignedVoluntaryExit, SubnetId,
|
||||||
@ -237,6 +238,12 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
block: SignedBeaconBlock<T::EthSpec>,
|
block: SignedBeaconBlock<T::EthSpec>,
|
||||||
seen_duration: Duration,
|
seen_duration: Duration,
|
||||||
) {
|
) {
|
||||||
|
// Log metrics to track delay from other nodes on the network.
|
||||||
|
metrics::observe_duration(
|
||||||
|
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
|
||||||
|
get_block_delay_ms(seen_duration, &block.message, &self.chain.slot_clock),
|
||||||
|
);
|
||||||
|
|
||||||
let verified_block = match self.chain.verify_block_for_gossip(block) {
|
let verified_block = match self.chain.verify_block_for_gossip(block) {
|
||||||
Ok(verified_block) => {
|
Ok(verified_block) => {
|
||||||
info!(
|
info!(
|
||||||
@ -246,6 +253,19 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
"hash" => %verified_block.block_root
|
"hash" => %verified_block.block_root
|
||||||
);
|
);
|
||||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
|
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
|
||||||
|
|
||||||
|
// Log metrics to keep track of propagation delay times.
|
||||||
|
if let Some(duration) = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.ok()
|
||||||
|
.and_then(|now| now.checked_sub(seen_duration))
|
||||||
|
{
|
||||||
|
metrics::observe_duration(
|
||||||
|
&metrics::BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME,
|
||||||
|
duration,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
verified_block
|
verified_block
|
||||||
}
|
}
|
||||||
Err(BlockError::ParentUnknown(block)) => {
|
Err(BlockError::ParentUnknown(block)) => {
|
||||||
|
@ -351,6 +351,17 @@ lazy_static! {
|
|||||||
&["range_type"]
|
&["range_type"]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Block Delay Metrics
|
||||||
|
*/
|
||||||
|
pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result<Histogram> = try_create_histogram(
|
||||||
|
"beacon_block_gossip_propagation_verification_delay_time",
|
||||||
|
"Duration between when the block is received and when it is verified for propagation.",
|
||||||
|
);
|
||||||
|
pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram(
|
||||||
|
"beacon_block_gossip_slot_start_delay_time",
|
||||||
|
"Duration between when the block is received and the start of the slot it belongs to.",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn register_attestation_error(error: &AttnError) {
|
pub fn register_attestation_error(error: &AttnError) {
|
||||||
|
@ -252,14 +252,8 @@ pub fn start_timer(histogram: &Result<Histogram>) -> Option<HistogramTimer> {
|
|||||||
|
|
||||||
/// Starts a timer on `vec` with the given `name`.
|
/// Starts a timer on `vec` with the given `name`.
|
||||||
pub fn observe_timer_vec(vec: &Result<HistogramVec>, name: &[&str], duration: Duration) {
|
pub fn observe_timer_vec(vec: &Result<HistogramVec>, name: &[&str], duration: Duration) {
|
||||||
// This conversion was taken from here:
|
|
||||||
//
|
|
||||||
// https://docs.rs/prometheus/0.5.0/src/prometheus/histogram.rs.html#550-555
|
|
||||||
let nanos = f64::from(duration.subsec_nanos()) / 1e9;
|
|
||||||
let secs = duration.as_secs() as f64 + nanos;
|
|
||||||
|
|
||||||
if let Some(h) = get_histogram(vec, name) {
|
if let Some(h) = get_histogram(vec, name) {
|
||||||
h.observe(secs)
|
h.observe(duration_to_f64(duration))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,3 +324,17 @@ pub fn observe(histogram: &Result<Histogram>, value: f64) {
|
|||||||
histogram.observe(value);
|
histogram.observe(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn observe_duration(histogram: &Result<Histogram>, duration: Duration) {
|
||||||
|
if let Ok(histogram) = histogram {
|
||||||
|
histogram.observe(duration_to_f64(duration))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn duration_to_f64(duration: Duration) -> f64 {
|
||||||
|
// This conversion was taken from here:
|
||||||
|
//
|
||||||
|
// https://docs.rs/prometheus/0.5.0/src/prometheus/histogram.rs.html#550-555
|
||||||
|
let nanos = f64::from(duration.subsec_nanos()) / 1e9;
|
||||||
|
duration.as_secs() as f64 + nanos
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user