Add metrics for beacon block propagation (#2173)

## Issue Addressed

NA

## Proposed Changes

Adds some metrics to track delays regarding:

- LH processing of blocks
- delays receiving blocks from other nodes.

## Additional Info

NA
This commit is contained in:
Paul Hauner 2021-02-04 05:33:56 +00:00
parent de193c95d3
commit ff35fbb121
6 changed files with 78 additions and 22 deletions

View File

@ -24,7 +24,8 @@ use crate::shuffling_cache::{BlockShufflingIds, ShufflingCache};
use crate::snapshot_cache::SnapshotCache;
use crate::timeout_rw_lock::TimeoutRwLock;
use crate::validator_monitor::{
ValidatorMonitor, HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS,
get_block_delay_ms, timestamp_now, ValidatorMonitor,
HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS,
};
use crate::validator_pubkey_cache::ValidatorPubkeyCache;
use crate::BeaconForkChoiceStore;
@ -1712,6 +1713,15 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
// This prevents inconsistency between the two at the expense of concurrency.
drop(fork_choice);
// Log metrics to track the delay between when the block was made and when we imported it.
//
// We're declaring the block "imported" at this point, since fork choice and the DB know
// about it.
metrics::observe_duration(
&metrics::BEACON_BLOCK_IMPORTED_SLOT_START_DELAY_TIME,
get_block_delay_ms(timestamp_now(), &signed_block.message, &self.slot_clock),
);
let parent_root = block.parent_root;
let slot = block.slot;

View File

@ -570,6 +570,13 @@ lazy_static! {
&["src", "validator"]
);
/*
* Block Delay Metrics
*/
pub static ref BEACON_BLOCK_IMPORTED_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram(
"beacon_block_imported_slot_start_delay_time",
"Duration between the start of the blocks slot and the current time.",
);
}
/// Scrape the `beacon_chain` for metrics that are not constantly updated (e.g., the present slot,

View File

@ -367,18 +367,6 @@ impl<T: EthSpec> ValidatorMonitor<T> {
}
}
/// Returns the delay between the start of `block.slot` and `seen_timestamp`.
fn get_block_delay_ms<S: SlotClock>(
seen_timestamp: Duration,
block: &BeaconBlock<T>,
slot_clock: &S,
) -> Duration {
slot_clock
.start_of(block.slot)
.and_then(|slot_start| seen_timestamp.checked_sub(slot_start))
.unwrap_or_else(|| Duration::from_secs(0))
}
/// Process a block received on gossip.
pub fn register_gossip_block<S: SlotClock>(
&self,
@ -410,7 +398,7 @@ impl<T: EthSpec> ValidatorMonitor<T> {
slot_clock: &S,
) {
if let Some(id) = self.get_validator_id(block.proposer_index) {
let delay = Self::get_block_delay_ms(seen_timestamp, block, slot_clock);
let delay = get_block_delay_ms(seen_timestamp, block, slot_clock);
metrics::inc_counter_vec(&metrics::VALIDATOR_MONITOR_BEACON_BLOCK_TOTAL, &[src, id]);
metrics::observe_timer_vec(
@ -951,3 +939,15 @@ pub fn timestamp_now() -> Duration {
fn u64_to_i64(n: impl Into<u64>) -> i64 {
i64::try_from(n.into()).unwrap_or(i64::max_value())
}
/// Returns the delay between the start of `block.slot` and `seen_timestamp`.
pub fn get_block_delay_ms<T: EthSpec, S: SlotClock>(
seen_timestamp: Duration,
block: &BeaconBlock<T>,
slot_clock: &S,
) -> Duration {
slot_clock
.start_of(block.slot)
.and_then(|slot_start| seen_timestamp.checked_sub(slot_start))
.unwrap_or_else(|| Duration::from_secs(0))
}

View File

@ -3,12 +3,13 @@ use crate::{metrics, service::NetworkMessage, sync::SyncMessage};
use beacon_chain::{
attestation_verification::{Error as AttnError, SignatureVerifiedAttestation},
observed_operations::ObservationOutcome,
validator_monitor::get_block_delay_ms,
BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError,
};
use eth2_libp2p::{MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource};
use slog::{debug, error, info, trace, warn};
use ssz::Encode;
use std::time::Duration;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use types::{
Attestation, AttesterSlashing, Hash256, ProposerSlashing, SignedAggregateAndProof,
SignedBeaconBlock, SignedVoluntaryExit, SubnetId,
@ -237,6 +238,12 @@ impl<T: BeaconChainTypes> Worker<T> {
block: SignedBeaconBlock<T::EthSpec>,
seen_duration: Duration,
) {
// Log metrics to track delay from other nodes on the network.
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
get_block_delay_ms(seen_duration, &block.message, &self.chain.slot_clock),
);
let verified_block = match self.chain.verify_block_for_gossip(block) {
Ok(verified_block) => {
info!(
@ -246,6 +253,19 @@ impl<T: BeaconChainTypes> Worker<T> {
"hash" => %verified_block.block_root
);
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
// Log metrics to keep track of propagation delay times.
if let Some(duration) = SystemTime::now()
.duration_since(UNIX_EPOCH)
.ok()
.and_then(|now| now.checked_sub(seen_duration))
{
metrics::observe_duration(
&metrics::BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME,
duration,
);
}
verified_block
}
Err(BlockError::ParentUnknown(block)) => {

View File

@ -351,6 +351,17 @@ lazy_static! {
&["range_type"]
);
/*
* Block Delay Metrics
*/
pub static ref BEACON_BLOCK_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: Result<Histogram> = try_create_histogram(
"beacon_block_gossip_propagation_verification_delay_time",
"Duration between when the block is received and when it is verified for propagation.",
);
pub static ref BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME: Result<Histogram> = try_create_histogram(
"beacon_block_gossip_slot_start_delay_time",
"Duration between when the block is received and the start of the slot it belongs to.",
);
}
pub fn register_attestation_error(error: &AttnError) {

View File

@ -252,14 +252,8 @@ pub fn start_timer(histogram: &Result<Histogram>) -> Option<HistogramTimer> {
/// Starts a timer on `vec` with the given `name`.
pub fn observe_timer_vec(vec: &Result<HistogramVec>, name: &[&str], duration: Duration) {
// This conversion was taken from here:
//
// https://docs.rs/prometheus/0.5.0/src/prometheus/histogram.rs.html#550-555
let nanos = f64::from(duration.subsec_nanos()) / 1e9;
let secs = duration.as_secs() as f64 + nanos;
if let Some(h) = get_histogram(vec, name) {
h.observe(secs)
h.observe(duration_to_f64(duration))
}
}
@ -330,3 +324,17 @@ pub fn observe(histogram: &Result<Histogram>, value: f64) {
histogram.observe(value);
}
}
pub fn observe_duration(histogram: &Result<Histogram>, duration: Duration) {
if let Ok(histogram) = histogram {
histogram.observe(duration_to_f64(duration))
}
}
fn duration_to_f64(duration: Duration) -> f64 {
// This conversion was taken from here:
//
// https://docs.rs/prometheus/0.5.0/src/prometheus/histogram.rs.html#550-555
let nanos = f64::from(duration.subsec_nanos()) / 1e9;
duration.as_secs() as f64 + nanos
}