Metrics and DEBG log for late gossip blocks (#2533)
## Issue Addressed Which issue # does this PR address? ## Proposed Changes - Add a counter metric to log when a block is received late from gossip. - Also push a `DEBG` log for the above condition. - Use Debug (`?`) instead of Display (`%`) for a bunch of logs in the beacon processor, so we don't have to deal with concatenated block roots. - Add new ERRO and CRIT to HTTP API to alert users when they're publishing late blocks. ## Additional Info NA
This commit is contained in:
parent
12fe72bd37
commit
f2a8c6229c
@ -2932,6 +2932,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
.beacon_state
|
||||
.previous_epoch()
|
||||
.start_slot(T::EthSpec::slots_per_epoch());
|
||||
let head_proposer_index = new_head.beacon_block.message().proposer_index();
|
||||
|
||||
// Update the snapshot that stores the head of the chain at the time it received the
|
||||
// block.
|
||||
@ -2962,9 +2963,10 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
||||
debug!(
|
||||
self.log,
|
||||
"Delayed head block";
|
||||
"delay" => ?block_delay,
|
||||
"root" => ?beacon_block_root,
|
||||
"block_root" => ?beacon_block_root,
|
||||
"proposer_index" => head_proposer_index,
|
||||
"slot" => head_slot,
|
||||
"block_delay" => ?block_delay,
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -895,7 +895,10 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
info!(
|
||||
log,
|
||||
"Valid block from HTTP API";
|
||||
"root" => format!("{}", root)
|
||||
"block_delay" => ?delay,
|
||||
"root" => format!("{}", root),
|
||||
"proposer_index" => block.message().proposer_index(),
|
||||
"slot" => block.slot(),
|
||||
);
|
||||
|
||||
// Notify the validator monitor.
|
||||
@ -917,25 +920,25 @@ pub fn serve<T: BeaconChainTypes>(
|
||||
//
|
||||
// Check to see the thresholds are non-zero to avoid logging errors with small
|
||||
// slot times (e.g., during testing)
|
||||
let crit_threshold = chain.spec.seconds_per_slot / 3;
|
||||
let warn_threshold = chain.spec.seconds_per_slot / 6;
|
||||
if crit_threshold > 0 && delay.as_secs() > crit_threshold {
|
||||
let crit_threshold = chain.slot_clock.unagg_attestation_production_delay();
|
||||
let error_threshold = crit_threshold / 2;
|
||||
if delay >= crit_threshold {
|
||||
crit!(
|
||||
log,
|
||||
"Block was broadcast too late";
|
||||
"root" => ?root,
|
||||
"slot" => block.slot(),
|
||||
"delay_ms" => delay.as_millis(),
|
||||
"msg" => "system may be overloaded, block likely to be orphaned",
|
||||
"delay_ms" => delay.as_millis(),
|
||||
"slot" => block.slot(),
|
||||
"root" => ?root,
|
||||
)
|
||||
} else if warn_threshold > 0 && delay.as_secs() > warn_threshold {
|
||||
warn!(
|
||||
} else if delay >= error_threshold {
|
||||
error!(
|
||||
log,
|
||||
"Block broadcast was delayed";
|
||||
"root" => ?root,
|
||||
"slot" => block.slot(),
|
||||
"delay_ms" => delay.as_millis(),
|
||||
"msg" => "system may be overloaded, block may be orphaned",
|
||||
"delay_ms" => delay.as_millis(),
|
||||
"slot" => block.slot(),
|
||||
"root" => ?root,
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -33,4 +33,12 @@ lazy_static::lazy_static! {
|
||||
"http_api_block_broadcast_delay_times",
|
||||
"Time between start of the slot and when the block was broadcast"
|
||||
);
|
||||
pub static ref HTTP_API_BLOCK_PUBLISHED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||
"http_api_block_published_late_total",
|
||||
"The count of times a block was published beyond more than half way to the attestation deadline"
|
||||
);
|
||||
pub static ref HTTP_API_BLOCK_PUBLISHED_VERY_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||
"http_api_block_published_very_late_total",
|
||||
"The count of times a block was published beyond the attestation deadline"
|
||||
);
|
||||
}
|
||||
|
@ -291,19 +291,33 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
reprocess_tx: mpsc::Sender<ReprocessQueueMessage<T>>,
|
||||
seen_duration: Duration,
|
||||
) {
|
||||
let block_delay =
|
||||
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock);
|
||||
// Log metrics to track delay from other nodes on the network.
|
||||
metrics::observe_duration(
|
||||
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
|
||||
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock),
|
||||
block_delay,
|
||||
);
|
||||
|
||||
let verified_block = match self.chain.verify_block_for_gossip(block) {
|
||||
Ok(verified_block) => {
|
||||
if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() {
|
||||
metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL);
|
||||
debug!(
|
||||
self.log,
|
||||
"Gossip block arrived late";
|
||||
"block_root" => ?verified_block.block_root,
|
||||
"proposer_index" => verified_block.block.message().proposer_index(),
|
||||
"slot" => verified_block.block.slot(),
|
||||
"block_delay" => ?block_delay,
|
||||
);
|
||||
}
|
||||
|
||||
info!(
|
||||
self.log,
|
||||
"New block received";
|
||||
"slot" => verified_block.block.slot(),
|
||||
"hash" => %verified_block.block_root
|
||||
"hash" => ?verified_block.block_root
|
||||
);
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
|
||||
|
||||
@ -325,7 +339,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
debug!(
|
||||
self.log,
|
||||
"Unknown parent for gossip block";
|
||||
"root" => %block.canonical_root()
|
||||
"root" => ?block.canonical_root()
|
||||
);
|
||||
self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block));
|
||||
return;
|
||||
@ -392,7 +406,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Block arrived early";
|
||||
"block_slot" => %block_slot,
|
||||
"block_root" => %block_root,
|
||||
"block_root" => ?block_root,
|
||||
"msg" => "if this happens consistently, check system clock"
|
||||
);
|
||||
|
||||
@ -423,7 +437,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Failed to defer block import";
|
||||
"block_slot" => %block_slot,
|
||||
"block_root" => %block_root,
|
||||
"block_root" => ?block_root,
|
||||
"location" => "block gossip"
|
||||
)
|
||||
}
|
||||
@ -440,7 +454,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
"Failed to defer block import";
|
||||
"error" => ?e,
|
||||
"block_slot" => %block_slot,
|
||||
"block_root" => %block_root,
|
||||
"block_root" => ?block_root,
|
||||
"location" => "block gossip"
|
||||
)
|
||||
}
|
||||
@ -472,7 +486,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Failed to inform block import";
|
||||
"source" => "gossip",
|
||||
"block_root" => %block_root,
|
||||
"block_root" => ?block_root,
|
||||
)
|
||||
};
|
||||
|
||||
@ -511,7 +525,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Invalid gossip beacon block";
|
||||
"outcome" => ?other,
|
||||
"block root" => %block.canonical_root(),
|
||||
"block root" => ?block.canonical_root(),
|
||||
"block slot" => block.slot()
|
||||
);
|
||||
self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError);
|
||||
@ -843,7 +857,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Attestation is not within the last ATTESTATION_PROPAGATION_SLOT_RANGE slots";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %beacon_block_root,
|
||||
"block" => ?beacon_block_root,
|
||||
"type" => ?attestation_type,
|
||||
);
|
||||
|
||||
@ -918,7 +932,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Attestation already known";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %beacon_block_root,
|
||||
"block" => ?beacon_block_root,
|
||||
"type" => ?attestation_type,
|
||||
);
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
||||
@ -935,7 +949,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Aggregator already known";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %beacon_block_root,
|
||||
"block" => ?beacon_block_root,
|
||||
"type" => ?attestation_type,
|
||||
);
|
||||
// This is an allowed behaviour.
|
||||
@ -956,7 +970,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Prior attestation known";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %beacon_block_root,
|
||||
"block" => ?beacon_block_root,
|
||||
"epoch" => %epoch,
|
||||
"validator_index" => validator_index,
|
||||
"type" => ?attestation_type,
|
||||
@ -980,7 +994,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Validation Index too high";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %beacon_block_root,
|
||||
"block" => ?beacon_block_root,
|
||||
"type" => ?attestation_type,
|
||||
);
|
||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject);
|
||||
@ -991,7 +1005,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Attestation for unknown block";
|
||||
"peer_id" => %peer_id,
|
||||
"block" => %beacon_block_root
|
||||
"block" => ?beacon_block_root
|
||||
);
|
||||
if let Some(sender) = reprocess_tx {
|
||||
// We don't know the block, get the sync manager to handle the block lookup, and
|
||||
@ -1200,7 +1214,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
||||
self.log,
|
||||
"Invalid attestation from network";
|
||||
"reason" => ?error,
|
||||
"block" => %beacon_block_root,
|
||||
"block" => ?beacon_block_root,
|
||||
"peer_id" => %peer_id,
|
||||
"type" => ?attestation_type,
|
||||
);
|
||||
|
@ -465,6 +465,10 @@ lazy_static! {
|
||||
"beacon_block_gossip_slot_start_delay_time",
|
||||
"Duration between when the block is received and the start of the slot it belongs to.",
|
||||
);
|
||||
pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||
"beacon_block_gossip_arrived_late_total",
|
||||
"Count of times when a gossip block arrived from the network later than the attestation deadline.",
|
||||
);
|
||||
|
||||
/*
|
||||
* Attestation reprocessing queue metrics.
|
||||
|
Loading…
Reference in New Issue
Block a user