Metrics and DEBG log for late gossip blocks (#2533)
## Issue Addressed Which issue # does this PR address? ## Proposed Changes - Add a counter metric to log when a block is received late from gossip. - Also push a `DEBG` log for the above condition. - Use Debug (`?`) instead of Display (`%`) for a bunch of logs in the beacon processor, so we don't have to deal with concatenated block roots. - Add new ERRO and CRIT to HTTP API to alert users when they're publishing late blocks. ## Additional Info NA
This commit is contained in:
parent
12fe72bd37
commit
f2a8c6229c
@ -2932,6 +2932,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
|||||||
.beacon_state
|
.beacon_state
|
||||||
.previous_epoch()
|
.previous_epoch()
|
||||||
.start_slot(T::EthSpec::slots_per_epoch());
|
.start_slot(T::EthSpec::slots_per_epoch());
|
||||||
|
let head_proposer_index = new_head.beacon_block.message().proposer_index();
|
||||||
|
|
||||||
// Update the snapshot that stores the head of the chain at the time it received the
|
// Update the snapshot that stores the head of the chain at the time it received the
|
||||||
// block.
|
// block.
|
||||||
@ -2962,9 +2963,10 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
|
|||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Delayed head block";
|
"Delayed head block";
|
||||||
"delay" => ?block_delay,
|
"block_root" => ?beacon_block_root,
|
||||||
"root" => ?beacon_block_root,
|
"proposer_index" => head_proposer_index,
|
||||||
"slot" => head_slot,
|
"slot" => head_slot,
|
||||||
|
"block_delay" => ?block_delay,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -895,7 +895,10 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
info!(
|
info!(
|
||||||
log,
|
log,
|
||||||
"Valid block from HTTP API";
|
"Valid block from HTTP API";
|
||||||
"root" => format!("{}", root)
|
"block_delay" => ?delay,
|
||||||
|
"root" => format!("{}", root),
|
||||||
|
"proposer_index" => block.message().proposer_index(),
|
||||||
|
"slot" => block.slot(),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Notify the validator monitor.
|
// Notify the validator monitor.
|
||||||
@ -917,25 +920,25 @@ pub fn serve<T: BeaconChainTypes>(
|
|||||||
//
|
//
|
||||||
// Check to see the thresholds are non-zero to avoid logging errors with small
|
// Check to see the thresholds are non-zero to avoid logging errors with small
|
||||||
// slot times (e.g., during testing)
|
// slot times (e.g., during testing)
|
||||||
let crit_threshold = chain.spec.seconds_per_slot / 3;
|
let crit_threshold = chain.slot_clock.unagg_attestation_production_delay();
|
||||||
let warn_threshold = chain.spec.seconds_per_slot / 6;
|
let error_threshold = crit_threshold / 2;
|
||||||
if crit_threshold > 0 && delay.as_secs() > crit_threshold {
|
if delay >= crit_threshold {
|
||||||
crit!(
|
crit!(
|
||||||
log,
|
log,
|
||||||
"Block was broadcast too late";
|
"Block was broadcast too late";
|
||||||
"root" => ?root,
|
|
||||||
"slot" => block.slot(),
|
|
||||||
"delay_ms" => delay.as_millis(),
|
|
||||||
"msg" => "system may be overloaded, block likely to be orphaned",
|
"msg" => "system may be overloaded, block likely to be orphaned",
|
||||||
|
"delay_ms" => delay.as_millis(),
|
||||||
|
"slot" => block.slot(),
|
||||||
|
"root" => ?root,
|
||||||
)
|
)
|
||||||
} else if warn_threshold > 0 && delay.as_secs() > warn_threshold {
|
} else if delay >= error_threshold {
|
||||||
warn!(
|
error!(
|
||||||
log,
|
log,
|
||||||
"Block broadcast was delayed";
|
"Block broadcast was delayed";
|
||||||
"root" => ?root,
|
|
||||||
"slot" => block.slot(),
|
|
||||||
"delay_ms" => delay.as_millis(),
|
|
||||||
"msg" => "system may be overloaded, block may be orphaned",
|
"msg" => "system may be overloaded, block may be orphaned",
|
||||||
|
"delay_ms" => delay.as_millis(),
|
||||||
|
"slot" => block.slot(),
|
||||||
|
"root" => ?root,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,4 +33,12 @@ lazy_static::lazy_static! {
|
|||||||
"http_api_block_broadcast_delay_times",
|
"http_api_block_broadcast_delay_times",
|
||||||
"Time between start of the slot and when the block was broadcast"
|
"Time between start of the slot and when the block was broadcast"
|
||||||
);
|
);
|
||||||
|
pub static ref HTTP_API_BLOCK_PUBLISHED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||||
|
"http_api_block_published_late_total",
|
||||||
|
"The count of times a block was published beyond more than half way to the attestation deadline"
|
||||||
|
);
|
||||||
|
pub static ref HTTP_API_BLOCK_PUBLISHED_VERY_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||||
|
"http_api_block_published_very_late_total",
|
||||||
|
"The count of times a block was published beyond the attestation deadline"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
@ -291,19 +291,33 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
reprocess_tx: mpsc::Sender<ReprocessQueueMessage<T>>,
|
reprocess_tx: mpsc::Sender<ReprocessQueueMessage<T>>,
|
||||||
seen_duration: Duration,
|
seen_duration: Duration,
|
||||||
) {
|
) {
|
||||||
|
let block_delay =
|
||||||
|
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock);
|
||||||
// Log metrics to track delay from other nodes on the network.
|
// Log metrics to track delay from other nodes on the network.
|
||||||
metrics::observe_duration(
|
metrics::observe_duration(
|
||||||
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
|
&metrics::BEACON_BLOCK_GOSSIP_SLOT_START_DELAY_TIME,
|
||||||
get_block_delay_ms(seen_duration, block.message(), &self.chain.slot_clock),
|
block_delay,
|
||||||
);
|
);
|
||||||
|
|
||||||
let verified_block = match self.chain.verify_block_for_gossip(block) {
|
let verified_block = match self.chain.verify_block_for_gossip(block) {
|
||||||
Ok(verified_block) => {
|
Ok(verified_block) => {
|
||||||
|
if block_delay >= self.chain.slot_clock.unagg_attestation_production_delay() {
|
||||||
|
metrics::inc_counter(&metrics::BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL);
|
||||||
|
debug!(
|
||||||
|
self.log,
|
||||||
|
"Gossip block arrived late";
|
||||||
|
"block_root" => ?verified_block.block_root,
|
||||||
|
"proposer_index" => verified_block.block.message().proposer_index(),
|
||||||
|
"slot" => verified_block.block.slot(),
|
||||||
|
"block_delay" => ?block_delay,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
self.log,
|
self.log,
|
||||||
"New block received";
|
"New block received";
|
||||||
"slot" => verified_block.block.slot(),
|
"slot" => verified_block.block.slot(),
|
||||||
"hash" => %verified_block.block_root
|
"hash" => ?verified_block.block_root
|
||||||
);
|
);
|
||||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
|
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Accept);
|
||||||
|
|
||||||
@ -325,7 +339,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
debug!(
|
debug!(
|
||||||
self.log,
|
self.log,
|
||||||
"Unknown parent for gossip block";
|
"Unknown parent for gossip block";
|
||||||
"root" => %block.canonical_root()
|
"root" => ?block.canonical_root()
|
||||||
);
|
);
|
||||||
self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block));
|
self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block));
|
||||||
return;
|
return;
|
||||||
@ -392,7 +406,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Block arrived early";
|
"Block arrived early";
|
||||||
"block_slot" => %block_slot,
|
"block_slot" => %block_slot,
|
||||||
"block_root" => %block_root,
|
"block_root" => ?block_root,
|
||||||
"msg" => "if this happens consistently, check system clock"
|
"msg" => "if this happens consistently, check system clock"
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -423,7 +437,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Failed to defer block import";
|
"Failed to defer block import";
|
||||||
"block_slot" => %block_slot,
|
"block_slot" => %block_slot,
|
||||||
"block_root" => %block_root,
|
"block_root" => ?block_root,
|
||||||
"location" => "block gossip"
|
"location" => "block gossip"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -440,7 +454,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
"Failed to defer block import";
|
"Failed to defer block import";
|
||||||
"error" => ?e,
|
"error" => ?e,
|
||||||
"block_slot" => %block_slot,
|
"block_slot" => %block_slot,
|
||||||
"block_root" => %block_root,
|
"block_root" => ?block_root,
|
||||||
"location" => "block gossip"
|
"location" => "block gossip"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -472,7 +486,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Failed to inform block import";
|
"Failed to inform block import";
|
||||||
"source" => "gossip",
|
"source" => "gossip",
|
||||||
"block_root" => %block_root,
|
"block_root" => ?block_root,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -511,7 +525,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Invalid gossip beacon block";
|
"Invalid gossip beacon block";
|
||||||
"outcome" => ?other,
|
"outcome" => ?other,
|
||||||
"block root" => %block.canonical_root(),
|
"block root" => ?block.canonical_root(),
|
||||||
"block slot" => block.slot()
|
"block slot" => block.slot()
|
||||||
);
|
);
|
||||||
self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError);
|
self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError);
|
||||||
@ -843,7 +857,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Attestation is not within the last ATTESTATION_PROPAGATION_SLOT_RANGE slots";
|
"Attestation is not within the last ATTESTATION_PROPAGATION_SLOT_RANGE slots";
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"block" => %beacon_block_root,
|
"block" => ?beacon_block_root,
|
||||||
"type" => ?attestation_type,
|
"type" => ?attestation_type,
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -918,7 +932,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Attestation already known";
|
"Attestation already known";
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"block" => %beacon_block_root,
|
"block" => ?beacon_block_root,
|
||||||
"type" => ?attestation_type,
|
"type" => ?attestation_type,
|
||||||
);
|
);
|
||||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore);
|
||||||
@ -935,7 +949,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Aggregator already known";
|
"Aggregator already known";
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"block" => %beacon_block_root,
|
"block" => ?beacon_block_root,
|
||||||
"type" => ?attestation_type,
|
"type" => ?attestation_type,
|
||||||
);
|
);
|
||||||
// This is an allowed behaviour.
|
// This is an allowed behaviour.
|
||||||
@ -956,7 +970,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Prior attestation known";
|
"Prior attestation known";
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"block" => %beacon_block_root,
|
"block" => ?beacon_block_root,
|
||||||
"epoch" => %epoch,
|
"epoch" => %epoch,
|
||||||
"validator_index" => validator_index,
|
"validator_index" => validator_index,
|
||||||
"type" => ?attestation_type,
|
"type" => ?attestation_type,
|
||||||
@ -980,7 +994,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Validation Index too high";
|
"Validation Index too high";
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"block" => %beacon_block_root,
|
"block" => ?beacon_block_root,
|
||||||
"type" => ?attestation_type,
|
"type" => ?attestation_type,
|
||||||
);
|
);
|
||||||
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject);
|
self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject);
|
||||||
@ -991,7 +1005,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Attestation for unknown block";
|
"Attestation for unknown block";
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"block" => %beacon_block_root
|
"block" => ?beacon_block_root
|
||||||
);
|
);
|
||||||
if let Some(sender) = reprocess_tx {
|
if let Some(sender) = reprocess_tx {
|
||||||
// We don't know the block, get the sync manager to handle the block lookup, and
|
// We don't know the block, get the sync manager to handle the block lookup, and
|
||||||
@ -1200,7 +1214,7 @@ impl<T: BeaconChainTypes> Worker<T> {
|
|||||||
self.log,
|
self.log,
|
||||||
"Invalid attestation from network";
|
"Invalid attestation from network";
|
||||||
"reason" => ?error,
|
"reason" => ?error,
|
||||||
"block" => %beacon_block_root,
|
"block" => ?beacon_block_root,
|
||||||
"peer_id" => %peer_id,
|
"peer_id" => %peer_id,
|
||||||
"type" => ?attestation_type,
|
"type" => ?attestation_type,
|
||||||
);
|
);
|
||||||
|
@ -465,6 +465,10 @@ lazy_static! {
|
|||||||
"beacon_block_gossip_slot_start_delay_time",
|
"beacon_block_gossip_slot_start_delay_time",
|
||||||
"Duration between when the block is received and the start of the slot it belongs to.",
|
"Duration between when the block is received and the start of the slot it belongs to.",
|
||||||
);
|
);
|
||||||
|
pub static ref BEACON_BLOCK_GOSSIP_ARRIVED_LATE_TOTAL: Result<IntCounter> = try_create_int_counter(
|
||||||
|
"beacon_block_gossip_arrived_late_total",
|
||||||
|
"Count of times when a gossip block arrived from the network later than the attestation deadline.",
|
||||||
|
);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attestation reprocessing queue metrics.
|
* Attestation reprocessing queue metrics.
|
||||||
|
Loading…
Reference in New Issue
Block a user