Make more noise when the EL is broken (#3986)

## Issue Addressed

Closes #3814, replaces #3818.

## Proposed Changes

* Add a WARN log for the case where we are attempting to sync chain segments but can't process them because they're building on an invalid parent. The most common case where we see this is when the execution node database is corrupt, causing sync to stall mysteriously (because we're currently logging the failure only at debug level).
* Additionally I've bumped up the logging for invalid execution payloads to `WARN`. This may result in some duplicate logs as we log errors from the `beacon_chain` and then again from the beacon processor. Invalid payloads and corrupt DBs _should_ be rare enough that this doesn't produce overwhelming log volume.
This commit is contained in:
Michael Sproul 2023-03-17 00:44:02 +00:00
parent 974b7e9f58
commit 4c2d4af6cd
5 changed files with 23 additions and 19 deletions

View File

@ -5097,7 +5097,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
latest_valid_hash,
ref validation_error,
} => {
debug!(
warn!(
self.log,
"Invalid execution payload";
"validation_error" => ?validation_error,
@ -5106,11 +5106,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
"head_block_root" => ?head_block_root,
"method" => "fcU",
);
warn!(
self.log,
"Fork choice update invalidated payload";
"status" => ?status
);
match latest_valid_hash {
// The `latest_valid_hash` is set to `None` when the EE
@ -5156,7 +5151,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
PayloadStatus::InvalidBlockHash {
ref validation_error,
} => {
debug!(
warn!(
self.log,
"Invalid execution payload block hash";
"validation_error" => ?validation_error,
@ -5164,11 +5159,6 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
"head_block_root" => ?head_block_root,
"method" => "fcU",
);
warn!(
self.log,
"Fork choice update invalidated payload";
"status" => ?status
);
// The execution engine has stated that the head block is invalid, however it
// hasn't returned a latest valid ancestor.
//

View File

@ -280,10 +280,10 @@ pub enum BlockError<T: EthSpec> {
///
/// ## Peer scoring
///
/// TODO(merge): reconsider how we score peers for this.
///
/// The peer sent us an invalid block, but I'm not really sure how to score this in an
/// "optimistic" sync world.
/// The peer sent us an invalid block, we must penalise harshly.
/// If it's actually our fault (e.g. our execution node database is corrupt) we have bigger
/// problems to worry about than losing peers, and we're doing the network a favour by
/// disconnecting.
ParentExecutionPayloadInvalid { parent_root: Hash256 },
}

View File

@ -159,7 +159,7 @@ async fn notify_new_payload<'a, T: BeaconChainTypes>(
latest_valid_hash,
ref validation_error,
} => {
debug!(
warn!(
chain.log,
"Invalid execution payload";
"validation_error" => ?validation_error,
@ -206,7 +206,7 @@ async fn notify_new_payload<'a, T: BeaconChainTypes>(
PayloadStatus::InvalidBlockHash {
ref validation_error,
} => {
debug!(
warn!(
chain.log,
"Invalid execution payload block hash";
"validation_error" => ?validation_error,

View File

@ -834,7 +834,6 @@ impl<T: BeaconChainTypes> Worker<T> {
| Err(e @ BlockError::WeakSubjectivityConflict)
| Err(e @ BlockError::InconsistentFork(_))
| Err(e @ BlockError::ExecutionPayloadError(_))
// TODO(merge): reconsider peer scoring for this event.
| Err(e @ BlockError::ParentExecutionPayloadInvalid { .. })
| Err(e @ BlockError::GenesisBlock) => {
warn!(self.log, "Could not verify block for gossip. Rejecting the block";

View File

@ -513,6 +513,21 @@ impl<T: BeaconChainTypes> Worker<T> {
})
}
}
ref err @ BlockError::ParentExecutionPayloadInvalid { ref parent_root } => {
warn!(
self.log,
"Failed to sync chain built on invalid parent";
"parent_root" => ?parent_root,
"advice" => "check execution node for corruption then restart it and Lighthouse",
);
Err(ChainSegmentFailed {
message: format!("Peer sent invalid block. Reason: {err:?}"),
// We need to penalise harshly in case this represents an actual attack. In case
// of a faulty EL it will usually require manual intervention to fix anyway, so
// it's not too bad if we drop most of our peers.
peer_action: Some(PeerAction::LowToleranceError),
})
}
other => {
debug!(
self.log, "Invalid block received";