diff --git a/beacon_node/network/src/beacon_processor/mod.rs b/beacon_node/network/src/beacon_processor/mod.rs index 4aa7c7692..6d7375cca 100644 --- a/beacon_node/network/src/beacon_processor/mod.rs +++ b/beacon_node/network/src/beacon_processor/mod.rs @@ -75,7 +75,9 @@ mod work_reprocessing_queue; mod worker; use crate::beacon_processor::work_reprocessing_queue::QueuedBlock; -pub use worker::{ChainSegmentProcessId, GossipAggregatePackage, GossipAttestationPackage}; +pub use worker::{ + ChainSegmentProcessId, FailureMode, GossipAggregatePackage, GossipAttestationPackage, +}; /// The maximum size of the channel for work events to the `BeaconProcessor`. /// diff --git a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs index b367f7f6d..aa0184110 100644 --- a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs +++ b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs @@ -943,6 +943,16 @@ impl Worker { ); self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block)); } + Err(e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::RequestFailed(_))) + | Err( + e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::NoExecutionConnection), + ) => { + debug!( + self.log, + "Failed to verify execution payload"; + "error" => %e + ); + } other => { debug!( self.log, diff --git a/beacon_node/network/src/beacon_processor/worker/mod.rs b/beacon_node/network/src/beacon_processor/worker/mod.rs index f907c49b7..04147245e 100644 --- a/beacon_node/network/src/beacon_processor/worker/mod.rs +++ b/beacon_node/network/src/beacon_processor/worker/mod.rs @@ -10,7 +10,7 @@ mod rpc_methods; mod sync_methods; pub use gossip_methods::{GossipAggregatePackage, GossipAttestationPackage}; -pub use sync_methods::ChainSegmentProcessId; +pub use sync_methods::{ChainSegmentProcessId, FailureMode}; pub(crate) const FUTURE_SLOT_TOLERANCE: u64 = 1; diff --git a/beacon_node/network/src/beacon_processor/worker/sync_methods.rs b/beacon_node/network/src/beacon_processor/worker/sync_methods.rs index 943ee9cda..04ed1ff60 100644 --- a/beacon_node/network/src/beacon_processor/worker/sync_methods.rs +++ b/beacon_node/network/src/beacon_processor/worker/sync_methods.rs @@ -6,6 +6,7 @@ use crate::beacon_processor::DuplicateCache; use crate::metrics; use crate::sync::manager::{BlockProcessType, SyncMessage}; use crate::sync::{BatchProcessResult, ChainId}; +use beacon_chain::ExecutionPayloadError; use beacon_chain::{ BeaconChainError, BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError, }; @@ -31,6 +32,15 @@ struct ChainSegmentFailed { message: String, /// Used to penalize peers. peer_action: Option, + /// Failure mode + mode: FailureMode, +} + +/// Represents if a block processing failure was on the consensus or execution side. +#[derive(Debug)] +pub enum FailureMode { + ExecutionLayer { pause_sync: bool }, + ConsensusLayer, } impl Worker { @@ -128,6 +138,7 @@ impl Worker { BatchProcessResult::Failed { imported_blocks: imported_blocks > 0, peer_action: e.peer_action, + mode: e.mode, } } } @@ -158,6 +169,7 @@ impl Worker { BatchProcessResult::Failed { imported_blocks: false, peer_action: e.peer_action, + mode: e.mode, } } } @@ -177,6 +189,7 @@ impl Worker { BatchProcessResult::Failed { imported_blocks: imported_blocks > 0, peer_action: e.peer_action, + mode: e.mode, } } (imported_blocks, Ok(_)) => { @@ -257,6 +270,7 @@ impl Worker { message: String::from("mismatched_block_root"), // The peer is faulty if they send blocks with bad roots. peer_action: Some(PeerAction::LowToleranceError), + mode: FailureMode::ConsensusLayer, } } HistoricalBlockError::InvalidSignature @@ -271,6 +285,7 @@ impl Worker { message: "invalid_signature".into(), // The peer is faulty if they bad signatures. peer_action: Some(PeerAction::LowToleranceError), + mode: FailureMode::ConsensusLayer, } } HistoricalBlockError::ValidatorPubkeyCacheTimeout => { @@ -284,6 +299,7 @@ impl Worker { message: "pubkey_cache_timeout".into(), // This is an internal error, do not penalize the peer. peer_action: None, + mode: FailureMode::ConsensusLayer, } } HistoricalBlockError::NoAnchorInfo => { @@ -294,6 +310,7 @@ impl Worker { // There is no need to do a historical sync, this is not a fault of // the peer. peer_action: None, + mode: FailureMode::ConsensusLayer, } } HistoricalBlockError::IndexOutOfBounds => { @@ -306,6 +323,7 @@ impl Worker { message: String::from("logic_error"), // This should never occur, don't penalize the peer. peer_action: None, + mode: FailureMode::ConsensusLayer, } } HistoricalBlockError::BlockOutOfRange { .. } => { @@ -318,6 +336,7 @@ impl Worker { message: String::from("unexpected_error"), // This should never occur, don't penalize the peer. peer_action: None, + mode: FailureMode::ConsensusLayer, } } }, @@ -327,6 +346,7 @@ impl Worker { message: format!("{:?}", other), // This is an internal error, don't penalize the peer. peer_action: None, + mode: FailureMode::ConsensusLayer, } } }; @@ -365,6 +385,7 @@ impl Worker { message: format!("Block has an unknown parent: {}", block.parent_root()), // Peers are faulty if they send non-sequential blocks. peer_action: Some(PeerAction::LowToleranceError), + mode: FailureMode::ConsensusLayer, }) } BlockError::BlockIsAlreadyKnown => { @@ -402,6 +423,7 @@ impl Worker { ), // Peers are faulty if they send blocks from the future. peer_action: Some(PeerAction::LowToleranceError), + mode: FailureMode::ConsensusLayer, }) } BlockError::WouldRevertFinalizedSlot { .. } => { @@ -423,8 +445,41 @@ impl Worker { message: format!("Internal error whilst processing block: {:?}", e), // Do not penalize peers for internal errors. peer_action: None, + mode: FailureMode::ConsensusLayer, }) } + BlockError::ExecutionPayloadError(e) => match &e { + ExecutionPayloadError::NoExecutionConnection { .. } + | ExecutionPayloadError::RequestFailed { .. } => { + // These errors indicate an issue with the EL and not the `ChainSegment`. + // Pause the syncing while the EL recovers + debug!(self.log, + "Execution layer verification failed"; + "outcome" => "pausing sync", + "err" => ?e + ); + Err(ChainSegmentFailed { + message: format!("Execution layer offline. Reason: {:?}", e), + // Do not penalize peers for internal errors. + peer_action: None, + mode: FailureMode::ExecutionLayer { pause_sync: true }, + }) + } + err => { + debug!(self.log, + "Invalid execution payload"; + "error" => ?err + ); + Err(ChainSegmentFailed { + message: format!( + "Peer sent a block containing invalid execution payload. Reason: {:?}", + err + ), + peer_action: Some(PeerAction::LowToleranceError), + mode: FailureMode::ExecutionLayer { pause_sync: false }, + }) + } + }, other => { debug!( self.log, "Invalid block received"; @@ -436,6 +491,7 @@ impl Worker { message: format!("Peer sent invalid block. Reason: {:?}", other), // Do not penalize peers for internal errors. peer_action: None, + mode: FailureMode::ConsensusLayer, }) } } diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index be750e25f..d6bb802a2 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -8,7 +8,7 @@ //! If a batch fails, the backfill sync cannot progress. In this scenario, we mark the backfill //! sync as failed, log an error and attempt to retry once a new peer joins the node. -use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent}; +use crate::beacon_processor::{ChainSegmentProcessId, FailureMode, WorkEvent as BeaconWorkEvent}; use crate::sync::manager::{BatchProcessResult, Id}; use crate::sync::network_context::SyncNetworkContext; use crate::sync::range_sync::{BatchConfig, BatchId, BatchInfo, BatchProcessingResult, BatchState}; @@ -554,6 +554,7 @@ impl BackFillSync { imported_blocks: false, // The beacon processor queue is full, no need to penalize the peer. peer_action: None, + mode: FailureMode::ConsensusLayer, }, ) } else { @@ -638,6 +639,7 @@ impl BackFillSync { BatchProcessResult::Failed { imported_blocks, peer_action, + mode: _, } => { let batch = match self.batches.get_mut(&batch_id) { Some(v) => v, diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index ece923ef5..2770171be 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -1,7 +1,7 @@ use std::collections::hash_map::Entry; use std::time::Duration; -use beacon_chain::{BeaconChainTypes, BlockError}; +use beacon_chain::{BeaconChainTypes, BlockError, ExecutionPayloadError}; use fnv::FnvHashMap; use lighthouse_network::{PeerAction, PeerId}; use lru_cache::LRUTimeCache; @@ -10,7 +10,7 @@ use smallvec::SmallVec; use store::{Hash256, SignedBeaconBlock}; use tokio::sync::mpsc; -use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent}; +use crate::beacon_processor::{ChainSegmentProcessId, FailureMode, WorkEvent}; use crate::metrics; use self::{ @@ -420,6 +420,20 @@ impl BlockLookups { BlockError::ParentUnknown(block) => { self.search_parent(block, peer_id, cx); } + + e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::RequestFailed(_)) + | e @ BlockError::ExecutionPayloadError( + ExecutionPayloadError::NoExecutionConnection, + ) => { + // These errors indicate that the execution layer is offline + // and failed to validate the execution payload. Do not downscore peer. + debug!( + self.log, + "Single block lookup failed. Execution layer is offline"; + "root" => %root, + "error" => ?e + ); + } other => { warn!(self.log, "Peer sent invalid block in single block lookup"; "root" => %root, "error" => ?other, "peer_id" => %peer_id); cx.report_peer( @@ -506,6 +520,19 @@ impl BlockLookups { } } } + Err(e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::RequestFailed(_))) + | Err( + e @ BlockError::ExecutionPayloadError(ExecutionPayloadError::NoExecutionConnection), + ) => { + // These errors indicate that the execution layer is offline + // and failed to validate the execution payload. Do not downscore peer. + debug!( + self.log, + "Parent lookup failed. Execution layer is offline"; + "chain_hash" => %chain_hash, + "error" => ?e + ); + } Err(outcome) => { // all else we consider the chain a failure and downvote the peer that sent // us the last block @@ -561,11 +588,21 @@ impl BlockLookups { BatchProcessResult::Failed { imported_blocks: _, peer_action, + mode, } => { - self.failed_chains.insert(parent_lookup.chain_hash()); - if let Some(peer_action) = peer_action { - for &peer_id in parent_lookup.used_peers() { - cx.report_peer(peer_id, peer_action, "parent_chain_failure") + if let FailureMode::ExecutionLayer { pause_sync: _ } = mode { + debug!( + self.log, + "Chain segment processing failed. Execution layer is offline"; + "chain_hash" => %chain_hash, + "error" => ?mode + ); + } else { + self.failed_chains.insert(parent_lookup.chain_hash()); + if let Some(peer_action) = peer_action { + for &peer_id in parent_lookup.used_peers() { + cx.report_peer(peer_id, peer_action, "parent_chain_failure") + } } } } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 0003db6ab..311fbf67c 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -38,7 +38,7 @@ use super::block_lookups::BlockLookups; use super::network_context::SyncNetworkContext; use super::peer_sync_info::{remote_sync_type, PeerSyncType}; use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; -use crate::beacon_processor::{ChainSegmentProcessId, WorkEvent as BeaconWorkEvent}; +use crate::beacon_processor::{ChainSegmentProcessId, FailureMode, WorkEvent as BeaconWorkEvent}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError}; @@ -137,6 +137,7 @@ pub enum BatchProcessResult { Failed { imported_blocks: bool, peer_action: Option, + mode: FailureMode, }, } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 88837d0e1..0f5d63ea6 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -1,6 +1,6 @@ use super::batch::{BatchInfo, BatchProcessingResult, BatchState}; -use crate::beacon_processor::ChainSegmentProcessId; use crate::beacon_processor::WorkEvent as BeaconWorkEvent; +use crate::beacon_processor::{ChainSegmentProcessId, FailureMode}; use crate::sync::{manager::Id, network_context::SyncNetworkContext, BatchProcessResult}; use beacon_chain::BeaconChainTypes; use fnv::FnvHashMap; @@ -320,6 +320,7 @@ impl SyncingChain { &BatchProcessResult::Failed { imported_blocks: false, peer_action: None, + mode: FailureMode::ConsensusLayer, }, ) } else { @@ -499,6 +500,7 @@ impl SyncingChain { BatchProcessResult::Failed { imported_blocks, peer_action, + mode: _, } => { let batch = self.batches.get_mut(&batch_id).ok_or_else(|| { RemoveChain::WrongChainState(format!(