From aaa5344eab2c0bda90d0d4da3710982c05396814 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Wed, 12 Jan 2022 05:32:14 +0000 Subject: [PATCH] Add peer score adjustment msgs (#2901) ## Issue Addressed N/A ## Proposed Changes This PR adds the `msg` field to `Peer score adjusted` log messages. These `msg` fields help identify *why* a peer was banned. Example: ``` Jan 11 04:18:48.096 DEBG Peer score adjusted score: -100.00, peer_id: 16Uiu2HAmQskxKWWGYfginwZ51n5uDbhvjHYnvASK7PZ5gBdLmzWj, msg: attn_unknown_head, service: libp2p Jan 11 04:18:48.096 DEBG Peer score adjusted score: -27.86, peer_id: 16Uiu2HAmA7cCb3MemVDbK3MHZoSb7VN3cFUG3vuSZgnGesuVhPDE, msg: sync_past_slot, service: libp2p Jan 11 04:18:48.096 DEBG Peer score adjusted score: -100.00, peer_id: 16Uiu2HAmQskxKWWGYfginwZ51n5uDbhvjHYnvASK7PZ5gBdLmzWj, msg: attn_unknown_head, service: libp2p Jan 11 04:18:48.096 DEBG Peer score adjusted score: -28.86, peer_id: 16Uiu2HAmA7cCb3MemVDbK3MHZoSb7VN3cFUG3vuSZgnGesuVhPDE, msg: sync_past_slot, service: libp2p Jan 11 04:18:48.096 DEBG Peer score adjusted score: -29.86, peer_id: 16Uiu2HAmA7cCb3MemVDbK3MHZoSb7VN3cFUG3vuSZgnGesuVhPDE, msg: sync_past_slot, service: libp2p ``` There is also a `libp2p_report_peer_msgs_total` metrics which allows us to see count of reports per `msg` tag. ## Additional Info NA --- .../src/attestation_verification.rs | 18 -- .../lighthouse_network/src/behaviour/mod.rs | 1 + beacon_node/lighthouse_network/src/metrics.rs | 9 + .../src/peer_manager/mod.rs | 19 +- .../src/peer_manager/peerdb.rs | 108 ++++++-- beacon_node/lighthouse_network/src/service.rs | 10 +- .../lighthouse_network/tests/pm_tests.rs | 3 +- .../beacon_processor/worker/gossip_methods.rs | 249 ++++++++++++++---- beacon_node/network/src/service.rs | 3 +- .../network/src/sync/backfill_sync/mod.rs | 14 +- beacon_node/network/src/sync/manager.rs | 37 ++- .../network/src/sync/network_context.rs | 3 +- .../network/src/sync/range_sync/chain.rs | 14 +- 13 files changed, 378 insertions(+), 110 deletions(-) diff --git a/beacon_node/beacon_chain/src/attestation_verification.rs b/beacon_node/beacon_chain/src/attestation_verification.rs index fb05ef755..6692aa48c 100644 --- a/beacon_node/beacon_chain/src/attestation_verification.rs +++ b/beacon_node/beacon_chain/src/attestation_verification.rs @@ -183,24 +183,6 @@ pub enum Error { /// single-participant attestation from this validator for this epoch and should not observe /// another. PriorAttestationKnown { validator_index: u64, epoch: Epoch }, - /// The attestation is for an epoch in the future (with respect to the gossip clock disparity). - /// - /// ## Peer scoring - /// - /// Assuming the local clock is correct, the peer has sent an invalid message. - FutureEpoch { - attestation_epoch: Epoch, - current_epoch: Epoch, - }, - /// The attestation is for an epoch in the past (with respect to the gossip clock disparity). - /// - /// ## Peer scoring - /// - /// Assuming the local clock is correct, the peer has sent an invalid message. - PastEpoch { - attestation_epoch: Epoch, - current_epoch: Epoch, - }, /// The attestation is attesting to a state that is later than itself. (Viz., attesting to the /// future). /// diff --git a/beacon_node/lighthouse_network/src/behaviour/mod.rs b/beacon_node/lighthouse_network/src/behaviour/mod.rs index f14d24aac..32a87166b 100644 --- a/beacon_node/lighthouse_network/src/behaviour/mod.rs +++ b/beacon_node/lighthouse_network/src/behaviour/mod.rs @@ -887,6 +887,7 @@ impl NetworkBehaviourEventProcess for Behaviour< PeerAction::LowToleranceError, ReportSource::Gossipsub, Some(GoodbyeReason::Unknown), + "does_not_support_gossipsub", ); } } diff --git a/beacon_node/lighthouse_network/src/metrics.rs b/beacon_node/lighthouse_network/src/metrics.rs index b8fd8c584..1dfe0448b 100644 --- a/beacon_node/lighthouse_network/src/metrics.rs +++ b/beacon_node/lighthouse_network/src/metrics.rs @@ -106,6 +106,15 @@ lazy_static! { /// The number of peers that we dialed us. pub static ref NETWORK_OUTBOUND_PEERS: Result = try_create_int_gauge("network_outbound_peers","The number of peers that are currently connected that we dialed."); + + /* + * Peer Reporting + */ + pub static ref REPORT_PEER_MSGS: Result = try_create_int_counter_vec( + "libp2p_report_peer_msgs_total", + "Number of peer reports per msg", + &["msg"] + ); } /// Checks if we consider the NAT open. diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 202738c25..318bdfcdf 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -155,7 +155,13 @@ impl PeerManager { } } - self.report_peer(peer_id, PeerAction::Fatal, source, Some(reason)); + self.report_peer( + peer_id, + PeerAction::Fatal, + source, + Some(reason), + "goodbye_peer", + ); } /// Reports a peer for some action. @@ -167,12 +173,13 @@ impl PeerManager { action: PeerAction, source: ReportSource, reason: Option, + msg: &'static str, ) { let action = self .network_globals .peers .write() - .report_peer(peer_id, action, source); + .report_peer(peer_id, action, source, msg); self.handle_score_action(peer_id, action, reason); } @@ -511,7 +518,13 @@ impl PeerManager { RPCError::Disconnected => return, // No penalty for a graceful disconnection }; - self.report_peer(peer_id, peer_action, ReportSource::RPC, None); + self.report_peer( + peer_id, + peer_action, + ReportSource::RPC, + None, + "handle_rpc_error", + ); } /// A ping request has been received. diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs index 81c03eaf7..f70f35b68 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs @@ -490,7 +490,10 @@ impl PeerDB { peer_id: &PeerId, action: PeerAction, source: ReportSource, + msg: &'static str, ) -> ScoreUpdateResult { + metrics::inc_counter_vec(&metrics::REPORT_PEER_MSGS, &[msg]); + match self.peers.get_mut(peer_id) { Some(info) => { let previous_state = info.score_state(); @@ -502,7 +505,13 @@ impl PeerDB { let result = Self::handle_score_transition(previous_state, peer_id, info, &self.log); if previous_state == info.score_state() { - debug!(self.log, "Peer score adjusted"; "peer_id" => %peer_id, "score" => %info.score()); + debug!( + self.log, + "Peer score adjusted"; + "msg" => %msg, + "peer_id" => %peer_id, + "score" => %info.score() + ); } match result { ScoreTransitionResult::Banned => { @@ -522,13 +531,23 @@ impl PeerDB { } ScoreTransitionResult::NoAction => ScoreUpdateResult::NoAction, ScoreTransitionResult::Unbanned => { - error!(self.log, "Report peer action lead to an unbanning"; "peer_id" => %peer_id); + error!( + self.log, + "Report peer action lead to an unbanning"; + "msg" => %msg, + "peer_id" => %peer_id + ); ScoreUpdateResult::NoAction } } } None => { - debug!(self.log, "Reporting a peer that doesn't exist"; "peer_id" =>%peer_id); + debug!( + self.log, + "Reporting a peer that doesn't exist"; + "msg" => %msg, + "peer_id" =>%peer_id + ); ScoreUpdateResult::NoAction } } @@ -1357,7 +1376,7 @@ mod tests { assert_eq!(pdb.banned_peers_count.banned_peers(), 0); for p in pdb.connected_peer_ids().cloned().collect::>() { - let _ = pdb.report_peer(&p, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer(&p, PeerAction::Fatal, ReportSource::PeerManager, ""); pdb.inject_disconnect(&p); } @@ -1426,9 +1445,19 @@ mod tests { pdb.inject_disconnect(&random_peer); assert_eq!(pdb.disconnected_peers, pdb.disconnected_peers().count()); - let _ = pdb.report_peer(&random_peer, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); pdb.inject_disconnect(&random_peer); - let _ = pdb.report_peer(&random_peer, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); assert_eq!(pdb.disconnected_peers, pdb.disconnected_peers().count()); pdb.inject_disconnect(&random_peer); assert_eq!(pdb.disconnected_peers, pdb.disconnected_peers().count()); @@ -1481,7 +1510,12 @@ mod tests { pdb.disconnected_peers, pdb.banned_peers_count.banned_peers ); // Disconnect and ban peer 2 - let _ = pdb.report_peer(&random_peer2, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer2, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); // Should be 1 disconnected peer and one peer in the process of being disconnected println!( "3:{},{}", @@ -1495,7 +1529,12 @@ mod tests { pdb.disconnected_peers, pdb.banned_peers_count.banned_peers ); // Now that the peer is disconnected, register the ban. - let _ = pdb.report_peer(&random_peer2, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer2, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); // There should be 1 disconnected peer and one banned peer. println!( "5:{},{}", @@ -1509,7 +1548,12 @@ mod tests { pdb.banned_peers().count() ); // Now ban peer 1. - let _ = pdb.report_peer(&random_peer1, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer1, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); // There should be no disconnected peers and 2 banned peers println!( "6:{},{}", @@ -1523,7 +1567,12 @@ mod tests { pdb.disconnected_peers, pdb.banned_peers_count.banned_peers ); // Same thing here. - let _ = pdb.report_peer(&random_peer1, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer1, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); println!( "8:{},{}", pdb.disconnected_peers, pdb.banned_peers_count.banned_peers @@ -1559,7 +1608,12 @@ mod tests { ); // Ban peer 3 - let _ = pdb.report_peer(&random_peer3, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer3, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); pdb.inject_disconnect(&random_peer3); // This should add a new banned peer, there should be 0 disconnected and 2 banned @@ -1576,7 +1630,12 @@ mod tests { ); // Ban peer 3 - let _ = pdb.report_peer(&random_peer3, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer3, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); pdb.inject_disconnect(&random_peer3); // Should still have 2 banned peers @@ -1606,7 +1665,12 @@ mod tests { ); // Ban peer 3 - let _ = pdb.report_peer(&random_peer3, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer3, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); pdb.inject_disconnect(&random_peer3); // Should have 1 disconnect (peer 2) and one banned (peer 3) @@ -1657,7 +1721,12 @@ mod tests { ); // Ban peer 0 - let _ = pdb.report_peer(&random_peer, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer( + &random_peer, + PeerAction::Fatal, + ReportSource::PeerManager, + "", + ); pdb.inject_disconnect(&random_peer); // Should have 1 disconnect ( peer 2) and two banned (peer0, peer 3) @@ -1709,7 +1778,7 @@ mod tests { let p5 = connect_peer_with_ips(&mut pdb, vec![ip5]); for p in &peers[..BANNED_PEERS_PER_IP_THRESHOLD + 1] { - let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager, ""); pdb.inject_disconnect(p); } @@ -1725,6 +1794,7 @@ mod tests { &peers[BANNED_PEERS_PER_IP_THRESHOLD + 1], PeerAction::Fatal, ReportSource::PeerManager, + "", ); pdb.inject_disconnect(&peers[BANNED_PEERS_PER_IP_THRESHOLD + 1]); @@ -1777,7 +1847,7 @@ mod tests { // ban all peers for p in &peers { - let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager, ""); pdb.inject_disconnect(p); } @@ -1806,7 +1876,7 @@ mod tests { socker_addr.push(Protocol::Tcp(8080)); for p in &peers { pdb.connect_ingoing(p, socker_addr.clone(), None); - let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager, ""); pdb.inject_disconnect(p); } @@ -1823,7 +1893,7 @@ mod tests { // reban every peer except one for p in &peers[1..] { - let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer(p, PeerAction::Fatal, ReportSource::PeerManager, ""); pdb.inject_disconnect(p); } @@ -1832,7 +1902,7 @@ mod tests { assert!(!pdb.ban_status(&p2).is_banned()); // reban last peer - let _ = pdb.report_peer(&peers[0], PeerAction::Fatal, ReportSource::PeerManager); + let _ = pdb.report_peer(&peers[0], PeerAction::Fatal, ReportSource::PeerManager, ""); pdb.inject_disconnect(&peers[0]); //Ip's are banned again diff --git a/beacon_node/lighthouse_network/src/service.rs b/beacon_node/lighthouse_network/src/service.rs index 23c198290..cbb11cae4 100644 --- a/beacon_node/lighthouse_network/src/service.rs +++ b/beacon_node/lighthouse_network/src/service.rs @@ -280,11 +280,17 @@ impl Service { } /// Report a peer's action. - pub fn report_peer(&mut self, peer_id: &PeerId, action: PeerAction, source: ReportSource) { + pub fn report_peer( + &mut self, + peer_id: &PeerId, + action: PeerAction, + source: ReportSource, + msg: &'static str, + ) { self.swarm .behaviour_mut() .peer_manager_mut() - .report_peer(peer_id, action, source, None); + .report_peer(peer_id, action, source, None, msg); } /// Disconnect and ban a peer, providing a reason. diff --git a/beacon_node/lighthouse_network/tests/pm_tests.rs b/beacon_node/lighthouse_network/tests/pm_tests.rs index 96f91797a..9b26e4939 100644 --- a/beacon_node/lighthouse_network/tests/pm_tests.rs +++ b/beacon_node/lighthouse_network/tests/pm_tests.rs @@ -167,7 +167,8 @@ async fn banned_peers_consistency() { &peer_id, PeerAction::Fatal, ReportSource::Processor, - None + None, + "" ); }, _ => {} diff --git a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs index 1b7ef7aa9..2b6ac02b6 100644 --- a/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs +++ b/beacon_node/network/src/beacon_processor/worker/gossip_methods.rs @@ -180,11 +180,12 @@ impl Worker { /* Auxiliary functions */ /// Penalizes a peer for misbehaviour. - fn gossip_penalize_peer(&self, peer_id: PeerId, action: PeerAction) { + fn gossip_penalize_peer(&self, peer_id: PeerId, action: PeerAction, msg: &'static str) { self.send_network_message(NetworkMessage::ReportPeer { peer_id, action, source: ReportSource::Gossipsub, + msg, }) } @@ -738,16 +739,24 @@ impl Worker { self.send_sync_message(SyncMessage::UnknownBlock(peer_id, block)); return None; } + Err(e @ BlockError::BeaconChainError(_)) => { + debug!( + self.log, + "Gossip block beacon chain error"; + "error" => ?e, + ); + self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); + return None; + } Err(e @ BlockError::FutureSlot { .. }) | Err(e @ BlockError::WouldRevertFinalizedSlot { .. }) | Err(e @ BlockError::BlockIsAlreadyKnown) | Err(e @ BlockError::RepeatProposal { .. }) - | Err(e @ BlockError::NotFinalizedDescendant { .. }) - | Err(e @ BlockError::BeaconChainError(_)) => { + | Err(e @ BlockError::NotFinalizedDescendant { .. }) => { debug!(self.log, "Could not verify block for gossip, ignoring the block"; "error" => %e); // Prevent recurring behaviour by penalizing the peer slightly. - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError, "gossip_block_high"); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); return None; } @@ -780,7 +789,7 @@ impl Worker { warn!(self.log, "Could not verify block for gossip, rejecting the block"; "error" => %e); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError, "gossip_block_low"); return None; } }; @@ -931,7 +940,11 @@ impl Worker { "block root" => ?block.canonical_root(), "block slot" => block.slot() ); - self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::MidToleranceError, + "bad_gossip_block_ssz", + ); trace!( self.log, "Invalid gossip beacon block ssz"; @@ -973,7 +986,11 @@ impl Worker { // the fault on the peer. self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); // We still penalize a peer slightly to prevent overuse of invalids. - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "invalid_gossip_exit", + ); return; } }; @@ -1032,7 +1049,11 @@ impl Worker { self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); // Penalize peer slightly for invalids. - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "invalid_gossip_proposer_slashing", + ); return; } }; @@ -1083,7 +1104,11 @@ impl Worker { ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); // Penalize peer slightly for invalids. - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "invalid_gossip_attester_slashing", + ); return; } }; @@ -1248,9 +1273,7 @@ impl Worker { let attestation_type = failed_att.kind(); metrics::register_attestation_error(&error); match &error { - AttnError::FutureEpoch { .. } - | AttnError::PastEpoch { .. } - | AttnError::FutureSlot { .. } => { + AttnError::FutureSlot { .. } => { /* * These errors can be triggered by a mismatch between our slot and the peer. * @@ -1267,7 +1290,11 @@ impl Worker { // Peers that are slow or not to spec can spam us with these messages draining our // bandwidth. We therefore penalize these peers when they do this. - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_future_slot", + ); // Do not propagate these messages. self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); @@ -1285,7 +1312,11 @@ impl Worker { // Only penalize the peer if it would have been invalid at the moment we received // it. if hindsight_verification.is_err() { - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_past_slot", + ); } self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); @@ -1297,7 +1328,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_selection_proof", + ); } AttnError::EmptyAggregationBitfield => { /* @@ -1307,7 +1342,11 @@ impl Worker { * */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_empty_agg_bitfield", + ); } AttnError::AggregatorPubkeyUnknown(_) => { /* @@ -1324,7 +1363,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_agg_pubkey", + ); } AttnError::AggregatorNotInCommittee { .. } => { /* @@ -1341,7 +1384,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_agg_not_in_committee", + ); } AttnError::AttestationAlreadyKnown { .. } => { /* @@ -1417,7 +1464,11 @@ impl Worker { "type" => ?attestation_type, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_val_index_too_high", + ); } AttnError::UnknownHeadBlock { beacon_block_root } => { trace!( @@ -1482,7 +1533,11 @@ impl Worker { } else { // We shouldn't make any further attempts to process this attestation. // Downscore the peer. - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_unknown_head", + ); self.propagate_validation_result( message_id, peer_id, @@ -1510,7 +1565,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_unknown_target", + ); } AttnError::BadTargetEpoch => { /* @@ -1520,7 +1579,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_bad_target", + ); } AttnError::NoCommitteeForSlotAndIndex { .. } => { /* @@ -1529,7 +1592,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_no_committee", + ); } AttnError::NotExactlyOneAggregationBitSet(_) => { /* @@ -1538,7 +1605,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_too_many_agg_bits", + ); } AttnError::AttestsToFutureBlock { .. } => { /* @@ -1547,7 +1618,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_future_block", + ); } AttnError::InvalidSubnetId { received, expected } => { /* @@ -1560,7 +1635,11 @@ impl Worker { "received" => ?received, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_invalid_subnet_id", + ); } AttnError::Invalid(_) => { /* @@ -1569,7 +1648,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_invalid_state_processing", + ); } AttnError::InvalidTargetEpoch { .. } => { /* @@ -1578,7 +1661,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_invalid_target_epoch", + ); } AttnError::InvalidTargetRoot { .. } => { /* @@ -1587,7 +1674,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "attn_invalid_target_root", + ); } AttnError::TooManySkippedSlots { head_block_slot, @@ -1607,7 +1698,11 @@ impl Worker { // In this case we wish to penalize gossipsub peers that do this to avoid future // attestations that have too many skip slots. self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::MidToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::MidToleranceError, + "attn_too_many_skipped_slots", + ); } AttnError::BeaconChainError(BeaconChainError::DBError(Error::HotColdDBError( HotColdDBError::AttestationStateIsFinalized { .. }, @@ -1630,8 +1725,6 @@ impl Worker { "error" => ?e, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); - // Penalize the peer slightly - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); } } @@ -1675,7 +1768,11 @@ impl Worker { // Unlike attestations, we have a zero slot buffer in case of sync committee messages, // so we don't penalize heavily. - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "sync_future_slot", + ); // Do not propagate these messages. self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); @@ -1719,7 +1816,11 @@ impl Worker { // Penalize the peer if the message was more than one slot late if excessively_late && invalid_in_hindsight() { - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "sync_past_slot", + ); } self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); @@ -1732,7 +1833,11 @@ impl Worker { * */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_empty_agg_bitfield", + ); } SyncCommitteeError::InvalidSelectionProof { .. } | SyncCommitteeError::InvalidSignature => { @@ -1742,7 +1847,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_invalid_proof_or_sig", + ); } SyncCommitteeError::AggregatorNotInCommittee { .. } | SyncCommitteeError::AggregatorPubkeyUnknown(_) => { @@ -1753,7 +1862,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_bad_aggregator", + ); } SyncCommitteeError::SyncContributionAlreadyKnown(_) | SyncCommitteeError::AggregatorAlreadyKnown(_) => { @@ -1786,7 +1899,11 @@ impl Worker { "type" => ?message_type, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_unknown_validator", + ); } SyncCommitteeError::UnknownValidatorPubkey(_) => { debug!( @@ -1796,7 +1913,11 @@ impl Worker { "type" => ?message_type, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_unknown_validator_pubkey", + ); } SyncCommitteeError::InvalidSubnetId { received, expected } => { /* @@ -1809,7 +1930,11 @@ impl Worker { "received" => ?received, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_invalid_subnet_id", + ); } SyncCommitteeError::Invalid(_) => { /* @@ -1818,7 +1943,11 @@ impl Worker { * The peer has published an invalid consensus message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_invalid_state_processing", + ); } SyncCommitteeError::PriorSyncCommitteeMessageKnown { .. } => { /* @@ -1834,7 +1963,11 @@ impl Worker { ); // We still penalize the peer slightly. We don't want this to be a recurring // behaviour. - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "sync_prior_known", + ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); @@ -1855,8 +1988,6 @@ impl Worker { "error" => ?e, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); - // Penalize the peer slightly - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); } SyncCommitteeError::BeaconStateError(e) => { /* @@ -1874,7 +2005,11 @@ impl Worker { ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); // Penalize the peer slightly - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "sync_beacon_state_error", + ); } SyncCommitteeError::ContributionError(e) => { error!( @@ -1885,7 +2020,11 @@ impl Worker { ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); // Penalize the peer slightly - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "sync_contribution_error", + ); } SyncCommitteeError::SyncCommitteeError(e) => { error!( @@ -1896,7 +2035,11 @@ impl Worker { ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); // Penalize the peer slightly - self.gossip_penalize_peer(peer_id, PeerAction::HighToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::HighToleranceError, + "sync_committee_error", + ); } SyncCommitteeError::ArithError(e) => { /* @@ -1909,7 +2052,11 @@ impl Worker { "error" => ?e, ); self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_arith_error", + ); } SyncCommitteeError::InvalidSubcommittee { .. } => { /* @@ -1917,7 +2064,11 @@ impl Worker { an invalid message. */ self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Reject); - self.gossip_penalize_peer(peer_id, PeerAction::LowToleranceError); + self.gossip_penalize_peer( + peer_id, + PeerAction::LowToleranceError, + "sync_invalid_subcommittee", + ); } } debug!( diff --git a/beacon_node/network/src/service.rs b/beacon_node/network/src/service.rs index 485b0a98f..35cf3fa90 100644 --- a/beacon_node/network/src/service.rs +++ b/beacon_node/network/src/service.rs @@ -96,6 +96,7 @@ pub enum NetworkMessage { peer_id: PeerId, action: PeerAction, source: ReportSource, + msg: &'static str, }, /// Disconnect an ban a peer, providing a reason. GoodbyePeer { @@ -445,7 +446,7 @@ fn spawn_service( ); service.libp2p.swarm.behaviour_mut().publish(messages); } - NetworkMessage::ReportPeer { peer_id, action, source } => service.libp2p.report_peer(&peer_id, action, source), + NetworkMessage::ReportPeer { peer_id, action, source, msg } => service.libp2p.report_peer(&peer_id, action, source, msg), NetworkMessage::GoodbyePeer { peer_id, reason, source } => service.libp2p.goodbye_peer(&peer_id, reason, source), NetworkMessage::AttestationSubscribe { subscriptions } => { if let Err(e) = service diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index fc94eaca0..610081319 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -682,7 +682,7 @@ impl BackFillSync { if let Some(peer_action) = peer_action { for peer in self.participating_peers.drain() { - network.report_peer(peer, *peer_action); + network.report_peer(peer, *peer_action, "backfill_batch_failed"); } } self.fail_sync(BackFillError::BatchProcessingFailed(batch_id)) @@ -804,7 +804,11 @@ impl BackFillSync { "batch_epoch" => id, "score_adjustment" => %action, "original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id ); - network.report_peer(attempt.peer_id, action); + network.report_peer( + attempt.peer_id, + action, + "backfill_reprocessed_original_peer", + ); } else { // The same peer corrected it's previous mistake. There was an error, so we // negative score the original peer. @@ -813,7 +817,11 @@ impl BackFillSync { "batch_epoch" => id, "score_adjustment" => %action, "original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id ); - network.report_peer(attempt.peer_id, action); + network.report_peer( + attempt.peer_id, + action, + "backfill_reprocessed_same_peer", + ); } } } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index f9055665c..32f2a2636 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -369,8 +369,11 @@ impl SyncManager { } else { crit!(self.log, "Parent chain has no blocks"); } - self.network - .report_peer(peer_id, PeerAction::MidToleranceError); + self.network.report_peer( + peer_id, + PeerAction::MidToleranceError, + "bbroot_failed_chains", + ); return; } // add the block to response @@ -388,8 +391,11 @@ impl SyncManager { // tolerate this behaviour. if !single_block_request.block_returned { warn!(self.log, "Peer didn't respond with a block it referenced"; "referenced_block_hash" => %single_block_request.hash, "peer_id" => %peer_id); - self.network - .report_peer(peer_id, PeerAction::MidToleranceError); + self.network.report_peer( + peer_id, + PeerAction::MidToleranceError, + "bbroot_no_block", + ); } return; } @@ -512,8 +518,11 @@ impl SyncManager { warn!(self.log, "Single block lookup failed"; "outcome" => ?outcome); // This could be a range of errors. But we couldn't process the block. // For now we consider this a mid tolerance error. - self.network - .report_peer(peer_id, PeerAction::MidToleranceError); + self.network.report_peer( + peer_id, + PeerAction::MidToleranceError, + "single_block_lookup_failed", + ); } } } @@ -836,8 +845,11 @@ impl SyncManager { self.request_parent(parent_request); // We do not tolerate these kinds of errors. We will accept a few but these are signs // of a faulty peer. - self.network - .report_peer(peer, PeerAction::LowToleranceError); + self.network.report_peer( + peer, + PeerAction::LowToleranceError, + "parent_request_bad_hash", + ); } else { // The last block in the queue is the only one that has not attempted to be processed yet. // @@ -907,6 +919,7 @@ impl SyncManager { self.network.report_peer( parent_request.last_submitted_peer, PeerAction::MidToleranceError, + "parent_request_err", ); } } @@ -945,6 +958,7 @@ impl SyncManager { self.network.report_peer( parent_request.last_submitted_peer, PeerAction::LowToleranceError, + "request_parent_import_failed", ); return; // drop the request } @@ -1112,8 +1126,11 @@ impl SyncManager { // A peer sent an object (block or attestation) that referenced a parent. // The processing of this chain failed. self.failed_chains.insert(chain_head); - self.network - .report_peer(peer_id, PeerAction::MidToleranceError); + self.network.report_peer( + peer_id, + PeerAction::MidToleranceError, + "parent_lookup_failed", + ); } } } diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index e991e86e0..9415f2100 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -170,13 +170,14 @@ impl SyncNetworkContext { } /// Reports to the scoring algorithm the behaviour of a peer. - pub fn report_peer(&mut self, peer_id: PeerId, action: PeerAction) { + pub fn report_peer(&mut self, peer_id: PeerId, action: PeerAction, msg: &'static str) { debug!(self.log, "Sync reporting peer"; "peer_id" => %peer_id, "action" => %action); self.network_send .send(NetworkMessage::ReportPeer { peer_id, action, source: ReportSource::SyncService, + msg, }) .unwrap_or_else(|e| { warn!(self.log, "Could not report peer, channel failed"; "error"=> %e); diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index 4b8980899..4474f1cc3 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -533,7 +533,7 @@ impl SyncingChain { if let Some(peer_action) = peer_action { for (peer, _) in self.peers.drain() { - network.report_peer(peer, *peer_action); + network.report_peer(peer, *peer_action, "batch_failed"); } } Err(RemoveChain::ChainFailed(batch_id)) @@ -624,7 +624,11 @@ impl SyncingChain { "batch_epoch" => id, "score_adjustment" => %action, "original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id ); - network.report_peer(attempt.peer_id, action); + network.report_peer( + attempt.peer_id, + action, + "batch_reprocessed_original_peer", + ); } else { // The same peer corrected it's previous mistake. There was an error, so we // negative score the original peer. @@ -633,7 +637,11 @@ impl SyncingChain { "batch_epoch" => id, "score_adjustment" => %action, "original_peer" => %attempt.peer_id, "new_peer" => %processed_attempt.peer_id ); - network.report_peer(attempt.peer_id, action); + network.report_peer( + attempt.peer_id, + action, + "batch_reprocessed_same_peer", + ); } } }