diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index 610081319..0c34eef27 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -54,6 +54,13 @@ impl BatchConfig for BackFillBatchConfig { fn max_batch_processing_attempts() -> u8 { MAX_BATCH_PROCESSING_ATTEMPTS } + fn batch_attempt_hash(blocks: &[SignedBeaconBlock]) -> u64 { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut hasher = DefaultHasher::new(); + blocks.hash(&mut hasher); + hasher.finish() + } } /// Return type when attempting to start the backfill sync process. @@ -119,7 +126,7 @@ pub struct BackFillSync { /// Batches validated by this chain. validated_batches: u64, - /// We keep track of peer that are participating in the backfill sync. Unlike RangeSync, + /// We keep track of peers that are participating in the backfill sync. Unlike RangeSync, /// BackFillSync uses all synced peers to download the chain from. If BackFillSync fails, we don't /// want to penalize all our synced peers, so we use this variable to keep track of peers that /// have participated and only penalize these peers if backfill sync fails. @@ -539,7 +546,7 @@ impl BackFillSync { "error" => %e, "batch" => self.processing_target); // This is unlikely to happen but it would stall syncing since the batch now has no // blocks to continue, and the chain is expecting a processing result that won't - // arrive. To mitigate this, (fake) fail this processing so that the batch is + // arrive. To mitigate this, (fake) fail this processing so that the batch is // re-downloaded. self.on_batch_process_result( network, @@ -795,7 +802,7 @@ impl BackFillSync { for attempt in batch.attempts() { // The validated batch has been re-processed if attempt.hash != processed_attempt.hash { - // The re-downloaded version was different + // The re-downloaded version was different. 
if processed_attempt.peer_id != attempt.peer_id { // A different peer sent the correct batch, the previous peer did not // We negatively score the original peer. diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index 32f2a2636..960dd12af 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -131,7 +131,7 @@ pub enum SyncRequestType { RangeSync(Epoch, ChainId), } -/// The result of processing a multiple blocks (a chain segment). +/// The result of processing multiple blocks (a chain segment). #[derive(Debug)] pub enum BatchProcessResult { /// The batch was completed successfully. It carries whether the sent batch contained blocks. diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index e0b15cb49..7239081ad 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -19,6 +19,34 @@ pub trait BatchConfig { fn max_batch_download_attempts() -> u8; /// The max batch processing attempts. fn max_batch_processing_attempts() -> u8; + /// Hashing function of a batch's attempt. Used for scoring purposes. + /// + /// When a batch fails processing, it is possible that the batch is wrong (faulty or + /// incomplete) or that a previous one is wrong. For this reason we need to re-download and + /// re-process the batches awaiting validation and the current one. Consider this scenario: + /// + /// ```ignore + /// BatchA BatchB BatchC BatchD + /// -----X Empty Empty Y----- + /// ``` + /// + /// BatchA declares that it refers to X, but BatchD declares that its first block is Y. There is no + /// way to know if BatchD is faulty/incomplete or if batches B and/or C are missing blocks. It is + /// also possible that BatchA belongs to a different chain to the rest starting in some block + /// midway in the batch's range. 
For this reason, the four batches would need to be re-downloaded + /// and re-processed. + /// + /// If BatchD was actually good, it will still register two processing attempts for the same set of + /// blocks. In this case, we don't want to penalize the peer that provided the first version, since + /// it's equal to the successfully processed one. + /// + /// The function `batch_attempt_hash` provides a way to compare two batch attempts without + /// storing the full set of blocks. + /// + /// Note that simpler hashing functions considered in the past (hash of first block, hash of last + /// block, number of received blocks) are not good enough to differentiate attempts. For this + /// reason, we hash the complete set of blocks both in RangeSync and BackFillSync. + fn batch_attempt_hash(blocks: &[SignedBeaconBlock]) -> u64; } pub struct RangeSyncBatchConfig {} @@ -30,6 +58,11 @@ impl BatchConfig for RangeSyncBatchConfig { fn max_batch_processing_attempts() -> u8 { MAX_BATCH_PROCESSING_ATTEMPTS } + fn batch_attempt_hash(blocks: &[SignedBeaconBlock]) -> u64 { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + blocks.hash(&mut hasher); + hasher.finish() + } } /// Error type of a batch in a wrong state. 
@@ -300,7 +333,7 @@ impl BatchInfo { pub fn start_processing(&mut self) -> Result>, WrongState> { match self.state.poison() { BatchState::AwaitingProcessing(peer, blocks) => { - self.state = BatchState::Processing(Attempt::new(peer, &blocks)); + self.state = BatchState::Processing(Attempt::new::(peer, &blocks)); Ok(blocks) } BatchState::Poisoned => unreachable!("Poisoned batch"), @@ -386,11 +419,8 @@ pub struct Attempt { } impl Attempt { - #[allow(clippy::ptr_arg)] - fn new(peer_id: PeerId, blocks: &Vec>) -> Self { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - blocks.hash(&mut hasher); - let hash = hasher.finish(); + fn new(peer_id: PeerId, blocks: &[SignedBeaconBlock]) -> Self { + let hash = B::batch_attempt_hash(blocks); Attempt { peer_id, hash } } }