More sync updates (#1791)

## Issue Addressed
Addresses #1614, along with a couple of sync-stalling problems; the most important of these is a cyclic dependency between the sync manager and the peer manager.
divma 2020-10-20 22:34:18 +00:00
parent 703c33bdc7
commit 2acf75785c
10 changed files with 397 additions and 469 deletions
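The shape of the fix: sync state now flows one way. The sync manager derives a `SyncState` from range sync's own report and stores it in `NetworkGlobals`; neither `NetworkGlobals` nor the peer manager computes or gates on sync state anymore. Below is a minimal sketch of that flow with simplified stand-in types — the real code uses `parking_lot`'s `RwLock` and richer slot types, while `std::sync::RwLock` and `u64` slots are used here so the sketch compiles on its own:

```rust
use std::sync::RwLock;

/// Simplified stand-in for eth2_libp2p's SyncState.
#[derive(Clone, Debug, PartialEq)]
enum SyncState {
    SyncingFinalized { start_slot: u64, target_slot: u64 },
    Synced,
    Stalled,
}

/// Stand-in for NetworkGlobals: after this commit it only *stores* the state.
struct NetworkGlobals {
    sync_state: RwLock<SyncState>,
}

impl NetworkGlobals {
    /// Mirrors the new `set_sync_state` below: replace the state, return the old one.
    fn set_sync_state(&self, new_state: SyncState) -> SyncState {
        let mut guard = self.sync_state.write().expect("lock not poisoned");
        std::mem::replace(&mut *guard, new_state)
    }
}

fn main() {
    let globals = NetworkGlobals {
        sync_state: RwLock::new(SyncState::Stalled),
    };
    // The sync manager (not globals, not the peer manager) decides the state
    // and pushes it down; comparing old vs. new lets it log transitions.
    let old = globals.set_sync_state(SyncState::SyncingFinalized {
        start_slot: 0,
        target_slot: 64,
    });
    assert_eq!(old, SyncState::Stalled);
}
```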


```diff
@@ -35,8 +35,8 @@ use score::{PeerAction, ScoreState};
 use std::collections::HashMap;
 /// The time in seconds between re-status's peers.
 const STATUS_INTERVAL: u64 = 300;
-/// The time in seconds between PING events. We do not send a ping if the other peer as PING'd us within
-/// this time frame (Seconds)
+/// The time in seconds between PING events. We do not send a ping if the other peer has PING'd us
+/// within this time frame (Seconds)
 const PING_INTERVAL: u64 = 30;
 /// The heartbeat performs regular updates such as updating reputations and performing discovery
@@ -831,20 +831,16 @@ impl<TSpec: EthSpec> Stream for PeerManager<TSpec> {
             }
         }
-        // We don't want to update peers during syncing, since this may result in a new chain being
-        // synced which leads to inefficient re-downloads of blocks.
-        if !self.network_globals.is_syncing() {
-            loop {
-                match self.status_peers.poll_next_unpin(cx) {
-                    Poll::Ready(Some(Ok(peer_id))) => {
-                        self.status_peers.insert(peer_id.clone());
-                        self.events.push(PeerManagerEvent::Status(peer_id))
-                    }
-                    Poll::Ready(Some(Err(e))) => {
-                        error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
-                    }
-                    Poll::Ready(None) | Poll::Pending => break,
-                }
+        loop {
+            match self.status_peers.poll_next_unpin(cx) {
+                Poll::Ready(Some(Ok(peer_id))) => {
+                    self.status_peers.insert(peer_id.clone());
+                    self.events.push(PeerManagerEvent::Status(peer_id))
+                }
+                Poll::Ready(Some(Err(e))) => {
+                    error!(self.log, "Failed to check for peers to ping"; "error" => e.to_string())
+                }
+                Poll::Ready(None) | Poll::Pending => break,
             }
         }
```


```diff
@@ -110,25 +110,8 @@ impl<TSpec: EthSpec> NetworkGlobals<TSpec> {
     /// Updates the syncing state of the node.
     ///
-    /// If there is a new state, the old state and the new states are returned.
-    pub fn update_sync_state(&self) -> Option<(SyncState, SyncState)> {
-        let mut result = None;
-        // if we are in a range sync, nothing changes. Range sync will update this.
-        if !self.is_syncing() {
-            let new_state = self
-                .peers
-                .read()
-                .synced_peers()
-                .next()
-                .map(|_| SyncState::Synced)
-                .unwrap_or_else(|| SyncState::Stalled);
-            let mut peer_state = self.sync_state.write();
-            if new_state != *peer_state {
-                result = Some((peer_state.clone(), new_state.clone()));
-            }
-            *peer_state = new_state;
-        }
-        result
+    /// The old state is returned
+    pub fn set_sync_state(&self, new_state: SyncState) -> SyncState {
+        std::mem::replace(&mut *self.sync_state.write(), new_state)
     }
 }
```


```diff
@@ -1,19 +1,15 @@
 use serde::{Deserialize, Serialize};
-use types::{Hash256, Slot};
+use types::Slot;
 /// The current state of the node.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum SyncState {
     /// The node is performing a long-range (batch) sync over a finalized chain.
     /// In this state, parent lookups are disabled.
-    SyncingFinalized {
-        start_slot: Slot,
-        head_slot: Slot,
-        head_root: Hash256,
-    },
+    SyncingFinalized { start_slot: Slot, target_slot: Slot },
     /// The node is performing a long-range (batch) sync over one or many head chains.
     /// In this state parent lookups are disabled.
-    SyncingHead { start_slot: Slot, head_slot: Slot },
+    SyncingHead { start_slot: Slot, target_slot: Slot },
     /// The node is up to date with all known peers and is connected to at least one
     /// fully synced peer. In this state, parent lookups are enabled.
     Synced,
```
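Both long-range variants now carry the same `{ start_slot, target_slot }` shape, so consumers can report progress uniformly. A small illustrative consumer — not taken from the PR — follows; `u64` stands in for `Slot`, and the `Stalled` variant from the unchanged tail of the enum is included so the match is exhaustive:

```rust
#[allow(dead_code)]
enum SyncState {
    SyncingFinalized { start_slot: u64, target_slot: u64 },
    SyncingHead { start_slot: u64, target_slot: u64 },
    Synced,
    Stalled,
}

/// One match arm covers both long-range variants thanks to the shared field shape.
fn describe(state: &SyncState, current_slot: u64) -> String {
    match state {
        SyncState::SyncingFinalized { start_slot, target_slot }
        | SyncState::SyncingHead { start_slot, target_slot } => {
            let total = target_slot.saturating_sub(*start_slot).max(1);
            let done = current_slot.saturating_sub(*start_slot).min(total);
            format!("syncing {}/{} slots", done, total)
        }
        SyncState::Synced => "synced".to_string(),
        SyncState::Stalled => "stalled".to_string(),
    }
}

fn main() {
    let state = SyncState::SyncingHead { start_slot: 100, target_slot: 200 };
    assert_eq!(describe(&state, 150), "syncing 50/100 slots");
}
```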


```diff
@@ -290,7 +290,7 @@ pub fn serve<T: BeaconChainTypes>(
         .and_then(
             |network_globals: Arc<NetworkGlobals<T::EthSpec>>, chain: Arc<BeaconChain<T>>| async move {
                 match *network_globals.sync_state.read() {
-                    SyncState::SyncingFinalized { head_slot, .. } => {
+                    SyncState::SyncingFinalized { target_slot, .. } => {
                         let current_slot = chain
                             .slot_clock
                             .now_or_genesis()
@@ -302,12 +302,12 @@ pub fn serve<T: BeaconChainTypes>(
                         let tolerance = SYNC_TOLERANCE_EPOCHS * T::EthSpec::slots_per_epoch();
-                        if head_slot + tolerance >= current_slot {
+                        if target_slot + tolerance >= current_slot {
                             Ok(())
                         } else {
                             Err(warp_utils::reject::not_synced(format!(
                                 "head slot is {}, current slot is {}",
-                                head_slot, current_slot
+                                target_slot, current_slot
                             )))
                         }
                     }
```
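The endpoint's check is simple slot arithmetic: a node within `SYNC_TOLERANCE_EPOCHS` worth of slots of its sync target reports as synced. A worked sketch of that check; the constant values below (8 epochs of tolerance, 32 slots per epoch) are assumptions for illustration, not taken from this diff:

```rust
const SYNC_TOLERANCE_EPOCHS: u64 = 8; // assumed value, for illustration only
const SLOTS_PER_EPOCH: u64 = 32; // mainnet value

/// Mirrors the `target_slot + tolerance >= current_slot` check above.
fn is_synced_enough(target_slot: u64, current_slot: u64) -> bool {
    let tolerance = SYNC_TOLERANCE_EPOCHS * SLOTS_PER_EPOCH;
    target_slot + tolerance >= current_slot
}

fn main() {
    assert!(is_synced_enough(1000, 1200)); // 200 slots behind: within the 256-slot tolerance
    assert!(!is_synced_enough(1000, 1300)); // 300 slots behind: reported as not synced
}
```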


```diff
@@ -35,13 +35,13 @@
 use super::network_context::SyncNetworkContext;
 use super::peer_sync_info::{PeerSyncInfo, PeerSyncType};
-use super::range_sync::{ChainId, RangeSync, EPOCHS_PER_BATCH};
+use super::range_sync::{ChainId, RangeSync, RangeSyncType, EPOCHS_PER_BATCH};
 use super::RequestId;
 use crate::beacon_processor::{ProcessId, WorkEvent as BeaconWorkEvent};
 use crate::service::NetworkMessage;
 use beacon_chain::{BeaconChain, BeaconChainTypes, BlockError};
 use eth2_libp2p::rpc::{methods::MAX_REQUEST_BLOCKS, BlocksByRootRequest, GoodbyeReason};
-use eth2_libp2p::types::NetworkGlobals;
+use eth2_libp2p::types::{NetworkGlobals, SyncState};
 use eth2_libp2p::{PeerAction, PeerId};
 use fnv::FnvHashMap;
 use lru_cache::LRUCache;
@@ -176,11 +176,11 @@ pub struct SyncManager<T: BeaconChainTypes> {
     /// The flag allows us to determine if the peer returned data or sent us nothing.
     single_block_lookups: FnvHashMap<RequestId, SingleBlockRequest>,
-    /// The logger for the import manager.
-    log: Logger,
     /// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
     beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
+    /// The logger for the import manager.
+    log: Logger,
 }
 /// Object representing a single block lookup request.
@@ -222,7 +222,6 @@ pub fn spawn<T: BeaconChainTypes>(
     let mut sync_manager = SyncManager {
         range_sync: RangeSync::new(
             beacon_chain.clone(),
-            network_globals.clone(),
             beacon_processor_send.clone(),
             log.clone(),
         ),
@@ -233,8 +232,8 @@ pub fn spawn<T: BeaconChainTypes>(
         parent_queue: SmallVec::new(),
         failed_chains: LRUCache::new(500),
         single_block_lookups: FnvHashMap::default(),
-        log: log.clone(),
         beacon_processor_send,
+        log: log.clone(),
     };
     // spawn the sync manager thread
@@ -276,8 +275,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
                     "local_head_slot" => local_peer_info.head_slot,
                 );
                 self.synced_peer(&peer_id, remote);
-                // notify the range sync that a peer has been added
-                self.range_sync.fully_synced_peer_found(&mut self.network);
             }
             PeerSyncType::Advanced => {
                 trace!(self.log, "Useful peer for sync found";
@@ -302,8 +299,6 @@ impl<T: BeaconChainTypes> SyncManager<T> {
                 // the second case
                 {
                     self.synced_peer(&peer_id, remote);
-                    // notify the range sync that a peer has been added
-                    self.range_sync.fully_synced_peer_found(&mut self.network);
                 } else {
                     // Add the peer to our RangeSync
                     self.range_sync
@@ -673,10 +668,41 @@ impl<T: BeaconChainTypes> SyncManager<T> {
     /// Updates the global sync state and logs any changes.
     fn update_sync_state(&mut self) {
-        if let Some((old_state, new_state)) = self.network_globals.update_sync_state() {
+        let new_state: SyncState = match self.range_sync.state() {
+            Err(e) => {
+                debug!(self.log, "Error getting range sync state"; "error" => %e);
+                return;
+            }
+            Ok(state) => match state {
+                None => {
+                    // no range sync, decide if we are stalled or synced
+                    self.network_globals
+                        .peers
+                        .read()
+                        .synced_peers()
+                        .next()
+                        .map(|_| SyncState::Synced)
+                        .unwrap_or_else(|| SyncState::Stalled)
+                }
+                Some((RangeSyncType::Finalized, start_slot, target_slot)) => {
+                    SyncState::SyncingFinalized {
+                        start_slot,
+                        target_slot,
+                    }
+                }
+                Some((RangeSyncType::Head, start_slot, target_slot)) => SyncState::SyncingHead {
+                    start_slot,
+                    target_slot,
+                },
+            },
+        };
+        let old_state = self.network_globals.set_sync_state(new_state);
+        let new_state = self.network_globals.sync_state.read();
+        if !new_state.eq(&old_state) {
             info!(self.log, "Sync state updated"; "old_state" => %old_state, "new_state" => %new_state);
             // If we have become synced - Subscribe to all the core subnet topics
-            if new_state == eth2_libp2p::types::SyncState::Synced {
+            if new_state.is_synced() {
                 self.network.subscribe_core_topics();
             }
         }
     }
```


```diff
@@ -98,6 +98,14 @@ impl<T: EthSpec> BatchInfo<T> {
         peers
     }
+    /// Verifies if an incoming block belongs to this batch.
+    pub fn is_expecting_block(&self, peer_id: &PeerId, request_id: &RequestId) -> bool {
+        if let BatchState::Downloading(expected_peer, _, expected_id) = &self.state {
+            return peer_id == expected_peer && expected_id == request_id;
+        }
+        false
+    }
     pub fn current_peer(&self) -> Option<&PeerId> {
         match &self.state {
             BatchState::AwaitingDownload | BatchState::Failed => None,
```
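Callers in `chain.rs` (further below) previously open-coded a match on `BatchState::Downloading`; this helper centralizes that guard. A condensed sketch of the pattern with simplified stand-in types — the real `BatchState::Downloading` also carries the blocks downloaded so far:

```rust
type RequestId = usize;

#[derive(PartialEq)]
struct PeerId(u64);

#[allow(dead_code)]
enum BatchState {
    AwaitingDownload,
    Downloading(PeerId, RequestId), // real variant also holds the received blocks
    Failed,
}

struct BatchInfo {
    state: BatchState,
}

impl BatchInfo {
    /// A block is only accepted from the exact peer and request id the batch expects.
    fn is_expecting_block(&self, peer_id: &PeerId, request_id: &RequestId) -> bool {
        if let BatchState::Downloading(expected_peer, expected_id) = &self.state {
            return peer_id == expected_peer && expected_id == request_id;
        }
        false
    }
}

fn main() {
    let batch = BatchInfo { state: BatchState::Downloading(PeerId(7), 42) };
    assert!(batch.is_expecting_block(&PeerId(7), &42));
    assert!(!batch.is_expecting_block(&PeerId(9), &42)); // stale or foreign response
}
```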


```diff
@@ -2,14 +2,13 @@ use super::batch::{BatchInfo, BatchState};
 use crate::beacon_processor::ProcessId;
 use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
 use crate::sync::{network_context::SyncNetworkContext, BatchProcessResult, RequestId};
-use beacon_chain::{BeaconChain, BeaconChainTypes};
+use beacon_chain::BeaconChainTypes;
 use eth2_libp2p::{PeerAction, PeerId};
 use fnv::FnvHashMap;
 use rand::seq::SliceRandom;
 use slog::{crit, debug, o, warn};
 use std::collections::{btree_map::Entry, BTreeMap, HashSet};
 use std::hash::{Hash, Hasher};
-use std::sync::Arc;
 use tokio::sync::mpsc::Sender;
 use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
@@ -87,9 +86,6 @@ pub struct SyncingChain<T: BeaconChainTypes> {
     /// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
     beacon_processor_send: Sender<BeaconWorkEvent<T::EthSpec>>,
-    /// A reference to the underlying beacon chain.
-    chain: Arc<BeaconChain<T>>,
     /// The chain's log.
     log: slog::Logger,
 }
@@ -116,7 +112,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
         target_head_root: Hash256,
         peer_id: PeerId,
         beacon_processor_send: Sender<BeaconWorkEvent<T::EthSpec>>,
-        chain: Arc<BeaconChain<T>>,
         log: &slog::Logger,
     ) -> Self {
         let mut peers = FnvHashMap::default();
@@ -138,7 +133,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
             state: ChainSyncingState::Stopped,
             current_processing_batch: None,
             beacon_processor_send,
-            chain,
             log: log.new(o!("chain" => id)),
         }
     }
@@ -163,17 +157,17 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
         if let Some(batch_ids) = self.peers.remove(peer_id) {
             // fail the batches
             for id in batch_ids {
-                if let BatchState::Failed = self
-                    .batches
-                    .get_mut(&id)
-                    .expect("registered batch exists")
-                    .download_failed(&self.log)
-                {
-                    return ProcessingResult::RemoveChain;
-                }
-                if let ProcessingResult::RemoveChain = self.retry_batch_download(network, id) {
-                    // drop the chain early
-                    return ProcessingResult::RemoveChain;
+                if let Some(batch) = self.batches.get_mut(&id) {
+                    if let BatchState::Failed = batch.download_failed(&self.log) {
+                        return ProcessingResult::RemoveChain;
+                    }
+                    if let ProcessingResult::RemoveChain = self.retry_batch_download(network, id) {
+                        // drop the chain early
+                        return ProcessingResult::RemoveChain;
+                    }
+                } else {
+                    debug!(self.log, "Batch not found while removing peer";
+                        "peer" => %peer_id, "batch" => "id")
                 }
             }
         }
@@ -215,12 +209,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
             // sending an error /timeout) if the peer is removed from the chain for other
             // reasons. Check that this block belongs to the expected peer, and that the
             // request_id matches
-            if let BatchState::Downloading(expected_peer, _, expected_request_id) =
-                batch.state()
-            {
-                if expected_peer != peer_id || expected_request_id != &request_id {
-                    return ProcessingResult::KeepChain;
-                }
+            if !batch.is_expecting_block(peer_id, &request_id) {
+                return ProcessingResult::KeepChain;
             }
             batch
         }
@@ -275,7 +265,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
             return ProcessingResult::KeepChain;
         }
-        let batch = self.batches.get_mut(&batch_id).expect("Batch exists");
+        let batch = match self.batches.get_mut(&batch_id) {
+            Some(batch) => batch,
+            None => {
+                debug!(self.log, "Processing unknown batch"; "batch" => %batch_id);
+                return ProcessingResult::RemoveChain;
+            }
+        };
         // NOTE: We send empty batches to the processor in order to trigger the block processor
         // result callback. This is done, because an empty batch could end a chain and the logic
@@ -340,10 +336,8 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                     // - Poisoned -> this is an intermediate state that should never be reached
                     // - AwaitingDownload -> A recoverable failed batch should have been
                     // re-requested.
-                    unreachable!(
-                        "Optimistic batch indicates inconsistent chain state: {:?}",
-                        state
-                    )
+                    crit!(self.log, "Optimistic batch indicates inconsistent chain state"; "state" => ?state);
+                    return ProcessingResult::RemoveChain;
                 }
                 BatchState::AwaitingValidation(_) => {
@@ -352,7 +346,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                     // candidate. If the batch was empty the chain rejects it; if it was non
                     // empty the chain is advanced to this point (so that the old optimistic
                     // batch is now the processing target)
-                    crit!(self.log, "Optimistic batch should never be Awaiting Validation"; "batch" => epoch);
+                    debug!(self.log, "Optimistic batch should never be Awaiting Validation"; "batch" => epoch);
                     None
                 }
             }
@@ -436,10 +430,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
         match result {
             BatchProcessResult::Success(was_non_empty) => {
-                let batch = self
-                    .batches
-                    .get_mut(&batch_id)
-                    .expect("Chain was expecting a known batch");
+                let batch = match self.batches.get_mut(&batch_id) {
+                    Some(batch) => batch,
+                    None => {
+                        debug!(self.log, "Current processing batch not found"; "batch" => batch_id);
+                        return ProcessingResult::RemoveChain;
+                    }
+                };
                 let _ = batch.processing_completed(true, &self.log);
                 // If the processed batch was not empty, we can validate previous unvalidated
                 // blocks.
@@ -479,13 +476,19 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                 }
             }
             BatchProcessResult::Failed(imported_blocks) => {
-                let batch = self
-                    .batches
-                    .get_mut(&batch_id)
-                    .expect("Chain was expecting a known batch");
-                let peer = batch
-                    .current_peer()
-                    .expect("batch is processing blocks from a peer");
+                let (batch, peer) = match self.batches.get_mut(&batch_id) {
+                    Some(batch) => match batch.current_peer().cloned() {
+                        Some(peer) => (batch, peer),
+                        None => {
+                            debug!(self.log, "Current processing has no peer"; "batch" => batch_id);
+                            return ProcessingResult::RemoveChain;
+                        }
+                    },
+                    None => {
+                        debug!(self.log, "Current processing batch not found"; "batch" => batch_id);
+                        return ProcessingResult::RemoveChain;
+                    }
+                };
                 debug!(self.log, "Batch processing failed"; "imported_blocks" => imported_blocks,
                     "batch_epoch" => batch_id, "peer" => %peer, "client" => %network.client_type(&peer));
                 if let BatchState::Failed = batch.processing_completed(false, &self.log) {
@@ -610,12 +613,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                         active_batches.remove(&id);
                     }
                 }
-                BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => {
-                    unreachable!("batch indicates inconsistent chain state while advancing chain")
-                }
+                BatchState::Failed | BatchState::Poisoned | BatchState::AwaitingDownload => crit!(
+                    self.log,
+                    "batch indicates inconsistent chain state while advancing chain"
+                ),
                 BatchState::AwaitingProcessing(..) => {}
                 BatchState::Processing(_) => {
-                    assert_eq!(
+                    debug_assert_eq!(
                         id,
                         self.current_processing_batch.expect(
                             "A batch in a processing state means the chain is processing it"
@@ -770,11 +774,6 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
         }
     }
-    /// Sends a STATUS message to all peers in the peer pool.
-    pub fn status_peers(&self, network: &mut SyncNetworkContext<T::EthSpec>) {
-        network.status_peers(self.chain.clone(), self.peers.keys().cloned());
-    }
     /// An RPC error has occurred.
     ///
     /// If the batch exists it is re-requested.
@@ -789,16 +788,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
             // A batch could be retried without the peer failing the request (disconnecting/
             // sending an error /timeout) if the peer is removed from the chain for other
             // reasons. Check that this block belongs to the expected peer
-            if let BatchState::Downloading(expected_peer, _, expected_request_id) = batch.state() {
-                if expected_peer != peer_id || expected_request_id != &request_id {
-                    return ProcessingResult::KeepChain;
-                }
+            if !batch.is_expecting_block(peer_id, &request_id) {
+                return ProcessingResult::KeepChain;
             }
             debug!(self.log, "Batch failed. RPC Error"; "batch_epoch" => batch_id);
-            self.peers
-                .get_mut(peer_id)
-                .expect("Peer belongs to the chain")
-                .remove(&batch_id);
+            if let Some(active_requests) = self.peers.get_mut(peer_id) {
+                active_requests.remove(&batch_id);
+            }
             if let BatchState::Failed = batch.download_failed(&self.log) {
                 return ProcessingResult::RemoveChain;
             }
@@ -865,11 +861,14 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                     debug!(self.log, "Requesting batch"; "epoch" => batch_id, &batch);
                 }
                 // register the batch for this peer
-                self.peers
+                return self
+                    .peers
                     .get_mut(&peer)
-                    .expect("peer belongs to the peer pool")
-                    .insert(batch_id);
-                return ProcessingResult::KeepChain;
+                    .map(|requests| {
+                        requests.insert(batch_id);
+                        ProcessingResult::KeepChain
+                    })
+                    .unwrap_or(ProcessingResult::RemoveChain);
             }
             Err(e) => {
                 // NOTE: under normal conditions this shouldn't happen but we handle it anyway
@@ -879,8 +878,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
                 batch.start_downloading_from_peer(peer.clone(), 1, &self.log); // fake request_id is not relevant
                 self.peers
                     .get_mut(&peer)
-                    .expect("peer belongs to the peer pool")
-                    .remove(&batch_id);
+                    .map(|request| request.remove(&batch_id));
                 if let BatchState::Failed = batch.download_failed(&self.log) {
                     return ProcessingResult::RemoveChain;
                 } else {
```
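The thread running through this file's changes: batch and peer lookups that previously `expect(...)`-panicked on inconsistent state now log and return `ProcessingResult::RemoveChain`, so one bad or late message tears down a single chain instead of the whole sync thread. A distilled sketch of that pattern — names here are stand-ins, not the real API:

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum ProcessingResult {
    KeepChain,
    RemoveChain,
}

struct Batch; // stand-in for BatchInfo<T>

struct Chain {
    batches: HashMap<u64, Batch>,
}

impl Chain {
    /// Before: `self.batches.get_mut(&batch_id).expect("Batch exists")`.
    /// After: an unknown id is logged and the chain is dropped gracefully.
    fn on_batch_result(&mut self, batch_id: u64) -> ProcessingResult {
        let _batch = match self.batches.get_mut(&batch_id) {
            Some(batch) => batch,
            None => {
                eprintln!("Current processing batch not found: {}", batch_id); // real code: debug!
                return ProcessingResult::RemoveChain;
            }
        };
        // ... act on the batch ...
        ProcessingResult::KeepChain
    }
}

fn main() {
    let mut chain = Chain { batches: HashMap::new() };
    assert_eq!(chain.on_batch_result(3), ProcessingResult::RemoveChain);
}
```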


```diff
@@ -3,16 +3,18 @@
 //! Each chain type is stored in it's own map. A variety of helper functions are given along with
 //! this struct to simplify the logic of the other layers of sync.
-use super::chain::{ChainId, ChainSyncingState, ProcessingResult, SyncingChain};
+use super::chain::{ChainId, ProcessingResult, SyncingChain};
 use super::sync_type::RangeSyncType;
 use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
 use crate::sync::network_context::SyncNetworkContext;
 use crate::sync::PeerSyncInfo;
 use beacon_chain::{BeaconChain, BeaconChainTypes};
-use eth2_libp2p::{types::SyncState, NetworkGlobals, PeerId};
+use eth2_libp2p::PeerId;
 use fnv::FnvHashMap;
-use slog::{crit, debug, error, info, trace};
+use slog::{debug, error};
+use smallvec::SmallVec;
 use std::collections::hash_map::Entry;
+use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::mpsc;
 use types::EthSpec;
@@ -25,66 +27,17 @@ const PARALLEL_HEAD_CHAINS: usize = 2;
 #[derive(Clone)]
 pub enum RangeSyncState {
     /// A finalized chain is being synced.
-    Finalized {
-        /// The start of the finalized chain.
-        start_slot: Slot,
-        /// The target head slot of the finalized chain.
-        head_slot: Slot,
-        /// The target head root of the finalized chain.
-        head_root: Hash256,
-    },
+    Finalized(u64),
     /// There are no finalized chains and we are syncing one more head chains.
-    Head {
-        /// The last finalized checkpoint for all head chains.
-        start_slot: Slot,
-        /// The largest known slot to sync to.
-        head_slot: Slot,
-    },
+    Head(SmallVec<[u64; PARALLEL_HEAD_CHAINS]>),
     /// There are no head or finalized chains and no long range sync is in progress.
     Idle,
 }
-impl PartialEq for RangeSyncState {
-    fn eq(&self, other: &Self) -> bool {
-        match (self, other) {
-            (RangeSyncState::Finalized { .. }, RangeSyncState::Finalized { .. }) => true,
-            (RangeSyncState::Head { .. }, RangeSyncState::Head { .. }) => true,
-            (RangeSyncState::Idle, RangeSyncState::Idle) => true,
-            _ => false,
-        }
-    }
-}
-impl Into<SyncState> for RangeSyncState {
-    fn into(self) -> SyncState {
-        match self {
-            RangeSyncState::Finalized {
-                start_slot,
-                head_slot,
-                head_root,
-            } => SyncState::SyncingFinalized {
-                start_slot,
-                head_slot,
-                head_root,
-            },
-            RangeSyncState::Head {
-                start_slot,
-                head_slot,
-            } => SyncState::SyncingHead {
-                start_slot,
-                head_slot,
-            },
-            RangeSyncState::Idle => SyncState::Stalled, // this should never really be used
-        }
-    }
-}
 /// A collection of finalized and head chains currently being processed.
 pub struct ChainCollection<T: BeaconChainTypes> {
     /// The beacon chain for processing.
     beacon_chain: Arc<BeaconChain<T>>,
-    /// A reference to the global network parameters.
-    network_globals: Arc<NetworkGlobals<T::EthSpec>>,
     /// The set of finalized chains being synced.
     finalized_chains: FnvHashMap<ChainId, SyncingChain<T>>,
     /// The set of head chains being synced.
@@ -96,14 +49,9 @@ pub struct ChainCollection<T: BeaconChainTypes> {
 }
 impl<T: BeaconChainTypes> ChainCollection<T> {
-    pub fn new(
-        beacon_chain: Arc<BeaconChain<T>>,
-        network_globals: Arc<NetworkGlobals<T::EthSpec>>,
-        log: slog::Logger,
-    ) -> Self {
+    pub fn new(beacon_chain: Arc<BeaconChain<T>>, log: slog::Logger) -> Self {
         ChainCollection {
             beacon_chain,
-            network_globals,
             finalized_chains: FnvHashMap::default(),
             head_chains: FnvHashMap::default(),
             state: RangeSyncState::Idle,
@@ -111,82 +59,55 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
-    pub fn state(&self) -> &RangeSyncState {
-        &self.state
-    }
-    /// Updates the global sync state and logs any changes.
-    pub fn update_sync_state(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
-        // if there is no range sync occurring, the state is either synced or not based on
-        // connected peers.
-        if self.state == RangeSyncState::Idle {
-            // there is no range sync, let the state of peers determine the global node sync state
-            let new_state = self
-                .network_globals
-                .peers
-                .read()
-                .synced_peers()
-                .next()
-                .map(|_| SyncState::Synced)
-                .unwrap_or_else(|| SyncState::Stalled);
-            let mut peer_state = self.network_globals.sync_state.write();
-            if new_state != *peer_state {
-                info!(self.log, "Sync state updated"; "old_state" => %peer_state, "new_state" => %new_state);
-                if new_state == SyncState::Synced {
-                    network.subscribe_core_topics();
-                }
-                *peer_state = new_state;
-            }
-        } else {
-            // The state is based on a range sync state, update it
-            let mut node_sync_state = self.network_globals.sync_state.write();
-            let new_state: SyncState = self.state.clone().into();
-            if *node_sync_state != new_state {
-                // we are updating the state, inform the user
-                info!(self.log, "Sync state updated"; "old_state" => %node_sync_state, "new_state" => %new_state);
-            }
-            *node_sync_state = new_state;
-        }
-    }
-    /// A fully synced peer has joined.
-    ///
-    /// We could be awaiting a head sync. If we are in the head syncing state, without any head
-    /// chains, then update the state to idle.
-    pub fn fully_synced_peer_found(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
-        if let RangeSyncState::Head { .. } = self.state {
-            if self.head_chains.is_empty() {
-                // Update the global network state to either synced or stalled.
-                self.state = RangeSyncState::Idle;
-                self.update_sync_state(network);
-            }
-        }
-    }
-    /// After a finalized chain completes this function is called. It ensures the state is set to
-    /// `SyncState::Head` indicating we are awaiting new peers to connect before we can consider
-    /// the state as idle.
-    pub fn set_head_sync(&mut self) {
-        if let RangeSyncState::Idle = self.state {
-            let current_slot = self
-                .beacon_chain
-                .head_info()
-                .map(|info| info.slot)
-                .unwrap_or_else(|_| Slot::from(0u64));
-            // NOTE: This will modify the /node/syncing API to show current slot for all fields
-            // while we update peers to look for new potentially HEAD chains.
-            let temp_head_state = RangeSyncState::Head {
-                start_slot: current_slot,
-                head_slot: current_slot,
-            };
-            self.state = temp_head_state;
-        }
-    }
+    /// Updates the Syncing state of the collection after a chain is removed.
+    fn on_chain_removed(&mut self, id: &ChainId, was_syncing: bool) {
+        match self.state {
+            RangeSyncState::Finalized(ref syncing_id) => {
+                if syncing_id == id {
+                    // the finalized chain that was syncing was removed
+                    debug_assert!(was_syncing);
+                    let syncing_head_ids: SmallVec<[u64; PARALLEL_HEAD_CHAINS]> = self
+                        .head_chains
+                        .iter()
+                        .filter(|(_id, chain)| chain.is_syncing())
+                        .map(|(id, _)| *id)
+                        .collect();
+                    self.state = if syncing_head_ids.is_empty() {
+                        RangeSyncState::Idle
+                    } else {
+                        RangeSyncState::Head(syncing_head_ids)
+                    };
+                } else {
+                    debug_assert!(!was_syncing);
+                }
+            }
+            RangeSyncState::Head(ref mut syncing_head_ids) => {
+                if let Some(index) = syncing_head_ids
+                    .iter()
+                    .enumerate()
+                    .find(|(_, &chain_id)| &chain_id == id)
+                    .map(|(i, _)| i)
+                {
+                    // a syncing head chain was removed
+                    debug_assert!(was_syncing);
+                    syncing_head_ids.swap_remove(index);
+                    if syncing_head_ids.is_empty() {
+                        self.state = RangeSyncState::Idle;
+                    }
+                } else {
+                    debug_assert!(!was_syncing);
+                }
+            }
+            RangeSyncState::Idle => {
+                // the removed chain should not be syncing
+                debug_assert!(!was_syncing)
+            }
+        }
+    }
     /// Calls `func` on every chain of the collection. If the result is
     /// `ProcessingResult::RemoveChain`, the chain is removed and returned.
+    /// NOTE: `func` must not change the syncing state of a chain.
     pub fn call_all<F>(&mut self, mut func: F) -> Vec<(SyncingChain<T>, RangeSyncType)>
     where
         F: FnMut(&mut SyncingChain<T>) -> ProcessingResult,
@@ -211,7 +132,9 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
                 RangeSyncType::Finalized => self.finalized_chains.remove(&id),
                 RangeSyncType::Head => self.head_chains.remove(&id),
             };
-            results.push((chain.expect("Chain exits"), sync_type));
+            let chain = chain.expect("Chain exists");
+            self.on_chain_removed(&id, chain.is_syncing());
+            results.push((chain, sync_type));
         }
         results
     }
@@ -220,6 +143,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
     ///
     /// If the function returns `ProcessingResult::RemoveChain`, the chain is removed and returned.
     /// If the chain is found, its syncing type is returned, or an error otherwise.
+    /// NOTE: `func` should not change the sync state of a chain.
     pub fn call_by_id<F>(
         &mut self,
         id: ChainId,
@@ -231,14 +155,18 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
         if let Entry::Occupied(mut entry) = self.finalized_chains.entry(id) {
             // Search in our finalized chains first
             if let ProcessingResult::RemoveChain = func(entry.get_mut()) {
-                Ok((Some(entry.remove()), RangeSyncType::Finalized))
+                let chain = entry.remove();
+                self.on_chain_removed(&id, chain.is_syncing());
+                Ok((Some(chain), RangeSyncType::Finalized))
             } else {
                 Ok((None, RangeSyncType::Finalized))
             }
         } else if let Entry::Occupied(mut entry) = self.head_chains.entry(id) {
             // Search in our head chains next
             if let ProcessingResult::RemoveChain = func(entry.get_mut()) {
-                Ok((Some(entry.remove()), RangeSyncType::Head))
+                let chain = entry.remove();
+                self.on_chain_removed(&id, chain.is_syncing());
+                Ok((Some(chain), RangeSyncType::Head))
             } else {
                 Ok((None, RangeSyncType::Head))
             }
@@ -253,7 +181,12 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
     /// This removes any out-dated chains, swaps to any higher priority finalized chains and
     /// updates the state of the collection. This starts head chains syncing if any are required to
     /// do so.
-    pub fn update(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
+    pub fn update(
+        &mut self,
+        network: &mut SyncNetworkContext<T::EthSpec>,
+        awaiting_head_peers: &mut HashMap<PeerId, PeerSyncInfo>,
+        beacon_processor_send: &mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
+    ) {
         let (local_finalized_epoch, local_head_epoch) =
             match PeerSyncInfo::from_chain(&self.beacon_chain) {
                 None => {
@@ -270,14 +203,56 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
             };
         // Remove any outdated finalized/head chains
-        self.purge_outdated_chains(network);
+        self.purge_outdated_chains(awaiting_head_peers);
         // Choose the best finalized chain if one needs to be selected.
         self.update_finalized_chains(network, local_finalized_epoch, local_head_epoch);
-        if self.finalized_syncing_chain().is_none() {
+        if !matches!(self.state, RangeSyncState::Finalized(_)) {
             // Handle head syncing chains if there are no finalized chains left.
-            self.update_head_chains(network, local_finalized_epoch, local_head_epoch);
+            self.update_head_chains(
+                network,
+                local_finalized_epoch,
+                local_head_epoch,
+                awaiting_head_peers,
+                beacon_processor_send,
+            );
         }
     }
+    pub fn state(&self) -> Result<Option<(RangeSyncType, Slot /* from */, Slot /* to */)>, String> {
+        match self.state {
+            RangeSyncState::Finalized(ref syncing_id) => {
+                let chain = self
+                    .finalized_chains
+                    .get(syncing_id)
+                    .ok_or(format!("Finalized syncing chain not found: {}", syncing_id))?;
+                Ok(Some((
+                    RangeSyncType::Finalized,
+                    chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch()),
+                    chain.target_head_slot,
+                )))
+            }
+            RangeSyncState::Head(ref syncing_head_ids) => {
+                let mut range: Option<(Slot, Slot)> = None;
+                for id in syncing_head_ids {
+                    let chain = self
+                        .head_chains
+                        .get(id)
+                        .ok_or(format!("Head syncing chain not found: {}", id))?;
+                    range = range.map(|(min_start, max_slot)| {
+                        (
+                            min_start
+                                .min(chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch())),
+                            max_slot.max(chain.target_head_slot),
+                        )
+                    });
+                }
+                let (start_slot, target_slot) =
+                    range.ok_or_else(|| "Syncing head with empty head ids".to_string())?;
+                Ok(Some((RangeSyncType::Head, start_slot, target_slot)))
+            }
+            RangeSyncState::Idle => Ok(None),
+        }
+    }
@@ -290,26 +265,32 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
         local_head_epoch: Epoch,
     ) {
         // Find the chain with most peers and check if it is already syncing
-        if let Some((new_id, peers)) = self
+        if let Some((mut new_id, peers)) = self
             .finalized_chains
             .iter()
             .max_by_key(|(_, chain)| chain.available_peers())
             .map(|(id, chain)| (*id, chain.available_peers()))
         {
-            let old_id = self.finalized_syncing_chain().map(
-                |(currently_syncing_id, currently_syncing_chain)| {
-                    if *currently_syncing_id != new_id
-                        && peers > currently_syncing_chain.available_peers()
-                    {
-                        currently_syncing_chain.stop_syncing();
-                        // we stop this chain and start syncing the one with more peers
-                        Some(*currently_syncing_id)
-                    } else {
-                        // the best chain is already the syncing chain, advance it if possible
-                        None
-                    }
-                },
-            );
+            let mut old_id = None;
+            if let RangeSyncState::Finalized(syncing_id) = self.state {
+                if syncing_id == new_id {
+                    // best chain is already syncing
+                    old_id = Some(None);
+                } else {
+                    // chains are different, check that they don't have the same number of peers
+                    if let Some(syncing_chain) = self.finalized_chains.get_mut(&syncing_id) {
+                        if syncing_chain.available_peers() > peers {
+                            syncing_chain.stop_syncing();
+                            old_id = Some(Some(syncing_id));
+                        } else {
+                            // chains have the same number of peers, pick the currently syncing
+                            // chain to avoid unnecesary switchings and try to advance it
+                            new_id = syncing_id;
+                            old_id = Some(None);
+                        }
+                    }
+                }
+            }
             let chain = self
                 .finalized_chains
@@ -318,18 +299,15 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
             match old_id {
                 Some(Some(old_id)) => debug!(self.log, "Switching finalized chains";
                     "old_id" => old_id, &chain),
                 None => debug!(self.log, "Syncing new chain"; &chain),
-                Some(None) => trace!(self.log, "Advancing currently syncing chain"),
+                Some(None) => {
+                    // this is the same chain. We try to advance it.
+                }
             }
             // update the state to a new finalized state
-            let state = RangeSyncState::Finalized {
-                start_slot: chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch()),
-                head_slot: chain.target_head_slot,
-                head_root: chain.target_head_root,
-            };
-            self.state = state;
+            self.state = RangeSyncState::Finalized(new_id);
             if let ProcessingResult::RemoveChain =
                 chain.start_syncing(network, local_epoch, local_head_epoch)
@@ -337,6 +315,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
                 // this happens only if sending a batch over the `network` fails a lot
                 error!(self.log, "Chain removed while switching chains");
                 self.finalized_chains.remove(&new_id);
+                self.on_chain_removed(&new_id, true);
             }
         }
     }
@@ -347,23 +326,47 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
         network: &mut SyncNetworkContext<T::EthSpec>,
         local_epoch: Epoch,
         local_head_epoch: Epoch,
+        awaiting_head_peers: &mut HashMap<PeerId, PeerSyncInfo>,
+        beacon_processor_send: &mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
     ) {
-        // There are no finalized chains, update the state.
+        // Include the awaiting head peers
+        for (peer_id, peer_sync_info) in awaiting_head_peers.drain() {
+            self.add_peer_or_create_chain(
+                local_epoch,
+                peer_sync_info.head_root,
+                peer_sync_info.head_slot,
+                peer_id,
+                RangeSyncType::Head,
+                beacon_processor_send,
+                network,
+            );
+        }
         if self.head_chains.is_empty() {
+            // There are no finalized chains, update the state.
             self.state = RangeSyncState::Idle;
             return;
        }
-        let mut currently_syncing = self
-            .head_chains
-            .values()
-            .filter(|chain| chain.is_syncing())
-            .count();
-        let mut not_syncing = self.head_chains.len() - currently_syncing;
+        // NOTE: if switching from Head Syncing to Finalized Syncing, the head chains are allowed
+        // to continue, so we check for such chains first, and allow them to continue.
+        let mut syncing_chains = SmallVec::<[u64; PARALLEL_HEAD_CHAINS]>::new();
+        for (id, chain) in self.head_chains.iter_mut() {
+            if chain.is_syncing() {
+                if syncing_chains.len() < PARALLEL_HEAD_CHAINS {
+                    syncing_chains.push(*id);
+                } else {
+                    chain.stop_syncing();
+                    debug!(self.log, "Stopping extra head chain"; "chain" => id);
+                }
+            }
+        }
+        let mut not_syncing = self.head_chains.len() - syncing_chains.len();
        // Find all head chains that are not currently syncing ordered by peer count.
-        while currently_syncing <= PARALLEL_HEAD_CHAINS && not_syncing > 0 {
+        while syncing_chains.len() < PARALLEL_HEAD_CHAINS && not_syncing > 0 {
             // Find the chain with the most peers and start syncing
-            if let Some((_id, chain)) = self
+            if let Some((id, chain)) = self
                 .head_chains
                 .iter_mut()
                 .filter(|(_id, chain)| !chain.is_syncing())
@@ -374,53 +377,21 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
                 if let ProcessingResult::RemoveChain =
                     chain.start_syncing(network, local_epoch, local_head_epoch)
                 {
-                    error!(self.log, "Chain removed while switching head chains")
+                    let id = *id;
+                    self.head_chains.remove(&id);
+                    error!(self.log, "Chain removed while switching head chains"; "id" => id);
+                } else {
+                    syncing_chains.push(*id);
                 }
             }
             // update variables
-            currently_syncing = self
-                .head_chains
-                .iter()
-                .filter(|(_id, chain)| chain.is_syncing())
-                .count();
-            not_syncing = self.head_chains.len() - currently_syncing;
+            not_syncing = self.head_chains.len() - syncing_chains.len();
         }
-        // Start
-        // for the syncing API, we find the minimal start_slot and the maximum
-        // target_slot of all head chains to report back.
-        let (min_epoch, max_slot) = self
-            .head_chains
-            .values()
-            .filter(|chain| chain.is_syncing())
-            .fold(
-                (Epoch::from(0u64), Slot::from(0u64)),
-                |(min, max), chain| {
-                    (
-                        std::cmp::min(min, chain.start_epoch),
-                        std::cmp::max(max, chain.target_head_slot),
-                    )
-                },
-            );
-        let head_state = RangeSyncState::Head {
-            start_slot: min_epoch.start_slot(T::EthSpec::slots_per_epoch()),
-            head_slot: max_slot,
-        };
-        self.state = head_state;
-    }
-    /// This is called once a head chain has completed syncing. It removes all non-syncing head
-    /// chains and re-status their peers.
-    pub fn clear_head_chains(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
-        let log_ref = &self.log;
-        self.head_chains.retain(|_id, chain| {
-            if !chain.is_syncing() {
-                debug!(log_ref, "Removing old head chain"; &chain);
-                chain.status_peers(network);
-                false
-            } else {
-                true
-            }
-        });
+        self.state = if syncing_chains.is_empty() {
+            RangeSyncState::Idle
+        } else {
+            RangeSyncState::Head(syncing_chains)
+        };
     }
     /// Returns if `true` if any finalized chains exist, `false` otherwise.
@@ -430,14 +401,11 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
     /// Removes any outdated finalized or head chains.
     /// This removes chains with no peers, or chains whose start block slot is less than our current
-    /// finalized block slot.
-    pub fn purge_outdated_chains(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
-        // Remove any chains that have no peers
-        self.finalized_chains
-            .retain(|_id, chain| chain.available_peers() > 0);
-        self.head_chains
-            .retain(|_id, chain| chain.available_peers() > 0);
+    /// finalized block slot. Peers that would create outdated chains are removed too.
+    pub fn purge_outdated_chains(
+        &mut self,
+        awaiting_head_peers: &mut HashMap<PeerId, PeerSyncInfo>,
+    ) {
         let local_info = match PeerSyncInfo::from_chain(&self.beacon_chain) {
             Some(local) => local,
             None => {
@@ -455,39 +423,50 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
         let beacon_chain = &self.beacon_chain;
         let log_ref = &self.log;
-        // Remove chains that are out-dated and re-status their peers
-        self.finalized_chains.retain(|_id, chain| {
-            if chain.target_head_slot <= local_finalized_slot
-                || beacon_chain
-                    .fork_choice
-                    .read()
-                    .contains_block(&chain.target_head_root)
+        let is_outdated = |target_slot: &Slot, target_root: &Hash256| {
+            target_slot <= &local_finalized_slot
+                || beacon_chain.fork_choice.read().contains_block(target_root)
+        };
+        // Retain only head peers that remain relevant
+        awaiting_head_peers.retain(|_peer_id, peer_sync_info| {
+            !is_outdated(&peer_sync_info.head_slot, &peer_sync_info.head_root)
+        });
+        // Remove chains that are out-dated
+        let mut removed_chains = Vec::new();
+        self.finalized_chains.retain(|id, chain| {
+            if is_outdated(&chain.target_head_slot, &chain.target_head_root)
+                || chain.available_peers() == 0
             {
                 debug!(log_ref, "Purging out of finalized chain"; &chain);
-                chain.status_peers(network);
+                removed_chains.push((*id, chain.is_syncing()));
                 false
             } else {
                 true
             }
         });
-        self.head_chains.retain(|_id, chain| {
-            if chain.target_head_slot <= local_finalized_slot
-                || beacon_chain
-                    .fork_choice
-                    .read()
-                    .contains_block(&chain.target_head_root)
+        self.head_chains.retain(|id, chain| {
+            if is_outdated(&chain.target_head_slot, &chain.target_head_root)
+                || chain.available_peers() == 0
             {
                 debug!(log_ref, "Purging out of date head chain"; &chain);
-                chain.status_peers(network);
+                removed_chains.push((*id, chain.is_syncing()));
                 false
             } else {
                 true
             }
         });
+        // update the state of the collection
+        for (id, was_syncing) in removed_chains {
+            self.on_chain_removed(&id, was_syncing);
+        }
     }
     /// Adds a peer to a chain with the given target, or creates a new syncing chain if it doesn't
-    /// exits.
+    /// exists.
     #[allow(clippy::too_many_arguments)]
     pub fn add_peer_or_create_chain(
         &mut self,
@@ -501,27 +480,20 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
     ) {
         let id = SyncingChain::<T>::id(&target_head_root, &target_head_slot);
         let collection = if let RangeSyncType::Finalized = sync_type {
-            if let Some(chain) = self.head_chains.get(&id) {
-                // sanity verification for chain duplication / purging issues
-                crit!(self.log, "Adding known head chain as finalized chain"; chain);
-            }
             &mut self.finalized_chains
         } else {
-            if let Some(chain) = self.finalized_chains.get(&id) {
-                // sanity verification for chain duplication / purging issues
-                crit!(self.log, "Adding known finalized chain as head chain"; chain);
-            }
             &mut self.head_chains
         };
         match collection.entry(id) {
             Entry::Occupied(mut entry) => {
                 let chain = entry.get_mut();
                 debug!(self.log, "Adding peer to known chain"; "peer_id" => %peer, "sync_type" => ?sync_type, &chain);
-                assert_eq!(chain.target_head_root, target_head_root);
-                assert_eq!(chain.target_head_slot, target_head_slot);
+                debug_assert_eq!(chain.target_head_root, target_head_root);
+                debug_assert_eq!(chain.target_head_slot, target_head_slot);
                 if let ProcessingResult::RemoveChain = chain.add_peer(network, peer) {
                     debug!(self.log, "Chain removed after adding peer"; "chain" => id);
-                    entry.remove();
+                    let chain = entry.remove();
+                    self.on_chain_removed(&id, chain.is_syncing());
                 }
             }
             Entry::Vacant(entry) => {
@@ -532,25 +504,12 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
                     target_head_root,
                     peer,
                     beacon_processor_send.clone(),
-                    self.beacon_chain.clone(),
                     &self.log,
                 );
-                assert_eq!(new_chain.get_id(), id);
+                debug_assert_eq!(new_chain.get_id(), id);
                 debug!(self.log, "New chain added to sync"; "peer_id" => peer_rpr, "sync_type" => ?sync_type, &new_chain);
                 entry.insert(new_chain);
             }
         }
     }
-    /// Returns the index of finalized chain that is currently syncing. Returns `None` if no
-    /// finalized chain is currently syncing.
-    fn finalized_syncing_chain(&mut self) -> Option<(&ChainId, &mut SyncingChain<T>)> {
-        self.finalized_chains.iter_mut().find_map(|(id, chain)| {
-            if chain.state == ChainSyncingState::Syncing {
-                Some((id, chain))
-            } else {
-                None
-            }
-        })
-    }
 }
```
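With `RangeSyncState` reduced to chain ids, `on_chain_removed` becomes pure bookkeeping: drop the id and fall back to `Idle` when nothing is left syncing. A toy version of the `Head` arm, with a plain `Vec` in place of `SmallVec` so it stays dependency-free:

```rust
#[allow(dead_code)]
#[derive(Debug, PartialEq)]
enum RangeSyncState {
    Finalized(u64),
    Head(Vec<u64>), // real code: SmallVec<[u64; PARALLEL_HEAD_CHAINS]>
    Idle,
}

/// Mirrors the Head arm of `on_chain_removed` above.
fn on_head_chain_removed(state: &mut RangeSyncState, id: u64) {
    if let RangeSyncState::Head(ids) = state {
        if let Some(index) = ids.iter().position(|chain_id| *chain_id == id) {
            ids.swap_remove(index);
            if ids.is_empty() {
                *state = RangeSyncState::Idle;
            }
        }
    }
}

fn main() {
    let mut state = RangeSyncState::Head(vec![1, 2]);
    on_head_chain_removed(&mut state, 1);
    assert_eq!(state, RangeSyncState::Head(vec![2]));
    on_head_chain_removed(&mut state, 2);
    assert_eq!(state, RangeSyncState::Idle);
}
```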


```diff
@@ -10,3 +10,4 @@ mod sync_type;
 pub use batch::BatchInfo;
 pub use chain::{BatchId, ChainId, EPOCHS_PER_BATCH};
 pub use range::RangeSync;
+pub use sync_type::RangeSyncType;
```


@ -40,7 +40,7 @@
//! and further batches are requested as current blocks are being processed. //! and further batches are requested as current blocks are being processed.
use super::chain::ChainId; use super::chain::ChainId;
use super::chain_collection::{ChainCollection, RangeSyncState}; use super::chain_collection::ChainCollection;
use super::sync_type::RangeSyncType; use super::sync_type::RangeSyncType;
use crate::beacon_processor::WorkEvent as BeaconWorkEvent; use crate::beacon_processor::WorkEvent as BeaconWorkEvent;
use crate::sync::network_context::SyncNetworkContext; use crate::sync::network_context::SyncNetworkContext;
@ -48,12 +48,12 @@ use crate::sync::BatchProcessResult;
use crate::sync::PeerSyncInfo; use crate::sync::PeerSyncInfo;
use crate::sync::RequestId; use crate::sync::RequestId;
use beacon_chain::{BeaconChain, BeaconChainTypes}; use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::{NetworkGlobals, PeerId}; use eth2_libp2p::PeerId;
use slog::{debug, error, trace, warn}; use slog::{debug, error, trace};
use std::collections::HashSet; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use types::{Epoch, EthSpec, SignedBeaconBlock}; use types::{Epoch, EthSpec, SignedBeaconBlock, Slot};
/// The primary object dealing with long range/batch syncing. This contains all the active and /// The primary object dealing with long range/batch syncing. This contains all the active and
/// non-active chains that need to be processed before the syncing is considered complete. This /// non-active chains that need to be processed before the syncing is considered complete. This
@ -61,13 +61,12 @@ use types::{Epoch, EthSpec, SignedBeaconBlock};
pub struct RangeSync<T: BeaconChainTypes> { pub struct RangeSync<T: BeaconChainTypes> {
/// The beacon chain for processing. /// The beacon chain for processing.
beacon_chain: Arc<BeaconChain<T>>, beacon_chain: Arc<BeaconChain<T>>,
/// Last known sync info of our useful connected peers. We use this information to create Head
/// chains after all finalized chains have ended.
awaiting_head_peers: HashMap<PeerId, PeerSyncInfo>,
/// A collection of chains that need to be downloaded. This stores any head or finalized chains /// A collection of chains that need to be downloaded. This stores any head or finalized chains
/// that need to be downloaded. /// that need to be downloaded.
chains: ChainCollection<T>, chains: ChainCollection<T>,
/// Peers that join whilst a finalized chain is being downloaded, sit in this set. Once the
/// finalized chain(s) complete, these peer's get STATUS'ed to update their head slot before
/// the head chains are formed and downloaded.
awaiting_head_peers: HashSet<PeerId>,
/// A multi-threaded, non-blocking processor for applying messages to the beacon chain. /// A multi-threaded, non-blocking processor for applying messages to the beacon chain.
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>, beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
/// The syncing logger. /// The syncing logger.
@ -77,29 +76,20 @@ pub struct RangeSync<T: BeaconChainTypes> {
impl<T: BeaconChainTypes> RangeSync<T> { impl<T: BeaconChainTypes> RangeSync<T> {
pub fn new( pub fn new(
beacon_chain: Arc<BeaconChain<T>>, beacon_chain: Arc<BeaconChain<T>>,
network_globals: Arc<NetworkGlobals<T::EthSpec>>,
beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>, beacon_processor_send: mpsc::Sender<BeaconWorkEvent<T::EthSpec>>,
log: slog::Logger, log: slog::Logger,
) -> Self { ) -> Self {
RangeSync { RangeSync {
beacon_chain: beacon_chain.clone(), beacon_chain: beacon_chain.clone(),
chains: ChainCollection::new(beacon_chain, network_globals, log.clone()), chains: ChainCollection::new(beacon_chain, log.clone()),
awaiting_head_peers: HashSet::new(), awaiting_head_peers: HashMap::new(),
beacon_processor_send, beacon_processor_send,
log, log,
} }
} }
/// The `chains` collection stores the current state of syncing. Once a finalized chain pub fn state(&self) -> Result<Option<(RangeSyncType, Slot /* from */, Slot /* to */)>, String> {
/// completes, it's state is pre-emptively set to `SyncState::Head`. This ensures that self.chains.state()
/// during the transition period of finalized to head, the sync manager doesn't start
/// requesting blocks from gossipsub.
///
/// On re-status, a peer that has no head to download indicates that this state can be set to
/// idle as there are in fact no head chains to download. This function notifies the chain
/// collection that the state can safely be set to idle.
pub fn fully_synced_peer_found(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
self.chains.fully_synced_peer_found(network)
} }
/// A useful peer has been added. The SyncManager has identified this peer as needing either /// A useful peer has been added. The SyncManager has identified this peer as needing either
@ -133,16 +123,11 @@ impl<T: BeaconChainTypes> RangeSync<T> {
// NOTE: A peer that has been re-status'd may now exist in multiple finalized chains. // NOTE: A peer that has been re-status'd may now exist in multiple finalized chains.
// remove any out-of-date chains
self.chains.purge_outdated_chains(network);
// determine which kind of sync to perform and set up the chains // determine which kind of sync to perform and set up the chains
match RangeSyncType::new(&self.beacon_chain, &local_info, &remote_info) { match RangeSyncType::new(&self.beacon_chain, &local_info, &remote_info) {
RangeSyncType::Finalized => { RangeSyncType::Finalized => {
// Finalized chain search // Finalized chain search
debug!(self.log, "Finalization sync peer joined"; "peer_id" => %peer_id); debug!(self.log, "Finalization sync peer joined"; "peer_id" => %peer_id);
// remove the peer from the awaiting_head_peers list if it exists
self.awaiting_head_peers.remove(&peer_id); self.awaiting_head_peers.remove(&peer_id);
// Note: We keep current head chains. These can continue syncing whilst we complete // Note: We keep current head chains. These can continue syncing whilst we complete
@ -158,24 +143,27 @@ impl<T: BeaconChainTypes> RangeSync<T> {
network, network,
); );
self.chains.update(network);
// update the global sync state
self.chains.update_sync_state(network);
self.chains.update(
    network,
    &mut self.awaiting_head_peers,
    &self.beacon_processor_send,
);
} }
RangeSyncType::Head => { RangeSyncType::Head => {
// This peer requires a head chain sync // This peer requires a head chain sync
if self.chains.is_finalizing_sync() { if self.chains.is_finalizing_sync() {
// If there are finalized chains to sync, finish these first, before syncing head // If there are finalized chains to sync, finish these first, before syncing head
// chains. This allows us to re-sync all known peers // chains.
trace!(self.log, "Waiting for finalized sync to complete"; "peer_id" => %peer_id); trace!(self.log, "Waiting for finalized sync to complete";
// store the peer to re-status after all finalized chains complete "peer_id" => %peer_id, "awaiting_head_peers" => &self.awaiting_head_peers.len());
self.awaiting_head_peers.insert(peer_id); self.awaiting_head_peers.insert(peer_id, remote_info);
return; return;
} }
// if the peer existed in any other head chain, remove it. // if the peer existed in any other head chain, remove it.
self.remove_peer(network, &peer_id); self.remove_peer(network, &peer_id);
self.awaiting_head_peers.remove(&peer_id);
// The new peer has the same finalized (earlier filters should prevent a peer with an // The new peer has the same finalized (earlier filters should prevent a peer with an
// earlier finalized chain from reaching here). // earlier finalized chain from reaching here).
@ -191,8 +179,11 @@ impl<T: BeaconChainTypes> RangeSync<T> {
&self.beacon_processor_send, &self.beacon_processor_send,
network, network,
); );
self.chains.update(network);
self.chains.update_sync_state(network);
self.chains.update(
    network,
    &mut self.awaiting_head_peers,
    &self.beacon_processor_send,
);
} }
} }
} }
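Across the call sites in this file, the old two-step `chains.update(network)` / `chains.update_sync_state(network)` pair is folded into a single `update` that also receives `awaiting_head_peers` and the processor channel. Every mutation path therefore refreshes the derived global state exactly once, removing one way the old code could leave sync stalled on a stale state. A self-contained toy of that pattern, with every name invented for illustration:

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum GlobalState {
    Idle,
    Finalized,
    Head,
}

#[derive(Default)]
struct Chains {
    finalized_chains: usize, // count of active finalized chains
    head_chains: usize,      // count of active head chains
}

impl Chains {
    /// Single entry point: after any mutation the derived global state is
    /// recomputed, so no caller can forget the second step.
    fn update(&mut self, awaiting_head_peers: &mut HashMap<u64, u64>) -> GlobalState {
        if self.finalized_chains == 0 {
            // finalized sync done: parked peers seed the head chains
            self.head_chains += awaiting_head_peers.drain().count();
        }
        match (self.finalized_chains, self.head_chains) {
            (f, _) if f > 0 => GlobalState::Finalized,
            (_, h) if h > 0 => GlobalState::Head,
            _ => GlobalState::Idle,
        }
    }
}
```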
@ -217,13 +208,14 @@ impl<T: BeaconChainTypes> RangeSync<T> {
chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block) chain.on_block_response(network, batch_id, &peer_id, request_id, beacon_block)
}) { }) {
Ok((removed_chain, sync_type)) => { Ok((removed_chain, sync_type)) => {
if let Some(removed_chain) = removed_chain { if let Some(_removed_chain) = removed_chain {
debug!(self.log, "Chain removed after block response"; "sync_type" => ?sync_type, "chain_id" => chain_id); debug!(self.log, "Chain removed after block response"; "sync_type" => ?sync_type, "chain_id" => chain_id);
removed_chain.status_peers(network);
// update the state of the collection // update the state of the collection
self.chains.update(network);
// update the global state and inform the user
self.chains.update_sync_state(network);
self.chains.update(
    network,
    &mut self.awaiting_head_peers,
    &self.beacon_processor_send,
);
} }
} }
Err(_) => { Err(_) => {
@ -249,42 +241,13 @@ impl<T: BeaconChainTypes> RangeSync<T> {
Ok((None, _sync_type)) => { Ok((None, _sync_type)) => {
// Chain was found and not removed // Chain was found and not removed
} }
Ok((Some(removed_chain), sync_type)) => { Ok((Some(_removed_chain), sync_type)) => {
debug!(self.log, "Chain removed after processing result"; "chain" => chain_id, "sync_type" => ?sync_type); debug!(self.log, "Chain removed after processing result"; "chain" => chain_id, "sync_type" => ?sync_type);
// Chain ended, re-status its peers
removed_chain.status_peers(network);
match sync_type {
    RangeSyncType::Finalized => {
        // update the state of the collection
        self.chains.update(network);
        // set the state to a head sync if there are no finalized chains, to inform
        // the manager that we are awaiting a head chain.
        self.chains.set_head_sync();
        // Update the global variables
        self.chains.update_sync_state(network);
        // if there are no more finalized chains, re-status all known peers
        // awaiting a head sync
        match self.chains.state() {
            RangeSyncState::Idle | RangeSyncState::Head { .. } => {
                network.status_peers(
                    self.beacon_chain.clone(),
                    self.awaiting_head_peers.drain(),
                );
            }
            RangeSyncState::Finalized { .. } => {} // Have more finalized chains to complete
        }
    }
    RangeSyncType::Head => {
        // Remove non-syncing head chains and re-status the peers. This removes a
        // build-up of potentially duplicate head chains. Any legitimate head
        // chains will be re-established
        self.chains.clear_head_chains(network);
        // update the state of the collection
        self.chains.update(network);
        // update the global state and log any change
        self.chains.update_sync_state(network);
    }
}
self.chains.update(
    network,
    &mut self.awaiting_head_peers,
    &self.beacon_processor_send,
);
} }
Err(_) => { Err(_) => {
@ -305,17 +268,12 @@ impl<T: BeaconChainTypes> RangeSync<T> {
// remove the peer from any peer pool, failing its batches // remove the peer from any peer pool, failing its batches
self.remove_peer(network, peer_id); self.remove_peer(network, peer_id);
// update the state of the collection
self.chains.update(network);
// update the global state and inform the user
self.chains.update_sync_state(network);
} }
/// When a peer gets removed, both the head and finalized chains need to be searched to check /// When a peer gets removed, both the head and finalized chains need to be searched to check
/// which pool the peer is in. The chain may also have a batch or batches waiting /// which pool the peer is in. The chain may also have a batch or batches waiting
/// for this peer. If so we mark the batch as failed. The batch may then hit its maximum /// for this peer. If so we mark the batch as failed. The batch may then hit its maximum
/// retries. In this case, we need to remove the chain and re-status all the peers. /// retries. In this case, we need to remove the chain.
fn remove_peer(&mut self, network: &mut SyncNetworkContext<T::EthSpec>, peer_id: &PeerId) { fn remove_peer(&mut self, network: &mut SyncNetworkContext<T::EthSpec>, peer_id: &PeerId) {
for (removed_chain, sync_type) in self for (removed_chain, sync_type) in self
.chains .chains
@ -323,10 +281,12 @@ impl<T: BeaconChainTypes> RangeSync<T> {
{ {
debug!(self.log, "Chain removed after removing peer"; "sync_type" => ?sync_type, "chain" => removed_chain.get_id()); debug!(self.log, "Chain removed after removing peer"; "sync_type" => ?sync_type, "chain" => removed_chain.get_id());
// update the state of the collection // update the state of the collection
self.chains.update(network);
// update the global state and inform the user
self.chains.update_sync_state(network);
} }
self.chains.update(
network,
&mut self.awaiting_head_peers,
&self.beacon_processor_send,
);
} }
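The doc comment above describes a cascade: removing a peer fails any batches waiting for it, and a batch that exhausts its retries takes the whole chain down. A toy illustration of that retry bookkeeping, with the limit and all names invented rather than taken from the crate:

```rust
const MAX_BATCH_RETRIES: u8 = 5; // invented limit, for illustration only

struct Batch {
    awaiting_peer: Option<u64>,
    failed_attempts: u8,
}

enum ChainFate {
    Keep,
    Remove,
}

// Fail the batches of a departing peer; a batch that is out of retries takes
// the chain with it (the chain is then swept up by the single `update` call).
fn on_peer_removed(batches: &mut [Batch], peer: u64) -> ChainFate {
    for batch in batches.iter_mut() {
        if batch.awaiting_peer == Some(peer) {
            batch.awaiting_peer = None;
            batch.failed_attempts += 1;
            if batch.failed_attempts >= MAX_BATCH_RETRIES {
                return ChainFate::Remove;
            }
        }
    }
    ChainFate::Keep
}
```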
/// An RPC error has occurred. /// An RPC error has occurred.
@ -348,11 +308,12 @@ impl<T: BeaconChainTypes> RangeSync<T> {
Ok((removed_chain, sync_type)) => { Ok((removed_chain, sync_type)) => {
if let Some(removed_chain) = removed_chain { if let Some(removed_chain) = removed_chain {
debug!(self.log, "Chain removed on rpc error"; "sync_type" => ?sync_type, "chain" => removed_chain.get_id()); debug!(self.log, "Chain removed on rpc error"; "sync_type" => ?sync_type, "chain" => removed_chain.get_id());
removed_chain.status_peers(network);
// update the state of the collection // update the state of the collection
self.chains.update(network);
// update the global state and inform the user
self.chains.update_sync_state(network);
self.chains.update(
    network,
    &mut self.awaiting_head_peers,
    &self.beacon_processor_send,
);
} }
} }
Err(_) => { Err(_) => {
@ -360,7 +321,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
} }
} }
} else { } else {
warn!(self.log, "Response/Error for non registered request"; "request_id" => request_id) debug!(self.log, "Response/Error for non registered request"; "request_id" => request_id)
} }
} }
} }