Sync state adjustments (#1804)

Check for advanced peers and the state of the chain with respect to the clock slot to decide whether a chain is synced or transitioning to a head sync. Also fixes a bug that prevented getting the right state while syncing heads.
This commit is contained in:
divma 2020-10-22 00:26:06 +00:00
parent e1eec7828b
commit 668513b67e
4 changed files with 71 additions and 48 deletions

View File

@ -229,6 +229,19 @@ impl<TSpec: EthSpec> PeerDB<TSpec> {
.map(|(peer_id, _)| peer_id) .map(|(peer_id, _)| peer_id)
} }
/// Gives the `peer_id` of all known connected and advanced peers.
///
/// A peer is yielded only when both conditions hold: its reported sync
/// status is ahead of ours (`is_advanced`) and it is currently connected.
pub fn advanced_peers(&self) -> impl Iterator<Item = &PeerId> {
    self.peers
        .iter()
        // Idiomatic conjunction instead of `if cond { return other; } false`.
        .filter(|(_, info)| {
            info.sync_status.is_advanced() && info.connection_status.is_connected()
        })
        .map(|(peer_id, _)| peer_id)
}
/// Gives an iterator of all peers on a given subnet. /// Gives an iterator of all peers on a given subnet.
pub fn peers_on_subnet(&self, subnet_id: SubnetId) -> impl Iterator<Item = &PeerId> { pub fn peers_on_subnet(&self, subnet_id: SubnetId) -> impl Iterator<Item = &PeerId> {
self.peers self.peers

View File

@ -232,9 +232,9 @@ impl<T: BeaconChainTypes> Processor<T> {
// because they are using a different genesis time, or that theirs or our system // because they are using a different genesis time, or that theirs or our system
// clock is incorrect. // clock is incorrect.
debug!( debug!(
self.log, "Handshake Failure"; self.log, "Handshake Failure";
"peer" => peer_id.to_string(), "peer" => peer_id.to_string(),
"reason" => "different system clocks or genesis time" "reason" => "different system clocks or genesis time"
); );
self.network self.network
.goodbye_peer(peer_id, GoodbyeReason::IrrelevantNetwork); .goodbye_peer(peer_id, GoodbyeReason::IrrelevantNetwork);

View File

@ -670,19 +670,38 @@ impl<T: BeaconChainTypes> SyncManager<T> {
fn update_sync_state(&mut self) { fn update_sync_state(&mut self) {
let new_state: SyncState = match self.range_sync.state() { let new_state: SyncState = match self.range_sync.state() {
Err(e) => { Err(e) => {
debug!(self.log, "Error getting range sync state"; "error" => %e); crit!(self.log, "Error getting range sync state"; "error" => %e);
return; return;
} }
Ok(state) => match state { Ok(state) => match state {
None => { None => {
// no range sync, decide if we are stalled or synced // no range sync, decide if we are stalled or synced.
self.network_globals // For this we check if there is at least one advanced peer. An advanced peer
.peers // with Idle range is possible since a peer's status is updated periodically.
.read() // If we synced a peer between status messages, most likely the peer has
.synced_peers() // advanced and will produce a head chain on re-status. Otherwise it will shift
.next() // to being synced
.map(|_| SyncState::Synced) let head = self.chain.best_slot().unwrap_or_else(|_| Slot::new(0));
.unwrap_or_else(|| SyncState::Stalled) let current_slot = self.chain.slot().unwrap_or_else(|_| Slot::new(0));
let peers = self.network_globals.peers.read();
if current_slot >= head
&& current_slot.sub(head) <= (SLOT_IMPORT_TOLERANCE as u64)
&& head > 0
{
SyncState::Synced
} else if peers.advanced_peers().next().is_some() {
SyncState::SyncingHead {
start_slot: head,
target_slot: current_slot,
}
} else if peers.synced_peers().next().is_none() {
SyncState::Stalled
} else {
// There are no peers that require syncing and we have at least one synced
// peer
SyncState::Synced
}
} }
Some((RangeSyncType::Finalized, start_slot, target_slot)) => { Some((RangeSyncType::Finalized, start_slot, target_slot)) => {
SyncState::SyncingFinalized { SyncState::SyncingFinalized {

View File

@ -240,13 +240,12 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.head_chains .head_chains
.get(id) .get(id)
.ok_or(format!("Head syncing chain not found: {}", id))?; .ok_or(format!("Head syncing chain not found: {}", id))?;
range = range.map(|(min_start, max_slot)| { let start = chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch());
( let target = chain.target_head_slot;
min_start
.min(chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch())), range = range
max_slot.max(chain.target_head_slot), .map(|(min_start, max_slot)| (min_start.min(start), max_slot.max(target)))
) .or(Some((start, target)));
});
} }
let (start_slot, target_slot) = let (start_slot, target_slot) =
range.ok_or_else(|| "Syncing head with empty head ids".to_string())?; range.ok_or_else(|| "Syncing head with empty head ids".to_string())?;
@ -348,45 +347,37 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
return; return;
} }
// NOTE: if switching from Head Syncing to Finalized Syncing, the head chains are allowed // Order chains by available peers, if two chains have the same number of peers, prefer one
// to continue, so we check for such chains first, and allow them to continue. // that is already syncing
let mut syncing_chains = SmallVec::<[u64; PARALLEL_HEAD_CHAINS]>::new(); let mut preferred_ids = self
for (id, chain) in self.head_chains.iter_mut() { .head_chains
if chain.is_syncing() { .iter()
if syncing_chains.len() < PARALLEL_HEAD_CHAINS { .map(|(id, chain)| (chain.available_peers(), !chain.is_syncing(), *id))
syncing_chains.push(*id); .collect::<Vec<_>>();
} else { preferred_ids.sort_unstable();
chain.stop_syncing();
debug!(self.log, "Stopping extra head chain"; "chain" => id);
}
}
}
let mut not_syncing = self.head_chains.len() - syncing_chains.len(); let mut syncing_chains = SmallVec::<[u64; PARALLEL_HEAD_CHAINS]>::new();
// Find all head chains that are not currently syncing ordered by peer count. for (_, _, id) in preferred_ids {
while syncing_chains.len() < PARALLEL_HEAD_CHAINS && not_syncing > 0 { let chain = self.head_chains.get_mut(&id).expect("known chain");
// Find the chain with the most peers and start syncing if syncing_chains.len() < PARALLEL_HEAD_CHAINS {
if let Some((id, chain)) = self // start this chain if it's not already syncing
.head_chains if !chain.is_syncing() {
.iter_mut() debug!(self.log, "New head chain started syncing"; &chain);
.filter(|(_id, chain)| !chain.is_syncing()) }
.max_by_key(|(_id, chain)| chain.available_peers())
{
// start syncing this chain
debug!(self.log, "New head chain started syncing"; &chain);
if let ProcessingResult::RemoveChain = if let ProcessingResult::RemoveChain =
chain.start_syncing(network, local_epoch, local_head_epoch) chain.start_syncing(network, local_epoch, local_head_epoch)
{ {
let id = *id;
self.head_chains.remove(&id); self.head_chains.remove(&id);
error!(self.log, "Chain removed while switching head chains"; "id" => id); error!(self.log, "Chain removed while switching head chains"; "id" => id);
} else { } else {
syncing_chains.push(*id); syncing_chains.push(id);
} }
} else {
// stop any other chain
chain.stop_syncing();
} }
// update variables
not_syncing = self.head_chains.len() - syncing_chains.len();
} }
self.state = if syncing_chains.is_empty() { self.state = if syncing_chains.is_empty() {
RangeSyncState::Idle RangeSyncState::Idle
} else { } else {