Sync state adjustments (#1804)

Check for advanced peers and the state of the chain with respect to the clock slot to decide whether a chain is synced or transitioning to a head sync. Also includes a fix for a bug that prevented getting the right state while syncing heads.
This commit is contained in:
divma 2020-10-22 00:26:06 +00:00
parent e1eec7828b
commit 668513b67e
4 changed files with 71 additions and 48 deletions

View File

@ -229,6 +229,19 @@ impl<TSpec: EthSpec> PeerDB<TSpec> {
.map(|(peer_id, _)| peer_id)
}
/// Gives the `peer_id` of all known connected and advanced peers.
///
/// An "advanced" peer is one whose reported sync status is ahead of our own
/// chain; only peers that are also currently connected are yielded.
pub fn advanced_peers(&self) -> impl Iterator<Item = &PeerId> {
    self.peers
        .iter()
        // A peer qualifies only if it is both ahead of us AND currently
        // connected; a plain boolean `&&` replaces the if/return/false form.
        .filter(|(_, info)| {
            info.sync_status.is_advanced() && info.connection_status.is_connected()
        })
        .map(|(peer_id, _)| peer_id)
}
/// Gives an iterator of all peers on a given subnet.
pub fn peers_on_subnet(&self, subnet_id: SubnetId) -> impl Iterator<Item = &PeerId> {
self.peers

View File

@ -232,9 +232,9 @@ impl<T: BeaconChainTypes> Processor<T> {
// because they are using a different genesis time, or that theirs or our system
// clock is incorrect.
debug!(
self.log, "Handshake Failure";
"peer" => peer_id.to_string(),
"reason" => "different system clocks or genesis time"
self.log, "Handshake Failure";
"peer" => peer_id.to_string(),
"reason" => "different system clocks or genesis time"
);
self.network
.goodbye_peer(peer_id, GoodbyeReason::IrrelevantNetwork);

View File

@ -670,19 +670,38 @@ impl<T: BeaconChainTypes> SyncManager<T> {
fn update_sync_state(&mut self) {
let new_state: SyncState = match self.range_sync.state() {
Err(e) => {
debug!(self.log, "Error getting range sync state"; "error" => %e);
crit!(self.log, "Error getting range sync state"; "error" => %e);
return;
}
Ok(state) => match state {
None => {
// no range sync, decide if we are stalled or synced
self.network_globals
.peers
.read()
.synced_peers()
.next()
.map(|_| SyncState::Synced)
.unwrap_or_else(|| SyncState::Stalled)
// no range sync, decide if we are stalled or synced.
// For this we check if there is at least one advanced peer. An advanced peer
// with Idle range is possible since a peer's status is updated periodically.
// If we synced a peer between status messages, most likely the peer has
// advanced and will produce a head chain on re-status. Otherwise it will shift
// to being synced
let head = self.chain.best_slot().unwrap_or_else(|_| Slot::new(0));
let current_slot = self.chain.slot().unwrap_or_else(|_| Slot::new(0));
let peers = self.network_globals.peers.read();
if current_slot >= head
&& current_slot.sub(head) <= (SLOT_IMPORT_TOLERANCE as u64)
&& head > 0
{
SyncState::Synced
} else if peers.advanced_peers().next().is_some() {
SyncState::SyncingHead {
start_slot: head,
target_slot: current_slot,
}
} else if peers.synced_peers().next().is_none() {
SyncState::Stalled
} else {
// There are no peers that require syncing and we have at least one synced
// peer
SyncState::Synced
}
}
Some((RangeSyncType::Finalized, start_slot, target_slot)) => {
SyncState::SyncingFinalized {

View File

@ -240,13 +240,12 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.head_chains
.get(id)
.ok_or(format!("Head syncing chain not found: {}", id))?;
range = range.map(|(min_start, max_slot)| {
(
min_start
.min(chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch())),
max_slot.max(chain.target_head_slot),
)
});
let start = chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch());
let target = chain.target_head_slot;
range = range
.map(|(min_start, max_slot)| (min_start.min(start), max_slot.max(target)))
.or(Some((start, target)));
}
let (start_slot, target_slot) =
range.ok_or_else(|| "Syncing head with empty head ids".to_string())?;
@ -348,45 +347,37 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
return;
}
// NOTE: if switching from Head Syncing to Finalized Syncing, the head chains are allowed
// to continue, so we check for such chains first, and allow them to continue.
let mut syncing_chains = SmallVec::<[u64; PARALLEL_HEAD_CHAINS]>::new();
for (id, chain) in self.head_chains.iter_mut() {
if chain.is_syncing() {
if syncing_chains.len() < PARALLEL_HEAD_CHAINS {
syncing_chains.push(*id);
} else {
chain.stop_syncing();
debug!(self.log, "Stopping extra head chain"; "chain" => id);
}
}
}
// Order chains by available peers, if two chains have the same number of peers, prefer one
// that is already syncing
let mut preferred_ids = self
.head_chains
.iter()
.map(|(id, chain)| (chain.available_peers(), !chain.is_syncing(), *id))
.collect::<Vec<_>>();
preferred_ids.sort_unstable();
let mut not_syncing = self.head_chains.len() - syncing_chains.len();
// Find all head chains that are not currently syncing ordered by peer count.
while syncing_chains.len() < PARALLEL_HEAD_CHAINS && not_syncing > 0 {
// Find the chain with the most peers and start syncing
if let Some((id, chain)) = self
.head_chains
.iter_mut()
.filter(|(_id, chain)| !chain.is_syncing())
.max_by_key(|(_id, chain)| chain.available_peers())
{
// start syncing this chain
debug!(self.log, "New head chain started syncing"; &chain);
let mut syncing_chains = SmallVec::<[u64; PARALLEL_HEAD_CHAINS]>::new();
for (_, _, id) in preferred_ids {
let chain = self.head_chains.get_mut(&id).expect("known chain");
if syncing_chains.len() < PARALLEL_HEAD_CHAINS {
// start this chain if it's not already syncing
if !chain.is_syncing() {
debug!(self.log, "New head chain started syncing"; &chain);
}
if let ProcessingResult::RemoveChain =
chain.start_syncing(network, local_epoch, local_head_epoch)
{
let id = *id;
self.head_chains.remove(&id);
error!(self.log, "Chain removed while switching head chains"; "id" => id);
} else {
syncing_chains.push(*id);
syncing_chains.push(id);
}
} else {
// stop any other chain
chain.stop_syncing();
}
// update variables
not_syncing = self.head_chains.len() - syncing_chains.len();
}
self.state = if syncing_chains.is_empty() {
RangeSyncState::Idle
} else {