Merge pull request #418 from sigp/fix-syncing

Improve syncing, fix bugs
Paul Hauner 2019-07-03 13:48:44 +10:00 committed by GitHub
commit d395feb027
3 changed files with 154 additions and 172 deletions


@@ -1,7 +1,8 @@
use beacon_chain::{BeaconChain, BeaconChainTypes};
use eth2_libp2p::rpc::methods::*;
use eth2_libp2p::PeerId;
use slog::{debug, error};
use slog::error;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tree_hash::TreeHash;
@@ -22,7 +23,7 @@ use types::{BeaconBlock, BeaconBlockBody, BeaconBlockHeader, Hash256, Slot};
pub struct ImportQueue<T: BeaconChainTypes> {
pub chain: Arc<BeaconChain<T>>,
/// Partially imported blocks, keyed by block root.
pub partials: Vec<PartialBeaconBlock>,
partials: HashMap<Hash256, PartialBeaconBlock>,
/// Time before a queue entry is considered stale.
pub stale_time: Duration,
/// Logging
@@ -34,7 +35,7 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
pub fn new(chain: Arc<BeaconChain<T>>, stale_time: Duration, log: slog::Logger) -> Self {
Self {
chain,
partials: vec![],
partials: HashMap::new(),
stale_time,
log,
}
@@ -52,7 +53,7 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
let mut complete: Vec<(Hash256, BeaconBlock, PeerId)> = self
.partials
.iter()
.filter_map(|partial| partial.clone().complete())
.filter_map(|(_, partial)| partial.clone().complete())
.collect();
// Sort the completable partials to be in ascending slot order.
@@ -61,14 +62,14 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
complete
}
pub fn contains_block_root(&self, block_root: Hash256) -> bool {
self.partials.contains_key(&block_root)
}
/// Removes the `PartialBeaconBlock` with a matching `block_root`, returning the partial
/// if it exists.
pub fn remove(&mut self, block_root: Hash256) -> Option<PartialBeaconBlock> {
let position = self
.partials
.iter()
.position(|p| p.block_root == block_root)?;
Some(self.partials.remove(position))
self.partials.remove(&block_root)
}
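
Replacing the `Vec` with a `HashMap` keyed by block root turns the linear `position` scans above into constant-time lookups. A minimal standalone sketch of the pattern, with `Root` and `Partial` as hypothetical stand-ins for `Hash256` and `PartialBeaconBlock`:

use std::collections::HashMap;

type Root = [u8; 32];

struct Partial {
    slot: u64,
    // Header, body and sender fields elided.
}

struct Queue {
    partials: HashMap<Root, Partial>,
}

impl Queue {
    /// O(1) membership test; the `Vec` version scanned every entry.
    fn contains_block_root(&self, root: &Root) -> bool {
        self.partials.contains_key(root)
    }

    /// O(1) removal by key, returning the partial if it existed.
    fn remove(&mut self, root: &Root) -> Option<Partial> {
        self.partials.remove(root)
    }
}

The trade-off is that a `HashMap` loses insertion order, which is why the completable partials are still collected and sorted into ascending slot order above.
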
/// Flushes all stale entries from the queue.
@@ -76,31 +77,10 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
/// An entry is stale if its `inserted` time is more than `self.stale_time` in the
/// past.
pub fn remove_stale(&mut self) {
let stale_indices: Vec<usize> = self
.partials
.iter()
.enumerate()
.filter_map(|(i, partial)| {
if partial.inserted + self.stale_time <= Instant::now() {
Some(i)
} else {
None
}
})
.collect();
let stale_time = self.stale_time;
if !stale_indices.is_empty() {
debug!(
self.log,
"ImportQueue removing stale entries";
"stale_items" => stale_indices.len(),
"stale_time_seconds" => self.stale_time.as_secs()
);
}
stale_indices.iter().for_each(|&i| {
self.partials.remove(i);
});
self.partials
.retain(|_, partial| partial.inserted + stale_time > Instant::now())
}
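
`retain` replaces the collect-indices-then-remove dance above, which was also subtly wrong: each `Vec::remove(i)` shifts every element after `i`, so the remaining stored indices pointed at the wrong entries. A minimal sketch of the new shape, assuming entries carry an `inserted` timestamp:

use std::collections::HashMap;
use std::time::{Duration, Instant};

struct Entry {
    inserted: Instant,
}

fn remove_stale(partials: &mut HashMap<u64, Entry>, stale_time: Duration) {
    // Snapshot `now` once so every entry is judged against the same instant.
    let now = Instant::now();
    partials.retain(|_root, entry| entry.inserted + stale_time > now);
}

(The diff calls `Instant::now()` inside the closure; hoisting it out of the closure is a small consistency tweak in this sketch, not what the diff does.)
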
/// Returns `true` if `self.chain` has not yet processed this block.
@@ -122,27 +102,30 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
block_roots: &[BlockRootSlot],
sender: PeerId,
) -> Vec<BlockRootSlot> {
let new_roots: Vec<BlockRootSlot> = block_roots
let new_block_root_slots: Vec<BlockRootSlot> = block_roots
.iter()
// Ignore any roots already stored in the queue.
.filter(|brs| !self.contains_block_root(brs.block_root))
// Ignore any roots already processed by the chain.
.filter(|brs| self.chain_has_not_seen_block(&brs.block_root))
// Ignore any roots already stored in the queue.
.filter(|brs| !self.partials.iter().any(|p| p.block_root == brs.block_root))
.cloned()
.collect();
new_roots.iter().for_each(|brs| {
self.partials.push(PartialBeaconBlock {
slot: brs.slot,
block_root: brs.block_root,
sender: sender.clone(),
header: None,
body: None,
inserted: Instant::now(),
})
});
self.partials.extend(
new_block_root_slots
.iter()
.map(|brs| PartialBeaconBlock {
slot: brs.slot,
block_root: brs.block_root,
sender: sender.clone(),
header: None,
body: None,
inserted: Instant::now(),
})
.map(|partial| (partial.block_root, partial)),
);
new_roots
new_block_root_slots
}
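
Because `HashMap::extend` consumes `(key, value)` pairs, the filtered roots can be mapped straight into keyed partials in one pipeline, which is what `enqueue_block_roots` now does. A standalone sketch with hypothetical `Root`/`Partial` types:

use std::collections::HashMap;

type Root = u64;

struct Partial {
    slot: u64,
}

/// Returns only the roots that were actually new, as the caller may want them.
fn enqueue(partials: &mut HashMap<Root, Partial>, roots: &[(Root, u64)]) -> Vec<(Root, u64)> {
    let new: Vec<(Root, u64)> = roots
        .iter()
        // Ignore any roots already stored in the queue.
        .filter(|(root, _)| !partials.contains_key(root))
        .cloned()
        .collect();

    partials.extend(new.iter().map(|&(root, slot)| (root, Partial { slot })));

    new
}
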
/// Adds the `headers` to the `partials` queue. Returns a list of `Hash256` block roots for
@@ -170,7 +153,7 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
if self.chain_has_not_seen_block(&block_root) {
self.insert_header(block_root, header, sender.clone());
required_bodies.push(block_root)
required_bodies.push(block_root);
}
}
@@ -197,31 +180,20 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
/// If the header already exists, the `inserted` time is set to `now` and no other
/// modifications are made.
fn insert_header(&mut self, block_root: Hash256, header: BeaconBlockHeader, sender: PeerId) {
if let Some(i) = self
.partials
.iter()
.position(|p| p.block_root == block_root)
{
// Case 1: there already exists a partial with a matching block root.
//
// The `inserted` time is set to now and the header is replaced, regardless of whether
// it existed or not.
self.partials[i].header = Some(header);
self.partials[i].inserted = Instant::now();
} else {
// Case 2: there was no partial with a matching block root.
//
// A new partial is added. This case permits adding a header without already knowing the
// root.
self.partials.push(PartialBeaconBlock {
self.partials
.entry(block_root)
.and_modify(|partial| {
partial.header = Some(header.clone());
partial.inserted = Instant::now();
})
.or_insert_with(|| PartialBeaconBlock {
slot: header.slot,
block_root,
header: Some(header),
body: None,
inserted: Instant::now(),
sender,
})
}
});
}
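
The `entry` API collapses the "update if present, insert otherwise" branches into a single lookup. A standalone sketch of the same upsert shape (the `Header` and `Partial` types are hypothetical simplifications; the real partial also carries `body` and `sender`):

use std::collections::HashMap;
use std::time::Instant;

#[derive(Clone)]
struct Header {
    slot: u64,
}

struct Partial {
    slot: u64,
    header: Option<Header>,
    inserted: Instant,
}

fn insert_header(partials: &mut HashMap<u64, Partial>, root: u64, header: Header) {
    partials
        .entry(root)
        // Present: replace the header and refresh the timestamp.
        .and_modify(|partial| {
            partial.header = Some(header.clone());
            partial.inserted = Instant::now();
        })
        // Absent: build a fresh partial from the header alone.
        .or_insert_with(|| Partial {
            slot: header.slot,
            header: Some(header),
            inserted: Instant::now(),
        });
}

Note the `header.clone()` in `and_modify`: the clone is what lets `or_insert_with` still take `header` by value, since only one of the two closures ever runs.
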
/// Updates an existing partial with the `body`.
@@ -232,7 +204,7 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
fn insert_body(&mut self, body: BeaconBlockBody, sender: PeerId) {
let body_root = Hash256::from_slice(&body.tree_hash_root()[..]);
self.partials.iter_mut().for_each(|mut p| {
self.partials.iter_mut().for_each(|(_, mut p)| {
if let Some(header) = &mut p.header {
if body_root == header.block_body_root {
p.inserted = Instant::now();
@@ -261,15 +233,10 @@ impl<T: BeaconChainTypes> ImportQueue<T> {
sender,
};
if let Some(i) = self
.partials
.iter()
.position(|p| p.block_root == block_root)
{
self.partials[i] = partial;
} else {
self.partials.push(partial)
}
self.partials
.entry(block_root)
.and_modify(|existing_partial| *existing_partial = partial.clone())
.or_insert(partial);
}
}


@@ -17,7 +17,7 @@ use types::{
const SLOT_IMPORT_TOLERANCE: u64 = 100;
/// The number of seconds a block (or partial block) may exist in the import queue.
const QUEUE_STALE_SECS: u64 = 600;
const QUEUE_STALE_SECS: u64 = 6;
/// If a block is more than `FUTURE_SLOT_TOLERANCE` slots ahead of our slot clock, we drop it.
/// Otherwise we queue it.
@@ -72,7 +72,6 @@ pub struct SimpleSync<T: BeaconChainTypes> {
import_queue: ImportQueue<T>,
/// The current state of the syncing protocol.
state: SyncState,
/// Sync logger.
log: slog::Logger,
}
@@ -160,96 +159,100 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
hello: HelloMessage,
network: &mut NetworkContext,
) {
let spec = &self.chain.spec;
let remote = PeerSyncInfo::from(hello);
let local = PeerSyncInfo::from(&self.chain);
// Disconnect nodes who are on a different network.
let start_slot = |epoch: Epoch| epoch.start_slot(T::EthSpec::slots_per_epoch());
if local.network_id != remote.network_id {
// The node is on a different network, disconnect them.
info!(
self.log, "HandshakeFailure";
"peer" => format!("{:?}", peer_id),
"reason" => "network_id"
);
network.disconnect(peer_id.clone(), GoodbyeReason::IrreleventNetwork);
// Disconnect nodes if our finalized epoch is greater than theirs, and their finalized
// epoch is not in our chain. Viz., they are on another chain.
//
// If the local or remote has a `latest_finalized_root == ZERO_HASH`, skip the checks on
// the finalized_root. The logic is awkward and I think we're better off without it.
} else if (local.latest_finalized_epoch >= remote.latest_finalized_epoch)
&& (!self
.chain
.rev_iter_block_roots(local.best_slot)
.any(|(root, _slot)| root == remote.latest_finalized_root))
&& (local.latest_finalized_root != spec.zero_hash)
&& (remote.latest_finalized_root != spec.zero_hash)
} else if remote.latest_finalized_epoch <= local.latest_finalized_epoch
&& remote.latest_finalized_root != self.chain.spec.zero_hash
&& local.latest_finalized_root != self.chain.spec.zero_hash
&& (self.root_at_slot(start_slot(remote.latest_finalized_epoch))
!= Some(remote.latest_finalized_root))
{
// The remote's finalized epoch is less than or equal to ours, but the block root is
// different from the one in our chain.
//
// Therefore, the node is on a different chain and we should not communicate with them.
info!(
self.log, "HandshakeFailure";
"peer" => format!("{:?}", peer_id),
"reason" => "wrong_finalized_chain"
"reason" => "different finalized chain"
);
network.disconnect(peer_id.clone(), GoodbyeReason::IrreleventNetwork);
// Process handshakes from peers that seem to be on our chain.
} else {
info!(self.log, "HandshakeSuccess"; "peer" => format!("{:?}", peer_id));
self.known_peers.insert(peer_id.clone(), remote);
// If we have equal or better finalized epochs and best slots, we require nothing else from
// this peer.
} else if remote.latest_finalized_epoch < local.latest_finalized_epoch {
// The node has a lower finalized epoch; its chain is not useful to us. There are two
// cases where a node can have a lower finalized epoch:
//
// We make an exception when our best slot is 0. Best slot does not indicate whether or
// not there is a block at slot zero.
if (remote.latest_finalized_epoch <= local.latest_finalized_epoch)
&& (remote.best_slot <= local.best_slot)
&& (local.best_slot > 0)
{
debug!(self.log, "Peer is naive"; "peer" => format!("{:?}", peer_id));
return;
}
// ## The node is on the same chain
//
// If a node is on the same chain but has a lower finalized epoch, its head must be
// lower than ours. Therefore, we have nothing to request from them.
//
// ## The node is on a fork
//
// If a node is on a fork that has a lower finalized epoch, switching to that fork would
// cause us to revert a finalized block. This is not permitted, therefore we have no
// interest in their blocks.
debug!(
self.log,
"NaivePeer";
"peer" => format!("{:?}", peer_id),
"reason" => "lower finalized epoch"
);
} else if self
.chain
.store
.exists::<BeaconBlock>(&remote.best_root)
.unwrap_or_else(|_| false)
{
// If the node's best-block is already known to us, we have nothing to request.
debug!(
self.log,
"NaivePeer";
"peer" => format!("{:?}", peer_id),
"reason" => "best block is known"
);
} else {
// The remote node has an equal or greater finalized epoch and we don't know its head.
//
// Therefore, there are some blocks between the local finalized epoch and the remote
// head that are worth downloading.
debug!(self.log, "UsefulPeer"; "peer" => format!("{:?}", peer_id));
// If the remote has a higher finalized epoch, request all block roots from our finalized
// epoch through to its best slot.
if remote.latest_finalized_epoch > local.latest_finalized_epoch {
debug!(self.log, "Peer has high finalized epoch"; "peer" => format!("{:?}", peer_id));
let start_slot = local
.latest_finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch());
let required_slots = remote.best_slot - start_slot;
let start_slot = local
.latest_finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch());
let required_slots = remote.best_slot - start_slot;
self.request_block_roots(
peer_id,
BeaconBlockRootsRequest {
start_slot,
count: required_slots.into(),
},
network,
);
// If the remote has a greater best slot, request the roots between our best slot and their
// best slot.
} else if remote.best_slot > local.best_slot {
debug!(self.log, "Peer has higher best slot"; "peer" => format!("{:?}", peer_id));
let start_slot = local
.latest_finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch());
let required_slots = remote.best_slot - start_slot;
self.request_block_roots(
peer_id,
BeaconBlockRootsRequest {
start_slot,
count: required_slots.into(),
},
network,
);
} else {
debug!(self.log, "Nothing to request from peer"; "peer" => format!("{:?}", peer_id));
}
self.request_block_roots(
peer_id,
BeaconBlockRootsRequest {
start_slot,
count: required_slots.into(),
},
network,
);
}
}
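
The rewritten handler is a chain of mutually exclusive cases ordered from "disconnect" down to "request roots". The decision itself is a pure function of the two peers' sync states; a much-simplified sketch (the `Action` enum and the field subset are hypothetical, and the finalized-root and known-head checks are elided):

#[derive(Debug, PartialEq)]
enum Action {
    /// Wrong network or a conflicting finalized chain.
    Disconnect,
    /// The peer has nothing we need.
    Ignore,
    /// The peer may hold blocks worth downloading.
    RequestRoots { start_slot: u64, count: u64 },
}

struct SyncInfo {
    network_id: u8,
    finalized_epoch: u64,
    best_slot: u64,
}

/// `finalized_start_slot` is the first slot of our finalized epoch.
fn on_hello(local: &SyncInfo, remote: &SyncInfo, finalized_start_slot: u64) -> Action {
    if remote.network_id != local.network_id {
        Action::Disconnect
    } else if remote.finalized_epoch < local.finalized_epoch {
        // A lower finalized epoch can never extend our chain: on the same
        // chain their head is behind ours; on a fork, adopting it would
        // revert a finalized block.
        Action::Ignore
    } else if remote.finalized_epoch > local.finalized_epoch
        || remote.best_slot > local.best_slot
    {
        // The peer may hold blocks past our finalized checkpoint.
        Action::RequestRoots {
            start_slot: finalized_start_slot,
            count: remote.best_slot.saturating_sub(finalized_start_slot),
        }
    } else {
        Action::Ignore
    }
}
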
fn root_at_slot(&self, target_slot: Slot) -> Option<Hash256> {
self.chain
.rev_iter_block_roots(target_slot)
.take(1)
.find(|(_root, slot)| *slot == target_slot)
.map(|(root, _slot)| root)
}
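
`root_at_slot` leans on `rev_iter_block_roots(target_slot)` starting at the target and walking backwards: the first item yielded sits either exactly at `target_slot`, or at an earlier slot when `target_slot` was skipped, in which case there is no root to return. A standalone sketch over a plain newest-first slice (types hypothetical):

type Root = [u8; 32];

/// `roots` is ordered newest-first, as a reverse iterator would yield it.
fn root_at_slot(roots: &[(Root, u64)], target_slot: u64) -> Option<Root> {
    roots
        .iter()
        // The first entry at or below the target, scanning newest-first.
        .find(|&&(_, slot)| slot <= target_slot)
        // A skip slot lands strictly below the target; only an exact match counts.
        .filter(|&&(_, slot)| slot == target_slot)
        .map(|&(root, _)| root)
}
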
/// Handle a `BeaconBlockRoots` request from the peer.
pub fn on_beacon_block_roots_request(
&mut self,
@@ -275,11 +278,13 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
.collect();
if roots.len() as u64 != req.count {
debug!(
warn!(
self.log,
"BlockRootsRequest";
"peer" => format!("{:?}", peer_id),
"msg" => "Failed to return all requested hashes",
"start_slot" => req.start_slot,
"current_slot" => self.chain.current_state().slot,
"requested" => req.count,
"returned" => roots.len(),
);
@@ -351,7 +356,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
BeaconBlockHeadersRequest {
start_root: first.block_root,
start_slot: first.slot,
max_headers: (last.slot - first.slot + 1).as_u64(),
max_headers: (last.slot - first.slot).as_u64(),
skip_slots: 0,
},
network,
@@ -433,7 +438,9 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
.import_queue
.enqueue_headers(res.headers, peer_id.clone());
self.request_block_bodies(peer_id, BeaconBlockBodiesRequest { block_roots }, network);
if !block_roots.is_empty() {
self.request_block_bodies(peer_id, BeaconBlockBodiesRequest { block_roots }, network);
}
}
/// Handle a `BeaconBlockBodies` request from the peer.
@@ -518,10 +525,28 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
{
match outcome {
BlockProcessingOutcome::Processed { .. } => SHOULD_FORWARD_GOSSIP_BLOCK,
BlockProcessingOutcome::ParentUnknown { .. } => {
BlockProcessingOutcome::ParentUnknown { parent } => {
// Clean the stale entries from the queue.
self.import_queue.remove_stale();
// Add this block to the queue
self.import_queue
.enqueue_full_blocks(vec![block], peer_id.clone());
// Unless the parent is in the queue, request the parent block from the peer.
//
// It is likely that this is duplicate work, given we already send a hello
// request. However, I believe there are some edge-cases where the hello
// message doesn't suffice, so we perform this request as well.
if !self.import_queue.contains_block_root(parent) {
// Send a hello to learn of the client's best slot so we can then sync the required
// parent(s).
network.send_rpc_request(
peer_id.clone(),
RPCRequest::Hello(hello_message(&self.chain)),
);
}
SHOULD_FORWARD_GOSSIP_BLOCK
}
BlockProcessingOutcome::FutureSlot {
@@ -696,7 +721,7 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
if let Ok(outcome) = processing_result {
match outcome {
BlockProcessingOutcome::Processed { block_root } => {
info!(
debug!(
self.log, "Imported block from network";
"source" => source,
"slot" => block.slot,
@@ -713,28 +738,19 @@ impl<T: BeaconChainTypes> SimpleSync<T> {
"peer" => format!("{:?}", peer_id),
);
// Send a hello to learn of the client's best slot so we can then sync the required
// parent(s).
network.send_rpc_request(
peer_id.clone(),
RPCRequest::Hello(hello_message(&self.chain)),
);
// Explicitly request the parent block from the peer.
// Unless the parent is in the queue, request the parent block from the peer.
//
// It is likely that this is duplicate work, given we already send a hello
// request. However, I believe there are some edge-cases where the hello
// message doesn't suffice, so we perform this request as well.
self.request_block_headers(
peer_id,
BeaconBlockHeadersRequest {
start_root: parent,
start_slot: block.slot - 1,
max_headers: 1,
skip_slots: 0,
},
network,
)
if !self.import_queue.contains_block_root(parent) {
// Send a hello to learn of the client's best slot so we can then sync the required
// parent(s).
network.send_rpc_request(
peer_id.clone(),
RPCRequest::Hello(hello_message(&self.chain)),
);
}
}
BlockProcessingOutcome::FutureSlot {
present_slot,


@@ -139,8 +139,7 @@ impl<'a, T: EthSpec, U: Store> Iterator for BlockRootsIterator<'a, T, U> {
Err(BeaconStateError::SlotOutOfBounds) => {
// Read a `BeaconState` from the store that has access to prior historical root.
let beacon_state: BeaconState<T> = {
// Load the earlier state from disk. Skip forward one slot, because a state
// doesn't return its own state root.
// Load the earliest state from disk.
let new_state_root = self.beacon_state.get_oldest_state_root().ok()?;
self.store.get(&new_state_root).ok()?