lighthouse/beacon_node/network/src/sync/manager.rs
Age Manning 192380cb58
Ethereum 2.0 Network Specification Upgrade (#510)
Updates lighthouse to the latest networking spec

-  Sync re-write (#496)
-  Updates to the latest eth2 networking spec (#495)
-  Libp2p updates and improvements
2019-08-29 13:23:28 +02:00

use super::simple_sync::{PeerSyncInfo, FUTURE_SLOT_TOLERANCE};
use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessingOutcome};
use eth2_libp2p::rpc::methods::*;
use eth2_libp2p::rpc::RequestId;
use eth2_libp2p::PeerId;
use slog::{debug, info, trace, warn, Logger};
use std::collections::{HashMap, HashSet};
use std::ops::{Add, Sub};
use std::sync::Arc;
use types::{BeaconBlock, EthSpec, Hash256, Slot};
/// The maximum number of blocks to request in a single batched request.
const MAX_BLOCKS_PER_REQUEST: u64 = 10;
/// The number of slots a peer's head may be ahead of ours before we trigger a full batch sync,
/// rather than importing its blocks individually.
const SLOT_IMPORT_TOLERANCE: usize = 10;
/// The number of times a parent-lookup request may fail before we give up on it.
const PARENT_FAIL_TOLERANCE: usize = 3;
/// The maximum number of ancestors we will follow during a parent lookup before abandoning it.
const PARENT_DEPTH_TOLERANCE: usize = SLOT_IMPORT_TOLERANCE * 2;
/// The state of a batched block request or parent-lookup request.
#[derive(PartialEq)]
enum BlockRequestsState {
    /// The request is ready to be (re-)sent to a peer.
    Queued,
    /// The request has been sent with this request id and we are awaiting a response.
    Pending(RequestId),
    /// The required blocks have been received and are ready for processing.
    Complete,
    /// The request has failed and will be dropped.
    Failed,
}
/// The per-peer state of an in-progress batched block download.
struct BlockRequests<T: EthSpec> {
    /// The slot of the peer's head block; reaching this slot completes the forward sync.
    target_head_slot: Slot,
    /// The block root of the peer's head block.
    target_head_root: Hash256,
    /// The blocks downloaded from the peer so far, kept in ascending slot order.
    downloaded_blocks: Vec<BeaconBlock<T>>,
    /// The state of the current batch request.
    state: BlockRequestsState,
/// Specifies whether the current state is syncing forwards or backwards.
forward_sync: bool,
/// The current `start_slot` of the batched block request.
current_start_slot: Slot,
}
/// The state of a lookup for the unknown ancestors of a block received over gossip.
struct ParentRequests<T: EthSpec> {
    /// The blocks collected so far: the original block first, followed by each downloaded
    /// ancestor, oldest last.
    downloaded_blocks: Vec<BeaconBlock<T>>,
    /// The number of failed attempts to retrieve a parent block.
    failed_attempts: usize,
    /// The peer that last sent us a block for this lookup, kept so it can be downvoted.
    last_submitted_peer: PeerId,
    /// The state of the current parent request.
    state: BlockRequestsState,
}
impl<T: EthSpec> BlockRequests<T> {
    /// Advances `current_start_slot` by one full batch in the direction of the current sync
    /// (forwards or backwards) and re-queues the request for the next batch.
fn update_start_slot(&mut self) {
if self.forward_sync {
self.current_start_slot += Slot::from(MAX_BLOCKS_PER_REQUEST);
} else {
self.current_start_slot -= Slot::from(MAX_BLOCKS_PER_REQUEST);
}
self.state = BlockRequestsState::Queued;
}
}
/// The global sync state of the import manager.
#[derive(PartialEq, Debug, Clone)]
enum ManagerState {
    /// At least one batched (long-range) sync is in progress.
    Syncing,
    /// We have at least one fully synced peer and import blocks as they arrive.
    Regular,
    /// We have no syncing or fully synced peers; sync cannot progress.
    Stalled,
}
/// An action for the caller to perform, yielded by `ImportManager::poll`.
pub(crate) enum ImportManagerOutcome {
    /// There is no further work to perform.
    Idle,
    /// Send a batched `BeaconBlocks` request to the given peer.
    RequestBlocks {
        peer_id: PeerId,
        request_id: RequestId,
        request: BeaconBlocksRequest,
    },
    /// Re-issue a HELLO handshake with the peer to refresh our view of its status.
    Hello(PeerId),
    /// Send a `RecentBeaconBlocks` request to the peer (used for parent lookups).
    RecentRequest(PeerId, RecentBeaconBlocksRequest),
    /// Downvote the given peer for sending invalid or unhelpful data.
    DownvotePeer(PeerId),
}
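/// Manages the syncing logic for importing blocks into the beacon chain: long-range batched
/// syncs with individual peers (`import_queue`) and parent lookups for blocks received with
/// unknown ancestors (`parent_queue`).
///
/// The manager is a polled state machine: the owning sync service feeds it peer and RPC
/// events, then repeatedly calls `poll` and acts on the returned `ImportManagerOutcome`s.
/// An illustrative sketch of that driving loop (the `handle_outcome` helper is hypothetical
/// and not part of this module):
///
/// ```ignore
/// let mut manager = ImportManager::new(beacon_chain, &log);
/// // a peer completed a HELLO handshake and is far ahead of us
/// manager.add_peer(peer_id, remote_sync_info);
/// // drive the state machine until it has nothing left to do
/// loop {
///     match manager.poll() {
///         ImportManagerOutcome::Idle => break,
///         outcome => handle_outcome(outcome), // e.g. send the RPC request or downvote
///     }
/// }
/// ```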
pub struct ImportManager<T: BeaconChainTypes> {
/// A reference to the underlying beacon chain.
chain: Arc<BeaconChain<T>>,
    /// The current state of the manager.
    state: ManagerState,
    /// The batched block requests, keyed by the peer serving them.
    import_queue: HashMap<PeerId, BlockRequests<T::EthSpec>>,
    /// The in-progress parent lookups for blocks with unknown ancestors.
    parent_queue: Vec<ParentRequests<T::EthSpec>>,
    /// Peers that have completed a HELLO handshake and are believed to be fully synced.
    full_peers: HashSet<PeerId>,
    /// The id to assign to the next outbound RPC request.
    current_req_id: usize,
    /// The logger for the import manager.
    log: Logger,
}
impl<T: BeaconChainTypes> ImportManager<T> {
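    /// Creates a new import manager that will import blocks into the given beacon chain.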
pub fn new(beacon_chain: Arc<BeaconChain<T>>, log: &slog::Logger) -> Self {
ImportManager {
chain: beacon_chain.clone(),
state: ManagerState::Regular,
import_queue: HashMap::new(),
parent_queue: Vec::new(),
full_peers: HashSet::new(),
current_req_id: 0,
log: log.clone(),
}
}
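    /// A peer has sent us its HELLO information. If the peer is sufficiently far ahead of our
    /// head, start (or update) a batched block sync with it.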
pub fn add_peer(&mut self, peer_id: PeerId, remote: PeerSyncInfo) {
        // TODO: Improve these comments.
        // We initially try to download blocks from our current head, then search backwards,
        // batch by batch, towards our finalized epoch until we find a common chain. This must
        // be done sequentially, as each response determines the start slot of the next batch.
let local = PeerSyncInfo::from(&self.chain);
        // If the peer's head is within SLOT_IMPORT_TOLERANCE of our own, a batch sync is not
        // required
if remote.head_slot.sub(local.head_slot).as_usize() < SLOT_IMPORT_TOLERANCE {
trace!(self.log, "Ignoring full sync with peer";
"peer" => format!("{:?}", peer_id),
"peer_head_slot" => remote.head_slot,
"local_head_slot" => local.head_slot,
);
// remove the peer from the queue if it exists
self.import_queue.remove(&peer_id);
return;
}
if let Some(block_requests) = self.import_queue.get_mut(&peer_id) {
// update the target head slot
if remote.head_slot > block_requests.target_head_slot {
block_requests.target_head_slot = remote.head_slot;
}
} else {
let block_requests = BlockRequests {
target_head_slot: remote.head_slot, // this should be larger than the current head. It is checked in the SyncManager before add_peer is called
target_head_root: remote.head_root,
downloaded_blocks: Vec::new(),
state: BlockRequestsState::Queued,
forward_sync: true,
current_start_slot: self.chain.best_slot(),
};
self.import_queue.insert(peer_id, block_requests);
}
}
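    /// Handles a response to a batched `BeaconBlocks` request: verifies that the returned
    /// blocks are within the requested range, then either marks the batch complete, queues the
    /// next batch, or fails the sync with this peer.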
pub fn beacon_blocks_response(
&mut self,
peer_id: PeerId,
request_id: RequestId,
mut blocks: Vec<BeaconBlock<T::EthSpec>>,
) {
// find the request
let block_requests = match self
.import_queue
.get_mut(&peer_id)
.filter(|r| r.state == BlockRequestsState::Pending(request_id))
{
Some(req) => req,
_ => {
// No pending request, invalid request_id or coding error
warn!(self.log, "BeaconBlocks response unknown"; "request_id" => request_id);
return;
}
};
// If we are syncing up to a target head block, at least the target head block should be
// returned. If we are syncing back to our last finalized block the request should return
// at least the last block we received (last known block). In diagram form:
//
// unknown blocks requested blocks downloaded blocks
// |-------------------|------------------------|------------------------|
// ^finalized slot ^ requested start slot ^ last known block ^ remote head
if blocks.is_empty() {
debug!(self.log, "BeaconBlocks response was empty"; "request_id" => request_id);
block_requests.update_start_slot();
return;
}
// verify the range of received blocks
// Note that the order of blocks is verified in block processing
let last_sent_slot = blocks[blocks.len() - 1].slot;
if block_requests.current_start_slot > blocks[0].slot
|| block_requests
.current_start_slot
.add(MAX_BLOCKS_PER_REQUEST)
< last_sent_slot
{
            //TODO: Downvote peer - add a reason to failed
warn!(self.log, "BeaconBlocks response returned out of range blocks";
"request_id" => request_id,
"response_initial_slot" => blocks[0].slot,
"requested_initial_slot" => block_requests.current_start_slot);
// consider this sync failed
block_requests.state = BlockRequestsState::Failed;
return;
}
// Determine if more blocks need to be downloaded. There are a few cases:
// - We have downloaded a batch from our head_slot, which has not reached the remotes head
// (target head). Therefore we need to download another sequential batch.
        // - The latest batch includes blocks greater than or equal to the target_head slot,
// which means we have caught up to their head. We then check to see if the first
// block downloaded matches our head. If so, we are on the same chain and can process
// the blocks. If not we need to sync back further until we are on the same chain. So
// request more blocks.
// - We are syncing backwards (from our head slot) and need to check if we are on the same
// chain. If so, process the blocks, if not, request more blocks all the way up to
// our last finalized slot.
if block_requests.forward_sync {
// append blocks if syncing forward
block_requests.downloaded_blocks.append(&mut blocks);
} else {
// prepend blocks if syncing backwards
block_requests.downloaded_blocks.splice(..0, blocks);
}
// does the batch contain the target_head_slot
let last_element_index = block_requests.downloaded_blocks.len() - 1;
if block_requests.downloaded_blocks[last_element_index].slot
>= block_requests.target_head_slot
|| !block_requests.forward_sync
{
// if the batch is on our chain, this is complete and we can then process.
// Otherwise start backwards syncing until we reach a common chain.
let earliest_slot = block_requests.downloaded_blocks[0].slot;
//TODO: Decide which is faster. Reading block from db and comparing or calculating
//the hash tree root and comparing.
if Some(block_requests.downloaded_blocks[0].canonical_root())
== root_at_slot(&self.chain, earliest_slot)
{
block_requests.state = BlockRequestsState::Complete;
return;
}
// not on the same chain, request blocks backwards
let state = &self.chain.head().beacon_state;
let local_finalized_slot = state
.finalized_checkpoint
.epoch
.start_slot(T::EthSpec::slots_per_epoch());
// check that the request hasn't failed by having no common chain
if local_finalized_slot >= block_requests.current_start_slot {
warn!(self.log, "Peer returned an unknown chain."; "request_id" => request_id);
block_requests.state = BlockRequestsState::Failed;
return;
}
// if this is a forward sync, then we have reached the head without a common chain
// and we need to start syncing backwards.
if block_requests.forward_sync {
// Start a backwards sync by requesting earlier blocks
block_requests.forward_sync = false;
block_requests.current_start_slot = std::cmp::min(
self.chain.best_slot(),
block_requests.downloaded_blocks[0].slot,
);
}
}
// update the start slot and re-queue the batch
block_requests.update_start_slot();
}
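    /// Handles a response to a `RecentBeaconBlocks` (parent lookup) request.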
pub fn recent_blocks_response(
&mut self,
peer_id: PeerId,
request_id: RequestId,
blocks: Vec<BeaconBlock<T::EthSpec>>,
) {
// find the request
let parent_request = match self
.parent_queue
.iter_mut()
.find(|request| request.state == BlockRequestsState::Pending(request_id))
{
Some(req) => req,
None => {
// No pending request, invalid request_id or coding error
warn!(self.log, "RecentBeaconBlocks response unknown"; "request_id" => request_id);
return;
}
};
// if an empty response is given, the peer didn't have the requested block, try again
if blocks.is_empty() {
parent_request.failed_attempts += 1;
parent_request.state = BlockRequestsState::Queued;
parent_request.last_submitted_peer = peer_id;
return;
}
// currently only support a single block lookup. Reject any response that has more than 1
// block
if blocks.len() != 1 {
//TODO: Potentially downvote the peer
debug!(self.log, "Peer sent more than 1 parent. Ignoring";
"peer_id" => format!("{:?}", peer_id),
"no_parents" => blocks.len()
);
return;
}
        // add the block to the lookup chain and queue it for processing
        parent_request
            .downloaded_blocks
            .push(blocks.into_iter().next().expect("length checked above"));
        parent_request.state = BlockRequestsState::Complete;
}
pub fn _inject_error(_peer_id: PeerId, _id: RequestId) {
//TODO: Remove block state from pending
}
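    /// A peer has disconnected; drop any sync state associated with it.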
pub fn peer_disconnect(&mut self, peer_id: &PeerId) {
self.import_queue.remove(peer_id);
self.full_peers.remove(peer_id);
self.update_state();
}
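    /// Registers a peer that is believed to be fully synced, making it a candidate for parent
    /// lookup requests.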
pub fn add_full_peer(&mut self, peer_id: PeerId) {
debug!(
self.log, "Fully synced peer added";
"peer" => format!("{:?}", peer_id),
);
self.full_peers.insert(peer_id);
self.update_state();
}
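    /// A block with an unknown parent has been received (typically via gossip); start a parent
    /// lookup for it unless one is already in progress.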
pub fn add_unknown_block(&mut self, block: BeaconBlock<T::EthSpec>, peer_id: PeerId) {
        // if we are not in regular sync mode, ignore this block (a full sync will find it)
        if self.state != ManagerState::Regular {
            return;
        }
// make sure this block is not already being searched for
// TODO: Potentially store a hashset of blocks for O(1) lookups
        for parent_req in self.parent_queue.iter() {
            if parent_req
                .downloaded_blocks
                .iter()
                .any(|d_block| d_block == &block)
            {
                // we are already searching for this block, ignore it
                return;
            }
        }
let req = ParentRequests {
downloaded_blocks: vec![block],
failed_attempts: 0,
last_submitted_peer: peer_id,
state: BlockRequestsState::Queued,
};
self.parent_queue.push(req);
}
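    /// Drives the sync state machine, returning the next action the caller should perform.
    /// Returns `ImportManagerOutcome::Idle` once there is nothing further to do.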
pub(crate) fn poll(&mut self) -> ImportManagerOutcome {
loop {
// update the state of the manager
self.update_state();
// process potential block requests
if let Some(outcome) = self.process_potential_block_requests() {
return outcome;
}
// process any complete long-range batches
if let Some(outcome) = self.process_complete_batches() {
return outcome;
}
// process any parent block lookup-requests
if let Some(outcome) = self.process_parent_requests() {
return outcome;
}
// process any complete parent lookups
let (re_run, outcome) = self.process_complete_parent_requests();
if let Some(outcome) = outcome {
return outcome;
} else if !re_run {
break;
}
}
        ImportManagerOutcome::Idle
}
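    /// Recomputes the manager's state from the current queues and logs any state transition.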
fn update_state(&mut self) {
let previous_state = self.state.clone();
self.state = {
if !self.import_queue.is_empty() {
ManagerState::Syncing
} else if !self.full_peers.is_empty() {
ManagerState::Regular
} else {
ManagerState::Stalled
}
};
if self.state != previous_state {
info!(self.log, "Syncing state updated";
"old_state" => format!("{:?}", previous_state),
"new_state" => format!("{:?}", self.state),
);
}
}
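    /// Drops failed batch requests and issues an RPC request for the next queued batch, if any.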
fn process_potential_block_requests(&mut self) -> Option<ImportManagerOutcome> {
        // Check if an outbound request is required. Limiting the number of concurrent outbound
        // requests is handled at the libp2p RPC layer, so it is not needed here: if any batch
        // is in the queued state we simply submit a request for it.

        // first, remove any failed batches
let debug_log = &self.log;
self.import_queue.retain(|peer_id, block_request| {
if let BlockRequestsState::Failed = block_request.state {
debug!(debug_log, "Block import from peer failed";
"peer_id" => format!("{:?}", peer_id),
"downloaded_blocks" => block_request.downloaded_blocks.len()
);
false
} else {
true
}
});
// process queued block requests
for (peer_id, block_requests) in self
.import_queue
.iter_mut()
.find(|(_peer_id, req)| req.state == BlockRequestsState::Queued)
{
let request_id = self.current_req_id;
block_requests.state = BlockRequestsState::Pending(request_id);
self.current_req_id += 1;
let request = BeaconBlocksRequest {
head_block_root: block_requests.target_head_root,
start_slot: block_requests.current_start_slot.as_u64(),
count: MAX_BLOCKS_PER_REQUEST,
                step: 1, // request consecutive blocks
};
return Some(ImportManagerOutcome::RequestBlocks {
peer_id: peer_id.clone(),
request,
request_id,
});
}
None
}
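    /// Imports the blocks of any completed batch, yielding a re-HELLO on success or a downvote
    /// on failure.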
fn process_complete_batches(&mut self) -> Option<ImportManagerOutcome> {
let completed_batches = self
.import_queue
.iter()
.filter(|(_peer, block_requests)| block_requests.state == BlockRequestsState::Complete)
.map(|(peer, _)| peer)
.cloned()
.collect::<Vec<PeerId>>();
for peer_id in completed_batches {
let block_requests = self.import_queue.remove(&peer_id).expect("key exists");
match self.process_blocks(block_requests.downloaded_blocks.clone()) {
Ok(()) => {
//TODO: Verify it's impossible to have empty downloaded_blocks
let last_element = block_requests.downloaded_blocks.len() - 1;
debug!(self.log, "Blocks processed successfully";
"peer" => format!("{:?}", peer_id),
"start_slot" => block_requests.downloaded_blocks[0].slot,
"end_slot" => block_requests.downloaded_blocks[last_element].slot,
"no_blocks" => last_element + 1,
);
// Re-HELLO to ensure we are up to the latest head
return Some(ImportManagerOutcome::Hello(peer_id));
}
Err(e) => {
let last_element = block_requests.downloaded_blocks.len() - 1;
warn!(self.log, "Block processing failed";
"peer" => format!("{:?}", peer_id),
"start_slot" => block_requests.downloaded_blocks[0].slot,
"end_slot" => block_requests.downloaded_blocks[last_element].slot,
"no_blocks" => last_element + 1,
"error" => format!("{:?}", e),
);
return Some(ImportManagerOutcome::DownvotePeer(peer_id));
}
}
}
None
}
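    /// Drops failed parent lookups and issues a `RecentBeaconBlocks` request for the next
    /// queued lookup, if any.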
fn process_parent_requests(&mut self) -> Option<ImportManagerOutcome> {
// remove any failed requests
let debug_log = &self.log;
self.parent_queue.retain(|parent_request| {
if parent_request.state == BlockRequestsState::Failed {
debug!(debug_log, "Parent import failed";
"block" => format!("{:?}",parent_request.downloaded_blocks[0].canonical_root()),
"ancestors_found" => parent_request.downloaded_blocks.len()
);
false
} else {
true
}
});
// check to make sure there are peers to search for the parent from
if self.full_peers.is_empty() {
return None;
}
// check if parents need to be searched for
for parent_request in self.parent_queue.iter_mut() {
if parent_request.failed_attempts >= PARENT_FAIL_TOLERANCE {
parent_request.state = BlockRequestsState::Failed;
continue;
} else if parent_request.state == BlockRequestsState::Queued {
// check the depth isn't too large
if parent_request.downloaded_blocks.len() >= PARENT_DEPTH_TOLERANCE {
parent_request.state = BlockRequestsState::Failed;
continue;
}
parent_request.state = BlockRequestsState::Pending(self.current_req_id);
self.current_req_id += 1;
let last_element_index = parent_request.downloaded_blocks.len() - 1;
let parent_hash = parent_request.downloaded_blocks[last_element_index].parent_root;
let req = RecentBeaconBlocksRequest {
block_roots: vec![parent_hash],
};
                // select an arbitrary fully synced peer to attempt to download the parent block
let peer_id = self.full_peers.iter().next().expect("List is not empty");
return Some(ImportManagerOutcome::RecentRequest(peer_id.clone(), req));
}
}
None
}
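    /// Processes parent lookups whose requested block has arrived: verifies the downloaded
    /// parent, imports the chain of ancestors, or re-queues the lookup if an earlier parent is
    /// still unknown. Returns whether `poll` should run another iteration, plus an optional
    /// outcome for the caller.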
fn process_complete_parent_requests(&mut self) -> (bool, Option<ImportManagerOutcome>) {
        // flag to determine if there is more processing to drive or if the manager can be
        // switched to an idle state
let mut re_run = false;
// Find any parent_requests ready to be processed
for completed_request in self
.parent_queue
.iter_mut()
.filter(|req| req.state == BlockRequestsState::Complete)
{
            // verify that the last added (downloaded) block is indeed the parent of the block
            // we requested it for
            let last_index = completed_request.downloaded_blocks.len() - 1;
            // Note: the length is always at least 2 here (the original block plus the
            // downloaded parent), so these indexes cannot panic.
            let expected_hash = completed_request.downloaded_blocks[last_index - 1].parent_root;
            let block_hash = completed_request.downloaded_blocks[last_index].canonical_root();
            if block_hash != expected_hash {
                // remove the invalid parent block so that it can be re-requested
                let _ = completed_request.downloaded_blocks.pop();
completed_request.state = BlockRequestsState::Queued;
//TODO: Potentially downvote the peer
let peer = completed_request.last_submitted_peer.clone();
debug!(self.log, "Peer sent invalid parent. Ignoring";
"peer_id" => format!("{:?}",peer),
"received_block" => format!("{}", block_hash),
"expected_parent" => format!("{}", expected_hash),
);
return (true, Some(ImportManagerOutcome::DownvotePeer(peer)));
}
            // try to process the chain of blocks, oldest ancestor first, up to the original
            // block
            while !completed_request.downloaded_blocks.is_empty() {
                let block = completed_request
                    .downloaded_blocks
                    .pop()
                    .expect("list is not empty");
match self.chain.process_block(block.clone()) {
Ok(BlockProcessingOutcome::ParentUnknown { parent: _ }) => {
// need to keep looking for parents
completed_request.downloaded_blocks.push(block);
completed_request.state = BlockRequestsState::Queued;
re_run = true;
break;
}
Ok(BlockProcessingOutcome::Processed { block_root: _ }) => {}
Ok(outcome) => {
// it's a future slot or an invalid block, remove it and try again
completed_request.failed_attempts += 1;
trace!(
self.log, "Invalid parent block";
"outcome" => format!("{:?}", outcome),
"peer" => format!("{:?}", completed_request.last_submitted_peer),
);
completed_request.state = BlockRequestsState::Queued;
re_run = true;
return (
re_run,
Some(ImportManagerOutcome::DownvotePeer(
completed_request.last_submitted_peer.clone(),
)),
);
}
Err(e) => {
completed_request.failed_attempts += 1;
warn!(
self.log, "Parent processing error";
"error" => format!("{:?}", e)
);
completed_request.state = BlockRequestsState::Queued;
re_run = true;
return (
re_run,
Some(ImportManagerOutcome::DownvotePeer(
completed_request.last_submitted_peer.clone(),
)),
);
}
}
}
}
        // remove any fully completed and processed parent chains
        self.parent_queue
            .retain(|req| req.state != BlockRequestsState::Complete);
(re_run, None)
}
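    /// Attempts to import a sequence of blocks into the beacon chain, returning an error
    /// describing the first block that fails to process.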
fn process_blocks(&mut self, blocks: Vec<BeaconBlock<T::EthSpec>>) -> Result<(), String> {
for block in blocks {
let processing_result = self.chain.process_block(block.clone());
if let Ok(outcome) = processing_result {
match outcome {
BlockProcessingOutcome::Processed { block_root } => {
// The block was valid and we processed it successfully.
trace!(
self.log, "Imported block from network";
"slot" => block.slot,
"block_root" => format!("{}", block_root),
);
}
BlockProcessingOutcome::ParentUnknown { parent } => {
// blocks should be sequential and all parents should exist
trace!(
self.log, "ParentBlockUnknown";
"parent_root" => format!("{}", parent),
"baby_block_slot" => block.slot,
);
return Err(format!(
"Block at slot {} has an unknown parent.",
block.slot
));
}
BlockProcessingOutcome::FutureSlot {
present_slot,
block_slot,
} => {
                        if block_slot > present_slot + FUTURE_SLOT_TOLERANCE {
// The block is too far in the future, drop it.
trace!(
self.log, "FutureBlock";
"msg" => "block for future slot rejected, check your time",
"present_slot" => present_slot,
"block_slot" => block_slot,
"FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
);
return Err(format!(
"Block at slot {} is too far in the future",
block.slot
));
} else {
// The block is in the future, but not too far.
trace!(
self.log, "QueuedFutureBlock";
"msg" => "queuing future block, check your time",
"present_slot" => present_slot,
"block_slot" => block_slot,
"FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
);
}
}
BlockProcessingOutcome::FinalizedSlot => {
trace!(
self.log, "Finalized or earlier block processed";
"outcome" => format!("{:?}", outcome),
);
// block reached our finalized slot or was earlier, move to the next block
}
_ => {
trace!(
self.log, "InvalidBlock";
"msg" => "peer sent invalid block",
"outcome" => format!("{:?}", outcome),
);
return Err(format!("Invalid block at slot {}", block.slot));
}
}
} else {
trace!(
self.log, "BlockProcessingFailure";
"msg" => "unexpected condition in processing block.",
"outcome" => format!("{:?}", processing_result)
);
return Err(format!(
"Unexpected block processing error: {:?}",
processing_result
));
}
}
Ok(())
}
}
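/// Returns the block root of our canonical chain at the given slot, if known.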
fn root_at_slot<T: BeaconChainTypes>(
chain: &Arc<BeaconChain<T>>,
target_slot: Slot,
) -> Option<Hash256> {
chain
.rev_iter_block_roots()
.find(|(_root, slot)| *slot == target_slot)
.map(|(root, _slot)| root)
}