Ensure batches align to epoch boundaries (#1021)

* Ensure batches align to epoch boundaries

* Clean up range_sync logs
divma 2020-04-26 23:18:09 -05:00 committed by GitHub
parent a50ade3ffc
commit fa8154e3da
4 changed files with 87 additions and 63 deletions

View File

@ -1,4 +1,4 @@
use super::chain::BLOCKS_PER_BATCH;
use super::chain::EPOCHS_PER_BATCH;
use eth2_libp2p::rpc::methods::*;
use eth2_libp2p::rpc::RequestId;
use eth2_libp2p::PeerId;
@ -76,7 +76,10 @@ impl<T: EthSpec> Batch<T> {
pub fn to_blocks_by_range_request(&self) -> BlocksByRangeRequest {
BlocksByRangeRequest {
start_slot: self.start_slot.into(),
count: std::cmp::min(BLOCKS_PER_BATCH, self.end_slot.sub(self.start_slot).into()),
count: std::cmp::min(
T::slots_per_epoch() * EPOCHS_PER_BATCH,
self.end_slot.sub(self.start_slot).into(),
),
step: 1,
}
}
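
For reference, the new count works out as in the following minimal standalone sketch. It uses plain u64 arithmetic and assumes the mainnet value of 32 slots per epoch instead of the `Slot`/`EthSpec` types from the `types` crate; it illustrates the arithmetic only and is not the crate's code.

const EPOCHS_PER_BATCH: u64 = 2;
const SLOTS_PER_EPOCH: u64 = 32; // mainnet value, assumed for the example

fn request_count(start_slot: u64, end_slot: u64) -> u64 {
    // Ask for at most EPOCHS_PER_BATCH epochs worth of blocks, never past the batch's end slot.
    std::cmp::min(SLOTS_PER_EPOCH * EPOCHS_PER_BATCH, end_slot - start_slot)
}

fn main() {
    assert_eq!(request_count(64, 128), 64); // a full two-epoch batch
    assert_eq!(request_count(64, 96), 32); // a truncated batch near the target head
}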

View File

@ -10,14 +10,15 @@ use slog::{crit, debug, warn};
use std::collections::HashSet;
use std::sync::Arc;
use tokio::sync::mpsc;
use types::{Hash256, SignedBeaconBlock, Slot};
use types::{Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot};
/// Blocks are downloaded in batches from peers. This constant specifies how many blocks per batch
/// is requested. There is a timeout for each batch request. If this value is too high, we will
/// downvote peers with poor bandwidth. This can be set arbitrarily high, in which case the
/// Blocks are downloaded in batches from peers. This constant specifies how many epochs' worth of
/// blocks per batch are requested _at most_. A batch may request fewer blocks to account for
/// already requested slots. There is a timeout for each batch request. If this value is too high,
/// we will downvote peers with poor bandwidth. This can be set arbitrarily high, in which case the
/// responder will fill the response up to the max request size, assuming they have the bandwidth
/// to do so.
pub const BLOCKS_PER_BATCH: u64 = 64;
pub const EPOCHS_PER_BATCH: u64 = 2;
/// The number of times to retry a batch before the chain is considered failed and removed.
const MAX_BATCH_RETRIES: u8 = 5;
@ -49,7 +50,7 @@ pub struct SyncingChain<T: BeaconChainTypes> {
id: ChainId,
/// The original start slot when this chain was initialised.
pub start_slot: Slot,
pub start_epoch: Epoch,
/// The target head slot.
pub target_head_slot: Slot,
@ -80,8 +81,7 @@ pub struct SyncingChain<T: BeaconChainTypes> {
/// The current state of the chain.
pub state: ChainSyncingState,
/// A random id given to a batch process request. This is None if there is no ongoing batch
/// process.
/// The current processing batch, if any.
current_processing_batch: Option<Batch<T::EthSpec>>,
/// A send channel to the sync manager. This is given to the batch processor thread to report
@ -106,7 +106,7 @@ pub enum ChainSyncingState {
impl<T: BeaconChainTypes> SyncingChain<T> {
pub fn new(
id: u64,
start_slot: Slot,
start_epoch: Epoch,
target_head_slot: Slot,
target_head_root: Hash256,
peer_id: PeerId,
@ -119,7 +119,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
SyncingChain {
id,
start_slot,
start_epoch,
target_head_slot,
target_head_root,
pending_batches: PendingBatches::new(),
@ -138,8 +138,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
/// Returns the latest slot number that has been processed.
fn current_processed_slot(&self) -> Slot {
self.start_slot
.saturating_add(self.to_be_processed_id.saturating_sub(1u64) * BLOCKS_PER_BATCH)
self.start_epoch
.start_slot(T::EthSpec::slots_per_epoch())
.saturating_add(
self.to_be_processed_id.saturating_sub(1u64)
* T::EthSpec::slots_per_epoch()
* EPOCHS_PER_BATCH,
)
}
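
The updated progress calculation can be checked with a small standalone sketch, again with plain u64s and the mainnet 32 slots per epoch assumed purely for illustration.

const EPOCHS_PER_BATCH: u64 = 2;
const SLOTS_PER_EPOCH: u64 = 32; // mainnet value, assumed for the example

fn current_processed_slot(start_epoch: u64, to_be_processed_id: u64) -> u64 {
    // Mirror of the arithmetic above: the start of the chain's first epoch plus one
    // full batch of slots for every batch id that has already been processed.
    start_epoch * SLOTS_PER_EPOCH
        + to_be_processed_id.saturating_sub(1) * SLOTS_PER_EPOCH * EPOCHS_PER_BATCH
}

fn main() {
    // A chain starting at epoch 10 (slot 320) with batches 1..=3 processed, so the
    // next batch to process has id 4: progress is 320 + 3 * 64 = 512.
    assert_eq!(current_processed_slot(10, 4), 512);
}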
/// A batch of blocks has been received. This function gets run on all chains and should
@ -539,7 +544,7 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
pub fn start_syncing(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
local_finalized_slot: Slot,
local_finalized_epoch: Epoch,
) {
// A local finalized slot is provided as other chains may have made
// progress whilst this chain was Stopped or paused. If so, update the `processed_batch_id` to
@ -550,11 +555,17 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
// to start from this point and re-index all subsequent batches starting from one
// (effectively creating a new chain).
if local_finalized_slot > self.current_processed_slot() {
let local_finalized_slot = local_finalized_epoch.start_slot(T::EthSpec::slots_per_epoch());
let current_processed_slot = self.current_processed_slot();
if local_finalized_slot > current_processed_slot {
// Advance the chain to account for already downloaded blocks.
self.start_epoch = local_finalized_epoch;
debug!(self.log, "Updating chain's progress";
"chain_id" => self.id,
"prev_completed_slot" => self.current_processed_slot(),
"new_completed_slot" => local_finalized_slot.as_u64());
"prev_completed_slot" => current_processed_slot,
"new_completed_slot" => self.current_processed_slot());
// Re-index batches
*self.to_be_downloaded_id = 1;
*self.to_be_processed_id = 1;
@ -706,6 +717,9 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
/// Returns the next required batch from the chain if it exists. If there are no more batches
/// required, `None` is returned.
fn get_next_batch(&mut self, peer_id: PeerId) -> Option<Batch<T::EthSpec>> {
let slots_per_epoch = T::EthSpec::slots_per_epoch();
let blocks_per_batch = slots_per_epoch * EPOCHS_PER_BATCH;
// only request batches up to the buffer size limit
if self
.completed_batches
@ -716,16 +730,23 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
return None;
}
let batch_start_slot = self.start_epoch.start_slot(slots_per_epoch)
+ self.to_be_downloaded_id.saturating_sub(1) * blocks_per_batch;
// don't request batches beyond the target head slot
let batch_start_slot =
self.start_slot + self.to_be_downloaded_id.saturating_sub(1) * BLOCKS_PER_BATCH;
if batch_start_slot > self.target_head_slot {
return None;
}
// truncate the batch to the target head of the chain
// truncate the batch to the epoch containing the target head of the chain
let batch_end_slot = std::cmp::min(
batch_start_slot + BLOCKS_PER_BATCH,
self.target_head_slot.saturating_add(1u64),
// request either a batch containing the max number of blocks per batch
batch_start_slot + blocks_per_batch,
// or a batch of one epoch of blocks, which contains the `target_head_slot`
self.target_head_slot
.saturating_add(slots_per_epoch)
.epoch(slots_per_epoch)
.start_slot(slots_per_epoch),
);
let batch_id = self.to_be_downloaded_id;
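
The truncation above can be checked with a standalone sketch (plain u64s, 32 slots per epoch assumed): the second arm rounds the target head up to the next epoch boundary, so the final batch still ends on an epoch boundary while containing `target_head_slot`.

const EPOCHS_PER_BATCH: u64 = 2;
const SLOTS_PER_EPOCH: u64 = 32; // mainnet value, assumed for the example

fn batch_end_slot(batch_start_slot: u64, target_head_slot: u64) -> u64 {
    let blocks_per_batch = SLOTS_PER_EPOCH * EPOCHS_PER_BATCH;
    // Round the target head up to the next epoch boundary so the last batch still
    // ends on an epoch boundary while containing `target_head_slot`.
    let head_epoch_boundary =
        (target_head_slot + SLOTS_PER_EPOCH) / SLOTS_PER_EPOCH * SLOTS_PER_EPOCH;
    std::cmp::min(batch_start_slot + blocks_per_batch, head_epoch_boundary)
}

fn main() {
    // Far from the head: a full two-epoch batch (slots 64..128).
    assert_eq!(batch_end_slot(64, 1_000), 128);
    // Near the head (slot 100 lies in epoch 3): truncate to the boundary at slot 128.
    assert_eq!(batch_end_slot(96, 100), 128);
}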

View File

@ -13,7 +13,7 @@ use slog::{debug, error, info};
use std::sync::Arc;
use tokio::sync::mpsc;
use types::EthSpec;
use types::{Hash256, Slot};
use types::{Epoch, Hash256, Slot};
/// The state of the long range/batch sync.
#[derive(Clone)]
@ -207,7 +207,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
/// This removes any out-dated chains, swaps to any higher priority finalized chains and
/// updates the state of the collection.
pub fn update_finalized(&mut self, network: &mut SyncNetworkContext<T::EthSpec>) {
let local_slot = {
let local_epoch = {
let local = match PeerSyncInfo::from_chain(&self.beacon_chain) {
Some(local) => local,
None => {
@ -219,9 +219,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
}
};
local
.finalized_epoch
.start_slot(T::EthSpec::slots_per_epoch())
local.finalized_epoch
};
// Remove any outdated finalized chains
@ -242,11 +240,11 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
})
{
// A chain has more peers. Swap the syncing chain
debug!(self.log, "Switching finalized chains to sync"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_slot"=> chain.start_slot);
debug!(self.log, "Switching finalized chains to sync"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_epoch"=> local_epoch);
// update the state to a new finalized state
let state = RangeSyncState::Finalized {
start_slot: chain.start_slot,
start_slot: chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch()),
head_slot: chain.target_head_slot,
head_root: chain.target_head_root,
};
@ -255,7 +253,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
// Stop the current chain from syncing
self.finalized_chains[index].stop_syncing();
// Start the new chain
self.finalized_chains[new_index].start_syncing(network, local_slot);
self.finalized_chains[new_index].start_syncing(network, local_epoch);
}
} else if let Some(chain) = self
.finalized_chains
@ -263,10 +261,10 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.max_by_key(|chain| chain.peer_pool.len())
{
// There is no currently syncing finalization chain, starting the one with the most peers
debug!(self.log, "New finalized chain started syncing"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_slot"=> chain.start_slot);
chain.start_syncing(network, local_slot);
debug!(self.log, "New finalized chain started syncing"; "new_target_root" => format!("{}", chain.target_head_root), "new_end_slot" => chain.target_head_slot, "new_start_epoch"=> chain.start_epoch);
chain.start_syncing(network, local_epoch);
let state = RangeSyncState::Finalized {
start_slot: chain.start_slot,
start_slot: chain.start_epoch.start_slot(T::EthSpec::slots_per_epoch()),
head_slot: chain.target_head_slot,
head_root: chain.target_head_root,
};
@ -279,17 +277,17 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
// for the syncing API, we find the minimal start_slot and the maximum
// target_slot of all head chains to report back.
let (min_slot, max_slot) = self.head_chains.iter().fold(
(Slot::from(0u64), Slot::from(0u64)),
let (min_epoch, max_slot) = self.head_chains.iter().fold(
(Epoch::from(0u64), Slot::from(0u64)),
|(min, max), chain| {
(
std::cmp::min(min, chain.start_slot),
std::cmp::min(min, chain.start_epoch),
std::cmp::max(max, chain.target_head_slot),
)
},
);
let head_state = RangeSyncState::Head {
start_slot: min_slot,
start_slot: min_epoch.start_slot(T::EthSpec::slots_per_epoch()),
head_slot: max_slot,
};
self.state = head_state;
@ -300,7 +298,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
/// Add a new finalized chain to the collection.
pub fn new_finalized_chain(
&mut self,
local_finalized_slot: Slot,
local_finalized_epoch: Epoch,
target_head: Hash256,
target_slot: Slot,
peer_id: PeerId,
@ -309,7 +307,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
let chain_id = rand::random();
self.finalized_chains.push(SyncingChain::new(
chain_id,
local_finalized_slot,
local_finalized_epoch,
target_slot,
target_head,
peer_id,
@ -324,7 +322,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
pub fn new_head_chain(
&mut self,
network: &mut SyncNetworkContext<T::EthSpec>,
remote_finalized_slot: Slot,
remote_finalized_epoch: Epoch,
target_head: Hash256,
target_slot: Slot,
peer_id: PeerId,
@ -340,7 +338,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
let chain_id = rand::random();
let mut new_head_chain = SyncingChain::new(
chain_id,
remote_finalized_slot,
remote_finalized_epoch,
target_slot,
target_head,
peer_id,
@ -349,7 +347,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
self.log.clone(),
);
// All head chains can sync simultaneously
new_head_chain.start_syncing(network, remote_finalized_slot);
new_head_chain.start_syncing(network, remote_finalized_epoch);
self.head_chains.push(new_head_chain);
}
@ -434,7 +432,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.fork_choice
.contains_block(&chain.target_head_root)
{
debug!(log_ref, "Purging out of finalized chain"; "start_slot" => chain.start_slot, "end_slot" => chain.target_head_slot);
debug!(log_ref, "Purging out of finalized chain"; "start_epoch" => chain.start_epoch, "end_slot" => chain.target_head_slot);
chain.status_peers(network);
false
} else {
@ -447,7 +445,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
.fork_choice
.contains_block(&chain.target_head_root)
{
debug!(log_ref, "Purging out of date head chain"; "start_slot" => chain.start_slot, "end_slot" => chain.target_head_slot);
debug!(log_ref, "Purging out of date head chain"; "start_epoch" => chain.start_epoch, "end_slot" => chain.target_head_slot);
chain.status_peers(network);
false
} else {
@ -483,7 +481,7 @@ impl<T: BeaconChainTypes> ChainCollection<T> {
chain
};
debug!(self.log, "Chain was removed"; "start_slot" => chain.start_slot.as_u64(), "end_slot" => chain.target_head_slot.as_u64());
debug!(self.log, "Chain was removed"; "start_epoch" => chain.start_epoch, "end_slot" => chain.target_head_slot);
// update the state
self.update_finalized(network);

View File

@ -11,10 +11,10 @@
//! ## Finalized chain sync
//!
//! This occurs when a peer connects that claims to have a finalized head slot that is greater
//! than our own. In this case, we form a chain from our last finalized slot, to their claimed
//! than our own. In this case, we form a chain from our last finalized epoch, to their claimed
//! finalized slot. Any peer that also claims to have this last finalized slot is added to a pool
//! of peers from which batches of blocks may be downloaded. Blocks are downloaded until
//! the finalized slot of the chain is reached. Once reached, all peers within the pool are sent a
//! of peers from which batches of blocks may be downloaded. Blocks are downloaded until the
//! finalized slot of the chain is reached. Once reached, all peers within the pool are sent a
//! STATUS message to potentially start a head chain sync, or check if further finalized chains
//! need to be downloaded.
//!
@ -26,11 +26,11 @@
//!
//! ## Head Chain Sync
//!
//! If a peer joins and there is no active finalized chains being synced, and it's head is
//! beyond our `SLOT_IMPORT_TOLERANCE` a chain is formed starting from this peers finalized slot
//! (this has been necessarily downloaded by our node, otherwise we would start a finalized chain
//! sync) to this peers head slot. Any other peers that match this head slot and head root, are
//! added to this chain's peer pool, which will be downloaded in parallel.
//! If a peer joins and there are no active finalized chains being synced, and its head is beyond
//! our `SLOT_IMPORT_TOLERANCE`, a chain is formed starting from this peer's finalized epoch (this
//! has necessarily been downloaded by our node, otherwise we would start a finalized chain sync)
//! to this peer's head slot. Any other peers that match this head slot and head root are added to
//! this chain's peer pool, which will be downloaded in parallel.
//!
//! Unlike finalized chains, head chains can be synced in parallel.
//!
@ -65,7 +65,7 @@ pub struct RangeSync<T: BeaconChainTypes> {
/// A collection of chains that need to be downloaded. This stores any head or finalized chains
/// that need to be downloaded.
chains: ChainCollection<T>,
/// Peers that join whilst a finalized chain is being download, sit in this set. Once the
/// Peers that join whilst a finalized chain is being downloaded, sit in this set. Once the
/// finalized chain(s) complete, these peer's get STATUS'ed to update their head slot before
/// the head chains are formed and downloaded.
awaiting_head_peers: HashSet<PeerId>,
@ -162,7 +162,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
.chains
.get_finalized_mut(remote_info.finalized_root, remote_finalized_slot)
{
debug!(self.log, "Finalized chain exists, adding peer"; "peer_id" => format!("{:?}", peer_id), "target_root" => format!("{}", chain.target_head_root), "end_slot" => chain.target_head_slot, "start_slot"=> chain.start_slot);
debug!(self.log, "Finalized chain exists, adding peer"; "peer_id" => format!("{:?}", peer_id), "target_root" => format!("{}", chain.target_head_root), "end_slot" => chain.target_head_slot, "start_epoch"=> chain.start_epoch);
// add the peer to the chain's peer pool
chain.add_peer(network, peer_id);
@ -174,10 +174,10 @@ impl<T: BeaconChainTypes> RangeSync<T> {
} else {
// there is no finalized chain that matches this peer's last finalized target
// create a new finalized chain
debug!(self.log, "New finalized chain added to sync"; "peer_id" => format!("{:?}", peer_id), "start_slot" => local_finalized_slot.as_u64(), "end_slot" => remote_finalized_slot.as_u64(), "finalized_root" => format!("{}", remote_info.finalized_root));
debug!(self.log, "New finalized chain added to sync"; "peer_id" => format!("{:?}", peer_id), "start_epoch" => local_finalized_slot, "end_slot" => remote_finalized_slot, "finalized_root" => format!("{}", remote_info.finalized_root));
self.chains.new_finalized_chain(
local_finalized_slot,
local_info.finalized_epoch,
remote_info.finalized_root,
remote_finalized_slot,
peer_id,
@ -218,11 +218,13 @@ impl<T: BeaconChainTypes> RangeSync<T> {
chain.add_peer(network, peer_id);
} else {
// There are no other head chains that match this peer's status, create a new one, and
let start_slot = std::cmp::min(local_info.head_slot, remote_finalized_slot);
debug!(self.log, "Creating a new syncing head chain"; "head_root" => format!("{}",remote_info.head_root), "start_slot" => start_slot, "head_slot" => remote_info.head_slot, "peer_id" => format!("{:?}", peer_id));
let start_epoch = std::cmp::min(local_info.head_slot, remote_finalized_slot)
.epoch(T::EthSpec::slots_per_epoch());
debug!(self.log, "Creating a new syncing head chain"; "head_root" => format!("{}",remote_info.head_root), "start_epoch" => start_epoch, "head_slot" => remote_info.head_slot, "peer_id" => format!("{:?}", peer_id));
self.chains.new_head_chain(
network,
start_slot,
start_epoch,
remote_info.head_root,
remote_info.head_slot,
peer_id,
@ -288,7 +290,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
}) {
Some((index, ProcessingResult::RemoveChain)) => {
let chain = self.chains.remove_finalized_chain(index);
debug!(self.log, "Finalized chain removed"; "start_slot" => chain.start_slot.as_u64(), "end_slot" => chain.target_head_slot.as_u64());
debug!(self.log, "Finalized chain removed"; "start_epoch" => chain.start_epoch, "end_slot" => chain.target_head_slot);
// update the state of the collection
self.chains.update_finalized(network);
@ -325,7 +327,7 @@ impl<T: BeaconChainTypes> RangeSync<T> {
}) {
Some((index, ProcessingResult::RemoveChain)) => {
let chain = self.chains.remove_head_chain(index);
debug!(self.log, "Head chain completed"; "start_slot" => chain.start_slot.as_u64(), "end_slot" => chain.target_head_slot.as_u64());
debug!(self.log, "Head chain completed"; "start_epoch" => chain.start_epoch, "end_slot" => chain.target_head_slot);
// the chain is complete, re-status it's peers and remove it
chain.status_peers(network);